001: /*
002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
003: *
004: * Licensed under the Aduna BSD-style license.
005: */
006: package org.openrdf.sail.nativerdf.datastore;
007:
008: import java.io.File;
009: import java.io.IOException;
010: import java.io.RandomAccessFile;
011: import java.nio.ByteBuffer;
012: import java.nio.channels.FileChannel;
013: import java.util.Arrays;
014: import java.util.NoSuchElementException;
015:
/**
 * Class supplying access to a data file. A data file stores data sequentially.
 * The file starts with a four byte header (a three byte magic number followed
 * by a one byte file format version). Each entry starts with the entry's
 * length (4 bytes), followed by the data itself. File offsets are used to
 * identify entries.
 *
 * @author Arjohn Kampman
 */
public class DataFile {

    /*-----------*
     * Constants *
     *-----------*/

    /**
     * Magic number "Native Data File" to detect whether the file is actually a
     * data file. The first three bytes of the file should be equal to this
     * magic number.
     */
    private static final byte[] MAGIC_NUMBER = new byte[] { 'n', 'd', 'f' };

    /**
     * File format version, stored as the fourth byte in data files.
     */
    private static final byte FILE_FORMAT_VERSION = 1;

    /**
     * Total length of the file header: the magic number plus the version byte.
     */
    private static final long HEADER_LENGTH = MAGIC_NUMBER.length + 1;

    /*-----------*
     * Variables *
     *-----------*/

    private final File file;

    private final RandomAccessFile raf;

    private final FileChannel fileChannel;

    private final boolean forceSync;

    /*--------------*
     * Constructors *
     *--------------*/

    /**
     * Opens (or creates) the specified data file without forced syncing.
     *
     * @param file
     *        The data file to open.
     * @throws IOException
     *         If the file could not be created or opened, or if it is not a
     *         compatible data file.
     */
    public DataFile(File file) throws IOException {
        this(file, false);
    }

    /**
     * Opens (or creates) the specified data file. An empty file gets a fresh
     * header written to it; a non-empty file has its header verified.
     *
     * @param file
     *        The data file to open.
     * @param forceSync
     *        If <tt>true</tt>, calls to {@link #sync()} force changes to the
     *        underlying storage device; if <tt>false</tt>, {@link #sync()} is
     *        a no-op.
     * @throws IOException
     *         If the file could not be created or opened, or if it is not a
     *         compatible data file.
     */
    public DataFile(File file, boolean forceSync) throws IOException {
        this.file = file;
        this.forceSync = forceSync;

        if (!file.exists()) {
            boolean created = file.createNewFile();
            if (!created) {
                throw new IOException("Failed to create file: " + file);
            }
        }

        // Open a read/write channel to the file
        raf = new RandomAccessFile(file, "rw");
        fileChannel = raf.getChannel();

        boolean initialized = false;
        try {
            if (fileChannel.size() == 0L) {
                // Empty file, write header
                writeHeader();
                sync();
            } else {
                verifyHeader();
            }
            initialized = true;
        } finally {
            if (!initialized) {
                // Don't leak the file handle when the header is rejected or
                // an I/O error occurs; the original failure is what matters.
                try {
                    raf.close();
                } catch (IOException ignored) {
                    // deliberately ignored, the primary exception propagates
                }
            }
        }
    }

    /*---------*
     * Methods *
     *---------*/

    /**
     * Gets the file that this object operates on.
     *
     * @return The file that was supplied to the constructor.
     */
    public File getFile() {
        return file;
    }

    /**
     * Stores the specified data and returns the byte-offset at which it has
     * been stored. The data is appended at the current end of the file.
     *
     * @param data
     *        The data to store, must not be <tt>null</tt>.
     * @return The byte-offset in the file at which the data was stored.
     * @throws IOException
     *         If an I/O error occurred.
     */
    public long storeData(byte[] data) throws IOException {
        assert data != null : "data must not be null";

        long offset = fileChannel.size();

        // Entry layout: 4-byte length prefix followed by the data itself
        ByteBuffer buf = ByteBuffer.allocate(data.length + 4);
        buf.putInt(data.length);
        buf.put(data);
        buf.flip();

        writeFully(buf, offset);

        return offset;
    }

    /**
     * Gets the data that is stored at the specified offset.
     *
     * @param offset
     *        An offset in the data file, must be larger than 0.
     * @return The data that was found on the specified offset.
     * @exception IOException
     *            If an I/O error occurred, or if no complete entry is stored
     *            at the specified offset.
     */
    public byte[] getData(long offset) throws IOException {
        assert offset > 0 : "offset must be larger than 0, is: " + offset;

        // TODO: maybe get more data in one go is more efficient?
        ByteBuffer lengthBuf = ByteBuffer.allocate(4);
        readFully(lengthBuf, offset);
        int dataLength = lengthBuf.getInt(0);

        // Reject corrupt length prefixes before allocating anything: a
        // negative or oversized length can never describe a stored entry.
        long available = fileChannel.size() - offset - 4L;
        if (dataLength < 0 || dataLength > available) {
            throw new IOException("Invalid data length " + dataLength
                    + " at offset " + offset + " in file: " + file);
        }

        byte[] data = new byte[dataLength];
        readFully(ByteBuffer.wrap(data), offset + 4L);

        return data;
    }

    /**
     * Discards all stored data by truncating the file to just its header.
     *
     * @throws IOException
     *         If an I/O error occurred.
     */
    public void clear() throws IOException {
        fileChannel.truncate(HEADER_LENGTH);
    }

    /**
     * Syncs any unstored data to the data file. This only has an effect when
     * this object was created with <tt>forceSync</tt> set to <tt>true</tt>.
     *
     * @throws IOException
     *         If an I/O error occurred.
     */
    public void sync() throws IOException {
        if (forceSync) {
            fileChannel.force(false);
        }
    }

    /**
     * Closes the data file, releasing any file locks that it might have.
     *
     * @throws IOException
     *         If an I/O error occurred.
     */
    public void close() throws IOException {
        raf.close();
    }

    /**
     * Gets an iterator that can be used to iterate over all stored data.
     *
     * @return a DataIterator.
     */
    public DataIterator iterator() {
        return new DataIterator();
    }

    /**
     * Writes the magic number and file format version at the start of the
     * file.
     */
    private void writeHeader() throws IOException {
        ByteBuffer buf = ByteBuffer.allocate((int) HEADER_LENGTH);
        buf.put(MAGIC_NUMBER);
        buf.put(FILE_FORMAT_VERSION);
        buf.flip();

        writeFully(buf, 0L);
    }

    /**
     * Verifies that the file starts with a complete header carrying the
     * expected magic number and a supported file format version.
     */
    private void verifyHeader() throws IOException {
        if (fileChannel.size() < HEADER_LENGTH) {
            throw new IOException(
                    "File too short to be a compatible data file");
        }

        ByteBuffer buf = ByteBuffer.allocate((int) HEADER_LENGTH);
        readFully(buf, 0L);
        buf.flip();

        byte[] magicNumber = new byte[MAGIC_NUMBER.length];
        buf.get(magicNumber);
        byte version = buf.get();

        if (!Arrays.equals(MAGIC_NUMBER, magicNumber)) {
            throw new IOException(
                    "File doesn't contain compatible data records");
        }

        if (version > FILE_FORMAT_VERSION) {
            throw new IOException(
                    "Unable to read data file; it uses a newer file format");
        } else if (version != FILE_FORMAT_VERSION) {
            throw new IOException(
                    "Unable to read data file; invalid file format version: "
                            + version);
        }
    }

    /**
     * Reads from the file, starting at the specified offset, until the
     * supplied buffer is full. A single channel read is allowed to return
     * fewer bytes than requested, hence the loop.
     */
    private void readFully(ByteBuffer buf, long offset) throws IOException {
        long position = offset;
        while (buf.hasRemaining()) {
            int bytesRead = fileChannel.read(buf, position);
            if (bytesRead < 0) {
                throw new IOException("Unexpected end of file at offset "
                        + position + " in file: " + file);
            }
            position += bytesRead;
        }
    }

    /**
     * Writes the remaining content of the supplied buffer to the file,
     * starting at the specified offset. A single channel write is allowed to
     * write fewer bytes than requested, hence the loop.
     */
    private void writeFully(ByteBuffer buf, long offset) throws IOException {
        long position = offset;
        while (buf.hasRemaining()) {
            position += fileChannel.write(buf, position);
        }
    }

    /**
     * An iterator that iterates over the data that is stored in a data file,
     * in the order in which the entries appear in the file. It does not
     * implement <tt>java.util.Iterator</tt> because its methods can throw
     * {@link IOException}s.
     */
    public class DataIterator {

        /** File offset of the entry that the next call to next() returns. */
        private long position = HEADER_LENGTH;

        /**
         * Checks whether there is more data beyond the current position.
         *
         * @return <tt>true</tt> if the current position lies before the end
         *         of the file.
         * @throws IOException
         *         If an I/O error occurred.
         */
        public boolean hasNext() throws IOException {
            return position < fileChannel.size();
        }

        /**
         * Returns the data of the entry at the current position and advances
         * to the following entry.
         *
         * @return The data of the next entry.
         * @throws NoSuchElementException
         *         If there are no more entries.
         * @throws IOException
         *         If an I/O error occurred.
         */
        public byte[] next() throws IOException {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }

            byte[] data = getData(position);
            // Skip the 4-byte length prefix plus the data itself
            position += (4 + data.length);
            return data;
        }
    }
}
|