001: /*
002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
003: *
004: * Licensed under the Aduna BSD-style license.
005: */
006: package org.openrdf.sail.nativerdf.datastore;
007:
008: import java.io.File;
009: import java.io.IOException;
010: import java.util.Arrays;
011: import java.util.zip.CRC32;
012:
013: import info.aduna.io.ByteArrayUtil;
014:
015: /**
016: * Class that provides indexed storage and retrieval of arbitrary length data.
017: *
018: * @author Arjohn Kampman
019: */
020: public class DataStore {
021:
022: /*-----------*
023: * Variables *
024: *-----------*/
025:
026: private DataFile dataFile;
027:
028: private IDFile idFile;
029:
030: private HashFile hashFile;
031:
032: /**
033: * The checksum to use for calculating data hashes.
034: */
035: private CRC32 crc32 = new CRC32();
036:
037: /*--------------*
038: * Constructors *
039: *--------------*/
040:
041: public DataStore(File dataDir, String filePrefix)
042: throws IOException {
043: this (dataDir, filePrefix, false);
044: }
045:
046: public DataStore(File dataDir, String filePrefix, boolean forceSync)
047: throws IOException {
048: dataFile = new DataFile(new File(dataDir, filePrefix + ".dat"),
049: forceSync);
050: idFile = new IDFile(new File(dataDir, filePrefix + ".id"),
051: forceSync);
052: hashFile = new HashFile(
053: new File(dataDir, filePrefix + ".hash"), forceSync);
054: }
055:
056: /*---------*
057: * Methods *
058: *---------*/
059:
060: /**
061: * Gets the value for the specified ID.
062: *
063: * @param id
064: * A value ID, should be larger than 0.
065: * @return The value for the ID, or <tt>null</tt> if no such value could be
066: * found.
067: * @exception IOException
068: * If an I/O error occurred.
069: */
070: public byte[] getData(int id) throws IOException {
071: assert id > 0 : "id must be larger than 0, is: " + id;
072:
073: // Data not in cache or cache not used, fetch from file
074: long offset = idFile.getOffset(id);
075:
076: if (offset != 0L) {
077: return dataFile.getData(offset);
078: }
079:
080: return null;
081: }
082:
083: /**
084: * Gets the ID for the specified value.
085: *
086: * @param queryData
087: * The value to get the ID for, must not be <tt>null</tt>.
088: * @return The ID for the specified value, or <tt>-1</tt> if no such ID
089: * could be found.
090: * @exception IOException
091: * If an I/O error occurred.
092: */
093: public int getID(byte[] queryData) throws IOException {
094: assert queryData != null : "queryData must not be null";
095:
096: int id = -1;
097:
098: // Value not in cache or cache not used, fetch from file
099: int hash = getDataHash(queryData);
100: HashFile.IDIterator iter = hashFile.getIDIterator(hash);
101:
102: while ((id = iter.next()) >= 0) {
103: long offset = idFile.getOffset(id);
104: byte[] data = dataFile.getData(offset);
105:
106: if (Arrays.equals(queryData, data)) {
107: // Matching data found
108: break;
109: }
110: }
111:
112: return id;
113: }
114:
115: /**
116: * Returns the maximum value-ID that is in use.
117: *
118: * @return The largest ID, or <tt>0</tt> if the store does not contain any
119: * values.
120: * @throws IOException
121: * If an I/O error occurs.
122: */
123: public int getMaxID() throws IOException {
124: return idFile.getMaxID();
125: }
126:
127: /**
128: * Stores the supplied value and returns the ID that has been assigned to it.
129: * In case the data to store is already present, the ID of this existing data
130: * is returned.
131: *
132: * @param data
133: * The data to store, must not be <tt>null</tt>.
134: * @return The ID that has been assigned to the value.
135: * @exception IOException
136: * If an I/O error occurred.
137: */
138: public int storeData(byte[] data) throws IOException {
139: assert data != null : "data must not be null";
140:
141: int id = getID(data);
142:
143: if (id == -1) {
144: // Data not stored yet, store it under a new ID.
145: long offset = dataFile.storeData(data);
146: id = idFile.storeOffset(offset);
147: hashFile.storeID(getDataHash(data), id);
148: }
149:
150: return id;
151: }
152:
153: /**
154: * Synchronizes any recent changes to the data to disk.
155: *
156: * @exception IOException
157: * If an I/O error occurred.
158: */
159: public void sync() throws IOException {
160: hashFile.sync();
161: idFile.sync();
162: dataFile.sync();
163: }
164:
165: /**
166: * Removes all values from the DataStore.
167: *
168: * @exception IOException
169: * If an I/O error occurred.
170: */
171: public void clear() throws IOException {
172: hashFile.clear();
173: idFile.clear();
174: dataFile.clear();
175: }
176:
177: /**
178: * Closes the DataStore, releasing any file references, etc. In case a
179: * transaction is currently open, it will be rolled back. Once closed, the
180: * DataStore can no longer be used.
181: *
182: * @exception IOException
183: * If an I/O error occurred.
184: */
185: public void close() throws IOException {
186: hashFile.close();
187: idFile.close();
188: dataFile.close();
189: }
190:
191: /**
192: * Gets a hash code for the supplied data.
193: *
194: * @param data
195: * The data to calculate the hash code for.
196: * @return A hash code for the supplied data.
197: */
198: private int getDataHash(byte[] data) {
199: synchronized (crc32) {
200: crc32.update(data);
201: int crc = (int) crc32.getValue();
202: crc32.reset();
203: return crc;
204: }
205: }
206:
207: /*--------------------*
208: * Test/debug methods *
209: *--------------------*/
210:
211: public static void main(String[] args) throws Exception {
212: if (args.length < 2) {
213: System.err
214: .println("Usage: java org.openrdf.sesame.sailimpl.nativerdf.datastore.DataStore <data-dir> <file-prefix>");
215: return;
216: }
217:
218: System.out.println("Dumping DataStore contents...");
219: File dataDir = new File(args[0]);
220: DataStore dataStore = new DataStore(dataDir, args[1]);
221:
222: DataFile.DataIterator iter = dataStore.dataFile.iterator();
223: while (iter.hasNext()) {
224: byte[] data = iter.next();
225:
226: System.out.println(ByteArrayUtil.toHexString(data));
227: }
228: }
229: }
|