001: /*
002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
003: *
004: * Licensed under the Aduna BSD-style license.
005: */
006: package org.openrdf.sail.nativerdf;
007:
008: import java.io.File;
009: import java.io.IOException;
010:
011: import info.aduna.concurrent.locks.Lock;
012: import info.aduna.concurrent.locks.ReadWriteLockManager;
013: import info.aduna.concurrent.locks.WritePrefReadWriteLockManager;
014: import info.aduna.io.ByteArrayUtil;
015:
016: import org.openrdf.model.BNode;
017: import org.openrdf.model.Literal;
018: import org.openrdf.model.Resource;
019: import org.openrdf.model.Statement;
020: import org.openrdf.model.URI;
021: import org.openrdf.model.Value;
022: import org.openrdf.model.impl.ContextStatementImpl;
023: import org.openrdf.model.impl.StatementImpl;
024: import org.openrdf.model.impl.ValueFactoryBase;
025: import org.openrdf.sail.nativerdf.datastore.DataStore;
026: import org.openrdf.sail.nativerdf.model.NativeBNode;
027: import org.openrdf.sail.nativerdf.model.NativeLiteral;
028: import org.openrdf.sail.nativerdf.model.NativeResource;
029: import org.openrdf.sail.nativerdf.model.NativeURI;
030: import org.openrdf.sail.nativerdf.model.NativeValue;
031:
032: /**
033: * File-based indexed storage and retrieval of RDF values. ValueStore maps RDF
034: * values to integer IDs and vice-versa.
035: *
036: * @author Arjohn Kampman
037: */
038: public class ValueStore extends ValueFactoryBase {
039:
040: /*-----------*
041: * Constants *
042: *-----------*/
043:
044: private static final String FILENAME_PREFIX = "values";
045:
046: private static final int VALUE_CACHE_SIZE = 512;
047:
048: private static final int VALUE_ID_CACHE_SIZE = 128;
049:
050: private static final int NAMESPACE_CACHE_SIZE = 64;
051:
052: private static final int NAMESPACE_ID_CACHE_SIZE = 32;
053:
054: private static final byte VALUE_TYPE_MASK = 0x3; // 0000 0011
055:
056: private static final byte URI_VALUE = 0x1; // 0000 0001
057:
058: private static final byte BNODE_VALUE = 0x2; // 0000 0010
059:
060: private static final byte LITERAL_VALUE = 0x3; // 0000 0011
061:
062: /*-----------*
063: * Variables *
064: *-----------*/
065:
066: /**
067: * Used to do the actual storage of values, once they're translated to byte
068: * arrays.
069: */
070: private DataStore dataStore;
071:
072: /**
073: * Lock manager used to prevent the removal of values over multiple method
074: * calls. Note that values can still be added when read locks are active.
075: */
076: private ReadWriteLockManager lockManager = new WritePrefReadWriteLockManager();
077:
078: /**
079: * An object that indicates the revision of the value store, which is used to
080: * check if cached value IDs are still valid. In order to be valid, the
081: * ValueStoreRevision object of a NativeValue needs to be equal to this
082: * object.
083: */
084: private ValueStoreRevision revision;
085:
086: /**
087: * A simple cache containing the [VALUE_CACHE_SIZE] most-recently used values
088: * stored by their ID.
089: */
090: private LRUCache<Integer, NativeValue> valueCache;
091:
092: /**
093: * A simple cache containing the [ID_CACHE_SIZE] most-recently used value-IDs
094: * stored by their value.
095: */
096: private LRUCache<Value, Integer> valueIDCache;
097:
098: /**
099: * A simple cache containing the [NAMESPACE_CACHE_SIZE] most-recently used
100: * namespaces stored by their ID.
101: */
102: private LRUCache<Integer, String> namespaceCache;
103:
104: /**
105: * A simple cache containing the [NAMESPACE_ID_CACHE_SIZE] most-recently used
106: * namespace-IDs stored by their namespace.
107: */
108: private LRUCache<String, Integer> namespaceIDCache;
109:
110: /*--------------*
111: * Constructors *
112: *--------------*/
113:
114: public ValueStore(File dataDir) throws IOException {
115: this (dataDir, false);
116: }
117:
118: public ValueStore(File dataDir, boolean forceSync)
119: throws IOException {
120: super ();
121: dataStore = new DataStore(dataDir, FILENAME_PREFIX, forceSync);
122:
123: valueCache = new LRUCache<Integer, NativeValue>(
124: VALUE_CACHE_SIZE);
125: valueIDCache = new LRUCache<Value, Integer>(VALUE_ID_CACHE_SIZE);
126: namespaceCache = new LRUCache<Integer, String>(
127: NAMESPACE_CACHE_SIZE);
128: namespaceIDCache = new LRUCache<String, Integer>(
129: NAMESPACE_ID_CACHE_SIZE);
130:
131: setNewRevision();
132: }
133:
134: /*---------*
135: * Methods *
136: *---------*/
137:
138: /**
139: * Creates a new revision object for this value store, invalidating any IDs
140: * cached in NativeValue objects that were created by this value store.
141: */
142: private void setNewRevision() {
143: revision = new ValueStoreRevision(this );
144: }
145:
146: public ValueStoreRevision getRevision() {
147: return revision;
148: }
149:
150: /**
151: * Gets a read lock on this value store that can be used to prevent values
152: * from being removed while the lock is active.
153: */
154: public Lock getReadLock() throws InterruptedException {
155: return lockManager.getReadLock();
156: }
157:
158: /**
159: * Gets the value for the specified ID.
160: *
161: * @param id
162: * A value ID.
163: * @return The value for the ID, or <tt>null</tt> no such value could be
164: * found.
165: * @exception IOException
166: * If an I/O error occurred.
167: */
168: public NativeValue getValue(int id) throws IOException {
169: NativeValue resultValue = null;
170:
171: // Check value cache
172: Integer cacheID = new Integer(id);
173: synchronized (valueCache) {
174: resultValue = valueCache.get(cacheID);
175: }
176:
177: if (resultValue == null) {
178: // Value not in cache, fetch it from file
179: byte[] data = dataStore.getData(id);
180:
181: if (data != null) {
182: resultValue = data2value(id, data);
183:
184: // Store value in cache
185: synchronized (valueCache) {
186: valueCache.put(cacheID, resultValue);
187: }
188: }
189: }
190:
191: return resultValue;
192: }
193:
194: /**
195: * Gets the ID for the specified value.
196: *
197: * @param value
198: * A value.
199: * @return The ID for the specified value, or {@link NativeValue#UNKNOWN_ID}
200: * if no such ID could be found.
201: * @exception IOException
202: * If an I/O error occurred.
203: */
204: public int getID(Value value) throws IOException {
205: // Try to get the internal ID from the value itself
206: boolean isOwnValue = isOwnValue(value);
207:
208: if (isOwnValue) {
209: NativeValue nativeValue = (NativeValue) value;
210:
211: if (revisionIsCurrent(nativeValue)) {
212: int id = nativeValue.getInternalID();
213:
214: if (id != NativeValue.UNKNOWN_ID) {
215: return id;
216: }
217: }
218: }
219:
220: // Check cache
221: Integer cachedID = null;
222: synchronized (valueIDCache) {
223: cachedID = valueIDCache.get(value);
224: }
225:
226: if (cachedID != null) {
227: int id = cachedID.intValue();
228:
229: if (isOwnValue) {
230: // Store id in value for fast access in any consecutive calls
231: ((NativeValue) value).setInternalID(id, revision);
232: }
233:
234: return id;
235: }
236:
237: // ID not cached, search in file
238: byte[] data = value2data(value, false);
239:
240: if (data != null) {
241: int id = dataStore.getID(data);
242:
243: if (id != NativeValue.UNKNOWN_ID) {
244: if (isOwnValue) {
245: // Store id in value for fast access in any consecutive calls
246: ((NativeValue) value).setInternalID(id, revision);
247: } else {
248: // Store id in cache
249: synchronized (valueIDCache) {
250: valueIDCache.put(value, new Integer(id));
251: }
252: }
253: }
254:
255: return id;
256: }
257:
258: return NativeValue.UNKNOWN_ID;
259: }
260:
261: /**
262: * Stores the supplied value and returns the ID that has been assigned to it.
263: * In case the value was already present, the value will not be stored again
264: * and the ID of the existing value is returned.
265: *
266: * @param value
267: * The Value to store.
268: * @return The ID that has been assigned to the value.
269: * @exception IOException
270: * If an I/O error occurred.
271: */
272: public int storeValue(Value value) throws IOException {
273: // Try to get the internal ID from the value itself
274: boolean isOwnValue = isOwnValue(value);
275:
276: if (isOwnValue) {
277: NativeValue nativeValue = (NativeValue) value;
278:
279: if (revisionIsCurrent(nativeValue)) {
280: // Value's ID is still current
281: int id = nativeValue.getInternalID();
282:
283: if (id != NativeValue.UNKNOWN_ID) {
284: return id;
285: }
286: }
287: }
288:
289: // ID not stored in value itself, try the ID cache
290: Integer cachedID = null;
291: synchronized (valueIDCache) {
292: cachedID = valueIDCache.get(value);
293: }
294:
295: if (cachedID != null) {
296: int id = cachedID.intValue();
297:
298: if (isOwnValue) {
299: // Store id in value for fast access in any consecutive calls
300: ((NativeValue) value).setInternalID(id, revision);
301: }
302:
303: return id;
304: }
305:
306: // Unable to get internal ID in a cheap way, just store it in the data
307: // store which will handle duplicates
308: byte[] valueData = value2data(value, true);
309:
310: int id = dataStore.storeData(valueData);
311:
312: if (isOwnValue) {
313: // Store id in value for fast access in any consecutive calls
314: ((NativeValue) value).setInternalID(id, revision);
315: } else {
316: // Update cache
317: synchronized (valueIDCache) {
318: valueIDCache.put(value, new Integer(id));
319: }
320: }
321:
322: return id;
323: }
324:
325: /**
326: * Removes all values from the ValueStore.
327: *
328: * @exception IOException
329: * If an I/O error occurred.
330: */
331: public void clear() throws IOException {
332: try {
333: Lock writeLock = lockManager.getWriteLock();
334: try {
335: dataStore.clear();
336:
337: synchronized (valueCache) {
338: valueCache.clear();
339: }
340:
341: synchronized (valueIDCache) {
342: valueIDCache.clear();
343: }
344:
345: synchronized (namespaceCache) {
346: namespaceCache.clear();
347: }
348:
349: synchronized (namespaceIDCache) {
350: namespaceIDCache.clear();
351: }
352:
353: initBNodeParams();
354:
355: setNewRevision();
356: } finally {
357: writeLock.release();
358: }
359: } catch (InterruptedException e) {
360: IOException ioe = new IOException(
361: "Failed to acquire write lock");
362: ioe.initCause(e);
363: throw ioe;
364: }
365: }
366:
367: /**
368: * Synchronizes any changes that are cached in memory to disk.
369: *
370: * @exception IOException
371: * If an I/O error occurred.
372: */
373: public void sync() throws IOException {
374: dataStore.sync();
375: }
376:
377: /**
378: * Closes the ValueStore, releasing any file references, etc. Once closed,
379: * the ValueStore can no longer be used.
380: *
381: * @exception IOException
382: * If an I/O error occurred.
383: */
384: public void close() throws IOException {
385: dataStore.close();
386: valueCache = null;
387: valueIDCache = null;
388: namespaceCache = null;
389: namespaceIDCache = null;
390: }
391:
392: /**
393: * Checks if the supplied Value object is a NativeValue object that has been
394: * created by this ValueStore.
395: */
396: private boolean isOwnValue(Value value) {
397: return value instanceof NativeValue
398: && ((NativeValue) value).getValueStoreRevision()
399: .getValueStore() == this ;
400: }
401:
402: /**
403: * Checks if the revision of the supplied value object is still current.
404: */
405: private boolean revisionIsCurrent(NativeValue value) {
406: return revision.equals(value.getValueStoreRevision());
407: }
408:
409: private byte[] value2data(Value value, boolean create)
410: throws IOException {
411: if (value instanceof URI) {
412: return uri2data((URI) value, create);
413: } else if (value instanceof BNode) {
414: return bnode2data((BNode) value, create);
415: } else if (value instanceof Literal) {
416: return literal2data((Literal) value, create);
417: } else {
418: throw new IllegalArgumentException(
419: "value parameter should be a URI, BNode or Literal");
420: }
421: }
422:
423: private byte[] uri2data(URI uri, boolean create) throws IOException {
424: int nsID = getNamespaceID(uri.getNamespace(), create);
425:
426: if (nsID == -1) {
427: // Unknown namespace means unknown URI
428: return null;
429: }
430:
431: // Get local name in UTF-8
432: byte[] localNameData = uri.getLocalName().getBytes("UTF-8");
433:
434: // Combine parts in a single byte array
435: byte[] uriData = new byte[5 + localNameData.length];
436: uriData[0] = URI_VALUE;
437: ByteArrayUtil.putInt(nsID, uriData, 1);
438: ByteArrayUtil.put(localNameData, uriData, 5);
439:
440: return uriData;
441: }
442:
443: private byte[] bnode2data(BNode bNode, boolean create)
444: throws IOException {
445: byte[] idData = bNode.getID().getBytes("UTF-8");
446:
447: byte[] bNodeData = new byte[1 + idData.length];
448: bNodeData[0] = BNODE_VALUE;
449: ByteArrayUtil.put(idData, bNodeData, 1);
450:
451: return bNodeData;
452: }
453:
454: private byte[] literal2data(Literal literal, boolean create)
455: throws IOException {
456: // Get datatype ID
457: int datatypeID = NativeValue.UNKNOWN_ID;
458:
459: if (literal.getDatatype() != null) {
460: if (create) {
461: datatypeID = storeValue(literal.getDatatype());
462: } else {
463: datatypeID = getID(literal.getDatatype());
464:
465: if (datatypeID == NativeValue.UNKNOWN_ID) {
466: // Unknown datatype means unknown literal
467: return null;
468: }
469: }
470: }
471:
472: // Get language tag in UTF-8
473: byte[] langData = null;
474: int langDataLength = 0;
475: if (literal.getLanguage() != null) {
476: langData = literal.getLanguage().getBytes("UTF-8");
477: langDataLength = langData.length;
478: }
479:
480: // Get label in UTF-8
481: byte[] labelData = literal.getLabel().getBytes("UTF-8");
482:
483: // Combine parts in a single byte array
484: byte[] literalData = new byte[6 + langDataLength
485: + labelData.length];
486: literalData[0] = LITERAL_VALUE;
487: ByteArrayUtil.putInt(datatypeID, literalData, 1);
488: literalData[5] = (byte) langDataLength;
489: if (langData != null) {
490: ByteArrayUtil.put(langData, literalData, 6);
491: }
492: ByteArrayUtil.put(labelData, literalData, 6 + langDataLength);
493:
494: return literalData;
495: }
496:
497: private NativeValue data2value(int id, byte[] data)
498: throws IOException {
499: switch ((data[0] & VALUE_TYPE_MASK)) {
500: case URI_VALUE:
501: return data2uri(id, data);
502: case BNODE_VALUE:
503: return data2bnode(id, data);
504: case LITERAL_VALUE:
505: return data2literal(id, data);
506: default:
507: throw new IllegalArgumentException(
508: "data does not specify a known value type");
509: }
510: }
511:
512: private NativeURI data2uri(int id, byte[] data) throws IOException {
513: int nsID = ByteArrayUtil.getInt(data, 1);
514: String namespace = getNamespace(nsID);
515:
516: String localName = new String(data, 5, data.length - 5, "UTF-8");
517:
518: return new NativeURI(revision, namespace, localName, id);
519: }
520:
521: private NativeBNode data2bnode(int id, byte[] data)
522: throws IOException {
523: String nodeID = new String(data, 1, data.length - 1, "UTF-8");
524: return new NativeBNode(revision, nodeID, id);
525: }
526:
527: private NativeLiteral data2literal(int id, byte[] data)
528: throws IOException {
529: // Get datatype
530: int datatypeID = ByteArrayUtil.getInt(data, 1);
531: URI datatype = null;
532: if (datatypeID != NativeValue.UNKNOWN_ID) {
533: datatype = (URI) getValue(datatypeID);
534: }
535:
536: // Get language tag
537: String lang = null;
538: int langLength = data[5];
539: if (langLength > 0) {
540: lang = new String(data, 6, langLength, "UTF-8");
541: }
542:
543: // Get label
544: String label = new String(data, 6 + langLength, data.length - 6
545: - langLength, "UTF-8");
546:
547: if (datatype != null) {
548: return new NativeLiteral(revision, label, datatype, id);
549: } else if (lang != null) {
550: return new NativeLiteral(revision, label, lang, id);
551: } else {
552: return new NativeLiteral(revision, label, id);
553: }
554: }
555:
556: private int getNamespaceID(String namespace, boolean create)
557: throws IOException {
558: int id;
559:
560: Integer cacheID = null;
561: synchronized (namespaceIDCache) {
562: cacheID = namespaceIDCache.get(namespace);
563: }
564:
565: if (cacheID != null) {
566: id = cacheID.intValue();
567: } else {
568: byte[] namespaceData = namespace.getBytes("UTF-8");
569:
570: if (create) {
571: id = dataStore.storeData(namespaceData);
572: } else {
573: id = dataStore.getID(namespaceData);
574: }
575:
576: if (id != -1) {
577: namespaceIDCache.put(namespace, new Integer(id));
578: }
579: }
580:
581: return id;
582: }
583:
584: private String getNamespace(int id) throws IOException {
585: Integer cacheID = new Integer(id);
586: String namespace = null;
587:
588: synchronized (namespaceCache) {
589: namespace = namespaceCache.get(cacheID);
590: }
591:
592: if (namespace == null) {
593: byte[] namespaceData = dataStore.getData(id);
594: namespace = new String(namespaceData, "UTF-8");
595:
596: synchronized (namespaceCache) {
597: namespaceCache.put(cacheID, namespace);
598: }
599: }
600:
601: return namespace;
602: }
603:
604: /*-------------------------------------*
605: * Methods from interface ValueFactory *
606: *-------------------------------------*/
607:
608: public NativeURI createURI(String uri) {
609: return new NativeURI(revision, uri);
610: }
611:
612: public NativeURI createURI(String namespace, String localName) {
613: return new NativeURI(revision, namespace, localName);
614: }
615:
616: public NativeBNode createBNode(String nodeID) {
617: return new NativeBNode(revision, nodeID);
618: }
619:
620: public NativeLiteral createLiteral(String value) {
621: return new NativeLiteral(revision, value);
622: }
623:
624: public NativeLiteral createLiteral(String value, String language) {
625: return new NativeLiteral(revision, value, language);
626: }
627:
628: public NativeLiteral createLiteral(String value, URI datatype) {
629: return new NativeLiteral(revision, value, datatype);
630: }
631:
632: public Statement createStatement(Resource subject, URI predicate,
633: Value object) {
634: return new StatementImpl(subject, predicate, object);
635: }
636:
637: public Statement createStatement(Resource subject, URI predicate,
638: Value object, Resource context) {
639: return new ContextStatementImpl(subject, predicate, object,
640: context);
641: }
642:
643: /*----------------------------------------------------------------------*
644: * Methods for converting model objects to NativeStore-specific objects *
645: *----------------------------------------------------------------------*/
646:
647: public NativeValue getNativeValue(Value value) {
648: if (value instanceof Resource) {
649: return getNativeResource((Resource) value);
650: } else if (value instanceof Literal) {
651: return getNativeLiteral((Literal) value);
652: } else {
653: throw new IllegalArgumentException("Unknown value type: "
654: + value.getClass());
655: }
656: }
657:
658: public NativeResource getNativeResource(Resource resource) {
659: if (resource instanceof URI) {
660: return getNativeURI((URI) resource);
661: } else if (resource instanceof BNode) {
662: return getNativeBNode((BNode) resource);
663: } else {
664: throw new IllegalArgumentException(
665: "Unknown resource type: " + resource.getClass());
666: }
667: }
668:
669: /**
670: * Creates a NativeURI that is equal to the supplied URI. This method returns
671: * the supplied URI itself if it is already a NativeURI that has been created
672: * by this ValueStore, which prevents unnecessary object creations.
673: *
674: * @return A NativeURI for the specified URI.
675: */
676: public NativeURI getNativeURI(URI uri) {
677: if (isOwnValue(uri)) {
678: return (NativeURI) uri;
679: }
680:
681: return new NativeURI(revision, uri.toString());
682: }
683:
684: /**
685: * Creates a NativeBNode that is equal to the supplied bnode. This method
686: * returns the supplied bnode itself if it is already a NativeBNode that has
687: * been created by this ValueStore, which prevents unnecessary object
688: * creations.
689: *
690: * @return A NativeBNode for the specified bnode.
691: */
692: public NativeBNode getNativeBNode(BNode bnode) {
693: if (isOwnValue(bnode)) {
694: return (NativeBNode) bnode;
695: }
696:
697: return new NativeBNode(revision, bnode.getID());
698: }
699:
700: /**
701: * Creates an NativeLiteral that is equal to the supplied literal. This
702: * method returns the supplied literal itself if it is already a
703: * NativeLiteral that has been created by this ValueStore, which prevents
704: * unnecessary object creations.
705: *
706: * @return A NativeLiteral for the specified literal.
707: */
708: public NativeLiteral getNativeLiteral(Literal l) {
709: if (isOwnValue(l)) {
710: return (NativeLiteral) l;
711: }
712:
713: if (l.getLanguage() != null) {
714: return new NativeLiteral(revision, l.getLabel(), l
715: .getLanguage());
716: } else if (l.getDatatype() != null) {
717: NativeURI datatype = getNativeURI(l.getDatatype());
718: return new NativeLiteral(revision, l.getLabel(), datatype);
719: } else {
720: return new NativeLiteral(revision, l.getLabel());
721: }
722: }
723:
724: /*--------------------*
725: * Test/debug methods *
726: *--------------------*/
727:
728: public static void main(String[] args) throws Exception {
729: File dataDir = new File(args[0]);
730: ValueStore valueStore = new ValueStore(dataDir);
731:
732: int maxID = valueStore.dataStore.getMaxID();
733: for (int id = 1; id <= maxID; id++) {
734: Value value = valueStore.getValue(id);
735: System.out.println("[" + id + "] " + value.toString());
736: }
737: }
738: }
|