0001: /*
0002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
0003: *
0004: * Licensed under the Aduna BSD-style license.
0005: */
0006: package org.openrdf.sail.nativerdf;
0007:
0008: import java.io.File;
0009: import java.io.FileInputStream;
0010: import java.io.FileOutputStream;
0011: import java.io.IOException;
0012: import java.io.InputStream;
0013: import java.io.OutputStream;
0014: import java.util.Arrays;
0015: import java.util.HashSet;
0016: import java.util.Properties;
0017: import java.util.Set;
0018: import java.util.StringTokenizer;
0019:
0020: import org.slf4j.Logger;
0021: import org.slf4j.LoggerFactory;
0022:
0023: import info.aduna.io.ByteArrayUtil;
0024:
0025: import org.openrdf.sail.SailException;
0026: import org.openrdf.sail.nativerdf.btree.BTree;
0027: import org.openrdf.sail.nativerdf.btree.RecordComparator;
0028: import org.openrdf.sail.nativerdf.btree.RecordIterator;
0029:
/**
 * File-based indexed storage and retrieval of RDF statements. TripleStore
 * stores statements in the form of four integer IDs. Each ID represents an
 * RDF value that is stored in a {@link ValueStore}. The four IDs refer to the
 * statement's subject, predicate, object and context. The ID <tt>0</tt> is
 * used to represent the "null" context and doesn't map to an actual RDF value.
 *
 * @author Arjohn Kampman
 */
0039: class TripleStore {
0040:
0041: final Logger logger = LoggerFactory.getLogger(this .getClass());
0042:
0043: /*-----------*
0044: * Constants *
0045: *-----------*/
0046:
0047: /**
0048: * The file name for the properties file.
0049: */
0050: private static final String PROPERTIES_FILE = "triples.prop";
0051:
0052: /**
0053: * The key used to store the triple store version in the properties file.
0054: */
0055: private static final String VERSION_KEY = "version";
0056:
0057: /**
0058: * The key used to store the triple indexes specification that specifies
0059: * which triple indexes exist.
0060: */
0061: private static final String INDEXES_KEY = "triple-indexes";
0062:
0063: /**
0064: * The version number for the current triple store.
0065: * <ul>
0066: * <li>version 0: The first version which used a single spo-index. This
0067: * version did not have a properties file yet.
0068: * <li>version 1: Introduces configurable triple indexes and the properties
0069: * file.
0070: * <li>version 10: Introduces a context field, essentially making this a
0071: * quad store.
0072: * <li>version 10a: Introduces transaction flags, this is backwards
0073: * compatible with version 10.
0074: * </ul>
0075: */
0076: private static final int SCHEME_VERSION = 10;
0077:
0078: // 17 bytes are used to represent a triple:
0079: // byte 0-3 : subject
0080: // byte 4-7 : predicate
0081: // byte 8-11: object
0082: // byte 12-15: context
0083: // byte 16: additional flag(s)
0084: static final int RECORD_LENGTH = 17;
0085:
0086: static final int SUBJ_IDX = 0;
0087:
0088: static final int PRED_IDX = 4;
0089:
0090: static final int OBJ_IDX = 8;
0091:
0092: static final int CONTEXT_IDX = 12;
0093:
0094: static final int FLAG_IDX = 16;
0095:
0096: /**
0097: * Bit field indicating that a statement has been explicitly added (instead
0098: * of being inferred).
0099: */
0100: static final byte EXPLICIT_FLAG = (byte) 0x1; // 0000 0001
0101:
0102: /**
0103: * Bit field indicating that a statement has been added in a (currently
0104: * active) transaction.
0105: */
0106: static final byte ADDED_FLAG = (byte) 0x2; // 0000 0010
0107:
0108: /**
0109: * Bit field indicating that a statement has been removed in a (currently
0110: * active) transaction.
0111: */
0112: static final byte REMOVED_FLAG = (byte) 0x4; // 0000 0100
0113:
0114: /**
0115: * Bit field indicating that the explicit flag has been toggled (from true to
0116: * false, or vice versa) in a (currently active) transaction.
0117: */
0118: static final byte TOGGLE_EXPLICIT_FLAG = (byte) 0x8; // 0000 1000
0119:
0120: /*-----------*
0121: * Variables *
0122: *-----------*/
0123:
0124: /**
0125: * The directory that is used to store the index files.
0126: */
0127: private File dir;
0128:
0129: /**
0130: * Object containing meta-data for the triple store. This includes
0131: */
0132: private Properties properties;
0133:
0134: /**
0135: * The array of triple indexes that are used to store and retrieve triples.
0136: */
0137: private TripleIndex[] indexes;
0138:
0139: private boolean forceSync;
0140:
0141: /**
0142: * Flag indicating whether one or more triples have been flagged as "added"
0143: * during the current transaction.
0144: */
0145: private boolean txnAddedTriples = false;
0146:
0147: /**
0148: * Flag indicating whether one or more triples have been flagged as "removed"
0149: * during the current transaction.
0150: */
0151: private boolean txnRemovedTriples = false;
0152:
0153: private RecordCache updatedTriplesCache;
0154:
0155: /*--------------*
0156: * Constructors *
0157: *--------------*/
0158:
/**
 * Creates a TripleStore on the supplied directory. Equivalent to calling
 * {@link #TripleStore(File, String, boolean)} with <tt>forceSync</tt> set to
 * <tt>false</tt>.
 *
 * @param dir
 *        The directory that holds the properties file and the index files.
 * @param indexSpecStr
 *        The index specification, e.g. "spoc,posc".
 */
public TripleStore(File dir, String indexSpecStr)
        throws IOException, SailException {
    this(dir, indexSpecStr, false);
}
0163:
0164: public TripleStore(File dir, String indexSpecStr, boolean forceSync)
0165: throws IOException, SailException {
0166: this .dir = dir;
0167: this .forceSync = forceSync;
0168: properties = new Properties();
0169:
0170: // Read triple properties file, restore indexes, re-index
0171: File propFile = new File(dir, PROPERTIES_FILE);
0172:
0173: if (propFile.exists()) {
0174: loadProperties(propFile);
0175:
0176: // Check version number
0177: String versionStr = properties.getProperty(VERSION_KEY);
0178: if (versionStr == null) {
0179: logger
0180: .warn("version missing in TripleStore's properties file");
0181: } else {
0182: try {
0183: int version = Integer.parseInt(versionStr);
0184: if (version < 10) {
0185: throw new SailException(
0186: "Directory contains incompatible triple data");
0187: } else if (version > SCHEME_VERSION) {
0188: throw new SailException(
0189: "Directory contains data that uses a newer data format");
0190: }
0191: } catch (NumberFormatException e) {
0192: logger
0193: .warn("Malformed version number in TripleStore's properties file");
0194: }
0195: }
0196: }
0197:
0198: Set<String> indexSpecs = parseIndexSpecList(indexSpecStr);
0199:
0200: if (indexSpecs.isEmpty()) {
0201: // Create default spoc and posc indexes
0202: logger
0203: .info("No indexes specified, defaulting to indexes: spoc, posc");
0204: indexSpecs.add("spoc");
0205: indexSpecs.add("posc");
0206: indexSpecStr = "spoc,posc";
0207: }
0208:
0209: // Initialize added indexes and delete removed ones:
0210: reindex(indexSpecs);
0211:
0212: if (!String.valueOf(SCHEME_VERSION).equals(
0213: properties.getProperty(VERSION_KEY))
0214: || !indexSpecStr.equals(properties
0215: .getProperty(INDEXES_KEY))) {
0216: // Store up-to-date properties
0217: properties.setProperty(VERSION_KEY, String
0218: .valueOf(SCHEME_VERSION));
0219: properties.setProperty(INDEXES_KEY, indexSpecStr);
0220: storeProperties(propFile);
0221: }
0222:
0223: // Create specified indexes
0224: indexes = new TripleIndex[indexSpecs.size()];
0225: int i = 0;
0226: for (String fieldSeq : indexSpecs) {
0227: logger.debug("Activating index '" + fieldSeq + "'...");
0228: indexes[i++] = new TripleIndex(fieldSeq);
0229: }
0230: }
0231:
0232: /*---------*
0233: * Methods *
0234: *---------*/
0235:
0236: /**
0237: * Parses a comma/whitespace-separated list of index specifications. Index
0238: * specifications are required to consists of 4 characters: 's', 'p', 'o' and
0239: * 'c'.
0240: *
0241: * @param indexSpecStr
0242: * A string like "spoc, pocs, cosp".
0243: * @return A Set containing the parsed index specifications.
0244: */
0245: private Set<String> parseIndexSpecList(String indexSpecStr)
0246: throws SailException {
0247: Set<String> indexes = new HashSet<String>();
0248:
0249: if (indexSpecStr != null) {
0250: StringTokenizer tok = new StringTokenizer(indexSpecStr,
0251: ", \t");
0252: while (tok.hasMoreTokens()) {
0253: String index = tok.nextToken().toLowerCase();
0254:
0255: // sanity checks
0256: if (index.length() != 4 || index.indexOf('s') == -1
0257: || index.indexOf('p') == -1
0258: || index.indexOf('o') == -1
0259: || index.indexOf('c') == -1) {
0260: throw new SailException("invalid value '" + index
0261: + "' in index specification: "
0262: + indexSpecStr);
0263: }
0264:
0265: indexes.add(index);
0266: }
0267: }
0268:
0269: return indexes;
0270: }
0271:
0272: private void reindex(Set<String> newIndexSpecs) throws IOException,
0273: SailException {
0274: // Check if the index specification has changed and update indexes if
0275: // necessary
0276: String currentIndexSpecStr = properties
0277: .getProperty(INDEXES_KEY);
0278: if (currentIndexSpecStr == null) {
0279: return;
0280: }
0281:
0282: Set<String> currentIndexSpecs = parseIndexSpecList(currentIndexSpecStr);
0283:
0284: if (currentIndexSpecs.isEmpty()) {
0285: throw new SailException(
0286: "Invalid index specification found in index properties");
0287: }
0288:
0289: // Determine the set of newly added indexes
0290: Set<String> addedIndexSpecs = new HashSet<String>(newIndexSpecs);
0291: addedIndexSpecs.removeAll(currentIndexSpecs);
0292:
0293: if (!addedIndexSpecs.isEmpty()) {
0294: // Initialize new indexes using an existing index as source
0295: String sourceIndexSpec = currentIndexSpecs.iterator()
0296: .next();
0297: TripleIndex sourceIndex = new TripleIndex(sourceIndexSpec);
0298:
0299: try {
0300: for (String fieldSeq : addedIndexSpecs) {
0301: logger.debug("Initializing new index '" + fieldSeq
0302: + "'...");
0303:
0304: TripleIndex addedIndex = new TripleIndex(fieldSeq);
0305: BTree addedBTree = addedIndex.getBTree();
0306:
0307: RecordIterator sourceIter = sourceIndex.getBTree()
0308: .iterateAll();
0309: try {
0310: byte[] value = null;
0311: while ((value = sourceIter.next()) != null) {
0312: addedBTree.insert(value);
0313: }
0314: } finally {
0315: sourceIter.close();
0316: }
0317:
0318: addedBTree.close();
0319: }
0320:
0321: logger.debug("New index(es) initialized");
0322: } finally {
0323: sourceIndex.getBTree().close();
0324: }
0325: }
0326:
0327: // Determine the set of removed indexes
0328: Set<String> removedIndexSpecs = new HashSet<String>(
0329: currentIndexSpecs);
0330: removedIndexSpecs.removeAll(newIndexSpecs);
0331:
0332: // Delete files for removed indexes
0333: for (String fieldSeq : removedIndexSpecs) {
0334: boolean deleted = getIndexFile(fieldSeq).delete();
0335:
0336: if (deleted) {
0337: logger.debug("Deleted file for removed " + fieldSeq
0338: + " index");
0339: } else {
0340: logger.warn("Unable to delete file for removed "
0341: + fieldSeq + " index");
0342: }
0343: }
0344: }
0345:
0346: public void close() throws IOException {
0347: for (int i = 0; i < indexes.length; i++) {
0348: indexes[i].getBTree().close();
0349: }
0350: indexes = null;
0351: }
0352:
0353: public RecordIterator getTriples(int subj, int pred, int obj,
0354: int context) throws IOException {
0355: // Return all triples except those that were added but not yet committed
0356: return getTriples(subj, pred, obj, context, 0, ADDED_FLAG);
0357: }
0358:
0359: public RecordIterator getTriples(int subj, int pred, int obj,
0360: int context, boolean readTransaction) throws IOException {
0361: if (readTransaction) {
0362: // Don't read removed statements
0363: return getTriples(subj, pred, obj, context, 0,
0364: TripleStore.REMOVED_FLAG);
0365: } else {
0366: // Don't read added statements
0367: return getTriples(subj, pred, obj, context, 0,
0368: TripleStore.ADDED_FLAG);
0369: }
0370: }
0371:
0372: public RecordIterator getTriples(int subj, int pred, int obj,
0373: int context, boolean explicit, boolean readTransaction)
0374: throws IOException {
0375: int flags = 0;
0376: int flagsMask = 0;
0377:
0378: if (readTransaction) {
0379: flagsMask |= TripleStore.REMOVED_FLAG;
0380: // 'explicit' is handled through an ExplicitStatementFilter
0381: } else {
0382: flagsMask |= TripleStore.ADDED_FLAG;
0383:
0384: if (explicit) {
0385: flags |= TripleStore.EXPLICIT_FLAG;
0386: flagsMask |= TripleStore.EXPLICIT_FLAG;
0387: }
0388: }
0389:
0390: RecordIterator btreeIter = getTriples(subj, pred, obj, context,
0391: flags, flagsMask);
0392:
0393: if (readTransaction && explicit) {
0394: // Filter implicit statements from the result
0395: btreeIter = new ExplicitStatementFilter(btreeIter);
0396: }
0397:
0398: return btreeIter;
0399: }
0400:
0401: /*-------------------------------------*
0402: * Inner class ExplicitStatementFilter *
0403: *-------------------------------------*/
0404:
0405: private static class ExplicitStatementFilter implements
0406: RecordIterator {
0407:
0408: private final RecordIterator wrappedIter;
0409:
0410: public ExplicitStatementFilter(RecordIterator wrappedIter) {
0411: this .wrappedIter = wrappedIter;
0412: }
0413:
0414: public byte[] next() throws IOException {
0415: byte[] result;
0416:
0417: while ((result = wrappedIter.next()) != null) {
0418: byte flags = result[TripleStore.FLAG_IDX];
0419: boolean explicit = (flags & TripleStore.EXPLICIT_FLAG) != 0;
0420: boolean toggled = (flags & TripleStore.TOGGLE_EXPLICIT_FLAG) != 0;
0421:
0422: if (explicit != toggled) {
0423: // Statement is either explicit and hasn't been toggled, or vice
0424: // versa
0425: break;
0426: }
0427: }
0428:
0429: return result;
0430: }
0431:
0432: public void set(byte[] value) throws IOException {
0433: wrappedIter.set(value);
0434: }
0435:
0436: public void close() throws IOException {
0437: wrappedIter.close();
0438: }
0439: } // end inner class ExplicitStatementFilter
0440:
0441: private RecordIterator getTriples(int subj, int pred, int obj,
0442: int context, int flags, int flagsMask) throws IOException {
0443: return getTriples(subj, pred, obj, context, flags, flagsMask,
0444: indexes);
0445: }
0446:
0447: private RecordIterator getTriples(int subj, int pred, int obj,
0448: int context, int flags, int flagsMask,
0449: TripleIndex... indexes) throws IOException {
0450: // Get best matching index
0451: int bestScore = -1;
0452: TripleIndex bestIndex = null;
0453: for (int i = 0; i < indexes.length; i++) {
0454: int score = indexes[i].getPatternScore(subj, pred, obj,
0455: context);
0456: if (score > bestScore) {
0457: bestScore = score;
0458: bestIndex = indexes[i];
0459: }
0460: }
0461:
0462: byte[] searchKey = getSearchKey(subj, pred, obj, context, flags);
0463: byte[] searchMask = getSearchMask(subj, pred, obj, context,
0464: flagsMask);
0465:
0466: if (bestScore > 0) {
0467: // Use ranged search
0468: byte[] minValue = getMinValue(subj, pred, obj, context);
0469: byte[] maxValue = getMaxValue(subj, pred, obj, context);
0470:
0471: return bestIndex.getBTree().iterateRangedValues(searchKey,
0472: searchMask, minValue, maxValue);
0473: } else {
0474: // Use sequential scan
0475: return bestIndex.getBTree().iterateValues(searchKey,
0476: searchMask);
0477: }
0478: }
0479:
0480: public void clear() throws IOException {
0481: for (int i = 0; i < indexes.length; i++) {
0482: indexes[i].getBTree().clear();
0483: }
0484: }
0485:
0486: public boolean storeTriple(int subj, int pred, int obj, int context)
0487: throws IOException {
0488: return storeTriple(subj, pred, obj, context, true);
0489: }
0490:
0491: public boolean storeTriple(int subj, int pred, int obj,
0492: int context, boolean explicit) throws IOException {
0493: boolean result = false;
0494:
0495: byte[] data = getData(subj, pred, obj, context, 0);
0496: byte[] storedData = indexes[0].getBTree().get(data);
0497:
0498: if (storedData == null) {
0499: // Statement does not yet exist
0500: data[FLAG_IDX] |= ADDED_FLAG;
0501: if (explicit) {
0502: data[FLAG_IDX] |= EXPLICIT_FLAG;
0503: }
0504:
0505: result = true;
0506: txnAddedTriples = true;
0507: } else {
0508: // Statement already exists, only modify its flags, see txn-flags.txt
0509: // for a description of the flag transformations
0510: byte flags = storedData[FLAG_IDX];
0511: boolean isExplicit = (flags & EXPLICIT_FLAG) != 0;
0512: boolean added = (flags & ADDED_FLAG) != 0;
0513: boolean removed = (flags & REMOVED_FLAG) != 0;
0514: boolean toggled = (flags & TOGGLE_EXPLICIT_FLAG) != 0;
0515:
0516: if (added) {
0517: // Statement has been added in the current transaction and is
0518: // invisible to other connections
0519: data[FLAG_IDX] |= ADDED_FLAG;
0520: if (explicit || isExplicit) {
0521: data[FLAG_IDX] |= EXPLICIT_FLAG;
0522: }
0523: } else {
0524: // Committed statement, must keep explicit flag the same
0525: if (isExplicit) {
0526: data[FLAG_IDX] |= EXPLICIT_FLAG;
0527: }
0528:
0529: if (explicit) {
0530: if (!isExplicit) {
0531: // Make inferred statement explicit
0532: data[FLAG_IDX] |= TOGGLE_EXPLICIT_FLAG;
0533: }
0534: } else {
0535: if (removed) {
0536: if (isExplicit) {
0537: // Re-add removed explicit statement as inferred
0538: data[FLAG_IDX] |= TOGGLE_EXPLICIT_FLAG;
0539: }
0540: } else if (toggled) {
0541: data[FLAG_IDX] |= TOGGLE_EXPLICIT_FLAG;
0542: }
0543: }
0544: }
0545:
0546: // Statement is new when it used to be removed
0547: result = removed;
0548: }
0549:
0550: if (storedData == null || !Arrays.equals(data, storedData)) {
0551: for (TripleIndex index : indexes) {
0552: index.getBTree().insert(data);
0553: }
0554:
0555: updatedTriplesCache.storeRecord(data);
0556: }
0557:
0558: return result;
0559: }
0560:
0561: public int removeTriples(int subj, int pred, int obj, int context)
0562: throws IOException {
0563: RecordIterator iter = getTriples(subj, pred, obj, context, 0, 0);
0564: return removeTriples(iter);
0565: }
0566:
0567: /**
0568: * @param subj
0569: * The subject for the pattern, or <tt>-1</tt> for a wildcard.
0570: * @param pred
0571: * The predicate for the pattern, or <tt>-1</tt> for a wildcard.
0572: * @param obj
0573: * The object for the pattern, or <tt>-1</tt> for a wildcard.
0574: * @param context
0575: * The context for the pattern, or <tt>-1</tt> for a wildcard.
0576: * @param explicit
0577: * Flag indicating whether explicit or inferred statements should be
0578: * removed; <tt>true</tt> removes explicit statements that match the
0579: * pattern, <tt>false</tt> removes inferred statements that match
0580: * the pattern.
0581: * @return The number of triples that were removed.
0582: * @throws IOException
0583: */
0584: public int removeTriples(int subj, int pred, int obj, int context,
0585: boolean explicit) throws IOException {
0586: byte flags = explicit ? EXPLICIT_FLAG : 0;
0587: RecordIterator iter = getTriples(subj, pred, obj, context,
0588: flags, EXPLICIT_FLAG);
0589: return removeTriples(iter);
0590: }
0591:
0592: private int removeTriples(RecordIterator iter) throws IOException {
0593: byte[] data = iter.next();
0594:
0595: if (data == null) {
0596: // no discarded triples
0597: return 0;
0598: }
0599:
0600: int count = 0;
0601:
0602: // Store the values that need to be removed in a tmp file and then
0603: // iterate over this file to set the REMOVED flag
0604: RecordCache removedTriplesCache = new SequentialRecordCache(
0605: dir, RECORD_LENGTH);
0606: try {
0607: while (data != null) {
0608: if ((data[FLAG_IDX] & REMOVED_FLAG) == 0) {
0609: data[FLAG_IDX] |= REMOVED_FLAG;
0610: removedTriplesCache.storeRecord(data);
0611: }
0612: data = iter.next();
0613: }
0614: iter.close();
0615:
0616: count = (int) removedTriplesCache.getRecordCount();
0617: updatedTriplesCache.storeRecords(removedTriplesCache);
0618:
0619: for (TripleIndex index : indexes) {
0620: BTree btree = index.getBTree();
0621:
0622: RecordIterator recIter = removedTriplesCache
0623: .getRecords();
0624: try {
0625: while ((data = recIter.next()) != null) {
0626: btree.insert(data);
0627: }
0628: } finally {
0629: recIter.close();
0630: }
0631: }
0632: } finally {
0633: removedTriplesCache.discard();
0634: }
0635:
0636: if (count > 0) {
0637: txnRemovedTriples = true;
0638: }
0639:
0640: return count;
0641: }
0642:
0643: private void discardTriples(RecordIterator iter) throws IOException {
0644: byte[] data = iter.next();
0645:
0646: if (data == null) {
0647: // no discarded triples
0648: return;
0649: }
0650:
0651: // Store the values that need to be discarded in a tmp file and then
0652: // iterate over this file to discard the values
0653: RecordCache recordCache = new SequentialRecordCache(dir,
0654: RECORD_LENGTH);
0655: try {
0656: while (data != null) {
0657: recordCache.storeRecord(data);
0658: data = iter.next();
0659: }
0660: iter.close();
0661:
0662: for (TripleIndex index : indexes) {
0663: BTree btree = index.getBTree();
0664:
0665: RecordIterator recIter = recordCache.getRecords();
0666: try {
0667: while ((data = recIter.next()) != null) {
0668: btree.remove(data);
0669: }
0670: } finally {
0671: recIter.close();
0672: }
0673: }
0674: } finally {
0675: recordCache.discard();
0676: }
0677: }
0678:
0679: public void startTransaction() throws IOException {
0680: // Create a record cache for storing updated triples with a maximum of
0681: // some 10% of the number of triples
0682: long maxRecords = indexes[0].getBTree().getValueCountEstimate() / 10L;
0683: updatedTriplesCache = new SortedRecordCache(dir, RECORD_LENGTH,
0684: maxRecords, new TripleComparator("spoc"));
0685: }
0686:
0687: public void commit() throws IOException {
0688: if (txnRemovedTriples) {
0689: RecordIterator iter = getTriples(-1, -1, -1, -1,
0690: REMOVED_FLAG, REMOVED_FLAG);
0691: try {
0692: discardTriples(iter);
0693: } finally {
0694: txnRemovedTriples = false;
0695: iter.close();
0696: }
0697: }
0698:
0699: boolean validCache = updatedTriplesCache.isValid();
0700:
0701: for (TripleIndex index : indexes) {
0702: BTree btree = index.getBTree();
0703:
0704: RecordIterator iter;
0705: if (validCache) {
0706: // Use the cached set of updated triples
0707: iter = updatedTriplesCache.getRecords();
0708: } else {
0709: // Cache is invalid; too much updates(?). Iterate over all triples
0710: iter = btree.iterateAll();
0711: }
0712:
0713: try {
0714: byte[] data = null;
0715: while ((data = iter.next()) != null) {
0716: byte flags = data[FLAG_IDX];
0717: boolean added = (flags & ADDED_FLAG) != 0;
0718: boolean removed = (flags & REMOVED_FLAG) != 0;
0719: boolean toggled = (flags & TOGGLE_EXPLICIT_FLAG) != 0;
0720:
0721: if (removed) {
0722: // Record has been discarded earlier, do not put it back in!
0723: continue;
0724: }
0725:
0726: if (added || toggled) {
0727: if (toggled) {
0728: data[FLAG_IDX] ^= EXPLICIT_FLAG;
0729: }
0730: if (added) {
0731: data[FLAG_IDX] ^= ADDED_FLAG;
0732: }
0733:
0734: if (validCache) {
0735: // We're iterating the cache
0736: btree.insert(data);
0737: } else {
0738: // We're iterating the BTree itself
0739: iter.set(data);
0740: }
0741: }
0742: }
0743: } finally {
0744: iter.close();
0745: }
0746: }
0747:
0748: updatedTriplesCache.discard();
0749: updatedTriplesCache = null;
0750:
0751: sync();
0752: }
0753:
0754: public void rollback() throws IOException {
0755: if (txnAddedTriples) {
0756: RecordIterator iter = getTriples(-1, -1, -1, -1,
0757: ADDED_FLAG, ADDED_FLAG);
0758: try {
0759: discardTriples(iter);
0760: } finally {
0761: txnAddedTriples = false;
0762: iter.close();
0763: }
0764: }
0765:
0766: boolean validCache = updatedTriplesCache.isValid();
0767:
0768: byte txnFlagsMask = ~(ADDED_FLAG | REMOVED_FLAG | TOGGLE_EXPLICIT_FLAG);
0769:
0770: for (TripleIndex index : indexes) {
0771: BTree btree = index.getBTree();
0772:
0773: RecordIterator iter;
0774: if (validCache) {
0775: // Use the cached set of updated triples
0776: iter = updatedTriplesCache.getRecords();
0777: } else {
0778: // Cache is invalid; too much updates(?). Iterate over all triples
0779: iter = btree.iterateAll();
0780: }
0781:
0782: try {
0783: byte[] data = null;
0784: while ((data = iter.next()) != null) {
0785: byte flags = data[FLAG_IDX];
0786: boolean removed = (flags & REMOVED_FLAG) != 0;
0787: boolean toggled = (flags & TOGGLE_EXPLICIT_FLAG) != 0;
0788:
0789: if (removed || toggled) {
0790: data[FLAG_IDX] &= txnFlagsMask;
0791:
0792: if (validCache) {
0793: // We're iterating the cache
0794: btree.insert(data);
0795: } else {
0796: // We're iterating the BTree itself
0797: iter.set(data);
0798: }
0799: }
0800: }
0801: } finally {
0802: iter.close();
0803: }
0804: }
0805:
0806: updatedTriplesCache.discard();
0807: updatedTriplesCache = null;
0808:
0809: sync();
0810: }
0811:
0812: protected void sync() throws IOException {
0813: for (int i = 0; i < indexes.length; i++) {
0814: indexes[i].getBTree().sync();
0815: }
0816: }
0817:
0818: private byte[] getData(int subj, int pred, int obj, int context,
0819: int flags) {
0820: byte[] data = new byte[RECORD_LENGTH];
0821:
0822: ByteArrayUtil.putInt(subj, data, SUBJ_IDX);
0823: ByteArrayUtil.putInt(pred, data, PRED_IDX);
0824: ByteArrayUtil.putInt(obj, data, OBJ_IDX);
0825: ByteArrayUtil.putInt(context, data, CONTEXT_IDX);
0826: data[FLAG_IDX] = (byte) flags;
0827:
0828: return data;
0829: }
0830:
0831: private byte[] getSearchKey(int subj, int pred, int obj,
0832: int context, int flags) {
0833: return getData(subj, pred, obj, context, flags);
0834: }
0835:
0836: private byte[] getSearchMask(int subj, int pred, int obj,
0837: int context, int flags) {
0838: byte[] mask = new byte[RECORD_LENGTH];
0839:
0840: if (subj != -1) {
0841: ByteArrayUtil.putInt(0xffffffff, mask, SUBJ_IDX);
0842: }
0843: if (pred != -1) {
0844: ByteArrayUtil.putInt(0xffffffff, mask, PRED_IDX);
0845: }
0846: if (obj != -1) {
0847: ByteArrayUtil.putInt(0xffffffff, mask, OBJ_IDX);
0848: }
0849: if (context != -1) {
0850: ByteArrayUtil.putInt(0xffffffff, mask, CONTEXT_IDX);
0851: }
0852: mask[FLAG_IDX] = (byte) flags;
0853:
0854: return mask;
0855: }
0856:
0857: private byte[] getMinValue(int subj, int pred, int obj, int context) {
0858: byte[] minValue = new byte[RECORD_LENGTH];
0859:
0860: ByteArrayUtil.putInt((subj == -1 ? 0x00000000 : subj),
0861: minValue, SUBJ_IDX);
0862: ByteArrayUtil.putInt((pred == -1 ? 0x00000000 : pred),
0863: minValue, PRED_IDX);
0864: ByteArrayUtil.putInt((obj == -1 ? 0x00000000 : obj), minValue,
0865: OBJ_IDX);
0866: ByteArrayUtil.putInt((context == -1 ? 0x00000000 : context),
0867: minValue, CONTEXT_IDX);
0868: minValue[FLAG_IDX] = (byte) 0;
0869:
0870: return minValue;
0871: }
0872:
0873: private byte[] getMaxValue(int subj, int pred, int obj, int context) {
0874: byte[] maxValue = new byte[RECORD_LENGTH];
0875:
0876: ByteArrayUtil.putInt((subj == -1 ? 0xffffffff : subj),
0877: maxValue, SUBJ_IDX);
0878: ByteArrayUtil.putInt((pred == -1 ? 0xffffffff : pred),
0879: maxValue, PRED_IDX);
0880: ByteArrayUtil.putInt((obj == -1 ? 0xffffffff : obj), maxValue,
0881: OBJ_IDX);
0882: ByteArrayUtil.putInt((context == -1 ? 0xffffffff : context),
0883: maxValue, CONTEXT_IDX);
0884: maxValue[FLAG_IDX] = (byte) 0xff;
0885:
0886: return maxValue;
0887: }
0888:
0889: private File getIndexFile(String fieldSeq) {
0890: return new File(dir, "triples-" + fieldSeq + ".dat");
0891: }
0892:
0893: private void loadProperties(File propFile) throws IOException {
0894: InputStream in = new FileInputStream(propFile);
0895: try {
0896: properties.clear();
0897: properties.load(in);
0898: } finally {
0899: in.close();
0900: }
0901: }
0902:
0903: private void storeProperties(File propFile) throws IOException {
0904: OutputStream out = new FileOutputStream(propFile);
0905: try {
0906: properties.store(out,
0907: "triple indexes meta-data, DO NOT EDIT!");
0908: } finally {
0909: out.close();
0910: }
0911: }
0912:
0913: /*-------------------------*
0914: * Inner class TripleIndex *
0915: *-------------------------*/
0916:
0917: private class TripleIndex {
0918:
0919: private TripleComparator tripleComparator;
0920:
0921: private BTree btree;
0922:
0923: public TripleIndex(String fieldSeq) throws IOException {
0924: tripleComparator = new TripleComparator(fieldSeq);
0925: File btreeFile = getIndexFile(fieldSeq);
0926: btree = new BTree(btreeFile, 2048, RECORD_LENGTH,
0927: tripleComparator, forceSync);
0928: }
0929:
0930: public char[] getFieldSeq() {
0931: return tripleComparator.getFieldSeq();
0932: }
0933:
0934: public File getFile() {
0935: return btree.getFile();
0936: }
0937:
0938: public BTree getBTree() {
0939: return btree;
0940: }
0941:
0942: /**
0943: * Determines the 'score' of this index on the supplied pattern of
0944: * subject, predicate, object and context IDs. The higher the score, the
0945: * better the index is suited for matching the pattern. Lowest score is 0,
0946: * which means that the index will perform a sequential scan.
0947: */
0948: public int getPatternScore(int subj, int pred, int obj,
0949: int context) {
0950: int score = 0;
0951:
0952: for (char field : tripleComparator.getFieldSeq()) {
0953: switch (field) {
0954: case 's':
0955: if (subj >= 0) {
0956: score++;
0957: } else {
0958: return score;
0959: }
0960: break;
0961: case 'p':
0962: if (pred >= 0) {
0963: score++;
0964: } else {
0965: return score;
0966: }
0967: break;
0968: case 'o':
0969: if (obj >= 0) {
0970: score++;
0971: } else {
0972: return score;
0973: }
0974: case 'c':
0975: if (context >= 0) {
0976: score++;
0977: } else {
0978: return score;
0979: }
0980: }
0981: }
0982:
0983: return score;
0984: }
0985: }
0986:
0987: /*------------------------------*
0988: * Inner class TripleComparator *
0989: *------------------------------*/
0990:
0991: /**
0992: * A RecordComparator that can be used to create indexes with a configurable
0993: * order of the subject, predicate, object and context fields.
0994: */
0995: private static class TripleComparator implements RecordComparator {
0996:
0997: private char[] fieldSeq;
0998:
0999: public TripleComparator(String fieldSeq) {
1000: this .fieldSeq = fieldSeq.toCharArray();
1001: }
1002:
1003: public char[] getFieldSeq() {
1004: return fieldSeq;
1005: }
1006:
1007: public final int compareBTreeValues(byte[] key, byte[] data,
1008: int offset, int length) {
1009: for (char field : fieldSeq) {
1010: int fieldIdx = 0;
1011:
1012: switch (field) {
1013: case 's':
1014: fieldIdx = SUBJ_IDX;
1015: break;
1016: case 'p':
1017: fieldIdx = PRED_IDX;
1018: break;
1019: case 'o':
1020: fieldIdx = OBJ_IDX;
1021: break;
1022: case 'c':
1023: fieldIdx = CONTEXT_IDX;
1024: break;
1025: default:
1026: throw new IllegalArgumentException(
1027: "invalid character '" + field
1028: + "' in field sequence: "
1029: + new String(fieldSeq));
1030: }
1031:
1032: int diff = ByteArrayUtil.compareRegion(key, fieldIdx,
1033: data, offset + fieldIdx, 4);
1034:
1035: if (diff != 0) {
1036: return diff;
1037: }
1038: }
1039:
1040: return 0;
1041: }
1042: }
1043: }
|