0001: /*
0002: * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
0003: *
0004: * Licensed under the Aduna BSD-style license.
0005: */
0006: package org.openrdf.sail.nativerdf.btree;
0007:
0008: import java.io.File;
0009: import java.io.IOException;
0010: import java.io.PrintStream;
0011: import java.io.RandomAccessFile;
0012: import java.nio.ByteBuffer;
0013: import java.nio.channels.FileChannel;
0014: import java.util.Arrays;
0015: import java.util.BitSet;
0016: import java.util.Iterator;
0017: import java.util.LinkedList;
0018:
0019: import info.aduna.io.ByteArrayUtil;
0020:
0021: /**
0022: * Implementation of an on-disk B-Tree using the <tt>java.nio</tt> classes
0023: * that are available in JDK 1.4 and newer. Documentation about B-Trees can be
0024: * found on-line at the following URLs:
0025: * <ul>
0026: * <li>http://cis.stvincent.edu/swd/btree/btree.html</li>,
0027: * <li>http://bluerwhite.org/btree/</li>, and
0028: * <li>http://semaphorecorp.com/btp/algo.html</li>.
0029: * </ul>
0030: * The first reference was used to implement this class.
0031: * <p>
0032: * TODO: clean up code
0033: *
0034: * @author Arjohn Kampman
0035: */
0036: public class BTree {
0037:
0038: /*-----------------*
0039: * Class constants *
0040: *-----------------*/
0041:
0042: private static final int FILE_FORMAT_VERSION = 1;
0043:
0044: private static final int HEADER_LENGTH = 16;
0045:
0046: private static final int MRU_CACHE_SIZE = 8;
0047:
0048: /*-----------*
0049: * Constants *
0050: *-----------*/
0051:
0052: /**
0053: * The BTree file.
0054: */
0055: private final File file;
0056:
0057: /**
0058: * A RandomAccessFile used to open and close a read/write FileChannel on the
0059: * BTree file.
0060: */
0061: private final RandomAccessFile raf;
0062:
0063: /**
0064: * The file channel used to read and write data from/to the BTree file.
0065: */
0066: private final FileChannel fileChannel;
0067:
0068: /**
0069: * Flag indicating whether file writes should be forced to disk using
0070: * {@link FileChannel#force(boolean)}.
0071: */
0072: private final boolean forceSync;
0073:
0074: /**
0075: * Object used to determine whether one value is lower, equal or greater than
0076: * another value. This determines the order of values in the BTree.
0077: */
0078: private final RecordComparator comparator;
0079:
0080: /**
0081: * List containing nodes that are currently "in use", used for caching.
0082: */
0083: private final LinkedList<Node> nodesInUse = new LinkedList<Node>();
0084:
0085: /**
0086: * List containing the most recently released nodes, used for caching.
0087: */
0088: private final LinkedList<Node> mruNodes = new LinkedList<Node>();;
0089:
0090: /**
0091: * Bit set recording which nodes have been allocated, using node IDs as
0092: * index.
0093: */
0094: private final BitSet allocatedNodes = new BitSet();
0095:
0096: /**
0097: * Flag indicating whether {@link #allocatedNodes} has been initialized.
0098: */
0099: private boolean allocatedNodesInitialized = false;
0100:
0101: // Stored or specified properties //
0102:
0103: /**
0104: * The block size to use for calculating BTree node size. For optimal
0105: * performance, the specified block size should be equal to the file system's
0106: * block size.
0107: */
0108: private final int blockSize;
0109:
0110: /**
0111: * The size of the values (byte arrays) in this BTree.
0112: */
0113: private final int valueSize;
0114:
0115: // Derived properties //
0116:
0117: /**
0118: * The size of a slot storing a node ID and a value.
0119: */
0120: private final int slotSize;
0121:
0122: /**
0123: * The maximum number of outgoing branches for a node.
0124: */
0125: private final int branchFactor;
0126:
0127: /**
0128: * The minimum number of values for a node (except for the root).
0129: */
0130: private final int minValueCount;
0131:
0132: /**
0133: * The size of a node in bytes.
0134: */
0135: private final int nodeSize;
0136:
0137: /*-----------*
0138: * Variables *
0139: *-----------*/
0140:
0141: /**
0142: * The ID of the root node, <tt>0</tt> to indicate that there is no root
0143: * node (i.e. the BTree is empty).
0144: */
0145: private int rootNodeID;
0146:
0147: /**
0148: * The highest ID number of the nodes in this BTree.
0149: */
0150: private int maxNodeID;
0151:
0152: /*--------------*
0153: * Constructors *
0154: *--------------*/
0155:
0156: /**
0157: * Creates a new BTree that uses an instance of
0158: * <tt>DefaultRecordComparator</tt> to compare values.
0159: *
0160: * @param dataFile
0161: * The file for the B-Tree.
0162: * @param blockSize
0163: * The size (in bytes) of a file block for a single node. Ideally, the
0164: * size specified is the size of a block in the used file system.
0165: * @param valueSize
0166: * The size (in bytes) of the fixed-length values that are or will be
0167: * stored in the B-Tree.
0168: * @throws IOException
0169: * In case the initialization of the B-Tree file failed.
0170: * @see DefaultRecordComparator
0171: */
0172: public BTree(File dataFile, int blockSize, int valueSize)
0173: throws IOException {
0174: this (dataFile, blockSize, valueSize, false);
0175: }
0176:
0177: /**
0178: * Creates a new BTree that uses an instance of
0179: * <tt>DefaultRecordComparator</tt> to compare values.
0180: *
0181: * @param dataFile
0182: * The file for the B-Tree.
0183: * @param blockSize
0184: * The size (in bytes) of a file block for a single node. Ideally, the
0185: * size specified is the size of a block in the used file system.
0186: * @param valueSize
0187: * The size (in bytes) of the fixed-length values that are or will be
0188: * stored in the B-Tree.
0189: * @param forceSync
0190: * Flag indicating whether updates should be synced to disk forcefully
0191: * by calling {@link FileChannel#force(boolean)}. This may have a
0192: * severe impact on write performance.
0193: * @throws IOException
0194: * In case the initialization of the B-Tree file failed.
0195: * @see DefaultRecordComparator
0196: */
0197: public BTree(File dataFile, int blockSize, int valueSize,
0198: boolean forceSync) throws IOException {
0199: this (dataFile, blockSize, valueSize,
0200: new DefaultRecordComparator(), forceSync);
0201: }
0202:
0203: /**
0204: * Creates a new BTree that uses the supplied <tt>RecordComparator</tt> to
0205: * compare the values that are or will be stored in the B-Tree.
0206: *
0207: * @param dataFile
0208: * The file for the B-Tree.
0209: * @param blockSize
0210: * The size (in bytes) of a file block for a single node. Ideally, the
0211: * size specified is the size of a block in the used file system.
0212: * @param valueSize
0213: * The size (in bytes) of the fixed-length values that are or will be
0214: * stored in the B-Tree.
0215: * @param comparator
0216: * The <tt>RecordComparator</tt> to use for determining whether one
0217: * value is smaller, larger or equal to another.
0218: * @throws IOException
0219: * In case the initialization of the B-Tree file failed.
0220: */
0221: public BTree(File dataFile, int blockSize, int valueSize,
0222: RecordComparator comparator) throws IOException {
0223: this (dataFile, blockSize, valueSize, comparator, false);
0224: }
0225:
0226: /**
0227: * Creates a new BTree that uses the supplied <tt>RecordComparator</tt> to
0228: * compare the values that are or will be stored in the B-Tree.
0229: *
0230: * @param dataFile
0231: * The file for the B-Tree.
0232: * @param blockSize
0233: * The size (in bytes) of a file block for a single node. Ideally, the
0234: * size specified is the size of a block in the used file system.
0235: * @param valueSize
0236: * The size (in bytes) of the fixed-length values that are or will be
0237: * stored in the B-Tree.
0238: * @param comparator
0239: * The <tt>RecordComparator</tt> to use for determining whether one
0240: * value is smaller, larger or equal to another.
0241: * @param forceSync
0242: * Flag indicating whether updates should be synced to disk forcefully
0243: * by calling {@link FileChannel#force(boolean)}. This may have a
0244: * severe impact on write performance.
0245: * @throws IOException
0246: * In case the initialization of the B-Tree file failed.
0247: */
0248: public BTree(File dataFile, int blockSize, int valueSize,
0249: RecordComparator comparator, boolean forceSync)
0250: throws IOException {
0251: if (dataFile == null) {
0252: throw new IllegalArgumentException(
0253: "dataFile must not be null");
0254: }
0255: if (blockSize < HEADER_LENGTH) {
0256: throw new IllegalArgumentException(
0257: "block size must be at least " + HEADER_LENGTH
0258: + " bytes");
0259: }
0260: if (valueSize <= 0) {
0261: throw new IllegalArgumentException(
0262: "value size must be larger than 0");
0263: }
0264: if (blockSize < 3 * valueSize + 20) {
0265: throw new IllegalArgumentException(
0266: "block size to small; must at least be able to store three values");
0267: }
0268: if (comparator == null) {
0269: throw new IllegalArgumentException(
0270: "comparator muts not be null");
0271: }
0272:
0273: this .file = dataFile;
0274: this .comparator = comparator;
0275: this .forceSync = forceSync;
0276:
0277: if (!file.exists()) {
0278: boolean created = file.createNewFile();
0279: if (!created) {
0280: throw new IOException("Failed to create file: " + file);
0281: }
0282: }
0283:
0284: // Open a read/write channel to the file
0285: raf = new RandomAccessFile(file, "rw");
0286: fileChannel = raf.getChannel();
0287:
0288: if (fileChannel.size() == 0L) {
0289: // Empty file, initialize it with the specified parameters
0290: this .blockSize = blockSize;
0291: this .valueSize = valueSize;
0292: this .rootNodeID = 0;
0293: this .maxNodeID = 0;
0294:
0295: writeFileHeader();
0296:
0297: sync();
0298: } else {
0299: // Read parameters from file
0300: ByteBuffer buf = ByteBuffer.allocate(HEADER_LENGTH);
0301: fileChannel.read(buf, 0L);
0302:
0303: buf.rewind();
0304:
0305: @SuppressWarnings("unused")
0306: int fileFormatVersion = buf.getInt();
0307: this .blockSize = buf.getInt();
0308: this .valueSize = buf.getInt();
0309: this .rootNodeID = buf.getInt();
0310:
0311: // Verify that the value sizes match
0312: if (this .valueSize != valueSize) {
0313: throw new IOException(
0314: "Specified value size ("
0315: + valueSize
0316: + ") is different from what is stored on disk ("
0317: + this .valueSize + ")");
0318: }
0319: }
0320:
0321: // Calculate derived properties
0322: slotSize = 4 + this .valueSize;
0323: branchFactor = 1 + (this .blockSize - 8) / slotSize;
0324: // bf=30 --> mvc=14; bf=29 --> mvc=14
0325: minValueCount = (branchFactor - 1) / 2;
0326: nodeSize = 8 + (branchFactor - 1) * slotSize;
0327: maxNodeID = Math.max(0, offset2nodeID(fileChannel.size()
0328: - nodeSize));
0329:
0330: // System.out.println("blockSize=" + this.blockSize);
0331: // System.out.println("valueSize=" + this.valueSize);
0332: // System.out.println("slotSize=" + this.slotSize);
0333: // System.out.println("branchFactor=" + this.branchFactor);
0334: // System.out.println("minimum value count=" + this.minValueCount);
0335: // System.out.println("nodeSize=" + this.nodeSize);
0336: }
0337:
0338: /*---------*
0339: * Methods *
0340: *---------*/
0341:
0342: /**
0343: * Gets the file that this BTree operates on.
0344: */
0345: public File getFile() {
0346: return file;
0347: }
0348:
0349: /**
0350: * Closes any opened files and release any resources used by this B-Tree. Any
0351: * pending changes will be synchronized to disk before closing. Once the
0352: * B-Tree has been closes, it can no longer be used.
0353: */
0354: public void close() throws IOException {
0355: sync();
0356:
0357: synchronized (nodesInUse) {
0358: nodesInUse.clear();
0359: }
0360: synchronized (mruNodes) {
0361: mruNodes.clear();
0362: }
0363:
0364: raf.close();
0365: }
0366:
0367: /**
0368: * Writes any changes that are cached in memory to disk.
0369: *
0370: * @throws IOException
0371: */
0372: public void sync() throws IOException {
0373: // Write any changed nodes that still reside in the cache to disk
0374: synchronized (nodesInUse) {
0375: for (Node node : nodesInUse) {
0376: if (node.dataChanged()) {
0377: node.write();
0378: }
0379: }
0380: }
0381:
0382: synchronized (mruNodes) {
0383: for (Node node : mruNodes) {
0384: if (node.dataChanged()) {
0385: node.write();
0386: }
0387: }
0388: }
0389:
0390: if (forceSync) {
0391: fileChannel.force(false);
0392: }
0393: }
0394:
0395: /**
0396: * Gets the value that matches the specified key.
0397: *
0398: * @param key
0399: * A value that is equal to the value that should be retrieved, at
0400: * least as far as the RecordComparator of this BTree is concerned.
0401: * @return The value matching the key, or <tt>null</tt> if no such value
0402: * could be found.
0403: */
0404: public byte[] get(byte[] key) throws IOException {
0405: if (rootNodeID == 0) {
0406: // Empty BTree
0407: return null;
0408: }
0409:
0410: Node node = readNode(rootNodeID);
0411:
0412: while (true) {
0413: int valueIdx = node.search(key);
0414:
0415: if (valueIdx >= 0) {
0416: // Return matching value
0417: byte[] result = node.getValue(valueIdx);
0418: node.release();
0419: return result;
0420: } else if (!node.isLeaf()) {
0421: // Returned index references the first value that is larger than
0422: // the key, search the child node just left of it (==same index).
0423: Node childNode = node.getChildNode(-valueIdx - 1);
0424: node.release();
0425: node = childNode;
0426: } else {
0427: // value not found
0428: node.release();
0429: return null;
0430: }
0431: }
0432: }
0433:
0434: /**
0435: * Returns an iterator that iterates over all values in this B-Tree.
0436: */
0437: public RecordIterator iterateAll() {
0438: return new RangeIterator(null, null, null, null);
0439: }
0440:
0441: /**
0442: * Returns an iterator that iterates over all values between minValue and
0443: * maxValue, inclusive.
0444: */
0445: public RecordIterator iterateRange(byte[] minValue, byte[] maxValue) {
0446: return new RangeIterator(null, null, minValue, maxValue);
0447: }
0448:
0449: /**
0450: * Returns an iterator that iterates over all values and returns the values
0451: * that match the supplied searchKey after searchMask has been applied to the
0452: * value.
0453: */
0454: public RecordIterator iterateValues(byte[] searchKey,
0455: byte[] searchMask) {
0456: return new RangeIterator(searchKey, searchMask, null, null);
0457: }
0458:
0459: /**
0460: * Returns an iterator that iterates over all values between minValue and
0461: * maxValue (inclusive) and returns the values that match the supplied
0462: * searchKey after searchMask has been applied to the value.
0463: */
0464: public RecordIterator iterateRangedValues(byte[] searchKey,
0465: byte[] searchMask, byte[] minValue, byte[] maxValue) {
0466: return new RangeIterator(searchKey, searchMask, minValue,
0467: maxValue);
0468: }
0469:
0470: /**
0471: * Returns an estimate for the number of values stored in this BTree.
0472: */
0473: public long getValueCountEstimate() throws IOException {
0474: int allocatedNodesCount;
0475:
0476: synchronized (allocatedNodes) {
0477: initAllocatedNodes();
0478: allocatedNodesCount = allocatedNodes.cardinality();
0479: }
0480:
0481: // Assume fill factor of 70%
0482: return (long) (allocatedNodesCount * (branchFactor - 1) * 0.7);
0483: }
0484:
0485: /**
0486: * Inserts the supplied value into the B-Tree. In case an equal value is
0487: * already present in the B-Tree this value is overwritten with the new value
0488: * and the old value is returned by this method.
0489: *
0490: * @param value
0491: * The value to insert into the B-Tree.
0492: * @return The old value that was replaced, if any.
0493: * @throws IOException
0494: * If an I/O error occurred.
0495: */
0496: public byte[] insert(byte[] value) throws IOException {
0497: Node rootNode = null;
0498:
0499: if (rootNodeID == 0) {
0500: // Empty B-Tree, create a root node
0501: rootNode = createNewNode();
0502: rootNodeID = rootNode.getID();
0503: writeFileHeader();
0504: } else {
0505: rootNode = readNode(rootNodeID);
0506: }
0507:
0508: InsertResult insertResult = insertInTree(value, 0, rootNode);
0509:
0510: if (insertResult.overflowValue != null) {
0511: // Root node overflowed, create a new root node and insert overflow
0512: // value-nodeID pair in it
0513: Node newRootNode = createNewNode();
0514: newRootNode.setChildNodeID(0, rootNode.getID());
0515: newRootNode.insertValueNodeIDPair(0,
0516: insertResult.overflowValue,
0517: insertResult.overflowNodeID);
0518:
0519: rootNodeID = newRootNode.getID();
0520: writeFileHeader();
0521: newRootNode.release();
0522: }
0523:
0524: rootNode.release();
0525:
0526: return insertResult.oldValue;
0527: }
0528:
0529: private InsertResult insertInTree(byte[] value, int nodeID,
0530: Node node) throws IOException {
0531: InsertResult insertResult = null;
0532:
0533: // Search value in node
0534: int valueIdx = node.search(value);
0535:
0536: if (valueIdx >= 0) {
0537: // Found an equal value, replace it
0538: insertResult = new InsertResult();
0539: insertResult.oldValue = node.getValue(valueIdx);
0540:
0541: // Do not replace the value if it's identical to the old
0542: // value to prevent possibly unnecessary disk writes
0543: if (!Arrays.equals(value, insertResult.oldValue)) {
0544: node.setValue(valueIdx, value);
0545: }
0546: } else {
0547: // valueIdx references the first value that is larger than the key
0548: valueIdx = -valueIdx - 1;
0549:
0550: if (node.isLeaf()) {
0551: // Leaf node, insert value here
0552: insertResult = insertInNode(value, nodeID, valueIdx,
0553: node);
0554: } else {
0555: // Not a leaf node, insert value in the child node just left of
0556: // the found value (==same index)
0557: Node childNode = node.getChildNode(valueIdx);
0558: insertResult = insertInTree(value, nodeID, childNode);
0559: childNode.release();
0560:
0561: if (insertResult.overflowValue != null) {
0562: // Child node overflowed, insert overflow in this node
0563: byte[] oldValue = insertResult.oldValue;
0564: insertResult = insertInNode(
0565: insertResult.overflowValue,
0566: insertResult.overflowNodeID, valueIdx, node);
0567: insertResult.oldValue = oldValue;
0568: }
0569: }
0570: }
0571:
0572: return insertResult;
0573: }
0574:
0575: private InsertResult insertInNode(byte[] value, int nodeID,
0576: int valueIdx, Node node) throws IOException {
0577: InsertResult insertResult = new InsertResult();
0578:
0579: if (node.isFull()) {
0580: // Leaf node is full and needs to be split
0581: Node newNode = createNewNode();
0582: insertResult.overflowValue = node.splitAndInsert(value,
0583: nodeID, valueIdx, newNode);
0584: insertResult.overflowNodeID = newNode.getID();
0585: newNode.release();
0586: } else {
0587: // Leaf node is not full, simply add the value to it
0588: node.insertValueNodeIDPair(valueIdx, value, nodeID);
0589: }
0590:
0591: return insertResult;
0592: }
0593:
0594: /**
0595: * struct-like class used to represent the result of an insert operation.
0596: */
0597: private static class InsertResult {
0598:
0599: /**
0600: * The old value that has been replaced by the insertion of a new value.
0601: */
0602: byte[] oldValue = null;
0603:
0604: /**
0605: * The value that was removed from a child node due to overflow.
0606: */
0607: byte[] overflowValue = null;
0608:
0609: /**
0610: * The nodeID to the right of 'overflowValue' that was removed from a
0611: * child node due to overflow.
0612: */
0613: int overflowNodeID = 0;
0614: }
0615:
0616: /**
0617: * Removes the value that matches the specified key from the B-Tree.
0618: *
0619: * @param key
0620: * A key that matches the value that should be removed from the
0621: * B-Tree.
0622: * @return The value that was removed from the B-Tree, or <tt>null</tt> if
0623: * no matching value was found.
0624: * @throws IOException
0625: * If an I/O error occurred.
0626: */
0627: public byte[] remove(byte[] key) throws IOException {
0628: byte[] result = null;
0629:
0630: if (rootNodeID != 0) {
0631: Node rootNode = readNode(rootNodeID);
0632:
0633: result = removeFromTree(key, rootNode);
0634:
0635: if (rootNode.isEmpty()) {
0636: // Root node has become empty as a result of the removal
0637: if (rootNode.isLeaf()) {
0638: // Nothing's left
0639: rootNodeID = 0;
0640: } else {
0641: // Collapse B-Tree one level
0642: rootNodeID = rootNode.getChildNodeID(0);
0643: rootNode.setChildNodeID(0, 0);
0644: }
0645:
0646: // Write new root node ID to file header
0647: writeFileHeader();
0648: }
0649:
0650: rootNode.release();
0651: }
0652:
0653: return result;
0654: }
0655:
0656: /**
0657: * Removes the value that matches the specified key from the tree starting at
0658: * the specified node and returns the removed value.
0659: *
0660: * @param key
0661: * A key that matches the value that should be removed from the
0662: * B-Tree.
0663: * @param node
0664: * The root of the (sub) tree.
0665: * @return The value that was removed from the B-Tree, or <tt>null</tt> if
0666: * no matching value was found.
0667: * @throws IOException
0668: * If an I/O error occurred.
0669: */
0670: private byte[] removeFromTree(byte[] key, Node node)
0671: throws IOException {
0672: byte[] value = null;
0673:
0674: // Search key
0675: int valueIdx = node.search(key);
0676:
0677: if (valueIdx >= 0) {
0678: // Found matching value in this node, remove it
0679: if (node.isLeaf()) {
0680: value = node.removeValueRight(valueIdx);
0681: } else {
0682: // Replace the matching value with the smallest value from the right
0683: // child node
0684: value = node.getValue(valueIdx);
0685:
0686: Node rightChildNode = node.getChildNode(valueIdx + 1);
0687: byte[] smallestValue = removeSmallestValueFromTree(rightChildNode);
0688:
0689: node.setValue(valueIdx, smallestValue);
0690:
0691: balanceChildNode(node, rightChildNode, valueIdx + 1);
0692:
0693: rightChildNode.release();
0694: }
0695: } else if (!node.isLeaf()) {
0696: // Recurse into left child node
0697: valueIdx = -valueIdx - 1;
0698: Node childNode = node.getChildNode(valueIdx);
0699: value = removeFromTree(key, childNode);
0700:
0701: balanceChildNode(node, childNode, valueIdx);
0702:
0703: childNode.release();
0704: }
0705:
0706: return value;
0707: }
0708:
0709: /**
0710: * Removes the smallest value from the tree starting at the specified node
0711: * and returns the removed value.
0712: *
0713: * @param node
0714: * The root of the (sub) tree.
0715: * @return The value that was removed from the B-Tree, or <tt>null</tt> if
0716: * the supplied node is empty.
0717: * @throws IOException
0718: * If an I/O error occurred.
0719: */
0720: private byte[] removeSmallestValueFromTree(Node node)
0721: throws IOException {
0722: byte[] value = null;
0723:
0724: if (node.isLeaf()) {
0725: if (!node.isEmpty()) {
0726: value = node.removeValueLeft(0);
0727: }
0728: } else {
0729: // Recurse into left-most child node
0730: Node childNode = node.getChildNode(0);
0731: value = removeSmallestValueFromTree(childNode);
0732: balanceChildNode(node, childNode, 0);
0733: childNode.release();
0734: }
0735:
0736: return value;
0737: }
0738:
0739: private void balanceChildNode(Node parentNode, Node childNode,
0740: int childIdx) throws IOException {
0741: if (childNode.getValueCount() < minValueCount) {
0742: // Child node contains too few values, try to borrow one from its right
0743: // sibling
0744: Node rightSibling = (childIdx < parentNode.getValueCount()) ? parentNode
0745: .getChildNode(childIdx + 1)
0746: : null;
0747:
0748: if (rightSibling != null
0749: && rightSibling.getValueCount() > minValueCount) {
0750: // Right sibling has enough values to give one up
0751: childNode.insertValueNodeIDPair(childNode
0752: .getValueCount(),
0753: parentNode.getValue(childIdx), rightSibling
0754: .getChildNodeID(0));
0755: parentNode.setValue(childIdx, rightSibling
0756: .removeValueLeft(0));
0757: } else {
0758: // Right sibling does not have enough values to give one up, try its
0759: // left sibling
0760: Node leftSibling = (childIdx > 0) ? parentNode
0761: .getChildNode(childIdx - 1) : null;
0762:
0763: if (leftSibling != null
0764: && leftSibling.getValueCount() > minValueCount) {
0765: // Left sibling has enough values to give one up
0766: childNode.insertNodeIDValuePair(0,
0767: leftSibling.getChildNodeID(leftSibling
0768: .getValueCount()), parentNode
0769: .getValue(childIdx - 1));
0770: parentNode.setValue(childIdx - 1, leftSibling
0771: .removeValueRight(leftSibling
0772: .getValueCount() - 1));
0773: } else {
0774: // Both siblings contain the minimum amount of values,
0775: // merge the child node with its left or right sibling
0776: if (leftSibling != null) {
0777: leftSibling.mergeWithRightSibling(parentNode
0778: .removeValueRight(childIdx - 1),
0779: childNode);
0780: } else {
0781: childNode.mergeWithRightSibling(parentNode
0782: .removeValueRight(childIdx),
0783: rightSibling);
0784: }
0785: }
0786:
0787: if (leftSibling != null) {
0788: leftSibling.release();
0789: }
0790: }
0791:
0792: if (rightSibling != null) {
0793: rightSibling.release();
0794: }
0795: }
0796: }
0797:
0798: /**
0799: * Removes all values from the B-Tree.
0800: *
0801: * @throws IOException
0802: * If an I/O error occurred.
0803: */
0804: public void clear() throws IOException {
0805: synchronized (nodesInUse) {
0806: nodesInUse.clear();
0807: }
0808: synchronized (mruNodes) {
0809: mruNodes.clear();
0810: }
0811: fileChannel.truncate(HEADER_LENGTH);
0812:
0813: rootNodeID = 0;
0814: maxNodeID = 0;
0815: allocatedNodes.clear();
0816:
0817: writeFileHeader();
0818: }
0819:
0820: private Node createNewNode() throws IOException {
0821: int newNodeID;
0822:
0823: synchronized (allocatedNodes) {
0824: initAllocatedNodes();
0825: newNodeID = allocatedNodes.nextClearBit(1);
0826: allocatedNodes.set(newNodeID);
0827: }
0828:
0829: if (newNodeID > maxNodeID) {
0830: maxNodeID = newNodeID;
0831: }
0832:
0833: Node node = new Node(newNodeID);
0834: node.use();
0835: synchronized (nodesInUse) {
0836: nodesInUse.add(node);
0837: }
0838: return node;
0839: }
0840:
0841: private Node readNode(int id) throws IOException {
0842: if (id <= 0) {
0843: throw new IllegalArgumentException(
0844: "id must be larger than 0, is: " + id);
0845: }
0846:
0847: // Check nodesInUse list
0848: synchronized (nodesInUse) {
0849: for (Node node : nodesInUse) {
0850: if (node.getID() == id) {
0851: node.use();
0852: return node;
0853: }
0854: }
0855:
0856: // Check mruNodes list
0857: synchronized (mruNodes) {
0858: Iterator<Node> iter = mruNodes.iterator();
0859: while (iter.hasNext()) {
0860: Node node = iter.next();
0861:
0862: if (node.getID() == id) {
0863: iter.remove();
0864: nodesInUse.add(node);
0865:
0866: node.use();
0867: return node;
0868: }
0869: }
0870: }
0871:
0872: // Read node from disk
0873: Node node = new Node(id);
0874: node.read();
0875: nodesInUse.add(node);
0876:
0877: node.use();
0878: return node;
0879: }
0880: }
0881:
0882: private void releaseNode(Node node) throws IOException {
0883: synchronized (nodesInUse) {
0884: nodesInUse.remove(node);
0885:
0886: if (node.isEmpty() && node.isLeaf()) {
0887: // Discard node
0888: synchronized (allocatedNodes) {
0889: initAllocatedNodes();
0890: allocatedNodes.clear(node.id);
0891: }
0892: synchronized (mruNodes) {
0893: mruNodes.remove(node);
0894: }
0895:
0896: node.write();
0897:
0898: if (node.id == maxNodeID) {
0899: // Shrink file
0900: synchronized (allocatedNodes) {
0901: maxNodeID = Math.max(0,
0902: allocatedNodes.length() - 1);
0903: }
0904: fileChannel.truncate(nodeID2offset(maxNodeID)
0905: + nodeSize);
0906: }
0907: } else {
0908: synchronized (mruNodes) {
0909: if (mruNodes.size() >= MRU_CACHE_SIZE) {
0910: // Remove least recently used node
0911: Node lruNode = mruNodes.removeLast();
0912: if (lruNode.dataChanged()) {
0913: lruNode.write();
0914: }
0915: }
0916: mruNodes.addFirst(node);
0917: }
0918: }
0919: }
0920: }
0921:
0922: private void initAllocatedNodes() throws IOException {
0923: if (!allocatedNodesInitialized) {
0924: if (rootNodeID != 0) {
0925: initAllocatedNodes(rootNodeID);
0926: }
0927: allocatedNodesInitialized = true;
0928: }
0929: }
0930:
0931: private void initAllocatedNodes(int nodeID) throws IOException {
0932: allocatedNodes.set(nodeID);
0933:
0934: Node node = readNode(nodeID);
0935:
0936: if (!node.isLeaf()) {
0937: for (int i = 0; i < node.getValueCount() + 1; i++) {
0938: initAllocatedNodes(node.getChildNodeID(i));
0939: }
0940: }
0941:
0942: node.release();
0943: }
0944:
0945: private long nodeID2offset(int id) {
0946: return (long) blockSize * id;
0947: }
0948:
0949: private int offset2nodeID(long offset) {
0950: return (int) (offset / blockSize);
0951: }
0952:
0953: private void writeFileHeader() throws IOException {
0954: ByteBuffer buf = ByteBuffer.allocate(HEADER_LENGTH);
0955: // for backwards compatibility in future versions
0956: buf.putInt(FILE_FORMAT_VERSION);
0957: buf.putInt(blockSize);
0958: buf.putInt(valueSize);
0959: buf.putInt(rootNodeID);
0960:
0961: buf.rewind();
0962:
0963: fileChannel.write(buf, 0L);
0964: }
0965:
0966: /*------------------*
0967: * Inner class Node *
0968: *------------------*/
0969:
0970: private class Node {
0971:
0972: /** This node's ID. */
0973: private int id;
0974:
0975: /** This node's data. */
0976: private byte[] data;
0977:
0978: /** The number of values containined in this node. */
0979: private int valueCount;
0980:
0981: /** The number of objects currently 'using' this node. */
0982: private int usageCount;
0983:
0984: /** Flag indicating whether the contents of data has changed. */
0985: private boolean dataChanged;
0986:
0987: /** Registered listeners that want to be notified of changes to the node. */
0988: private LinkedList<NodeListener> listeners = new LinkedList<NodeListener>();
0989:
0990: /**
0991: * Creates a new Node object with the specified ID.
0992: *
0993: * @param id
0994: * The node's ID, must be larger than <tt>0</tt>.
0995: * @throws IllegalArgumentException
0996: * If the specified <tt>id</tt> is <= <tt>0</tt>.
0997: */
0998: public Node(int id) {
0999: if (id <= 0) {
1000: throw new IllegalArgumentException(
1001: "id must be larger than 0, is: " + id);
1002: }
1003:
1004: this .id = id;
1005: this .valueCount = 0;
1006: this .usageCount = 0;
1007:
1008: // Allocate enough room to store one more value and node ID;
1009: // this greatly simplifies the algorithm for splitting a node.
1010: this .data = new byte[nodeSize + slotSize];
1011: }
1012:
1013: public int getID() {
1014: return id;
1015: }
1016:
1017: public boolean isLeaf() {
1018: return getChildNodeID(0) == 0;
1019: }
1020:
1021: public void use() {
1022: usageCount++;
1023: }
1024:
1025: public void release() throws IOException {
1026: assert usageCount > 0 : "Releasing node while usage count is "
1027: + usageCount;
1028:
1029: usageCount--;
1030:
1031: if (usageCount == 0) {
1032: releaseNode(this );
1033: }
1034: }
1035:
1036: public int getUsageCount() {
1037: return usageCount;
1038: }
1039:
1040: public boolean dataChanged() {
1041: return dataChanged;
1042: }
1043:
1044: public int getValueCount() {
1045: return valueCount;
1046: }
1047:
1048: public int getNodeCount() {
1049: if (isLeaf()) {
1050: return 0;
1051: } else {
1052: return valueCount + 1;
1053: }
1054: }
1055:
1056: /**
1057: * Checks if this node has any values.
1058: *
1059: * @return <tt>true</tt> if this node has no values, <tt>fals</tt> if
1060: * it has.
1061: */
1062: public boolean isEmpty() {
1063: return valueCount == 0;
1064: }
1065:
1066: public boolean isFull() {
1067: return valueCount == branchFactor - 1;
1068: }
1069:
1070: public byte[] getValue(int valueIdx) {
1071: return ByteArrayUtil.get(data, valueIdx2offset(valueIdx),
1072: valueSize);
1073: }
1074:
1075: public void setValue(int valueIdx, byte[] value) {
1076: ByteArrayUtil.put(value, data, valueIdx2offset(valueIdx));
1077: dataChanged = true;
1078: notifyValueChanged(valueIdx);
1079: }
1080:
1081: /**
1082: * Removes the value that can be found at the specified valueIdx and the
1083: * node ID directly to the right of it.
1084: *
1085: * @param valueIdx
1086: * A legal value index.
1087: * @return The value that was removed.
1088: * @see #removeValueLeft
1089: */
1090: public byte[] removeValueRight(int valueIdx) {
1091: byte[] value = getValue(valueIdx);
1092:
1093: int endOffset = valueIdx2offset(valueCount);
1094:
1095: if (valueIdx < valueCount - 1) {
1096: // Shift the rest of the data one slot to the left
1097: shiftData(valueIdx2offset(valueIdx + 1), endOffset,
1098: -slotSize);
1099: }
1100:
1101: // Clear last slot
1102: clearData(endOffset - slotSize, endOffset);
1103:
1104: setValueCount(--valueCount);
1105:
1106: dataChanged = true;
1107:
1108: notifyValueRemoved(valueIdx);
1109:
1110: return value;
1111: }
1112:
1113: /**
1114: * Removes the value that can be found at the specified valueIdx and the
1115: * node ID directly to the left of it.
1116: *
1117: * @param valueIdx
1118: * A legal value index.
1119: * @return The value that was removed.
1120: * @see #removeValueRight
1121: */
1122: public byte[] removeValueLeft(int valueIdx) {
1123: byte[] value = getValue(valueIdx);
1124:
1125: int endOffset = valueIdx2offset(valueCount);
1126:
1127: // Move the rest of the data one slot to the left
1128: shiftData(nodeIdx2offset(valueIdx + 1), endOffset,
1129: -slotSize);
1130:
1131: // Clear last slot
1132: clearData(endOffset - slotSize, endOffset);
1133:
1134: setValueCount(--valueCount);
1135:
1136: dataChanged = true;
1137:
1138: notifyValueRemoved(valueIdx);
1139:
1140: return value;
1141: }
1142:
1143: public int getChildNodeID(int nodeIdx) {
1144: return ByteArrayUtil.getInt(data, nodeIdx2offset(nodeIdx));
1145: }
1146:
1147: public void setChildNodeID(int nodeIdx, int nodeID) {
1148: ByteArrayUtil.putInt(nodeID, data, nodeIdx2offset(nodeIdx));
1149: dataChanged = true;
1150: }
1151:
1152: public Node getChildNode(int nodeIdx) throws IOException {
1153: int childNodeID = getChildNodeID(nodeIdx);
1154: return readNode(childNodeID);
1155: }
1156:
1157: /**
1158: * Searches the node for values that match the specified key and returns
1159: * its index. If no such value can be found, the index of the first value
1160: * that is larger is returned as a negative value by multiplying the index
1161: * with -1 and substracting 1 (result = -index - 1). The index can be
1162: * calculated from this negative value using the same function, i.e.:
1163: * index = -result - 1.
1164: */
1165: public int search(byte[] key) {
1166: int low = 0;
1167: int high = valueCount - 1;
1168:
1169: while (low <= high) {
1170: int mid = (low + high) >> 1;
1171: int diff = comparator.compareBTreeValues(key, data,
1172: valueIdx2offset(mid), valueSize);
1173:
1174: if (diff < 0) {
1175: // key smaller than middle value
1176: high = mid - 1;
1177: } else if (diff > 0) {
1178: // key larger than middle value
1179: low = mid + 1;
1180: } else {
1181: // key equal to middle value
1182: return mid;
1183: }
1184: }
1185: return -low - 1;
1186: }
1187:
1188: public void insertValueNodeIDPair(int valueIdx, byte[] value,
1189: int nodeID) {
1190: int offset = valueIdx2offset(valueIdx);
1191:
1192: if (valueIdx < valueCount) {
1193: // Shift values right of <offset> to the right
1194: shiftData(offset, valueIdx2offset(valueCount), slotSize);
1195: }
1196:
1197: // Insert the new value-nodeID pair
1198: ByteArrayUtil.put(value, data, offset);
1199: ByteArrayUtil.putInt(nodeID, data, offset + valueSize);
1200:
1201: // Raise the value count
1202: setValueCount(++valueCount);
1203:
1204: notifyValueAdded(valueIdx);
1205:
1206: dataChanged = true;
1207: }
1208:
1209: public void insertNodeIDValuePair(int nodeIdx, int nodeID,
1210: byte[] value) {
1211: int offset = nodeIdx2offset(nodeIdx);
1212:
1213: // Shift values right of <offset> to the right
1214: shiftData(offset, valueIdx2offset(valueCount), slotSize);
1215:
1216: // Insert the new slot
1217: ByteArrayUtil.putInt(nodeID, data, offset);
1218: ByteArrayUtil.put(value, data, offset + 4);
1219:
1220: // Raise the value count
1221: setValueCount(++valueCount);
1222:
1223: notifyValueAdded(nodeIdx);
1224:
1225: dataChanged = true;
1226: }
1227:
1228: /**
1229: * Splits the node, moving half of its values to the supplied new node,
1230: * inserting the supplied value-nodeID pair and returning the median
1231: * value. The behaviour of this method when called on a node that isn't
1232: * full is not specified and can produce unexpected results!
1233: *
1234: * @throws IOException
1235: */
1236: public byte[] splitAndInsert(byte[] newValue, int newNodeID,
1237: int newValueIdx, Node newNode) throws IOException {
1238: // First store the new value-node pair in data, then split it. This
1239: // can be done because data got one spare slot when it was allocated.
1240: insertValueNodeIDPair(newValueIdx, newValue, newNodeID);
1241:
1242: // Node now contains exactly [_branchFactor] values. The median
1243: // value at index [_branchFactor/2] is moved to the parent
1244: // node, the values left of the median stay in this node, the
1245: // values right of the median are moved to the new node.
1246: int medianIdx = branchFactor / 2;
1247: int medianOffset = valueIdx2offset(medianIdx);
1248: int splitOffset = medianOffset + valueSize;
1249:
1250: // Move all data (including the spare slot) to the right of
1251: // <splitOffset> to the new node
1252: System.arraycopy(data, splitOffset, newNode.data, 4,
1253: data.length - splitOffset);
1254:
1255: // Get the median value
1256: byte[] medianValue = getValue(medianIdx);
1257:
1258: // Clear the right half of the data in this node
1259: clearData(medianOffset, data.length);
1260:
1261: // Update the value counts
1262: setValueCount(medianIdx);
1263: newNode.setValueCount(branchFactor - medianIdx - 1);
1264: newNode.dataChanged = true;
1265:
1266: notifyNodeSplit(newNode, medianIdx);
1267:
1268: // Return the median value; it should be inserted into the parent node
1269: return medianValue;
1270: }
1271:
1272: public void mergeWithRightSibling(byte[] medianValue,
1273: Node rightSibling) throws IOException {
1274: // Append median value from parent node
1275: insertValueNodeIDPair(valueCount, medianValue, 0);
1276: // setValue(valueCount, medianValue);
1277:
1278: int rightIdx = valueCount;
1279:
1280: // Append all values and node references from right sibling
1281: System.arraycopy(rightSibling.data, 4, data,
1282: nodeIdx2offset(rightIdx),
1283: valueIdx2offset(rightSibling.valueCount) - 4);
1284:
1285: setValueCount(valueCount + rightSibling.valueCount);
1286:
1287: rightSibling.clearData(4,
1288: valueIdx2offset(rightSibling.valueCount));
1289: rightSibling.setValueCount(0);
1290: rightSibling.dataChanged = true;
1291:
1292: rightSibling.notifyNodeMerged(this , rightIdx);
1293: }
1294:
1295: public void register(NodeListener listener) {
1296: synchronized (listeners) {
1297: assert !listeners.contains(listener);
1298: listeners.add(listener);
1299: }
1300: }
1301:
1302: public void deregister(NodeListener listener) {
1303: synchronized (listeners) {
1304: assert listeners.contains(listener);
1305: listeners.remove(listener);
1306: }
1307: }
1308:
1309: private void notifyValueAdded(int index) {
1310: synchronized (listeners) {
1311: Iterator<NodeListener> iter = listeners.iterator();
1312:
1313: while (iter.hasNext()) {
1314: // Deregister if listener return true
1315: if (iter.next().valueAdded(this , index)) {
1316: iter.remove();
1317: }
1318: }
1319: }
1320: }
1321:
1322: private void notifyValueRemoved(int index) {
1323: synchronized (listeners) {
1324: Iterator<NodeListener> iter = listeners.iterator();
1325:
1326: while (iter.hasNext()) {
1327: // Deregister if listener return true
1328: if (iter.next().valueRemoved(this , index)) {
1329: iter.remove();
1330: }
1331: }
1332: }
1333: }
1334:
1335: private void notifyValueChanged(int index) {
1336: synchronized (listeners) {
1337: Iterator<NodeListener> iter = listeners.iterator();
1338:
1339: while (iter.hasNext()) {
1340: // Deregister if listener return true
1341: if (iter.next().valueChanged(this , index)) {
1342: iter.remove();
1343: }
1344: }
1345: }
1346: }
1347:
1348: private void notifyNodeSplit(Node rightNode, int medianIdx)
1349: throws IOException {
1350: synchronized (listeners) {
1351: Iterator<NodeListener> iter = listeners.iterator();
1352:
1353: while (iter.hasNext()) {
1354: boolean deregister = iter.next().nodeSplit(this ,
1355: rightNode, medianIdx);
1356:
1357: if (deregister) {
1358: iter.remove();
1359: }
1360: }
1361: }
1362: }
1363:
1364: private void notifyNodeMerged(Node targetNode, int mergeIdx)
1365: throws IOException {
1366: synchronized (listeners) {
1367: Iterator<NodeListener> iter = listeners.iterator();
1368:
1369: while (iter.hasNext()) {
1370: boolean deregister = iter.next().nodeMergedWith(
1371: this , targetNode, mergeIdx);
1372:
1373: if (deregister) {
1374: iter.remove();
1375: }
1376: }
1377: }
1378: }
1379:
1380: public void read() throws IOException {
1381: ByteBuffer buf = ByteBuffer.wrap(data);
1382: // Don't fill the spare slot in data:
1383: buf.limit(nodeSize);
1384: fileChannel.read(buf, nodeID2offset(id));
1385:
1386: valueCount = ByteArrayUtil.getInt(data, 0);
1387: }
1388:
1389: public void write() throws IOException {
1390: ByteBuffer buf = ByteBuffer.wrap(data);
1391: // Don't write the spare slot in data to the file:
1392: buf.limit(nodeSize);
1393: fileChannel.write(buf, nodeID2offset(id));
1394: dataChanged = false;
1395: }
1396:
1397: /**
1398: * Shifts the data between <tt>startOffset</tt> (inclusive) and
1399: * <tt>endOffset</tt> (exclusive) <tt>shift</tt> positions to the
1400: * right. Negative shift values can be used to shift data to the left.
1401: */
1402: private void shiftData(int startOffset, int endOffset, int shift) {
1403: System.arraycopy(data, startOffset, data, startOffset
1404: + shift, endOffset - startOffset);
1405: }
1406:
1407: /**
1408: * Clears the data between <tt>startOffset</tt> (inclusive) and
1409: * <tt>endOffset</tt> (exclusive). All bytes in this range will be set
1410: * to 0.
1411: */
1412: private void clearData(int startOffset, int endOffset) {
1413: Arrays.fill(data, startOffset, endOffset, (byte) 0);
1414: }
1415:
1416: private void setValueCount(int valueCount) {
1417: this .valueCount = valueCount;
1418: ByteArrayUtil.putInt(valueCount, data, 0);
1419: }
1420:
1421: private int valueIdx2offset(int id) {
1422: return 8 + id * slotSize;
1423: }
1424:
1425: private int nodeIdx2offset(int id) {
1426: return 4 + id * slotSize;
1427: }
1428: }
1429:
1430: /*--------------------------*
1431: * Inner class NodeListener *
1432: *--------------------------*/
1433:
1434: private interface NodeListener {
1435:
1436: /**
1437: * Signals to registered node listeners that a value has been added to a
1438: * node.
1439: *
1440: * @param node
1441: * The node which the value has been added to.
1442: * @param index
1443: * The index where the value was inserted.
1444: * @return Indicates whether the node listener should be deregistered as a
1445: * result of this event.
1446: */
1447: public boolean valueAdded(Node node, int index);
1448:
1449: /**
1450: * Signals to registered node listeners that a value has been removed from
1451: * a node.
1452: *
1453: * @param node
1454: * The node which the value has been removed from.
1455: * @param index
1456: * The index where the value was removed.
1457: * @return Indicates whether the node listener should be deregistered as a
1458: * result of this event.
1459: */
1460: public boolean valueRemoved(Node node, int index);
1461:
1462: /**
1463: * Signals to registered node listeners that a value has been changed.
1464: *
1465: * @param node
1466: * The node in which the value has been changed.
1467: * @param index
1468: * The index of the changed value.
1469: * @return Indicates whether the node listener should be deregistered as a
1470: * result of this event.
1471: */
1472: public boolean valueChanged(Node node, int index);
1473:
1474: /**
1475: * Signals to registered node listeners that a node has been split.
1476: *
1477: * @param node
1478: * The node which has been split.
1479: * @param newNode
1480: * The newly allocated node containing the "right" half of the
1481: * values.
1482: * @param medianIdx
1483: * The index where the node has been split. The value at this index
1484: * has been moved to the node's parent.
1485: * @return Indicates whether the node listener should be deregistered as a
1486: * result of this event.
1487: */
1488: public boolean nodeSplit(Node node, Node newNode, int medianIdx)
1489: throws IOException;
1490:
1491: /**
1492: * Signals to registered node listeners that two nodes have been merged.
1493: * All values from the source node have been appended to the value of the
1494: * target node.
1495: *
1496: * @param sourceNode
1497: * The node that donated its values to the target node.
1498: * @param targetNode
1499: * The node in which the values have been merged.
1500: * @param mergeIdx
1501: * The index of <tt>sourceNode</tt>'s values in
1502: * <tt>targetNode</tt>.
1503: *
1504: * @return Indicates whether the node listener should be deregistered with
1505: * the <em>source node</em> as a result of this event.
1506: */
1507: public boolean nodeMergedWith(Node sourceNode, Node targetNode,
1508: int mergeIdx) throws IOException;
1509: }
1510:
1511: /*-----------------------------*
1512: * Inner class SeqScanIterator *
1513: *-----------------------------*/
1514:
1515: // private class SeqScanIterator implements RecordIterator {
1516: //
1517: // private byte[] searchKey;
1518: //
1519: // private byte[] searchMask;
1520: //
1521: // private int currentNodeID;
1522: //
1523: // private Node currentNode;
1524: //
1525: // private int currentIdx;
1526: //
1527: // public SeqScanIterator(byte[] searchKey, byte[] searchMask) {
1528: // this.searchKey = searchKey;
1529: // this.searchMask = searchMask;
1530: // }
1531: //
1532: // public byte[] next()
1533: // throws IOException
1534: // {
1535: // while (currentNodeID <= maxNodeID) {
1536: // if (currentNode == null) {
1537: // // Read first node
1538: // currentNodeID = 1;
1539: // currentNode = readNode(currentNodeID);
1540: // currentIdx = 0;
1541: // }
1542: //
1543: // while (currentIdx < currentNode.getValueCount()) {
1544: // byte[] value = currentNode.getValue(currentIdx++);
1545: //
1546: // if (searchKey == null || ByteArrayUtil.matchesPattern(value, searchMask,
1547: // searchKey)) {
1548: // // Found a matches value
1549: // return value;
1550: // }
1551: // }
1552: //
1553: // currentNode.release();
1554: //
1555: // currentNodeID++;
1556: // currentNode = (currentNodeID <= maxNodeID) ? readNode(currentNodeID) :
1557: // null;
1558: // currentIdx = 0;
1559: // }
1560: //
1561: // return null;
1562: // }
1563: //
1564: // public void set(byte[] value) {
1565: // if (currentNode == null || currentIdx > currentNode.getValueCount()) {
1566: // throw new IllegalStateException();
1567: // }
1568: //
1569: // currentNode.setValue(currentIdx - 1, value);
1570: // }
1571: //
1572: // public void close()
1573: // throws IOException
1574: // {
1575: // if (currentNode != null) {
1576: // currentNodeID = maxNodeID + 1;
1577: //
1578: // currentNode.release();
1579: // currentNode = null;
1580: // }
1581: // }
1582: // }
1583: /*---------------------------*
1584: * Inner class RangeIterator *
1585: *---------------------------*/
1586:
1587: private class RangeIterator implements RecordIterator, NodeListener {
1588:
1589: private byte[] searchKey;
1590:
1591: private byte[] searchMask;
1592:
1593: private byte[] minValue;
1594:
1595: private byte[] maxValue;
1596:
1597: private boolean started;
1598:
1599: private Node currentNode;
1600:
1601: /**
1602: * Tracks the parent nodes of {@link #currentNode}.
1603: */
1604: private LinkedList<Node> parentNodeStack = new LinkedList<Node>();
1605:
1606: /**
1607: * Tracks the index of child nodes in parent nodes.
1608: */
1609: private LinkedList<Integer> parentIndexStack = new LinkedList<Integer>();
1610:
1611: private int currentIdx;
1612:
1613: public RangeIterator(byte[] searchKey, byte[] searchMask,
1614: byte[] minValue, byte[] maxValue) {
1615: this .searchKey = searchKey;
1616: this .searchMask = searchMask;
1617: this .minValue = minValue;
1618: this .maxValue = maxValue;
1619: this .started = false;
1620: }
1621:
1622: public byte[] next() throws IOException {
1623: if (!started) {
1624: started = true;
1625: findMinimum();
1626: }
1627:
1628: byte[] value = findNext(false);
1629: while (value != null) {
1630: if (maxValue != null
1631: && comparator.compareBTreeValues(maxValue,
1632: value, 0, value.length) < 0) {
1633: // Reached maximum value, stop iterating
1634: close();
1635: value = null;
1636: break;
1637: } else if (searchKey != null
1638: && !ByteArrayUtil.matchesPattern(value,
1639: searchMask, searchKey)) {
1640: // Value doesn't match search key/mask
1641: value = findNext(false);
1642: continue;
1643: } else {
1644: // Matching value found
1645: break;
1646: }
1647: }
1648:
1649: return value;
1650: }
1651:
1652: private void findMinimum() throws IOException {
1653: if (rootNodeID == 0) {
1654: // Empty BTree
1655: return;
1656: }
1657:
1658: currentNode = readNode(rootNodeID);
1659: currentNode.register(this );
1660: currentIdx = 0;
1661:
1662: // Search first value >= minValue, or the left-most value in case
1663: // minValue is null
1664: while (true) {
1665: if (minValue != null) {
1666: currentIdx = currentNode.search(minValue);
1667:
1668: if (currentIdx >= 0) {
1669: // Found exact match with minimum value
1670: break;
1671: } else {
1672: // currentIdx indicates the first value larger than the
1673: // minimum value
1674: currentIdx = -currentIdx - 1;
1675: }
1676: }
1677:
1678: if (currentNode.isLeaf()) {
1679: break;
1680: } else {
1681: parentNodeStack.add(currentNode);
1682: parentIndexStack.add(currentIdx);
1683:
1684: currentNode = currentNode.getChildNode(currentIdx);
1685: currentNode.register(this );
1686: currentIdx = 0;
1687: }
1688: }
1689: }
1690:
1691: private byte[] findNext(boolean returnedFromRecursion)
1692: throws IOException {
1693: if (currentNode == null) {
1694: return null;
1695: }
1696:
1697: if (returnedFromRecursion || currentNode.isLeaf()) {
1698: if (currentIdx >= currentNode.getValueCount()) {
1699: // No more values in this node, continue with parent node
1700: currentNode.deregister(this );
1701: currentNode.release();
1702: currentNode = popNodeStack();
1703: currentIdx = popIndexStack();
1704: return findNext(true);
1705: } else {
1706: return currentNode.getValue(currentIdx++);
1707: }
1708: } else {
1709: parentNodeStack.add(currentNode);
1710: parentIndexStack.add(currentIdx);
1711:
1712: currentNode = currentNode.getChildNode(currentIdx);
1713: currentNode.register(this );
1714: currentIdx = 0;
1715:
1716: return findNext(false);
1717: }
1718: }
1719:
1720: public void set(byte[] value) {
1721: if (currentNode == null
1722: || currentIdx > currentNode.getValueCount()) {
1723: throw new IllegalStateException();
1724: }
1725:
1726: currentNode.setValue(currentIdx - 1, value);
1727: }
1728:
1729: public void close() throws IOException {
1730: while (currentNode != null) {
1731: currentNode.deregister(this );
1732: currentNode.release();
1733: currentNode = popNodeStack();
1734: }
1735:
1736: assert parentNodeStack.isEmpty();
1737: parentIndexStack.clear();
1738: }
1739:
1740: private Node popNodeStack() {
1741: if (!parentNodeStack.isEmpty()) {
1742: return parentNodeStack.removeLast();
1743: }
1744:
1745: return null;
1746: }
1747:
1748: private int popIndexStack() {
1749: if (!parentIndexStack.isEmpty()) {
1750: return parentIndexStack.removeLast();
1751: }
1752:
1753: return 0;
1754: }
1755:
1756: public boolean valueAdded(Node node, int index) {
1757: if (node == currentNode) {
1758: if (index <= currentIdx) {
1759: currentIdx++;
1760: }
1761: } else {
1762: for (int i = 0; i < parentNodeStack.size(); i++) {
1763: if (node == parentNodeStack.get(i)) {
1764: if (index <= parentIndexStack.get(i)) {
1765: parentIndexStack.set(i, index + 1);
1766: }
1767:
1768: break;
1769: }
1770: }
1771: }
1772:
1773: return false;
1774: }
1775:
1776: public boolean valueRemoved(Node node, int index) {
1777: if (node == currentNode) {
1778: if (index <= currentIdx) {
1779: currentIdx--;
1780: }
1781: } else {
1782: for (int i = 0; i < parentNodeStack.size(); i++) {
1783: if (node == parentNodeStack.get(i)) {
1784: if (index <= parentIndexStack.get(i)) {
1785: parentIndexStack.set(i, index - 1);
1786: }
1787:
1788: break;
1789: }
1790: }
1791: }
1792:
1793: return false;
1794: }
1795:
1796: public boolean valueChanged(Node node, int index) {
1797: return false;
1798: }
1799:
1800: public boolean nodeSplit(Node node, Node newNode, int medianIdx)
1801: throws IOException {
1802: boolean deregister = false;
1803:
1804: if (node == currentNode) {
1805: if (currentIdx > medianIdx) {
1806: currentNode.release();
1807: deregister = true;
1808:
1809: newNode.use();
1810: newNode.register(this );
1811:
1812: currentNode = newNode;
1813: currentIdx -= medianIdx + 1;
1814: }
1815: } else {
1816: for (int i = 0; i < parentNodeStack.size(); i++) {
1817: Node parentNode = parentNodeStack.get(i);
1818:
1819: if (node == parentNode) {
1820: int parentIdx = parentIndexStack.get(i);
1821:
1822: if (parentIdx > medianIdx) {
1823: parentNode.release();
1824: deregister = true;
1825:
1826: newNode.use();
1827: newNode.register(this );
1828:
1829: parentNodeStack.set(i, newNode);
1830: parentIndexStack.set(i, parentIdx
1831: - medianIdx - 1);
1832: }
1833:
1834: break;
1835: }
1836: }
1837: }
1838:
1839: return deregister;
1840: }
1841:
1842: public boolean nodeMergedWith(Node sourceNode, Node targetNode,
1843: int mergeIdx) throws IOException {
1844: boolean deregister = false;
1845:
1846: if (sourceNode == currentNode) {
1847: currentNode.release();
1848: deregister = true;
1849:
1850: targetNode.use();
1851: targetNode.register(this );
1852:
1853: currentNode = targetNode;
1854: currentIdx += mergeIdx;
1855: } else {
1856: for (int i = 0; i < parentNodeStack.size(); i++) {
1857: Node parentNode = parentNodeStack.get(i);
1858:
1859: if (sourceNode == parentNode) {
1860: parentNode.release();
1861: deregister = true;
1862:
1863: targetNode.use();
1864: targetNode.register(this );
1865:
1866: parentNodeStack.set(i, targetNode);
1867: parentIndexStack.set(i, mergeIdx
1868: + parentIndexStack.get(i));
1869:
1870: break;
1871: }
1872: }
1873: }
1874:
1875: return deregister;
1876: }
1877: }
1878:
1879: /*--------------*
1880: * Test methods *
1881: *--------------*/
1882:
1883: public static void main(String[] args) throws Exception {
1884: System.out.println("Running BTree test...");
1885: if (args.length > 1) {
1886: runPerformanceTest(args);
1887: } else {
1888: runDebugTest(args);
1889: }
1890: System.out.println("Done.");
1891: }
1892:
1893: public static void runPerformanceTest(String[] args)
1894: throws Exception {
1895: File dataFile = new File(args[0]);
1896: int valueCount = Integer.parseInt(args[1]);
1897: RecordComparator comparator = new DefaultRecordComparator();
1898: BTree btree = new BTree(dataFile, 501, 13, comparator);
1899:
1900: java.util.Random random = new java.util.Random(0L);
1901: byte[] value = new byte[13];
1902:
1903: long startTime = System.currentTimeMillis();
1904: for (int i = 1; i <= valueCount; i++) {
1905: random.nextBytes(value);
1906: btree.insert(value);
1907: if (i % 50000 == 0) {
1908: System.out.println("Inserted " + i + " values in "
1909: + (System.currentTimeMillis() - startTime)
1910: + " ms");
1911: }
1912: }
1913:
1914: System.out
1915: .println("Iterating over all values in sequential order...");
1916: startTime = System.currentTimeMillis();
1917: RecordIterator iter = btree.iterateAll();
1918: value = iter.next();
1919: int count = 0;
1920: while (value != null) {
1921: count++;
1922: value = iter.next();
1923: }
1924: iter.close();
1925: System.out.println("Iteration over " + count
1926: + " items finished in "
1927: + (System.currentTimeMillis() - startTime) + " ms");
1928:
1929: // byte[][] values = new byte[count][13];
1930: //
1931: // iter = btree.iterateAll();
1932: // for (int i = 0; i < values.length; i++) {
1933: // values[i] = iter.next();
1934: // }
1935: // iter.close();
1936: //
1937: // startTime = System.currentTimeMillis();
1938: // for (int i = values.length - 1; i >= 0; i--) {
1939: // btree.remove(values[i]);
1940: // }
1941: // System.out.println("Removed all item in " + (System.currentTimeMillis()
1942: // - startTime) + " ms");
1943: }
1944:
1945: public static void runDebugTest(String[] args) throws Exception {
1946: File dataFile = new File(args[0]);
1947: BTree btree = new BTree(dataFile, 28, 1);
1948:
1949: btree.print(System.out);
1950:
1951: /*
1952: * System.out.println("Adding values..."); btree.startTransaction();
1953: * btree.insert("C".getBytes()); btree.insert("N".getBytes());
1954: * btree.insert("G".getBytes()); btree.insert("A".getBytes());
1955: * btree.insert("H".getBytes()); btree.insert("E".getBytes());
1956: * btree.insert("K".getBytes()); btree.insert("Q".getBytes());
1957: * btree.insert("M".getBytes()); btree.insert("F".getBytes());
1958: * btree.insert("W".getBytes()); btree.insert("L".getBytes());
1959: * btree.insert("T".getBytes()); btree.insert("Z".getBytes());
1960: * btree.insert("D".getBytes()); btree.insert("P".getBytes());
1961: * btree.insert("R".getBytes()); btree.insert("X".getBytes());
1962: * btree.insert("Y".getBytes()); btree.insert("S".getBytes());
1963: * btree.commitTransaction(); btree.print(System.out);
1964: * System.out.println("Removing values..."); System.out.println("Removing
1965: * H..."); btree.remove("H".getBytes()); btree.commitTransaction();
1966: * btree.print(System.out); System.out.println("Removing T...");
1967: * btree.remove("T".getBytes()); btree.commitTransaction();
1968: * btree.print(System.out); System.out.println("Removing R...");
1969: * btree.remove("R".getBytes()); btree.commitTransaction();
1970: * btree.print(System.out); System.out.println("Removing E...");
1971: * btree.remove("E".getBytes()); btree.commitTransaction();
1972: * btree.print(System.out); System.out.println("Values from I to U:");
1973: * RecordIterator iter = btree.iterateRange("I".getBytes(),
1974: * "V".getBytes()); byte[] value = iter.next(); while (value != null) {
1975: * System.out.print(new String(value) + " "); value = iter.next(); }
1976: * System.out.println();
1977: */
1978: }
1979:
1980: public void print(PrintStream out) throws IOException {
1981: out.println("---contents of BTree file---");
1982: out.println("Stored parameters:");
1983: out.println("block size = " + blockSize);
1984: out.println("value size = " + valueSize);
1985: out.println("root node ID = " + rootNodeID);
1986: out.println();
1987: out.println("Derived parameters:");
1988: out.println("slot size = " + slotSize);
1989: out.println("branch factor = " + branchFactor);
1990: out.println("min value count = " + minValueCount);
1991: out.println("node size = " + nodeSize);
1992: out.println("max node ID = " + maxNodeID);
1993: out.println();
1994:
1995: ByteBuffer buf = ByteBuffer.allocate(nodeSize);
1996: for (long offset = blockSize; offset < fileChannel.size(); offset += blockSize) {
1997: fileChannel.read(buf, offset);
1998: buf.rewind();
1999:
2000: int nodeID = offset2nodeID(offset);
2001: int count = buf.getInt();
2002: out.print("node " + nodeID + ": ");
2003: out.print("count=" + count + " ");
2004:
2005: byte[] value = new byte[valueSize];
2006:
2007: for (int i = 0; i < count; i++) {
2008: // node ID
2009: out.print(buf.getInt());
2010:
2011: // value
2012: buf.get(value);
2013: out.print("[" + ByteArrayUtil.toHexString(value) + "]");
2014: // out.print("["+new String(value)+"]");
2015: }
2016:
2017: // last node ID
2018: out.println(buf.getInt());
2019:
2020: buf.clear();
2021: }
2022: out.println("---end of BTree file---");
2023: }
2024: }
|