001: /**
002: * JDBM LICENSE v1.00
003: *
004: * Redistribution and use of this software and associated documentation
005: * ("Software"), with or without modification, are permitted provided
006: * that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain copyright
009: * statements and notices. Redistributions must also contain a
010: * copy of this document.
011: *
012: * 2. Redistributions in binary form must reproduce the
013: * above copyright notice, this list of conditions and the
014: * following disclaimer in the documentation and/or other
015: * materials provided with the distribution.
016: *
017: * 3. The name "JDBM" must not be used to endorse or promote
018: * products derived from this Software without prior written
019: * permission of Cees de Groot. For written permission,
020: * please contact cg@cdegroot.com.
021: *
022: * 4. Products derived from this Software may not be called "JDBM"
023: * nor may "JDBM" appear in their names without prior written
024: * permission of Cees de Groot.
025: *
026: * 5. Due credit should be given to the JDBM Project
027: * (http://jdbm.sourceforge.net/).
028: *
029: * THIS SOFTWARE IS PROVIDED BY THE JDBM PROJECT AND CONTRIBUTORS
030: * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
031: * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
032: * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
033: * CEES DE GROOT OR ANY CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
034: * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
035: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
036: * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
037: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
038: * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
039: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
040: * OF THE POSSIBILITY OF SUCH DAMAGE.
041: *
042: * Copyright 2001 (C) Alex Boisvert. All Rights Reserved.
043: * Contributions are Copyright (C) 2001 by their associated contributors.
044: *
045: */package jdbm.btree;
046:
047: import jdbm.RecordManager;
048:
049: import jdbm.helper.Serializer;
050: import jdbm.helper.Tuple;
051: import jdbm.helper.TupleBrowser;
052:
053: import java.io.Externalizable;
054: import java.io.IOException;
055: import java.io.ObjectInput;
056: import java.io.ObjectOutput;
057: import java.io.Serializable;
058:
059: import java.util.Comparator;
060:
061: /**
062: * B+Tree persistent indexing data structure. B+Trees are optimized for
063: * block-based, random I/O storage because they store multiple keys on
064: * one tree node (called <code>BPage</code>). In addition, the leaf nodes
065: * directly contain (inline) the values associated with the keys, allowing a
066: * single (or sequential) disk read of all the values on the page.
067: * <p>
068: * B+Trees are n-airy, yeilding log(N) search cost. They are self-balancing,
069: * preventing search performance degradation when the size of the tree grows.
070: * <p>
071: * Keys and associated values must be <code>Serializable</code> objects. The
072: * user is responsible to supply a serializable <code>Comparator</code> object
073: * to be used for the ordering of entries, which are also called <code>Tuple</code>.
074: * The B+Tree allows traversing the keys in forward and reverse order using a
075: * TupleBrowser obtained from the browse() methods.
076: * <p>
077: * This implementation does not directly support duplicate keys, but it is
078: * possible to handle duplicates by inlining or referencing an object collection
079: * as a value.
080: * <p>
081: * There is no limit on key size or value size, but it is recommended to keep
082: * both as small as possible to reduce disk I/O. This is especially true for
083: * the key size, which impacts all non-leaf <code>BPage</code> objects.
084: *
085: * @author <a href="mailto:boisvert@intalio.com">Alex Boisvert</a>
086: * @version $Id: BTree.java,v 1.6 2005/06/25 23:12:31 doomdark Exp $
087: */
088: public class BTree implements Externalizable {
089:
090: private static final boolean DEBUG = false;
091:
092: /**
093: * Version id for serialization.
094: */
095: final static long serialVersionUID = 1L;
096:
097: /**
098: * Default page size (number of entries per node)
099: */
100: public static final int DEFAULT_SIZE = 16;
101:
102: /**
103: * Page manager used to persist changes in BPages
104: */
105: protected transient RecordManager _recman;
106:
107: /**
108: * This BTree's record ID in the PageManager.
109: */
110: private transient long _recid;
111:
112: /**
113: * Comparator used to index entries.
114: */
115: protected Comparator _comparator;
116:
117: /**
118: * Serializer used to serialize index keys (optional)
119: */
120: protected Serializer _keySerializer;
121:
122: /**
123: * Serializer used to serialize index values (optional)
124: */
125: protected Serializer _valueSerializer;
126:
127: /**
128: * Height of the B+Tree. This is the number of BPages you have to traverse
129: * to get to a leaf BPage, starting from the root.
130: */
131: private int _height;
132:
133: /**
134: * Recid of the root BPage
135: */
136: private transient long _root;
137:
138: /**
139: * Number of entries in each BPage.
140: */
141: protected int _pageSize;
142:
143: /**
144: * Total number of entries in the BTree
145: */
146: protected int _entries;
147:
148: /**
149: * Serializer used for BPages of this tree
150: */
151: private transient BPage _bpageSerializer;
152:
153: /**
154: * No-argument constructor used by serialization.
155: */
156: public BTree() {
157: // empty
158: }
159:
160: /**
161: * Create a new persistent BTree, with 16 entries per node.
162: *
163: * @param recman Record manager used for persistence.
164: * @param comparator Comparator used to order index entries
165: */
166: public static BTree createInstance(RecordManager recman,
167: Comparator comparator) throws IOException {
168: return createInstance(recman, comparator, null, null,
169: DEFAULT_SIZE);
170: }
171:
172: /**
173: * Create a new persistent BTree, with 16 entries per node.
174: *
175: * @param recman Record manager used for persistence.
176: * @param keySerializer Serializer used to serialize index keys (optional)
177: * @param valueSerializer Serializer used to serialize index values (optional)
178: * @param comparator Comparator used to order index entries
179: */
180: public static BTree createInstance(RecordManager recman,
181: Comparator comparator, Serializer keySerializer,
182: Serializer valueSerializer) throws IOException {
183: return createInstance(recman, comparator, keySerializer,
184: valueSerializer, DEFAULT_SIZE);
185: }
186:
187: /**
188: * Create a new persistent BTree with the given number of entries per node.
189: *
190: * @param recman Record manager used for persistence.
191: * @param comparator Comparator used to order index entries
192: * @param keySerializer Serializer used to serialize index keys (optional)
193: * @param valueSerializer Serializer used to serialize index values (optional)
194: * @param pageSize Number of entries per page (must be even).
195: */
196: public static BTree createInstance(RecordManager recman,
197: Comparator comparator, Serializer keySerializer,
198: Serializer valueSerializer, int pageSize)
199: throws IOException {
200: BTree btree;
201:
202: if (recman == null) {
203: throw new IllegalArgumentException(
204: "Argument 'recman' is null");
205: }
206:
207: if (comparator == null) {
208: throw new IllegalArgumentException(
209: "Argument 'comparator' is null");
210: }
211:
212: if (!(comparator instanceof Serializable)) {
213: throw new IllegalArgumentException(
214: "Argument 'comparator' must be serializable");
215: }
216:
217: if (keySerializer != null
218: && !(keySerializer instanceof Serializable)) {
219: throw new IllegalArgumentException(
220: "Argument 'keySerializer' must be serializable");
221: }
222:
223: if (valueSerializer != null
224: && !(valueSerializer instanceof Serializable)) {
225: throw new IllegalArgumentException(
226: "Argument 'valueSerializer' must be serializable");
227: }
228:
229: // make sure there's an even number of entries per BPage
230: if ((pageSize & 1) != 0) {
231: throw new IllegalArgumentException(
232: "Argument 'pageSize' must be even");
233: }
234:
235: btree = new BTree();
236: btree._recman = recman;
237: btree._comparator = comparator;
238: btree._keySerializer = keySerializer;
239: btree._valueSerializer = valueSerializer;
240: btree._pageSize = pageSize;
241: btree._bpageSerializer = new BPage();
242: btree._bpageSerializer._btree = btree;
243: btree._recid = recman.insert(btree);
244: return btree;
245: }
246:
247: /**
248: * Load a persistent BTree.
249: *
250: * @param recman RecordManager used to store the persistent btree
251: * @param recid Record id of the BTree
252: */
253: public static BTree load(RecordManager recman, long recid)
254: throws IOException {
255: BTree btree = (BTree) recman.fetch(recid);
256: btree._recid = recid;
257: btree._recman = recman;
258: btree._bpageSerializer = new BPage();
259: btree._bpageSerializer._btree = btree;
260: return btree;
261: }
262:
263: /**
264: * Insert an entry in the BTree.
265: * <p>
266: * The BTree cannot store duplicate entries. An existing entry can be
267: * replaced using the <code>replace</code> flag. If an entry with the
268: * same key already exists in the BTree, its value is returned.
269: *
270: * @param key Insert key
271: * @param value Insert value
272: * @param replace Set to true to replace an existing key-value pair.
273: * @return Existing value, if any.
274: */
275: public synchronized Object insert(Object key, Object value,
276: boolean replace) throws IOException {
277: if (key == null) {
278: throw new IllegalArgumentException("Argument 'key' is null");
279: }
280: if (value == null) {
281: throw new IllegalArgumentException(
282: "Argument 'value' is null");
283: }
284:
285: BPage rootPage = getRoot();
286:
287: if (rootPage == null) {
288: // BTree is currently empty, create a new root BPage
289: if (DEBUG) {
290: System.out.println("BTree.insert() new root BPage");
291: }
292: rootPage = new BPage(this , key, value);
293: _root = rootPage._recid;
294: _height = 1;
295: _entries = 1;
296: _recman.update(_recid, this );
297: return null;
298: } else {
299: BPage.InsertResult insert = rootPage.insert(_height, key,
300: value, replace);
301: boolean dirty = false;
302: if (insert._overflow != null) {
303: // current root page overflowed, we replace with a new root page
304: if (DEBUG) {
305: System.out
306: .println("BTree.insert() replace root BPage due to overflow");
307: }
308: rootPage = new BPage(this , rootPage, insert._overflow);
309: _root = rootPage._recid;
310: _height += 1;
311: dirty = true;
312: }
313: if (insert._existing == null) {
314: _entries++;
315: dirty = true;
316: }
317: if (dirty) {
318: _recman.update(_recid, this );
319: }
320: // insert might have returned an existing value
321: return insert._existing;
322: }
323: }
324:
325: /**
326: * Remove an entry with the given key from the BTree.
327: *
328: * @param key Removal key
329: * @return Value associated with the key, or null if no entry with given
330: * key existed in the BTree.
331: */
332: public synchronized Object remove(Object key) throws IOException {
333: if (key == null) {
334: throw new IllegalArgumentException("Argument 'key' is null");
335: }
336:
337: BPage rootPage = getRoot();
338: if (rootPage == null) {
339: return null;
340: }
341: boolean dirty = false;
342: BPage.RemoveResult remove = rootPage.remove(_height, key);
343: if (remove._underflow && rootPage.isEmpty()) {
344: _height -= 1;
345: dirty = true;
346:
347: // TODO: check contract for BPages to be removed from recman.
348: if (_height == 0) {
349: _root = 0;
350: } else {
351: _root = rootPage.childBPage(_pageSize - 1)._recid;
352: }
353: }
354: if (remove._value != null) {
355: _entries--;
356: dirty = true;
357: }
358: if (dirty) {
359: _recman.update(_recid, this );
360: }
361: return remove._value;
362: }
363:
364: /**
365: * Find the value associated with the given key.
366: *
367: * @param key Lookup key.
368: * @return Value associated with the key, or null if not found.
369: */
370: public synchronized Object find(Object key) throws IOException {
371: if (key == null) {
372: throw new IllegalArgumentException("Argument 'key' is null");
373: }
374: BPage rootPage = getRoot();
375: if (rootPage == null) {
376: return null;
377: }
378:
379: Tuple tuple = new Tuple(null, null);
380: TupleBrowser browser = rootPage.find(_height, key);
381:
382: if (browser.getNext(tuple)) {
383: // find returns the matching key or the next ordered key, so we must
384: // check if we have an exact match
385: if (_comparator.compare(key, tuple.getKey()) != 0) {
386: return null;
387: } else {
388: return tuple.getValue();
389: }
390: } else {
391: return null;
392: }
393: }
394:
395: /**
396: * Find the value associated with the given key, or the entry immediately
397: * following this key in the ordered BTree.
398: *
399: * @param key Lookup key.
400: * @return Value associated with the key, or a greater entry, or null if no
401: * greater entry was found.
402: */
403: public synchronized Tuple findGreaterOrEqual(Object key)
404: throws IOException {
405: Tuple tuple;
406: TupleBrowser browser;
407:
408: if (key == null) {
409: // there can't be a key greater than or equal to "null"
410: // because null is considered an infinite key.
411: return null;
412: }
413:
414: tuple = new Tuple(null, null);
415: browser = browse(key);
416: if (browser.getNext(tuple)) {
417: return tuple;
418: } else {
419: return null;
420: }
421: }
422:
423: /**
424: * Get a browser initially positioned at the beginning of the BTree.
425: * <p><b>
426: * WARNING: If you make structural modifications to the BTree during
427: * browsing, you will get inconsistent browing results.
428: * </b>
429: *
430: * @return Browser positionned at the beginning of the BTree.
431: */
432: public synchronized TupleBrowser browse() throws IOException {
433: BPage rootPage = getRoot();
434: if (rootPage == null) {
435: return EmptyBrowser.INSTANCE;
436: }
437: TupleBrowser browser = rootPage.findFirst();
438: return browser;
439: }
440:
441: /**
442: * Get a browser initially positioned just before the given key.
443: * <p><b>
444: * WARNING: If you make structural modifications to the BTree during
445: * browsing, you will get inconsistent browing results.
446: * </b>
447: *
448: * @param key Key used to position the browser. If null, the browser
449: * will be positionned after the last entry of the BTree.
450: * (Null is considered to be an "infinite" key)
451: * @return Browser positionned just before the given key.
452: */
453: public synchronized TupleBrowser browse(Object key)
454: throws IOException {
455: BPage rootPage = getRoot();
456: if (rootPage == null) {
457: return EmptyBrowser.INSTANCE;
458: }
459: TupleBrowser browser = rootPage.find(_height, key);
460: return browser;
461: }
462:
463: /**
464: * Return the number of entries (size) of the BTree.
465: */
466: public synchronized int size() {
467: return _entries;
468: }
469:
470: /**
471: * Return the persistent record identifier of the BTree.
472: */
473: public long getRecid() {
474: return _recid;
475: }
476:
477: /**
478: * Return the root BPage, or null if it doesn't exist.
479: */
480: private BPage getRoot() throws IOException {
481: if (_root == 0) {
482: return null;
483: }
484: BPage root = (BPage) _recman.fetch(_root, _bpageSerializer);
485: root._recid = _root;
486: root._btree = this ;
487: return root;
488: }
489:
490: /**
491: * Implement Externalizable interface.
492: */
493: public void readExternal(ObjectInput in) throws IOException,
494: ClassNotFoundException {
495: _comparator = (Comparator) in.readObject();
496: _keySerializer = (Serializer) in.readObject();
497: _valueSerializer = (Serializer) in.readObject();
498: _height = in.readInt();
499: _root = in.readLong();
500: _pageSize = in.readInt();
501: _entries = in.readInt();
502: }
503:
504: /**
505: * Implement Externalizable interface.
506: */
507: public void writeExternal(ObjectOutput out) throws IOException {
508: out.writeObject(_comparator);
509: out.writeObject(_keySerializer);
510: out.writeObject(_valueSerializer);
511: out.writeInt(_height);
512: out.writeLong(_root);
513: out.writeInt(_pageSize);
514: out.writeInt(_entries);
515: }
516:
517: /*
518: public void assert() throws IOException {
519: BPage root = getRoot();
520: if ( root != null ) {
521: root.assertRecursive( _height );
522: }
523: }
524: */
525:
526: /*
527: public void dump() throws IOException {
528: BPage root = getRoot();
529: if ( root != null ) {
530: root.dumpRecursive( _height, 0 );
531: }
532: }
533: */
534:
535: /** PRIVATE INNER CLASS
536: * Browser returning no element.
537: */
538: static class EmptyBrowser extends TupleBrowser {
539:
540: static TupleBrowser INSTANCE = new EmptyBrowser();
541:
542: public boolean getNext(Tuple tuple) {
543: return false;
544: }
545:
546: public boolean getPrevious(Tuple tuple) {
547: return false;
548: }
549: }
550: }
|