// kelondroHashtable.java
// ------------------
// part of the Kelondro Database
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// last major change: 21.06.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this software or this documentation. The usage of this software
// is at your own risk. The installation and usage (starting/running) of this
// software may allow other people or applications to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follow this copyright notice here, but changes must not be
// done inside the copyright notice above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.

/*

we implement a hashtable based on folded binary trees
each height in these binary trees represents one step of re-hashing
the re-hashing is realised by extending the number of relevant bits in the given hash
We construct the binary tree as follows:
- there exists no root node
- at height-1 there are 2 nodes, accessed by using only the least significant bit of the hash
- at height-2 there are 4 nodes, addressed by (hash & 3) - mapping the 2 lsb of the hash
- at height-3 there are 8 nodes, addressed by (hash & 7)
- .. and so on.
The number of nodes N(k) that are needed for a tree of height-k is

   N(k) = 2**k + N(k-1) = 2**(k + 1) - 2    [where k > 0]
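
   As a quick check, a tree of height-3 needs N(3) = 2 + 4 + 8 = 2**4 - 2 = 14 nodes.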

We fold this tree by putting all heights of the tree in a sequence

Computation of the position (the index) of a node:
given:
   a hash h with k significant bits (representing a height-k): h|k
then the position of the node node(h,k) is

   node(h,k) = N(k - 1) + h|k    [where k > 0]

We use these nodes to sequentially store a hash h at position node(h, 1), and
if that slot is already taken, at node(h, 2), node(h, 3) and so on.
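
   For example, for the hash h = 5 (binary 101) the probe sequence is
      node(5,1) = N(0) + (5 & 1) = 0 + 1 = 1
      node(5,2) = N(1) + (5 & 3) = 2 + 1 = 3
      node(5,3) = N(2) + (5 & 7) = 6 + 5 = 11
   i.e. the positions 1, 3, 11, ... are tried in turn.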

This is highly inefficient for the smallest heights k = 1, ..., (?)
The 'inefficiency border' depends on the number of elements that we want to store.

We therefore introduce an offset o, which is the number of bits that are not used
at the beginning of (re-)hashing. But even if these o re-hashing steps are not done,
all bits of the hash are relevant.
Now the number of nodes N(k,o) that are needed is computed by:

   N(k,o) = N(k) - N(o) = 2**(k + 1) - 2**(o + 1)    [where k > o, o >= 0]

When o = 0 this is equivalent to N(k).
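
   For example, with o = 1 the heights 2 and 3 together need
      N(3,1) = 2**4 - 2**2 = 12 nodes (4 at height-2 plus 8 at height-3),
   while the 2 nodes of height-1 are never allocated.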

The node formula must be adapted as well:

   node(h,k,o) = N(k - 1, o) + h|k    [where k > o, o >= 0]

So if an offset o is set, the lowest node position occurs at the first used
level k = o + 1: node(0, o + 1, o) = N(o, o) = 0 (position of the first entry)
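
   For example, with o = 1 the first used height is k = 2 and
      node(0,2,1) = N(1,1) + 0 = (2**2 - 2**2) + 0 = 0
   so the table starts at index 0 and the N(1) = 2 slots of height-1 are saved.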

Computation of 'maxk', the maximum height of the tree for a given maximum number
of entries 'maxsize' in the hashtable:
maxk shall be computed in such a way that N(k,o) <= maxsize for all k <= maxk.
This means in particular that

   node(h,k,o) < maxsize

for h|k we must consider the worst case:

   h|maxk = 2**maxk - 1

therefore

   node(h,maxk,o)                      < maxsize
   N(maxk - 1, o) + h|maxk             < maxsize                          [where maxk > o, o >= 0]
   2**maxk - 2**(o + 1) + 2**maxk - 1  < maxsize                          [where maxk > o, o >= 0]
   2**maxk - 2**(o + 1) + 2**maxk      < maxsize + 1                      [where maxk > o, o >= 0]
   2**maxk + 2**maxk                   < maxsize + 2**(o + 1) + 1         [where maxk > o, o >= 0]
   2**(maxk + 1)                       < maxsize + 2**(o + 1) + 1         [where maxk > o, o >= 0]
   maxk                                < log2(maxsize + 2**(o + 1) + 1) - 1

setting maxk to

   maxk = log2(maxsize)

will usually make this relation true (the constructor below additionally decrements
maxk by one when the bound is not met), even if maxk = log2(maxsize) + 1
would also be correct in some cases

Now we can use the following formula to create the folded binary hash tree:

   node(h,k,o) = 2**k - 2**(o + 1) + h|k

to compute the node index and

   maxk = log2(maxsize)

to compute the upper limit of re-hashing
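
   Example: with o = 1, k = 3 and h = 5 the node index is
      node(5,3,1) = 2**3 - 2**2 + (5 & 7) = 8 - 4 + 5 = 9
   which is exactly what the Hash.node() method below computes using power2().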

*/

package de.anomic.kelondro;

import java.io.File;
import java.io.IOException;

public class kelondroHashtable {

    private kelondroFixedWidthArray hashArray;
    protected int offset;
    protected int maxk;
    private int maxrehash;
    private kelondroRow.Entry dummyRow;

    private static final byte[] dummyKey = kelondroBase64Order.enhancedCoder
            .encodeLong(0, 5).getBytes();

    public kelondroHashtable(File file, kelondroRow rowdef, int offset,
            int maxsize, int maxrehash) throws IOException {
        // this creates a new hashtable
        // the key element is not part of the columns array
        // this is unlike the kelondroTree, where the key is part of a row
        // the offset is the number of bits that is omitted in the folded tree hierarchy
        // a good number for offset is 8
        // the maxsize number is the maximum number of elements in the hashtable
        // this number is needed to avoid growing the table in case of re-hashing
        // the maxsize is re-computed to a virtual folding height and will result in a
        // tablesize less than the given maxsize; the actual maxsize can be retrieved by maxsize()
        boolean fileExisted = file.exists();
        this.hashArray = new kelondroFixedWidthArray(file, extCol(rowdef), 6);
        if (fileExisted) {
            this.offset = hashArray.geti(0);
            this.maxk = hashArray.geti(1);
            this.maxrehash = hashArray.geti(2);
        } else {
            this.offset = offset;
            this.maxk = kelondroMSetTools.log2a(maxsize); // equal to |log2(maxsize)| + 1
            if (this.maxk >= kelondroMSetTools.log2a(maxsize + power2(offset + 1) + 1) - 1)
                this.maxk--;
            this.maxrehash = maxrehash;
            hashArray.seti(0, this.offset);
            hashArray.seti(1, this.maxk);
            hashArray.seti(2, this.maxrehash);
        }
        // the dummy row pads the array when slots beyond the current size are written;
        // it must be initialized for existing files as well, otherwise put() would fail
        this.dummyRow = this.hashArray.row().newEntry();
        this.dummyRow.setCol(0, dummyKey);
    }
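
    // A minimal usage sketch (illustrative only; the payload column descriptor, the file name
    // and the order argument used here are assumptions, not taken from this class):
    //
    //   kelondroRow rowdef = new kelondroRow(
    //           new kelondroColumn[] { new kelondroColumn("byte[] value-8") },
    //           kelondroBase64Order.enhancedCoder, 0);
    //   kelondroHashtable table = new kelondroHashtable(new File("test.table"), rowdef, 8, 1000, 3);
    //   kelondroRow.Entry entry = rowdef.newEntry();
    //   entry.setCol(0, "somedata".getBytes());
    //   table.put(42, entry);             // store the row under the integer key 42
    //   byte[][] payload = table.get(42); // read the payload columns back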

    private kelondroRow extCol(kelondroRow rowdef) {
        // prepend a cardinal key column to the given row definition;
        // this extra column stores the integer hash key of each slot
        kelondroColumn[] newCol = new kelondroColumn[rowdef.columns() + 1];
        newCol[0] = new kelondroColumn("Cardinal key-4 {b256}");
        for (int i = 0; i < rowdef.columns(); i++) newCol[i + 1] = rowdef.column(i);
        return new kelondroRow(newCol, rowdef.objectOrder, rowdef.primaryKeyIndex);
    }

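    // power2(x) computes 2**x by repeated left-shifting (e.g. power2(3) == 8);
    // it is used below to evaluate the node formula from the header comment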
    public static int power2(int x) {
        int p = 1;
        while (x > 0) {
            p = p << 1;
            x--;
        }
        return p;
    }

    public synchronized byte[][] get(int key) throws IOException {
        Object[] search = search(new Hash(key));
        if (search[1] == null) return null;
        // the search result holds the full hash/key row; strip the leading
        // hash-key column and return only the payload columns
        kelondroRow.Entry hkrow = (kelondroRow.Entry) search[1];
        byte[][] result = new byte[hashArray.row().columns() - 1][];
        for (int i = 0; i < result.length; i++) result[i] = hkrow.getColBytes(i + 1);
        return result;
    }

    public synchronized kelondroRow.Entry put(int key,
            kelondroRow.Entry rowentry) throws IOException {
        Hash hash = new Hash(key);

        // find row
        Object[] search = search(hash);
        kelondroRow.Entry oldhkrow;
        int rowNumber = ((Integer) search[0]).intValue();
        if (search[1] == null) {
            oldhkrow = null;
        } else {
            oldhkrow = (kelondroRow.Entry) search[1];
        }

        // make space
        while (rowNumber >= hashArray.size())
            hashArray.set(hashArray.size(), dummyRow);

        // write row
        kelondroRow.Entry newhkrow = hashArray.row().newEntry();
        newhkrow.setCol(0, hash.key());
        newhkrow.setCol(1, rowentry.bytes());
        hashArray.set(rowNumber, newhkrow);

        // return the previous entry stored under this key, or null if the key was new
        return (oldhkrow == null) ? null
                : hashArray.row().newEntry(oldhkrow.getColBytes(1));
    }

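    // search() walks the probe sequence defined by Hash.node()/Hash.rehash() for the given hash;
    // it returns {row number, row}, where the row is null if a free or not yet allocated
    // slot was reached before the key was found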
    private Object[] search(Hash hash) throws IOException {
        kelondroRow.Entry hkrow;
        int rowKey;
        int rowNumber;
        do {
            rowNumber = hash.node();
            if (rowNumber >= hashArray.size()) return new Object[] { new Integer(rowNumber), null };
            hkrow = hashArray.get(rowNumber);
            rowKey = (int) hkrow.getColLong(0);
            if (rowKey == 0) return new Object[] { new Integer(rowNumber), null };
            hash.rehash();
        } while (rowKey != hash.key());
        return new Object[] { new Integer(rowNumber), hkrow };
    }

    private class Hash {
        int key;
        int hash;
        int depth;

        public Hash(int key) {
            this.key = key;
            this.hash = key;
            this.depth = offset + 1;
        }

        public int key() {
            return key;
        }

        private int hash() {
            return hash & (power2(depth) - 1); // apply mask
        }

        public int depth() {
            return depth;
        }

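        // rehash() performs one re-hashing step from the header comment: it extends the number
        // of relevant hash bits by one; once the maximum height maxk is exceeded, probing
        // restarts at depth offset + 1 with a secondary hash derived from the current one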
        public void rehash() {
            depth++;
            if (depth > maxk) {
                depth = offset + 1;
                hash = (int) ((5 * (long) hash - 7) / 3 + 13);
            }
        }

        public int node() {
            // node(h,k,o) = 2**k - 2**(o + 1) + h|k
            return power2(depth) - power2(offset + 1) + hash();
        }
    }
}
|