001: // kelondroMHashMap.java
002: // -----------------------
003: // part of YaCy
004: // (C) by Michael Peter Christen; mc@anomic.de
005: // first published on http://www.anomic.de
006: // Frankfurt, Germany, 2005
007: // Created 08.12.2005
008: //
009: // This program is free software; you can redistribute it and/or modify
010: // it under the terms of the GNU General Public License as published by
011: // the Free Software Foundation; either version 2 of the License, or
012: // (at your option) any later version.
013: //
014: // This program is distributed in the hope that it will be useful,
015: // but WITHOUT ANY WARRANTY; without even the implied warranty of
016: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017: // GNU General Public License for more details.
018: //
019: // You should have received a copy of the GNU General Public License
020: // along with this program; if not, write to the Free Software
021: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022: //
023: // Using this software in any meaning (reading, learning, copying, compiling,
024: // running) means that you agree that the Author(s) is (are) not responsible
025: // for cost, loss of data or any harm that may be caused directly or indirectly
026: // by usage of this softare or this documentation. The usage of this software
027: // is on your own risk. The installation and usage (starting/running) of this
028: // software may allow other people or application to access your computer and
029: // any attached devices and is highly dependent on the configuration of the
030: // software which must be done by the user of the software; the author(s) is
031: // (are) also not responsible for proper configuration and usage of the
032: // software, even if provoked by documentation provided together with
033: // the software.
034: //
035: // Any changes to this file according to the GPL as documented in the file
036: // gpl.txt aside this file in the shipment you received can be done to the
037: // lines that follows this copyright notice here, but changes must not be
038: // done inside the copyright notive above. A re-distribution must contain
039: // the intact and unchanged copyright notice.
040: // Contributions and changes to the program code must be marked as such.
041:
042: package de.anomic.kelondro;
043:
044: import java.util.Iterator;
045:
046: public class kelondroMHashMap {
047:
048: private int keylen, valuelen, reclen, count;
049: private byte[] mem;
050: private byte[] emptykey;
051:
052: public kelondroMHashMap(int valuelen) {
053: // initializes a hash map with integer access key
054: this (4, valuelen);
055: }
056:
057: public kelondroMHashMap(int keylen, int valuelen) {
058: this .keylen = keylen;
059: this .valuelen = valuelen;
060: this .reclen = keylen + valuelen;
061: this .mem = new byte[1 * reclen];
062: this .count = 0;
063: this .emptykey = new byte[keylen];
064: for (int i = 0; i < keylen; i++)
065: emptykey[i] = 0;
066: for (int i = 0; i < (mem.length / reclen); i++)
067: System.arraycopy(emptykey, 0, mem, i * reclen, keylen);
068: }
069:
070: private boolean equals(int posKeyInMem, byte[] otherkey) {
071: assert (otherkey.length == keylen);
072: int pos = posKeyInMem * reclen;
073: int i = 0;
074: while (i < keylen)
075: if (mem[pos++] != otherkey[i++])
076: return false;
077: return true;
078: }
079:
080: private int rehashtry() {
081: return 1 + mem.length / 2;
082: }
083:
084: private int capacity() {
085: return mem.length / reclen;
086: }
087:
088: private static int hashkey(byte[] key, int capacity) {
089: int h = 0;
090: for (int i = 0; i < key.length; i++)
091: h = h * 15 + (0xff & key[i]);
092: //System.out.println("hash code of key " + new String(key) + " is " + h);
093: return h % capacity;
094: }
095:
096: private static int rehash(int previousKey, int capacity) {
097: if (previousKey == 0)
098: previousKey = capacity;
099: return previousKey - 1;
100: }
101:
102: public int size() {
103: return count;
104: }
105:
106: private int findExisting(byte[] key) {
107: // returns an index position if found; -1 otherwise
108: int hash = hashkey(key, capacity());
109: //System.out.println("first guess for key " + new String(key) + ": " + hash + "( capacity is " + capacity() + " )");
110: int testcount = 0;
111: while (testcount++ < rehashtry()) {
112: if (mem[hash * reclen] == 0)
113: return -1;
114: if (equals(hash, key))
115: return hash;
116: hash = rehash(hash, capacity());
117: }
118: return -1;
119: }
120:
121: private int findSpace(byte[] key) {
122: // returns an new index position which is empty
123: // if there is no space left -1 is returned
124: int hash = hashkey(key, capacity());
125: int testcount = 0;
126: while (testcount++ < rehashtry()) {
127: if (mem[hash * reclen] == 0)
128: return hash;
129: hash = rehash(hash, capacity());
130: }
131: return -1;
132: }
133:
134: private static int findSpace(byte[] m, byte[] key, int rl,
135: int trycount, int capacity) {
136: // returns an new index position which is empty
137: // if there is no space left -1 is returned
138: int hash = hashkey(key, capacity);
139: int testcount = 0;
140: while (testcount++ < trycount) {
141: if (m[hash * rl] == 0)
142: return hash;
143: hash = rehash(hash, capacity);
144: }
145: return -1;
146: }
147:
148: private static byte[] toByteKey(int v) {
149: assert (v >= 0);
150: v = v | 0x80000000; // set bit in first byte
151: return new byte[] { (byte) ((v >>> 24) & 0xFF),
152: (byte) ((v >>> 16) & 0xFF), (byte) ((v >>> 8) & 0xFF),
153: (byte) ((v >>> 0) & 0xFF) };
154: }
155:
156: public void put(int key, byte[] value) {
157: put(toByteKey(key), value);
158: }
159:
160: public void put(byte[] key, byte[] value) {
161: // inserts a new value or overwrites existing
162: // if the hash is full, a RuntimeException is thrown
163: // this method does not return the old value to avoid generation of objects
164: assert (key.length == keylen);
165: assert (value.length == valuelen);
166:
167: int hash = findExisting(key);
168: if (hash < 0) {
169: // insert new entry
170: hash = findSpace(key);
171: if (hash < 0) {
172: // increase space of hashtable
173: // create temporary bigger hashtable and insert all
174: synchronized (mem) {
175: System.out.println("increasing space to "
176: + mem.length * 2);
177: int newspace = mem.length * 2;
178: int newcapacity = capacity() * 2;
179: byte[] newmem = new byte[newspace];
180: Iterator<entry> i = entries();
181: kelondroMHashMap.entry e;
182: int mempos;
183: while (i.hasNext()) {
184: e = i.next();
185: hash = findSpace(newmem, e.key, reclen,
186: newspace, newcapacity);
187: mempos = hash * reclen;
188: System.arraycopy(e.key, 0, newmem, mempos,
189: keylen);
190: System.arraycopy(e.value, 0, newmem, mempos
191: + keylen, valuelen);
192: }
193: // finally insert new value
194: hash = findSpace(newmem, key, reclen, newspace,
195: newcapacity);
196: mempos = hash * reclen;
197: System.arraycopy(key, 0, newmem, mempos, keylen);
198: System.arraycopy(value, 0, newmem, mempos + keylen,
199: valuelen);
200: // move newmem to mem
201: mem = newmem;
202: newmem = null;
203: }
204: } else {
205: // there is enough space
206: //System.out.println("put " + new String(key) + " into cell " + hash);
207: int mempos = hash * reclen;
208: System.arraycopy(key, 0, mem, mempos, keylen);
209: System.arraycopy(value, 0, mem, mempos + keylen,
210: valuelen);
211: }
212: count++;
213: } else {
214: // overwrite old entry
215: int mempos = hash * reclen;
216: System.arraycopy(key, 0, mem, mempos, keylen);
217: System.arraycopy(value, 0, mem, mempos + keylen, valuelen);
218: }
219: }
220:
221: public byte[] get(int key) {
222: return get(toByteKey(key));
223: }
224:
225: public byte[] get(byte[] key) {
226: assert (key.length == keylen);
227:
228: int hash = findExisting(key);
229: //System.out.println("get " + new String(key) + " from cell " + hash);
230: if (hash < 0) {
231: return null;
232: }
233: // read old entry
234: byte[] value = new byte[valuelen];
235: System.arraycopy(mem, hash * reclen + keylen, value, 0,
236: valuelen);
237: return value;
238: }
239:
240: public void remove(int key) {
241: remove(toByteKey(key));
242: }
243:
244: public void remove(byte[] key) {
245: assert (key.length == keylen);
246:
247: System.out.println("REMOVE!");
248: int hash = findExisting(key);
249: if (hash >= 0) {
250: // overwrite old key
251: System.arraycopy(emptykey, 0, mem, hash * reclen, keylen);
252: count--;
253: }
254: }
255:
256: Iterator<entry> entries() {
257: return new entryIterator();
258: }
259:
260: public class entryIterator implements Iterator<entry> {
261:
262: int hashkey;
263:
264: public entryIterator() {
265: hashkey = anyhashpos(0);
266: }
267:
268: public boolean hasNext() {
269: return hashkey >= 0;
270: }
271:
272: public entry next() {
273: int i = hashkey;
274: hashkey = anyhashpos(hashkey + 1);
275: return new entry(i);
276: }
277:
278: public void remove() {
279: mem[hashkey * reclen] = 0;
280: }
281:
282: }
283:
284: public void removeany() {
285: //System.out.println("CALLED REMOVEANY");
286: int start = 0;
287: while (start < capacity()) {
288: if (mem[start * reclen] != 0) {
289: System.arraycopy(emptykey, 0, mem, start * reclen,
290: keylen);
291: count--;
292: return;
293: }
294: start++;
295: }
296: return;
297: }
298:
299: protected int anyhashpos(int start) {
300: while (start < capacity()) {
301: if (mem[start * reclen] != 0)
302: return start;
303: start++;
304: }
305: return -1;
306: }
307:
308: public byte[] anykey() {
309: int hash = 0;
310: int mempos;
311: while (hash < capacity()) {
312: mempos = hash * reclen;
313: if (mem[mempos] != 0) {
314: byte[] key = new byte[keylen];
315: System.arraycopy(mem, mempos, key, 0, keylen);
316: return key;
317: }
318: hash++;
319: }
320: return null;
321: }
322:
323: public class entry {
324: public byte[] key, value;
325:
326: public entry(int index) {
327: this .key = new byte[keylen];
328: this .value = new byte[valuelen];
329: int mempos = index * (reclen);
330: System.arraycopy(mem, mempos, key, 0, keylen);
331: System.arraycopy(mem, mempos + keylen, value, 0, valuelen);
332: }
333:
334: public entry(byte[] key, byte[] value) {
335: this .key = key;
336: this .value = value;
337: }
338: }
339:
340: public static void main(String[] args) {
341: long start = System.currentTimeMillis();
342: kelondroMHashMap map = new kelondroMHashMap(4);
343: for (int i = 0; i < 100; i++)
344: map.put(3333 + i, ("" + (1000 + i)).getBytes());
345: Iterator<entry> i = map.entries();
346: kelondroMHashMap.entry e;
347: System.out.println("Enumeration of elements: count="
348: + map.size());
349: int c = 0;
350: while (i.hasNext()) {
351: e = i.next();
352: System.out.println("key=" + new String(e.key) + ", value="
353: + new String(e.value) + ", retrieved="
354: + new String(map.get(e.key)));
355: c++;
356: }
357: System.out.println("c = " + c + "; re-catch:");
358: for (int j = 0; j < 100; j++) {
359: System.out.println("key=" + j + ", retrieved="
360: + new String(map.get(3333 + j)));
361: }
362: System.out.println("runtime = "
363: + (System.currentTimeMillis() - start));
364: }
365: }
|