001: /*=============================================================================
002: * Copyright Texas Instruments 2003. All Rights Reserved.
003: *
004: * This program is free software; you can redistribute it and/or
005: * modify it under the terms of the GNU Lesser General Public
006: * License as published by the Free Software Foundation; either
007: * version 2 of the License, or (at your option) any later version.
008: *
009: * This program is distributed in the hope that it will be useful,
010: * but WITHOUT ANY WARRANTY; without even the implied warranty of
011: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012: * Lesser General Public License for more details.
013: *
014: * You should have received a copy of the GNU Lesser General Public
015: * License along with this library; if not, write to the Free Software
016: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
017: *
018: * $ProjectHeader: OSCRIPT 0.155 Fri, 20 Dec 2002 18:34:22 -0800 rclark $
019: */
020:
021: package oscript.util;
022:
023: import oscript.exceptions.*;
024:
025: /**
026: * A symbol table implementation based on a open hash map, using double
027: * hashing as a strategy to avoid collisions. Double hasing uses two hash
028: * functions to compute an index into the table:
029: * <pre>
030: * Idx(i,k) := H1(k) + i * H2(k)
031: * </pre>
032: * where for a given key <code>k</code>, <code>i</code> is incremented from
033: * <code>0</code> until an unused table slot is found.
034: * <p>
035: * Threading note: this class is not synchronized, but is designed to
036: * save to read from multiple threads, while write from a single thread
037: * context (at a time).
038: *
039: * @author Rob Clark (rob@ti.com)
040: * @version 1.0
041: */
042: public class OpenHashSymbolTable implements SymbolTable,
043: java.io.Externalizable {
044: /**
045: * The loading factor, the capacity must always be size * load
046: */
047: private/*final*/float load;
048:
049: /**
050: * The state is maintained in an inner class, so it can be automically
051: * updated when we have to resize the the tables.
052: */
053: private State state;
054:
055: private class State {
056: /**
057: * The number of mappings that exist in the table.
058: */
059: public int size;
060:
061: /**
062: * Once the <code>size</code> exceeds the threshold, it is time to
063: * grow the table and re-hash.
064: */
065: public final int threshold;
066:
067: /**
068: * A key value of <code>0</code> in the <code>keys</code> indicates an
069: * unused slot.
070: */
071: public final int[] keys;
072: public final int[] vals;
073:
074: /**
075: * State constructor
076: */
077: State(int capacity) {
078: capacity = checkCapacity(capacity);
079: threshold = (int) (load * (float) capacity);
080: size = 0;
081: keys = new int[capacity];
082: vals = new int[capacity];
083: }
084: }
085:
086: /**
087: * Class Constructor.
088: */
089: public OpenHashSymbolTable() {
090: this (10, 0.75f);
091: }
092:
093: /**
094: * Class Constructor.
095: *
096: * @param capacity the initial capacity
097: * @param load the loading factor
098: */
099: public OpenHashSymbolTable(int capacity, float load) {
100: this .load = load;
101:
102: if (capacity < 2)
103: capacity = 2;
104:
105: state = new State(capacity);
106:
107: if (COLLECT_STATS)
108: logIncreaseCapacity(state.keys.length);
109: }
110:
111: /**
112: */
113: public float getLoad() {
114: return load;
115: }
116:
117: /**
118: * Get the index that the specified symbol maps to.
119: *
120: * @param id the id of the symbol to get a mapping for
121: * @return an index, or <code>-1</code> if no mapping exists for the
122: * specified symbol
123: */
124: public final int get(int id) {
125: State state = this .state;
126:
127: if (COLLECT_STATS)
128: logGet();
129:
130: if (oscript.data.Value.DEBUG)
131: if (id < MIN_SYMBOL_ID)
132: throw new ProgrammingErrorException("bad id: " + id);
133:
134: int n = h1(id, state.keys.length);
135: int k = 0;
136:
137: if (COLLECT_STATS)
138: logLookup();
139:
140: for (int i = 0; i < state.keys.length; i++) {
141: if (COLLECT_STATS)
142: logProbe();
143:
144: if (state.keys[n] == 0)
145: return -1;
146:
147: if (state.keys[n] == id)
148: return state.vals[n];
149:
150: if (k == 0)
151: k = h2(id, state.keys.length);
152:
153: n = Math.abs(n + k) % state.keys.length;
154: }
155:
156: return -1;
157: }
158:
159: /**
160: * Get the index that the specified symbol maps to, and create a new one
161: * if a mapping does not already exist. If a new mapping is created,
162: * it's value is the next successive array index, ie. the the previous
163: * array index plus one. The first mapping created has the value zero.
164: *
165: * @param id the id of the symbol to get a mapping for
166: * @return an index
167: */
168: public int create(int id) {
169: State state = this .state;
170:
171: if (COLLECT_STATS)
172: logCreate();
173:
174: if (oscript.data.Value.DEBUG)
175: if (id < MIN_SYMBOL_ID)
176: throw new ProgrammingErrorException("bad id: " + id);
177:
178: // first ensure capacity... assume we actually will be creating
179: // a new entry, because that is the common case:
180: if ((state.size + 1) >= state.threshold) {
181: int[] oldkeys = state.keys;
182: int[] oldvals = state.vals;
183:
184: // reset to new bigger size:
185: State newState = new State(2 * oldkeys.length);
186:
187: if (COLLECT_STATS)
188: logIncreaseCapacity(state.keys.length - oldkeys.length);
189:
190: for (int n = 0; n < oldkeys.length; n++)
191: if (oldkeys[n] != 0)
192: putIfNotIn(newState, oldkeys[n], oldvals[n]);
193:
194: // automically update state:
195: this .state = state = newState;
196: }
197:
198: return putIfNotIn(state, id, state.size);
199: }
200:
201: private static final int putIfNotIn(State state, int id, int val) {
202: int n = h1(id, state.keys.length);
203: int k = 0;
204:
205: if (COLLECT_STATS)
206: logPutImpl();
207: if (COLLECT_STATS)
208: logLookup();
209:
210: for (int i = 0; i < state.keys.length; i++) {
211: if (COLLECT_STATS)
212: logProbe();
213:
214: if (state.keys[n] == 0) {
215: // order is important here, because in the common case this will
216: // not be synchronized:
217: state.vals[n] = val;
218: state.keys[n] = id;
219: if (COLLECT_STATS)
220: logAdd();
221: state.size++;
222: return state.vals[n];
223: }
224:
225: if (state.keys[n] == id)
226: return state.vals[n];
227:
228: if (k == 0)
229: k = h2(id, state.keys.length);
230:
231: n = (n + k) % state.keys.length;
232: }
233:
234: throw new ProgrammingErrorException(
235: "shouldn't get here if load factor is less than one!!");
236: }
237:
238: /**
239: * Some statistics gathering code... all calls to the stat gathering methods
240: * should be wrapped with an if(COLLECT_STATS) so it gets compiled out for
241: * a regular build.
242: */
243:
244: private static final boolean COLLECT_STATS = false;
245:
246: private static final synchronized void logGet() {
247: numGets++;
248: }
249:
250: private static final synchronized void logCreate() {
251: numCreates++;
252: }
253:
254: private static final synchronized void logPutImpl() {
255: numPutImpl++;
256: } // including re-hashing
257:
258: private static final synchronized void logLookup() {
259: numLookups++;
260: }
261:
262: private static final synchronized void logProbe() {
263: numProbes++;
264: }
265:
266: private static final synchronized void logAdd() {
267: totalSize++;
268: }
269:
270: private static final synchronized void logIncreaseCapacity(
271: int amount) {
272: totalCapacity += amount;
273: }
274:
275: private static int numGets = 0;
276: private static int numCreates = 0;
277: private static int numPutImpl = 0;
278: private static int numLookups = 0;
279: private static int numProbes = 0;
280: private static int totalSize = 0;
281: private static int totalCapacity = 0;
282:
283: public static final synchronized String getStats() {
284: return ("OpenHashSymbolTable stats\n"
285: + "------------------- -----\n"
286: + " numGets: "
287: + numGets
288: + "\n"
289: + " numCreates: "
290: + numCreates
291: + "\n"
292: + " numPutImpl: "
293: + numPutImpl
294: + " ("
295: + (numPutImpl - numCreates)
296: + " due to rehash)\n"
297: + " numLookups: "
298: + numLookups
299: + "\n"
300: + " numProbes: "
301: + numProbes
302: + "\n"
303: + " avgProbesPerLookup: "
304: + (((float) numProbes) / ((float) numLookups))
305: + "\n"
306: + " totalSize: "
307: + totalSize
308: + "\n"
309: + " totalCapacity: " + totalCapacity + "\n");
310: }
311:
312: /**
313: * The number of mappings that exist in this table.
314: *
315: * @return the number of mappings in the table
316: */
317: public int size() {
318: return state.size;
319: }
320:
321: /**
322: * Return an iteration of the keys (symbols) into this table. To conform to
323: * the {@link java.util.Iterator} interface, each symbol is wrapped (boxed)
324: * in a {@link Integer}.
325: *
326: * @return an iteration of symbols that are keys into this table
327: */
328: public synchronized java.util.Iterator symbols() {
329: return new java.util.Iterator() {
330:
331: private int idx = -1;
332: private int[] keys = OpenHashSymbolTable.this .state.keys;
333: private Object next = null;
334:
335: public boolean hasNext() {
336: refresh();
337: return next != null;
338: }
339:
340: public Object next() {
341: if (!hasNext())
342: throw new java.util.NoSuchElementException(
343: "no more elements");
344:
345: refresh();
346:
347: Object r = next;
348: next = null;
349: return r;
350: }
351:
352: private void refresh() {
353: if (next == null) {
354: while (++idx < keys.length) {
355: if (keys[idx] != 0) {
356: next = new Integer(keys[idx]);
357: return;
358: }
359: }
360: }
361: }
362:
363: public void remove() {
364: throw new UnsupportedOperationException("remove");
365: }
366: };
367: }
368:
369: /*
370: * The hash functions... for now I'm using endianess swap as the primary
371: * hash function (which should deal well with symbol ids from 0 to n...
372: * ideal would probably be a bitwise reverse, but I think this is good
373: * enough
374: *
375: * These hashing functions rely on the table size being a prime number.
376: * Using an alternate version of h2() could result in this restriction
377: * being lifted, but I am not sure which approach performs better, so
378: * some benchmarking is probably in order...
379: */
380: private static final int h1(int k, int capacity) {
381: int v = (((k & 0xff000000) >> 24) | ((k & 0x00ff0000) >> 8)
382: | ((k & 0x0000ff00) << 8) | ((k & 0x000000ff) << 24));
383:
384: v %= capacity;
385:
386: if (v < 0)
387: v = -v;
388:
389: return v;
390: }
391:
392: private static final int h2(int k, int capacity) {
393: return 1 + (k % (capacity - 2));
394: }
395:
396: private final static int checkCapacity(int sz) {
397: int idx = java.util.Arrays.binarySearch(Primes.PRIMES, sz);
398: if (idx < 0)
399: idx = -idx - 1;
400:
401: // XXX choose closest prime number, rather than next largest:
402: if ((idx > 0)
403: && ((Primes.PRIMES[idx] - sz) > (sz - Primes.PRIMES[idx - 1])))
404: idx--;
405: // XXX
406:
407: return Primes.PRIMES[idx];
408: }
409:
410: /*=======================================================================*/
411: public void readExternal(java.io.ObjectInput in)
412: throws java.io.IOException {
413: load = in.readFloat();
414:
415: int capacity = in.readInt();
416:
417: State state = new State(capacity);
418: for (int i = 0; i < capacity; i++) {
419: state.keys[i] = in.readInt();
420: state.vals[i] = in.readInt();
421: }
422:
423: state.size = in.readInt();
424:
425: this .state = state;
426: }
427:
428: public void writeExternal(java.io.ObjectOutput out)
429: throws java.io.IOException {
430: out.writeFloat(load);
431:
432: State state = this .state;
433: int capacity = state.keys.length;
434:
435: out.writeInt(capacity);
436:
437: for (int i = 0; i < capacity; i++) {
438: out.writeInt(state.keys[i]);
439: out.writeInt(state.vals[i]);
440: }
441:
442: out.writeInt(state.size);
443: }
444: /*=======================================================================*/
445:
446: }
447:
448: /*
449: * Local Variables:
450: * tab-width: 2
451: * indent-tabs-mode: nil
452: * mode: java
453: * c-indentation-style: java
454: * c-basic-offset: 2
455: * eval: (c-set-offset 'substatement-open '0)
456: * eval: (c-set-offset 'case-label '+)
457: * eval: (c-set-offset 'inclass '+)
458: * eval: (c-set-offset 'inline-open '0)
459: * End:
460: */
|