001: /*
002: (c) Copyright 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
003: All rights reserved - see end of file.
004: $Id: HashCommon.java,v 1.15 2008/02/01 11:29:37 chris-dollin Exp $
005: */
006:
007: package com.hp.hpl.jena.mem;
008:
009: import java.util.*;
010:
011: import com.hp.hpl.jena.shared.BrokenException;
012: import com.hp.hpl.jena.util.iterator.*;
013:
014: /**
015: Shared stuff for our hashing implementations: does the base work for
016: hashing and growth sizes.
017: @author kers
018: */
019: public abstract class HashCommon {
020: /**
021: Jeremy suggests, from his experiments, that load factors more than
022: 0.6 leave the table too dense, and little advantage is gained below 0.4.
023: Although that was with a quadratic probe, I'm borrowing the same
024: plausible range, and use 0.5 by default.
025: */
026: protected static final double loadFactor = 0.5;
027:
028: /**
029: The keys of whatever table it is we're implementing. Since we share code
030: for triple sets and for node->bunch maps, it has to be an Object array; we
031: take the casting hit.
032: */
033: protected Object[] keys;
034:
035: /**
036: The capacity (length) of the key array.
037: */
038: public int capacity;
039:
040: /**
041: The threshold number of elements above which we resize the table;
042: equal to the capacity times the load factor.
043: */
044: protected int threshold;
045:
046: /**
047: The number of active elements in the table, maintained incrementally.
048: */
049: protected int size = 0;
050:
051: /**
052: A count of the number of changes applied to this Hash object, used for
053: detecting concurrent modifications.
054: */
055: protected int changes;
056:
057: /**
058: Initialise this hashed thingy to have <code>initialCapacity</code> as its
059: capacity and the corresponding threshold. All the key elements start out
060: null.
061: */
062: protected HashCommon(int initialCapacity) {
063: keys = new Object[capacity = initialCapacity];
064: threshold = (int) (capacity * loadFactor);
065: }
066:
067: /**
068: A hashed structure may become empty as a side-effect of a .remove on one
069: of its iterators: a container can request notification of this by passing
070: a <code>NotifyEmpty</code> object in when the iterator is constructed,
071: and its <code>emptied</code> method is called when the bunch
072: becomes empty.
073: @author kers
074: */
075: public static interface NotifyEmpty {
076: /**
077: A NotifyEmpty instance that ignores the notification.
078: */
079: public static NotifyEmpty ignore = new NotifyEmpty() {
080: public void emptied() {
081: }
082: };
083:
084: /**
085: Method to call to notify that the collection has become empty.
086: */
087: public void emptied();
088: }
089:
090: /**
091: When removeFrom [or remove] removes a key, it calls this method to
092: remove any associated values, passing in the index of the key's slot.
093: Subclasses override if they have any associated values.
094: */
095: protected void removeAssociatedValues(int here) {
096: }
097:
098: /**
099: When removeFrom [or remove] moves a key, it calls this method to move
100: any associated values, passing in the index of the slot <code>here</code>
101: to move to and the index of the slot <code>scan</code> to move from.
102: Subclasses override if they have any associated values.
103: */
104: protected void moveAssociatedValues(int here, int scan) {
105: }
106:
107: /**
108: Answer the item at index <code>i</code> of <code>keys</code>. This
109: method is for testing purposes <i>only</i>.
110: */
111: public Object getItemForTestingAt(int i) {
112: return keys[i];
113: }
114:
115: /**
116: Answer the initial index for the object <code>key</code> in the table.
117: With luck, this will be the final position for that object. The initial index
118: will always be non-negative and less than <code>capacity</code>.
119: <p>
120: Implementation note: do <i>not</i> use <code>Math.abs</code> to turn a
121: hashcode into a positive value; there is a single specific integer on which
122: it does not work. (Hence, here, the use of bitmasks.)
123: */
124: protected final int initialIndexFor(Object key) {
125: return (improveHashCode(key.hashCode()) & 0x7fffffff)
126: % capacity;
127: }
128:
129: /**
130: Answer the transformed hash code, intended to be an improvement
131: on the objects own hashcode. The magic number 127 is performance
132: voodoo to (try to) eliminate problems experienced by Wolfgang.
133: */
134: protected int improveHashCode(int hashCode) {
135: return hashCode * 127;
136: }
137:
138: /**
139: Search for the slot in which <code>key</code> is found. If it is absent,
140: return the index of the free slot in which it could be placed. If it is present,
141: return the bitwise complement of the index of the slot it appears in. Hence
142: negative values imply present, positive absent, and there's no confusion
143: around 0.
144: */
145: protected final int findSlot(Object key) {
146: int index = initialIndexFor(key);
147: while (true) {
148: Object current = keys[index];
149: if (current == null)
150: return index;
151: if (key.equals(current))
152: return ~index;
153: if (--index < 0)
154: index += capacity;
155: }
156: }
157:
158: /**
159: Remove the object <code>key</code> from this hash's keys if it
160: is present (if it's absent, do nothing). If a key is removed, the
161: <code>removeAssociatedValues</code> will be removed. If a key
162: is moved, the <code>moveAssociatedValues</code> method will
163: be called.
164: */
165: public void remove(Object key) {
166: int slot = findSlot(key);
167: if (slot < 0)
168: removeFrom(~slot);
169: }
170:
171: /**
172: Work out the capacity and threshold sizes for a new improved bigger
173: table (bigger by a factor of two, at present).
174: */
175: protected void growCapacityAndThreshold() {
176: capacity = nextSize(capacity * 2);
177: threshold = (int) (capacity * loadFactor);
178: }
179:
180: static final int[] primes = { 7, 19, 37, 79, 149, 307, 617, 1237,
181: 2477, 4957, 9923, 19853, 39709, 79423, 158849, 317701,
182: 635413, 1270849, 2541701, 5083423 };
183:
184: protected static int nextSize(int atLeast) {
185: for (int i = 0; i < primes.length; i += 1)
186: if (primes[i] > atLeast)
187: return primes[i];
188: return atLeast;
189: }
190:
191: /**
192: Remove the triple at element <code>i</code> of <code>contents</code>.
193: This is an implementation of Knuth's Algorithm R from tAoCP vol3, p 527,
194: with exchanging of the roles of i and j so that they can be usefully renamed
195: to <i>here</i> and <i>scan</i>.
196: <p>
197: It relies on linear probing but doesn't require a distinguished REMOVED
198: value. Since we resize the table when it gets fullish, we don't worry [much]
199: about the overhead of the linear probing.
200: <p>
201: Iterators running over the keys may miss elements that are moved from the
202: top of the table to the bottom because of Iterator::remove. removeFrom
203: returns such a moved key as its result, and null otherwise.
204: */
205: protected Object removeFrom(int here) {
206: final int original = here;
207: Object wrappedAround = null;
208: size -= 1;
209: while (true) {
210: keys[here] = null;
211: removeAssociatedValues(here);
212: int scan = here;
213: while (true) {
214: if (--scan < 0)
215: scan += capacity;
216: Object key = keys[scan];
217: if (key == null)
218: return wrappedAround;
219: int r = initialIndexFor(key);
220: if (scan <= r && r < here || r < here && here < scan
221: || here < scan && scan <= r) { /* Nothing. We'd have preferred an `unless` statement. */
222: } else {
223: // System.err.println( ">> move from " + scan + " to " + here + " [original = " + original + ", r = " + r + "]" );
224: if (here <= original && scan > original) {
225: // System.err.println( "]] recording wrapped " );
226: wrappedAround = keys[scan];
227: }
228: keys[here] = keys[scan];
229: moveAssociatedValues(here, scan);
230: here = scan;
231: break;
232: }
233: }
234: }
235: }
236:
237: void showkeys() {
238: if (false) {
239: System.err.print(">> KEYS:");
240: for (int i = 0; i < capacity; i += 1)
241: if (keys[i] != null)
242: System.err.print(" " + initialIndexFor(keys[i])
243: + "@" + i + "::" + keys[i]);
244: System.err.println();
245: }
246: }
247:
248: public ExtendedIterator keyIterator() {
249: return keyIterator(NotifyEmpty.ignore);
250: }
251:
252: public ExtendedIterator keyIterator(final NotifyEmpty container) {
253: showkeys();
254: final List movedKeys = new ArrayList();
255: ExtendedIterator basic = new BasicKeyIterator(changes,
256: container, movedKeys);
257: ExtendedIterator leftovers = new MovedKeysIterator(changes,
258: container, movedKeys);
259: return basic.andThen(leftovers);
260: }
261:
262: /**
263: The MovedKeysIterator iterates over the elements of the <code>keys</code>
264: list. It's not sufficient to just use List::iterator, because the .remove
265: method must remove elements from the hash table itself.
266: <p>
267: Note that the list supplied on construction will be empty: it is filled before
268: the first call to <code>hasNext()</code>.
269: */
270: protected final class MovedKeysIterator extends NiceIterator {
271: private final List keys;
272:
273: protected int index = 0;
274: final int initialChanges;
275: final NotifyEmpty container;
276:
277: protected MovedKeysIterator(int initialChanges,
278: NotifyEmpty container, List keys) {
279: this .keys = keys;
280: this .initialChanges = initialChanges;
281: this .container = container;
282: }
283:
284: public boolean hasNext() {
285: if (changes > initialChanges)
286: throw new ConcurrentModificationException();
287: return index < keys.size();
288: }
289:
290: public Object next() {
291: if (changes > initialChanges)
292: throw new ConcurrentModificationException();
293: if (hasNext() == false)
294: noElements("");
295: return keys.get(index++);
296: }
297:
298: public void remove() {
299: if (changes > initialChanges)
300: throw new ConcurrentModificationException();
301: HashCommon.this .remove(keys.get(index - 1));
302: if (size == 0)
303: container.emptied();
304: }
305: }
306:
307: /**
308: The BasicKeyIterator iterates over the <code>keys</code> array.
309: If a .remove call moves an unprocessed key underneath the iterator's
310: index, that key value is added to the <code>movedKeys</code>
311: list supplied to the constructor.
312: */
313: protected final class BasicKeyIterator extends NiceIterator {
314: protected final List movedKeys;
315:
316: int index = 0;
317: final int initialChanges;
318: final NotifyEmpty container;
319:
320: protected BasicKeyIterator(int initialChanges,
321: NotifyEmpty container, List movedKeys) {
322: this .movedKeys = movedKeys;
323: this .initialChanges = initialChanges;
324: this .container = container;
325: }
326:
327: public boolean hasNext() {
328: if (changes > initialChanges)
329: throw new ConcurrentModificationException();
330: while (index < capacity && keys[index] == null)
331: index += 1;
332: return index < capacity;
333: }
334:
335: public Object next() {
336: if (changes > initialChanges)
337: throw new ConcurrentModificationException();
338: if (hasNext() == false)
339: noElements("HashCommon keys");
340: return keys[index++];
341: }
342:
343: public void remove() {
344: if (changes > initialChanges)
345: throw new ConcurrentModificationException();
346: // System.err.println( ">> keyIterator::remove, size := " + size +
347: // ", removing " + keys[index + 1] );
348: Object moved = removeFrom(index - 1);
349: if (moved != null)
350: movedKeys.add(moved);
351: if (size == 0)
352: container.emptied();
353: if (size < 0)
354: throw new BrokenException("BROKEN");
355: showkeys();
356: }
357: }
358: }
359:
360: /*
361: * (c) Copyright 2005, 2006, 2007, 2008 Hewlett-Packard Development Company, LP
362: * All rights reserved.
363: *
364: * Redistribution and use in source and binary forms, with or without
365: * modification, are permitted provided that the following conditions
366: * are met:
367: * 1. Redistributions of source code must retain the above copyright
368: * notice, this list of conditions and the following disclaimer.
369: * 2. Redistributions in binary form must reproduce the above copyright
370: * notice, this list of conditions and the following disclaimer in the
371: * documentation and/or other materials provided with the distribution.
372: * 3. The name of the author may not be used to endorse or promote products
373: * derived from this software without specific prior written permission.
374: *
375: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
376: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
377: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
378: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
379: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
380: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
381: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
382: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
383: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
384: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
385: */
|