001: /* SoftHashMap
002: *
003: * $Id: SoftSettingsHash.java 4665 2006-09-26 00:20:33Z paul_jack $
004: *
005: * Created on Mar 18, 2004
006: *
007: * Copyright (C) 2004 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.crawler.settings;
026:
027: import java.lang.ref.Reference;
028: import java.lang.ref.ReferenceQueue;
029: import java.lang.ref.SoftReference;
030: import java.util.ConcurrentModificationException;
031: import java.util.Iterator;
032: import java.util.NoSuchElementException;
033:
034: public class SoftSettingsHash {
035:
036: /**
037: * The maximum capacity, used if a higher value is implicitly specified
038: * by either of the constructors with arguments.
039: * MUST be a power of two <= 1<<30.
040: */
041: private static final int MAXIMUM_CAPACITY = 1 << 30;
042:
043: /**
044: * The load fast used.
045: */
046: private static final float LOAD_FACTOR = 0.75f;
047:
048: /**
049: * The table, resized as necessary. Length MUST Always be a power of two.
050: */
051: private SettingsEntry[] table;
052:
053: /**
054: * The number of key-value mappings contained in this hash.
055: */
056: private int size;
057:
058: /**
059: * The next size value at which to resize (capacity * load factor).
060: */
061: private int threshold;
062:
063: /**
064: * Reference queue for cleared entries
065: */
066: private final ReferenceQueue<? super String> queue = new ReferenceQueue<String>();
067:
068: /**
069: * The number of times this HashMap has been structurally modified
070: * Structural modifications are those that change the number of mappings in
071: * the HashMap or otherwise modify its internal structure (e.g.,
072: * rehash). This field is used to make iterators on Collection-views of
073: * the HashMap fail-fast. (See ConcurrentModificationException).
074: */
075: private volatile int modCount;
076:
077: /**
078: * Constructs a new, empty <tt>SoftSettingsHash</tt> with the given initial
079: * capacity.
080: *
081: * @param initialCapacity The initial capacity of the
082: * <tt>SoftSettingsHash</tt>
083: * @throws IllegalArgumentException If the initial capacity is negative.
084: */
085: public SoftSettingsHash(int initialCapacity) {
086: if (initialCapacity < 0)
087: throw new IllegalArgumentException(
088: "Illegal Initial Capacity: " + initialCapacity);
089: if (initialCapacity > MAXIMUM_CAPACITY)
090: initialCapacity = MAXIMUM_CAPACITY;
091:
092: int capacity = 1;
093: while (capacity < initialCapacity)
094: capacity <<= 1;
095: table = new SettingsEntry[capacity];
096: threshold = (int) (capacity * LOAD_FACTOR);
097: }
098:
099: /**
100: * Check for equality of non-null reference x and possibly-null y. By
101: * default uses Object.equals.
102: */
103: static boolean eq(Object x, Object y) {
104: return x == y || x.equals(y);
105: }
106:
107: /**
108: * Return index for hash code h.
109: */
110: static int indexFor(int h, int length) {
111: return h & (length - 1);
112: }
113:
114: /**
115: * Expunge stale entries from the table.
116: */
117: private void expungeStaleEntries() {
118: SettingsEntry entry;
119: Reference ref;
120: while ((ref = queue.poll()) != null) {
121: entry = (SettingsEntry) ref;
122: int h = entry.hash;
123: int i = indexFor(h, table.length);
124:
125: SettingsEntry prev = table[i];
126: SettingsEntry p = prev;
127: while (p != null) {
128: SettingsEntry next = p.next;
129: if (p == entry) {
130: if (prev == entry)
131: table[i] = next;
132: else
133: prev.next = next;
134: entry.next = null; // Help GC
135: entry.settings = null; // " "
136: size--;
137: break;
138: }
139: prev = p;
140: p = next;
141: }
142: }
143: }
144:
145: /**
146: * Returns the number of key-value mappings in this map.
147: * This result is a snapshot, and may not reflect unprocessed
148: * entries that will be removed before next attempted access
149: * because they are no longer referenced.
150: */
151: public int size() {
152: if (size == 0)
153: return 0;
154: expungeStaleEntries();
155: return size;
156: }
157:
158: /**
159: * Returns the value to which the specified key is mapped in this weak
160: * hash map, or <tt>null</tt> if the map contains no mapping for
161: * this key. Null is also returned if the element has been GC'ed.
162: *
163: * @param key the key whose associated settings object is to be returned.
164: * @return the settings object represented by the key, or
165: * <tt>null</tt> if the map contains no mapping for this key.
166: * @see #put(String, CrawlerSettings)
167: */
168: public CrawlerSettings get(String key) {
169: if (key == null) {
170: throw new NullPointerException("Null key");
171: }
172: int hash = hash(key);
173: expungeStaleEntries();
174: int index = indexFor(hash, table.length);
175: SettingsEntry e = table[index];
176: while (e != null) {
177: if (e.hash == hash && eq(key, e.get()))
178: return e.settings;
179: e = e.next;
180: }
181: return null;
182: }
183:
184: /**
185: * Associates the specified settings object with the specified key in this
186: * hash.
187: *
188: * If the hash previously contained a settings object for this key, the old
189: * object is replaced.
190: *
191: * @param key key with which the specified settings object is to be
192: * associated.
193: * @param settings settings object to be associated with the specified key.
194: * @return previous value associated with specified key, or <tt>null</tt>
195: * if there was no mapping for key.
196: */
197: public CrawlerSettings put(String key, CrawlerSettings settings) {
198: if (settings == null) {
199: throw new NullPointerException("Settings object was null");
200: }
201: if (key == null) {
202: throw new NullPointerException("Null key");
203: }
204: int hash = hash(key);
205: expungeStaleEntries();
206: int i = indexFor(hash, table.length);
207:
208: for (SettingsEntry entry = table[i]; entry != null; entry = entry.next) {
209: if (hash == entry.hash && eq(key, entry.get())) {
210: CrawlerSettings oldValue = entry.settings;
211: if (settings != oldValue)
212: entry.settings = settings;
213: return oldValue;
214: }
215: }
216:
217: modCount++;
218: table[i] = new SettingsEntry(key, settings, queue, hash,
219: table[i]);
220: if (++size >= threshold)
221: resize(table.length * 2);
222: return null;
223: }
224:
225: public CrawlerSettings put(SettingsEntry entry) {
226: return put(entry.getKey(), entry.getValue());
227: }
228:
229: /**
230: * Rehashes the contents of this hash into a new <tt>HashMap</tt> instance
231: * with a larger capacity. This method is called automatically when the
232: * number of keys in this map exceeds its capacity and load factor.
233: *
234: * Note that this method is a no-op if it's called with newCapacity ==
235: * 2*MAXIMUM_CAPACITY (which is Integer.MIN_VALUE).
236: *
237: * @param newCapacity the new capacity, MUST be a power of two.
238: */
239: void resize(int newCapacity) {
240: expungeStaleEntries();
241: SettingsEntry[] oldTable = table;
242: int oldCapacity = oldTable.length;
243:
244: // check if needed
245: if (size < threshold || oldCapacity > newCapacity)
246: return;
247:
248: SettingsEntry[] newTable = new SettingsEntry[newCapacity];
249:
250: transfer(oldTable, newTable);
251: table = newTable;
252:
253: /*
254: * If ignoring null elements and processing ref queue caused massive
255: * shrinkage, then restore old table. This should be rare, but avoids
256: * unbounded expansion of garbage-filled tables.
257: */
258: if (size >= threshold / 2) {
259: threshold = (int) (newCapacity * LOAD_FACTOR);
260: } else {
261: expungeStaleEntries();
262: transfer(newTable, oldTable);
263: table = oldTable;
264: }
265: }
266:
267: /** Transfer all entries from src to dest tables */
268: private void transfer(SettingsEntry[] src, SettingsEntry[] dest) {
269: for (int j = 0; j < src.length; ++j) {
270: SettingsEntry entry = src[j];
271: src[j] = null;
272: while (entry != null) {
273: SettingsEntry next = entry.next;
274: Object key = entry.get();
275: if (key == null) {
276: entry.next = null; // Help GC
277: entry.settings = null; // " "
278: size--;
279: } else {
280: int i = indexFor(entry.hash, dest.length);
281: entry.next = dest[i];
282: dest[i] = entry;
283: }
284: entry = next;
285: }
286: }
287: }
288:
289: /**
290: * Removes the settings object identified by the key from this hash if
291: * present.
292: *
293: * @param key key whose element is to be removed from the hash.
294: * @return previous value associated with specified key, or <tt>null</tt>
295: * if there was no mapping for key.
296: */
297: public Object remove(String key) {
298: if (key == null) {
299: throw new NullPointerException("Null key");
300: }
301: int hash = hash(key);
302: expungeStaleEntries();
303: int i = indexFor(hash, table.length);
304: SettingsEntry prev = table[i];
305: SettingsEntry entry = prev;
306:
307: while (entry != null) {
308: SettingsEntry next = entry.next;
309: if (hash == entry.hash && eq(key, entry.get())) {
310: modCount++;
311: size--;
312: if (prev == entry)
313: table[i] = next;
314: else
315: prev.next = next;
316: return entry.settings;
317: }
318: prev = entry;
319: entry = next;
320: }
321:
322: return null;
323: }
324:
325: /**
326: * Removes all settings object from this hash.
327: */
328: public void clear() {
329: // clear out ref queue. We don't need to expunge entries
330: // since table is getting cleared.
331: while (queue.poll() != null)
332: ;
333:
334: modCount++;
335: SettingsEntry tab[] = table;
336: for (int i = 0; i < tab.length; ++i)
337: tab[i] = null;
338: size = 0;
339:
340: // Allocation of array may have caused GC, which may have caused
341: // additional entries to go stale. Removing these entries from the
342: // reference queue will make them eligible for reclamation.
343: while (queue.poll() != null)
344: ;
345: }
346:
347: /**
348: * The entries in this hash extend SoftReference, using the host string
349: * as the key.
350: */
351: static class SettingsEntry extends SoftReference<String> {
352: private CrawlerSettings settings;
353: private final int hash;
354: private SettingsEntry next;
355:
356: /**
357: * Create new entry.
358: */
359: SettingsEntry(String key, CrawlerSettings settings,
360: ReferenceQueue<? super String> queue, int hash,
361: SettingsEntry next) {
362: super (key, queue);
363: this .settings = settings;
364: this .hash = hash;
365: this .next = next;
366: }
367:
368: public String getKey() {
369: return (String) this .get();
370: }
371:
372: public CrawlerSettings getValue() {
373: return settings;
374: }
375:
376: public boolean equals(Object o) {
377: if (!(o instanceof SettingsEntry))
378: return false;
379: SettingsEntry e = (SettingsEntry) o;
380: String key1 = getKey();
381: String key2 = e.getKey();
382: if (key1 == key2 || (key1 != null && key1.equals(key2))) {
383: CrawlerSettings setting1 = getValue();
384: CrawlerSettings setting2 = e.getValue();
385: if (setting1 == setting2
386: || (setting1 != null && setting1
387: .equals(setting2)))
388: return true;
389: }
390: return false;
391: }
392: }
393:
394: /** Iterator over all elements in hash.
395: */
396: class EntryIterator implements Iterator {
397: int index;
398: SettingsEntry entry = null;
399: SettingsEntry lastReturned = null;
400: int expectedModCount = modCount;
401:
402: /**
403: * Strong reference needed to avoid disappearance of key
404: * between hasNext and next
405: */
406: String nextKey = null;
407:
408: /**
409: * Strong reference needed to avoid disappearance of key
410: * between nextEntry() and any use of the entry
411: */
412: String currentKey = null;
413:
414: EntryIterator() {
415: index = (size() != 0 ? table.length : 0);
416: }
417:
418: public boolean hasNext() {
419: SettingsEntry[] t = table;
420:
421: while (nextKey == null) {
422: SettingsEntry e = entry;
423: int i = index;
424: while (e == null && i > 0)
425: e = t[--i];
426: entry = e;
427: index = i;
428: if (e == null) {
429: currentKey = null;
430: return false;
431: }
432: nextKey = (String) e.get(); // hold on to key in strong ref
433: if (nextKey == null)
434: entry = entry.next;
435: }
436: return true;
437: }
438:
439: /** The common parts of next() across different types of iterators */
440: public Object next() {
441: return nextEntry();
442: }
443:
444: public SettingsEntry nextEntry() {
445: if (modCount != expectedModCount)
446: throw new ConcurrentModificationException();
447: if (nextKey == null && !hasNext())
448: throw new NoSuchElementException();
449:
450: lastReturned = entry;
451: entry = entry.next;
452: currentKey = nextKey;
453: nextKey = null;
454: return lastReturned;
455: }
456:
457: public void remove() {
458: if (lastReturned == null)
459: throw new IllegalStateException();
460: if (modCount != expectedModCount)
461: throw new ConcurrentModificationException();
462:
463: SoftSettingsHash.this .remove(currentKey);
464: expectedModCount = modCount;
465: lastReturned = null;
466: currentKey = null;
467: }
468:
469: }
470:
471: /** Make hash value from a String.
472: *
473: * @param key the string for which to create hash value.
474: * @return the hash value.
475: */
476: static int hash(String key) {
477: int hash = key.hashCode();
478:
479: hash += ~(hash << 9);
480: hash ^= (hash >>> 14);
481: hash += (hash << 4);
482: hash ^= (hash >>> 10);
483: return hash;
484: }
485:
486: public EntryIterator iterator() {
487: return new EntryIterator();
488: }
489:
490: }
|