001: /* MemLongSet
002: *
003: * $Id: MemLongFPSet.java 4644 2006-09-20 22:40:21Z paul_jack $
004: *
005: * Created on Oct 19, 2003
006: *
007: * Copyright (C) 2003 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.util.fingerprint;
026:
027: import java.io.Serializable;
028: import java.util.logging.Logger;
029:
030: import org.archive.util.AbstractLongFPSet;
031:
032: /**
033: * Open-addressing in-memory hash set for holding primitive long fingerprints.
034: *
035: * @author Gordon Mohr
036: */
037: public class MemLongFPSet extends AbstractLongFPSet implements
038: LongFPSet, Serializable {
039:
040: private static final long serialVersionUID = -4301879539092625698L;
041:
042: private static Logger logger = Logger.getLogger(MemLongFPSet.class
043: .getName());
044: private static final int DEFAULT_CAPACITY_POWER_OF_TWO = 10;
045: private static final float DEFAULT_LOAD_FACTOR = 0.75f;
046: protected byte[] slots;
047: protected long[] values;
048:
049: public MemLongFPSet() {
050: this (DEFAULT_CAPACITY_POWER_OF_TWO, DEFAULT_LOAD_FACTOR);
051: }
052:
053: /**
054: * @param capacityPowerOfTwo The capacity as the exponent of a power of 2.
055: * e.g if the capacity is <code>4</code> this means <code>2^^4</code>
056: * entries.
057: * @param loadFactor The load factor as a fraction. This gives the amount
058: * of free space to keep in the Set.
059: */
060: public MemLongFPSet(int capacityPowerOfTwo, float loadFactor) {
061: super (capacityPowerOfTwo, loadFactor);
062: slots = new byte[1 << capacityPowerOfTwo];
063: for (int i = 0; i < (1 << capacityPowerOfTwo); i++) {
064: slots[i] = EMPTY; // flag value for unused
065: }
066: values = new long[1 << capacityPowerOfTwo];
067: }
068:
069: protected void setAt(long i, long val) {
070: slots[(int) i] = 1;
071: values[(int) i] = val;
072: }
073:
074: protected long getAt(long i) {
075: return values[(int) i];
076: }
077:
078: protected void makeSpace() {
079: grow();
080: }
081:
082: private void grow() {
083: // Catastrophic event. Log its occurance.
084: logger.info("Doubling fingerprinting slots to "
085: + (1 << this .capacityPowerOfTwo));
086: long[] oldValues = values;
087: byte[] oldSlots = slots;
088: capacityPowerOfTwo++;
089: values = new long[1 << capacityPowerOfTwo];
090: slots = new byte[1 << capacityPowerOfTwo];
091: for (int i = 0; i < (1 << capacityPowerOfTwo); i++) {
092: slots[i] = EMPTY; // flag value for unused
093: }
094: count = 0;
095: for (int i = 0; i < oldValues.length; i++) {
096: if (oldSlots[i] >= 0) {
097: add(oldValues[i]);
098: }
099: }
100: }
101:
102: protected void relocate(long val, long oldIndex, long newIndex) {
103: values[(int) newIndex] = values[(int) oldIndex];
104: slots[(int) newIndex] = slots[(int) oldIndex];
105: slots[(int) oldIndex] = EMPTY;
106: }
107:
108: protected int getSlotState(long i) {
109: return slots[(int) i];
110: }
111:
112: protected void clearAt(long index) {
113: slots[(int) index] = EMPTY;
114: values[(int) index] = 0;
115: }
116:
117: public boolean quickContains(long fp) {
118: return contains(fp);
119: }
120: }
|