001: /* ArrayLongFPCache
002: *
003: * $Id: ArrayLongFPCache.java 3870 2005-10-06 05:01:49Z gojomo $
004: *
005: * Created on Oct 5, 2005
006: *
007: * Copyright (C) 2005 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.util.fingerprint;
026:
027: /**
028: * Simple long fingerprint cache using a backing array; any long maps to
029: * one of 'smear' slots. Longs inserted should be randomly distributed,
030: *
031: * @author gojomo
032: */
033: public class ArrayLongFPCache implements LongFPSet {
034: public static final int DEFAULT_CAPACITY = 1 << 20; // 1 million, 8MB
035: public static final int DEFAULT_SMEAR = 5;
036:
037: long cache[] = new long[DEFAULT_CAPACITY];
038: int smear = DEFAULT_SMEAR;
039: int count = 0;
040:
041: public void setCapacity(int newCapacity) {
042: long[] oldCache = cache;
043: cache = new long[newCapacity];
044: for (int i = 0; i < oldCache.length; i++) {
045: add(oldCache[i]);
046: }
047: }
048:
049: /* (non-Javadoc)
050: * @see org.archive.util.fingerprint.LongFPSet#add(long)
051: */
052: public boolean add(long l) {
053: if (contains(l)) {
054: return false;
055: }
056: int index = (Math.abs((int) (l % cache.length)) + (count % smear))
057: % cache.length;
058: count++;
059: cache[index] = l;
060: return true;
061: }
062:
063: /* (non-Javadoc)
064: * @see org.archive.util.fingerprint.LongFPSet#contains(long)
065: */
066: public boolean contains(long l) {
067: int index = Math.abs((int) (l % cache.length));
068: for (int i = index; i < index + smear; i++) {
069: if (cache[i % cache.length] == l) {
070: return true;
071: }
072: }
073: return false;
074: }
075:
076: /* (non-Javadoc)
077: * @see org.archive.util.fingerprint.LongFPSet#remove(long)
078: */
079: public boolean remove(long l) {
080: int index = Math.abs((int) (l % cache.length));
081: for (int i = index; i < index + smear; i++) {
082: if (cache[i % cache.length] == l) {
083: cache[i % cache.length] = 0;
084: count = Math.min(count, cache.length);
085: count--;
086: return true;
087: }
088: }
089: return false;
090: }
091:
092: /* (non-Javadoc)
093: * @see org.archive.util.fingerprint.LongFPSet#count()
094: */
095: public long count() {
096: return Math.min(count, cache.length);
097: }
098:
099: /* (non-Javadoc)
100: * @see org.archive.util.fingerprint.LongFPSet#quickContains(long)
101: */
102: public boolean quickContains(long fp) {
103: return contains(fp);
104: }
105:
106: public int cacheLength() {
107: return cache.length;
108: }
109:
110: }
|