01: /* Copyright (C) 2003 Internet Archive.
02: *
03: * This file is part of the Heritrix web crawler (crawler.archive.org).
04: *
05: * Heritrix is free software; you can redistribute it and/or modify
06: * it under the terms of the GNU Lesser Public License as published by
07: * the Free Software Foundation; either version 2.1 of the License, or
08: * any later version.
09: *
10: * Heritrix is distributed in the hope that it will be useful,
11: * but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13: * GNU Lesser Public License for more details.
14: *
15: * You should have received a copy of the GNU Lesser Public License
16: * along with Heritrix; if not, write to the Free Software
17: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18: *
19: * FPUriUniqFilter.java
20: * Created on Oct 1, 2003
21: *
22: * $Header$
23: */
24: package org.archive.crawler.util;
25:
26: import java.io.Serializable;
27:
28: import org.archive.util.ArchiveUtils;
29: import org.archive.util.fingerprint.LongFPSet;
30:
31: import st.ata.util.FPGenerator;
32:
33: /**
34: * UriUniqFilter storing 64-bit UURI fingerprints, using an internal LongFPSet
35: * instance.
36: *
37: * The passed LongFPSet internal instance may be disk or memory based. Accesses
38: * to the underlying LongFPSet are synchronized.
39: *
40: * @author gojomo
41: */
42: public class FPUriUniqFilter extends SetBasedUriUniqFilter implements
43: Serializable {
44: // Be robust against trivial implementation changes
45: private static final long serialVersionUID = ArchiveUtils
46: .classnameBasedUID(FPUriUniqFilter.class, 1);
47:
48: // private static Logger logger =
49: // Logger.getLogger(FPUriUniqFilter.class.getName());
50:
51: private LongFPSet fpset;
52: private transient FPGenerator fpgen = FPGenerator.std64;
53:
54: /**
55: * Create FPUriUniqFilter wrapping given long set
56: *
57: * @param fpset
58: */
59: public FPUriUniqFilter(LongFPSet fpset) {
60: this .fpset = fpset;
61: }
62:
63: private long getFp(CharSequence canonical) {
64: return fpgen.fp(canonical);
65: }
66:
67: protected boolean setAdd(CharSequence uri) {
68: return fpset.add(getFp(uri));
69: }
70:
71: protected long setCount() {
72: return fpset.count();
73: }
74:
75: protected boolean setRemove(CharSequence uri) {
76: return fpset.remove(getFp(uri));
77: }
78: }
|