01: /* BloomFilter
02: *
03: * $Id: BloomFilter.java 3655 2005-07-05 19:20:03Z gojomo $
04: *
05: * Created on Jun 30, 2005
06: *
07: * Copyright (C) 2005 Internet Archive; an adaptation of
08: * LGPL work (C) Sebastiano Vigna
09: *
10: * This file is part of the Heritrix web crawler (crawler.archive.org).
11: *
12: * Heritrix is free software; you can redistribute it and/or modify
13: * it under the terms of the GNU Lesser Public License as published by
14: * the Free Software Foundation; either version 2.1 of the License, or
15: * any later version.
16: *
17: * Heritrix is distributed in the hope that it will be useful,
18: * but WITHOUT ANY WARRANTY; without even the implied warranty of
19: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20: * GNU Lesser Public License for more details.
21: *
22: * You should have received a copy of the GNU Lesser Public License
23: * along with Heritrix; if not, write to the Free Software
24: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25: */
26:
27: package org.archive.util;
28:
29: /**
30: * Common interface for different Bloom filter
31: * implementations
32: *
33: * @author Gordon Mohr
34: */
35: public interface BloomFilter {
36: /** The number of character sequences in the filter.
37: *
38: * @return the number of character sequences in the filter (but see {@link #contains(CharSequence)}).
39: */
40: public abstract int size();
41:
42: /** Checks whether the given character sequence is in this filter.
43: *
44: * <P>Note that this method may return true on a character sequence that is has
45: * not been added to the filter. This will happen with probability 2<sub>-<var>d</var></sub>,
46: * where <var>d</var> is the number of hash functions specified at creation time, if
47: * the number of the elements in the filter is less than <var>n</var>, the number
48: * of expected elements specified at creation time.
49: *
50: * @param s a character sequence.
51: * @return true if the sequence is in the filter (or if a sequence with the
52: * same hash sequence is in the filter).
53: */
54: public abstract boolean contains(final CharSequence s);
55:
56: /** Adds a character sequence to the filter.
57: *
58: * @param s a character sequence.
59: * @return true if the character sequence was not in the filter (but see {@link #contains(CharSequence)}).
60: */
61: public abstract boolean add(final CharSequence s);
62:
63: /**
64: * The amount of memory in bytes consumed by the bloom
65: * bitfield.
66: *
67: * @return memory used by bloom bitfield, in bytes
68: */
69: public abstract long getSizeBytes();
70: }
|