001: /* BenchmarkBlooms
002: *
003: * $Id: BenchmarkBlooms.java 3868 2005-10-05 19:58:56Z gojomo $
004: *
005: * Created on Jun 30, 2005
006: *
007: * Copyright (C) 2005 Internet Archive
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.util;
026:
027: /**
028: * Simple benchmarking of different BloomFilter
029: * implementations.
030: *
031: * Take care when interpreting results; the effect of GC,
032: * dynamic compilation, and any other activity on test
033: * machine may affect relative time tallies in unpredictable
034: * ways.
035: *
036: * @author Gordon Mohr
037: */
038: public class BenchmarkBlooms {
039:
040: public static void main(String[] args) {
041: (new BenchmarkBlooms()).instanceMain(args);
042: }
043:
044: public void instanceMain(String[] args) {
045: int reps = (args.length > 0) ? Integer.parseInt(args[0]) : 3;
046: int n_expected = (args.length > 1) ? Integer.parseInt(args[1])
047: : 10000000;
048: int d_hashes = (args.length > 2) ? Integer.parseInt(args[2])
049: : 22;
050: int adds = (args.length > 3) ? Integer.parseInt(args[3])
051: : 5000000;
052: String prefix = (args.length > 4) ? args[4]
053: : "http://www.archive.org/";
054:
055: System.out.println("reps=" + reps + " n_expected=" + n_expected
056: + " d_hashes=" + d_hashes + " adds=" + adds
057: + " prefix=" + prefix);
058:
059: BloomFilter bloom64;
060: BloomFilter bloom32;
061: BloomFilter bloom32split;
062: BloomFilter bloom32p2;
063: BloomFilter bloom32p2split;
064: for (int r = 0; r < reps; r++) {
065: bloom32 = new BloomFilter32bit(n_expected, d_hashes);
066: testBloom(bloom32, adds, prefix);
067: bloom32 = null;
068: bloom32split = new BloomFilter32bitSplit(n_expected,
069: d_hashes);
070: testBloom(bloom32split, adds, prefix);
071: bloom32split = null;
072: bloom64 = new BloomFilter64bit(n_expected, d_hashes);
073: testBloom(bloom64, adds, prefix);
074: bloom64 = null;
075: bloom32p2 = new BloomFilter32bp2(n_expected, d_hashes);
076: testBloom(bloom32p2, adds, prefix);
077: bloom32p2 = null;
078: bloom32p2split = new BloomFilter32bp2Split(n_expected,
079: d_hashes);
080: testBloom(bloom32p2split, adds, prefix);
081: bloom32p2split = null;
082: }
083: }
084:
085: /**
086: * @param bloom
087: * @param prefix
088: * @param adds
089: * @param d_hashes
090: */
091: private void testBloom(BloomFilter bloom, int adds, String prefix) {
092: System.gc();
093: long startTime = System.currentTimeMillis();
094: long falsePositives = 0;
095: for (int i = 0; i < adds; i++) {
096: if (!bloom.add(prefix + Integer.toString(i))) {
097: falsePositives++;
098: }
099: }
100: long finishTime = System.currentTimeMillis();
101: System.out.println(bloom.getClass().getName() + ": "
102: + (finishTime - startTime) + "ms "
103: + bloom.getSizeBytes() + "bytes " + falsePositives
104: + "false");
105: }
106: }
|