01: /* MemFPMergeUriUniqFilter
02: *
03: * $Id: MemFPMergeUriUniqFilter.java 4647 2006-09-22 18:39:39Z paul_jack $
04: *
05: * Created on Dec 14, 2005
06: *
07: * Copyright (C) 2005 Internet Archive.
08: *
09: * This file is part of the Heritrix web crawler (crawler.archive.org).
10: *
11: * Heritrix is free software; you can redistribute it and/or modify
12: * it under the terms of the GNU Lesser Public License as published by
13: * the Free Software Foundation; either version 2.1 of the License, or
14: * any later version.
15: *
16: * Heritrix is distributed in the hope that it will be useful,
17: * but WITHOUT ANY WARRANTY; without even the implied warranty of
18: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: * GNU Lesser Public License for more details.
20: *
21: * You should have received a copy of the GNU Lesser Public License
22: * along with Heritrix; if not, write to the Free Software
23: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: */
25: package org.archive.crawler.util;
26:
27: import it.unimi.dsi.fastutil.longs.LongArrayList;
28: import it.unimi.dsi.fastutil.longs.LongIterator;
29:
30: /**
31: * Crude all-in-memory FP-merging UriUniqFilter.
32: *
33: * @author gojomo
34: */
35: public class MemFPMergeUriUniqFilter extends FPMergeUriUniqFilter {
36: protected LongArrayList allFps = new LongArrayList();
37: protected LongArrayList newFps;
38:
39: /* (non-Javadoc)
40: * @see org.archive.crawler.util.FPMergeUriUniqFilter#beginFpMerge()
41: */
42: protected LongIterator beginFpMerge() {
43: newFps = new LongArrayList(
44: (int) (allFps.size() + (pending() / 2)));
45: return allFps.iterator();
46: }
47:
48: /* (non-Javadoc)
49: * @see org.archive.crawler.util.FPMergeUriUniqFilter#addNewFp(java.lang.Long)
50: */
51: protected void addNewFp(long currFp) {
52: newFps.add(currFp);
53: }
54:
55: /* (non-Javadoc)
56: * @see org.archive.crawler.util.FPMergeUriUniqFilter#finishFpMerge()
57: */
58: protected void finishFpMerge() {
59: allFps = newFps;
60: newFps = null;
61: }
62:
63: /* (non-Javadoc)
64: * @see org.archive.crawler.datamodel.UriUniqFilter#count()
65: */
66: public long count() {
67: return allFps.size();
68: }
69:
70: }
|