01: /* Copyright (C) 2003 Internet Archive.
02: *
03: * This file is part of the Heritrix web crawler (crawler.archive.org).
04: *
05: * Heritrix is free software; you can redistribute it and/or modify
06: * it under the terms of the GNU Lesser Public License as published by
07: * the Free Software Foundation; either version 2.1 of the License, or
08: * any later version.
09: *
10: * Heritrix is distributed in the hope that it will be useful,
11: * but WITHOUT ANY WARRANTY; without even the implied warranty of
12: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13: * GNU Lesser Public License for more details.
14: *
15: * You should have received a copy of the GNU Lesser Public License
16: * along with Heritrix; if not, write to the Free Software
17: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18: *
19: * SeedCachingScope.java
20: * Created on Mar 25, 2005
21: *
22: * $Header$
23: */
24: package org.archive.crawler.scope;
25:
26: import java.util.ArrayList;
27: import java.util.Iterator;
28: import java.util.List;
29:
30: import org.archive.crawler.datamodel.CrawlURI;
31: import org.archive.net.UURI;
32:
33: /**
34: * A CrawlScope that caches its seed list for the
35: * convenience of scope-tests that are based on the
36: * seeds.
37: *
38: * @author gojomo
39: *
40: */
41: public class SeedCachingScope extends ClassicScope {
42:
43: private static final long serialVersionUID = 300230673616424926L;
44:
45: //private static final Logger logger =
46: // Logger.getLogger(SeedCachingScope.class.getName());
47: List<UURI> seeds;
48:
49: public SeedCachingScope(String name) {
50: super (name);
51: }
52:
53: /* (non-Javadoc)
54: * @see org.archive.crawler.framework.CrawlScope#addSeed(org.archive.crawler.datamodel.UURI)
55: */
56: public boolean addSeed(CrawlURI curi) {
57: if (super .addSeed(curi) == false) {
58: // failed
59: return false;
60: }
61: // FIXME: This is not thread-safe.
62: List<UURI> newSeeds = new ArrayList<UURI>(seeds);
63: newSeeds.add(curi.getUURI());
64: seeds = newSeeds;
65: return true;
66: }
67:
68: /* (non-Javadoc)
69: * @see org.archive.crawler.framework.CrawlScope#refreshSeeds()
70: */
71: public synchronized void refreshSeeds() {
72: super .refreshSeeds();
73: seeds = null;
74: fillSeedsCache();
75: }
76:
77: /* (non-Javadoc)
78: * @see org.archive.crawler.framework.CrawlScope#seedsIterator()
79: */
80: public Iterator<UURI> seedsIterator() {
81: fillSeedsCache();
82: return seeds.iterator();
83: }
84:
85: /**
86: * Ensure seeds cache is created/filled
87: */
88: protected synchronized void fillSeedsCache() {
89: if (seeds == null) {
90: seeds = new ArrayList<UURI>();
91: Iterator<UURI> iter = super.seedsIterator();
92: while (iter.hasNext()) {
93: seeds.add(iter.next());
94: }
95: }
96: }
97: }
|