01: /* PrerequisiteAcceptDecideRule
02: *
03: * $Id: PrerequisiteAcceptDecideRule.java 4649 2006-09-25 17:16:55Z paul_jack $
04: *
05: * Created on Mar 3, 2005
06: *
07: * Copyright (C) 2005 Internet Archive.
08: *
09: * This file is part of the Heritrix web crawler (crawler.archive.org).
10: *
11: * Heritrix is free software; you can redistribute it and/or modify
12: * it under the terms of the GNU Lesser Public License as published by
13: * the Free Software Foundation; either version 2.1 of the License, or
14: * any later version.
15: *
16: * Heritrix is distributed in the hope that it will be useful,
17: * but WITHOUT ANY WARRANTY; without even the implied warranty of
18: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: * GNU Lesser Public License for more details.
20: *
21: * You should have received a copy of the GNU Lesser Public License
22: * along with Heritrix; if not, write to the Free Software
23: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: */
25: package org.archive.crawler.deciderules;
26:
27: import org.archive.crawler.datamodel.CandidateURI;
28: import org.archive.crawler.extractor.Link;
29:
30: /**
31: * Rule which ACCEPTs all 'prerequisite' URIs (those with a 'P' in
32: * the last hopsPath position). Good in a late position to ensure
33: * other scope settings don't lock out necessary prerequisites.
34: *
35: * @author gojomo
36: */
37: public class PrerequisiteAcceptDecideRule extends AcceptDecideRule {
38:
39: private static final long serialVersionUID = 2762042167111186142L;
40:
41: public PrerequisiteAcceptDecideRule(String name) {
42: super (name);
43: setDescription("PrerequisiteAcceptDecideRule. ACCEPTs "
44: + "all CrawlURIs discovered via a prerequisite "
45: + "'link'.");
46: }
47:
48: public Object decisionFor(Object object) {
49: try {
50: String hopsPath = ((CandidateURI) object).getPathFromSeed();
51: if (hopsPath != null
52: && hopsPath.length() > 0
53: && hopsPath.charAt(hopsPath.length() - 1) == Link.PREREQ_HOP) {
54: return ACCEPT;
55: }
56: } catch (ClassCastException e) {
57: // Do nothing
58: }
59: return PASS;
60: }
61: }
|