01: /* HasViaDecideRule
02: *
03: * $Id: HasViaDecideRule.java 4649 2006-09-25 17:16:55Z paul_jack $
04: *
05: * Created on Aug 11, 2006
06: *
07: * Copyright (C) 2006 Internet Archive.
08: *
09: * This file is part of the Heritrix web crawler (crawler.archive.org).
10: *
11: * Heritrix is free software; you can redistribute it and/or modify
12: * it under the terms of the GNU Lesser Public License as published by
13: * the Free Software Foundation; either version 2.1 of the License, or
14: * any later version.
15: *
16: * Heritrix is distributed in the hope that it will be useful,
17: * but WITHOUT ANY WARRANTY; without even the implied warranty of
18: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: * GNU Lesser Public License for more details.
20: *
21: * You should have received a copy of the GNU Lesser Public License
22: * along with Heritrix; if not, write to the Free Software
23: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: */
25: package org.archive.crawler.deciderules;
26:
27: import org.archive.crawler.datamodel.CandidateURI;
28:
29: /**
 * Rule applies the configured decision for any URI which has a 'via'
 * (essentially, any URI that was discovered by way of another URI, i.e.
 * one that was not a seed or one of some kinds of mid-crawl adds).
32: *
33: * @author gojomo
34: */
35: public class HasViaDecideRule extends PredicatedDecideRule {
36:
37: private static final long serialVersionUID = 1670292311303097735L;
38:
39: /**
40: * Usual constructor.
41: * @param name Name of this DecideRule.
42: */
43: public HasViaDecideRule(String name) {
44: super (name);
45: setDescription("HasViaDecideRule. Applies configured decision "
46: + "to any URI that has a 'via'.");
47: }
48:
49: /**
50: * Evaluate whether given object is over the threshold number of
51: * hops.
52: *
53: * @param object
54: * @return true if the mx-hops is exceeded
55: */
56: protected boolean evaluate(Object object) {
57: try {
58: CandidateURI curi = (CandidateURI) object;
59: return curi.getVia() != null;
60: } catch (ClassCastException e) {
61: // if not CrawlURI, always disregard
62: return false;
63: }
64: }
65: }
|