01: package net.javacoding.jspider.mod.rule;
02:
03: import net.javacoding.jspider.api.model.Decision;
04: import net.javacoding.jspider.api.model.Site;
05: import net.javacoding.jspider.core.SpiderContext;
06: import net.javacoding.jspider.core.rule.impl.BaseRuleImpl;
07: import net.javacoding.jspider.core.model.DecisionInternal;
08:
09: import java.net.URL;
10:
11: /**
12: * Rule implementation that only accepts a resource URL if it is external to
13: * the site currently being spidered.
14: *
15: * $Id: ExternallyReferencedOnlyRule.java,v 1.2 2003/04/25 21:29:06 vanrogu Exp $
16: *
17: * @author Günther Van Roey
18: */
19: public class ExternallyReferencedOnlyRule extends BaseRuleImpl {
20:
21: public Decision apply(SpiderContext context, Site currentSite,
22: URL url) {
23: if (currentSite == null) {
24: return new DecisionInternal(Decision.RULE_DONTCARE);
25: } else {
26: if (currentSite.getHost().equalsIgnoreCase(url.getHost())
27: && (currentSite.getPort() == url.getPort())) {
28: return new DecisionInternal(Decision.RULE_IGNORE,
29: "url is within same site - not ignored");
30: } else {
31: return new DecisionInternal(Decision.RULE_ACCEPT,
32: "url is accepted because it is referenced from another site");
33: }
34: }
35: }
36:
37: }
|