01: package org.openedit.sitesearch;
02:
03: import java.util.HashSet;
04: import java.util.Iterator;
05: import java.util.Set;
06:
07: import org.apache.commons.logging.Log;
08: import org.apache.commons.logging.LogFactory;
09:
10: import com.openedit.util.PathUtilities;
11:
12: public class Status {
13: private static final Log log = LogFactory.getLog(Status.class);
14: Set fieldVisitedHrefs;
15: Set fieldAllowedSites;
16:
17: String filter = ".jsp .php .html .htm .pdf"; //TODO: Externalize this into search.xml file
18:
19: public Set getVisitedHrefs() {
20: if (fieldVisitedHrefs == null) {
21: fieldVisitedHrefs = new HashSet();
22: }
23: return fieldVisitedHrefs;
24: }
25:
26: public void setVisitedHrefs(Set inVisitedHrefs) {
27: fieldVisitedHrefs = inVisitedHrefs;
28: }
29:
30: public Set getAllowedSites() {
31: if (fieldAllowedSites == null) {
32: fieldAllowedSites = new HashSet();
33: }
34: return fieldAllowedSites;
35: }
36:
37: public void setAllowedSites(Set inAllowedSites) {
38: fieldAllowedSites = inAllowedSites;
39: }
40:
41: public boolean followHref(String inHref) {
42: if (getVisitedHrefs().contains(inHref)) {
43: return false;
44: }
45: //TODO: Use Mime Type to figure this out. It works for OpenEdit sites for now
46: if (inHref.indexOf(".") > -1) {
47: String ext = PathUtilities.extractPageType(inHref);
48: if (ext != null && filter.indexOf(ext.toLowerCase()) == -1) {
49: return false;
50: }
51: }
52: for (Iterator iterator = getAllowedSites().iterator(); iterator
53: .hasNext();) {
54: String root = (String) iterator.next();
55: if (inHref.startsWith(root)) {
56: return true;
57: }
58: }
59: log.info("Skip external " + inHref);
60: return false;
61: }
62:
63: public void addVisitedUrl(String inUrl) {
64: if (inUrl.endsWith("/")) {
65: getVisitedHrefs().add(inUrl + "index.html");
66: getVisitedHrefs().add(inUrl + "index.htm");
67: } else if (inUrl.endsWith("/index.html")
68: || inUrl.endsWith("/index.html")) {
69: getVisitedHrefs().add(
70: PathUtilities.extractDirectoryPath(inUrl));
71: }
72: getVisitedHrefs().add(inUrl);
73: }
74:
75: }
|