01: package net.javacoding.jspider.api.model;
02:
03: import java.net.URL;
04:
05: /**
06: *
07: * $Id: Site.java,v 1.23 2003/04/10 16:19:03 vanrogu Exp $
08: *
09: * @author Günther Van Roey
10: */
11: public interface Site {
12:
13: /**
14: * if the site is newly discovered, we're not going to fetch resources
15: * until we have interpreted the site's robots.txt rules.
16: */
17: public static final int STATE_DISCOVERED = 0;
18:
19: /**
20: * if the robots.txt was handled (interpreted or missing), we can
21: * fetch resources from it.
22: */
23: public static final int STATE_ROBOTSTXT_HANDLED = 1;
24:
25: /**
26: * if robots.txt couldn't be fetched (but seems to be there), no resources
27: * will be fetched from the site!
28: */
29: public static final int STATE_ROBOTSTXT_ERROR = 2;
30:
31: /**
32: * if robots.txt was not found, all resources can be fetched !
33: */
34: public static final int STATE_ROBOTSTXT_UNEXISTING = 3;
35:
36: /**
37: * if robots.txt was skipped, all resources can be fetched !
38: */
39: public static final int STATE_ROBOTSTXT_SKIPPED = 4;
40:
41: public int getState();
42:
43: public String getHost();
44:
45: public int getPort();
46:
47: public boolean isRobotsTXTHandled();
48:
49: public boolean getObeyRobotsTXT();
50:
51: public boolean getFetchRobotsTXT();
52:
53: public URL getURL();
54:
55: public Folder[] getRootFolders();
56:
57: public Folder getRootFolder(String name);
58:
59: public Resource[] getRootResources();
60:
61: public Resource[] getAllResources();
62:
63: public Cookie[] getCookies();
64:
65: public String getCookieString();
66:
67: public boolean getUseCookies();
68:
69: public boolean getUseProxy();
70:
71: public String getUserAgent();
72:
73: public boolean isBaseSite();
74:
75: public boolean mustHandle();
76:
77: }
|