001: package net.javacoding.jspider.core.model;
002:
003: import net.javacoding.jspider.api.model.*;
004: import net.javacoding.jspider.Constants;
005: import net.javacoding.jspider.core.storage.spi.StorageSPI;
006:
007: import java.net.URL;
008:
009: /**
010: * $Id: SiteInternal.java,v 1.13 2003/04/29 17:53:48 vanrogu Exp $
011: */
012: public class SiteInternal implements Site {
013:
014: protected StorageSPI storage;
015:
016: protected int id;
017: protected boolean handle;
018: protected URL url;
019: protected String host;
020: protected int port;
021: protected int hashCode;
022: protected boolean isBaseSite;
023: protected boolean hasRobotsTXT;
024: protected boolean useCookies;
025: protected boolean useProxy;
026: protected int state;
027: protected boolean obeyRobotsTXT;
028: protected boolean fetchRobotsTXT;
029: protected String userAgent;
030:
031: public SiteInternal(StorageSPI storage, int id, boolean handle,
032: URL url, boolean isBaseSite) {
033: this (storage, id, handle, url, Site.STATE_DISCOVERED, true,
034: true, true, Constants.USERAGENT, isBaseSite);
035: }
036:
037: public SiteInternal(int id, StorageSPI storage, URL url) {
038: this (storage, id, true, url, false);
039: }
040:
041: public SiteInternal(StorageSPI storage, int id, boolean handle,
042: URL url, int state, boolean obeyRobotsTXT,
043: boolean useProxy, boolean useCookies, String userAgent,
044: boolean isBaseSite) {
045: this .storage = storage;
046: this .handle = handle;
047: this .id = id;
048: this .url = url;
049: this .host = url.getHost();
050: this .port = url.getPort();
051: this .state = state;
052: this .obeyRobotsTXT = obeyRobotsTXT;
053: this .useProxy = useProxy;
054: this .useCookies = useCookies;
055: this .userAgent = userAgent;
056: this .isBaseSite = isBaseSite;
057: hashCode = (host + port).hashCode();
058: }
059:
060: public int getId() {
061: return id;
062: }
063:
064: public void setId(int id) {
065: this .id = id;
066: }
067:
068: public int getState() {
069: return state;
070: }
071:
072: public String getHost() {
073: return host;
074: }
075:
076: public int getPort() {
077: return port;
078: }
079:
080: public boolean isRobotsTXTHandled() {
081: return (state == Site.STATE_ROBOTSTXT_HANDLED)
082: || (state == Site.STATE_ROBOTSTXT_UNEXISTING)
083: || (state == Site.STATE_ROBOTSTXT_ERROR)
084: || (state == Site.STATE_ROBOTSTXT_SKIPPED);
085: }
086:
087: public URL getURL() {
088: return url;
089: }
090:
091: public Folder[] getRootFolders() {
092: return storage.getFolderDAO().findSiteRootFolders(this );
093: }
094:
095: public Folder getRootFolder(String name) {
096: Folder[] folders = getRootFolders();
097: for (int i = 0; i < folders.length; i++) {
098: Folder folder = folders[i];
099: if (folder.getName().equals(name)) {
100: return folder;
101: }
102: }
103: return null;
104: }
105:
106: public Resource[] getRootResources() {
107: return storage.getResourceDAO().getRootResources(this );
108: }
109:
110: public Resource[] getAllResources() {
111: return storage.getResourceDAO().getBySite(this );
112: }
113:
114: public Cookie[] getCookies() {
115: return storage.getCookieDAO().find(id);
116: }
117:
118: public String getCookieString() {
119: Cookie[] c = getCookies();
120: StringBuffer sb = new StringBuffer();
121: for (int i = 0; i < c.length; i++) {
122: Cookie cookie = c[i];
123: sb.append(cookie.getName());
124: sb.append("=");
125: sb.append(cookie.getValue());
126: sb.append("; ");
127: }
128: return sb.toString();
129: }
130:
131: public boolean getUseCookies() {
132: return useCookies;
133: }
134:
135: public boolean equals(Object object) {
136: if (object instanceof Site) {
137: Site other = (Site) object;
138: return (other.getHost().equalsIgnoreCase(host) && other
139: .getPort() == port);
140: } else {
141: return false;
142: }
143: }
144:
145: public int hashCode() {
146: return hashCode;
147: }
148:
149: public boolean getUseProxy() {
150: return useProxy;
151: }
152:
153: public void registerNoRobotsTXTFound() {
154: state = Site.STATE_ROBOTSTXT_UNEXISTING;
155: }
156:
157: public void registerRobotsTXTError() {
158: state = Site.STATE_ROBOTSTXT_ERROR;
159: }
160:
161: public void registerRobotsTXT() {
162: this .state = Site.STATE_ROBOTSTXT_HANDLED;
163: }
164:
165: public void registerRobotsTXTSkipped() {
166: this .state = Site.STATE_ROBOTSTXT_SKIPPED;
167: }
168:
169: public void setUseCookies(boolean useCookies) {
170: this .useCookies = useCookies;
171: }
172:
173: public void setUseProxy(boolean useProxy) {
174: this .useProxy = useProxy;
175: }
176:
177: public void setObeyRobotsTXT(boolean obey) {
178: this .obeyRobotsTXT = obey;
179: }
180:
181: public boolean getObeyRobotsTXT() {
182: return this .obeyRobotsTXT;
183: }
184:
185: public boolean getFetchRobotsTXT() {
186: return fetchRobotsTXT;
187: }
188:
189: public void setFetchRobotsTXT(boolean fetchRobotsTXT) {
190: this .fetchRobotsTXT = fetchRobotsTXT;
191: }
192:
193: public String translateState() {
194: switch (state) {
195: case Site.STATE_DISCOVERED:
196: return "DISCOVERED";
197: case Site.STATE_ROBOTSTXT_ERROR:
198: return "ROBOTSTXT_ERROR";
199: case Site.STATE_ROBOTSTXT_UNEXISTING:
200: return "ROBOTSTXT_UNEXISTING";
201: case Site.STATE_ROBOTSTXT_HANDLED:
202: return "ROBOTSTXT_HANDLED";
203: case Site.STATE_ROBOTSTXT_SKIPPED:
204: return "ROBOTSTXT_SKIPPED";
205: }
206: return "<ERROR_UNKNOWN_STATE>";
207: }
208:
209: public String toString() {
210: return "[Site: " + url + " - " + translateState()
211: + ((isBaseSite) ? " *" : "") + "]";
212: }
213:
214: public String getUserAgent() {
215: return userAgent;
216: }
217:
218: public void setUserAgent(String userAgent) {
219: this .userAgent = userAgent;
220: }
221:
222: public void setBaseSite(boolean isBaseSite) {
223: this .isBaseSite = isBaseSite;
224: }
225:
226: public boolean isBaseSite() {
227: return isBaseSite;
228: }
229:
230: public boolean mustHandle() {
231: return handle;
232: }
233:
234: /**
235: * needed for easy templating
236: */
237: public boolean getMustHandle() {
238: return handle;
239: }
240:
241: public void setHandle(boolean mustHandle) {
242: this.handle = mustHandle;
243: }
244:
245: }
|