001: package net.matuschek.spider;
002:
003: /*********************************************
004: Copyright (c) 2001 by Daniel Matuschek
005: *********************************************/
006:
007: import java.net.MalformedURLException;
008: import java.net.URL;
009:
010: import net.matuschek.http.HttpConstants;
011:
012: /**
013: * The RobotTask implements a simple object that represents a task
014: * for the web robot.
015: *
016: * @author Daniel Matuschek
017: * @version $Id: RobotTask.java,v 1.7 2003/02/25 13:34:48 oliver_schmidt Exp $
018: */
019: public class RobotTask implements Comparable {
020:
021: /**
022: * Creates a new RobotTask with the given parameters
023: *
024: * @param url - the URL of the RobotTask
025: * @param maxDepth - maximal search depth starting from this task
026: * @param referer - content of the HTTP Referer header, use "-" if
027: * you don't want to use a Referer
028: */
029: public RobotTask(URL url, int maxDepth, String referer) {
030: setUrl(url);
031: this .maxDepth = maxDepth;
032: this .referer = referer;
033: }
034:
035: /**
036: * Creates a new RobotTask with the given parameters
037: *
038: * @param urlString - the URL (as String) of the RobotTask
039: * @param maxDepth - maximal search depth starting from this task
040: * @param referer - content of the HTTP Referer header, use "-" if
041: * you don't want to use a Referer
042: */
043: public RobotTask(String urlString, int maxDepth, String referer) {
044: this .urlString = urlString;
045: this .maxDepth = maxDepth;
046: this .referer = referer;
047: }
048:
049: public URL getUrl() {
050: try {
051: return new URL(urlString);
052: } catch (MalformedURLException e) {
053: e.printStackTrace();
054: return null;
055: }
056: }
057:
058: public void setUrl(URL url) {
059: urlString = url.toString();
060: hashCode = 0;
061: }
062:
063: public int getMaxDepth() {
064: return maxDepth;
065: }
066:
067: public void setMaxDepth(int maxDepth) {
068: this .maxDepth = maxDepth;
069: }
070:
071: public String getReferer() {
072: return referer;
073: }
074:
075: public void setReferer(String referer) {
076: this .referer = referer;
077: }
078:
079: public int getMethod() {
080: return method;
081: }
082:
083: public void setMethod(int method) {
084: this .method = method;
085: hashCode = 0;
086: }
087:
088: public String getParamString() {
089: return paramString;
090: }
091:
092: public void setParamString(String paramString) {
093: this .paramString = paramString;
094: hashCode = 0;
095: }
096:
097: /**
098: * two RobotTasks are equal, if they represent the
099: * same URL
100: */
101: public boolean equals(Object o) {
102: try {
103: return (compareTo(o) == 0);
104: } catch (ClassCastException e) {
105: return false;
106: }
107: }
108:
109: /**
110: * Implements a natural order for RobotTasks. This is based
111: * on
112: * @param o another RobotTask object to compare to
113: * @return 0 if o is equal to this object, 1 if it is smaller,
114: * -1 otherwise
115: * @exception ClassCastException if o is no RobotTask object
116: */
117: public int compareTo(Object o) throws ClassCastException {
118: RobotTask r = (RobotTask) o;
119:
120: if (r == null) {
121: throw new ClassCastException("object to compare to is null");
122: }
123:
124: int diff = hashCode() - r.hashCode();
125: if (diff == 0) {
126: String me = this .getInternalStringRepresentation();
127: String it = r.getInternalStringRepresentation();
128: diff = me.compareTo(it);
129: }
130:
131: return diff;
132: }
133:
134: /**
135: * Gets a String representation for this RobotTask object. Format
136: * may change without notice. Should be used for debugging and logging
137: * only.
138: * @return a String represantation for this task
139: */
140: public String toString() {
141: return urlString + " " + paramString + " Method " + method;
142: }
143:
144: /**
145: * Gets a hashcode for this object. It is based on the String hash code
146: * implementation used with the internal string representation of this
147: * object
148: */
149: public int hashCode() {
150: if (hashCode != 0) {
151: return hashCode;
152: }
153: hashCode = getInternalStringRepresentation().hashCode();
154: return hashCode;
155: }
156:
157: /**
158: * Gets an internal String representation for comparisons
159: * and hash code generation.
160: *
161: * Currently this contains the url, the parameters and the method.
162: * Because this is only used as an internal key and the URL and
163: * parameters can be very long, I have decided to use the MD5 hash of
164: * the longer representation.
165: *
166: * @return a String that should be unique for every object
167: */
168: public String getInternalStringRepresentation() {
169: return (paramString == null && method == HttpConstants.GET) ? urlString
170: : urlString + paramString + method;
171: }
172:
173: // protected URL url; -> urlString holds all the information to save memory
174: private int maxDepth;
175: private String referer;
176: protected int method = HttpConstants.GET;
177: protected String paramString = null;
178:
179: protected int hashCode = 0; // cached hashCode for quick retrieval and comparison
180: protected String urlString; // URL as String because the class URL uses lots of space
181: protected int retries = 0; // number of retries
182:
183: /*
184: * Increases retries and returns increased value.
185: */
186: public int retry() {
187: return ++retries;
188: }
189:
190: }
|