001: /*
002: * WebSphinx web-crawling toolkit
003: *
004: * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
005: * reserved.
006: *
007: * Redistribution and use in source and binary forms, with or without
008: * modification, are permitted provided that the following conditions
009: * are met:
010: *
011: * 1. Redistributions of source code must retain the above copyright
012: * notice, this list of conditions and the following disclaimer.
013: *
014: * 2. Redistributions in binary form must reproduce the above copyright
015: * notice, this list of conditions and the following disclaimer in
016: * the documentation and/or other materials provided with the
017: * distribution.
018: *
019: * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
020: * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021: * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
022: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
023: * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
024: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
025: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
026: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
027: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
028: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
029: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
030: *
031: */
032:
033: package websphinx;
034:
035: /**
036: * Link event. A LinkEvent is issued when the crawler
037: * starts or stops retrieving a link, and when it makes
038: * a decision about a link.
039: */
040: public class LinkEvent {
041: Crawler crawler;
042: int id;
043: Link link;
044: Throwable exception;
045:
046: /**
047: * No event occured on this link yet. Never delivered in a LinkEvent,
048: * but may be returned by link.getStatus().
049: */
050: public static final int NONE = 0;
051:
052: /**
053: * Link was rejected by shouldVisit()
054: */
055: public static final int SKIPPED = 1;
056:
057: /**
058: * Link has already been visited during the crawl, so it was skipped.
059: */
060: public static final int ALREADY_VISITED = 2;
061:
062: /**
063: * Link was accepted by walk() but exceeds the maximum depth from the start set.
064: */
065: public static final int TOO_DEEP = 3;
066:
067: /**
068: * Link was accepted by walk() and is waiting to be downloaded
069: */
070: public static final int QUEUED = 4;
071:
072: /**
073: * Link is being retrieved
074: */
075: public static final int RETRIEVING = 5;
076:
077: /**
078: * An error occurred in retrieving the page.
079: * The error can be obtained from getException().
080: */
081: public static final int ERROR = 6;
082:
083: /**
084: * Link has been retrieved
085: */
086: public static final int DOWNLOADED = 7;
087:
088: /**
089: * Link has been thoroughly processed by crawler
090: */
091: public static final int VISITED = 8;
092:
093: /**
094: * Map from id code (RETRIEVING) to name ("retrieving")
095: */
096: public static final String[] eventName = { "none", "skipped",
097: "already visited", "too deep", "queued", "retrieving",
098: "error", "downloaded", "visited" };
099:
100: /**
101: * Make a LinkEvent.
102: * @param crawler Crawler that generated this event
103: * @param id event code, like LinkEvent.RETRIEVING
104: * @param link Link on which this event occurred
105: */
106: public LinkEvent(Crawler crawler, int id, Link link) {
107: this .crawler = crawler;
108: this .id = id;
109: this .link = link;
110: }
111:
112: /**
113: * Make a LinkEvent for an error.
114: * @param crawler Crawler that generated this event
115: * @param id Event code, usually ERROR
116: * @param link Link on which this event occurred
117: * @param exception Throwable
118: */
119: public LinkEvent(Crawler crawler, int id, Link link,
120: Throwable exception) {
121: this .crawler = crawler;
122: this .id = id;
123: this .link = link;
124: this .exception = exception;
125: }
126:
127: /**
128: * Get crawler that generated the event
129: * @return crawler
130: */
131: public Crawler getCrawler() {
132: return crawler;
133: }
134:
135: /**
136: * Get event id
137: * @return id
138: */
139: public int getID() {
140: return id;
141: }
142:
143: /**
144: * Get event name (string equivalent to its ID)
145: * @return id
146: */
147: public String getName() {
148: return eventName[id];
149: }
150:
151: /**
152: * Get link to which this event occurred.
153: * @return link
154: */
155: public Link getLink() {
156: return link;
157: }
158:
159: /**
160: * Get exception related to this event. Valid when ID == ERROR.
161: * @return exception object
162: */
163: public Throwable getException() {
164: return exception;
165: }
166:
167: /**
168: * Convert this event to a String describing it.
169: */
170: public String toString() {
171: String result;
172: if (id == ERROR)
173: result = exception.toString();
174: else
175: result = eventName[id];
176: result += " " + link.toDescription();
177: return result;
178: }
179: }
|