/*
 * WebSphinx web-crawling toolkit
 *
 * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

package websphinx;

import java.io.File;
import java.io.OutputStream;
import java.io.IOException;
import java.util.Date;
//#ifdef JDK1.1
import java.io.PrintWriter;
//#endif JDK1.1
/*#ifdef JDK1.0
import java.io.PrintStream;
#endif JDK1.0*/

/**
 * Crawling monitor that writes messages to standard output or a file.
 * Acts both as a CrawlListener (monitoring the start and end of the
 * crawl) and as a LinkListener (monitoring page retrieval).
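 * <p>
 * A minimal usage sketch (the crawler variable and the log file name are
 * illustrative; the file constructor may throw IOException):
 * <pre>
 *     EventLog log = new EventLog("crawl.log");   // or new EventLog() for standard output
 *     crawler.addCrawlListener(log);
 *     crawler.addLinkListener(log);
 * </pre>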
 */
public class EventLog implements CrawlListener, LinkListener {

    //#ifdef JDK1.1
    PrintWriter stream;
    //#endif JDK1.1
    /*#ifdef JDK1.0
    PrintStream stream;
    #endif JDK1.0*/
    boolean onlyNetworkEvents = true;

    /**
     * Make an EventLog that writes to standard output.
     */
    public EventLog() {
        this (System.out);
    }

    /**
     * Make an EventLog that writes to a stream.
     */
    public EventLog(OutputStream out) {
        /*#ifdef JDK1.0
        stream = new PrintStream (out, true);
        #endif JDK1.0*/
        //#ifdef JDK1.1
        stream = new PrintWriter(out, true);
        //#endif JDK1.1
    }

    /**
     * Make an EventLog that writes to a file. The file is overwritten.
     * @param filename File to which crawling event messages are written
     */
    public EventLog(String filename) throws IOException {
        /*#ifdef JDK1.0
        stream = new PrintStream (Access.getAccess ().writeFile (new File(filename), false));
        #endif JDK1.0*/
        //#ifdef JDK1.1
        stream = new PrintWriter(Access.getAccess().writeFile(
            new File(filename), false));
        //#endif JDK1.1
    }

    /**
     * Set whether the logger prints only network-related LinkEvents.
     * If true, then the logger prints only LinkEvents where
     * LinkEvent.isNetworkEvent() returns true. If false,
     * then the logger prints all LinkEvents. Default is true.
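     * <p>
     * For example (a sketch; the choice of output destination is arbitrary):
     * <pre>
     *     EventLog log = new EventLog();      // writes to standard output
     *     log.setOnlyNetworkEvents(false);    // report every LinkEvent
     * </pre>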
     * @param flag true iff only network LinkEvents should be logged
     */
    public void setOnlyNetworkEvents(boolean flag) {
        onlyNetworkEvents = flag;
    }

    /**
     * Test whether the logger prints only network-related LinkEvents.
     * If true, then the logger prints only LinkEvents where
     * LinkEvent.isNetworkEvent() returns true. If false,
     * then the logger prints all LinkEvents. Default is true.
     * @return true iff only network LinkEvents are logged
     */
    public boolean getOnlyNetworkEvents() {
        return onlyNetworkEvents;
    }

    /**
     * Notify that the crawler started.
     */
    public void started(CrawlEvent event) {
        stream.println(new Date() + ": *** started "
            + event.getCrawler());
    }

    /**
     * Notify that the crawler has stopped.
     */
    public void stopped(CrawlEvent event) {
        stream.println(new Date() + ": *** finished "
            + event.getCrawler());
    }

    /**
     * Notify that the crawler's state was cleared.
     */
    public void cleared(CrawlEvent event) {
        stream.println(new Date() + ": *** cleared "
            + event.getCrawler());
    }

    /**
     * Notify that the crawler timed out.
     */
    public void timedOut(CrawlEvent event) {
        stream.println(new Date() + ": *** timed out "
            + event.getCrawler());
    }

    /**
     * Notify that the crawler paused.
     */
    public void paused(CrawlEvent event) {
        stream.println(new Date() + ": *** paused "
            + event.getCrawler());
    }

    /**
     * Notify that a link event occurred.
     */
    public void crawled(LinkEvent event) {
        switch (event.getID()) {
            case LinkEvent.RETRIEVING:
            case LinkEvent.DOWNLOADED:
            case LinkEvent.VISITED:
            case LinkEvent.ERROR:
                // network-related events are always logged
                break;
            default:
                // other events are logged only when the network-only filter is off
                if (onlyNetworkEvents)
                    return;
                break;
        }
        stream.println(new Date() + ": " + event);

        // show a stack trace for unexpected (non-I/O) exceptions
        Throwable exc = event.getException();
        if (exc != null && !(exc instanceof IOException))
            exc.printStackTrace(stream);
    }

    /**
     * Create an EventLog that prints to standard error and attach it to a crawler.
     * This is a convenience method.
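     * <p>
     * For example (a sketch; assumes an existing Crawler named crawler):
     * <pre>
     *     EventLog.monitor(crawler);   // progress is now reported on standard error
     * </pre>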
     * @param crawler Crawler to be monitored
     */
    public static EventLog monitor(Crawler crawler) {
        EventLog logger = new EventLog(System.err);
        crawler.addCrawlListener(logger);
        crawler.addLinkListener(logger);
        return logger;
    }
}