001: /*
002: * WebSphinx web-crawling toolkit
003: *
004: * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
005: * reserved.
006: *
007: * Redistribution and use in source and binary forms, with or without
008: * modification, are permitted provided that the following conditions
009: * are met:
010: *
011: * 1. Redistributions of source code must retain the above copyright
012: * notice, this list of conditions and the following disclaimer.
013: *
014: * 2. Redistributions in binary form must reproduce the above copyright
015: * notice, this list of conditions and the following disclaimer in
016: * the documentation and/or other materials provided with the
017: * distribution.
018: *
019: * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
020: * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021: * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
022: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
023: * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
024: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
025: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
026: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
027: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
028: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
029: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
030: *
031: */
032:
033: package websphinx;
034:
035: import rcm.util.Timer;
036:
037: /**
038: * Run a crawler periodically.
039: */
040: public class Chronicle extends Timer implements Runnable {
041: Crawler crawler;
042: int interval;
043: boolean running = false;
044: boolean triggered = false;
045:
046: /**
047: * Make a Chronicle.
048: * @param crawler Crawler to run periodically
049: * @param interval Invocation interval, in seconds. Crawler is invoked
050: * every interval seconds. If the crawler is still running
051: * when interval seconds have elapsed, it is aborted.
052: *
053: */
054: public Chronicle(Crawler crawler, int interval) {
055: this .crawler = crawler;
056: this .interval = interval;
057: }
058:
059: /**
060: * Start chronicling. Starts a background thread which
061: * starts the crawler immediately, then re-runs the crawler
062: * every interval seconds from now until stop() is called.
063: */
064: public void start() {
065: if (running)
066: return;
067:
068: running = true;
069: set(interval * 1000, true);
070: Thread thread = new Thread(this , crawler.getName());
071: thread.start();
072: }
073:
074: /**
075: * Stop chronicling. Also stops the crawler, if it's currently running.
076: */
077: public synchronized void stop() {
078: if (!running)
079: return;
080:
081: running = false;
082: crawler.stop();
083: notify();
084: cancel();
085: }
086:
087: /**
088: * Background thread that runs the crawler. Clients shouldn't
089: * call this.
090: */
091: public synchronized void run() {
092: try {
093: while (running) {
094: crawler.run();
095: while (!triggered)
096: wait();
097: triggered = false;
098: }
099: } catch (InterruptedException e) {
100: }
101: }
102:
103: protected synchronized void alarm() {
104: crawler.stop();
105: triggered = true;
106: notify();
107: }
108:
109: //#ifdef JDK1.1
110: // FIX: allow crawler class name (starting up Workbench to configure it)
111: public static void main(String[] args) throws Exception {
112: java.io.ObjectInputStream in = new java.io.ObjectInputStream(
113: new java.io.FileInputStream(args[0]));
114: Crawler loadedCrawler = (Crawler) in.readObject();
115: in.close();
116:
117: EventLog.monitor(loadedCrawler);
118:
119: Chronicle track = new Chronicle(loadedCrawler, Integer
120: .parseInt(args[1]));
121: track.start();
122: }
123: //#endif JDK1.1
124: }
|