01: package net.matuschek.examples;
02:
03: /*********************************************
04: Copyright (c) 2001 by Daniel Matuschek
05: *********************************************/
06:
07: import java.net.URL;
08:
09: import net.matuschek.spider.WebRobot;
10: import net.matuschek.spider.WebRobotCallback;
11:
12: import org.apache.log4j.BasicConfigurator;
13:
14: /**
15: * This example program shows how it is possible
16: * to use the WebRobotCallback to stop after a given number
17: * of documents has been retrieved.
18: *
19: * @author Daniel Matuschek
20: * @version $Revision: 1.3 $
21: */
22: public class StopDownload {
23:
24: class DownloadStopper implements WebRobotCallback {
25:
26: /** maximal number of documents to retrieve */
27: int max = 0;
28:
29: /** current number of retrieved documents */
30: int count = 0;
31:
32: /** WebRobot to control */
33: WebRobot robot = null;
34:
35: public DownloadStopper(int max, WebRobot robot) {
36: this .max = max;
37: this .robot = robot;
38: }
39:
40: /**
41: * Increases the number of retrieved documents and stops
42: * the robot, if the number has reached the maximum
43: */
44: public void webRobotRetrievedDoc(String url, int size) {
45: count++;
46: if (count >= max) {
47: robot.stopRobot();
48: }
49: }
50:
51: // ignore these methods
52: public void webRobotDone() {
53: };
54:
55: public void webRobotSleeping(boolean sleeping) {
56: }
57:
58: public void webRobotUpdateQueueStatus(int length) {
59: }
60: }
61:
62: public StopDownload() {
63: }
64:
65: public void run() throws Exception {
66: WebRobot robby = new WebRobot();
67: robby.setStartURL(new URL("http://www.matuschek.net"));
68: robby.setMaxDepth(1);
69: robby.setSleepTime(0);
70:
71: // download only the first 5 documents
72: DownloadStopper stopit = new DownloadStopper(5, robby);
73: robby.setWebRobotCallback(stopit);
74:
75: robby.run();
76: }
77:
78: public static void main(String[] args) throws Exception {
79: BasicConfigurator.configure();
80: StopDownload stopper = new StopDownload();
81: stopper.run();
82: }
83: }
|