/*
 * Project: ExampleServerMultiThreaded
 *
 * $Id: ExampleServerMultiThreaded.java,v 1.2 2006/08/26 10:33:11 ltorunski Exp $
 */
package com.torunski.crawler.examples;

import java.util.Collection;
import java.util.Iterator;

import com.torunski.crawler.MultiThreadedCrawler;
import com.torunski.crawler.filter.ServerFilter;
import com.torunski.crawler.model.MaxIterationsModel;

/**
 * Example for ExampleServerMultiThreaded.
 *
 * Description: Starts multiple threads for downloading and parsing HTML pages.
 * Result: By making better use of the available bandwidth, the crawling process is faster.
 *
 * @author Lars Torunski
 * @version $Id: ExampleServerMultiThreaded.java,v 1.2 2006/08/26 10:33:11 ltorunski Exp $
 */
public class ExampleServerMultiThreaded {

    public static void main(String[] args) {

        if (args.length != 1) {
            System.out.println("ExampleServerMultiThreaded for Crawler");
            System.out.println("Usage: java com.torunski.crawler.examples.ExampleServerMultiThreaded [http server]");
            return;
        }

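        // Create a multi-threaded crawler; the two constructor arguments configure
        // the numbers of worker threads (see the MultiThreadedCrawler API for their exact meaning).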
        MultiThreadedCrawler crawler = new MultiThreadedCrawler(8, 1);
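        // Follow only links that stay on the server passed on the command line.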
        crawler.setLinkFilter(new ServerFilter(args[0]));
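        // Use a crawling model that stops after at most 32 iterations.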
        crawler.setModel(new MaxIterationsModel(32));
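        // Begin crawling at the root path "/" of the given server.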
        crawler.start(args[0], "/");

        // show visited links
        Collection visitedLinks = crawler.getModel().getVisitedURIs();
        System.out.println("Links visited=" + visitedLinks.size());

        Iterator list = visitedLinks.iterator();
        while (list.hasNext()) {
            System.out.println(list.next());
        }

        // show links not visited yet
        Collection notVisitedLinks = crawler.getModel().getToVisitURIs();

        System.out.println("Links NOT visited=" + notVisitedLinks.size());
        Iterator listNot = notVisitedLinks.iterator();
        while (listNot.hasNext()) {
            System.out.println(listNot.next());
        }
    }

}