01: /*
02: * Project: ExampleVakanzenLinkGraph
03: *
04: * $Id: ExampleVakanzenLinkGraph.java,v 1.2 2006/08/26 10:33:12 ltorunski Exp $
05: */
06: package com.torunski.crawler.examples;
07:
08: import java.util.Collection;
09: import java.util.Iterator;
10:
11: import com.torunski.crawler.Crawler;
12: import com.torunski.crawler.events.LinkGraphParserEventListener;
13: import com.torunski.crawler.filter.ServerFilter;
14: import com.torunski.crawler.link.Link;
15: import com.torunski.crawler.link.LinkGraph;
16: import com.torunski.crawler.model.MaxDepthModel;
17:
18: /**
19: * Example for the vakanzen site and a link graph.
20: *
21: * Description: Using the LinkGraphParserEventListener to create a link graph of the pages.
22: * Result: A graph of pages with incoming and outgoing links.
23: *
24: * @author Lars Torunski
25: * @version $Id: ExampleVakanzenLinkGraph.java,v 1.2 2006/08/26 10:33:12 ltorunski Exp $
26: */
27: public class ExampleVakanzenLinkGraph {
28:
29: private static final String SERVER = "http://www.oppenheim-karriere.de";
30:
31: private static final String START = "/Vakanzen_main";
32:
33: public static void main(String[] args) {
34:
35: LinkGraphParserEventListener graph = new LinkGraphParserEventListener();
36:
37: Crawler crawler = new Crawler();
38: crawler.setModel(new MaxDepthModel(2));
39: crawler.setLinkFilter(new ServerFilter(SERVER));
40: crawler.addParserListener(graph);
41:
42: crawler.start(SERVER, START);
43:
44: // the root
45: System.out.println("Origin = " + graph.getOrigin());
46:
47: // statistics
48: Collection visitedLinks = crawler.getModel().getVisitedURIs();
49: System.out.println("Links visited =" + visitedLinks.size());
50: System.out.println("Links unvisited="
51: + crawler.getModel().getToVisitURIs().size());
52: System.out
53: .println("Links in graph =" + graph.getLinks().size());
54:
55: // show link graph of the visited links
56: Iterator list = visitedLinks.iterator();
57: while (list.hasNext()) {
58: Link link = (Link) list.next();
59: System.out.println(link.getURI());
60:
61: LinkGraph linkGraph = graph.getLink(link.getURI());
62: Iterator inLinks = linkGraph.iteratorOfInLinks();
63:
64: while (inLinks.hasNext()) {
65: System.out.println("-> in: " + inLinks.next());
66: }
67:
68: Iterator outLinks = linkGraph.iteratorOfOutLinks();
69: while (outLinks.hasNext()) {
70: System.out.println("-> out: " + outLinks.next());
71: }
72: }
73:
74: }
75:
76: }
|