001: /*
002: * WebSphinx web-crawling toolkit
003: *
004: * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
005: * reserved.
006: *
007: * Redistribution and use in source and binary forms, with or without
008: * modification, are permitted provided that the following conditions
009: * are met:
010: *
011: * 1. Redistributions of source code must retain the above copyright
012: * notice, this list of conditions and the following disclaimer.
013: *
014: * 2. Redistributions in binary form must reproduce the above copyright
015: * notice, this list of conditions and the following disclaimer in
016: * the documentation and/or other materials provided with the
017: * distribution.
018: *
019: * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
020: * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021: * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
022: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
023: * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
024: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
025: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
026: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
027: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
028: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
029: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
030: *
031: */
032:
033: package websphinx.workbench;
034:
035: import websphinx.*;
036: import java.io.File;
037: import java.io.IOException;
038: import java.net.URL;
039: import java.net.MalformedURLException;
040:
041: public class MirrorAction implements Action, CrawlListener {
042: String directory;
043: boolean useBrowser;
044:
045: transient File dir;
046: transient Mirror mirror;
047:
048: public MirrorAction(String directory, boolean useBrowser) {
049: this .directory = directory;
050: this .useBrowser = useBrowser;
051: }
052:
053: public boolean equals(Object object) {
054: if (!(object instanceof MirrorAction))
055: return false;
056: MirrorAction a = (MirrorAction) object;
057: return same(a.directory, directory)
058: && a.useBrowser == useBrowser;
059: }
060:
061: private boolean same(String s1, String s2) {
062: if (s1 == null || s2 == null)
063: return s1 == s2;
064: else
065: return s1.equals(s2);
066: }
067:
068: public String getDirectory() {
069: return directory;
070: }
071:
072: public boolean getUseBrowser() {
073: return useBrowser;
074: }
075:
076: private void showit() {
077: Browser browser = Context.getBrowser();
078: if (browser != null)
079: try {
080: browser.show(Link.FileToURL(dir));
081: } catch (MalformedURLException e) {
082: }
083: }
084:
085: public synchronized void visit(Page page) {
086: try {
087: mirror.writePage(page);
088: } catch (IOException e) {
089: throw new RuntimeException(e.toString());
090: }
091: }
092:
093: public void connected(Crawler crawler) {
094: crawler.addCrawlListener(this );
095: }
096:
097: public void disconnected(Crawler crawler) {
098: crawler.removeCrawlListener(this );
099: }
100:
101: /**
102: * Notify that the crawler started.
103: */
104: public void started(CrawlEvent event) {
105: if (mirror == null) {
106: try {
107: dir = (directory != null) ? new File(directory)
108: : Access.getAccess().makeTemporaryFile(
109: "mirror", "");
110: mirror = new Mirror(dir.toString());
111:
112: Crawler crawler = event.getCrawler();
113: Link[] roots = crawler.getRoots();
114: for (int i = 0; i < roots.length; ++i)
115: mirror.mapDir(roots[i].getURL(), dir.toString());
116: } catch (IOException e) {
117: System.err.println(e); // FIX: use GUI when available
118: }
119: }
120: }
121:
122: /**
123: * Notify that the crawler ran out of links to crawl
124: */
125: public void stopped(CrawlEvent event) {
126: try {
127: if (mirror != null) {
128: mirror.close();
129: mirror = null;
130:
131: if (useBrowser)
132: showit();
133: }
134: } catch (IOException e) {
135: System.err.println(e); // FIX: use GUI when available
136: }
137: }
138:
139: /**
140: * Notify that the crawler's state was cleared.
141: */
142: public void cleared(CrawlEvent event) {
143: try {
144: if (mirror != null) {
145: mirror.close();
146: mirror = null;
147:
148: if (useBrowser)
149: showit();
150: }
151: } catch (IOException e) {
152: System.err.println(e); // FIX: use GUI when available
153: }
154: }
155:
156: /**
157: * Notify that the crawler timed out.
158: */
159: public void timedOut(CrawlEvent event) {
160: try {
161: if (mirror != null) {
162: mirror.close();
163: mirror = null;
164:
165: if (useBrowser)
166: showit();
167: }
168: } catch (IOException e) {
169: System.err.println(e); // FIX: use GUI when available
170: }
171: }
172:
173: /**
174: * Notify that the crawler is paused.
175: */
176: public void paused(CrawlEvent event) {
177: try {
178: if (mirror != null) {
179: mirror.rewrite();
180: if (useBrowser)
181: showit();
182: }
183: } catch (IOException e) {
184: System.err.println(e); // FIX: use GUI when available
185: }
186: }
187:
188: }
|