001: /*
002: * WebSphinx web-crawling toolkit
003: *
004: * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
005: * reserved.
006: *
007: * Redistribution and use in source and binary forms, with or without
008: * modification, are permitted provided that the following conditions
009: * are met:
010: *
011: * 1. Redistributions of source code must retain the above copyright
012: * notice, this list of conditions and the following disclaimer.
013: *
014: * 2. Redistributions in binary form must reproduce the above copyright
015: * notice, this list of conditions and the following disclaimer in
016: * the documentation and/or other materials provided with the
017: * distribution.
018: *
019: * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
020: * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021: * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
022: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
023: * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
024: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
025: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
026: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
027: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
028: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
029: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
030: *
031: */
032:
033: package websphinx.workbench;
034:
035: import websphinx.*;
036: import java.io.File;
037: import java.io.IOException;
038:
039: public class ConcatAction implements Action, CrawlListener {
040: String filename;
041: boolean useBrowser;
042: String prolog, header, footer, divider, epilog;
043:
044: transient File file;
045: transient Concatenator concat;
046:
047: public ConcatAction(String filename, boolean useBrowser) {
048: this .filename = filename;
049: this .useBrowser = useBrowser;
050: }
051:
052: public ConcatAction(String filename, boolean useBrowser,
053: String prolog, String header, String footer,
054: String divider, String epilog) {
055: this (filename, useBrowser);
056: this .prolog = prolog;
057: this .header = header;
058: this .footer = footer;
059: this .divider = divider;
060: this .epilog = epilog;
061: }
062:
063: public boolean equals(Object object) {
064: if (!(object instanceof ConcatAction))
065: return false;
066: ConcatAction a = (ConcatAction) object;
067: return same(a.filename, filename) && a.useBrowser == useBrowser;
068: }
069:
070: private boolean same(String s1, String s2) {
071: if (s1 == null || s2 == null)
072: return s1 == s2;
073: else
074: return s1.equals(s2);
075: }
076:
077: public String getFilename() {
078: return filename;
079: }
080:
081: public boolean getUseBrowser() {
082: return useBrowser;
083: }
084:
085: private transient boolean oldSync;
086:
087: public void connected(Crawler crawler) {
088: oldSync = crawler.getSynchronous();
089: crawler.setSynchronous(true);
090: crawler.addCrawlListener(this );
091: }
092:
093: public void disconnected(Crawler crawler) {
094: crawler.setSynchronous(oldSync);
095: crawler.removeCrawlListener(this );
096: }
097:
098: private void showit() {
099: Browser browser = Context.getBrowser();
100: if (browser != null)
101: browser.show(file);
102: }
103:
104: public synchronized void visit(Page page) {
105: try {
106: concat.writePage(page);
107: } catch (IOException e) {
108: throw new RuntimeException(e.toString());
109: }
110: }
111:
112: /**
113: * Notify that the crawler started.
114: */
115: public void started(CrawlEvent event) {
116: if (concat == null) {
117: try {
118: file = (filename != null) ? new File(filename) : Access
119: .getAccess().makeTemporaryFile("concat",
120: ".html");
121: concat = new Concatenator(file.toString());
122:
123: if (prolog != null)
124: concat.setProlog(prolog);
125: if (header != null)
126: concat.setPageHeader(header);
127: if (footer != null)
128: concat.setPageFooter(footer);
129: if (divider != null)
130: concat.setDivider(divider);
131: if (epilog != null)
132: concat.setEpilog(epilog);
133: } catch (IOException e) {
134: System.err.println(e); // FIX: use GUI when available
135: }
136: }
137: }
138:
139: /**
140: * Notify that the crawler ran out of links to crawl
141: */
142: public void stopped(CrawlEvent event) {
143: if (concat != null) {
144: try {
145: concat.close();
146: concat = null;
147: if (useBrowser)
148: showit();
149: } catch (IOException e) {
150: System.err.println(e); // FIX: use GUI when available
151: }
152: }
153: }
154:
155: /**
156: * Notify that the crawler's state was cleared.
157: */
158: public void cleared(CrawlEvent event) {
159: try {
160: if (concat != null) {
161: concat.close();
162: concat = null;
163: if (useBrowser)
164: showit();
165: }
166: } catch (IOException e) {
167: System.err.println(e); // FIX: use GUI when available
168: }
169: }
170:
171: /**
172: * Notify that the crawler timed out.
173: */
174: public void timedOut(CrawlEvent event) {
175: try {
176: if (concat != null) {
177: concat.close();
178: concat = null;
179: if (useBrowser)
180: showit();
181: }
182: } catch (IOException e) {
183: System.err.println(e); // FIX: use GUI when available
184: }
185: }
186:
187: /**
188: * Notify that the crawler is paused.
189: */
190: public void paused(CrawlEvent event) {
191: try {
192: if (concat != null) {
193: concat.rewrite();
194: if (useBrowser)
195: showit();
196: }
197: } catch (IOException e) {
198: System.err.println(e); // FIX: use GUI when available
199: }
200: }
201:
202: }
|