001: /*
002: * WebSphinx web-crawling toolkit
003: *
004: * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
005: * reserved.
006: *
007: * Redistribution and use in source and binary forms, with or without
008: * modification, are permitted provided that the following conditions
009: * are met:
010: *
011: * 1. Redistributions of source code must retain the above copyright
012: * notice, this list of conditions and the following disclaimer.
013: *
014: * 2. Redistributions in binary form must reproduce the above copyright
015: * notice, this list of conditions and the following disclaimer in
016: * the documentation and/or other materials provided with the
017: * distribution.
018: *
019: * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
020: * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021: * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
022: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
023: * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
024: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
025: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
026: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
027: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
028: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
029: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
030: *
031: */
032:
033: package websphinx.workbench;
034:
035: import websphinx.*;
036: import java.io.IOException;
037: import rcm.awt.PopupDialog;
038:
039: public class Script implements Action, LinkPredicate, PagePredicate {
040: String script;
041: boolean asLinkPredicate;
042:
043: transient Crawler crawler;
044: transient ScriptInterpreter interp;
045: transient Object function;
046:
047: public Script(String script, boolean asLinkPredicate) {
048: this .script = script;
049: this .asLinkPredicate = asLinkPredicate;
050: }
051:
052: public String getScript() {
053: return script;
054: }
055:
056: public boolean equals(Object object) {
057: if (!(object instanceof Script))
058: return false;
059: Script s = (Script) object;
060: return s.script.equals(script)
061: && s.asLinkPredicate == asLinkPredicate;
062: }
063:
064: static String[] argsLink = { "crawler", "link" };
065: static String[] argsPage = { "crawler", "page" };
066:
067: public void connected(Crawler crawler) {
068: this .crawler = crawler;
069: interp = Context.getScriptInterpreter();
070: if (interp != null) {
071: try {
072: function = interp.lambda(asLinkPredicate ? argsLink
073: : argsPage, script);
074: } catch (ScriptException e) {
075: PopupDialog.warn(null, "Script Error", e.toString());
076: function = null;
077: }
078: }
079: }
080:
081: public void disconnected(Crawler crawler) {
082: crawler = null;
083: interp = null;
084: function = null;
085: }
086:
087: public boolean shouldVisit(Link link) {
088: try {
089: if (interp == null || function == null)
090: // FIX: use GUI to signal error
091: throw new ScriptException(
092: "Scripting language is not available");
093:
094: Object[] args = new Object[2];
095: args[0] = crawler;
096: args[1] = link;
097: return toBool(interp.apply(function, args));
098: } catch (ScriptException e) {
099: System.err.println(e); // FIX: use GUI when available
100: return false;
101: }
102: }
103:
104: public boolean shouldActOn(Page page) {
105: try {
106: if (interp == null || function == null)
107: throw new ScriptException(
108: "Scripting language is not available");
109:
110: Object[] args = new Object[2];
111: args[0] = crawler;
112: args[1] = page;
113: return toBool(interp.apply(function, args));
114: } catch (ScriptException e) {
115: System.err.println(e); // FIX: use GUI when available
116: return false;
117: }
118: }
119:
120: public void visit(Page page) {
121: try {
122: if (interp == null || function == null)
123: // FIX: use GUI to signal error
124: throw new ScriptException(
125: "Scripting language is not available");
126:
127: Object[] args = new Object[2];
128: args[0] = crawler;
129: args[1] = page;
130: interp.apply(function, args);
131: } catch (ScriptException e) {
132: throw new RuntimeException(e.toString());
133: }
134: }
135:
136: boolean toBool(Object obj) {
137: if (!(obj instanceof Boolean))
138: return false;
139: return ((Boolean) obj).booleanValue();
140: }
141:
142: }
|