001: /* BeanShellDecideRule
002: *
003: * $Id: BeanShellDecideRule.java 4649 2006-09-25 17:16:55Z paul_jack $
004: *
005: * Created on Aug 7, 2006
006: *
007: * Copyright (C) 2006 Internet Archive.
008: *
009: * This file is part of the Heritrix web crawler (crawler.archive.org).
010: *
011: * Heritrix is free software; you can redistribute it and/or modify
012: * it under the terms of the GNU Lesser Public License as published by
013: * the Free Software Foundation; either version 2.1 of the License, or
014: * any later version.
015: *
016: * Heritrix is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
019: * GNU Lesser Public License for more details.
020: *
021: * You should have received a copy of the GNU Lesser Public License
022: * along with Heritrix; if not, write to the Free Software
023: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
024: */
025: package org.archive.crawler.deciderules;
026:
027: import java.io.File;
028: import java.io.IOException;
029: import java.util.Collections;
030: import java.util.HashMap;
031: import java.util.Map;
032: import java.util.logging.Level;
033: import java.util.logging.Logger;
034:
035: import org.archive.crawler.settings.SimpleType;
036: import org.archive.crawler.settings.Type;
037:
038: import bsh.EvalError;
039: import bsh.Interpreter;
040:
041: /**
042: * Rule which runs a groovy script to make its decision.
043: *
044: * Script source may be provided via a file local to the crawler.
045: *
046: * Variables available to the script include 'object' (the object to be
047: * evaluated, typically a CandidateURI or CrawlURI), 'self'
048: * (this GroovyDecideRule instance), and 'controller' (the crawl's
049: * CrawlController instance).
050: *
051: * TODO: reduce copy & paste with GroovyProcessor
052: *
053: * @author gojomo
054: */
055: public class BeanShellDecideRule extends DecideRule {
056:
057: private static final long serialVersionUID = -8433859929199308527L;
058:
059: private static final Logger logger = Logger
060: .getLogger(BeanShellDecideRule.class.getName());
061:
062: /** setting for script file */
063: public final static String ATTR_SCRIPT_FILE = "script-file";
064:
065: /** whether each thread should have its own script runner (true), or
066: * they should share a single script runner with synchronized access */
067: public final static String ATTR_ISOLATE_THREADS = "isolate-threads";
068:
069: protected ThreadLocal<Interpreter> threadInterpreter = new ThreadLocal<Interpreter>();;
070: protected Interpreter sharedInterpreter;
071: public Map<Object, Object> sharedMap = Collections
072: .synchronizedMap(new HashMap<Object, Object>());
073: protected boolean initialized = false;
074:
075: public BeanShellDecideRule(String name) {
076: super (name);
077: setDescription("BeanShellDecideRule. Runs the BeanShell script "
078: + "source (supplied via a file path) against "
079: + "the current URI. Source should define a script method "
080: + "'decisionFor(object)' which will be passed the object"
081: + "to be evaluated and returns one of self.ACCEPT, "
082: + "self.REJECT, or self.PASS. "
083: + "The script may access this BeanShellDecideRule via"
084: + "the 'self' variable and the CrawlController via the "
085: + "'controller' variable. Runs the groovy script source "
086: + "(supplied via a file path) against the "
087: + "current URI.");
088: Type t = addElementToDefinition(new SimpleType(
089: ATTR_SCRIPT_FILE, "BeanShell script file", ""));
090: t.setOverrideable(false);
091: t = addElementToDefinition(new SimpleType(
092: ATTR_ISOLATE_THREADS,
093: "Whether each ToeThread should get its own independent "
094: + "script context, or they should share synchronized access "
095: + "to one context. Default is true, meaning each threads "
096: + "gets its own isolated context.", true));
097: t.setOverrideable(false);
098: }
099:
100: public synchronized Object decisionFor(Object object) {
101: // depending on previous configuration, interpreter may
102: // be local to this thread or shared
103: Interpreter interpreter = getInterpreter();
104: synchronized (interpreter) {
105: // synchronization is harmless for local thread interpreter,
106: // necessary for shared interpreter
107: try {
108: interpreter.set("object", object);
109: return interpreter.eval("decisionFor(object)");
110: } catch (EvalError e) {
111: // TODO Auto-generated catch block
112: e.printStackTrace();
113: return PASS;
114: }
115: }
116: }
117:
118: /**
119: * Get the proper Interpreter instance -- either shared or local
120: * to this thread.
121: * @return Interpreter to use
122: */
123: protected Interpreter getInterpreter() {
124: if (sharedInterpreter == null
125: && !(Boolean) getUncheckedAttribute(null,
126: ATTR_ISOLATE_THREADS)) {
127: // initialize
128: sharedInterpreter = newInterpreter();
129: }
130: if (sharedInterpreter != null) {
131: return sharedInterpreter;
132: }
133: Interpreter interpreter = threadInterpreter.get();
134: if (interpreter == null) {
135: interpreter = newInterpreter();
136: threadInterpreter.set(interpreter);
137: }
138: return interpreter;
139: }
140:
141: /**
142: * Create a new Interpreter instance, preloaded with any supplied
143: * source file and the variables 'self' (this
144: * BeanShellProcessor) and 'controller' (the CrawlController).
145: *
146: * @return the new Interpreter instance
147: */
148: protected Interpreter newInterpreter() {
149: Interpreter interpreter = new Interpreter();
150: try {
151: interpreter.set("self", this );
152: interpreter.set("controller", getController());
153:
154: String filePath = (String) getUncheckedAttribute(null,
155: ATTR_SCRIPT_FILE);
156: if (filePath.length() > 0) {
157: try {
158: File file = getSettingsHandler()
159: .getPathRelativeToWorkingDirectory(filePath);
160: interpreter.source(file.getPath());
161: } catch (IOException e) {
162: logger.log(Level.SEVERE,
163: "unable to read script file", e);
164: }
165: }
166: } catch (EvalError e) {
167: // TODO Auto-generated catch block
168: e.printStackTrace();
169: }
170:
171: return interpreter;
172: }
173:
174: /**
175: * Setup (or reset) Intepreter variables, as appropraite based on
176: * thread-isolation setting.
177: */
178: public void kickUpdate() {
179: // TODO make it so running state (tallies, etc.) isn't lost on changes
180: // unless unavoidable
181: if ((Boolean) getUncheckedAttribute(null, ATTR_ISOLATE_THREADS)) {
182: sharedInterpreter = null;
183: threadInterpreter = new ThreadLocal<Interpreter>();
184: } else {
185: sharedInterpreter = newInterpreter();
186: threadInterpreter = null;
187: }
188: }
189: }
|