001: /* Copyright (c) 2006-2007, Vladimir Nikic
002: All rights reserved.
003:
004: Redistribution and use of this software in source and binary forms,
005: with or without modification, are permitted provided that the following
006: conditions are met:
007:
008: * Redistributions of source code must retain the above
009: copyright notice, this list of conditions and the
010: following disclaimer.
011:
012: * Redistributions in binary form must reproduce the above
013: copyright notice, this list of conditions and the
014: following disclaimer in the documentation and/or other
015: materials provided with the distribution.
016:
017: * The name of Web-Harvest may not be used to endorse or promote
018: products derived from this software without specific prior
019: written permission.
020:
021: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
022: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
023: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
024: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
025: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
026: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
027: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
028: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
029: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
030: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
031: POSSIBILITY OF SUCH DAMAGE.
032:
033: You can contact Vladimir Nikic by sending e-mail to
034: nikic_vladimir@yahoo.com. Please include the word "Web-Harvest" in the
035: subject line.
036: */
037: package org.webharvest.runtime;
038:
039: import org.apache.log4j.Logger;
040: import org.webharvest.definition.IElementDef;
041: import org.webharvest.definition.ScraperConfiguration;
042: import org.webharvest.runtime.processors.BaseProcessor;
043: import org.webharvest.runtime.processors.CallProcessor;
044: import org.webharvest.runtime.processors.HttpProcessor;
045: import org.webharvest.runtime.processors.ProcessorResolver;
046: import org.webharvest.runtime.scripting.ScriptEngine;
047: import org.webharvest.runtime.variables.IVariable;
048: import org.webharvest.runtime.variables.NodeVariable;
049: import org.webharvest.runtime.web.HttpClientManager;
050: import org.webharvest.utils.CommonUtil;
051: import org.webharvest.utils.Stack;
052:
053: import java.util.Iterator;
054: import java.util.List;
055:
056: /**
057: * Basic runtime class.
058: */
059: public class Scraper {
060:
061: protected static Logger log = Logger.getLogger(Scraper.class);
062:
063: private ScraperConfiguration configuration;
064: private String workingDir;
065: private ScraperContext context;
066:
067: private transient boolean isDebugMode = false;
068:
069: private HttpClientManager httpClientManager;
070:
071: // stack of running functions
072: private transient Stack runningFunctions = new Stack();
073:
074: // stack of running http processors
075: private transient Stack runningHttpProcessors = new Stack();
076:
077: // shows depth of running processors during execution
078: private transient int runningLevel = 1;
079:
080: // default script engine used throughout the configuration execution
081: ScriptEngine scriptEngine = null;
082:
083: /**
084: * Constructor.
085: * @param configuration
086: * @param workingDir
087: */
088: public Scraper(ScraperConfiguration configuration, String workingDir) {
089: this .configuration = configuration;
090: this .workingDir = CommonUtil.adaptFilename(workingDir);
091:
092: this .httpClientManager = new HttpClientManager();
093:
094: this .context = new ScraperContext();
095: this .scriptEngine = new ScriptEngine(this .context);
096: }
097:
098: /**
099: * Adds parameter with specified name and value to the context.
100: * This way some predefined variables can be put in runtime context
101: * before execution starts.
102: * @param name
103: * @param value
104: */
105: public void addVariableToContext(String name, Object value) {
106: this .context.put(name, new NodeVariable(value));
107: }
108:
109: public IVariable execute(List ops) {
110: Iterator it = ops.iterator();
111: while (it.hasNext()) {
112: IElementDef elementDef = (IElementDef) it.next();
113: BaseProcessor processor = ProcessorResolver
114: .createProcessor(elementDef);
115:
116: if (processor != null) {
117: processor.run(this , context);
118: }
119: }
120:
121: return new NodeVariable("");
122: }
123:
124: public void execute() {
125: long startTime = System.currentTimeMillis();
126: execute(configuration.getOperations());
127: log.info("Configuration executed in "
128: + (System.currentTimeMillis() - startTime) + "ms.");
129: }
130:
131: public ScraperContext getContext() {
132: return context;
133: }
134:
135: public ScraperConfiguration getConfiguration() {
136: return configuration;
137: }
138:
139: public String getWorkingDir() {
140: return this .workingDir;
141: }
142:
143: public HttpClientManager getHttpClientManager() {
144: return httpClientManager;
145: }
146:
147: public void addRunningFunction(CallProcessor callProcessor) {
148: runningFunctions.push(callProcessor);
149: }
150:
151: public CallProcessor getRunningFunction() {
152: return (CallProcessor) runningFunctions.peek();
153: }
154:
155: public void removeRunningFunction() {
156: if (runningFunctions.size() > 0) {
157: runningFunctions.pop();
158: }
159: }
160:
161: public HttpProcessor getRunningHttpProcessor() {
162: return (HttpProcessor) runningHttpProcessors.peek();
163: }
164:
165: public void setRunningHttpProcessor(HttpProcessor httpProcessor) {
166: runningHttpProcessors.push(httpProcessor);
167: }
168:
169: public void removeRunningHttpProcessor() {
170: if (runningHttpProcessors.size() > 0) {
171: runningHttpProcessors.pop();
172: }
173: }
174:
175: public void increaseRunningLevel() {
176: this .runningLevel++;
177: }
178:
179: public void decreaseRunningLevel() {
180: this .runningLevel--;
181: }
182:
183: public int getRunningLevel() {
184: return runningLevel;
185: }
186:
187: public boolean isDebugMode() {
188: return isDebugMode;
189: }
190:
191: public void setDebug(boolean debug) {
192: this .isDebugMode = debug;
193: }
194:
195: public ScriptEngine getScriptEngine() {
196: return runningFunctions.size() > 0 ? getRunningFunction()
197: .getScriptEngine() : this.scriptEngine;
198: }
199:
200: }
|