001: /* Copyright (c) 2006-2007, Vladimir Nikic
002: All rights reserved.
003:
004: Redistribution and use of this software in source and binary forms,
005: with or without modification, are permitted provided that the following
006: conditions are met:
007:
008: * Redistributions of source code must retain the above
009: copyright notice, this list of conditions and the
010: following disclaimer.
011:
012: * Redistributions in binary form must reproduce the above
013: copyright notice, this list of conditions and the
014: following disclaimer in the documentation and/or other
015: materials provided with the distribution.
016:
017: * The name of Web-Harvest may not be used to endorse or promote
018: products derived from this software without specific prior
019: written permission.
020:
021: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
022: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
023: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
024: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
025: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
026: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
027: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
028: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
029: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
030: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
031: POSSIBILITY OF SUCH DAMAGE.
032:
033: You can contact Vladimir Nikic by sending e-mail to
034: nikic_vladimir@yahoo.com. Please include the word "Web-Harvest" in the
035: subject line.
036: */
037: package org.webharvest.runtime.processors;
038:
039: import java.io.UnsupportedEncodingException;
040: import java.util.*;
041:
042: import org.apache.commons.httpclient.NameValuePair;
043: import org.webharvest.definition.HttpDef;
044: import org.webharvest.exception.HttpException;
045: import org.webharvest.runtime.Scraper;
046: import org.webharvest.runtime.ScraperContext;
047: import org.webharvest.runtime.scripting.ScriptEngine;
048: import org.webharvest.runtime.templaters.BaseTemplater;
049: import org.webharvest.runtime.variables.*;
050: import org.webharvest.runtime.web.HttpClientManager;
051: import org.webharvest.runtime.web.HttpResponseWrapper;
052:
053: /**
054: * Http processor.
055: */
056: public class HttpProcessor extends BaseProcessor {
057:
058: private HttpDef httpDef;
059:
060: List httpParams = new ArrayList();
061: Map httpHeaderMap = new HashMap();
062:
063: public HttpProcessor(HttpDef httpDef) {
064: super (httpDef);
065: this .httpDef = httpDef;
066: }
067:
068: public IVariable execute(Scraper scraper, ScraperContext context) {
069: scraper.setRunningHttpProcessor(this );
070:
071: ScriptEngine scriptEngine = scraper.getScriptEngine();
072: String url = BaseTemplater.execute(httpDef.getUrl(),
073: scriptEngine);
074: String method = BaseTemplater.execute(httpDef.getMethod(),
075: scriptEngine);
076: String charset = BaseTemplater.execute(httpDef.getCharset(),
077: scriptEngine);
078: String username = BaseTemplater.execute(httpDef.getUsername(),
079: scriptEngine);
080: String password = BaseTemplater.execute(httpDef.getPassword(),
081: scriptEngine);
082:
083: if (charset == null) {
084: charset = scraper.getConfiguration().getCharset();
085: }
086:
087: // executes body of HTTP processor
088: executeBody(httpDef, scraper, context);
089:
090: HttpClientManager manager = scraper.getHttpClientManager();
091:
092: HttpResponseWrapper res = manager.execute(method, url, charset,
093: username, password, httpParams, httpHeaderMap);
094:
095: scraper.removeRunningHttpProcessor();
096:
097: String mimeType = res.getMimeType();
098:
099: log.info("Downloaded: " + url + ", mime type = " + mimeType
100: + ", length = " + res.getBody().length + "B.");
101:
102: IVariable result;
103:
104: if (mimeType == null
105: || mimeType.toLowerCase().indexOf("text") == 0) {
106: String text;
107: try {
108: text = new String(res.getBody(), charset);
109: } catch (UnsupportedEncodingException e) {
110: throw new HttpException("Charset " + charset
111: + " is not supported!", e);
112: }
113:
114: result = new NodeVariable(text);
115: } else {
116: result = new NodeVariable(res.getBody());
117: }
118:
119: return result;
120: }
121:
122: protected void addHttpParam(String name, String value) {
123: httpParams.add(new NameValuePair(name, value));
124: }
125:
126: protected void addHttpHeader(String name, String value) {
127: httpHeaderMap.put(name, value);
128: }
129:
130: }
|