001: /*
002: * This file is part of the WfMOpen project.
003: * Copyright (C) 2001-2004 Danet GmbH (www.danet.de), GS-AN.
004: * All rights reserved.
005: *
006: * This program is free software; you can redistribute it and/or modify
007: * it under the terms of the GNU General Public License as published by
008: * the Free Software Foundation; either version 2 of the License, or
009: * (at your option) any later version.
010: *
011: * This program is distributed in the hope that it will be useful,
012: * but WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014: * GNU General Public License for more details.
015: *
016: * You should have received a copy of the GNU General Public License
017: * along with this program; if not, write to the Free Software
018: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
019: *
020: * $Id: BrowserTool.java,v 1.2 2006/09/29 12:32:12 drmlipp Exp $
021: *
022: * $Log: BrowserTool.java,v $
023: * Revision 1.2 2006/09/29 12:32:12 drmlipp
024: * Consistently using WfMOpen as projct name now.
025: *
026: * Revision 1.1.1.1 2004/08/18 15:17:39 drmlipp
027: * Update to 1.2
028: *
029: * Revision 1.5 2004/04/07 20:42:24 lipp
030: * Added authentication.
031: *
032: * Revision 1.4 2004/04/07 19:47:35 lipp
033: * Added proxy support.
034: *
035: * Revision 1.3 2004/04/07 15:46:52 lipp
036: * Using tagsoup now instead of jtidy.
037: *
038: * Revision 1.2 2004/04/06 21:19:07 lipp
039: * Several fixes.
040: *
041: * Revision 1.1 2004/04/06 15:34:57 lipp
042: * Initial version.
043: *
044: */
045: package de.danet.an.workflow.tools.webclient;
046:
047: import java.io.IOException;
048: import java.io.InputStream;
049:
050: import java.util.HashMap;
051: import java.util.Map;
052: import java.util.regex.Pattern;
053:
054: import java.rmi.RemoteException;
055:
056: import org.apache.commons.httpclient.HostConfiguration;
057: import org.apache.commons.httpclient.HttpClient;
058: import org.apache.commons.httpclient.HttpMethod;
059: import org.apache.commons.httpclient.HttpRecoverableException;
060: import org.apache.commons.httpclient.UsernamePasswordCredentials;
061: import org.apache.commons.httpclient.methods.GetMethod;
062: import org.apache.commons.httpclient.methods.PostMethod;
063:
064: import org.ccil.cowan.tagsoup.Parser;
065: import org.xml.sax.InputSource;
066: import org.xml.sax.SAXException;
067:
068: import de.danet.an.workflow.util.SAXEventBufferImpl;
069:
070: import de.danet.an.workflow.api.Activity;
071: import de.danet.an.workflow.api.FormalParameter;
072:
073: import de.danet.an.workflow.spis.aii.ApplicationNotStoppedException;
074: import de.danet.an.workflow.spis.aii.CannotExecuteException;
075: import de.danet.an.workflow.spis.aii.ContextRequester;
076: import de.danet.an.workflow.spis.aii.ResultProvider;
077: import de.danet.an.workflow.spis.aii.ResultProvider.ExceptionResult;
078: import de.danet.an.workflow.spis.aii.ToolAgent;
079: import de.danet.an.workflow.spis.aii.ToolAgentContext;
080:
081: /**
082: * This class provides a tool for accessing HTML pages in the internet.
083: *
084: * @author <a href="mailto:lipp@danet.de">Michael Lipp</a>
085: * @version $Revision: 1.2 $
086: */
087:
088: public class BrowserTool implements ToolAgent, ResultProvider,
089: ContextRequester {
090:
091: private static final org.apache.commons.logging.Log logger = org.apache.commons.logging.LogFactory
092: .getLog(BrowserTool.class);
093:
094: private static String proxyHost = System
095: .getProperty("http.proxyHost");
096: private static int proxyPort = 80;
097: private static String nonProxyHosts = System
098: .getProperty("http.nonProxyHosts");
099: private static Pattern nonProxyHostsPattern = null;
100: static {
101: String port = System.getProperty("http.proxyPort");
102: if (proxyHost != null && port != null) {
103: proxyPort = Integer.parseInt(port);
104: }
105: if (nonProxyHosts != null) {
106: String s = Pattern.compile("\\.").matcher(nonProxyHosts)
107: .replaceAll("\\\\.");
108: s = Pattern.compile("\\*").matcher(s).replaceAll(".*?");
109: s = Pattern.compile("\\|").matcher("(" + s + ")")
110: .replaceAll(")|(");
111: logger.debug("http.nonProxyHosts pattern is: " + s);
112: nonProxyHostsPattern = Pattern.compile(s);
113: }
114: }
115:
116: /** Authenticate? */
117: private boolean authenticate = false;
118: private String authRealm = null;
119: private String authUsername = null;
120: private String authPassword = null;
121:
122: /** The result container. */
123: private ThreadLocal result = new ThreadLocal();
124:
125: /** The cached workflow engine context. */
126: private ThreadLocal ctx = new ThreadLocal();
127:
128: /**
129: * Creates an instance of <code>BrowserTool</code>
130: * with all attributes initialized to default values.
131: */
132: public BrowserTool() {
133: }
134:
135: /**
136: * Determine if request is to be authenticated.
137: * @param value "<code>true</code>" or "<code>false</code>"
138: */
139: public void setAuthenticate(String value) {
140: authenticate = Boolean.valueOf(value).booleanValue();
141: }
142:
143: /**
144: * Set the realm used for authentication. If set, implies that the
145: * request is authticated, and no realm parameter is expected as
146: * argument to invoke.
147: * @param value the value to be used
148: */
149: public void setRealm(String value) {
150: authRealm = value;
151: authenticate = true;
152: }
153:
154: /**
155: * Set the username used for authentication. If set, implies that
156: * the request is authticated, and no username parameter is
157: * expected as argument to invoke.
158: * @param value the value to be used
159: */
160: public void setUsername(String value) {
161: authUsername = value;
162: authenticate = true;
163: }
164:
165: /**
166: * Set the password used for authentication. If set, implies that
167: * the request is authticated, and no password parameter is
168: * expected as argument to invoke.
169: * @param value the value to be used
170: */
171: public void setPassword(String value) {
172: authPassword = value;
173: authenticate = true;
174: }
175:
176: /**
177: * Makes an engine context available to the tool agent.
178: * @param context the engine context
179: */
180: public void setToolAgentContext(ToolAgentContext context) {
181: ctx.set(context);
182: }
183:
184: // Implementation of de.danet.an.workflow.spis.aii.ToolAgent
185:
186: /* Comment copied from interface. */
187: public void invoke(Activity act, FormalParameter[] formPars,
188: Map actPars) throws CannotExecuteException, RemoteException {
189: int paramIdx = 0;
190: String statusPN = formPars[paramIdx++].id();
191: String resultPN = formPars[paramIdx++].id();
192: String realm = authRealm;
193: String username = authUsername;
194: String password = authPassword;
195: if (authenticate) {
196: if (realm == null) {
197: realm = (String) actPars.get(formPars[paramIdx++].id());
198: }
199: if (username == null) {
200: username = (String) actPars.get(formPars[paramIdx++]
201: .id());
202: }
203: if (password == null) {
204: password = (String) actPars.get(formPars[paramIdx++]
205: .id());
206: }
207: }
208: String methodPN = formPars[paramIdx++].id();
209: String urlPN = formPars[paramIdx++].id();
210:
211: HttpClient client = new HttpClient();
212: String url = (String) actPars.get(urlPN);
213: if (url == null || url.length() == 0) {
214: throw new CannotExecuteException(
215: "Second parameter must be valid url.");
216: }
217: String methodParm = (String) actPars.get(methodPN);
218: HttpMethod method = null;
219: if (methodParm.equals("GET")) {
220: method = new GetMethod(url);
221: } else if (methodParm.equals("POST")) {
222: method = new PostMethod(url);
223: } else {
224: throw new CannotExecuteException(
225: "First parameter must be \"GET\" or \"POST\", is \""
226: + methodParm + "\"");
227: }
228: HostConfiguration hc = method.getHostConfiguration();
229: if (hc != null && proxyHost != null) {
230: if (nonProxyHostsPattern == null
231: || !nonProxyHostsPattern.matcher(hc.getHost())
232: .matches()) {
233: if (logger.isDebugEnabled()) {
234: logger.debug("Using proxy to access " + url);
235: }
236: hc.setProxy(proxyHost, proxyPort);
237: } else {
238: if (logger.isDebugEnabled()) {
239: logger.debug("Accessing " + url + " directly");
240: }
241: }
242: }
243: if (authenticate) {
244: client.getState()
245: .setCredentials(
246: realm,
247: hc.getHost(),
248: new UsernamePasswordCredentials(username,
249: password));
250: }
251: int statusCode = -1;
252: try {
253: for (int attempts = 0; attempts < 3; attempts += 1) {
254: try {
255: statusCode = client.executeMethod(method);
256: break;
257: } catch (HttpRecoverableException e) {
258: logger.info("Recoverable exception accessing \""
259: + url + "\": " + e.getMessage());
260: }
261: }
262: } catch (IOException e) {
263: logger.warn("Problem accessing \"" + url
264: + "\" (abandoning): " + e.getMessage());
265: result.set(new ExceptionResult("CannotAccessException"));
266: return;
267: }
268: if (statusCode == -1) {
269: result.set(new ExceptionResult(
270: "RetryCountExceededException"));
271: return;
272: }
273: Map res = new HashMap();
274: result.set(res);
275: res.put(statusPN, new Integer(statusCode));
276:
277: InputStream in = null;
278: try {
279: in = method.getResponseBodyAsStream();
280: } catch (IOException e) {
281: String msg = "Cannot get response body as stream: "
282: + e.getMessage();
283: logger.error(msg, e);
284: throw new CannotExecuteException(msg);
285: }
286: if (in == null) {
287: res.put(resultPN, null);
288: return;
289: }
290:
291: Parser parser = new Parser();
292: SAXEventBufferImpl sb = new SAXEventBufferImpl();
293: parser.setContentHandler(sb);
294: try {
295: parser.parse(new InputSource(in));
296: } catch (IOException e) {
297: throw new CannotExecuteException(
298: "Cannot parse response body: " + e.getMessage());
299: } catch (SAXException e) {
300: throw new CannotExecuteException(
301: "Cannot parse response body: " + e.getMessage());
302: }
303: sb.pack();
304: res.put(resultPN, sb);
305: }
306:
307: /**
308: * Return the result evaluated during {@link ToolAgent#invoke
309: * <code>invoke</code>}. The method will only be called once after
310: * each invoke, i.e. the attribute holding the result be be
311: * cleared in this method.
312: *
313: * @return the result data or <code>null</code> if the invocation
314: * does not return any data.
315: */
316: public Object result() {
317: Object res = result.get();
318: result.set(null);
319: return res;
320: }
321:
322: /* Comment copied from interface. */
323: public void terminate(Activity activity)
324: throws ApplicationNotStoppedException, RemoteException {
325: throw new ApplicationNotStoppedException(
326: "Terminate not implemented for BrowserTool.");
327: }
328:
329: }
|