001: /**********************************************************************************
002: *
003: * Copyright (c) 2003, 2004 The Regents of the University of Michigan, Trustees of Indiana University,
004: * Board of Trustees of the Leland Stanford, Jr., University, and The MIT Corporation
005: *
006: * Licensed under the Educational Community License Version 1.0 (the "License");
007: * By obtaining, using and/or copying this Original Work, you agree that you have read,
008: * understand, and will comply with the terms and conditions of the Educational Community License.
009: * You may obtain a copy of the License at:
010: *
011: * http://cvs.sakaiproject.org/licenses/license_1_0.html
012: *
013: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
014: * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
015: * AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
016: * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
017: * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
018: *
019: **********************************************************************************/package edu.indiana.lib.twinpeaks.search;
020:
021: import edu.indiana.lib.twinpeaks.net.*;
022: import edu.indiana.lib.twinpeaks.util.*;
023:
024: import java.io.*;
025: import java.net.*;
026: import java.util.*;
027:
028: import javax.servlet.*;
029: import javax.servlet.http.*;
030: import javax.xml.parsers.*;
031:
032: import org.w3c.dom.*;
033: import org.xml.sax.*;
034:
035: /**
036: * Base class for HTTP search activities
037: */
038: public abstract class HttpTransactionQueryBase extends QueryBase
039: implements HttpTransactionQueryInterface {
040:
041: private static org.apache.commons.logging.Log _log = LogUtils
042: .getLog(HttpTransactionQueryBase.class);
043:
044: /**
045: * Name of the cookie List (stored with session context)
046: */
047: private static final String COOKIELIST = "CookieList";
048: /**
049: * The stored query URL (stored with session context)
050: */
051: private static final String QUERYURL = "QueryUrl";
052: /**
053: * Stored query form (stored with session context)
054: */
055: private static final String QUERYFORM = "QueryForm";
056: /**
057: * General purpose parameter name prefix
058: */
059: private static final String GP_PREFIX = "GP_";
060:
061: private HttpTransaction _transaction;
062: private boolean _followRedirects;
063: private int _redirectBehavior;
064: private String _method;
065: private String _searchString;
066: private String _url;
067: private String _searchResult;
068: private SessionContext _session;
069:
070: /**
071: * Constructor
072: */
073: public HttpTransactionQueryBase() {
074: _transaction = null;
075: _session = null;
076: _method = "POST";
077: _followRedirects = false;
078: _searchString = null;
079: _url = null;
080: _searchResult = null;
081: }
082:
083: /**
084: * One time initialization
085: * @param session SessionContext object
086: */
087: public void initialize(SessionContext session) {
088:
089: _session = session;
090:
091: if ((_session.get(COOKIELIST)) == null) {
092: _session.put(COOKIELIST, CookieUtils.newCookieList());
093: }
094: _transaction = new HttpTransaction();
095: _transaction.initialize((List) _session.get(COOKIELIST));
096: }
097:
098: /**
099: * Set search URL
100: * @param url URL string
101: */
102: public void setUrl(String url) {
103: _url = url;
104: }
105:
106: /**
107: * Set search URL
108: * @param url URL object
109: */
110: public void setUrl(URL url) {
111: _url = url.toString();
112: }
113:
114: /**
115: * Fetch the current search URL
116: * @return The URL (as a String)
117: */
118: public String getUrl() {
119: return _url;
120: }
121:
122: /**
123: * Specify the search text
124: * @param searchString Text to look for
125: */
126: public void setSearchString(String searchString) {
127: _searchString = searchString;
128: }
129:
130: /**
131: * Fetch the current search text
132: * @return The search string
133: */
134: public String getSearchString() {
135: return _searchString;
136: }
137:
138: /**
139: * Set the HTTP query method (post or get)
140: * @param method <code>METHOD_POST</code> or <code>METHOD_GET</code>
141: */
142: public void setQueryMethod(String method) {
143: _method = method;
144: }
145:
146: /**
147: * Fetch the current HTTP query method
148: * @return The method (as text)
149: */
150: public String getQueryMethod() {
151: return _method;
152: }
153:
154: /**
155: * Fetch a named HTTP response parameter
156: * @param name Parameter name
157: * @return Parameter value
158: */
159: public String getResponseHeader(String name) {
160: return _transaction.getResponseHeader(name);
161: }
162:
163: /**
164: * Set the default character set for this transaction
165: * @param cs Character set (UTF-8, ISO-8859-1, etc)
166: */
167: public void setDefaultCharacterSet(String cs) {
168: _transaction.setDefaultCharacterSet(cs);
169: }
170:
171: /**
172: * Fetch the response character set
173: * @return Character set designation (as a String)
174: */
175: public String getResponseCharacterSet() {
176: return _transaction.getResponseCharacterSet();
177: }
178:
179: /*
180: * The following "query" methods are used only for EBSCO - should
181: * they be moved to EbscoQueryBase?
182: */
183:
184: /**
185: * Create the session context name for a specified consumer
186: * @param base The base name for session context object
187: * @param consumer A unique name for the "user"
188: * @return Full session context name
189: */
190: private String scn(String base, String consumer) {
191: StringBuffer name = new StringBuffer(base);
192:
193: if (!StringUtils.isNull(consumer)) {
194: name.append('.');
195: name.append(consumer);
196: }
197: return name.toString();
198: }
199:
200: /**
201: * Create the session context name for a specified consumer/parameter pair
202: * @param base The base name for session context object
203: * @param consumer A unique name for the "user"
204: * @return Full session context name
205: */
206: private String gp_scn(String base, String consumer) {
207: StringBuffer name = new StringBuffer(GP_PREFIX);
208:
209: name.append(scn(base, consumer));
210: return name.toString();
211: }
212:
213: /**
214: * Save the URL for the query page
215: * @param consumer A unique name for the "user" of this object
216: * @param queryUrl Address of the final query page
217: */
218: public void setQueryUrl(String consumer, String queryUrl) {
219: _session.put(scn(QUERYURL, consumer), queryUrl);
220: }
221:
222: /**
223: * Fetch the URL for the query
224: * @param consumer A unique name for the "user" of this object
225: * @return Address of the final query page
226: */
227: public String getQueryUrl(String consumer) {
228: return (String) _session.get(scn(QUERYURL, consumer));
229: }
230:
231: /**
232: * Delete a stored query URL
233: * @param consumer A unique name for the "user" of this object
234: */
235: public void removeQueryUrl(String consumer) {
236: _session.remove(scn(QUERYURL, consumer));
237: }
238:
239: /**
240: * Save the final query form as a DOM document
241: * @param consumer A unique name for the "user" of this object
242: * @param queryForm Query page as a DOM document
243: */
244: public void setQueryDocument(String consumer, Document queryForm) {
245: _session.put(scn(QUERYFORM, consumer), queryForm);
246: }
247:
248: /**
249: * Fetch the final query form as a DOM document
250: * @param consumer A unique name for the "user" of this object
251: * @return Query form (as a DOM document)
252: */
253: public Document getQueryDocument(String consumer) {
254: return (Document) _session.get(scn(QUERYFORM, consumer));
255: }
256:
257: /**
258: * Save a general purpose parameter
259: * @param consumer A unique name for the "user" of this object
260: * @param name Parameter name
261: * @param value Parameter value
262: */
263: public void setSessionParameter(String consumer, String name,
264: String value) {
265: _session.put(gp_scn(name, consumer), value);
266: }
267:
268: /**
269: * Fetch the requested general purpose parameter
270: * @param consumer A unique name for the "user" of this object
271: * @param name Parameter name
272: * @return Parameter value (null if none)
273: */
274: public String getSessionParameter(String consumer, String name) {
275: return (String) _session.get(gp_scn(name, consumer));
276: }
277:
278: /**
279: * Save a general purpose parameter
280: * @param consumer A unique name for the "user" of this object
281: * @param name Parameter name
282: * @param value Parameter value
283: */
284: public void setSessionValue(String consumer, String name,
285: Object value) {
286: _session.put(gp_scn(name, consumer), value);
287: }
288:
289: /**
290: * Delete the requested general purpose parameter
291: * @param consumer A unique name for the "user" of this object
292: * @param name Parameter name
293: */
294: public void removeSessionParameter(String consumer, String name) {
295: _session.remove(gp_scn(name, consumer));
296: }
297:
298: /**
299: * Fetch the requested general purpose parameter
300: * @param consumer A unique name for the "user" of this object
301: * @param name Parameter name
302: * @return Parameter value (null if none)
303: */
304: public Object getSessionValue(String consumer, String name) {
305: return _session.get(gp_scn(name, consumer));
306: }
307:
308: /**
309: * Get the SessionContext object for this user
310: * @return The current SessionContext
311: */
312: public SessionContext getSessionContext() {
313: return _session;
314: }
315:
316: /**
317: * Establish a mechanism for handling redirects
318: * @param behavior Specifies the desired behavior. Use one of:
319: *<ul>
320: *<li> REDIRECT_AUTOMATIC - <code>URLConnection</code> handles
321: all redirects
322: *<li> REDIRECT_MANAGED - The <code>submit()</code> code
323: * handles any redirects
324: *<li> REDIRECT_MANAGED_SINGLESTEP - The caller will handle each redirect
325: *</ul>
326: */
327: public void setRedirectBehavior(int behavior)
328: throws SearchException {
329:
330: switch (behavior) {
331: case REDIRECT_AUTOMATIC:
332: _followRedirects = true;
333: break;
334:
335: case REDIRECT_MANAGED:
336: case REDIRECT_MANAGED_SINGLESTEP:
337: _followRedirects = false;
338: break;
339:
340: default:
341: throw new SearchException("Invalid redirect behavior: "
342: + behavior);
343: }
344: _redirectBehavior = behavior;
345: }
346:
347: /**
348: * Set the "file preservation state" for getBaseUrlSpecification()
349: * @param state true to preserve URL file portion
350: */
351: public void setPreserveBaseUrlFile(boolean state) {
352: _transaction.setPreserveBaseUrlFile(state);
353: }
354:
355: /**
356: * Should URLConnection follow redirects?
357: * @return true if URLConnection should handle redirects
358: */
359: public boolean getFollowRedirects() {
360: return _followRedirects;
361: }
362:
363: /**
364: * Set up a name=value pair
365: * @param name Parameter name
366: * @param value Parameter value
367: */
368: public void setParameter(String name, String value) {
369: _transaction.setParameter(name, value);
370: }
371:
372: /**
373: * Get a named parameter
374: * @param name Parameter name
375: * @return Parameter value
376: */
377: public String getParameter(String name) {
378: return _transaction.getParameter(name);
379: }
380:
381: /**
382: * Get the parameter name associated with the 1st occurance of this value
383: * @param value Parameter value
384: * @return Parameter name
385: */
386: public String getParameterName(String value) {
387: return _transaction.getParameterName(value);
388: }
389:
390: /**
391: * Clear the parameter list
392: */
393: public void clearParameters() {
394: _transaction.clearParameters();
395: }
396:
397: /**
398: * Submit a request (POST or GET) and read the response. Various aspects
399: * of the response can be inspected using the "getXXX()" methods.
400: * @return Submission status code (200 = success)
401: */
402: public int submit() throws SearchException {
403: int status;
404:
405: /*
406: * Send the request
407: */
408: try {
409: _transaction.setFollowRedirects(_followRedirects);
410: _transaction.setTransactionType(_method);
411:
412: status = _transaction.doTransaction(_url);
413:
414: switch (_redirectBehavior) {
415: case REDIRECT_AUTOMATIC:
416: case REDIRECT_MANAGED_SINGLESTEP:
417: return status;
418:
419: default:
420: break;
421: }
422: /*
423: * Were we redirected to another page? If so, try to fetch
424: */
425: while (HttpTransactionUtils.isHttpRedirect(status)) {
426: String location = _transaction
427: .getResponseHeader("Location");
428: String baseUrl = _transaction.getBaseUrlSpecification();
429: URL fullUrl = newFullUrl(baseUrl, location);
430:
431: setUrl(fullUrl);
432:
433: _transaction.setTransactionType("GET");
434: status = _transaction.doTransaction(fullUrl);
435: }
436: /*
437: * Done, return final status
438: */
439: return status;
440:
441: } catch (Exception exception) {
442: _log.error("Exception seen, the current URL is \""
443: + getUrl() + "\"");
444: exception.printStackTrace(System.out);
445: throw new SearchException(exception.toString());
446: }
447: }
448:
449: /**
450: * Get the server response text
451: * @return The response (as a String)
452: */
453: public String getResponseString() {
454: return _transaction.getResponseString();
455: }
456:
457: /**
458: * Get the server response text
459: * @return The response (as a byte array)
460: */
461: public byte[] getResponseBytes() {
462: return _transaction.getResponseBytes();
463: }
464:
465: /**
466: * Parse the server response (override as required)
467: * @return Response Document
468: */
469: public Document getResponseDocument() throws SearchException {
470: try {
471: return DomUtils.parseHtmlBytes(getResponseBytes());
472:
473: } catch (Exception exception) {
474: throw new SearchException(exception.toString());
475: }
476: }
477:
478: /*
479: * Helpers
480: */
481:
482: /**
483: * Locate the HTML BODY element in the page document
484: * @param pageDocument An HTML page (as a DOM)
485: * @return The body Element
486: */
487: public Element getBody(Document pageDocument) {
488: Element root = pageDocument.getDocumentElement();
489:
490: return DomUtils.getElement(root, "BODY");
491: }
492:
493: /**
494: * Construct a new URL from base and relative components
495: * @param baseComponent Base URL - the relative URL is added to this
496: * @param relativeComponent A partial (or full) URL that represents our target
497: * @return A full URL composed of the relative URL combined with "missing"
498: * portions taken from the base
499: */
500: public URL newFullUrl(String baseComponent, String relativeComponent) {
501: try {
502: URL baseUrl = new URL(baseComponent);
503: return new URL(baseUrl, relativeComponent);
504:
505: } catch (MalformedURLException exception) {
506: throw new SearchException(exception.toString());
507: }
508: }
509:
510: /**
511: * Set query parameters based on page-wide INPUTs
512: * @param pageDocument The search engine query page (as a DOM Document)
513: * @param nameList A list of the parameters we're looking for
514: * @deprecated Replaced by {@link #setParametersFromInputNames()}
515: */
516: public void setParametersFromInputs(Document pageDocument,
517: List nameList) {
518: setParametersFromInputNames(pageDocument, nameList);
519: }
520:
521: /**
522: * Set query parameters based on page-wide INPUTs
523: * @param pageDocument The search engine query page (as a DOM Document)
524: * @param nameList A list of the parameters we're looking for
525: */
526: public void setParametersFromInputNames(Document pageDocument,
527: List nameList) {
528: setParametersFromNameList(DomUtils.getElementList(
529: getBody(pageDocument), "INPUT"), nameList);
530: }
531:
532: /**
533: * Set query parameters based on page-wide INPUTs
534: * @param pageDocument The search engine query page (as a DOM Document)
535: * @param nameList A list of the parameters we're looking for
536: */
537: public void setParametersFromInputValues(Document pageDocument,
538: List nameList) {
539: setParametersFromValueList(DomUtils.getElementList(
540: getBody(pageDocument), "INPUT"), nameList);
541: }
542:
543: /**
544: * Produce a target URL for this query by combining the form "action" value
545: * with the base URL of the query page
546: * @param pageDocument The search engine query page (as a DOM Document)
547: * @param formName The name of the FORM to lookup
548: * (eg <code>FORM name="formName"</code>)
549: * @param nameList A list of the parameters we're looking for
550: */
551: public void setParametersFromFormInputs(Document pageDocument,
552: String formName, List nameList) throws SearchException {
553: Element formElement;
554:
555: if ((formElement = getFormElement(pageDocument, formName)) == null) {
556: throw new SearchException("No such form: " + formName);
557: }
558: setParametersFromElementInputs(formElement, nameList);
559: }
560:
561: /**
562: * Set query parameters based on INPUTs within an Element
563: * @param element The base element (often a FORM)
564: * @param nameList A list of the parameters we're looking for
565: */
566: private void setParametersFromElementInputs(Element element,
567: List nameList) {
568: setParametersFromNameList(DomUtils.getElementList(element,
569: "INPUT"), nameList);
570: }
571:
572: /**
573: * Set query parameters based on element names (save name=value pairs)
574: * @param nodeList List of Elements to evaluate
575: * @param nameList A list of the parameters we're looking for
576: */
577: public void setParametersFromNameList(NodeList nodeList,
578: List nameList) {
579: setParametersFromList(nodeList, KEY, "name", "value", nameList);
580: }
581:
582: /**
583: * Set query parameters based on element values (save name=value pairs)
584: * @param nodeList List of Elements to evaluate
585: * @param nameList A list of the parameters we're looking for
586: */
587: public void setParametersFromValueList(NodeList nodeList,
588: List nameList) {
589: setParametersFromList(nodeList, VALUE, "value", "name",
590: nameList);
591: }
592:
593: /**
594: * {@link #setParametersFromInputNames()}: Use one of KEY or VALUE as the saved parameter name
595: */
596: private static final int KEY = 0;
597: private static final int VALUE = 1;
598:
599: /**
600: * Set query parameters based on element attributes
601: * @param nodeList List of Elements to evaluate
602: * @param useAsParameterName Use one of KEY or VALUE as the saved parameter name
603: * @param key Parameter "name"
604: * @param value Parameter "value"
605: * @param nameList A list of the parameters we're looking for
606: */
607: private void setParametersFromList(NodeList nodeList,
608: int useAsParameterName, String key, String value,
609: List nameList) {
610: int nodeSize = nodeList.getLength();
611:
612: for (int i = 0; i < nodeSize; i++) {
613: Element element = (Element) nodeList.item(i);
614: String fetchedValue = element.getAttribute(key);
615:
616: if (nameList.contains(fetchedValue)) {
617:
618: switch (useAsParameterName) {
619: case KEY:
620: setParameter(fetchedValue, element
621: .getAttribute(value));
622: break;
623:
624: case VALUE:
625: setParameter(element.getAttribute(value),
626: fetchedValue);
627: break;
628:
629: default:
630: throw new IllegalArgumentException(
631: "Unknown name selection: "
632: + useAsParameterName);
633: }
634: }
635: }
636: }
637:
638: /**
639: * Produce a target URL for this query by combining an anchor "href" value
640: * with the base URL of the query page
641: * @param anchor Anchor element
642: */
643: public void setUrlFromAnchor(Element anchor) throws SearchException {
644: String href = anchor.getAttribute("href");
645:
646: try {
647: setUrl(newFullUrl(_transaction.getBaseUrlSpecification(),
648: href));
649:
650: } catch (MalformedURLException exception) {
651: throw new SearchException(exception.toString());
652: }
653: }
654:
655: /**
656: * Produce a target URL for this query by combining the form "action" value
657: * with the base URL of the query page
658: * @param pageDocument The search engine query page (as a DOM Document)
659: * @param formName The name of the FORM to lookup
660: * (eg <code>FORM name="formName"</code>)
661: */
662: public void setUrlFromForm(Document pageDocument, String formName)
663: throws SearchException {
664: Element form;
665:
666: if ((form = getFormElement(pageDocument, formName)) == null) {
667: throw new SearchException("No such form: " + formName);
668: }
669:
670: try {
671: setUrl(newFullUrl(_transaction.getBaseUrlSpecification(),
672: form.getAttribute("action")));
673: } catch (MalformedURLException exception) {
674: throw new SearchException(exception.toString());
675: }
676: }
677:
678: /**
679: * Find a named FORM element
680: * @param pageDocument The search engine query page (as a DOM Document)
681: * @param formName The name of the FORM to lookup
682: * (eg <code>FORM name="formName"</code>)
683: */
684: public Element getFormElement(Document pageDocument, String formName) {
685: return DomUtils.selectFirstElementByAttributeValue(
686: getBody(pageDocument), "FORM", "name", formName);
687: }
688: }
|