001: /*
002: * WebSphinx web-crawling toolkit
003: *
004: * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
005: * reserved.
006: *
007: * Redistribution and use in source and binary forms, with or without
008: * modification, are permitted provided that the following conditions
009: * are met:
010: *
011: * 1. Redistributions of source code must retain the above copyright
012: * notice, this list of conditions and the following disclaimer.
013: *
014: * 2. Redistributions in binary form must reproduce the above copyright
015: * notice, this list of conditions and the following disclaimer in
016: * the documentation and/or other materials provided with the
017: * distribution.
018: *
019: * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
020: * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
021: * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
022: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
023: * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
024: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
025: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
026: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
027: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
028: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
029: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
030: *
031: */
032:
033: package websphinx;
034:
035: import java.net.URL;
036: import java.net.MalformedURLException;
037: import java.net.URLEncoder;
038:
039: /**
040: * <FORM> element in an HTML page.
041: */
042: public class Form extends Link {
043:
044: /**
045: * Make a LinkElement from a start tag and end tag and a base URL (for relative references).
046: * The tags must be on the same page.
047: * @param startTag Start tag of element
048: * @param endTag End tag of element
049: * @param base Base URL used for relative references
050: */
051: public Form(Tag startTag, Tag endTag, URL base)
052: throws MalformedURLException {
053: super (startTag, endTag, base);
054: }
055:
056: /**
057: * Construct the URL for this form, from its start tag and a base URL (for relative references).
058: * @param tag Start tag of form.
059: * @param base Base URL used for relative references
060: * @return URL to which the button points
061: */
062: protected URL urlFromHref(Tag tag, URL base)
063: throws MalformedURLException {
064: String href = tag.getHTMLAttribute("action");
065: if (href == null)
066: // without an action attribute, the URL defaults to the base
067: return base;
068: return new URL(base, href);
069: }
070:
071: /**
072: * Get the method used to access this link.
073: * @return GET or POST.
074: */
075: public int getMethod() {
076: return getHTMLAttribute("method", "GET").equalsIgnoreCase(
077: "post") ? POST : GET;
078: }
079:
080: /**
081: * Construct the query that would be submitted if the form's SUBMIT button were pressed.
082: * @return a URL representing the submitted form, or null if the form cannot be represented as a URL.
083: */
084: public URL makeQuery() {
085: return makeQuery(null);
086: }
087:
088: /**
089: * Construct the query that would be submitted if the specified button were pressed.
090: * @param button form button that triggers the submission.
091: * @return a URL representing the submitted form, or null if the form cannot be represented as a URL.
092: */
093: public URL makeQuery(FormButton button) {
094: StringBuffer querybuf = new StringBuffer();
095: makeQuery(getChild(), querybuf);
096:
097: if (button != null) {
098: String type = button.getHTMLAttribute("type", "");
099: String name = button.getHTMLAttribute("name", "");
100: String value = button.getHTMLAttribute("value", "");
101:
102: if (type.equalsIgnoreCase("submit")) {
103: passArgument(querybuf, name, value);
104: } else if (type.equalsIgnoreCase("image")) {
105: // simulate an imagemap click
106: passArgument(querybuf, name + ".x", "0");
107: passArgument(querybuf, name + ".y", "0");
108: }
109: }
110:
111: String href = getURL().toExternalForm() + "?";
112: if (querybuf.length() > 0)
113: href += querybuf.toString().substring(1); // deletes '&' from front of querybuf
114:
115: try {
116: return new URL(href);
117: } catch (MalformedURLException e) {
118: throw new RuntimeException("internal error: " + e);
119: }
120: }
121:
122: // appends "&name=val&name=val..." to query
123: // for all form fields found among elements and their children
124: private void makeQuery(Element elem, StringBuffer query) {
125: for (Element e = elem; e != null; e = e.getSibling()) {
126: String tagName = e.getTagName();
127: if (tagName == Tag.INPUT) {
128: String type = e.getHTMLAttribute("type", "text")
129: .toLowerCase();
130:
131: if ( // always pass these fields
132: type.equals("text")
133: || type.equals("password")
134: || type.equals("hidden")
135:
136: // pass these fields if checked
137: || ((type.equals("checkbox") || type
138: .equals("radio")) && e
139: .hasHTMLAttribute("checked"))) {
140: passArgument(query, e.getHTMLAttribute("name", ""),
141: e.getHTMLAttribute("value", ""));
142: }
143: } else if (tagName == Tag.SELECT) {
144: String name = e.getHTMLAttribute("name", "");
145: for (Element opt = e.getChild(); opt != null; opt = opt
146: .getSibling()) {
147: if (opt.getTagName() == Tag.OPTION
148: && opt.hasHTMLAttribute("selected")) {
149: passArgument(query, name, opt.getHTMLAttribute(
150: "value", ""));
151: }
152: }
153: } else if (tagName == Tag.TEXTAREA) {
154: passArgument(query, e.getHTMLAttribute("name", ""), e
155: .toText());
156: } else {
157: makeQuery(e.getChild(), query);
158: }
159: }
160: }
161:
162: private void passArgument(StringBuffer query, String name,
163: String value) {
164: query.append('&');
165: query.append(URLEncoder.encode(name)); // FIX: should name be encoded?
166: query.append('=');
167: query.append(URLEncoder.encode(value));
168: }
169: }
|