001: // Jericho HTML Parser - Java based library for analysing and manipulating HTML
002: // Version 2.5
003: // Copyright (C) 2007 Martin Jericho
004: // http://jerichohtml.sourceforge.net/
005: //
006: // This library is free software; you can redistribute it and/or
007: // modify it under the terms of either one of the following licences:
008: //
009: // 1. The Eclipse Public License (EPL) version 1.0,
010: // included in this distribution in the file licence-epl-1.0.html
011: // or available at http://www.eclipse.org/legal/epl-v10.html
012: //
013: // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
014: // included in this distribution in the file licence-lgpl-2.1.txt
015: // or available at http://www.gnu.org/licenses/lgpl.txt
016: //
017: // This library is distributed on an "AS IS" basis,
018: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
019: // See the individual licence texts for more details.
020:
021: package au.id.jericho.lib.html;
022:
023: import java.util.*;
024:
025: /**
026: * Represents a <em>field</em> in an HTML <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html">form</a>,
027: * a <em>field</em> being defined as the group of all {@linkplain FormControl form controls}
028: * having the same {@linkplain FormControl#getName() name}.
029: * <p>
030: * The {@link #getFormControls()} method can be used to obtain the collection of this field's constituent
031: * {@link FormControl} objects.
032: * <p>
033: * The {@link FormFields} class, which represents a collection of <code>FormField</code> objects, provides the highest level
034: * interface for dealing with form fields and controls. For the most common tasks it can be used directly without
035: * the need to work with its constituent <code>FormField</code> or {@link FormControl} objects.
036: * <p>
037: * The <code>FormField</code> class serves two main purposes:
038: * <ol>
039: * <li style="margin-bottom: 1.5em">
040: * Provide methods for the modification and retrieval of form control <a href="FormControl.html#SubmissionValue">submission values</a>
041: * while ensuring that the states of all the field's constituent form controls remain consistent with each other.
042: * <p>
043: * The methods available for this purpose are:<br />
044: * {@link #getValues() Collection getValues()}<br />
045: * {@link #clearValues() void clearValues()}<br />
046: * {@link #setValues(Collection) void setValues(Collection)}<br />
047: * {@link #setValue(CharSequence) boolean setValue(CharSequence)}<br />
048: * {@link #addValue(CharSequence) boolean addValue(CharSequence)}<br />
049: * <p>
050: * Although the {@link FormControl} class provides methods for directly modifying the submission values
051: * of individual form controls, it is generally recommended to use the interface provided by the {@link FormFields} class
052: * unless there is a specific requirement for the lower level functionality.
053: * The {@link FormFields} class contains convenience methods providing most of the functionality of the above methods,
054: * as well as some higher level functionality such as the ability to set the form
055: * <a href="FormControl.html#SubmissionValue">submission values</a> as a complete <a href="FormFields.html#FieldDataSet">field data set</a>
056: * using the {@link FormFields#setDataSet(Map)} method.
057: * <li><a name="DataStructureProperties"></a>
058: * Provide a means of determining the data structure of the field, allowing a server receiving a
059: * <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#submit-format">submitted</a>
060: * <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-data-set">form data set</a>
061: * to interpret and store the data in an appropriate way.
062: * <p>
063: * The properties available for this purpose are:<br />
064: * {@link #allowsMultipleValues() boolean allowsMultipleValues()}<br />
065: * {@link #getUserValueCount() int getUserValueCount()}<br />
066: * {@link #getPredefinedValues() Collection getPredefinedValues()}<br />
067: * <p>
068: * The {@link FormFields#getColumnLabels()} and {@link FormFields#getColumnValues(Map)} methods utilise these properties
069: * to convert data from a <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-data-set">form data set</a>
070: * (represented as a <a href="FormFields.html#FieldDataSet">field data set</a>) into a simple array format,
071: * suitable for storage in a tabular format such as a database table or <code>.CSV</code> file.
072: * <p>
073: * The properties need only be utilised directly in the event that a
074: * <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-data-set">form data set</a> is to be converted
075: * from its <a href="FormFields.html#FieldDataSet">normal format</a> into some other type of data structure.
076: * </ol>
077: * A form field which allows user values normally consists of a single
078: * <a href="FormControl.html#UserValueControl">user value control</a>,
079: * such as a {@link FormControlType#TEXT TEXT} control.
080: * <p>
081: * When a form field consists of more than one control, these controls are normally all
082: * <a href="FormControl.html#PredefinedValueControl">predefined value controls</a> of the same
083: * {@linkplain FormControlType type}, such as {@link FormControlType#CHECKBOX CHECKBOX} controls.
084: * <p>
085: * Form fields consisting of more than one control do not necessarily return {@linkplain #allowsMultipleValues() multiple values}.
086: * A form field consisting of {@link FormControlType#CHECKBOX CHECKBOX} controls can return multiple values, whereas
087: * a form field consisting of {@link FormControlType#RADIO RADIO} controls returns at most one value.
088: * <p>
089: * The HTML author can disregard convention and mix all types of controls with the same name in the same form,
090: * or include multiple <a href="FormControl.html#UserValueControl">user value controls</a> of the same name.
091: * Evidence that such an unusual combination is present is {@link #getUserValueCount()}<code>&gt;1</code>.
092: * <p>
093: * <code>FormField</code> instances are created automatically with the creation of a {@link FormFields} collection.
094: * <p>
095: * The case sensitivity of form field names is determined by the
096: * {@link Config#CurrentCompatibilityMode}<code>.</code>{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} property.
097: *
098: * @see FormFields
099: * @see FormControl
100: * @see FormControlType
101: */
102: public final class FormField {
103: private final String name;
104: private int userValueCount = 0;
105: private boolean allowsMultipleValues = false;
106: private LinkedHashSet predefinedValues = null; // String objects, null if none
107: private final LinkedHashSet formControls = new LinkedHashSet();
108: private transient FormControl firstFormControl = null; // this field is simply a cache for the getFirstFormControl() method
109: int columnIndex; // see FormFields.initColumns()
110:
111: /** Constructor called from FormFields class. */
112: FormField(final String name) {
113: this .name = name;
114: }
115:
116: /**
117: * Returns the <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#control-name">control name</a> shared by all of this field's constituent {@linkplain FormControl controls}.
118: * <p>
119: * If {@link Config#CurrentCompatibilityMode}<code>.</code>{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() isFormFieldNameCaseInsensitive()}
120: * is <code>true</code>, the grouping of the controls by name is case insensitive
121: * and this method always returns the name in lower case.
122: * <p>
123: * Since a form field is simply a group of controls with the same name, the terms <i>control name</i> and
124: * <i>field name</i> are for the most part synonymous, with only a possible difference in case differentiating them.
125: *
126: * @return the <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#control-name">control name</a> shared by all of this field's constituent {@linkplain FormControl controls}.
127: * @see FormControl#getName()
128: */
129: public String getName() {
130: return name;
131: }
132:
133: /**
134: * Returns a collection of all the constituent {@linkplain FormControl form controls} in this field.
135: * <p>
136: * An iterator over this collection returns the controls in the order of appearance in the source.
137: *
138: * @return a collection of all the constituent {@linkplain FormControl form controls} in this field.
139: * @see #getFormControl()
140: * @see #getFormControl(String predefinedValue)
141: */
142: public Collection getFormControls() {
143: return formControls;
144: }
145:
146: /**
147: * Returns the constituent {@link FormControl} with the specified {@linkplain FormControl#getPredefinedValue() predefined value}.
148: * <p>
149: * Specifying a predefined value of <code>null</code> returns the first control without a predefined value.
150: *
151: * @param predefinedValue the predefined value of the control to be returned, or <code>null</code> to return the first control without a predefined value.
152: * @return the constituent {@link FormControl} with the specified {@linkplain FormControl#getPredefinedValue() predefined value}, or <code>null</code> if none exists.
153: * @see #getFormControl()
154: * @see #getFormControls()
155: */
156: public FormControl getFormControl(final String predefinedValue) {
157: if (predefinedValue == null) {
158: for (final Iterator i = formControls.iterator(); i
159: .hasNext();) {
160: final FormControl formControl = (FormControl) i.next();
161: if (!formControl.getFormControlType()
162: .hasPredefinedValue())
163: return formControl;
164: if (formControl.getFormControlType().getElementName() != Tag.SELECT
165: && formControl.getPredefinedValue() == null)
166: return formControl;
167: }
168: } else {
169: for (final Iterator i = formControls.iterator(); i
170: .hasNext();) {
171: final FormControl formControl = (FormControl) i.next();
172: if (formControl.getFormControlType().getElementName() == Tag.SELECT) {
173: if (formControl.getPredefinedValues().contains(
174: predefinedValue))
175: return formControl;
176: } else {
177: if (predefinedValue.equals(formControl
178: .getPredefinedValue()))
179: return formControl;
180: }
181: }
182: }
183: return null;
184: }
185:
186: /**
187: * Returns the first {@link FormControl} from this field.
188: * @return the first {@link FormControl} from this field, guaranteed not <code>null</code>.
189: * @see #getFormControl(String predefinedValue)
190: * @see #getFormControls()
191: */
192: public FormControl getFormControl() {
193: return (FormControl) formControls.iterator().next();
194: }
195:
196: /**
197: * Indicates whether the field allows multiple values.
198: * <p>
199: * Returns <code>false</code> in any one of the following circumstances:
200: * <ul>
201: * <li>The field consists of only one control (unless it is a
202: * {@linkplain FormControlType#SELECT_MULTIPLE multiple select} with more than one option)
203: * <li>The field consists entirely of {@linkplain FormControlType#RADIO radio buttons}
204: * <li>The field consists entirely of {@linkplain FormControlType#isSubmit() submit} buttons
205: * </ul>
206: * If none of these three conditions are met, the method returns <code>true</code>.
207: *
208: * @return <code>true</code> if the field allows multiple values, otherwise <code>false</code>.
209: */
210: public boolean allowsMultipleValues() {
211: return allowsMultipleValues;
212: }
213:
214: /**
215: * Returns the number of constituent <a href="FormControl.html#UserValueControl">user value controls</a> in this field.
216: * This should in most cases be either <code>0</code> or <code>1</code>.
217: * <p>
218: * A value of <code>0</code> indicates the field values consist only of
219: * {@linkplain #getPredefinedValues() predefined values}, which is the case when the field consists only of
220: * <a href="FormControl.html#PredefinedValueControl">predefined value controls</a>.
221: * <p>
222: * A value of <code>1</code> indicates the field values consist of at most one value set by the user.
223: * It is still possible in this case to receive multiple values in the unlikely event that the HTML author mixed
224: * controls of different types with the same name, but any other values would consist only of
225: * {@linkplain #getPredefinedValues() predefined values}.
226: * <p>
227: * A value greater than <code>1</code> indicates that the HTML author has included more than one
228: * <a href="FormControl.html#UserValueControl">user value control</a> with the same name.
229: * This would nearly always indicate an unintentional error in the HTML source document,
230: * in which case your application can either log a warning that a poorly designed form has been encountered,
231: * or take special action to try to interpret the multiple user values that might be submitted.
232: *
233: * @return the number of constituent <a href="FormControl.html#UserValueControl">user value controls</a> in this field.
234: */
235: public int getUserValueCount() {
236: return userValueCount;
237: }
238:
239: /**
240: * Returns a collection of the {@linkplain FormControl#getPredefinedValue() predefined values} of all constituent {@linkplain FormControl controls} in this field.
241: * <p>
242: * All objects in the returned collection are of type <code>String</code>, with no <code>null</code> entries.
243: * <p>
244: * An interator over this collection returns the values in the order of appearance in the source document.
245: *
246: * @return a collection of the {@linkplain FormControl#getPredefinedValue() predefined values} of all constituent {@linkplain FormControl controls} in this field, or <code>null</code> if none.
247: * @see FormControl#getPredefinedValues()
248: */
249: public Collection getPredefinedValues() {
250: return predefinedValues != null ? predefinedValues
251: : Collections.EMPTY_SET;
252: }
253:
254: /**
255: * Returns a collection of the <a href="#FieldSubmissionValues">field submission values</a>.
256: * <p>
257: * The term <i><a name="FieldSubmissionValues">field submission values</a></i> is used in this library to refer to the aggregate of all the
258: * <a href="FormControl.html#SubmissionValue">submission values</a> of a field's constituent {@linkplain #getFormControls() form controls}.
259: * <p>
260: * All objects in the returned collection are of type <code>CharSequence</code>, with no <code>null</code> entries.
261: *
262: * @return a collection of the <a href="#FieldSubmissionValue">field submission values</a>, guaranteed not <code>null</code>.
263: */
264: public Collection getValues() {
265: final HashSet values = new HashSet();
266: for (final Iterator i = formControls.iterator(); i.hasNext();)
267: ((FormControl) i.next()).addValuesTo(values);
268: return values;
269: }
270:
271: /**
272: * Clears the <a href="FormControl.html#SubmissionValue">submission values</a> of all the constituent {@linkplain #getFormControls() form controls} in this field.
273: * @see FormControl#clearValues()
274: */
275: public void clearValues() {
276: for (final Iterator i = formControls.iterator(); i.hasNext();)
277: ((FormControl) i.next()).clearValues();
278: }
279:
280: /**
281: * Sets the <a href="#FieldSubmissionValues">field submission values</a> of this field to the specified values.
282: * <p>
283: * This is equivalent to calling {@link #clearValues()} followed by {@link #addValue(CharSequence) addValue(value)} for each
284: * value in the specified collection.
285: * <p>
286: * The specified collection must not contain any <code>null</code> values.
287: *
288: * @param values the new <a href="#FieldSubmissionValues">field submission values</a> of this field.
289: * @see #addValue(CharSequence value)
290: */
291: public void setValues(final Collection values) {
292: clearValues();
293: addValues(values);
294: }
295:
296: /**
297: * Sets the <a href="#FieldSubmissionValues">field submission values</a> of this field to the single specified value.
298: * <p>
299: * This is equivalent to calling {@link #clearValues()} followed by {@link #addValue(CharSequence) addValue(value)}.
300: * <p>
301: * The return value indicates whether any of the constituent form controls "accepted" the value.
302: * A return value of <code>false</code> implies an error condition as the specified value is not compatible with this field.
303: * <p>
304: * Specifying a <code>null</code> value is equivalent to calling {@link #clearValues()} alone, and always returns <code>true</code>.
305: * <p>
306: * See the {@link #addValue(CharSequence value)} method for more information.
307: *
308: * @param value the new <a href="#FieldSubmissionValues">field submission value</a> of this field, or <code>null</code> to {@linkplain #clearValues() clear} the field of all submission values.
309: * @return <code>true</code> if one of the constituent {@linkplain #getFormControls() form controls} accepts the value, otherwise <code>false</code>.
310: * @see FormFields#setValue(String fieldName, CharSequence value)
311: */
312: public boolean setValue(final CharSequence value) {
313: clearValues();
314: return value != null ? addValue(value) : true;
315: }
316:
317: /**
318: * Adds the specified value to the <a href="#FieldSubmissionValues">field submission values</a> of this field.
319: * <p>
320: * This is achieved internally by attempting to {@linkplain FormControl#addValue(CharSequence) add the value} to every constituent
321: * {@linkplain #getFormControls() form control} until one "accepts" it.
322: * <p>
323: * The return value indicates whether any of the constituent form controls accepted the value.
324: * A return value of <code>false</code> implies an error condition as the specified value is not compatible with this field.
325: * <p>
326: * In the unusual case that this field consists of multiple form controls, but not all of them are
327: * <a href="FormControl.html#PredefinedValueControl">predefined value controls</a>, priority is given to the predefined value controls
328: * before attempting to add the value to the <a href="FormControl.html#UserValueControl">user value controls</a>.
329: *
330: * @param value the new <a href="#FieldSubmissionValues">field submission value</a> to add to this field, must not be <code>null</code>.
331: * @return <code>true</code> if one of the constituent {@linkplain #getFormControls() form controls} accepts the value, otherwise <code>false</code>.
332: */
333: public boolean addValue(final CharSequence value) {
334: if (value == null)
335: throw new IllegalArgumentException(
336: "value argument must not be null");
337: if (formControls.size() == 1)
338: return getFirstFormControl().addValue(value);
339: List userValueControls = null;
340: for (final Iterator i = formControls.iterator(); i.hasNext();) {
341: final FormControl formControl = (FormControl) i.next();
342: if (!formControl.getFormControlType().hasPredefinedValue()) {
343: // A user value control has been found, but is not the only control with this name.
344: // This shouldn't normally happen in a well designed form, but we will save the user value control
345: // for later and give all predefined value controls first opportunity to take the value.
346: if (userValueControls == null)
347: userValueControls = new LinkedList();
348: userValueControls.add(formControl);
349: continue;
350: }
351: if (formControl.addValue(value))
352: return true; // return value of true from formControl.addValue(value) means the value was taken by the control
353: }
354: if (userValueControls == null)
355: return false;
356: for (final Iterator i = userValueControls.iterator(); i
357: .hasNext();) {
358: final FormControl formControl = (FormControl) i.next();
359: if (formControl.addValue(value))
360: return true;
361: }
362: return false;
363: }
364:
365: /**
366: * Returns a string representation of this object useful for debugging purposes.
367: * @return a string representation of this object useful for debugging purposes.
368: */
369: public String getDebugInfo() {
370: final StringBuffer sb = new StringBuffer();
371: sb.append("Field: ").append(name).append(", UserValueCount=")
372: .append(userValueCount).append(
373: ", AllowsMultipleValues=").append(
374: allowsMultipleValues);
375: if (predefinedValues != null) {
376: for (final Iterator i = predefinedValues.iterator(); i
377: .hasNext();) {
378: sb.append(Config.NewLine).append("PredefinedValue: ");
379: sb.append(i.next());
380: }
381: }
382: for (final Iterator i = formControls.iterator(); i.hasNext();) {
383: sb.append(Config.NewLine).append("FormControl: ");
384: sb.append(((FormControl) i.next()).getDebugInfo());
385: }
386: sb.append(Config.NewLine).append(Config.NewLine);
387: return sb.toString();
388: }
389:
390: /**
391: * Returns a string representation of this object useful for debugging purposes.
392: * <p>
393: * This is equivalent to {@link #getDebugInfo()}.
394: *
395: * @return a string representation of this object useful for debugging purposes.
396: */
397: public String toString() {
398: return getDebugInfo();
399: }
400:
401: void addValues(final Collection values) {
402: if (values != null)
403: for (final Iterator i = values.iterator(); i.hasNext();)
404: addValue((CharSequence) i.next());
405: }
406:
407: void addValues(final CharSequence[] values) {
408: if (values != null)
409: for (int i = 0; i < values.length; i++)
410: addValue(values[i]);
411: }
412:
413: void addFormControl(final FormControl formControl,
414: final String predefinedValue) {
415: // predefinedValue==null if we are adding a user value
416: if (predefinedValue == null) {
417: userValueCount++;
418: } else {
419: if (predefinedValues == null)
420: predefinedValues = new LinkedHashSet();
421: predefinedValues.add(predefinedValue);
422: }
423: formControls.add(formControl);
424: allowsMultipleValues = calculateAllowsMultipleValues(formControl);
425: }
426:
427: private boolean calculateAllowsMultipleValues(
428: final FormControl newFormControl) {
429: // false if only one control (unless it is a multiple select with more than one option),
430: // or all of the controls are radio buttons, or all of the controls are submit buttons
431: if (allowsMultipleValues || userValueCount > 1)
432: return true;
433: if (userValueCount == 1)
434: return predefinedValues != null;
435: // at this stage we know userValueCount==0 && predefinedValues.size()>=1
436: if (predefinedValues.size() == 1)
437: return false;
438: final FormControlType newFormControlType = newFormControl
439: .getFormControlType();
440: if (formControls.size() == 1)
441: return newFormControlType == FormControlType.SELECT_MULTIPLE;
442: // at this stage we know there are multiple predefined values in multiple controls.
443: // if all of the controls are radio buttons or all are submit buttons, allowsMultipleValues is false, otherwise true.
444: // checking only the first control and the new control is equivalent to checking them all because if they weren't all
445: // the same allowsMultipleValues would already be true.
446: final FormControlType firstFormControlType = getFirstFormControl()
447: .getFormControlType();
448: if (newFormControlType == FormControlType.RADIO
449: && firstFormControlType == FormControlType.RADIO)
450: return false;
451: if (newFormControlType.isSubmit()
452: && firstFormControlType.isSubmit())
453: return false;
454: return true;
455: }
456:
457: FormControl getFirstFormControl() {
458: // formControls must be ordered collection for this method to work.
459: // It has to return the first FormControl entered into the collection
460: // for the algorithm in calculateAllowsMultipleValues() to work.
461: if (firstFormControl == null)
462: firstFormControl = (FormControl) formControls.iterator()
463: .next();
464: return firstFormControl;
465: }
466:
467: /** only called from FormFields class */
468: void merge(final FormField formField) {
469: if (formField.userValueCount > userValueCount)
470: userValueCount = formField.userValueCount;
471: allowsMultipleValues = allowsMultipleValues
472: || formField.allowsMultipleValues;
473: if (predefinedValues == null) {
474: predefinedValues = formField.predefinedValues;
475: } else if (formField.predefinedValues != null) {
476: for (final Iterator i = formField.predefinedValues
477: .iterator(); i.hasNext();)
478: predefinedValues.add(i.next());
479: }
480: for (final Iterator i = formField.getFormControls().iterator(); i
481: .hasNext();)
482: formControls.add(i.next());
483: }
484: }
|