001: // Jericho HTML Parser - Java based library for analysing and manipulating HTML
002: // Version 2.5
003: // Copyright (C) 2007 Martin Jericho
004: // http://jerichohtml.sourceforge.net/
005: //
006: // This library is free software; you can redistribute it and/or
007: // modify it under the terms of either one of the following licences:
008: //
009: // 1. The Eclipse Public License (EPL) version 1.0,
010: // included in this distribution in the file licence-epl-1.0.html
011: // or available at http://www.eclipse.org/legal/epl-v10.html
012: //
013: // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
014: // included in this distribution in the file licence-lgpl-2.1.txt
015: // or available at http://www.gnu.org/licenses/lgpl.txt
016: //
017: // This library is distributed on an "AS IS" basis,
018: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
019: // See the individual licence texts for more details.
020:
021: package au.id.jericho.lib.html;
022:
023: import java.util.*;
024: import java.io.*;
025:
026: /**
027: * Implements an {@link OutputSegment} whose content is a list of attribute name/value pairs.
028: * <p>
029: * This output segment is designed to replace the original {@link Attributes} segment in the source,
030: * providing a simple means of adding, modifying and removing attributes.
031: * <p>
032: * Each instance of this class contains a <code>java.util.Map</code> of name/value pairs which can either be
033: * specified directly in the constructor or initialised to the same entries as the source {@link Attributes}
034: * specified in the constructor.
035: * This map can be accessed via the {@link #getMap()} method, and its entries modified as required before output.
036: * <p>
037: * Keys in the map must be <code>String</code> objects, and values must implement the <code>CharSequence</code> interface.
038: * <p>
039: * An attribute with no value is represented by a map entry with a <code>null</code> value.
040: * <p>
041: * Attribute values are stored unencoded in the map, and are automatically
042: * {@linkplain CharacterReference#encode(CharSequence) encoded} if necessary during output.
043: * <p>
044: * The use of invalid characters in attribute names results in unspecified behaviour.
045: * <p>
046: * Note that methods in the <code>Attributes</code> class treat attribute names as case insensitive,
047: * whereas the <code>Map</code> treats them as case sensitive.
048: * <h4>Example of Usage:</h4>
049: * <pre>
050: * Source source=new Source(htmlDocument);
051: * Attributes bodyAttributes
052: * =source.findNextStartTag(0,Tag.BODY).getAttributes();
053: * AttributesOutputSegment bodyAttributesOutputSegment
054: * =new AttributesOutputSegment(bodyAttributes,true);
055: * bodyAttributesOutputSegment.getMap().put("bgcolor","green");
056: * OutputDocument outputDocument=new OutputDocument(source);
057: * outputDocument.register(bodyAttributesOutputSegment);
058: * String htmlDocumentWithGreenBackground=outputDocument.toString();
059: * </pre>
060: * <p>
061: * This class has been removed from the public API and the functionality replaced with the
062: * {@link OutputDocument#replace(Attributes, Map)} and {@link OutputDocument#replace(Attributes, boolean convertNamesToLowerCase)} methods.
063: *
064: * @see OutputDocument
065: * @see Attributes
066: */
067: class AttributesOutputSegment implements OutputSegment {
068: private final int begin;
069: private final int end;
070: private final Map map;
071:
072: /**
073: * Constructs a new <code>AttributesOutputSegment</code> with the same span and initial name/value entries as the specified source {@link Attributes}.
074: * <p>
075: * Specifying a value of <code>true</code> as an argument to the <code>convertNamesToLowerCase</code> parameter
076: * causes all attribute names to be converted to lower case in the map.
077: * This simplifies the process of finding/updating specific attributes since map keys are case sensitive.
078: * <p>
079: * Attribute values are automatically {@linkplain CharacterReference#decode(CharSequence) decoded} before
080: * being loaded into the map.
081: * <p>
082: * Calling this constructor with the following code:
083: * <div style="margin-left: 2em"><code>new AttributesOutputSegment(attributes, convertNamesToLowerCase)</code></div>
084: * is logically equivalent to calling:
085: * <div style="margin-left: 2em"><code>new AttributesOutputSegment(attributes, attributes.populateMap(new LinkedHashMap(), convertNamesToLowerCase))</code></div>
086: * <p>
087: * The use of <code>LinkedHashMap</code> to implement the map ensures (probably unnecessarily) that
088: * existing attributes are output in the same order as they appear in the source document, and new
089: * attributes are output in the same order as they are added.
090: *
091: * @param attributes the <code>Attributes</code> defining the span and initial name/value entries of the new <code>AttributesOutputSegment</code>.
092: * @param convertNamesToLowerCase specifies whether all attribute names are converted to lower case in the map.
093: * @see #AttributesOutputSegment(Attributes,Map)
094: */
095: public AttributesOutputSegment(final Attributes attributes,
096: final boolean convertNamesToLowerCase) {
097: this (attributes, attributes.getMap(convertNamesToLowerCase));
098: }
099:
100: /**
101: * Constructs a new <code>AttributesOutputSegment</code> with the same span
102: * as the specified source {@link Attributes}, using the specified <code>Map</code> to
103: * store the entries.
104: * <p>
105: * This constructor might be used if the <code>Map</code> containing the new attribute values
106: * should not be preloaded with the same entries as the source attributes, or a map implementation
107: * other than <code>LinkedHashMap</code> is required.
108: *
109: * @param attributes the <code>Attributes</code> defining the span of the new <code>AttributesOutputSegment</code>.
110: * @param map the <code>Map</code> containing the name/value entries.
111: * @see #AttributesOutputSegment(Attributes, boolean convertNamesToLowerCase)
112: */
113: public AttributesOutputSegment(final Attributes attributes,
114: final Map map) {
115: if (map == null || attributes == null)
116: throw new IllegalArgumentException(
117: "both arguments must be non-null");
118: begin = attributes.getBegin();
119: end = attributes.getEnd();
120: this .map = map;
121: }
122:
123: public int getBegin() {
124: return begin;
125: }
126:
127: public int getEnd() {
128: return end;
129: }
130:
131: /**
132: * Returns the <code>Map</code> containing the name/value entries to be output.
133: * @return the <code>Map</code> containing the name/value entries to be output.
134: */
135: public Map getMap() {
136: return map;
137: }
138:
139: /**
140: * Writes the contents of the {@linkplain #getMap() map} as HTML attribute name/value pairs to the specified <code>Writer</code>.
141: * <p>
142: * Each attribute is preceded by a single space, and all values are
143: * {@linkplain CharacterReference#encode(CharSequence) encoded} and enclosed in double quotes.
144: *
145: * @param writer the destination <code>java.io.Writer</code> for the output.
146: * @throws IOException if an I/O exception occurs.
147: * @see Attributes#generateHTML(Map attributesMap)
148: */
149: public void writeTo(final Writer writer) throws IOException {
150: Attributes.appendHTML(writer, map);
151: }
152:
153: public long getEstimatedMaximumOutputLength() {
154: return (end - begin) * 2;
155: }
156:
157: public String toString() {
158: return Attributes.generateHTML(map);
159: }
160:
161: public String getDebugInfo() {
162: StringWriter stringWriter = new StringWriter();
163: stringWriter.getBuffer().append("(p").append(begin)
164: .append("-p").append(end).append("):");
165: try {
166: writeTo(stringWriter);
167: } catch (IOException ex) {
168: } // IOException never occurs in StringWriter
169: return stringWriter.toString();
170: }
171: }
|