001: // Jericho HTML Parser - Java based library for analysing and manipulating HTML
002: // Version 2.5
003: // Copyright (C) 2007 Martin Jericho
004: // http://jerichohtml.sourceforge.net/
005: //
006: // This library is free software; you can redistribute it and/or
007: // modify it under the terms of either one of the following licences:
008: //
009: // 1. The Eclipse Public License (EPL) version 1.0,
010: // included in this distribution in the file licence-epl-1.0.html
011: // or available at http://www.eclipse.org/legal/epl-v10.html
012: //
013: // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
014: // included in this distribution in the file licence-lgpl-2.1.txt
015: // or available at http://www.gnu.org/licenses/lgpl.txt
016: //
017: // This library is distributed on an "AS IS" basis,
018: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
019: // See the individual licence texts for more details.
020:
021: package au.id.jericho.lib.html;
022:
023: import java.util.*;
024:
025: /**
026: * Represents an <a target="_blank" href="http://www.w3.org/TR/html401/intro/sgmltut.html#h-3.2.1">element</a>
027: * in a specific {@linkplain Source source} document, which encompasses a {@linkplain #getStartTag() start tag},
028: * an optional {@linkplain #getEndTag() end tag} and all {@linkplain #getContent() content} in between.
029: * <p>
030: * Take the following HTML segment as an example:
031: * <p>
032: * <code>&lt;p&gt;This is a sample paragraph.&lt;/p&gt;</code>
033: * <p>
034: * The whole segment is represented by an <code>Element</code> object. This is comprised of the {@link StartTag} "<code>&lt;p&gt;</code>",
035: * the {@link EndTag} "<code>&lt;/p&gt;</code>", as well as the text in between.
036: * An element may also contain other elements between its start and end tags.
037: * <p>
038: * The term <i><a name="Normal">normal element</a></i> refers to an element having a {@linkplain #getStartTag() start tag}
039: * with a {@linkplain StartTag#getStartTagType() type} of {@link StartTagType#NORMAL}.
040: * This comprises all {@linkplain HTMLElements HTML elements} and <a href="HTMLElements.html#NonHTMLElement">non-HTML elements</a>.
041: * <p>
042: * <code>Element</code> instances are obtained using one of the following methods:
043: * <ul>
044: * <li>{@link StartTag#getElement()}
045: * <li>{@link EndTag#getElement()}
046: * <li>{@link Segment#findAllElements()}
047: * <li>{@link Segment#findAllElements(String name)}
048: * <li>{@link Segment#findAllElements(StartTagType)}
049: * </ul>
050: * See also the {@link HTMLElements} class, and the
051: * <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-element">XML 1.0 specification for elements</a>.
052: * <h3><a name="Structure">Element Structure</a></h3>
053: * <p>
054: * The three possible structures of an element are listed below:
055: * <dl class="Separated">
056: * <dt><a name="SingleTag">Single Tag Element</a>:
057: * <dd>
058: * Example:<br />
059: * <code>&lt;img src="mypicture.jpg"&gt;</code>
060: * <p>
061: * The element consists only of a single {@linkplain #getStartTag() start tag} and has no {@linkplain #getContent() element content}
062: * (although the start tag itself may have {@linkplain StartTag#getTagContent() tag content}).
063: * <br />{@link #getEndTag()}<code>==null</code>
064: * <br />{@link #isEmpty()}<code>==true</code>
065: * <br />{@link #getEnd() getEnd()}<code>==</code>{@link #getStartTag()}<code>.</code>{@link #getEnd() getEnd()}
066: * <p>
067: * This occurs in the following situations:
068: * <ul class="Unseparated">
069: * <li>An <a href="HTMLElements.html#HTMLElement">HTML element</a> for which the {@linkplain HTMLElements#getEndTagForbiddenElementNames() end tag is forbidden}.
070: * <li>An <a href="HTMLElements.html#HTMLElement">HTML element</a> for which the {@linkplain HTMLElements#getEndTagRequiredElementNames() end tag is required},
071: * but the end tag is not present in the source document.
072: * <li>An <a href="HTMLElements.html#HTMLElement">HTML element</a> for which the {@linkplain HTMLElements#getEndTagOptionalElementNames() end tag is optional},
073: * where the <a href="#ImplicitlyTerminated">implicitly terminating</a> tag is situated immediately after the element's
074: * {@linkplain #getStartTag() start tag}.
075: * <li>An {@linkplain #isEmptyElementTag() empty element tag}
076: * <li>A <a href="HTMLElements.html#NonHTMLElement">non-HTML element</a> that is not an {@linkplain #isEmptyElementTag() empty element tag} but is missing its end tag.
077: * <li>An element with a start tag of a {@linkplain StartTag#getStartTagType() type} that does not define a
078: * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}.
079: * <li>An element with a start tag of a {@linkplain StartTag#getStartTagType() type} that does define a
080: * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type} but is missing its end tag.
081: * </ul>
082: * <dt><a name="ExplicitlyTerminated">Explicitly Terminated Element</a>:
083: * <dd>
084: * Example:<br />
085: * <code>&lt;p&gt;This is a sample paragraph.&lt;/p&gt;</code>
086: * <p>
087: * The element consists of a {@linkplain #getStartTag() start tag}, {@linkplain #getContent() content},
088: * and an {@linkplain #getEndTag() end tag}.
089: * <br />{@link #getEndTag()}<code>!=null</code>.
090: * <br />{@link #isEmpty()}<code>==false</code> (provided the end tag doesn't immediately follow the start tag)
091: * <br />{@link #getEnd() getEnd()}<code>==</code>{@link #getEndTag()}<code>.</code>{@link #getEnd() getEnd()}.
092: * <p>
093: * This occurs in the following situations, assuming the start tag's matching end tag is present in the source document:
094: * <ul class="Unseparated">
095: * <li>An <a href="HTMLElements.html#HTMLElement">HTML element</a> for which the end tag is either
096: * {@linkplain HTMLElements#getEndTagRequiredElementNames() required} or {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}.
097: * <li>A <a href="HTMLElements.html#NonHTMLElement">non-HTML element</a> that is not an {@linkplain #isEmptyElementTag() empty element tag}.
098: * <li>An element with a start tag of a {@linkplain StartTag#getStartTagType() type} that defines a
099: * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}.
100: * </ul>
101: * <dt><a name="ImplicitlyTerminated">Implicitly Terminated Element</a>:
102: * <dd>
103: * Example:<br />
104: * <code>&lt;p&gt;This text is included in the paragraph element even though no end tag is present.</code><br />
105: * <code>&lt;p&gt;This is the next paragraph.</code>
106: * <p>
107: * The element consists of a {@linkplain #getStartTag() start tag} and {@linkplain #getContent() content},
108: * but no {@linkplain #getEndTag() end tag}.
109: * <br />{@link #getEndTag()}<code>==null</code>.
110: * <br />{@link #isEmpty()}<code>==false</code>
111: * <br />{@link #getEnd() getEnd()}<code>!=</code>{@link #getStartTag()}<code>.</code>{@link #getEnd() getEnd()}.
112: * <p>
113: * This only occurs in an <a href="HTMLElements.html#HTMLElement">HTML element</a> for which the
114: * {@linkplain HTMLElements#getEndTagOptionalElementNames() end tag is optional}.
115: * <p>
116: * The element ends at the start of a tag which implies the termination of the element, called the <i>implicitly terminating tag</i>.
117: * If the implicitly terminating tag is situated immediately after the element's {@linkplain #getStartTag() start tag},
118: * the element is classed as a <a href="#SingleTag">single tag element</a>.
119: * <p>
120: * See the <a href="Element.html#ParsingRulesHTMLEndTagOptional">element parsing rules for HTML elements with optional end tags</a>
121: * for details on which tags can implicitly terminate a given element.
122: * <p>
123: * See also the documentation of the {@link HTMLElements#getEndTagOptionalElementNames()} method.
124: * </dl>
125: * <h3><a name="ParsingRules">Element Parsing Rules</a></h3>
126: * The following rules describe the algorithm used in the {@link StartTag#getElement()} method to construct an element.
127: * The detection of the start tag's matching end tag or other terminating tags always takes into account the possible nesting of elements.
128: * <p>
129: * <ul class="Separated">
130: * <li>
131: * If the start tag has a {@linkplain StartTag#getStartTagType() type} of {@link StartTagType#NORMAL}:
132: * <ul>
133: * <li>
134: * If the {@linkplain StartTag#getName() name} of the start tag matches one of the
135: * recognised {@linkplain HTMLElementName HTML element names} (indicating an <a href="HTMLElements.html#HTMLElement">HTML element</a>):
136: * <ul>
137: * <li>
138: * <a name="ParsingRulesHTMLEndTagForbidden"></a>
139: * If the end tag for an element of this {@linkplain StartTag#getName() name} is
140: * {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden},
141: * the parser does not conduct any search for an end tag and a <a href="#SingleTag">single tag element</a> is created.
142: * <li>
143: * <a name="ParsingRulesHTMLEndTagRequired"></a>
144: * If the end tag for an element of this {@linkplain StartTag#getName() name} is
145: * {@linkplain HTMLElements#getEndTagRequiredElementNames() required}, the parser searches for the start tag's matching end tag.
146: * <ul class="Unseparated">
147: * <li>
148: * If the matching end tag is found, an <a href="#ExplicitlyTerminated">explicitly terminated element</a> is created.
149: * <li>
150: * If no matching end tag is found, the source document is not valid HTML and the incident is
151: * {@linkplain Source#getLogger() logged} as a missing required end tag.
152: * In this situation a <a href="#SingleTag">single tag element</a> is created.
153: * </ul>
154: * <li>
155: * <a name="ParsingRulesHTMLEndTagOptional"></a>
156: * If the end tag for an element of this {@linkplain StartTag#getName() name} is
157: * {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}, the parser searches not only for the start tag's matching end tag,
158: * but also for any other tag that <a href="#ImplicitlyTerminated">implicitly terminates</a> the element.
159: * <br />For each tag (<i>T2</i>) following the start tag (<i>ST1</i>) of this element (<i>E1</i>):
160: * <ul class="Unseparated">
161: * <li>
162: * If <i>T2</i> is a start tag:
163: * <ul>
164: * <li>
165: * If the {@linkplain StartTag#getName() name} of <i>T2</i> is in the list of
166: * {@linkplain HTMLElements#getNonterminatingElementNames(String) non-terminating element names} for <i>E1</i>,
167: * then continue evaluating tags from the {@linkplain Element#getEnd() end} of <i>T2</i>'s corresponding
168: * {@linkplain StartTag#getElement() element}.
169: * <li>
170: * If the {@linkplain StartTag#getName() name} of <i>T2</i> is in the list of
171: * {@linkplain HTMLElements#getTerminatingStartTagNames(String) terminating start tag names} for <i>E1</i>,
172: * then <i>E1</i> ends at the {@linkplain StartTag#getBegin() beginning} of <i>T2</i>.
173: * If <i>T2</i> follows immediately after <i>ST1</i>, a <a href="#SingleTag">single tag element</a> is created,
174: * otherwise an <a href="#ImplicitlyTerminated">implicitly terminated element</a> is created.
175: * </ul>
176: * <li>
177: * If <i>T2</i> is an end tag:
178: * <ul>
179: * <li>
180: * If the {@linkplain EndTag#getName() name} of <i>T2</i> is the same as that of <i>ST1</i>,
181: * an <a href="#ExplicitlyTerminated">explicitly terminated element</a> is created.
182: * <li>
183: * If the {@linkplain EndTag#getName() name} of <i>T2</i> is in the list of
184: * {@linkplain HTMLElements#getTerminatingEndTagNames(String) terminating end tag names} for <i>E1</i>,
185: * then <i>E1</i> ends at the {@linkplain EndTag#getBegin() beginning} of <i>T2</i>.
186: * If <i>T2</i> follows immediately after <i>ST1</i>, a <a href="#SingleTag">single tag element</a> is created,
187: * otherwise an <a href="#ImplicitlyTerminated">implicitly terminated element</a> is created.
188: * </ul>
189: * <li>
190: * If no more tags are present in the source document, then <i>E1</i> ends at the end of the file, and an
191: * <a href="#ImplicitlyTerminated">implicitly terminated element</a> is created.
192: * </ul>
193: * </ul>
194: * Note that the syntactical indication of an {@linkplain StartTag#isEmptyElementTag() empty-element tag} in the start tag
195: * is ignored when determining the end of <a href="HTMLElements.html#HTMLElement">HTML elements</a>.
196: * See the documentation of the {@link #isEmptyElementTag()} method for more information.
197: * <li>
198: * If the {@linkplain StartTag#getName() name} of the start tag does not match one of the
199: * recognised {@linkplain HTMLElementName HTML element names} (indicating a <a href="HTMLElements.html#NonHTMLElement">non-HTML element</a>):
200: * <ul>
201: * <li>
202: * If the start tag is an {@linkplain StartTag#isEmptyElementTag() empty-element tag},
203: * the parser does not conduct any search for an end tag and a <a href="#SingleTag">single tag element</a> is created.
204: * <li>
205: * Otherwise, section <a target="_blank" href="http://www.w3.org/TR/REC-xml#CleanAttrVals">3.1</a>
206: * of the XML 1.0 specification states that a matching end tag MUST be present, and
207: * the parser searches for the start tag's matching end tag.
208: * <ul class="Unseparated">
209: * <li>
210: * If the matching end tag is found, an <a href="#ExplicitlyTerminated">explicitly terminated element</a> is created.
211: * <li>
212: * If no matching end tag is found, the source document is not valid XML and the incident is
213: * {@linkplain Source#getLogger() logged} as a missing required end tag.
214: * In this situation a <a href="#SingleTag">single tag element</a> is created.
215: * </ul>
216: * </ul>
217: * </ul>
218: * <li>
219: * If the start tag has any {@linkplain StartTag#getStartTagType() type} other than {@link StartTagType#NORMAL}:
220: * <ul>
221: * <li>
222: * If the start tag's type does not define a {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type},
223: * the parser does not conduct any search for an end tag and a <a href="#SingleTag">single tag element</a> is created.
224: * <li>
225: * If the start tag's type does define a {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type},
226: * the parser assumes that a matching end tag is required and searches for it.
227: * <ul class="Unseparated">
228: * <li>
229: * If the matching end tag is found, an <a href="#ExplicitlyTerminated">explicitly terminated element</a> is created.
230: * <li>
231: * If no matching end tag is found, the missing required end tag is {@linkplain Source#getLogger() logged}
232: * and a <a href="#SingleTag">single tag element</a> is created.
233: * </ul>
234: * </ul>
235: * </ul>
236: * @see HTMLElements
237: */
238: public final class Element extends Segment implements HTMLElementName {
239: private final StartTag startTag;
240: private final EndTag endTag;
241: private Segment content = null;
242: Element parentElement = Element.NOT_CACHED;
243: private int depth = -1;
244:
245: static final Element NOT_CACHED = new Element();
246:
247: Element(final Source source, final StartTag startTag,
248: final EndTag endTag) {
249: super (source, startTag.begin, endTag == null ? startTag.end
250: : endTag.end);
251: this .startTag = startTag;
252: this .endTag = (endTag == null || endTag.length() == 0) ? null
253: : endTag;
254: }
255:
256: private Element() {
257: startTag = null;
258: endTag = null;
259: }
260:
261: /**
262: * Returns the parent of this element in the document element hierarchy.
263: * <p>
264: * The {@link Source#fullSequentialParse()} method should be called after construction of the <code>Source</code> object if this method is to be used.
265: * <p>
266: * This method returns <code>null</code> for a <a href="Source.html#TopLevelElement">top-level element</a>,
267: * as well as any element formed from a {@linkplain TagType#isServerTag() server tag}, regardless of whether it is nested inside a normal element.
268: * <p>
269: * See the {@link Source#getChildElements()} method for more details.
270: *
271: * @return the parent of this element in the document element hierarchy, or <code>null</code> if this element is a <a href="Source.html#TopLevelElement">top-level element</a>.
272: * @see #getChildElements()
273: */
274: public Element getParentElement() {
275: if (parentElement == Element.NOT_CACHED) {
276: source.getChildElements();
277: if (parentElement == Element.NOT_CACHED)
278: parentElement = null;
279: }
280: return parentElement;
281: }
282:
283: /**
284: * Returns a list of the immediate children of this element in the document element hierarchy.
285: * <p>
286: * The objects in the list are all of type {@link Element}.
287: * <p>
288: * See the {@link Source#getChildElements()} method for more details.
289: *
290: * @return a list of the immediate children of this element in the document element hierarchy, guaranteed not <code>null</code>.
291: * @see #getParentElement()
292: */
293: public final List getChildElements() {
294: return childElements != null ? childElements
295: : getChildElements(-1);
296: }
297:
298: final List getChildElements(int depth) {
299: if (depth != -1)
300: this .depth = depth;
301: if (childElements == null) {
302: if (!Config.IncludeServerTagsInElementHierarchy
303: && end == startTag.end) {
304: childElements = Collections.EMPTY_LIST;
305: } else {
306: final int childDepth = (depth == -1 ? -1 : depth + 1);
307: childElements = new ArrayList();
308: int pos = Config.IncludeServerTagsInElementHierarchy ? begin + 1
309: : startTag.end;
310: final int maxChildBegin = (Config.IncludeServerTagsInElementHierarchy || endTag == null) ? end
311: : endTag.begin;
312: while (true) {
313: final StartTag childStartTag = source
314: .findNextStartTag(pos);
315: if (childStartTag == null
316: || childStartTag.begin >= maxChildBegin)
317: break;
318: if (Config.IncludeServerTagsInElementHierarchy) {
319: if (childStartTag.begin < startTag.end
320: && !childStartTag.getTagType()
321: .isServerTag()
322: && !startTag.getTagType().isServerTag()) {
323: // A start tag is found within another start tag, but neither is a server tag.
324: // This only legitimately happens in very rare cases like entity definitions in doctype.
325: // We don't want to include the child elements in the hierarchy.
326: pos = childStartTag.end;
327: continue;
328: }
329: } else if (childStartTag.getTagType().isServerTag()) {
330: pos = childStartTag.end;
331: continue;
332: }
333: final Element childElement = childStartTag
334: .getElement();
335: if (childElement.end > end
336: && source.logger.isInfoEnabled())
337: source.logger.info("Child "
338: + childElement.getDebugInfo()
339: + " extends beyond end of parent "
340: + getDebugInfo());
341: childElement.getChildElements(childDepth);
342: if (childElement.parentElement == Element.NOT_CACHED) { // make sure element was not added as a child of a descendent element (can happen with overlapping elements)
343: childElement.parentElement = this ;
344: childElements.add(childElement);
345: }
346: pos = childElement.end;
347: }
348: }
349: }
350: return childElements;
351: }
352:
353: /**
354: * Returns the nesting depth of this element in the document element hierarchy.
355: * <p>
356: * The {@link Source#fullSequentialParse()} method should be called after construction of the <code>Source</code> object if this method is to be used.
357: * <p>
358: * A <a href="Source.html#TopLevelElement">top-level element</a> has a nesting depth of <code>0</code>.
359: * <p>
360: * An element formed from a {@linkplain TagType#isServerTag() server tag} always have a nesting depth of <code>0</code>,
361: * regardless of whether it is nested inside a normal element.
362: * <p>
363: * See the {@link Source#getChildElements()} method for more details.
364: *
365: * @return the nesting depth of this element in the document element hierarchy.
366: * @see #getParentElement()
367: */
368: public int getDepth() {
369: if (depth == -1) {
370: getParentElement();
371: if (depth == -1)
372: depth = 0;
373: }
374: return depth;
375: }
376:
377: /**
378: * Returns the segment representing the <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-content">content</a> of the element.
379: * <p>
380: * This segment spans between the end of the start tag and the start of the end tag.
381: * If the end tag is not present, the content reaches to the end of the element.
382: * <p>
383: * Note that before version 2.0 this method returned <code>null</code> if the element was {@linkplain #isEmpty() empty},
384: * whereas now a zero-length segment is returned.
385: *
386: * @return the segment representing the content of the element, guaranteed not <code>null</code>.
387: */
388: public Segment getContent() {
389: if (content == null)
390: content = new Segment(source, startTag.end, getContentEnd());
391: return content;
392: }
393:
394: /**
395: * Returns the start tag of the element.
396: * @return the start tag of the element.
397: */
398: public StartTag getStartTag() {
399: return startTag;
400: }
401:
402: /**
403: * Returns the end tag of the element.
404: * <p>
405: * If the element has no end tag this method returns <code>null</code>.
406: *
407: * @return the end tag of the element, or <code>null</code> if the element has no end tag.
408: */
409: public EndTag getEndTag() {
410: return endTag;
411: }
412:
413: /**
414: * Returns the {@linkplain StartTag#getName() name} of the {@linkplain #getStartTag() start tag} of this element, always in lower case.
415: * <p>
416: * This is equivalent to {@link #getStartTag()}<code>.</code>{@link StartTag#getName() getName()}.
417: * <p>
418: * See the {@link Tag#getName()} method for more information.
419: *
420: * @return the name of the {@linkplain #getStartTag() start tag} of this element, always in lower case.
421: */
422: public String getName() {
423: return startTag.getName();
424: }
425:
426: /**
427: * Indicates whether this element has zero-length {@linkplain #getContent() content}.
428: * <p>
429: * This is equivalent to {@link #getContent()}<code>.</code>{@link Segment#length() length()}<code>==0</code>.
430: * <p>
431: * Note that this is a broader definition than that of both the
432: * <a target="_blank" href="http://www.w3.org/TR/html401/intro/sgmltut.html#didx-element-4">HTML definition of an empty element</a>,
433: * which is only those elements whose end tag is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}, and the
434: * <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-empty">XML definition of an empty element</a>,
435: * which is "either a start-tag immediately followed by an end-tag, or an {@linkplain #isEmptyElementTag() empty-element tag}".
436: * The other possibility covered by this property is the case of an <a href="HTMLElements.html#HTMLElement">HTML element</a> with an
437: * {@linkplain HTMLElements#getEndTagOptionalElementNames() optional} end tag that is immediately followed by another tag that implicitly
438: * terminates the element.
439: *
440: * @return <code>true</code> if this element has zero-length {@linkplain #getContent() content}, otherwise <code>false</code>.
441: * @see #isEmptyElementTag()
442: */
443: public boolean isEmpty() {
444: return startTag.end == getContentEnd();
445: }
446:
447: /**
448: * Indicates whether this element is an <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-eetag">empty-element tag</a>.
449: * <p>
450: * It is signified by an {@linkplain #isEmpty() empty} element with the characters "<code>/></code>" at the end of the
451: * {@linkplain #getStartTag() start tag}.
452: * <p>
453: * This is equivalent to {@link #isEmpty()}<code> && </code>{@link #getStartTag()}<code>.</code>{@link StartTag#isEmptyElementTag() isEmptyElementTag()}.
454: * <p>
455: * The {@link StartTag#isEmptyElementTag()} property only checks whether the start tag syntactically an
456: * <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-eetag">empty-element tag</a>, whereas this property also makes sure
457: * the element is in fact {@linkplain #isEmpty() empty}.
458: * <p>
459: * A syntactical empty-element tag that is not actually empty can occur if the end tag of an <a href="HTMLElements.html#HTMLElement">HTML element</a>
460: * is either {@linkplain HTMLElements#getEndTagRequiredElementNames() required} or {@linkplain HTMLElements#getEndTagOptionalElementNames() optional},
461: * but the start tag is erroneously terminated with the characters "<code>/></code>" in the source document.
462: * All major browsers ignore the syntactical hint of an empty element in this case, even in an
463: * <a target="_blank" href="http://www.w3.org/TR/xhtml1/">XHTML</a> document, so this parser does the same.
464: *
465: * @return <code>true</code> if this element is an <a target="_blank" href="http://www.w3.org/TR/REC-xml#dt-eetag">empty-element tag</a>, otherwise <code>false</code>.
466: */
467: public boolean isEmptyElementTag() {
468: return isEmpty() && startTag.isEmptyElementTag();
469: }
470:
471: /**
472: * Returns the attributes specified in this element's start tag.
473: * <p>
474: * This is equivalent to {@link #getStartTag()}<code>.</code>{@link StartTag#getAttributes() getAttributes()}.
475: *
476: * @return the attributes specified in this element's start tag.
477: * @see StartTag#getAttributes()
478: */
479: public Attributes getAttributes() {
480: return getStartTag().getAttributes();
481: }
482:
483: /**
484: * Returns the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name (case insensitive).
485: * <p>
486: * Returns <code>null</code> if the {@linkplain #getStartTag() start tag of this element} does not
487: * {@linkplain StartTagType#hasAttributes() have attributes},
488: * no attribute with the specified name exists or the attribute {@linkplain Attribute#hasValue() has no value}.
489: * <p>
490: * This is equivalent to {@link #getStartTag()}<code>.</code>{@link StartTag#getAttributeValue(String) getAttributeValue(attributeName)}.
491: *
492: * @param attributeName the name of the attribute to get.
493: * @return the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name, or <code>null</code> if the attribute does not exist or {@linkplain Attribute#hasValue() has no value}.
494: */
495: public String getAttributeValue(final String attributeName) {
496: return getStartTag().getAttributeValue(attributeName);
497: }
498:
499: /**
500: * Returns the {@link FormControl} defined by this element.
501: * @return the {@link FormControl} defined by this element, or <code>null</code> if it is not a <a target="_blank" href="http://www.w3.org/TR/html401/interact/forms.html#form-controls">control</a>.
502: */
503: public FormControl getFormControl() {
504: return FormControl.construct(this );
505: }
506:
507: public String getDebugInfo() {
508: if (this == NOT_CACHED)
509: return "NOT_CACHED";
510: final StringBuffer sb = new StringBuffer();
511: sb.append("Element ");
512: startTag.appendDebugTag(sb);
513: if (!isEmpty())
514: sb.append('-');
515: if (endTag != null)
516: sb.append(endTag);
517: sb.append(' ');
518: startTag.appendDebugTagType(sb);
519: sb.append(super .getDebugInfo());
520: return sb.toString();
521: }
522:
523: int getContentEnd() {
524: return endTag != null ? endTag.begin : end;
525: }
526: }
|