001: /*
002: * Copyright (c) 2002-2008 Gargoyle Software Inc. All rights reserved.
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * 1. Redistributions of source code must retain the above copyright notice,
008: * this list of conditions and the following disclaimer.
009: * 2. Redistributions in binary form must reproduce the above copyright notice,
010: * this list of conditions and the following disclaimer in the documentation
011: * and/or other materials provided with the distribution.
012: * 3. The end-user documentation included with the redistribution, if any, must
013: * include the following acknowledgment:
014: *
015: * "This product includes software developed by Gargoyle Software Inc.
016: * (http://www.GargoyleSoftware.com/)."
017: *
018: * Alternately, this acknowledgment may appear in the software itself, if
019: * and wherever such third-party acknowledgments normally appear.
020: * 4. The name "Gargoyle Software" must not be used to endorse or promote
021: * products derived from this software without prior written permission.
022: * For written permission, please contact info@GargoyleSoftware.com.
023: * 5. Products derived from this software may not be called "HtmlUnit", nor may
024: * "HtmlUnit" appear in their name, without prior written permission of
025: * Gargoyle Software Inc.
026: *
027: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
028: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
029: * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARGOYLE
030: * SOFTWARE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
031: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
032: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
033: * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
034: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
035: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
036: * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
037: */
038: package com.gargoylesoftware.htmlunit.html;
039:
import java.io.IOException;
import java.io.StringReader;
import java.lang.reflect.InvocationTargetException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Stack;

import org.apache.xerces.parsers.AbstractSAXParser;
import org.apache.xerces.util.DefaultErrorHandler;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.apache.xerces.xni.parser.XMLParseException;
import org.cyberneko.html.HTMLConfiguration;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;

import com.gargoylesoftware.htmlunit.Assert;
import com.gargoylesoftware.htmlunit.ObjectInstantiationException;
import com.gargoylesoftware.htmlunit.TextUtil;
import com.gargoylesoftware.htmlunit.WebResponse;
import com.gargoylesoftware.htmlunit.WebWindow;
066:
067: /**
068: * SAX parser implementation that uses the neko {@link org.cyberneko.html.HTMLConfiguration}
069: * to parse HTML into a HtmlUnit-specific DOM (HU-DOM) tree.
070: * <p>
071: * <em>Note that the parser currently does not handle CDATA or comment sections, i.e. these
072: * do not appear in the resulting DOM tree</em>
073: *
074: * @version $Revision: 2132 $
075: * @author <a href="mailto:cse@dynabean.de">Christian Sell</a>
076: * @author David K. Taylor
077: * @author Chris Erskine
078: * @author Ahmed Ashour
079: */
080: public final class HTMLParser {
081:
082: private static final Map ELEMENT_FACTORIES = new HashMap();
083: private static boolean IgnoreOutsideContent_;
084:
085: static {
086: ELEMENT_FACTORIES.put("input", InputElementFactory.instance);
087:
088: final DefaultElementFactory defaultElementFactory = new DefaultElementFactory();
089: ELEMENT_FACTORIES.put(HtmlAnchor.TAG_NAME,
090: defaultElementFactory);
091: ELEMENT_FACTORIES.put(HtmlApplet.TAG_NAME,
092: defaultElementFactory);
093: ELEMENT_FACTORIES.put(HtmlAddress.TAG_NAME,
094: defaultElementFactory);
095: ELEMENT_FACTORIES.put(HtmlArea.TAG_NAME, defaultElementFactory);
096: ELEMENT_FACTORIES.put(HtmlBase.TAG_NAME, defaultElementFactory);
097: ELEMENT_FACTORIES.put(HtmlBaseFont.TAG_NAME,
098: defaultElementFactory);
099: ELEMENT_FACTORIES.put(HtmlBidirectionalOverride.TAG_NAME,
100: defaultElementFactory);
101: ELEMENT_FACTORIES.put(HtmlBlockQuote.TAG_NAME,
102: defaultElementFactory);
103: ELEMENT_FACTORIES.put(HtmlBody.TAG_NAME, defaultElementFactory);
104: ELEMENT_FACTORIES
105: .put(HtmlBreak.TAG_NAME, defaultElementFactory);
106: ELEMENT_FACTORIES.put(HtmlButton.TAG_NAME,
107: defaultElementFactory);
108: ELEMENT_FACTORIES.put(HtmlCaption.TAG_NAME,
109: defaultElementFactory);
110: ELEMENT_FACTORIES.put(HtmlCenter.TAG_NAME,
111: defaultElementFactory);
112: ELEMENT_FACTORIES.put(HtmlTableColumn.TAG_NAME,
113: defaultElementFactory);
114: ELEMENT_FACTORIES.put(HtmlTableColumnGroup.TAG_NAME,
115: defaultElementFactory);
116: ELEMENT_FACTORIES.put(HtmlDefinitionDescription.TAG_NAME,
117: defaultElementFactory);
118: ELEMENT_FACTORIES.put(HtmlDeletedText.TAG_NAME,
119: defaultElementFactory);
120: ELEMENT_FACTORIES.put(HtmlTextDirection.TAG_NAME,
121: defaultElementFactory);
122: ELEMENT_FACTORIES.put(HtmlDivision.TAG_NAME,
123: defaultElementFactory);
124: ELEMENT_FACTORIES.put(HtmlDefinitionList.TAG_NAME,
125: defaultElementFactory);
126: ELEMENT_FACTORIES.put(HtmlDefinitionTerm.TAG_NAME,
127: defaultElementFactory);
128: ELEMENT_FACTORIES.put(HtmlFieldSet.TAG_NAME,
129: defaultElementFactory);
130: ELEMENT_FACTORIES.put(HtmlFont.TAG_NAME, defaultElementFactory);
131: ELEMENT_FACTORIES.put(HtmlForm.TAG_NAME, defaultElementFactory);
132: ELEMENT_FACTORIES
133: .put(HtmlFrame.TAG_NAME, defaultElementFactory);
134: ELEMENT_FACTORIES.put(HtmlFrameSet.TAG_NAME,
135: defaultElementFactory);
136: ELEMENT_FACTORIES.put(HtmlHeader1.TAG_NAME,
137: defaultElementFactory);
138: ELEMENT_FACTORIES.put(HtmlHeader2.TAG_NAME,
139: defaultElementFactory);
140: ELEMENT_FACTORIES.put(HtmlHeader3.TAG_NAME,
141: defaultElementFactory);
142: ELEMENT_FACTORIES.put(HtmlHeader4.TAG_NAME,
143: defaultElementFactory);
144: ELEMENT_FACTORIES.put(HtmlHeader5.TAG_NAME,
145: defaultElementFactory);
146: ELEMENT_FACTORIES.put(HtmlHeader6.TAG_NAME,
147: defaultElementFactory);
148: ELEMENT_FACTORIES.put(HtmlHead.TAG_NAME, defaultElementFactory);
149: ELEMENT_FACTORIES.put(HtmlHorizontalRule.TAG_NAME,
150: defaultElementFactory);
151: ELEMENT_FACTORIES.put(HtmlHtml.TAG_NAME, defaultElementFactory);
152: ELEMENT_FACTORIES.put(HtmlInlineFrame.TAG_NAME,
153: defaultElementFactory);
154: ELEMENT_FACTORIES
155: .put(HtmlImage.TAG_NAME, defaultElementFactory);
156: ELEMENT_FACTORIES.put(HtmlInsertedText.TAG_NAME,
157: defaultElementFactory);
158: ELEMENT_FACTORIES.put(HtmlIsIndex.TAG_NAME,
159: defaultElementFactory);
160: ELEMENT_FACTORIES
161: .put(HtmlLabel.TAG_NAME, defaultElementFactory);
162: ELEMENT_FACTORIES.put(HtmlLegend.TAG_NAME,
163: defaultElementFactory);
164: ELEMENT_FACTORIES.put(HtmlListItem.TAG_NAME,
165: defaultElementFactory);
166: ELEMENT_FACTORIES.put(HtmlLink.TAG_NAME, defaultElementFactory);
167: ELEMENT_FACTORIES.put(HtmlMap.TAG_NAME, defaultElementFactory);
168: ELEMENT_FACTORIES.put(HtmlMenu.TAG_NAME, defaultElementFactory);
169: ELEMENT_FACTORIES.put(HtmlMeta.TAG_NAME, defaultElementFactory);
170: ELEMENT_FACTORIES.put(HtmlNoFrames.TAG_NAME,
171: defaultElementFactory);
172: ELEMENT_FACTORIES.put(HtmlNoScript.TAG_NAME,
173: defaultElementFactory);
174: ELEMENT_FACTORIES.put(HtmlObject.TAG_NAME,
175: defaultElementFactory);
176: ELEMENT_FACTORIES.put(HtmlOrderedList.TAG_NAME,
177: defaultElementFactory);
178: ELEMENT_FACTORIES.put(HtmlOptionGroup.TAG_NAME,
179: defaultElementFactory);
180: ELEMENT_FACTORIES.put(HtmlOption.TAG_NAME,
181: defaultElementFactory);
182: ELEMENT_FACTORIES.put(HtmlParagraph.TAG_NAME,
183: defaultElementFactory);
184: ELEMENT_FACTORIES.put(HtmlParameter.TAG_NAME,
185: defaultElementFactory);
186: ELEMENT_FACTORIES.put(HtmlPreformattedText.TAG_NAME,
187: defaultElementFactory);
188: ELEMENT_FACTORIES.put(HtmlInlineQuotation.TAG_NAME,
189: defaultElementFactory);
190: ELEMENT_FACTORIES.put(HtmlScript.TAG_NAME,
191: defaultElementFactory);
192: ELEMENT_FACTORIES.put(HtmlSelect.TAG_NAME,
193: defaultElementFactory);
194: ELEMENT_FACTORIES.put(HtmlSpan.TAG_NAME, defaultElementFactory);
195: ELEMENT_FACTORIES
196: .put(HtmlStyle.TAG_NAME, defaultElementFactory);
197: ELEMENT_FACTORIES
198: .put(HtmlTitle.TAG_NAME, defaultElementFactory);
199:
200: ELEMENT_FACTORIES
201: .put(HtmlTable.TAG_NAME, defaultElementFactory);
202: ELEMENT_FACTORIES.put(HtmlTableBody.TAG_NAME,
203: defaultElementFactory);
204: ELEMENT_FACTORIES.put(HtmlTableDataCell.TAG_NAME,
205: defaultElementFactory);
206: ELEMENT_FACTORIES.put(HtmlTableHeaderCell.TAG_NAME,
207: defaultElementFactory);
208: ELEMENT_FACTORIES.put(HtmlTableRow.TAG_NAME,
209: defaultElementFactory);
210:
211: ELEMENT_FACTORIES.put(HtmlTextArea.TAG_NAME,
212: defaultElementFactory);
213: ELEMENT_FACTORIES.put(HtmlTableFooter.TAG_NAME,
214: defaultElementFactory);
215: ELEMENT_FACTORIES.put(HtmlTableHeader.TAG_NAME,
216: defaultElementFactory);
217: ELEMENT_FACTORIES.put(HtmlUnorderedList.TAG_NAME,
218: defaultElementFactory);
219: }
220:
221: /**
222: * Set the flag to control validation of the HTML content that is outside of the
223: * BODY and HTML tags. This flag is false by default to maintain compatibility with
224: * current NekoHTML defaults.
225: * @param ignoreOutsideContent - boolean flag to set
226: */
227: public static void setIgnoreOutsideContent(
228: final boolean ignoreOutsideContent) {
229: IgnoreOutsideContent_ = ignoreOutsideContent;
230: }
231:
232: /**
233: * Get the state of the flag to ignore content outside the BODY and HTML tags
234: * @return - The current state
235: */
236: public static boolean getIgnoreOutsideContent() {
237: return IgnoreOutsideContent_;
238: }
239:
240: /**
241: * @param tagName an HTML element tag name
242: * @return a factory for creating HtmlElements representing the given tag
243: */
244: public static IElementFactory getFactory(final String tagName) {
245: final IElementFactory result = (IElementFactory) ELEMENT_FACTORIES
246: .get(tagName);
247:
248: if (result != null) {
249: return result;
250: } else {
251: return UnknownElementFactory.instance;
252: }
253: }
254:
255: /**
256: * You should never need to create one of these!
257: */
258: private HTMLParser() {
259: }
260:
261: /**
262: * Parses the HTML content from the given string into an object tree representation.
263: *
264: * @param parent the parent for the new nodes
265: * @param source the (X)HTML to be parsed
266: * @throws SAXException if a SAX error occurs
267: * @throws IOException if an IO error occurs
268: */
269: public static void parseFragment(final DomNode parent,
270: final String source) throws SAXException, IOException {
271:
272: final URL url = parent.getPage().getWebResponse().getUrl();
273: final HtmlUnitDOMBuilder domBuilder = new HtmlUnitDOMBuilder(
274: parent, url);
275: domBuilder
276: .setFeature(
277: "http://cyberneko.org/html/features/balance-tags/document-fragment",
278: true);
279: final XMLInputSource in = new XMLInputSource(null, parent
280: .getPage().getWebResponse().getUrl().toString(), null,
281: new StringReader(source), null);
282:
283: domBuilder.parse(in);
284: }
285:
286: /**
287: * parse the HTML content from the given WebResponse into an object tree representation
288: *
289: * @param webResponse the response data
290: * @param webWindow the web window into which the page is to be loaded
291: * @return the page object which forms the root of the DOM tree, or <code>null</code> if the <HTML>
292: * tag is missing
293: * @throws java.io.IOException io error
294: */
295: public static HtmlPage parse(final WebResponse webResponse,
296: final WebWindow webWindow) throws IOException {
297: final HtmlPage page = new HtmlPage(webResponse.getUrl(),
298: webResponse, webWindow);
299: webWindow.setEnclosedPage(page);
300:
301: final HtmlUnitDOMBuilder domBuilder = new HtmlUnitDOMBuilder(
302: page, webResponse.getUrl());
303: String charSet = webResponse.getContentCharSet();
304: if (!Charset.isSupported(charSet)) {
305: charSet = TextUtil.DEFAULT_CHARSET;
306: }
307: final XMLInputSource in = new XMLInputSource(null, webResponse
308: .getUrl().toString(), null, webResponse
309: .getContentAsStream(), charSet);
310:
311: try {
312: domBuilder.parse(in);
313: } catch (final XNIException e) {
314: // extract enclosed exception
315: final Throwable origin = extractNestedException(e);
316: throw new RuntimeException("Failed parsing content from "
317: + webResponse.getUrl(), origin);
318: }
319: return domBuilder.page_;
320: }
321:
322: /**
323: * Extract nested exception within an XNIException
324: * (Nekohtml uses reflection and generated exceptions are wrapped many times
325: * within XNIException and InvocationTargetException)
326: * @param e the original XNIException
327: * @return the cause exception
328: */
329: static Throwable extractNestedException(final Throwable e) {
330: Throwable originalException = e;
331: Throwable cause = ((XNIException) e).getException();
332: while (cause != null) {
333: originalException = cause;
334: if (cause instanceof XNIException) {
335: cause = ((XNIException) cause).getException();
336: } else if (cause instanceof InvocationTargetException) {
337: cause = cause.getCause();
338: } else {
339: cause = null;
340: }
341: }
342: return originalException;
343: }
344:
345: /**
346: * The parser and DOM builder. This class subclasses Xerces's AbstractSAXParser and implements
347: * the ContentHandler interface. Thus all parser APIs are kept private. The ContentHandler methods
348: * consume SAX events to build the page DOM
349: */
350: private static final class HtmlUnitDOMBuilder extends
351: AbstractSAXParser implements ContentHandler, LexicalHandler {
352: private final HtmlPage page_;
353:
354: private Locator locator_;
355: private final Stack stack_ = new Stack();
356:
357: private DomNode currentNode_;
358: private StringBuffer characters_;
359: private boolean headParsed_ = false;
360:
361: /**
362: * create a new builder for parsing the given response contents
363: * @param webResponse the response data
364: * @param webWindow the web window into which the page is to be loaded
365: */
366: private HtmlUnitDOMBuilder(final DomNode page, final URL url) {
367: super (new HTMLConfiguration());
368: this .page_ = page.getPage();
369:
370: currentNode_ = page;
371: stack_.push(currentNode_);
372:
373: final HTMLParserListener listener = page_.getWebClient()
374: .getHTMLParserListener();
375: final boolean reportErrors;
376: if (listener != null) {
377: reportErrors = true;
378: fConfiguration.setErrorHandler(new HTMLErrorHandler(
379: listener, url));
380: } else {
381: reportErrors = false;
382: }
383:
384: try {
385: setFeature(
386: "http://cyberneko.org/html/features/augmentations",
387: true);
388: setProperty(
389: "http://cyberneko.org/html/properties/names/elems",
390: "lower");
391: setFeature(
392: "http://cyberneko.org/html/features/report-errors",
393: reportErrors);
394: setFeature(
395: "http://cyberneko.org/html/features/balance-tags/ignore-outside-content",
396: IgnoreOutsideContent_);
397:
398: setContentHandler(this );
399: setLexicalHandler(this ); //comments and CDATA
400:
401: } catch (final SAXException e) {
402: throw new ObjectInstantiationException(
403: "unable to create HTML parser", e);
404: }
405: }
406:
407: /**
408: * @return the document locator
409: */
410: public Locator getLocator() {
411: return locator_;
412: }
413:
414: /**
415: * set the document locator
416: * @param locator
417: */
418: public void setDocumentLocator(final Locator locator) {
419: locator_ = locator;
420: }
421:
422: /** @inheritDoc ContentHandler#startDocument() */
423: public void startDocument() throws SAXException {
424: }
425:
426: /** @inheritDoc ContentHandler#startElement(String,String,String,Attributes) */
427: public void startElement(final String namespaceURI,
428: final String localName, final String qName,
429: final Attributes atts) throws SAXException {
430:
431: handleCharacters();
432:
433: final String tagLower = localName.toLowerCase();
434:
435: if (tagLower.equals("head")) {
436: headParsed_ = true;
437: }
438: // add a head if none was there
439: else if (!headParsed_
440: && (tagLower.equals("body") || tagLower
441: .equals("frameset"))) {
442: final IElementFactory factory = getElementFactory("head");
443: final HtmlElement newElement = factory.createElement(
444: page_, "head", null);
445: currentNode_.appendDomChild(newElement);
446: headParsed_ = true;
447: }
448: // add a <tbody> if a <tr> is directly in <table>
449: else if (tagLower.equals("tr")
450: && currentNode_.getNodeName().equals("table")) {
451: final IElementFactory factory = getElementFactory("tbody");
452: final HtmlElement newElement = factory.createElement(
453: page_, "tbody", null);
454: currentNode_.appendDomChild(newElement);
455: currentNode_ = newElement;
456: stack_.push(currentNode_);
457: }
458:
459: final IElementFactory factory = getElementFactory(tagLower);
460: final HtmlElement newElement = factory.createElement(page_,
461: tagLower, atts);
462: newElement.setStartLocation(locator_.getLineNumber(),
463: locator_.getColumnNumber());
464: currentNode_.appendDomChild(newElement);
465: currentNode_ = newElement;
466: stack_.push(currentNode_);
467: }
468:
469: /** @inheritDoc ContentHandler@endElement(String,String,String) */
470: public void endElement(final String namespaceURI,
471: final String localName, final String qName)
472: throws SAXException {
473:
474: handleCharacters();
475:
476: final DomNode previousNode = (DomNode) stack_.pop(); //remove currentElement from stack
477: previousNode.setEndLocation(locator_.getLineNumber(),
478: locator_.getColumnNumber());
479: previousNode.onAllChildrenAddedToPage();
480:
481: // if we have added a extra node (tbody), we should remove it
482: if (!currentNode_.getNodeName().equalsIgnoreCase(localName)) {
483: stack_.pop(); //remove extra node from stack
484: }
485:
486: if (!stack_.isEmpty()) {
487: currentNode_ = (DomNode) stack_.peek();
488: }
489: }
490:
491: /** @inheritDoc ContentHandler#characters(char,int,int) */
492: public void characters(final char ch[], final int start,
493: final int length) throws SAXException {
494:
495: if (characters_ == null) {
496: characters_ = new StringBuffer();
497: }
498: characters_.append(ch, start, length);
499: }
500:
501: /** @inheritDoc ContentHandler#ignorableWhitespace(char,int,int) */
502: public void ignorableWhitespace(final char ch[],
503: final int start, final int length) throws SAXException {
504:
505: if (characters_ == null) {
506: characters_ = new StringBuffer();
507: }
508: characters_.append(ch, start, length);
509: }
510:
511: /**
512: * pick up the character data accumulated so far and add it to the
513: * current element as a text node
514: */
515: private void handleCharacters() {
516:
517: if (characters_ != null && characters_.length() > 0) {
518: final DomText text = new DomText(page_, characters_
519: .toString());
520: currentNode_.appendDomChild(text);
521: characters_.setLength(0);
522: }
523: }
524:
525: /**
526: * @param tagName an HTML tag name, in lowercase
527: * @return the pre-registered element factory for the tag, or an UnknownElementFactory
528: */
529: private IElementFactory getElementFactory(final String tagName) {
530:
531: final IElementFactory factory = (IElementFactory) ELEMENT_FACTORIES
532: .get(tagName);
533:
534: if (factory != null) {
535: return factory;
536: } else {
537: return UnknownElementFactory.instance;
538: }
539: }
540:
541: /** @inheritDoc ContentHandler#endDocument() */
542: public void endDocument() throws SAXException {
543: handleCharacters();
544: final DomNode currentPage = page_;
545: currentPage.setEndLocation(locator_.getLineNumber(),
546: locator_.getColumnNumber());
547: }
548:
549: /** @inheritDoc ContentHandler#startPrefixMapping(String,String) */
550: public void startPrefixMapping(final String prefix,
551: final String uri) throws SAXException {
552: }
553:
554: /** @inheritDoc ContentHandler#endPrefixMapping(String) */
555: public void endPrefixMapping(final String prefix)
556: throws SAXException {
557: }
558:
559: /** @inheritDoc ContentHandler#processingInstrucction(String,String) */
560: public void processingInstruction(final String target,
561: final String data) throws SAXException {
562: }
563:
564: /** @inheritDoc ContentHandler#skippedEntity(String) */
565: public void skippedEntity(final String name)
566: throws SAXException {
567: }
568:
569: // LexicalHandler methods
570:
571: /** @inheritDoc LexicalHandler#comment(char[],int,int) */
572: public void comment(final char[] ch, final int start,
573: final int length) {
574: handleCharacters();
575: final DomComment comment = new DomComment(page_, String
576: .valueOf(ch, start, length));
577: currentNode_.appendDomChild(comment);
578: }
579:
580: /** @inheritDoc LexicalHandler#endCDATA() */
581: public void endCDATA() {
582: }
583:
584: /** @inheritDoc LexicalHandler#endDTD() */
585: public void endDTD() {
586: }
587:
588: /** @inheritDoc LexicalHandler#endEntity() */
589: public void endEntity(final String name) {
590: }
591:
592: /** @inheritDoc LexicalHandler#startCDATA() */
593: public void startCDATA() {
594: }
595:
596: /** @inheritDoc LexicalHandler#startDTD(String,String,String) */
597: public void startDTD(final String name, final String publicId,
598: final String systemId) {
599: }
600:
601: /** @inheritDoc LexicalHandler#startEntity(String) */
602: public void startEntity(final String name) {
603: }
604: }
605: }
606:
607: /**
608: * Utility to transmit parsing errors to a {@link HTMLParserListener}.
609: */
610: class HTMLErrorHandler extends DefaultErrorHandler {
611: private final HTMLParserListener listener_;
612: private final URL url_;
613:
614: HTMLErrorHandler(final HTMLParserListener listener, final URL url) {
615: Assert.notNull("listener", listener);
616: Assert.notNull("url", url);
617: listener_ = listener;
618: url_ = url;
619: }
620:
621: /** @see DefaultErrorHandler#error(String,String,XMLParseException) */
622: public void error(final String domain, final String key,
623: final XMLParseException exception) throws XNIException {
624: listener_.error(exception.getMessage(), url_, exception
625: .getLineNumber(), exception.getColumnNumber(), key);
626: }
627:
628: /** @see DefaultErrorHandler#warning(String,String,XMLParseException) */
629: public void warning(final String domain, final String key,
630: final XMLParseException exception) throws XNIException {
631: listener_.warning(exception.getMessage(), url_, exception
632: .getLineNumber(), exception.getColumnNumber(), key);
633: }
634: }
|