01: package com.meterware.httpunit.parsing;
02:
03: /********************************************************************************************************************
04: * $Id: HTMLParser.java,v 1.3 2002/12/26 04:59:35 russgold Exp $
05: *
06: * Copyright (c) 2002, Russell Gold
07: *
08: * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
09: * documentation files (the "Software"), to deal in the Software without restriction, including without limitation
10: * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
11: * to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12: *
13: * The above copyright notice and this permission notice shall be included in all copies or substantial portions
14: * of the Software.
15: *
16: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
17: * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19: * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20: * DEALINGS IN THE SOFTWARE.
21: *
22: *******************************************************************************************************************/
23: import org.xml.sax.SAXException;
24:
25: import java.net.URL;
26: import java.io.IOException;
27:
28: /**
29: * A front end to a DOM parser that can handle HTML.
30: *
31: * @since 1.5.2
32: * @author <a href="mailto:russgold@httpunit.org">Russell Gold</a>
33: * @author <a href="mailto:bw@xmlizer.biz">Bernhard Wagner</a>
34: **/
35: public interface HTMLParser {
36:
37: /**
38: * Parses the specified text string as a Document, registering it in the HTMLPage.
39: * Any error reporting will be annotated with the specified URL.
40: */
41: public void parse(URL baseURL, String pageText,
42: DocumentAdapter adapter) throws IOException, SAXException;
43:
44: /**
45: * Removes any string artifacts placed in the text by the parser. For example, a parser may choose to encode
46: * an HTML entity as a special character. This method should convert that character to normal text.
47: */
48: public String getCleanedText(String string);
49:
50: /**
51: * Returns true if this parser supports preservation of the case of tag and attribute names.
52: */
53: public boolean supportsPreserveTagCase();
54:
55: /**
56: * Returns true if this parser can return an HTMLDocument object.
57: */
58: public boolean supportsReturnHTMLDocument();
59:
60: /**
61: * Returns true if this parser can display parser warnings.
62: */
63: public boolean supportsParserWarnings();
64: }
|