01: package com.meterware.httpunit.parsing;
02:
03: /********************************************************************************************************************
04: * $Id: NekoHTMLParser.java,v 1.4 2003/03/09 20:35:47 russgold Exp $
05: *
06: * Copyright (c) 2002-2003, Russell Gold
07: *
08: * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
09: * documentation files (the "Software"), to deal in the Software without restriction, including without limitation
10: * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
11: * to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12: *
13: * The above copyright notice and this permission notice shall be included in all copies or substantial portions
14: * of the Software.
15: *
16: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
17: * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19: * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20: * DEALINGS IN THE SOFTWARE.
21: *
22: *******************************************************************************************************************/
23: import org.xml.sax.SAXException;
24: import org.xml.sax.InputSource;
25:
26: import java.net.URL;
27: import java.io.IOException;
28: import java.io.StringReader;
29:
30: /**
31: *
32: * @author <a href="mailto:russgold@httpunit.org">Russell Gold</a>
33: * @author <a href="mailto:bw@xmlizer.biz">Bernhard Wagner</a>
34: * @author <a href="mailto:Artashes.Aghajanyan@lycos-europe.com">Artashes Aghajanyan</a>
35: **/
36: class NekoHTMLParser implements HTMLParser {
37:
38: public void parse(URL pageURL, String pageText,
39: DocumentAdapter adapter) throws IOException, SAXException {
40: try {
41: NekoDOMParser parser = NekoDOMParser.newParser(adapter,
42: pageURL);
43: parser.parse(new InputSource(new StringReader(pageText)));
44: adapter.setRootNode(parser.getDocument());
45: } catch (NekoDOMParser.ScriptException e) {
46: throw e.getException();
47: }
48: }
49:
50: public String getCleanedText(String string) {
51: return (string == null) ? "" : string.replace(NBSP, ' ');
52: }
53:
54: public boolean supportsPreserveTagCase() {
55: return true;
56: }
57:
58: public boolean supportsReturnHTMLDocument() {
59: return true;
60: }
61:
62: public boolean supportsParserWarnings() {
63: return true;
64: }
65:
66: final private static char NBSP = (char) 160; // non-breaking space, defined by nekoHTML
67: }
|