01: /*
02: * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
03: *
04: * This file is part of Resin(R) Open Source
05: *
06: * Each copy or derived work must preserve the copyright notice and this
07: * notice unmodified.
08: *
09: * Resin Open Source is free software; you can redistribute it and/or modify
10: * it under the terms of the GNU General Public License as published by
11: * the Free Software Foundation; either version 2 of the License, or
12: * (at your option) any later version.
13: *
14: * Resin Open Source is distributed in the hope that it will be useful,
15: * but WITHOUT ANY WARRANTY; without even the implied warranty of
16: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
17: * of NON-INFRINGEMENT. See the GNU General Public License for more
18: * details.
19: *
20: * You should have received a copy of the GNU General Public License
21: * along with Resin Open Source; if not, write to the
22: * Free SoftwareFoundation, Inc.
23: * 59 Temple Place, Suite 330
24: * Boston, MA 02111-1307 USA
25: *
26: * @author Scott Ferguson
27: */
28:
29: package com.caucho.xml;
30:
31: /**
32: * A forgiving HTML parser interface.
33: *
34: * <p>The forgiving HTML parser is useful for extracting information from
35: * the web since many sites have not-quite-standard HTML.
36: *
37: * <p>To parse a file into a DOM Document use
38: * <pre><code>
39: * Document doc = new LooseHtml().parseDocument("foo.html");
40: * </code></pre>
41: *
42: * <p>To parse a string into a DOM Document use
43: * <pre><code>
44: * String html = "&lt;h1&gt;small test&lt;/h1&gt;";
45: * Document doc = new LooseHtml().parseDocumentString(html);
46: * </code></pre>
47: *
48: * <p>To parse a file using the SAX API use
49: * <pre><code>
50: * LooseHtml html = new LooseHtml();
51: * html.setContentHandler(myContentHandler);
52: * html.parse("foo.html");
53: * </code></pre>
54: */
55: public class LooseHtml extends XmlParser {
56: /**
57: * Create a new forgiving HTML parser
58: */
59: public LooseHtml() {
60: super (new HtmlPolicy(), null);
61:
62: _policy.forgiving = true;
63: _forgiving = true;
64: _extraForgiving = true;
65: }
66: }
|