001: /*
002: * Copyright 2002-2008 Andy Clark
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.cyberneko.html.parsers;
018:
019: import org.apache.xerces.xni.Augmentations;
020: import org.apache.xerces.xni.XNIException;
021: import org.cyberneko.html.HTMLConfiguration;
022:
023: /**
024: * A DOM parser for HTML documents.
025: *
026: * @author Andy Clark
027: *
028: * @version $Id: DOMParser.java,v 1.5 2005/02/14 03:56:54 andyc Exp $
029: */
030: public class DOMParser
031: /***/
032: extends org.apache.xerces.parsers.DOMParser {
033: /***
034: // NOTE: It would be better to extend from AbstractDOMParser but
035: // most users will find it easier if the API is just like the
036: // Xerces DOM parser. By extending directly from DOMParser,
037: // users can register SAX error handlers, entity resolvers,
038: // and the like. -Ac
039: extends org.apache.xerces.parsers.AbstractDOMParser {
040: /***/
041:
042: //
043: // Constructors
044: //
045: /** Default constructor. */
046: public DOMParser() {
047: super (new HTMLConfiguration());
048: /*** extending DOMParser ***/
049: try {
050: setProperty(
051: "http://apache.org/xml/properties/dom/document-class-name",
052: "org.apache.html.dom.HTMLDocumentImpl");
053: } catch (org.xml.sax.SAXNotRecognizedException e) {
054: throw new RuntimeException(
055: "http://apache.org/xml/properties/dom/document-class-name property not recognized");
056: } catch (org.xml.sax.SAXNotSupportedException e) {
057: throw new RuntimeException(
058: "http://apache.org/xml/properties/dom/document-class-name property not supported");
059: }
060: /*** extending AbstractDOMParser ***
061: fConfiguration.setProperty("http://apache.org/xml/properties/dom/document-class-name",
062: "org.apache.html.dom.HTMLDocumentImpl");
063: /***/
064: } // <init>()
065:
066: //
067: // XMLDocumentHandler methods
068: //
069:
070: /** Doctype declaration. */
071: public void doctypeDecl(String root, String pubid, String sysid,
072: Augmentations augs) throws XNIException {
073:
074: // NOTE: Xerces HTML DOM implementation (up to and including
075: // 2.5.0) throws a heirarchy request error exception
076: // when a doctype node is appended to the tree. So,
077: // don't insert this node into the tree for those
078: // versions... -Ac
079:
080: String VERSION = org.apache.xerces.impl.Version.getVersion();
081: boolean okay = true;
082: if (VERSION.startsWith("Xerces-J 2.")) {
083: okay = getParserSubVersion() > 5;
084: }
085: // REVISIT: As soon as XML4J is updated with the latest code
086: // from Xerces, then this needs to be updated to
087: // check XML4J's version. -Ac
088: else if (VERSION.startsWith("XML4J")) {
089: okay = false;
090: }
091:
092: // if okay, insert doctype; otherwise, don't risk it
093: if (okay) {
094: super .doctypeDecl(root, pubid, sysid, augs);
095: }
096:
097: } // doctypeDecl(String,String,String,Augmentations)
098:
099: //
100: // Private static methods
101: //
102:
103: /** Returns the parser's sub-version number. */
104: private static int getParserSubVersion() {
105: try {
106: String VERSION = org.apache.xerces.impl.Version
107: .getVersion();
108: int index1 = VERSION.indexOf('.') + 1;
109: int index2 = VERSION.indexOf('.', index1);
110: if (index2 == -1) {
111: index2 = VERSION.length();
112: }
113: return Integer.parseInt(VERSION.substring(index1, index2));
114: } catch (Exception e) {
115: return -1;
116: }
117: } // getParserSubVersion():int
118:
119: } // class DOMParser
|