001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.components.parser;
018:
019: import java.io.IOException;
020:
021: import javax.xml.parsers.DocumentBuilder;
022: import javax.xml.parsers.DocumentBuilderFactory;
023: import javax.xml.parsers.ParserConfigurationException;
024: import javax.xml.parsers.SAXParserFactory;
025:
026: import org.apache.avalon.excalibur.pool.Poolable;
027: import org.apache.avalon.framework.activity.Disposable;
028: import org.apache.avalon.framework.component.ComponentException;
029: import org.apache.avalon.framework.component.ComponentManager;
030: import org.apache.avalon.framework.component.Composable;
031: import org.apache.avalon.framework.parameters.ParameterException;
032: import org.apache.avalon.framework.parameters.Parameterizable;
033: import org.apache.avalon.framework.parameters.Parameters;
034: import org.apache.cocoon.components.resolver.Resolver;
035: import org.apache.cocoon.util.ClassUtils;
036: import org.apache.cocoon.xml.AbstractXMLProducer;
037: import org.w3c.dom.DOMImplementation;
038: import org.w3c.dom.Document;
039: import org.xml.sax.ErrorHandler;
040: import org.xml.sax.InputSource;
041: import org.xml.sax.SAXException;
042: import org.xml.sax.SAXParseException;
043: import org.xml.sax.XMLReader;
044:
045: /**
* An XMLParser that is only dependent on JAXP 1.1 compliant parsers.
047: *
048: * The configuration can contain the following parameters :
049: * <ul>
050: * <li>validate (boolean, default = <code>false</code>) : should the parser
051: * validate parsed documents ?
052: * </li>
053: * <li>namespace-prefixes (boolean, default = <code>false</code>) : do we want
054: * namespaces declarations also as 'xmlns:' attributes ?<br>
055: * <i>Note</i> : setting this to <code>true</code> confuses some XSL
056: * processors (e.g. Saxon).
057: * </li>
058: * <li>reuse-parsers (boolean, default = <code>true</code>) : do we want to reuse
059: * parsers or create a new parser for each parse ?<br>
060: * <i>Note</i> : even if this parameter is <code>true</code>, parsers are not
061: * recycled in case of parsing errors : some parsers (e.g. Xerces) don't like
062: * to be reused after failure.
063: * </li>
064: * <li>sax-parser-factory (string, optional) : the name of the <code>SAXParserFactory</code>
065: * implementation class to be used instead of using the standard JAXP mechanism
* (<code>SAXParserFactory.newInstance()</code>). This makes it possible to
* choose unambiguously which JAXP implementation is used when several of
* them are available in the classpath.
069: * </li>
070: * <li>document-builder-factory (string, optional) : the name of the
071: * <code>DocumentBuilderFactory</code> implementation to be used (similar to
072: * <code>sax-parser-factory</code> for DOM).
073: * </li>
074: * </ul>
075: *
076: * @deprecated The Avalon XML Parser is now used inside Cocoon. This role
077: * will be removed in future releases.
078:
079: * @author <a href="mailto:bloritsch@apache.org">Berin Loritsch</a>
080: * @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
081: * @author <a href="mailto:sylvain@apache.org">Sylvain Wallez</a>
082: * @version CVS $Id: JaxpParser.java 433543 2006-08-22 06:22:54Z crossley $
083: */
084: public class JaxpParser extends AbstractXMLProducer implements Parser,
085: ErrorHandler, Composable, Parameterizable, Disposable, Poolable {
086:
087: /** the SAX Parser factory */
088: protected SAXParserFactory factory;
089:
090: /** the Document Builder factory */
091: protected DocumentBuilderFactory docFactory;
092:
093: /** The SAX reader. It is created lazily by {@link #setupXMLReader()}
094: and cleared if a parsing error occurs. */
095: protected XMLReader reader;
096:
097: /** The DOM builder. It is created lazily by {@link #setupDocumentBuilder()}
098: and cleared if a parsing error occurs. */
099: protected DocumentBuilder docBuilder;
100:
101: /** the component manager */
102: protected ComponentManager manager;
103:
104: /** the Entity Resolver */
105: protected Resolver resolver;
106:
107: /** do we want namespaces also as attributes ? */
108: protected boolean nsPrefixes;
109:
110: /** do we want to reuse parsers ? */
111: protected boolean reuseParsers;
112:
113: /**
114: * Get the Entity Resolver from the component manager
115: */
116: public void compose(ComponentManager manager)
117: throws ComponentException {
118: this .manager = manager;
119: if (manager.hasComponent(Resolver.ROLE)) {
120: if (getLogger().isDebugEnabled()) {
121: getLogger().debug("Looking up " + Resolver.ROLE);
122: }
123: this .resolver = (Resolver) manager.lookup(Resolver.ROLE);
124: }
125: }
126:
127: /**
128: * Dispose
129: */
130: public void dispose() {
131: if (this .manager != null) {
132: this .manager.release(this .resolver);
133: }
134: }
135:
136: /**
137: * Configure
138: */
139: public void parameterize(Parameters params)
140: throws ParameterException {
141: // Validation and namespace prefixes parameters
142: boolean validate = params.getParameterAsBoolean("validate",
143: false);
144: this .nsPrefixes = params.getParameterAsBoolean(
145: "namespace-prefixes", false);
146: this .reuseParsers = params.getParameterAsBoolean(
147: "reuse-parsers", true);
148:
149: // Get the SAXFactory
150: String className = params.getParameter("sax-parser-factory",
151: null);
152: if (className == null) {
153: factory = SAXParserFactory.newInstance();
154: } else {
155: // Will use specific class
156: try {
157: Class factoryClass = ClassUtils.loadClass(className);
158: factory = (SAXParserFactory) factoryClass.newInstance();
159: } catch (Exception e) {
160: throw new ParameterException(
161: "Cannot load SAXParserFactory class "
162: + className, e);
163: }
164: }
165: factory.setNamespaceAware(true);
166: factory.setValidating(validate);
167:
168: // Get the DocumentFactory
169: className = params.getParameter("document-builder-factory",
170: null);
171: if (className == null) {
172: this .docFactory = DocumentBuilderFactory.newInstance();
173: } else {
174: // Will use specific class
175: try {
176: Class factoryClass = ClassUtils.loadClass(className);
177: this .docFactory = (DocumentBuilderFactory) factoryClass
178: .newInstance();
179: } catch (Exception e) {
180: throw new ParameterException(
181: "Cannot load DocumentBuilderFactory class "
182: + className, e);
183: }
184: }
185:
186: docFactory.setNamespaceAware(true);
187: docFactory.setValidating(validate);
188: }
189:
190: public void parse(InputSource in) throws SAXException, IOException {
191: setupXMLReader();
192: try {
193: this .reader.setProperty(
194: "http://xml.org/sax/properties/lexical-handler",
195: super .lexicalHandler);
196: } catch (SAXException e) {
197: getLogger()
198: .warn(
199: "SAX2 driver does not support property: "
200: + "'http://xml.org/sax/properties/lexical-handler'");
201: }
202:
203: this .reader.setErrorHandler(this );
204: this .reader.setContentHandler(super .contentHandler);
205: if (this .resolver != null) {
206: reader.setEntityResolver(this .resolver);
207: }
208:
209: // Ensure we will use a fresh new parser at next parse in case of failure
210: XMLReader tmpReader = this .reader;
211: this .reader = null;
212:
213: tmpReader.parse(in);
214:
215: // Here, parsing was successful : restore this.reader
216: if (this .reuseParsers)
217: this .reader = tmpReader;
218: }
219:
220: /**
221: * Create a new Document object.
222: */
223: public Document newDocument() {
224: setupDocumentBuilder();
225: return this .docBuilder.newDocument();
226: }
227:
228: /**
229: * Create a new Document object with a specified DOCTYPE.
230: */
231: public Document newDocument(String name) {
232: return this .newDocument(name, null, null);
233: }
234:
235: /**
236: * Create a new Document object with a specified DOCTYPE, public ID and
237: * system ID.
238: */
239: public Document newDocument(String name, String publicId,
240: String systemId) {
241: setupDocumentBuilder();
242: // Fixme: is there a better way to achieve this?
243: DOMImplementation impl = this .docBuilder.newDocument()
244: .getImplementation();
245: return impl.createDocument(null, name, impl.createDocumentType(
246: name, publicId, systemId));
247: }
248:
249: /**
250: * Parses a new Document object from the given InputSource.
251: */
252: public Document parseDocument(InputSource input)
253: throws SAXException, IOException {
254: setupDocumentBuilder();
255:
256: // Ensure we will use a fresh new parser at next parse in case of failure
257: DocumentBuilder tmpBuilder = this .docBuilder;
258: this .docBuilder = null;
259:
260: Document result = tmpBuilder.parse(input);
261:
262: // Here, parsing was successful : restore this.builder
263: if (this .reuseParsers)
264: this .docBuilder = tmpBuilder;
265:
266: return result;
267: }
268:
269: /**
270: * Creates a new <code>XMLReader</code> if needed.
271: */
272: protected void setupXMLReader() throws SAXException {
273: if (this .reader == null) {
274: // Create the XMLReader
275: try {
276: this .reader = factory.newSAXParser().getXMLReader();
277: this .reader
278: .setFeature(
279: "http://xml.org/sax/features/namespace-prefixes",
280: nsPrefixes);
281: } catch (Exception e) {
282: getLogger().error("Cannot produce a valid parser", e);
283: throw new SAXException("Cannot produce a valid parser",
284: e);
285: }
286: }
287: }
288:
289: /**
290: * Creates a new <code>DocumentBuilder</code> if needed.
291: */
292: protected void setupDocumentBuilder() {
293: if (this .docBuilder == null) {
294: try {
295: this .docBuilder = this .docFactory.newDocumentBuilder();
296: } catch (ParserConfigurationException pce) {
297: getLogger().error("Could not create DocumentBuilder",
298: pce);
299: throw new org.apache.avalon.framework.CascadingRuntimeException(
300: "Could not create DocumentBuilder", pce);
301: }
302: }
303: }
304:
305: /**
306: * Receive notification of a recoverable error.
307: */
308: public void error(SAXParseException e) throws SAXException {
309: throw new SAXException("Error parsing " + e.getSystemId()
310: + " (line " + e.getLineNumber() + " col. "
311: + e.getColumnNumber() + "): " + e.getMessage(), e);
312: }
313:
314: /**
315: * Receive notification of a fatal error.
316: */
317: public void fatalError(SAXParseException e) throws SAXException {
318: throw new SAXException("Fatal error parsing " + e.getSystemId()
319: + " (line " + e.getLineNumber() + " col. "
320: + e.getColumnNumber() + "): " + e.getMessage(), e);
321: }
322:
323: /**
324: * Receive notification of a warning.
325: */
326: public void warning(SAXParseException e) throws SAXException {
327: throw new SAXException("Warning parsing " + e.getSystemId()
328: + " (line " + e.getLineNumber() + " col. "
329: + e.getColumnNumber() + "): " + e.getMessage(), e);
330: }
331: }
|