001: /* SAXBuilder.java
002:
003: {{IS_NOTE
004:
005: Purpose:
006: Description:
007: History:
008: 2001/10/25 13:21:14, Create, Tom M. Yeh.
009: }}IS_NOTE
010:
011: Copyright (C) 2001 Potix Corporation. All Rights Reserved.
012:
013: {{IS_RIGHT
014: This program is distributed under GPL Version 2.0 in the hope that
015: it will be useful, but WITHOUT ANY WARRANTY.
016: }}IS_RIGHT
017: */
018: package org.zkoss.idom.input;
019:
020: import java.io.IOException;
021: import java.io.File;
022: import java.io.InputStream;
023: import java.io.Reader;
024: import java.net.URL;
025:
026: import javax.xml.parsers.SAXParser;
027: import javax.xml.parsers.SAXParserFactory;
028: import javax.xml.parsers.ParserConfigurationException;
029:
030: import org.xml.sax.InputSource;
031: import org.xml.sax.EntityResolver;
032: import org.xml.sax.ErrorHandler;
033: import org.xml.sax.SAXException;
034:
035: import org.zkoss.util.logging.Log;
036: import org.zkoss.idom.Document;
037:
038: /**
039: * The builder based on SAX parsers.
040: *
041: * <p>A new instance of {@link SAXHandler} is created and configured
042: * each time one of the build methods is called.
043: *
044: * @author tomyeh
045: * @see SAXHandler
046: */
047: public class SAXBuilder {
048: private static final Log log = Log.lookup(SAXHandler.class);
049:
050: /** The parser. */
051: private final SAXParser _parser;
052: /** The iDOM factory. */
053: private IDOMFactory _factory;
054: /** Whether to ignore ignorable whitespace */
055: private boolean _ignoreWhitespaces = false;
056: /** Whether expansion of entities should occur */
057: private boolean _expandEntities = true;
058: /** Whether to convert CData to Text and coalesce them. */
059: private boolean _coalescing = false;
060: /** Whether to ignore comments. */
061: private boolean _ignoreComments = false;
062: /** The error handler. */
063: private ErrorHandler _errHandler = null;
064: /** The entity resolver. */
065: private EntityResolver _resolver = null;
066:
067: /**
068: * Constructor which reuses a parser.
069: */
070: public SAXBuilder(SAXParser parser) {
071: if (parser == null)
072: throw new NullPointerException("parser");
073: _parser = parser;
074: }
075:
076: /**
077: * Constructor that creates the parser on-the-fly.
078: *
079: * @param nsaware whether the parser is namespace aware
080: * @param validate whether the parser shall validate the document
081: *
082: * @exception ParserConfigurationException if a parser cannot be created
083: * which satisfies the requested configuration.
084: *
085: * @see #SAXBuilder(boolean, boolean, boolean)
086: */
087: public SAXBuilder(boolean nsaware, boolean validate)
088: throws ParserConfigurationException, SAXException {
089: SAXParserFactory fty = SAXParserFactory.newInstance();
090:
091: // SAX2 namespace-prefixes should be true for either builder
092: fty.setFeature(
093: "http://xml.org/sax/features/namespace-prefixes", true);
094:
095: // Set SAX2 namespaces feature appropriately
096: fty.setFeature("http://xml.org/sax/features/namespaces",
097: nsaware);
098: fty.setNamespaceAware(nsaware);
099:
100: fty.setFeature("http://xml.org/sax/features/validation",
101: validate);
102: try {
103: fty.setFeature(
104: "http://apache.org/xml/features/validation/schema",
105: validate);
106: } catch (org.xml.sax.SAXNotRecognizedException ex) {
107: //IGNORE IT (crisom doesn't support it)
108: }
109: fty.setValidating(validate);
110:
111: _parser = fty.newSAXParser();
112: }
113:
114: /**
115: * Constructor that creates the parser on-the-fly, that accepts
116: * an additional option, smartIgnore.
117: *
118: * <p>When parsing XML for input purpose only, it is better to use this
119: * constructor with smartIgnore true, and then comments will be ignored
120: * CDATA will be coalesced with TEXT. A smaller DOM tree is formed.
121: *
122: * @param nsaware whether the parser is namespace aware
123: * @param validate whether the parser shall validate the document
124: * @param smartIgnore whether to ignore comments and ignorable-whitesace
125: * (if validate is true), and to coalesce
126: *
127: * @exception ParserConfigurationException if a parser cannot be created
128: * which satisfies the requested configuration.
129: */
130: public SAXBuilder(boolean nsaware, boolean validate,
131: boolean smartIgnore) throws ParserConfigurationException,
132: SAXException {
133: this (nsaware, validate);
134: if (smartIgnore) {
135: setIgnoringComments(true);
136: setCoalescing(true);
137: if (validate)
138: setIgnoringElementContentWhitespace(true);
139: }
140: }
141:
142: /**
143: * Tests whether to ignore whitespaces in element content.
144: */
145: public final boolean isIgnoringElementContentWhitespace() {
146: return _ignoreWhitespaces;
147: }
148:
149: /**
150: * Sets whether the parser should elminate whitespace in
151: * element content. They are known as "ignorable whitespace".
152: * Only whitespace which is contained within element content that has
153: * an element only content model will be eliminated (see XML Rec 2.10).
154: *
155: * <p>For this setting to take effect requires that validation be turned on.
156: *
157: * <p>Default: false.
158: *
159: * @param ignore Whether to ignore whitespaces in element content.
160: */
161: public final void setIgnoringElementContentWhitespace(boolean ignore) {
162: _ignoreWhitespaces = ignore;
163: }
164:
165: /**
166: * Tests whether to expand entity reference nodes.
167: */
168: public final boolean isExpandEntityReferences() {
169: return _expandEntities;
170: }
171:
172: /**
173: * Sets whether to expand entities during parsing.
174: * A true means to expand entities as normal content. A false means to
175: * leave entities unexpanded as <code>EntityReference</code> objects.
176: *
177: * <p>Default: true.
178: *
179: * @param expand whether entity expansion should occur.
180: */
181: public final void setExpandEntityReferences(boolean expand) {
182: _expandEntities = expand;
183: }
184:
185: /**
186: * Indicates whether or not the factory is configured to produce parsers
187: * which converts CDATA to Text and appends it to the adjacent (if any)
188: * Text node.
189: *
190: * <p>Default: false.
191: *
192: * @return true if the factory is configured to produce parsers which
193: * converts CDATA nodes to Text nodes
194: * and appends it to the adjacent (if any) Text node; false otherwise.
195: */
196: public final boolean isCoalescing() {
197: return _coalescing;
198: }
199:
200: /**
201: * Specifies that the parser produced by this code will convert
202: * CDATA to Text and append it to the adjacent (if any) text.
203: *
204: * <p>Default: false.
205: */
206: public final void setCoalescing(boolean coalescing) {
207: _coalescing = coalescing;
208: }
209:
210: /**
211: * Indicates whether or not the factory is configured to produce parsers
212: * which ignores comments.
213: *
214: * <p>Default: false.
215: *
216: * @return true if the factory is configured to produce parsers
217: * which ignores comments; false otherwise.
218: */
219: public final boolean isIgnoringComments() {
220: return _ignoreComments;
221: }
222:
223: /**
224: * Specifies that the parser produced by this code will ignore comments.
225: *
226: * <p>Default: false.
227: */
228: public final void setIgnoringComments(boolean ignoreComments) {
229: _ignoreComments = ignoreComments;
230: }
231:
232: /**
233: * Specifies the org.xml.sax.ErrorHandler to be used to report errors
234: * present in the XML document to be parsed.
235: * <p>Default: null -- to use the default imple-mentation and behavior.
236: */
237: public final void setErrorHandler(ErrorHandler eh) {
238: _errHandler = eh;
239: }
240:
241: /**
242: * Gets the org.xml.sax.ErrorHandler.
243: *
244: * @return the error handler; null to use the default implementation
245: */
246: public final ErrorHandler getErrorHandler() {
247: return _errHandler;
248: }
249:
250: /**
251: * Specifies the org.xml.sax.EntityResolver to be used to resolve
252: * entities present in the XML docu-ment to be parsed.
253: * <p>Default: null -- to use the default implementation and behavior.
254: */
255: public final void setEntityResolver(org.xml.sax.EntityResolver er) {
256: _resolver = er;
257: }
258:
259: /**
260: * Gets the org.xml.sax.EntityResolver.
261: *
262: * @return the enity resolverr; null to use the default implementation
263: */
264: public final EntityResolver getEntityResolver() {
265: return _resolver;
266: }
267:
268: /**
269: * Tests whether or not this parser is configured to understand namespaces.
270: */
271: public final boolean isNamespaceAware() {
272: return _parser.isNamespaceAware();
273: }
274:
275: /**
276: * Tests whether or not this parser is configured to validate XML documents.
277: */
278: public final boolean isValidating() {
279: return _parser.isValidating();
280: }
281:
282: /**
283: * Gets the iDOM factory. Null for DefaultIDOMFactory.THE.
284: */
285: public final IDOMFactory getIDOMFactory() {
286: return _factory;
287: }
288:
289: /**
290: * Sets the iDOM factory. Null for DefaultIDOMFactory.THE.
291: */
292: public final void setIDOMFactory(IDOMFactory factory) {
293: _factory = factory;
294: }
295:
296: /**
297: * Gets the sax parser.
298: */
299: public final SAXParser getParser() {
300: return _parser;
301: }
302:
303: /**
304: * Build an iDOM tree from a file.
305: */
306: public final Document build(File src) throws SAXException,
307: IOException {
308: SAXHandler handler = newHandler();
309: _parser.parse(src, handler);
310: return handler.getDocument();
311: }
312:
313: /**
314: * Build an iDOM tree from a input stream.
315: */
316: public final Document build(InputStream src) throws SAXException,
317: IOException {
318: SAXHandler handler = newHandler();
319: _parser.parse(src, handler);
320: return handler.getDocument();
321: }
322:
323: /**
324: * Build an iDOM tree from a input source.
325: */
326: public final Document build(InputSource src) throws SAXException,
327: IOException {
328: SAXHandler handler = newHandler();
329: _parser.parse(src, handler);
330: return handler.getDocument();
331: }
332:
333: /**
334: * Build an iDOM tree from a URI string.
335: */
336: public final Document build(String uri) throws SAXException,
337: IOException {
338: SAXHandler handler = newHandler();
339: _parser.parse(uri, handler);
340: return handler.getDocument();
341: }
342:
343: /**
344: * Build an iDOM tree from a URL.
345: */
346: public final Document build(URL url) throws SAXException,
347: IOException {
348: SAXHandler handler = newHandler();
349: _parser.parse(url.toExternalForm(), handler);
350: return handler.getDocument();
351: }
352:
353: /**
354: * Build an iDOM tree from a Reader.
355: */
356: public final Document build(Reader src) throws SAXException,
357: IOException {
358: SAXHandler handler = newHandler();
359: _parser.parse(new InputSource(src), handler);
360: return handler.getDocument();
361: }
362:
363: /**
364: * Creates a Sax Handler.
365: * Deriving class might override to provide a subclass of SAXHandler.
366: */
367: protected SAXHandler newHandler() throws SAXException {
368: SAXHandler handler = new SAXHandler(_factory);
369:
370: //configure handler
371: handler.setIgnoringElementContentWhitespace(_ignoreWhitespaces);
372: handler.setExpandEntityReferences(_expandEntities);
373: handler.setCoalescing(_coalescing);
374: handler.setIgnoringComments(_ignoreComments);
375: handler.setErrorHandler(_errHandler);
376: handler.setEntityResolver(_resolver);
377:
378: //configure parser
379: try { //The standard property
380: _parser.setProperty(
381: "http://xml.org/sax/properties/lexical-handler",
382: handler);
383: } catch (Exception ex) {
384: try { //some use this property
385: _parser.setProperty(
386: "http://xml.org/sax/handlers/LexicalHandler",
387: handler);
388: } catch (Exception ex2) {
389: log.warning("lexical-handler not supported");
390: }
391: }
392:
393: if (!isExpandEntityReferences()) { //not expanding?
394: try { //then, we need DeclHandler
395: _parser
396: .setProperty(
397: "http://xml.org/sax/properties/declaration-handler",
398: handler);
399: } catch (Exception ex) {
400: log.warning("declaration-handler not supported");
401: }
402: }
403:
404: return handler;
405: }
406: }
|