0001: /*--
0002:
0003: $Id: SAXBuilder.java,v 1.1 2005/04/27 09:32:40 wittek Exp $
0004:
0005: Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
0006: All rights reserved.
0007:
0008: Redistribution and use in source and binary forms, with or without
0009: modification, are permitted provided that the following conditions
0010: are met:
0011:
0012: 1. Redistributions of source code must retain the above copyright
0013: notice, this list of conditions, and the following disclaimer.
0014:
0015: 2. Redistributions in binary form must reproduce the above copyright
0016: notice, this list of conditions, and the disclaimer that follows
0017: these conditions in the documentation and/or other materials
0018: provided with the distribution.
0019:
0020: 3. The name "JDOM" must not be used to endorse or promote products
0021: derived from this software without prior written permission. For
0022: written permission, please contact <request_AT_jdom_DOT_org>.
0023:
0024: 4. Products derived from this software may not be called "JDOM", nor
0025: may "JDOM" appear in their name, without prior written permission
0026: from the JDOM Project Management <request_AT_jdom_DOT_org>.
0027:
0028: In addition, we request (but do not require) that you include in the
0029: end-user documentation provided with the redistribution and/or in the
0030: software itself an acknowledgement equivalent to the following:
0031: "This product includes software developed by the
0032: JDOM Project (http://www.jdom.org/)."
0033: Alternatively, the acknowledgment may be graphical using the logos
0034: available at http://www.jdom.org/images/logos.
0035:
0036: THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0037: WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0038: OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0039: DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
0040: CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0041: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0042: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
0043: USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0044: ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
0045: OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
0046: OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0047: SUCH DAMAGE.
0048:
0049: This software consists of voluntary contributions made by many
0050: individuals on behalf of the JDOM Project and was originally
0051: created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
0052: Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
0053: on the JDOM Project, please see <http://www.jdom.org/>.
0054:
0055: */
0056:
0057: package org.jdom.input;
0058:
0059: import java.io.*;
0060: import java.lang.reflect.*;
0061: import java.net.*;
0062: import java.util.*;
0063:
0064: import org.jdom.*;
0065:
0066: import org.xml.sax.*;
0067: import org.xml.sax.helpers.XMLReaderFactory;
0068:
0069: /**
0070: * Builds a JDOM document from files, streams, readers, URLs, or a SAX {@link
0071: * org.xml.sax.InputSource} instance using a SAX parser. The builder uses a
0072: * third-party SAX parser (chosen by JAXP by default, or you can choose
0073: * manually) to handle the parsing duties and simply listens to the SAX events
0074: * to construct a document. Details which SAX does not provide, such as
0075: * whitespace outside the root element, are not represented in the JDOM
0076: * document. Information about SAX can be found at <a
0077: * href="http://www.saxproject.org">http://www.saxproject.org</a>.
0078: * <p>
0079: * Known issues: Relative paths for a {@link DocType} or {@link EntityRef} may
0080: * be converted by the SAX parser into absolute paths.
0081: *
0082: * @version $Revision: 1.1 $, $Date: 2005/04/27 09:32:40 $
0083: * @author Jason Hunter
0084: * @author Brett McLaughlin
0085: * @author Dan Schaffer
0086: * @author Philip Nelson
0087: * @author Alex Rosen
0088: */
0089: public class SAXBuilder {
0090:
/** Version-control identification string (RCS/CVS keyword fields). */
private static final String CVS_ID = "@(#) $RCSfile: SAXBuilder.java,v $ $Revision: 1.1 $ $Date: 2005/04/27 09:32:40 $ $Name: $";

/**
 * Default parser class to use. This is used when no other parser
 * is given and JAXP isn't available.
 */
private static final String DEFAULT_SAX_DRIVER = "org.apache.xerces.parsers.SAXParser";

/** Whether validation should occur. */
private boolean validate;

/** Whether expansion of entities should occur (default: true). */
private boolean expand = true;

/**
 * Class name of the SAX driver to instantiate, or null when none was
 * requested. Also overwritten with the class name of the fallback
 * parser when the hard-coded default driver ends up being used.
 */
private String saxDriverClass;

/** ErrorHandler instance to register on the parser, or null for the built-in default. */
private ErrorHandler saxErrorHandler = null;

/** EntityResolver instance to register on the parser, or null if none. */
private EntityResolver saxEntityResolver = null;

/** DTDHandler instance to register on the parser, or null to use the content handler. */
private DTDHandler saxDTDHandler = null;

/** XMLFilter instance to read through, or null if none. */
private XMLFilter saxXMLFilter = null;

/** The factory for creating new JDOM objects. */
private JDOMFactory factory = new DefaultJDOMFactory();

/** Whether to ignore ignorable whitespace. */
private boolean ignoringWhite = false;

/** User-specified features to be set on the SAX parser (feature name to Boolean). */
private HashMap features = new HashMap(5);

/** User-specified properties to be set on the SAX parser (property name to value). */
private HashMap properties = new HashMap(5);

/**
 * Whether parser reuse is allowed.
 * <p>Default: <code>true</code></p>
 */
private boolean reuseParser = true;

/** The current SAX parser, if parser reuse has been activated. */
private XMLReader saxParser = null;
0140:
/**
 * Creates a non-validating SAXBuilder with no explicit SAX driver.
 * The parser is located via JAXP first, falling back to a hard-coded
 * default SAX driver.
 */
public SAXBuilder() {
    // Funnel into the canonical (driver, validate) constructor.
    this(null, false);
}
0149:
/**
 * Creates a SAXBuilder with no explicit SAX driver. The parser is
 * located via JAXP first, falling back to a hard-coded default SAX
 * driver, and validates or not according to the given flag.
 *
 * @param validate <code>boolean</code> indicating if
 *                 validation should occur.
 */
public SAXBuilder(boolean validate) {
    // Funnel into the canonical (driver, validate) constructor.
    this(null, validate);
}
0162:
/**
 * Creates a non-validating SAXBuilder that uses the named SAX driver.
 *
 * @param saxDriverClass <code>String</code> name of SAX Driver
 *                       to use for parsing.
 */
public SAXBuilder(String saxDriverClass) {
    this.saxDriverClass = saxDriverClass;
    this.validate = false;
}
0173:
/**
 * Creates a SAXBuilder that uses the named SAX driver and validates
 * or not according to the given flag.
 *
 * @param saxDriverClass <code>String</code> name of SAX Driver
 *                       to use for parsing.
 * @param validate       <code>boolean</code> indicating if
 *                       validation should occur.
 */
public SAXBuilder(String saxDriverClass, boolean validate) {
    this.validate = validate;
    this.saxDriverClass = saxDriverClass;
}
0188:
0189: /**
0190: * Returns the driver class assigned in the constructor, or null if none.
0191: *
0192: * @return the driver class assigned in the constructor
0193: */
0194: public String getDriverClass() {
0195: return saxDriverClass;
0196: }
0197:
0198: /**
0199: * Returns the current {@link org.jdom.JDOMFactory} in use.
0200: * @return the factory in use
0201: */
0202: public JDOMFactory getFactory() {
0203: return factory;
0204: }
0205:
0206: /**
0207: * This sets a custom JDOMFactory for the builder. Use this to build
0208: * the tree with your own subclasses of the JDOM classes.
0209: *
0210: * @param factory <code>JDOMFactory</code> to use
0211: */
0212: public void setFactory(JDOMFactory factory) {
0213: this .factory = factory;
0214: }
0215:
0216: /**
0217: * Returns whether validation is to be performed during the build.
0218: *
0219: * @return whether validation is to be performed during the build
0220: */
0221: public boolean getValidation() {
0222: return validate;
0223: }
0224:
0225: /**
0226: * This sets validation for the builder.
0227: *
0228: * @param validate <code>boolean</code> indicating whether validation
0229: * should occur.
0230: */
0231: public void setValidation(boolean validate) {
0232: this .validate = validate;
0233: }
0234:
0235: /**
0236: * Returns the {@link ErrorHandler} assigned, or null if none.
0237: * @return the ErrorHandler assigned, or null if none
0238: */
0239: public ErrorHandler getErrorHandler() {
0240: return saxErrorHandler;
0241: }
0242:
0243: /**
0244: * This sets custom ErrorHandler for the <code>Builder</code>.
0245: *
0246: * @param errorHandler <code>ErrorHandler</code>
0247: */
0248: public void setErrorHandler(ErrorHandler errorHandler) {
0249: saxErrorHandler = errorHandler;
0250: }
0251:
0252: /**
0253: * Returns the {@link EntityResolver} assigned, or null if none.
0254: *
0255: * @return the EntityResolver assigned
0256: */
0257: public EntityResolver getEntityResolver() {
0258: return saxEntityResolver;
0259: }
0260:
0261: /**
0262: * This sets custom EntityResolver for the <code>Builder</code>.
0263: *
0264: * @param entityResolver <code>EntityResolver</code>
0265: */
0266: public void setEntityResolver(EntityResolver entityResolver) {
0267: saxEntityResolver = entityResolver;
0268: }
0269:
0270: /**
0271: * Returns the {@link DTDHandler} assigned, or null if none.
0272: *
0273: * @return the DTDHandler assigned
0274: */
0275: public DTDHandler getDTDHandler() {
0276: return saxDTDHandler;
0277: }
0278:
0279: /**
0280: * This sets custom DTDHandler for the <code>Builder</code>.
0281: *
0282: * @param dtdHandler <code>DTDHandler</code>
0283: */
0284: public void setDTDHandler(DTDHandler dtdHandler) {
0285: saxDTDHandler = dtdHandler;
0286: }
0287:
0288: /**
0289: * Returns the {@link XMLFilter} used during parsing, or null if none.
0290: *
0291: * @return the XMLFilter used during parsing
0292: */
0293: public XMLFilter getXMLFilter() {
0294: return saxXMLFilter;
0295: }
0296:
0297: /**
0298: * This sets a custom {@link org.xml.sax.XMLFilter} for the builder.
0299: *
0300: * @param xmlFilter the filter to use
0301: */
0302: public void setXMLFilter(XMLFilter xmlFilter) {
0303: saxXMLFilter = xmlFilter;
0304: }
0305:
0306: /**
0307: * Returns whether element content whitespace is to be ignored during the
0308: * build.
0309: *
0310: * @return whether element content whitespace is to be ignored during the
0311: * build
0312: */
0313: public boolean getIgnoringElementContentWhitespace() {
0314: return ignoringWhite;
0315: }
0316:
0317: /**
0318: * Specifies whether or not the parser should elminate whitespace in
0319: * element content (sometimes known as "ignorable whitespace") when
0320: * building the document. Only whitespace which is contained within
0321: * element content that has an element only content model will be
0322: * eliminated (see XML Rec 3.2.1). For this setting to take effect
0323: * requires that validation be turned on. The default value of this
0324: * setting is <code>false</code>.
0325: *
0326: * @param ignoringWhite Whether to ignore ignorable whitespace
0327: */
0328: public void setIgnoringElementContentWhitespace(
0329: boolean ignoringWhite) {
0330: this .ignoringWhite = ignoringWhite;
0331: }
0332:
0333: /**
0334: * Returns whether the contained SAX parser instance is reused across
0335: * multiple parses. The default is true.
0336: *
0337: * @return whether the contained SAX parser instance is reused across
0338: * multiple parses
0339: */
0340: public boolean getReuseParser() {
0341: return reuseParser;
0342: }
0343:
0344: /**
0345: * Specifies whether this builder shall reuse the same SAX parser
0346: * when performing subsequent parses or allocate a new parser for
0347: * each parse. The default value of this setting is
0348: * <code>true</code> (parser reuse).
0349: * <p>
0350: * <strong>Note</strong>: As SAX parser instances are not thread safe,
0351: * the parser reuse feature should not be used with SAXBuilder instances
0352: * shared among threads.</p>
0353: *
0354: * @param reuseParser Whether to reuse the SAX parser.
0355: */
0356: public void setReuseParser(boolean reuseParser) {
0357: this .reuseParser = reuseParser;
0358: this .saxParser = null;
0359: }
0360:
0361: /**
0362: * This sets a feature on the SAX parser. See the SAX documentation for
0363: * </p>
0364: * <p>
0365: * NOTE: SAXBuilder requires that some particular features of the SAX parser be
0366: * set up in certain ways for it to work properly. The list of such features
0367: * may change in the future. Therefore, the use of this method may cause
0368: * parsing to break, and even if it doesn't break anything today it might
0369: * break parsing in a future JDOM version, because what JDOM parsers require
0370: * may change over time. Use with caution.
0371: * </p>
0372: *
0373: * @param name The feature name, which is a fully-qualified URI.
0374: * @param value The requested state of the feature (true or false).
0375: */
0376: public void setFeature(String name, boolean value) {
0377: // Save the specified feature for later.
0378: features.put(name, new Boolean(value));
0379: }
0380:
0381: /**
0382: * This sets a property on the SAX parser. See the SAX documentation for
0383: * more information.
0384: * <p>
0385: * NOTE: SAXBuilder requires that some particular properties of the SAX parser be
0386: * set up in certain ways for it to work properly. The list of such properties
0387: * may change in the future. Therefore, the use of this method may cause
0388: * parsing to break, and even if it doesn't break anything today it might
0389: * break parsing in a future JDOM version, because what JDOM parsers require
0390: * may change over time. Use with caution.
0391: * </p>
0392: *
0393: * @param name The property name, which is a fully-qualified URI.
0394: * @param value The requested value for the property.
0395: */
0396: public void setProperty(String name, Object value) {
0397: // Save the specified property for later.
0398: properties.put(name, value);
0399: }
0400:
0401: /**
0402: * This builds a document from the supplied
0403: * input source.
0404: *
0405: * @param in <code>InputSource</code> to read from
0406: * @return <code>Document</code> resultant Document object
0407: * @throws JDOMException when errors occur in parsing
0408: * @throws IOException when an I/O error prevents a document
0409: * from being fully parsed
0410: */
0411: public Document build(InputSource in) throws JDOMException,
0412: IOException {
0413: SAXHandler contentHandler = null;
0414:
0415: try {
0416: // Create and configure the content handler.
0417: contentHandler = createContentHandler();
0418: configureContentHandler(contentHandler);
0419:
0420: XMLReader parser = this .saxParser;
0421: if (parser == null) {
0422: // Create and configure the parser.
0423: parser = createParser();
0424:
0425: // Install optional filter
0426: if (saxXMLFilter != null) {
0427: // Connect filter chain to parser
0428: XMLFilter root = saxXMLFilter;
0429: while (root.getParent() instanceof XMLFilter) {
0430: root = (XMLFilter) root.getParent();
0431: }
0432: root.setParent(parser);
0433:
0434: // Read from filter
0435: parser = saxXMLFilter;
0436: }
0437:
0438: // Configure parser
0439: configureParser(parser, contentHandler);
0440:
0441: if (reuseParser == true) {
0442: this .saxParser = parser;
0443: }
0444: } else {
0445: // Reset content handler as SAXHandler instances cannot
0446: // be reused
0447: configureParser(parser, contentHandler);
0448: }
0449:
0450: // Parse the document.
0451: parser.parse(in);
0452:
0453: return contentHandler.getDocument();
0454: } catch (SAXParseException e) {
0455: Document doc = contentHandler.getDocument();
0456: if (doc.hasRootElement() == false) {
0457: doc = null;
0458: }
0459:
0460: String systemId = e.getSystemId();
0461: if (systemId != null) {
0462: throw new JDOMParseException("Error on line "
0463: + e.getLineNumber() + " of document "
0464: + systemId, e, doc);
0465: } else {
0466: throw new JDOMParseException("Error on line "
0467: + e.getLineNumber(), e, doc);
0468: }
0469: } catch (SAXException e) {
0470: throw new JDOMParseException("Error in building: "
0471: + e.getMessage(), e, contentHandler.getDocument());
0472: } finally {
0473: // Explicitly nullify the handler to encourage GC
0474: // It's a stack var so this shouldn't be necessary, but it
0475: // seems to help on some JVMs
0476: contentHandler = null;
0477: }
0478: }
0479:
0480: /**
0481: * This creates the SAXHandler that will be used to build the Document.
0482: *
0483: * @return <code>SAXHandler</code> - resultant SAXHandler object.
0484: */
0485: protected SAXHandler createContentHandler() {
0486: SAXHandler contentHandler = new SAXHandler(factory);
0487: return contentHandler;
0488: }
0489:
0490: /**
0491: * This configures the SAXHandler that will be used to build the Document.
0492: * <p>
0493: * The default implementation simply passes through some configuration
0494: * settings that were set on the SAXBuilder: setExpandEntities() and
0495: * setIgnoringElementContentWhitespace().
0496: * </p>
0497: */
0498: protected void configureContentHandler(SAXHandler contentHandler) {
0499: // Setup pass through behavior
0500: contentHandler.setExpandEntities(expand);
0501: contentHandler
0502: .setIgnoringElementContentWhitespace(ignoringWhite);
0503: }
0504:
0505: /**
0506: * This creates the XMLReader to be used for reading the XML document.
0507: * <p>
0508: * The default behavior is to (1) use the saxDriverClass, if it has been
0509: * set, (2) try to obtain a parser from JAXP, if it is available, and
0510: * (3) if all else fails, use a hard-coded default parser (currently
0511: * the Xerces parser). Subclasses may override this method to determine
0512: * the parser to use in a different way.
0513: * </p>
0514: *
0515: * @return <code>XMLReader</code> - resultant XMLReader object.
0516: */
0517: protected XMLReader createParser() throws JDOMException {
0518: XMLReader parser = null;
0519: if (saxDriverClass != null) {
0520: // The user knows that they want to use a particular class
0521: try {
0522: parser = XMLReaderFactory
0523: .createXMLReader(saxDriverClass);
0524:
0525: // Configure parser
0526: setFeaturesAndProperties(parser, true);
0527: } catch (SAXException e) {
0528: throw new JDOMException("Could not load "
0529: + saxDriverClass, e);
0530: }
0531: } else {
0532: // Try using JAXP...
0533: // Note we need JAXP 1.1, and if JAXP 1.0 is all that's
0534: // available then the getXMLReader call fails and we skip
0535: // to the hard coded default parser
0536: try {
0537: // Get factory class and method.
0538: Class factoryClass = Class
0539: .forName("org.jdom.input.JAXPParserFactory");
0540:
0541: Method createParser = factoryClass.getMethod(
0542: "createParser", new Class[] { boolean.class,
0543: Map.class, Map.class });
0544:
0545: // Create SAX parser.
0546: parser = (XMLReader) createParser.invoke(null,
0547: new Object[] { new Boolean(validate), features,
0548: properties });
0549:
0550: // Configure parser
0551: setFeaturesAndProperties(parser, false);
0552: } catch (JDOMException e) {
0553: throw e;
0554: } catch (NoClassDefFoundError e) {
0555: // The class loader failed to resolve the dependencies
0556: // of org.jdom.input.JAXPParserFactory. This probably means
0557: // that no JAXP parser is present in its class path.
0558: // => Ignore and try allocating default SAX parser instance.
0559: } catch (Exception e) {
0560: // Ignore and try allocating default SAX parser instance.
0561: }
0562: }
0563:
0564: // Check to see if we got a parser yet, if not, try to use a
0565: // hard coded default
0566: if (parser == null) {
0567: try {
0568: parser = XMLReaderFactory
0569: .createXMLReader(DEFAULT_SAX_DRIVER);
0570: // System.out.println("using default " + DEFAULT_SAX_DRIVER);
0571: saxDriverClass = parser.getClass().getName();
0572:
0573: // Configure parser
0574: setFeaturesAndProperties(parser, true);
0575: } catch (SAXException e) {
0576: throw new JDOMException(
0577: "Could not load default SAX parser: "
0578: + DEFAULT_SAX_DRIVER, e);
0579: }
0580: }
0581:
0582: return parser;
0583: }
0584:
0585: /**
0586: * This configures the XMLReader to be used for reading the XML document.
0587: * <p>
0588: * The default implementation sets various options on the given XMLReader,
0589: * such as validation, DTD resolution, entity handlers, etc., according
0590: * to the options that were set (e.g. via <code>setEntityResolver</code>)
0591: * and set various SAX properties and features that are required for JDOM
0592: * internals. These features may change in future releases, so change this
0593: * behavior at your own risk.
0594: * </p>
0595: */
0596: protected void configureParser(XMLReader parser,
0597: SAXHandler contentHandler) throws JDOMException {
0598:
0599: // Setup SAX handlers.
0600:
0601: parser.setContentHandler(contentHandler);
0602:
0603: if (saxEntityResolver != null) {
0604: parser.setEntityResolver(saxEntityResolver);
0605: }
0606:
0607: if (saxDTDHandler != null) {
0608: parser.setDTDHandler(saxDTDHandler);
0609: } else {
0610: parser.setDTDHandler(contentHandler);
0611: }
0612:
0613: if (saxErrorHandler != null) {
0614: parser.setErrorHandler(saxErrorHandler);
0615: } else {
0616: parser.setErrorHandler(new BuilderErrorHandler());
0617: }
0618:
0619: // Setup lexical reporting.
0620: boolean lexicalReporting = false;
0621: try {
0622: parser.setProperty(
0623: "http://xml.org/sax/handlers/LexicalHandler",
0624: contentHandler);
0625: lexicalReporting = true;
0626: } catch (SAXNotSupportedException e) {
0627: // No lexical reporting available
0628: } catch (SAXNotRecognizedException e) {
0629: // No lexical reporting available
0630: }
0631:
0632: // Some parsers use alternate property for lexical handling (grr...)
0633: if (!lexicalReporting) {
0634: try {
0635: parser
0636: .setProperty(
0637: "http://xml.org/sax/properties/lexical-handler",
0638: contentHandler);
0639: lexicalReporting = true;
0640: } catch (SAXNotSupportedException e) {
0641: // No lexical reporting available
0642: } catch (SAXNotRecognizedException e) {
0643: // No lexical reporting available
0644: }
0645: }
0646:
0647: // Try setting the DeclHandler if entity expansion is off
0648: if (!expand) {
0649: try {
0650: parser
0651: .setProperty(
0652: "http://xml.org/sax/properties/declaration-handler",
0653: contentHandler);
0654: } catch (SAXNotSupportedException e) {
0655: // No lexical reporting available
0656: } catch (SAXNotRecognizedException e) {
0657: // No lexical reporting available
0658: }
0659: }
0660: }
0661:
0662: private void setFeaturesAndProperties(XMLReader parser,
0663: boolean coreFeatures) throws JDOMException {
0664: // Set any user-specified features on the parser.
0665: Iterator iter = features.keySet().iterator();
0666: while (iter.hasNext()) {
0667: String name = (String) iter.next();
0668: Boolean value = (Boolean) features.get(name);
0669: internalSetFeature(parser, name, value.booleanValue(), name);
0670: }
0671:
0672: // Set any user-specified properties on the parser.
0673: iter = properties.keySet().iterator();
0674: while (iter.hasNext()) {
0675: String name = (String) iter.next();
0676: internalSetProperty(parser, name, properties.get(name),
0677: name);
0678: }
0679:
0680: if (coreFeatures) {
0681: // Set validation.
0682: try {
0683: internalSetFeature(parser,
0684: "http://xml.org/sax/features/validation",
0685: validate, "Validation");
0686: } catch (JDOMException e) {
0687: // If validation is not supported, and the user is requesting
0688: // that we don't validate, that's fine - don't throw an
0689: // exception.
0690: if (validate)
0691: throw e;
0692: }
0693:
0694: // Setup some namespace features.
0695: internalSetFeature(parser,
0696: "http://xml.org/sax/features/namespaces", true,
0697: "Namespaces");
0698: internalSetFeature(parser,
0699: "http://xml.org/sax/features/namespace-prefixes",
0700: true, "Namespace prefixes");
0701: }
0702:
0703: // Set entity expansion
0704: // Note SAXHandler can work regardless of how this is set, but when
0705: // entity expansion it's worth it to try to tell the parser not to
0706: // even bother with external general entities.
0707: // Apparently no parsers yet support this feature.
0708: // XXX It might make sense to setEntityResolver() with a resolver
0709: // that simply ignores external general entities
0710: try {
0711: if (parser
0712: .getFeature("http://xml.org/sax/features/external-general-entities") != expand) {
0713: parser
0714: .setFeature(
0715: "http://xml.org/sax/features/external-general-entities",
0716: expand);
0717: }
0718: } catch (SAXNotRecognizedException e) { /* Ignore... */
0719: } catch (SAXNotSupportedException e) { /* Ignore... */
0720: }
0721: }
0722:
0723: /**
0724: * Tries to set a feature on the parser. If the feature cannot be set,
0725: * throws a JDOMException describing the problem.
0726: */
0727: private void internalSetFeature(XMLReader parser, String feature,
0728: boolean value, String displayName) throws JDOMException {
0729: try {
0730: parser.setFeature(feature, value);
0731: } catch (SAXNotSupportedException e) {
0732: throw new JDOMException(displayName
0733: + " feature not supported for SAX driver "
0734: + parser.getClass().getName());
0735: } catch (SAXNotRecognizedException e) {
0736: throw new JDOMException(displayName
0737: + " feature not recognized for SAX driver "
0738: + parser.getClass().getName());
0739: }
0740: }
0741:
0742: /**
0743: * <p>
0744: * Tries to set a property on the parser. If the property cannot be set,
0745: * throws a JDOMException describing the problem.
0746: * </p>
0747: */
0748: private void internalSetProperty(XMLReader parser, String property,
0749: Object value, String displayName) throws JDOMException {
0750: try {
0751: parser.setProperty(property, value);
0752: } catch (SAXNotSupportedException e) {
0753: throw new JDOMException(displayName
0754: + " property not supported for SAX driver "
0755: + parser.getClass().getName());
0756: } catch (SAXNotRecognizedException e) {
0757: throw new JDOMException(displayName
0758: + " property not recognized for SAX driver "
0759: + parser.getClass().getName());
0760: }
0761: }
0762:
0763: /**
0764: * <p>
0765: * This builds a document from the supplied
0766: * input stream.
0767: * </p>
0768: *
0769: * @param in <code>InputStream</code> to read from
0770: * @return <code>Document</code> resultant Document object
0771: * @throws JDOMException when errors occur in parsing
0772: * @throws IOException when an I/O error prevents a document
0773: * from being fully parsed.
0774: */
0775: public Document build(InputStream in) throws JDOMException,
0776: IOException {
0777: return build(new InputSource(in));
0778: }
0779:
0780: /**
0781: * <p>
0782: * This builds a document from the supplied
0783: * filename.
0784: * </p>
0785: *
0786: * @param file <code>File</code> to read from
0787: * @return <code>Document</code> resultant Document object
0788: * @throws JDOMException when errors occur in parsing
0789: * @throws IOException when an I/O error prevents a document
0790: * from being fully parsed
0791: */
0792: public Document build(File file) throws JDOMException, IOException {
0793: try {
0794: URL url = fileToURL(file);
0795: return build(url);
0796: } catch (MalformedURLException e) {
0797: throw new JDOMException("Error in building", e);
0798: }
0799: }
0800:
0801: /**
0802: * <p>
0803: * This builds a document from the supplied
0804: * URL.
0805: * </p>
0806: *
0807: * @param url <code>URL</code> to read from.
0808: * @return <code>Document</code> - resultant Document object.
0809: * @throws JDOMException when errors occur in parsing
0810: * @throws IOException when an I/O error prevents a document
0811: * from being fully parsed.
0812: */
0813: public Document build(URL url) throws JDOMException, IOException {
0814: String systemID = url.toExternalForm();
0815: return build(new InputSource(systemID));
0816: }
0817:
0818: /**
0819: * <p>
0820: * This builds a document from the supplied
0821: * input stream.
0822: * </p>
0823: *
0824: * @param in <code>InputStream</code> to read from.
0825: * @param systemId base for resolving relative URIs
0826: * @return <code>Document</code> resultant Document object
0827: * @throws JDOMException when errors occur in parsing
0828: * @throws IOException when an I/O error prevents a document
0829: * from being fully parsed
0830: */
0831: public Document build(InputStream in, String systemId)
0832: throws JDOMException, IOException {
0833:
0834: InputSource src = new InputSource(in);
0835: src.setSystemId(systemId);
0836: return build(src);
0837: }
0838:
0839: /**
0840: * <p>
0841: * This builds a document from the supplied
0842: * Reader. It's the programmer's responsibility to make sure
0843: * the reader matches the encoding of the file. It's often easier
0844: * and safer to use an InputStream rather than a Reader, and to let the
0845: * parser auto-detect the encoding from the XML declaration.
0846: * </p>
0847: *
0848: * @param characterStream <code>Reader</code> to read from
0849: * @return <code>Document</code> resultant Document object
0850: * @throws JDOMException when errors occur in parsing
0851: * @throws IOException when an I/O error prevents a document
0852: * from being fully parsed
0853: */
0854: public Document build(Reader characterStream) throws JDOMException,
0855: IOException {
0856: return build(new InputSource(characterStream));
0857: }
0858:
0859: /**
0860: * <p>
0861: * This builds a document from the supplied
0862: * Reader. It's the programmer's responsibility to make sure
0863: * the reader matches the encoding of the file. It's often easier
0864: * and safer to use an InputStream rather than a Reader, and to let the
0865: * parser auto-detect the encoding from the XML declaration.
0866: * </p>
0867: *
0868: * @param characterStream <code>Reader</code> to read from.
0869: * @param systemId base for resolving relative URIs
0870: * @return <code>Document</code> resultant Document object
0871: * @throws JDOMException when errors occur in parsing
0872: * @throws IOException when an I/O error prevents a document
0873: * from being fully parsed
0874: */
0875: public Document build(Reader characterStream, String systemId)
0876: throws JDOMException, IOException {
0877:
0878: InputSource src = new InputSource(characterStream);
0879: src.setSystemId(systemId);
0880: return build(src);
0881: }
0882:
0883: /**
0884: * <p>
0885: * This builds a document from the supplied
0886: * URI.
0887: * </p>
0888: * @param systemId URI for the input
0889: * @return <code>Document</code> resultant Document object
0890: * @throws JDOMException when errors occur in parsing
0891: * @throws IOException when an I/O error prevents a document
0892: * from being fully parsed
0893: */
0894: public Document build(String systemId) throws JDOMException,
0895: IOException {
0896: return build(new InputSource(systemId));
0897: }
0898:
0899: // /**
0900: // * Imitation of File.toURL(), a JDK 1.2 method, reimplemented
0901: // * here to work with JDK 1.1.
0902: // *
0903: // * @see java.io.File
0904: // *
0905: // * @param f the file to convert
0906: // * @return the file path converted to a file: URL
0907: // */
0908: // protected URL fileToURL(File f) throws MalformedURLException {
0909: // String path = f.getAbsolutePath();
0910: // if (File.separatorChar != '/') {
0911: // path = path.replace(File.separatorChar, '/');
0912: // }
0913: // if (!path.startsWith("/")) {
0914: // path = "/" + path;
0915: // }
0916: // if (!path.endsWith("/") && f.isDirectory()) {
0917: // path = path + "/";
0918: // }
0919: // return new URL("file", "", path);
0920: // }
0921:
0922: /** Custom File.toUrl() implementation to handle special chars in file names
0923: *
0924: * @param file file object whose path will be converted
0925: * @return URL form of the file, with special characters handled
0926: * @throws MalformedURLException if there's a problem constructing a URL
0927: */
0928: private static URL fileToURL(File file)
0929: throws MalformedURLException {
0930: StringBuffer buffer = new StringBuffer();
0931: String path = file.getAbsolutePath();
0932:
0933: // Convert non-URL style file separators
0934: if (File.separatorChar != '/') {
0935: path = path.replace(File.separatorChar, '/');
0936: }
0937:
0938: // Make sure it starts at root
0939: if (!path.startsWith("/")) {
0940: buffer.append('/');
0941: }
0942:
0943: // Copy, converting URL special characters as we go
0944: int len = path.length();
0945: for (int i = 0; i < len; i++) {
0946: char c = path.charAt(i);
0947: if (c == ' ')
0948: buffer.append("%20");
0949: else if (c == '#')
0950: buffer.append("%23");
0951: else if (c == '%')
0952: buffer.append("%25");
0953: else if (c == '&')
0954: buffer.append("%26");
0955: else if (c == ';')
0956: buffer.append("%3B");
0957: else if (c == '<')
0958: buffer.append("%3C");
0959: else if (c == '=')
0960: buffer.append("%3D");
0961: else if (c == '>')
0962: buffer.append("%3E");
0963: else if (c == '?')
0964: buffer.append("%3F");
0965: else if (c == '~')
0966: buffer.append("%7E");
0967: else
0968: buffer.append(c);
0969: }
0970:
0971: // Make sure directories end with slash
0972: if (!path.endsWith("/") && file.isDirectory()) {
0973: buffer.append('/');
0974: }
0975:
0976: // Return URL
0977: return new URL("file", "", buffer.toString());
0978: }
0979:
0980: /**
0981: * Returns whether or not entities are being expanded into normal text
0982: * content.
0983: *
0984: * @return whether entities are being expanded
0985: */
0986: public boolean getExpandEntities() {
0987: return expand;
0988: }
0989:
0990: /**
0991: * <p>
0992: * This sets whether or not to expand entities for the builder.
0993: * A true means to expand entities as normal content. A false means to
0994: * leave entities unexpanded as <code>EntityRef</code> objects. The
0995: * default is true.
0996: * </p>
0997: * <p>
0998: * When this setting is false, the internal DTD subset is retained; when
0999: * this setting is true, the internal DTD subset is not retained.
1000: * </p>
1001: * <p>
1002: * Note that Xerces (at least up to 1.4.4) has a bug where entities
1003: * in attribute values will be misreported if this flag is turned off,
1004: * resulting in entities to appear within element content. When turning
1005: * entity expansion off either avoid entities in attribute values, or
1006: * use another parser like Crimson.
1007: * http://nagoya.apache.org/bugzilla/show_bug.cgi?id=6111
1008: * </p>
1009: *
1010: * @param expand <code>boolean</code> indicating whether entity expansion
1011: * should occur.
1012: */
1013: public void setExpandEntities(boolean expand) {
1014: this.expand = expand;
1015: }
1016: }
|