0001: /*
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: */
0017:
0018: package org.apache.xerces.impl;
0019:
0020: import java.io.FileOutputStream;
0021: import java.io.IOException;
0022: import java.io.InputStream;
0023: import java.io.InputStreamReader;
0024: import java.io.OutputStream;
0025: import java.io.Reader;
0026: import java.io.StringReader;
0027: import java.lang.reflect.Method;
0028: import java.net.HttpURLConnection;
0029: import java.net.URL;
0030: import java.net.URLConnection;
0031: import java.security.AccessController;
0032: import java.security.PrivilegedAction;
0033: import java.util.Hashtable;
0034: import java.util.Iterator;
0035: import java.util.Locale;
0036: import java.util.Map;
0037: import java.util.Stack;
0038: import java.util.StringTokenizer;
0039:
0040: import org.apache.xerces.impl.io.ASCIIReader;
0041: import org.apache.xerces.impl.io.Latin1Reader;
0042: import org.apache.xerces.impl.io.UCSReader;
0043: import org.apache.xerces.impl.io.UTF8Reader;
0044: import org.apache.xerces.impl.msg.XMLMessageFormatter;
0045: import org.apache.xerces.impl.validation.ValidationManager;
0046: import org.apache.xerces.util.AugmentationsImpl;
0047: import org.apache.xerces.util.EncodingMap;
0048: import org.apache.xerces.util.HTTPInputSource;
0049: import org.apache.xerces.util.SecurityManager;
0050: import org.apache.xerces.util.SymbolTable;
0051: import org.apache.xerces.util.URI;
0052: import org.apache.xerces.util.XMLChar;
0053: import org.apache.xerces.util.XMLEntityDescriptionImpl;
0054: import org.apache.xerces.util.XMLResourceIdentifierImpl;
0055: import org.apache.xerces.xni.Augmentations;
0056: import org.apache.xerces.xni.XMLResourceIdentifier;
0057: import org.apache.xerces.xni.XNIException;
0058: import org.apache.xerces.xni.parser.XMLComponent;
0059: import org.apache.xerces.xni.parser.XMLComponentManager;
0060: import org.apache.xerces.xni.parser.XMLConfigurationException;
0061: import org.apache.xerces.xni.parser.XMLEntityResolver;
0062: import org.apache.xerces.xni.parser.XMLInputSource;
0063:
0064: /**
0065: * The entity manager handles the registration of general and parameter
0066: * entities; resolves entities; and starts entities. The entity manager
0067: * is a central component in a standard parser configuration and this
0068: * class works directly with the entity scanner to manage the underlying
0069: * xni.
0070: * <p>
0071: * This component requires the following features and properties from the
0072: * component manager that uses it:
0073: * <ul>
0074: * <li>http://xml.org/sax/features/validation</li>
0075: * <li>http://xml.org/sax/features/external-general-entities</li>
0076: * <li>http://xml.org/sax/features/external-parameter-entities</li>
0077: * <li>http://apache.org/xml/features/allow-java-encodings</li>
0078: * <li>http://apache.org/xml/properties/internal/symbol-table</li>
0079: * <li>http://apache.org/xml/properties/internal/error-reporter</li>
0080: * <li>http://apache.org/xml/properties/internal/entity-resolver</li>
0081: * </ul>
0082: *
0083: * @xerces.internal
0084: *
0085: * @author Andy Clark, IBM
0086: * @author Arnaud Le Hors, IBM
0087: *
0088: * @version $Id: XMLEntityManager.java 572040 2007-09-02 17:24:29Z mrglavas $
0089: */
0090: public class XMLEntityManager implements XMLComponent,
0091: XMLEntityResolver {
0092:
0093: //
0094: // Constants
0095: //
0096:
0097: /** Default buffer size (2048). */
0098: public static final int DEFAULT_BUFFER_SIZE = 2048;
0099:
0100: /** Default buffer size before we've finished with the XMLDecl: */
0101: public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
0102:
0103: /** Default internal entity buffer size (512). */
0104: public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 512;
0105:
0106: // feature identifiers
0107:
0108: /** Feature identifier: validation. */
0109: protected static final String VALIDATION = Constants.SAX_FEATURE_PREFIX
0110: + Constants.VALIDATION_FEATURE;
0111:
0112: /** Feature identifier: external general entities. */
0113: protected static final String EXTERNAL_GENERAL_ENTITIES = Constants.SAX_FEATURE_PREFIX
0114: + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE;
0115:
0116: /** Feature identifier: external parameter entities. */
0117: protected static final String EXTERNAL_PARAMETER_ENTITIES = Constants.SAX_FEATURE_PREFIX
0118: + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE;
0119:
0120: /** Feature identifier: allow Java encodings. */
0121: protected static final String ALLOW_JAVA_ENCODINGS = Constants.XERCES_FEATURE_PREFIX
0122: + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;
0123:
0124: /** Feature identifier: warn on duplicate EntityDef */
0125: protected static final String WARN_ON_DUPLICATE_ENTITYDEF = Constants.XERCES_FEATURE_PREFIX
0126: + Constants.WARN_ON_DUPLICATE_ENTITYDEF_FEATURE;
0127:
0128: /** Feature identifier: standard uri conformant */
0129: protected static final String STANDARD_URI_CONFORMANT = Constants.XERCES_FEATURE_PREFIX
0130: + Constants.STANDARD_URI_CONFORMANT_FEATURE;
0131:
0132: protected static final String PARSER_SETTINGS = Constants.XERCES_FEATURE_PREFIX
0133: + Constants.PARSER_SETTINGS;
0134:
0135: // property identifiers
0136:
0137: /** Property identifier: symbol table. */
0138: protected static final String SYMBOL_TABLE = Constants.XERCES_PROPERTY_PREFIX
0139: + Constants.SYMBOL_TABLE_PROPERTY;
0140:
0141: /** Property identifier: error reporter. */
0142: protected static final String ERROR_REPORTER = Constants.XERCES_PROPERTY_PREFIX
0143: + Constants.ERROR_REPORTER_PROPERTY;
0144:
0145: /** Property identifier: entity resolver. */
0146: protected static final String ENTITY_RESOLVER = Constants.XERCES_PROPERTY_PREFIX
0147: + Constants.ENTITY_RESOLVER_PROPERTY;
0148:
0149: // property identifier: ValidationManager
0150: protected static final String VALIDATION_MANAGER = Constants.XERCES_PROPERTY_PREFIX
0151: + Constants.VALIDATION_MANAGER_PROPERTY;
0152:
0153: /** property identifier: buffer size. */
0154: protected static final String BUFFER_SIZE = Constants.XERCES_PROPERTY_PREFIX
0155: + Constants.BUFFER_SIZE_PROPERTY;
0156:
0157: /** property identifier: security manager. */
0158: protected static final String SECURITY_MANAGER = Constants.XERCES_PROPERTY_PREFIX
0159: + Constants.SECURITY_MANAGER_PROPERTY;
0160:
0161: // recognized features and properties
0162:
0163: /** Recognized features. */
0164: private static final String[] RECOGNIZED_FEATURES = { VALIDATION,
0165: EXTERNAL_GENERAL_ENTITIES, EXTERNAL_PARAMETER_ENTITIES,
0166: ALLOW_JAVA_ENCODINGS, WARN_ON_DUPLICATE_ENTITYDEF,
0167: STANDARD_URI_CONFORMANT };
0168:
0169: /** Feature defaults. */
0170: private static final Boolean[] FEATURE_DEFAULTS = { null,
0171: Boolean.TRUE, Boolean.TRUE, Boolean.FALSE, Boolean.FALSE,
0172: Boolean.FALSE };
0173:
0174: /** Recognized properties. */
0175: private static final String[] RECOGNIZED_PROPERTIES = {
0176: SYMBOL_TABLE, ERROR_REPORTER, ENTITY_RESOLVER,
0177: VALIDATION_MANAGER, BUFFER_SIZE, SECURITY_MANAGER, };
0178:
0179: /** Property defaults. */
0180: private static final Object[] PROPERTY_DEFAULTS = { null, null,
0181: null, null, new Integer(DEFAULT_BUFFER_SIZE), null, };
0182:
0183: private static final String XMLEntity = "[xml]".intern();
0184: private static final String DTDEntity = "[dtd]".intern();
0185:
0186: // debugging
0187:
0188: /**
0189: * Debug printing of buffer. This debugging flag works best when you
0190: * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
0191: * 64 characters.
0192: */
0193: private static final boolean DEBUG_BUFFER = false;
0194:
0195: /** Debug some basic entities. */
0196: private static final boolean DEBUG_ENTITIES = false;
0197:
0198: /** Debug switching readers for encodings. */
0199: private static final boolean DEBUG_ENCODINGS = false;
0200:
// should trace messages for entity resolution be displayed?
0202: private static final boolean DEBUG_RESOLVER = false;
0203:
0204: //
0205: // Data
0206: //
0207:
0208: // features
0209:
0210: /**
0211: * Validation. This feature identifier is:
0212: * http://xml.org/sax/features/validation
0213: */
0214: protected boolean fValidation;
0215:
0216: /**
0217: * External general entities. This feature identifier is:
0218: * http://xml.org/sax/features/external-general-entities
0219: */
0220: protected boolean fExternalGeneralEntities = true;
0221:
0222: /**
0223: * External parameter entities. This feature identifier is:
0224: * http://xml.org/sax/features/external-parameter-entities
0225: */
0226: protected boolean fExternalParameterEntities = true;
0227:
0228: /**
0229: * Allow Java encoding names. This feature identifier is:
0230: * http://apache.org/xml/features/allow-java-encodings
0231: */
0232: protected boolean fAllowJavaEncodings;
0233:
0234: /** warn on duplicate Entity declaration.
0235: * http://apache.org/xml/features/warn-on-duplicate-entitydef
0236: */
0237: protected boolean fWarnDuplicateEntityDef;
0238:
0239: /**
0240: * standard uri conformant (strict uri).
0241: * http://apache.org/xml/features/standard-uri-conformant
0242: */
0243: protected boolean fStrictURI;
0244:
0245: // properties
0246:
0247: /**
0248: * Symbol table. This property identifier is:
0249: * http://apache.org/xml/properties/internal/symbol-table
0250: */
0251: protected SymbolTable fSymbolTable;
0252:
0253: /**
0254: * Error reporter. This property identifier is:
0255: * http://apache.org/xml/properties/internal/error-reporter
0256: */
0257: protected XMLErrorReporter fErrorReporter;
0258:
0259: /**
0260: * Entity resolver. This property identifier is:
0261: * http://apache.org/xml/properties/internal/entity-resolver
0262: */
0263: protected XMLEntityResolver fEntityResolver;
0264:
0265: /**
0266: * Validation manager. This property identifier is:
0267: * http://apache.org/xml/properties/internal/validation-manager
0268: */
0269: protected ValidationManager fValidationManager;
0270:
0271: // settings
0272:
0273: /**
0274: * Buffer size. We get this value from a property. The default size
0275: * is used if the input buffer size property is not specified.
0276: * REVISIT: do we need a property for internal entity buffer size?
0277: */
0278: protected int fBufferSize = DEFAULT_BUFFER_SIZE;
0279:
0280: // stores defaults for entity expansion limit if it has
0281: // been set on the configuration.
0282: protected SecurityManager fSecurityManager = null;
0283:
0284: /**
0285: * True if the document entity is standalone. This should really
0286: * only be set by the document source (e.g. XMLDocumentScanner).
0287: */
0288: protected boolean fStandalone;
0289:
0290: /**
0291: * True if the current document contains parameter entity references.
0292: */
0293: protected boolean fHasPEReferences;
0294:
0295: // are the entities being parsed in the external subset?
0296: // NOTE: this *is not* the same as whether they're external entities!
0297: protected boolean fInExternalSubset = false;
0298:
0299: // handlers
0300:
0301: /** Entity handler. */
0302: protected XMLEntityHandler fEntityHandler;
0303:
0304: // scanner
0305:
0306: /** Current entity scanner. */
0307: protected XMLEntityScanner fEntityScanner;
0308:
0309: /** XML 1.0 entity scanner. */
0310: protected XMLEntityScanner fXML10EntityScanner;
0311:
0312: /** XML 1.1 entity scanner. */
0313: protected XMLEntityScanner fXML11EntityScanner;
0314:
0315: // entity expansion limit (contains useful data if and only if
0316: // fSecurityManager is non-null)
0317: protected int fEntityExpansionLimit = 0;
0318: // entity currently being expanded:
0319: protected int fEntityExpansionCount = 0;
0320:
0321: // entities
0322:
0323: /** Entities. */
0324: protected final Hashtable fEntities = new Hashtable();
0325:
0326: /** Entity stack. */
0327: protected final Stack fEntityStack = new Stack();
0328:
0329: /** Current entity. */
0330: protected ScannedEntity fCurrentEntity;
0331:
0332: // shared context
0333:
0334: /** Shared declared entities. */
0335: protected Hashtable fDeclaredEntities;
0336:
0337: // temp vars
0338:
/** Resource identifier. */
0340: private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
0341:
0342: /** Augmentations for entities. */
0343: private final Augmentations fEntityAugs = new AugmentationsImpl();
0344:
0345: /** Pool of byte buffers. */
0346: private final ByteBufferPool fByteBufferPool = new ByteBufferPool(
0347: fBufferSize);
0348:
0349: /** Temporary storage for the current entity's byte buffer. */
0350: private byte[] fTempByteBuffer = null;
0351:
0352: /** Pool of character buffers. */
0353: private final CharacterBufferPool fCharacterBufferPool = new CharacterBufferPool(
0354: fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE);
0355:
0356: //
0357: // Constructors
0358: //
0359:
0360: /** Default constructor. */
public XMLEntityManager() {
    // No shared entity declarations: delegate with a null manager.
    this(null);
} // <init>()
0364:
0365: /**
0366: * Constructs an entity manager that shares the specified entity
0367: * declarations during each parse.
0368: * <p>
0369: * <strong>REVISIT:</strong> We might want to think about the "right"
0370: * way to expose the list of declared entities. For now, the knowledge
0371: * how to access the entity declarations is implicit.
0372: */
public XMLEntityManager(XMLEntityManager entityManager) {

    // Reuse the other manager's declared entities when one is supplied;
    // otherwise there is nothing to share.
    if (entityManager == null) {
        fDeclaredEntities = null;
    } else {
        fDeclaredEntities = entityManager.getDeclaredEntities();
    }

    // Start out with the XML 1.0 scanner.
    setScannerVersion(Constants.XML_VERSION_1_0);
} // <init>(XMLEntityManager)
0381:
0382: //
0383: // Public methods
0384: //
0385:
0386: /**
0387: * Sets whether the document entity is standalone.
0388: *
0389: * @param standalone True if document entity is standalone.
0390: */
0391: public void setStandalone(boolean standalone) {
0392: fStandalone = standalone;
0393: } // setStandalone(boolean)
0394:
0395: /** Returns true if the document entity is standalone. */
0396: public boolean isStandalone() {
0397: return fStandalone;
0398: } // isStandalone():boolean
0399:
0400: /**
0401: * Notifies the entity manager that the current document
0402: * being processed contains parameter entity references.
0403: */
0404: final void notifyHasPEReferences() {
0405: fHasPEReferences = true;
0406: } // notifyHasPEReferences
0407:
0408: /**
0409: * Returns true if the document contains parameter entity references.
0410: */
0411: final boolean hasPEReferences() {
0412: return fHasPEReferences;
0413: } // hasPEReferences():boolean
0414:
0415: /**
0416: * Sets the entity handler. When an entity starts and ends, the
0417: * entity handler is notified of the change.
0418: *
0419: * @param entityHandler The new entity handler.
0420: */
0421: public void setEntityHandler(XMLEntityHandler entityHandler) {
0422: fEntityHandler = entityHandler;
0423: } // setEntityHandler(XMLEntityHandler)
0424:
// This simply returns the fResourceIdentifier object.
// It should only be used with caution by callers that
// carefully manage the entity manager's behaviour, so that
// it doesn't return meaningless or misleading data.
// @return a reference to the current fResourceIdentifier object
0430: public XMLResourceIdentifier getCurrentResourceIdentifier() {
0431: return fResourceIdentifier;
0432: }
0433:
// This simply returns the fCurrentEntity object.
// It should only be used with caution by callers that
// carefully manage the entity manager's behaviour, so that
// it doesn't return meaningless or misleading data.
// @return a reference to the current fCurrentEntity object
0439: public ScannedEntity getCurrentEntity() {
0440: return fCurrentEntity;
0441: }
0442:
0443: /**
0444: * Adds an internal entity declaration.
0445: * <p>
0446: * <strong>Note:</strong> This method ignores subsequent entity
0447: * declarations.
0448: * <p>
0449: * <strong>Note:</strong> The name should be a unique symbol. The
0450: * SymbolTable can be used for this purpose.
0451: *
0452: * @param name The name of the entity.
0453: * @param text The text of the entity.
0454: *
0455: * @see SymbolTable
0456: */
0457: public void addInternalEntity(String name, String text) {
0458: if (!fEntities.containsKey(name)) {
0459: Entity entity = new InternalEntity(name, text,
0460: fInExternalSubset);
0461: fEntities.put(name, entity);
0462: } else {
0463: if (fWarnDuplicateEntityDef) {
0464: fErrorReporter.reportError(
0465: XMLMessageFormatter.XML_DOMAIN,
0466: "MSG_DUPLICATE_ENTITY_DEFINITION",
0467: new Object[] { name },
0468: XMLErrorReporter.SEVERITY_WARNING);
0469: }
0470: }
0471:
0472: } // addInternalEntity(String,String)
0473:
0474: /**
0475: * Adds an external entity declaration.
0476: * <p>
0477: * <strong>Note:</strong> This method ignores subsequent entity
0478: * declarations.
0479: * <p>
0480: * <strong>Note:</strong> The name should be a unique symbol. The
0481: * SymbolTable can be used for this purpose.
0482: *
0483: * @param name The name of the entity.
0484: * @param publicId The public identifier of the entity.
0485: * @param literalSystemId The system identifier of the entity.
* @param baseSystemId The base system identifier of the entity.
* This is the system identifier of the entity
* where <em>the entity being added</em> is declared; it
* is used to expand the system identifier when
* the system identifier is a relative URI.
* When null the system identifier of the first
* external entity on the stack is used instead.
0493: *
0494: * @see SymbolTable
0495: */
0496: public void addExternalEntity(String name, String publicId,
0497: String literalSystemId, String baseSystemId)
0498: throws IOException {
0499: if (!fEntities.containsKey(name)) {
0500: if (baseSystemId == null) {
0501: // search for the first external entity on the stack
0502: int size = fEntityStack.size();
0503: if (size == 0 && fCurrentEntity != null
0504: && fCurrentEntity.entityLocation != null) {
0505: baseSystemId = fCurrentEntity.entityLocation
0506: .getExpandedSystemId();
0507: }
0508: for (int i = size - 1; i >= 0; i--) {
0509: ScannedEntity externalEntity = (ScannedEntity) fEntityStack
0510: .elementAt(i);
0511: if (externalEntity.entityLocation != null
0512: && externalEntity.entityLocation
0513: .getExpandedSystemId() != null) {
0514: baseSystemId = externalEntity.entityLocation
0515: .getExpandedSystemId();
0516: break;
0517: }
0518: }
0519: }
0520: Entity entity = new ExternalEntity(name,
0521: new XMLEntityDescriptionImpl(name, publicId,
0522: literalSystemId, baseSystemId,
0523: expandSystemId(literalSystemId,
0524: baseSystemId, false)), null,
0525: fInExternalSubset);
0526: fEntities.put(name, entity);
0527: } else {
0528: if (fWarnDuplicateEntityDef) {
0529: fErrorReporter.reportError(
0530: XMLMessageFormatter.XML_DOMAIN,
0531: "MSG_DUPLICATE_ENTITY_DEFINITION",
0532: new Object[] { name },
0533: XMLErrorReporter.SEVERITY_WARNING);
0534: }
0535: }
0536:
0537: } // addExternalEntity(String,String,String,String)
0538:
0539: /**
0540: * Checks whether an entity given by name is external.
0541: *
0542: * @param entityName The name of the entity to check.
0543: * @return True if the entity is external, false otherwise
0544: * (including when the entity is not declared).
0545: */
0546: public boolean isExternalEntity(String entityName) {
0547:
0548: Entity entity = (Entity) fEntities.get(entityName);
0549: if (entity == null) {
0550: return false;
0551: }
0552: return entity.isExternal();
0553: }
0554:
0555: /**
* Checks whether the declaration of an entity given by name is
* in the external subset.
0558: *
0559: * @param entityName The name of the entity to check.
0560: * @return True if the entity was declared in the external subset, false otherwise
0561: * (including when the entity is not declared).
0562: */
0563: public boolean isEntityDeclInExternalSubset(String entityName) {
0564:
0565: Entity entity = (Entity) fEntities.get(entityName);
0566: if (entity == null) {
0567: return false;
0568: }
0569: return entity.isEntityDeclInExternalSubset();
0570: }
0571:
0572: /**
0573: * Adds an unparsed entity declaration.
0574: * <p>
0575: * <strong>Note:</strong> This method ignores subsequent entity
0576: * declarations.
0577: * <p>
0578: * <strong>Note:</strong> The name should be a unique symbol. The
0579: * SymbolTable can be used for this purpose.
0580: *
0581: * @param name The name of the entity.
0582: * @param publicId The public identifier of the entity.
* @param systemId The system identifier of the entity.
* @param baseSystemId The base system identifier of the entity.
0584: * @param notation The name of the notation.
0585: *
0586: * @see SymbolTable
0587: */
0588: public void addUnparsedEntity(String name, String publicId,
0589: String systemId, String baseSystemId, String notation) {
0590: if (!fEntities.containsKey(name)) {
0591: Entity entity = new ExternalEntity(name,
0592: new XMLEntityDescriptionImpl(name, publicId,
0593: systemId, baseSystemId, null), notation,
0594: fInExternalSubset);
0595: fEntities.put(name, entity);
0596: } else {
0597: if (fWarnDuplicateEntityDef) {
0598: fErrorReporter.reportError(
0599: XMLMessageFormatter.XML_DOMAIN,
0600: "MSG_DUPLICATE_ENTITY_DEFINITION",
0601: new Object[] { name },
0602: XMLErrorReporter.SEVERITY_WARNING);
0603: }
0604: }
0605: } // addUnparsedEntity(String,String,String,String)
0606:
0607: /**
0608: * Checks whether an entity given by name is unparsed.
0609: *
0610: * @param entityName The name of the entity to check.
0611: * @return True if the entity is unparsed, false otherwise
0612: * (including when the entity is not declared).
0613: */
0614: public boolean isUnparsedEntity(String entityName) {
0615:
0616: Entity entity = (Entity) fEntities.get(entityName);
0617: if (entity == null) {
0618: return false;
0619: }
0620: return entity.isUnparsed();
0621: }
0622:
0623: /**
0624: * Checks whether an entity given by name is declared.
0625: *
0626: * @param entityName The name of the entity to check.
0627: * @return True if the entity is declared, false otherwise.
0628: */
0629: public boolean isDeclaredEntity(String entityName) {
0630:
0631: Entity entity = (Entity) fEntities.get(entityName);
0632: return entity != null;
0633: }
0634:
0635: /**
0636: * Resolves the specified public and system identifiers. This
0637: * method first attempts to resolve the entity based on the
0638: * EntityResolver registered by the application. If no entity
* resolver is registered or if the registered entity resolver
* is unable to resolve the entity, then default entity
* resolution will occur.
0642: *
0643: * @param resourceIdentifier The XMLResourceIdentifier for the resource to resolve.
0644: *
* @return Returns an input source that wraps the resolved entity.
* Returns null only when <code>resourceIdentifier</code> is null.
0647: *
0648: * @throws IOException Thrown on i/o error.
0649: * @throws XNIException Thrown by entity resolver to signal an error.
0650: */
0651: public XMLInputSource resolveEntity(
0652: XMLResourceIdentifier resourceIdentifier)
0653: throws IOException, XNIException {
0654: if (resourceIdentifier == null)
0655: return null;
0656: String publicId = resourceIdentifier.getPublicId();
0657: String literalSystemId = resourceIdentifier
0658: .getLiteralSystemId();
0659: String baseSystemId = resourceIdentifier.getBaseSystemId();
0660: String expandedSystemId = resourceIdentifier
0661: .getExpandedSystemId();
0662: // if no base systemId given, assume that it's relative
0663: // to the systemId of the current scanned entity
0664: // Sometimes the system id is not (properly) expanded.
0665: // We need to expand the system id if:
0666: // a. the expanded one was null; or
0667: // b. the base system id was null, but becomes non-null from the current entity.
0668: boolean needExpand = (expandedSystemId == null);
0669: // REVISIT: why would the baseSystemId ever be null? if we
0670: // didn't have to make this check we wouldn't have to reuse the
0671: // fXMLResourceIdentifier object...
0672: if (baseSystemId == null && fCurrentEntity != null
0673: && fCurrentEntity.entityLocation != null) {
0674: baseSystemId = fCurrentEntity.entityLocation
0675: .getExpandedSystemId();
0676: if (baseSystemId != null)
0677: needExpand = true;
0678: }
0679: if (needExpand)
0680: expandedSystemId = expandSystemId(literalSystemId,
0681: baseSystemId, false);
0682:
0683: // give the entity resolver a chance
0684: XMLInputSource xmlInputSource = null;
0685: if (fEntityResolver != null) {
0686: resourceIdentifier.setBaseSystemId(baseSystemId);
0687: resourceIdentifier.setExpandedSystemId(expandedSystemId);
0688: xmlInputSource = fEntityResolver
0689: .resolveEntity(resourceIdentifier);
0690: }
0691:
0692: // do default resolution
0693: // REVISIT: what's the correct behavior if the user provided an entity
0694: // resolver (fEntityResolver != null), but resolveEntity doesn't return
0695: // an input source (xmlInputSource == null)?
0696: // do we do default resolution, or do we just return null? -SG
0697: if (xmlInputSource == null) {
0698: // REVISIT: when systemId is null, I think we should return null.
0699: // is this the right solution? -SG
0700: //if (systemId != null)
0701: xmlInputSource = new XMLInputSource(publicId,
0702: literalSystemId, baseSystemId);
0703: }
0704:
0705: if (DEBUG_RESOLVER) {
0706: System.err.println("XMLEntityManager.resolveEntity("
0707: + publicId + ")");
0708: System.err.println(" = " + xmlInputSource);
0709: }
0710:
0711: return xmlInputSource;
0712:
0713: } // resolveEntity(XMLResourceIdentifier):XMLInputSource
0714:
0715: /**
0716: * Starts a named entity.
0717: *
0718: * @param entityName The name of the entity to start.
0719: * @param literal True if this entity is started within a literal
0720: * value.
0721: *
0722: * @throws IOException Thrown on i/o error.
0723: * @throws XNIException Thrown by entity handler to signal an error.
0724: */
0725: public void startEntity(String entityName, boolean literal)
0726: throws IOException, XNIException {
0727:
0728: // was entity declared?
0729: Entity entity = (Entity) fEntities.get(entityName);
0730: if (entity == null) {
0731: if (fEntityHandler != null) {
0732: String encoding = null;
0733: fResourceIdentifier.clear();
0734: fEntityAugs.removeAllItems();
0735: fEntityAugs.putItem(Constants.ENTITY_SKIPPED,
0736: Boolean.TRUE);
0737: fEntityHandler.startEntity(entityName,
0738: fResourceIdentifier, encoding, fEntityAugs);
0739: fEntityAugs.removeAllItems();
0740: fEntityAugs.putItem(Constants.ENTITY_SKIPPED,
0741: Boolean.TRUE);
0742: fEntityHandler.endEntity(entityName, fEntityAugs);
0743: }
0744: return;
0745: }
0746:
0747: // should we skip external entities?
0748: boolean external = entity.isExternal();
0749: if (external
0750: && (fValidationManager == null || !fValidationManager
0751: .isCachedDTD())) {
0752: boolean unparsed = entity.isUnparsed();
0753: boolean parameter = entityName.startsWith("%");
0754: boolean general = !parameter;
0755: if (unparsed || (general && !fExternalGeneralEntities)
0756: || (parameter && !fExternalParameterEntities)) {
0757: if (fEntityHandler != null) {
0758: fResourceIdentifier.clear();
0759: final String encoding = null;
0760: ExternalEntity externalEntity = (ExternalEntity) entity;
0761: //REVISIT: since we're storing expandedSystemId in the
0762: // externalEntity, how could this have got here if it wasn't already
0763: // expanded??? - neilg
0764: String extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation
0765: .getLiteralSystemId()
0766: : null);
0767: String extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation
0768: .getBaseSystemId()
0769: : null);
0770: String expandedSystemId = expandSystemId(
0771: extLitSysId, extBaseSysId, false);
0772: fResourceIdentifier
0773: .setValues(
0774: (externalEntity.entityLocation != null ? externalEntity.entityLocation
0775: .getPublicId()
0776: : null), extLitSysId,
0777: extBaseSysId, expandedSystemId);
0778: fEntityAugs.removeAllItems();
0779: fEntityAugs.putItem(Constants.ENTITY_SKIPPED,
0780: Boolean.TRUE);
0781: fEntityHandler.startEntity(entityName,
0782: fResourceIdentifier, encoding, fEntityAugs);
0783: fEntityAugs.removeAllItems();
0784: fEntityAugs.putItem(Constants.ENTITY_SKIPPED,
0785: Boolean.TRUE);
0786: fEntityHandler.endEntity(entityName, fEntityAugs);
0787: }
0788: return;
0789: }
0790: }
0791:
0792: // is entity recursive?
0793: int size = fEntityStack.size();
0794: for (int i = size; i >= 0; i--) {
0795: Entity activeEntity = i == size ? fCurrentEntity
0796: : (Entity) fEntityStack.elementAt(i);
0797: if (activeEntity.name == entityName) {
0798: StringBuffer path = new StringBuffer(entityName);
0799: for (int j = i + 1; j < size; j++) {
0800: activeEntity = (Entity) fEntityStack.elementAt(j);
0801: path.append(" -> ");
0802: path.append(activeEntity.name);
0803: }
0804: path.append(" -> ");
0805: path.append(fCurrentEntity.name);
0806: path.append(" -> ");
0807: path.append(entityName);
0808: fErrorReporter.reportError(
0809: XMLMessageFormatter.XML_DOMAIN,
0810: "RecursiveReference", new Object[] {
0811: entityName, path.toString() },
0812: XMLErrorReporter.SEVERITY_FATAL_ERROR);
0813: if (fEntityHandler != null) {
0814: fResourceIdentifier.clear();
0815: final String encoding = null;
0816: if (external) {
0817: ExternalEntity externalEntity = (ExternalEntity) entity;
0818: // REVISIT: for the same reason above...
0819: String extLitSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation
0820: .getLiteralSystemId()
0821: : null);
0822: String extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation
0823: .getBaseSystemId()
0824: : null);
0825: String expandedSystemId = expandSystemId(
0826: extLitSysId, extBaseSysId, false);
0827: fResourceIdentifier
0828: .setValues(
0829: (externalEntity.entityLocation != null ? externalEntity.entityLocation
0830: .getPublicId()
0831: : null), extLitSysId,
0832: extBaseSysId, expandedSystemId);
0833: }
0834: fEntityAugs.removeAllItems();
0835: fEntityAugs.putItem(Constants.ENTITY_SKIPPED,
0836: Boolean.TRUE);
0837: fEntityHandler.startEntity(entityName,
0838: fResourceIdentifier, encoding, fEntityAugs);
0839: fEntityAugs.removeAllItems();
0840: fEntityAugs.putItem(Constants.ENTITY_SKIPPED,
0841: Boolean.TRUE);
0842: fEntityHandler.endEntity(entityName, fEntityAugs);
0843: }
0844: return;
0845: }
0846: }
0847:
0848: // resolve external entity
0849: XMLInputSource xmlInputSource = null;
0850: if (external) {
0851: ExternalEntity externalEntity = (ExternalEntity) entity;
0852: xmlInputSource = resolveEntity(externalEntity.entityLocation);
0853: }
0854:
0855: // wrap internal entity
0856: else {
0857: InternalEntity internalEntity = (InternalEntity) entity;
0858: Reader reader = new StringReader(internalEntity.text);
0859: xmlInputSource = new XMLInputSource(null, null, null,
0860: reader, null);
0861: }
0862:
0863: // start the entity
0864: startEntity(entityName, xmlInputSource, literal, external);
0865:
0866: } // startEntity(String,boolean)
0867:
0868: /**
0869: * Starts the document entity. The document entity has the "[xml]"
0870: * pseudo-name.
0871: *
0872: * @param xmlInputSource The input source of the document entity.
0873: *
0874: * @throws IOException Thrown on i/o error.
0875: * @throws XNIException Thrown by entity handler to signal an error.
0876: */
0877: public void startDocumentEntity(XMLInputSource xmlInputSource)
0878: throws IOException, XNIException {
0879: startEntity(XMLEntity, xmlInputSource, false, true);
0880: } // startDocumentEntity(XMLInputSource)
0881:
0882: /**
0883: * Starts the DTD entity. The DTD entity has the "[dtd]"
0884: * pseudo-name.
0885: *
0886: * @param xmlInputSource The input source of the DTD entity.
0887: *
0888: * @throws IOException Thrown on i/o error.
0889: * @throws XNIException Thrown by entity handler to signal an error.
0890: */
0891: public void startDTDEntity(XMLInputSource xmlInputSource)
0892: throws IOException, XNIException {
0893: startEntity(DTDEntity, xmlInputSource, false, true);
0894: } // startDTDEntity(XMLInputSource)
0895:
0896: // indicate start of external subset so that
0897: // location of entity decls can be tracked
0898: public void startExternalSubset() {
0899: fInExternalSubset = true;
0900: }
0901:
0902: public void endExternalSubset() {
0903: fInExternalSubset = false;
0904: }
0905:
0906: /**
0907: * Starts an entity.
0908: * <p>
0909: * This method can be used to insert an application defined XML
0910: * entity stream into the parsing stream.
0911: *
0912: * @param name The name of the entity.
0913: * @param xmlInputSource The input source of the entity.
0914: * @param literal True if this entity is started within a
0915: * literal value.
0916: * @param isExternal whether this entity should be treated as an internal or external entity.
0917: *
0918: * @throws IOException Thrown on i/o error.
0919: * @throws XNIException Thrown by entity handler to signal an error.
0920: */
0921: public void startEntity(String name, XMLInputSource xmlInputSource,
0922: boolean literal, boolean isExternal) throws IOException,
0923: XNIException {
0924:
0925: String encoding = setupCurrentEntity(name, xmlInputSource,
0926: literal, isExternal);
0927:
0928: //when entity expansion limit is set by the Application, we need to
0929: //check for the entity expansion limit set by the parser, if number of entity
0930: //expansions exceeds the entity expansion limit, parser will throw fatal error.
0931: // Note that this is intentionally unbalanced; it counts
0932: // the number of expansions *per document*.
0933: if (fSecurityManager != null
0934: && fEntityExpansionCount++ > fEntityExpansionLimit) {
0935: fErrorReporter
0936: .reportError(XMLMessageFormatter.XML_DOMAIN,
0937: "EntityExpansionLimitExceeded",
0938: new Object[] { new Integer(
0939: fEntityExpansionLimit) },
0940: XMLErrorReporter.SEVERITY_FATAL_ERROR);
0941: // is there anything better to do than reset the counter?
0942: // at least one can envision debugging applications where this might
0943: // be useful...
0944: fEntityExpansionCount = 0;
0945: }
0946:
0947: // call handler
0948: if (fEntityHandler != null) {
0949: fEntityHandler.startEntity(name, fResourceIdentifier,
0950: encoding, null);
0951: }
0952:
0953: } // startEntity(String,XMLInputSource)
0954:
0955: /**
0956: * This method uses the passed-in XMLInputSource to make
0957: * fCurrentEntity usable for reading.
0958: * @param name name of the entity (XML is it's the document entity)
0959: * @param xmlInputSource the input source, with sufficient information
0960: * to begin scanning characters.
0961: * @param literal True if this entity is started within a
0962: * literal value.
0963: * @param isExternal whether this entity should be treated as an internal or external entity.
0964: * @throws IOException if anything can't be read
0965: * XNIException If any parser-specific goes wrong.
0966: * @return the encoding of the new entity or null if a character stream was employed
0967: */
0968: public String setupCurrentEntity(String name,
0969: XMLInputSource xmlInputSource, boolean literal,
0970: boolean isExternal) throws IOException, XNIException {
0971: // get information
0972:
0973: final String publicId = xmlInputSource.getPublicId();
0974: String literalSystemId = xmlInputSource.getSystemId();
0975: String baseSystemId = xmlInputSource.getBaseSystemId();
0976: String encoding = xmlInputSource.getEncoding();
0977: final boolean encodingExternallySpecified = (encoding != null);
0978: Boolean isBigEndian = null;
0979: fTempByteBuffer = null;
0980:
0981: // create reader
0982: InputStream stream = null;
0983: Reader reader = xmlInputSource.getCharacterStream();
0984: // First chance checking strict URI
0985: String expandedSystemId = expandSystemId(literalSystemId,
0986: baseSystemId, fStrictURI);
0987: if (baseSystemId == null) {
0988: baseSystemId = expandedSystemId;
0989: }
0990: if (reader == null) {
0991: stream = xmlInputSource.getByteStream();
0992: if (stream == null) {
0993: URL location = new URL(expandedSystemId);
0994: URLConnection connect = location.openConnection();
0995: if (!(connect instanceof HttpURLConnection)) {
0996: stream = connect.getInputStream();
0997: } else {
0998: boolean followRedirects = true;
0999:
1000: // setup URLConnection if we have an HTTPInputSource
1001: if (xmlInputSource instanceof HTTPInputSource) {
1002: final HttpURLConnection urlConnection = (HttpURLConnection) connect;
1003: final HTTPInputSource httpInputSource = (HTTPInputSource) xmlInputSource;
1004:
1005: // set request properties
1006: Iterator propIter = httpInputSource
1007: .getHTTPRequestProperties();
1008: while (propIter.hasNext()) {
1009: Map.Entry entry = (Map.Entry) propIter
1010: .next();
1011: urlConnection.setRequestProperty(
1012: (String) entry.getKey(),
1013: (String) entry.getValue());
1014: }
1015:
1016: // set preference for redirection
1017: followRedirects = httpInputSource
1018: .getFollowHTTPRedirects();
1019: if (!followRedirects) {
1020: setInstanceFollowRedirects(urlConnection,
1021: followRedirects);
1022: }
1023: }
1024:
1025: stream = connect.getInputStream();
1026:
1027: // REVISIT: If the URLConnection has external encoding
1028: // information, we should be reading it here. It's located
1029: // in the charset parameter of Content-Type. -- mrglavas
1030:
1031: if (followRedirects) {
1032: String redirect = connect.getURL().toString();
1033: // E43: Check if the URL was redirected, and then
1034: // update literal and expanded system IDs if needed.
1035: if (!redirect.equals(expandedSystemId)) {
1036: literalSystemId = redirect;
1037: expandedSystemId = redirect;
1038: }
1039: }
1040: }
1041: }
1042: // wrap this stream in RewindableInputStream
1043: stream = new RewindableInputStream(stream);
1044:
1045: // perform auto-detect of encoding if necessary
1046: if (encoding == null) {
1047: // read first four bytes and determine encoding
1048: final byte[] b4 = new byte[4];
1049: int count = 0;
1050: for (; count < 4; count++) {
1051: b4[count] = (byte) stream.read();
1052: }
1053: if (count == 4) {
1054: Object[] encodingDesc = getEncodingName(b4, count);
1055: encoding = (String) (encodingDesc[0]);
1056: isBigEndian = (Boolean) (encodingDesc[1]);
1057:
1058: stream.reset();
1059: // Special case UTF-8 files with BOM created by Microsoft
1060: // tools. It's more efficient to consume the BOM than make
1061: // the reader perform extra checks. -Ac
1062: if (count > 2 && encoding.equals("UTF-8")) {
1063: int b0 = b4[0] & 0xFF;
1064: int b1 = b4[1] & 0xFF;
1065: int b2 = b4[2] & 0xFF;
1066: if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
1067: // ignore first three bytes...
1068: stream.skip(3);
1069: }
1070: }
1071: reader = createReader(stream, encoding, isBigEndian);
1072: } else {
1073: reader = createReader(stream, encoding, isBigEndian);
1074: }
1075: }
1076:
1077: // use specified encoding
1078: else {
1079: encoding = encoding.toUpperCase(Locale.ENGLISH);
1080:
1081: // If encoding is UTF-8, consume BOM if one is present.
1082: if (encoding.equals("UTF-8")) {
1083: final int[] b3 = new int[3];
1084: int count = 0;
1085: for (; count < 3; ++count) {
1086: b3[count] = stream.read();
1087: if (b3[count] == -1)
1088: break;
1089: }
1090: if (count == 3) {
1091: if (b3[0] != 0xEF || b3[1] != 0xBB
1092: || b3[2] != 0xBF) {
1093: // First three bytes are not BOM, so reset.
1094: stream.reset();
1095: }
1096: } else {
1097: stream.reset();
1098: }
1099: reader = createReader(stream, encoding, isBigEndian);
1100: }
1101: // If encoding is UTF-16, we still need to read the first four bytes
1102: // in order to discover the byte order.
1103: else if (encoding.equals("UTF-16")) {
1104: final int[] b4 = new int[4];
1105: int count = 0;
1106: for (; count < 4; ++count) {
1107: b4[count] = stream.read();
1108: if (b4[count] == -1)
1109: break;
1110: }
1111: stream.reset();
1112:
1113: String utf16Encoding = "UTF-16";
1114: if (count >= 2) {
1115: final int b0 = b4[0];
1116: final int b1 = b4[1];
1117: if (b0 == 0xFE && b1 == 0xFF) {
1118: // UTF-16, big-endian
1119: utf16Encoding = "UTF-16BE";
1120: isBigEndian = Boolean.TRUE;
1121: } else if (b0 == 0xFF && b1 == 0xFE) {
1122: // UTF-16, little-endian
1123: utf16Encoding = "UTF-16LE";
1124: isBigEndian = Boolean.FALSE;
1125: } else if (count == 4) {
1126: final int b2 = b4[2];
1127: final int b3 = b4[3];
1128: if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00
1129: && b3 == 0x3F) {
1130: // UTF-16, big-endian, no BOM
1131: utf16Encoding = "UTF-16BE";
1132: isBigEndian = Boolean.TRUE;
1133: }
1134: if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F
1135: && b3 == 0x00) {
1136: // UTF-16, little-endian, no BOM
1137: utf16Encoding = "UTF-16LE";
1138: isBigEndian = Boolean.FALSE;
1139: }
1140: }
1141: }
1142: reader = createReader(stream, utf16Encoding,
1143: isBigEndian);
1144: }
1145: // If encoding is UCS-4, we still need to read the first four bytes
1146: // in order to discover the byte order.
1147: else if (encoding.equals("ISO-10646-UCS-4")) {
1148: final int[] b4 = new int[4];
1149: int count = 0;
1150: for (; count < 4; ++count) {
1151: b4[count] = stream.read();
1152: if (b4[count] == -1)
1153: break;
1154: }
1155: stream.reset();
1156:
1157: // Ignore unusual octet order for now.
1158: if (count == 4) {
1159: // UCS-4, big endian (1234)
1160: if (b4[0] == 0x00 && b4[1] == 0x00
1161: && b4[2] == 0x00 && b4[3] == 0x3C) {
1162: isBigEndian = Boolean.TRUE;
1163: }
1164: // UCS-4, little endian (1234)
1165: else if (b4[0] == 0x3C && b4[1] == 0x00
1166: && b4[2] == 0x00 && b4[3] == 0x00) {
1167: isBigEndian = Boolean.FALSE;
1168: }
1169: }
1170: reader = createReader(stream, encoding, isBigEndian);
1171: }
1172: // If encoding is UCS-2, we still need to read the first four bytes
1173: // in order to discover the byte order.
1174: else if (encoding.equals("ISO-10646-UCS-2")) {
1175: final int[] b4 = new int[4];
1176: int count = 0;
1177: for (; count < 4; ++count) {
1178: b4[count] = stream.read();
1179: if (b4[count] == -1)
1180: break;
1181: }
1182: stream.reset();
1183:
1184: if (count == 4) {
1185: // UCS-2, big endian
1186: if (b4[0] == 0x00 && b4[1] == 0x3C
1187: && b4[2] == 0x00 && b4[3] == 0x3F) {
1188: isBigEndian = Boolean.TRUE;
1189: }
1190: // UCS-2, little endian
1191: else if (b4[0] == 0x3C && b4[1] == 0x00
1192: && b4[2] == 0x3F && b4[3] == 0x00) {
1193: isBigEndian = Boolean.FALSE;
1194: }
1195: }
1196: reader = createReader(stream, encoding, isBigEndian);
1197: } else {
1198: reader = createReader(stream, encoding, isBigEndian);
1199: }
1200: }
1201:
1202: // read one character at a time so we don't jump too far
1203: // ahead, converting characters from the byte stream in
1204: // the wrong encoding
1205: if (DEBUG_ENCODINGS) {
1206: System.out
1207: .println("$$$ no longer wrapping reader in OneCharReader");
1208: }
1209: //reader = new OneCharReader(reader);
1210: }
1211:
1212: // We've seen a new Reader.
1213: // Push it on the stack so we can close it later.
1214: fReaderStack.push(reader);
1215:
1216: // push entity on stack
1217: if (fCurrentEntity != null) {
1218: fEntityStack.push(fCurrentEntity);
1219: }
1220:
1221: // create entity
1222: fCurrentEntity = new ScannedEntity(
1223: name,
1224: new XMLResourceIdentifierImpl(publicId,
1225: literalSystemId, baseSystemId, expandedSystemId),
1226: stream, reader, fTempByteBuffer, encoding, literal,
1227: false, isExternal);
1228: fCurrentEntity
1229: .setEncodingExternallySpecified(encodingExternallySpecified);
1230: fEntityScanner.setCurrentEntity(fCurrentEntity);
1231: fResourceIdentifier.setValues(publicId, literalSystemId,
1232: baseSystemId, expandedSystemId);
1233: return encoding;
1234: } //setupCurrentEntity(String, XMLInputSource, boolean, boolean): String
1235:
1236: // set version of scanner to use
1237: public void setScannerVersion(short version) {
1238: if (version == Constants.XML_VERSION_1_0) {
1239: if (fXML10EntityScanner == null) {
1240: fXML10EntityScanner = new XMLEntityScanner();
1241: }
1242: fXML10EntityScanner.reset(fSymbolTable, this ,
1243: fErrorReporter);
1244: fEntityScanner = fXML10EntityScanner;
1245: fEntityScanner.setCurrentEntity(fCurrentEntity);
1246: } else {
1247: if (fXML11EntityScanner == null) {
1248: fXML11EntityScanner = new XML11EntityScanner();
1249: }
1250: fXML11EntityScanner.reset(fSymbolTable, this ,
1251: fErrorReporter);
1252: fEntityScanner = fXML11EntityScanner;
1253: fEntityScanner.setCurrentEntity(fCurrentEntity);
1254: }
1255: } // setScannerVersion(short)
1256:
1257: /** Returns the entity scanner. */
1258: public XMLEntityScanner getEntityScanner() {
1259: if (fEntityScanner == null) {
1260: // default to 1.0
1261: if (fXML10EntityScanner == null) {
1262: fXML10EntityScanner = new XMLEntityScanner();
1263: }
1264: fXML10EntityScanner.reset(fSymbolTable, this ,
1265: fErrorReporter);
1266: fEntityScanner = fXML10EntityScanner;
1267: }
1268: return fEntityScanner;
1269: } // getEntityScanner():XMLEntityScanner
1270:
1271: // A stack containing all the open readers
1272: protected Stack fReaderStack = new Stack();
1273:
1274: /**
1275: * Close all opened InputStreams and Readers opened by this parser.
1276: */
1277: public void closeReaders() {
1278: // close all readers
1279: for (int i = fReaderStack.size() - 1; i >= 0; i--) {
1280: try {
1281: ((Reader) fReaderStack.pop()).close();
1282: } catch (IOException e) {
1283: // ignore
1284: }
1285: }
1286: }
1287:
1288: //
1289: // XMLComponent methods
1290: //
1291:
1292: /**
1293: * Resets the component. The component can query the component manager
1294: * about any features and properties that affect the operation of the
1295: * component.
1296: *
1297: * @param componentManager The component manager.
1298: *
1299: * @throws SAXException Thrown by component on initialization error.
1300: * For example, if a feature or property is
1301: * required for the operation of the component, the
1302: * component manager may throw a
1303: * SAXNotRecognizedException or a
1304: * SAXNotSupportedException.
1305: */
1306: public void reset(XMLComponentManager componentManager)
1307: throws XMLConfigurationException {
1308:
1309: boolean parser_settings;
1310: try {
1311: parser_settings = componentManager
1312: .getFeature(PARSER_SETTINGS);
1313: } catch (XMLConfigurationException e) {
1314: parser_settings = true;
1315: }
1316:
1317: if (!parser_settings) {
1318: // parser settings have not been changed
1319: reset();
1320: return;
1321: }
1322:
1323: // sax features
1324: try {
1325: fValidation = componentManager.getFeature(VALIDATION);
1326: } catch (XMLConfigurationException e) {
1327: fValidation = false;
1328: }
1329: try {
1330: fExternalGeneralEntities = componentManager
1331: .getFeature(EXTERNAL_GENERAL_ENTITIES);
1332: } catch (XMLConfigurationException e) {
1333: fExternalGeneralEntities = true;
1334: }
1335: try {
1336: fExternalParameterEntities = componentManager
1337: .getFeature(EXTERNAL_PARAMETER_ENTITIES);
1338: } catch (XMLConfigurationException e) {
1339: fExternalParameterEntities = true;
1340: }
1341:
1342: // xerces features
1343: try {
1344: fAllowJavaEncodings = componentManager
1345: .getFeature(ALLOW_JAVA_ENCODINGS);
1346: } catch (XMLConfigurationException e) {
1347: fAllowJavaEncodings = false;
1348: }
1349:
1350: try {
1351: fWarnDuplicateEntityDef = componentManager
1352: .getFeature(WARN_ON_DUPLICATE_ENTITYDEF);
1353: } catch (XMLConfigurationException e) {
1354: fWarnDuplicateEntityDef = false;
1355: }
1356:
1357: try {
1358: fStrictURI = componentManager
1359: .getFeature(STANDARD_URI_CONFORMANT);
1360: } catch (XMLConfigurationException e) {
1361: fStrictURI = false;
1362: }
1363:
1364: // xerces properties
1365: fSymbolTable = (SymbolTable) componentManager
1366: .getProperty(SYMBOL_TABLE);
1367: fErrorReporter = (XMLErrorReporter) componentManager
1368: .getProperty(ERROR_REPORTER);
1369: try {
1370: fEntityResolver = (XMLEntityResolver) componentManager
1371: .getProperty(ENTITY_RESOLVER);
1372: } catch (XMLConfigurationException e) {
1373: fEntityResolver = null;
1374: }
1375: try {
1376: fValidationManager = (ValidationManager) componentManager
1377: .getProperty(VALIDATION_MANAGER);
1378: } catch (XMLConfigurationException e) {
1379: fValidationManager = null;
1380: }
1381: try {
1382: fSecurityManager = (SecurityManager) componentManager
1383: .getProperty(SECURITY_MANAGER);
1384: } catch (XMLConfigurationException e) {
1385: fSecurityManager = null;
1386: }
1387:
1388: // reset general state
1389: reset();
1390:
1391: } // reset(XMLComponentManager)
1392:
1393: // reset general state. Should not be called other than by
1394: // a class acting as a component manager but not
1395: // implementing that interface for whatever reason.
1396: public void reset() {
1397: fEntityExpansionLimit = (fSecurityManager != null) ? fSecurityManager
1398: .getEntityExpansionLimit()
1399: : 0;
1400:
1401: // initialize state
1402: fStandalone = false;
1403: fHasPEReferences = false;
1404: fEntities.clear();
1405: fEntityStack.removeAllElements();
1406: fEntityExpansionCount = 0;
1407:
1408: fCurrentEntity = null;
1409: // reset scanner
1410: if (fXML10EntityScanner != null) {
1411: fXML10EntityScanner.reset(fSymbolTable, this ,
1412: fErrorReporter);
1413: }
1414: if (fXML11EntityScanner != null) {
1415: fXML11EntityScanner.reset(fSymbolTable, this ,
1416: fErrorReporter);
1417: }
1418:
1419: // DEBUG
1420: if (DEBUG_ENTITIES) {
1421: addInternalEntity("text", "Hello, World.");
1422: addInternalEntity("empty-element", "<foo/>");
1423: addInternalEntity("balanced-element", "<foo></foo>");
1424: addInternalEntity("balanced-element-with-text",
1425: "<foo>Hello, World</foo>");
1426: addInternalEntity("balanced-element-with-entity",
1427: "<foo>&text;</foo>");
1428: addInternalEntity("unbalanced-entity", "<foo>");
1429: addInternalEntity("recursive-entity",
1430: "<foo>&recursive-entity2;</foo>");
1431: addInternalEntity("recursive-entity2",
1432: "<bar>&recursive-entity3;</bar>");
1433: addInternalEntity("recursive-entity3",
1434: "<baz>&recursive-entity;</baz>");
1435: try {
1436: addExternalEntity("external-text", null,
1437: "external-text.ent", "test/external-text.xml");
1438: addExternalEntity("external-balanced-element", null,
1439: "external-balanced-element.ent",
1440: "test/external-balanced-element.xml");
1441: addExternalEntity("one", null, "ent/one.ent",
1442: "test/external-entity.xml");
1443: addExternalEntity("two", null, "ent/two.ent",
1444: "test/ent/one.xml");
1445: } catch (IOException ex) {
1446: // should never happen
1447: }
1448: }
1449:
1450: // copy declared entities
1451: if (fDeclaredEntities != null) {
1452: java.util.Enumeration keys = fDeclaredEntities.keys();
1453: while (keys.hasMoreElements()) {
1454: Object key = keys.nextElement();
1455: Object value = fDeclaredEntities.get(key);
1456: fEntities.put(key, value);
1457: }
1458: }
1459: fEntityHandler = null;
1460:
1461: } // reset(XMLComponentManager)
1462:
1463: /**
1464: * Returns a list of feature identifiers that are recognized by
1465: * this component. This method may return null if no features
1466: * are recognized by this component.
1467: */
1468: public String[] getRecognizedFeatures() {
1469: return (String[]) (RECOGNIZED_FEATURES.clone());
1470: } // getRecognizedFeatures():String[]
1471:
1472: /**
1473: * Sets the state of a feature. This method is called by the component
1474: * manager any time after reset when a feature changes state.
1475: * <p>
1476: * <strong>Note:</strong> Components should silently ignore features
1477: * that do not affect the operation of the component.
1478: *
1479: * @param featureId The feature identifier.
1480: * @param state The state of the feature.
1481: *
1482: * @throws SAXNotRecognizedException The component should not throw
1483: * this exception.
1484: * @throws SAXNotSupportedException The component should not throw
1485: * this exception.
1486: */
1487: public void setFeature(String featureId, boolean state)
1488: throws XMLConfigurationException {
1489:
1490: // xerces features
1491: if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
1492: final int suffixLength = featureId.length()
1493: - Constants.XERCES_FEATURE_PREFIX.length();
1494: if (suffixLength == Constants.ALLOW_JAVA_ENCODINGS_FEATURE
1495: .length()
1496: && featureId
1497: .endsWith(Constants.ALLOW_JAVA_ENCODINGS_FEATURE)) {
1498: fAllowJavaEncodings = state;
1499: }
1500: }
1501:
1502: } // setFeature(String,boolean)
1503:
1504: /**
1505: * Returns a list of property identifiers that are recognized by
1506: * this component. This method may return null if no properties
1507: * are recognized by this component.
1508: */
1509: public String[] getRecognizedProperties() {
1510: return (String[]) (RECOGNIZED_PROPERTIES.clone());
1511: } // getRecognizedProperties():String[]
1512:
1513: /**
1514: * Sets the value of a property. This method is called by the component
1515: * manager any time after reset when a property changes value.
1516: * <p>
1517: * <strong>Note:</strong> Components should silently ignore properties
1518: * that do not affect the operation of the component.
1519: *
1520: * @param propertyId The property identifier.
1521: * @param value The value of the property.
1522: *
1523: * @throws SAXNotRecognizedException The component should not throw
1524: * this exception.
1525: * @throws SAXNotSupportedException The component should not throw
1526: * this exception.
1527: */
1528: public void setProperty(String propertyId, Object value)
1529: throws XMLConfigurationException {
1530:
1531: // Xerces properties
1532: if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
1533: final int suffixLength = propertyId.length()
1534: - Constants.XERCES_PROPERTY_PREFIX.length();
1535:
1536: if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY
1537: .length()
1538: && propertyId
1539: .endsWith(Constants.SYMBOL_TABLE_PROPERTY)) {
1540: fSymbolTable = (SymbolTable) value;
1541: return;
1542: }
1543: if (suffixLength == Constants.ERROR_REPORTER_PROPERTY
1544: .length()
1545: && propertyId
1546: .endsWith(Constants.ERROR_REPORTER_PROPERTY)) {
1547: fErrorReporter = (XMLErrorReporter) value;
1548: return;
1549: }
1550: if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY
1551: .length()
1552: && propertyId
1553: .endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) {
1554: fEntityResolver = (XMLEntityResolver) value;
1555: return;
1556: }
1557: if (suffixLength == Constants.BUFFER_SIZE_PROPERTY.length()
1558: && propertyId
1559: .endsWith(Constants.BUFFER_SIZE_PROPERTY)) {
1560: Integer bufferSize = (Integer) value;
1561: if (bufferSize != null
1562: && bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) {
1563: fBufferSize = bufferSize.intValue();
1564: fEntityScanner.setBufferSize(fBufferSize);
1565: fByteBufferPool.setBufferSize(fBufferSize);
1566: fCharacterBufferPool
1567: .setExternalBufferSize(fBufferSize);
1568: }
1569: }
1570: if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY
1571: .length()
1572: && propertyId
1573: .endsWith(Constants.SECURITY_MANAGER_PROPERTY)) {
1574: fSecurityManager = (SecurityManager) value;
1575: fEntityExpansionLimit = (fSecurityManager != null) ? fSecurityManager
1576: .getEntityExpansionLimit()
1577: : 0;
1578: }
1579: }
1580:
1581: } // setProperty(String,Object)
1582:
1583: /**
1584: * Returns the default state for a feature, or null if this
1585: * component does not want to report a default value for this
1586: * feature.
1587: *
1588: * @param featureId The feature identifier.
1589: *
1590: * @since Xerces 2.2.0
1591: */
1592: public Boolean getFeatureDefault(String featureId) {
1593: for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
1594: if (RECOGNIZED_FEATURES[i].equals(featureId)) {
1595: return FEATURE_DEFAULTS[i];
1596: }
1597: }
1598: return null;
1599: } // getFeatureDefault(String):Boolean
1600:
1601: /**
1602: * Returns the default state for a property, or null if this
1603: * component does not want to report a default value for this
1604: * property.
1605: *
1606: * @param propertyId The property identifier.
1607: *
1608: * @since Xerces 2.2.0
1609: */
1610: public Object getPropertyDefault(String propertyId) {
1611: for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
1612: if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
1613: return PROPERTY_DEFAULTS[i];
1614: }
1615: }
1616: return null;
1617: } // getPropertyDefault(String):Object
1618:
1619: //
1620: // Public static methods
1621: //
1622:
// the "user.dir" value that gUserDirURI was computed from
private static String gUserDir;
// cached, escaped form of the "user.dir" property as a URI
private static URI gUserDirURI;
// indexed by ASCII code: true if the character must be escaped
private static final boolean[] gNeedEscaping = new boolean[128];
// indexed by ASCII code: first hex digit of the escape sequence
private static final char[] gAfterEscaping1 = new char[128];
// indexed by ASCII code: second hex digit of the escape sequence
private static final char[] gAfterEscaping2 = new char[128];
private static final char[] gHexChs = { '0', '1', '2', '3', '4',
        '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
// Fill the three tables above. Escaped are the control characters
// 0x00-0x1F, DEL (0x7F) and the printable characters listed below.
static {
    final String escapedPrintables = " <>#%\"{}|\\^~[]`";
    for (int code = 0; code < 128; code++) {
        final boolean isControl = code <= 0x1f || code == 0x7f;
        if (isControl || escapedPrintables.indexOf(code) >= 0) {
            gNeedEscaping[code] = true;
            gAfterEscaping1[code] = gHexChs[code >> 4];
            gAfterEscaping2[code] = gHexChs[code & 0xf];
        }
    }
}
1656:
1657: private static PrivilegedAction GET_USER_DIR_SYSTEM_PROPERTY = new PrivilegedAction() {
1658: public Object run() {
1659: return System.getProperty("user.dir");
1660: }
1661: };
1662:
1663: // To escape the "user.dir" system property, by using %HH to represent
1664: // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', '#', '%'
1665: // and '"'. It's a static method, so needs to be synchronized.
1666: // this method looks heavy, but since the system property isn't expected
1667: // to change often, so in most cases, we only need to return the URI
1668: // that was escaped before.
1669: // According to the URI spec, non-ASCII characters (whose value >= 128)
1670: // need to be escaped too.
1671: // REVISIT: don't know how to escape non-ASCII characters, especially
1672: // which encoding to use. Leave them for now.
1673: private static synchronized URI getUserDir()
1674: throws URI.MalformedURIException {
1675: // get the user.dir property
1676: String userDir = "";
1677: try {
1678: userDir = (String) AccessController
1679: .doPrivileged(GET_USER_DIR_SYSTEM_PROPERTY);
1680: } catch (SecurityException se) {
1681: }
1682:
1683: // return empty string if property value is empty string.
1684: if (userDir.length() == 0)
1685: return new URI("file", "", "", null, null);
1686:
1687: // compute the new escaped value if the new property value doesn't
1688: // match the previous one
1689: if (gUserDirURI != null && userDir.equals(gUserDir)) {
1690: return gUserDirURI;
1691: }
1692:
1693: // record the new value as the global property value
1694: gUserDir = userDir;
1695:
1696: char separator = java.io.File.separatorChar;
1697: userDir = userDir.replace(separator, '/');
1698:
1699: int len = userDir.length(), ch;
1700: StringBuffer buffer = new StringBuffer(len * 3);
1701: // change C:/blah to /C:/blah
1702: if (len >= 2 && userDir.charAt(1) == ':') {
1703: ch = Character.toUpperCase(userDir.charAt(0));
1704: if (ch >= 'A' && ch <= 'Z') {
1705: buffer.append('/');
1706: }
1707: }
1708:
1709: // for each character in the path
1710: int i = 0;
1711: for (; i < len; i++) {
1712: ch = userDir.charAt(i);
1713: // if it's not an ASCII character, break here, and use UTF-8 encoding
1714: if (ch >= 128)
1715: break;
1716: if (gNeedEscaping[ch]) {
1717: buffer.append('%');
1718: buffer.append(gAfterEscaping1[ch]);
1719: buffer.append(gAfterEscaping2[ch]);
1720: // record the fact that it's escaped
1721: } else {
1722: buffer.append((char) ch);
1723: }
1724: }
1725:
1726: // we saw some non-ascii character
1727: if (i < len) {
1728: // get UTF-8 bytes for the remaining sub-string
1729: byte[] bytes = null;
1730: byte b;
1731: try {
1732: bytes = userDir.substring(i).getBytes("UTF-8");
1733: } catch (java.io.UnsupportedEncodingException e) {
1734: // should never happen
1735: return new URI("file", "", userDir, null, null);
1736: }
1737: len = bytes.length;
1738:
1739: // for each byte
1740: for (i = 0; i < len; i++) {
1741: b = bytes[i];
1742: // for non-ascii character: make it positive, then escape
1743: if (b < 0) {
1744: ch = b + 256;
1745: buffer.append('%');
1746: buffer.append(gHexChs[ch >> 4]);
1747: buffer.append(gHexChs[ch & 0xf]);
1748: } else if (gNeedEscaping[b]) {
1749: buffer.append('%');
1750: buffer.append(gAfterEscaping1[b]);
1751: buffer.append(gAfterEscaping2[b]);
1752: } else {
1753: buffer.append((char) b);
1754: }
1755: }
1756: }
1757:
1758: // change blah/blah to blah/blah/
1759: if (!userDir.endsWith("/"))
1760: buffer.append('/');
1761:
1762: gUserDirURI = new URI("file", "", buffer.toString(), null, null);
1763:
1764: return gUserDirURI;
1765: }
1766:
1767: /**
1768: * Absolutizes a URI using the current value
1769: * of the "user.dir" property as the base URI. If
1770: * the URI is already absolute, this is a no-op.
1771: *
1772: * @param uri the URI to absolutize
1773: */
1774: public static void absolutizeAgainstUserDir(URI uri)
1775: throws URI.MalformedURIException {
1776: uri.absolutize(getUserDir());
1777: }
1778:
1779: /**
1780: * Expands a system id and returns the system id as a URI, if
1781: * it can be expanded. A return value of null means that the
1782: * identifier is already expanded. An exception thrown
1783: * indicates a failure to expand the id.
1784: *
1785: * @param systemId The systemId to be expanded.
1786: *
1787: * @return Returns the URI string representing the expanded system
1788: * identifier. A null value indicates that the given
1789: * system identifier is already expanded.
1790: *
1791: */
1792: public static String expandSystemId(String systemId,
1793: String baseSystemId, boolean strict)
1794: throws URI.MalformedURIException {
1795:
1796: // check if there is a system id before
1797: // trying to expand it.
1798: if (systemId == null) {
1799: return null;
1800: }
1801:
1802: // system id has to be a valid URI
1803: if (strict) {
1804: return expandSystemIdStrictOn(systemId, baseSystemId);
1805: }
1806:
1807: // Assume the URIs are well-formed. If it turns out they're not, try fixing them up.
1808: try {
1809: return expandSystemIdStrictOff(systemId, baseSystemId);
1810: } catch (URI.MalformedURIException e) {
1811: // continue on...
1812: }
1813:
1814: // check for bad parameters id
1815: if (systemId.length() == 0) {
1816: return systemId;
1817: }
1818:
1819: // normalize id
1820: String id = fixURI(systemId);
1821:
1822: // normalize base
1823: URI base = null;
1824: URI uri = null;
1825: try {
1826: if (baseSystemId == null || baseSystemId.length() == 0
1827: || baseSystemId.equals(systemId)) {
1828: base = getUserDir();
1829: } else {
1830: try {
1831: base = new URI(fixURI(baseSystemId).trim());
1832: } catch (URI.MalformedURIException e) {
1833: if (baseSystemId.indexOf(':') != -1) {
1834: // for xml schemas we might have baseURI with
1835: // a specified drive
1836: base = new URI("file", "", fixURI(baseSystemId)
1837: .trim(), null, null);
1838: } else {
1839: base = new URI(getUserDir(),
1840: fixURI(baseSystemId));
1841: }
1842: }
1843: }
1844: // expand id
1845: uri = new URI(base, id.trim());
1846: } catch (Exception e) {
1847: // let it go through
1848:
1849: }
1850:
1851: if (uri == null) {
1852: return systemId;
1853: }
1854: return uri.toString();
1855:
1856: } // expandSystemId(String,String,boolean):String
1857:
1858: /**
1859: * Helper method for expandSystemId(String,String,boolean):String
1860: */
1861: private static String expandSystemIdStrictOn(String systemId,
1862: String baseSystemId) throws URI.MalformedURIException {
1863:
1864: URI systemURI = new URI(systemId, true);
1865: // If it's already an absolute one, return it
1866: if (systemURI.isAbsoluteURI()) {
1867: return systemId;
1868: }
1869:
1870: // If there isn't a base URI, use the working directory
1871: URI baseURI = null;
1872: if (baseSystemId == null || baseSystemId.length() == 0) {
1873: baseURI = getUserDir();
1874: } else {
1875: baseURI = new URI(baseSystemId, true);
1876: if (!baseURI.isAbsoluteURI()) {
1877: // assume "base" is also a relative uri
1878: baseURI.absolutize(getUserDir());
1879: }
1880: }
1881:
1882: // absolutize the system identifier using the base URI
1883: systemURI.absolutize(baseURI);
1884:
1885: // return the string rep of the new uri (an absolute one)
1886: return systemURI.toString();
1887:
1888: // if any exception is thrown, it'll get thrown to the caller.
1889:
1890: } // expandSystemIdStrictOn(String,String):String
1891:
1892: /**
1893: * Helper method for expandSystemId(String,String,boolean):String
1894: */
1895: private static String expandSystemIdStrictOff(String systemId,
1896: String baseSystemId) throws URI.MalformedURIException {
1897:
1898: URI systemURI = new URI(systemId, true);
1899: // If it's already an absolute one, return it
1900: if (systemURI.isAbsoluteURI()) {
1901: if (systemURI.getScheme().length() > 1) {
1902: return systemId;
1903: }
1904: /**
1905: * If the scheme's length is only one character,
1906: * it's likely that this was intended as a file
1907: * path. Fixing this up in expandSystemId to
1908: * maintain backwards compatibility.
1909: */
1910: throw new URI.MalformedURIException();
1911: }
1912:
1913: // If there isn't a base URI, use the working directory
1914: URI baseURI = null;
1915: if (baseSystemId == null || baseSystemId.length() == 0) {
1916: baseURI = getUserDir();
1917: } else {
1918: baseURI = new URI(baseSystemId, true);
1919: if (!baseURI.isAbsoluteURI()) {
1920: // assume "base" is also a relative uri
1921: baseURI.absolutize(getUserDir());
1922: }
1923: }
1924:
1925: // absolutize the system identifier using the base URI
1926: systemURI.absolutize(baseURI);
1927:
1928: // return the string rep of the new uri (an absolute one)
1929: return systemURI.toString();
1930:
1931: // if any exception is thrown, it'll get thrown to the caller.
1932:
1933: } // expandSystemIdStrictOff(String,String):String
1934:
1935: /**
1936: * Attempt to set whether redirects will be followed for an <code>HttpURLConnection</code>.
1937: * This may fail on earlier JDKs which do not support setting this preference.
1938: */
1939: public static void setInstanceFollowRedirects(
1940: HttpURLConnection urlCon, boolean followRedirects) {
1941: try {
1942: Method method = HttpURLConnection.class.getMethod(
1943: "setInstanceFollowRedirects",
1944: new Class[] { Boolean.TYPE });
1945: method.invoke(urlCon,
1946: new Object[] { followRedirects ? Boolean.TRUE
1947: : Boolean.FALSE });
1948: }
1949: // setInstanceFollowRedirects doesn't exist.
1950: catch (Exception exc) {
1951: }
1952: }
1953:
1954: public static OutputStream createOutputStream(String uri)
1955: throws IOException {
1956: // URI was specified. Handle relative URIs.
1957: String expanded = XMLEntityManager.expandSystemId(uri, null,
1958: true);
1959: URL url = new URL(expanded != null ? expanded : uri);
1960: OutputStream out = null;
1961: String protocol = url.getProtocol();
1962: String host = url.getHost();
1963: // Use FileOutputStream if this URI is for a local file.
1964: if (protocol.equals("file")
1965: && (host == null || host.length() == 0 || host
1966: .equals("localhost"))) {
1967: out = new FileOutputStream(getPathWithoutEscapes(url
1968: .getPath()));
1969: }
1970: // Try to write to some other kind of URI. Some protocols
1971: // won't support this, though HTTP should work.
1972: else {
1973: URLConnection urlCon = url.openConnection();
1974: urlCon.setDoInput(false);
1975: urlCon.setDoOutput(true);
1976: urlCon.setUseCaches(false); // Enable tunneling.
1977: if (urlCon instanceof HttpURLConnection) {
1978: // The DOM L3 REC says if we are writing to an HTTP URI
1979: // it is to be done with an HTTP PUT.
1980: HttpURLConnection httpCon = (HttpURLConnection) urlCon;
1981: httpCon.setRequestMethod("PUT");
1982: }
1983: out = urlCon.getOutputStream();
1984: }
1985: return out;
1986: }
1987:
1988: private static String getPathWithoutEscapes(String origPath) {
1989: if (origPath != null && origPath.length() != 0
1990: && origPath.indexOf('%') != -1) {
1991: // Locate the escape characters
1992: StringTokenizer tokenizer = new StringTokenizer(origPath,
1993: "%");
1994: StringBuffer result = new StringBuffer(origPath.length());
1995: int size = tokenizer.countTokens();
1996: result.append(tokenizer.nextToken());
1997: for (int i = 1; i < size; ++i) {
1998: String token = tokenizer.nextToken();
1999: // Decode the 2 digit hexadecimal number following % in '%nn'
2000: result.append((char) Integer.valueOf(
2001: token.substring(0, 2), 16).intValue());
2002: result.append(token.substring(2));
2003: }
2004: return result.toString();
2005: }
2006: return origPath;
2007: }
2008:
2009: //
2010: // Protected methods
2011: //
2012:
2013: /**
2014: * Ends an entity.
2015: *
2016: * @throws XNIException Thrown by entity handler to signal an error.
2017: */
2018: void endEntity() throws XNIException {
2019:
2020: // call handler
2021: if (DEBUG_BUFFER) {
2022: System.out.print("(endEntity: ");
2023: print(fCurrentEntity);
2024: System.out.println();
2025: }
2026: if (fEntityHandler != null) {
2027: fEntityHandler.endEntity(fCurrentEntity.name, null);
2028: }
2029:
2030: // Close the reader for the current entity once we're
2031: // done with it, and remove it from our stack. If parsing
2032: // is halted at some point, the rest of the readers on
2033: // the stack will be closed during cleanup.
2034: try {
2035: fCurrentEntity.reader.close();
2036: } catch (IOException e) {
2037: // ignore
2038: }
2039: // REVISIT: We should never encounter underflow if the calls
2040: // to startEntity and endEntity are balanced, but guard
2041: // against the EmptyStackException for now. -- mrglavas
2042: if (!fReaderStack.isEmpty()) {
2043: fReaderStack.pop();
2044: }
2045:
2046: // Release the character buffer back to the pool for reuse
2047: fCharacterBufferPool
2048: .returnBuffer(fCurrentEntity.fCharacterBuffer);
2049:
2050: // Release the byte buffer back to the pool for reuse
2051: if (fCurrentEntity.fByteBuffer != null) {
2052: fByteBufferPool.returnBuffer(fCurrentEntity.fByteBuffer);
2053: }
2054:
2055: // Pop entity stack.
2056: fCurrentEntity = fEntityStack.size() > 0 ? (ScannedEntity) fEntityStack
2057: .pop()
2058: : null;
2059: fEntityScanner.setCurrentEntity(fCurrentEntity);
2060: if (DEBUG_BUFFER) {
2061: System.out.print(")endEntity: ");
2062: print(fCurrentEntity);
2063: System.out.println();
2064: }
2065:
2066: } // endEntity()
2067:
2068: /**
2069: * Returns the IANA encoding name that is auto-detected from
2070: * the bytes specified, with the endian-ness of that encoding where appropriate.
2071: *
2072: * @param b4 The first four bytes of the input.
2073: * @param count The number of bytes actually read.
2074: * @return a 2-element array: the first element, an IANA-encoding string,
2075: * the second element a Boolean which is true iff the document is big endian, false
2076: * if it's little-endian, and null if the distinction isn't relevant.
2077: */
2078: protected Object[] getEncodingName(byte[] b4, int count) {
2079:
2080: if (count < 2) {
2081: return new Object[] { "UTF-8", null };
2082: }
2083:
2084: // UTF-16, with BOM
2085: int b0 = b4[0] & 0xFF;
2086: int b1 = b4[1] & 0xFF;
2087: if (b0 == 0xFE && b1 == 0xFF) {
2088: // UTF-16, big-endian
2089: return new Object[] { "UTF-16BE", Boolean.TRUE };
2090: }
2091: if (b0 == 0xFF && b1 == 0xFE) {
2092: // UTF-16, little-endian
2093: return new Object[] { "UTF-16LE", Boolean.FALSE };
2094: }
2095:
2096: // default to UTF-8 if we don't have enough bytes to make a
2097: // good determination of the encoding
2098: if (count < 3) {
2099: return new Object[] { "UTF-8", null };
2100: }
2101:
2102: // UTF-8 with a BOM
2103: int b2 = b4[2] & 0xFF;
2104: if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
2105: return new Object[] { "UTF-8", null };
2106: }
2107:
2108: // default to UTF-8 if we don't have enough bytes to make a
2109: // good determination of the encoding
2110: if (count < 4) {
2111: return new Object[] { "UTF-8", null };
2112: }
2113:
2114: // other encodings
2115: int b3 = b4[3] & 0xFF;
2116: if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
2117: // UCS-4, big endian (1234)
2118: return new Object[] { "ISO-10646-UCS-4", Boolean.TRUE };
2119: }
2120: if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
2121: // UCS-4, little endian (4321)
2122: return new Object[] { "ISO-10646-UCS-4", Boolean.FALSE };
2123: }
2124: if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
2125: // UCS-4, unusual octet order (2143)
2126: // REVISIT: What should this be?
2127: return new Object[] { "ISO-10646-UCS-4", null };
2128: }
2129: if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
2130: // UCS-4, unusual octect order (3412)
2131: // REVISIT: What should this be?
2132: return new Object[] { "ISO-10646-UCS-4", null };
2133: }
2134: if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
2135: // UTF-16, big-endian, no BOM
2136: // (or could turn out to be UCS-2...
2137: // REVISIT: What should this be?
2138: return new Object[] { "UTF-16BE", Boolean.TRUE };
2139: }
2140: if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
2141: // UTF-16, little-endian, no BOM
2142: // (or could turn out to be UCS-2...
2143: return new Object[] { "UTF-16LE", Boolean.FALSE };
2144: }
2145: if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
2146: // EBCDIC
2147: // a la xerces1, return CP037 instead of EBCDIC here
2148: return new Object[] { "CP037", null };
2149: }
2150:
2151: // default encoding
2152: return new Object[] { "UTF-8", null };
2153:
2154: } // getEncodingName(byte[],int):Object[]
2155:
2156: /**
2157: * Creates a reader capable of reading the given input stream in
2158: * the specified encoding.
2159: *
2160: * @param inputStream The input stream.
2161: * @param encoding The encoding name that the input stream is
2162: * encoded using. If the user has specified that
2163: * Java encoding names are allowed, then the
2164: * encoding name may be a Java encoding name;
2165: * otherwise, it is an ianaEncoding name.
2166: * @param isBigEndian For encodings (like uCS-4), whose names cannot
2167: * specify a byte order, this tells whether the order is bigEndian. null menas
2168: * unknown or not relevant.
2169: *
2170: * @return Returns a reader.
2171: */
2172: protected Reader createReader(InputStream inputStream,
2173: String encoding, Boolean isBigEndian) throws IOException {
2174:
2175: // if the encoding is UTF-8 use the optimized UTF-8 reader
2176: if (encoding == "UTF-8" || encoding == null) {
2177: if (DEBUG_ENCODINGS) {
2178: System.out.println("$$$ creating UTF8Reader");
2179: }
2180: if (fTempByteBuffer == null) {
2181: fTempByteBuffer = fByteBufferPool.getBuffer();
2182: }
2183: return new UTF8Reader(
2184: inputStream,
2185: fTempByteBuffer,
2186: fErrorReporter
2187: .getMessageFormatter(XMLMessageFormatter.XML_DOMAIN),
2188: fErrorReporter.getLocale());
2189: }
2190:
2191: // try to use an optimized reader
2192: String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
2193: if (ENCODING.equals("UTF-8")) {
2194: if (DEBUG_ENCODINGS) {
2195: System.out.println("$$$ creating UTF8Reader");
2196: }
2197: if (fTempByteBuffer == null) {
2198: fTempByteBuffer = fByteBufferPool.getBuffer();
2199: }
2200: return new UTF8Reader(
2201: inputStream,
2202: fTempByteBuffer,
2203: fErrorReporter
2204: .getMessageFormatter(XMLMessageFormatter.XML_DOMAIN),
2205: fErrorReporter.getLocale());
2206: }
2207: if (ENCODING.equals("ISO-10646-UCS-4")) {
2208: if (isBigEndian != null) {
2209: boolean isBE = isBigEndian.booleanValue();
2210: if (isBE) {
2211: return new UCSReader(inputStream, UCSReader.UCS4BE);
2212: } else {
2213: return new UCSReader(inputStream, UCSReader.UCS4LE);
2214: }
2215: } else {
2216: fErrorReporter.reportError(
2217: XMLMessageFormatter.XML_DOMAIN,
2218: "EncodingByteOrderUnsupported",
2219: new Object[] { encoding },
2220: XMLErrorReporter.SEVERITY_FATAL_ERROR);
2221: }
2222: }
2223: if (ENCODING.equals("ISO-10646-UCS-2")) {
2224: if (isBigEndian != null) { // sould never happen with this encoding...
2225: boolean isBE = isBigEndian.booleanValue();
2226: if (isBE) {
2227: return new UCSReader(inputStream, UCSReader.UCS2BE);
2228: } else {
2229: return new UCSReader(inputStream, UCSReader.UCS2LE);
2230: }
2231: } else {
2232: fErrorReporter.reportError(
2233: XMLMessageFormatter.XML_DOMAIN,
2234: "EncodingByteOrderUnsupported",
2235: new Object[] { encoding },
2236: XMLErrorReporter.SEVERITY_FATAL_ERROR);
2237: }
2238: }
2239:
2240: // check for valid name
2241: boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
2242: boolean validJava = XMLChar.isValidJavaEncoding(encoding);
2243: if (!validIANA || (fAllowJavaEncodings && !validJava)) {
2244: fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
2245: "EncodingDeclInvalid", new Object[] { encoding },
2246: XMLErrorReporter.SEVERITY_FATAL_ERROR);
2247: // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
2248: // because every byte is a valid ISO Latin 1 character.
2249: // It may not translate correctly but if we failed on
2250: // the encoding anyway, then we're expecting the content
2251: // of the document to be bad. This will just prevent an
2252: // invalid UTF-8 sequence to be detected. This is only
2253: // important when continue-after-fatal-error is turned
2254: // on. -Ac
2255: if (DEBUG_ENCODINGS) {
2256: System.out.println("$$$ creating Latin1Reader");
2257: }
2258: return new Latin1Reader(inputStream, fBufferSize);
2259: }
2260:
2261: // try to use a Java reader
2262: String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
2263: if (javaEncoding == null) {
2264: if (fAllowJavaEncodings) {
2265: javaEncoding = encoding;
2266: } else {
2267: fErrorReporter.reportError(
2268: XMLMessageFormatter.XML_DOMAIN,
2269: "EncodingDeclInvalid",
2270: new Object[] { encoding },
2271: XMLErrorReporter.SEVERITY_FATAL_ERROR);
2272: // see comment above.
2273: if (DEBUG_ENCODINGS) {
2274: System.out.println("$$$ creating Latin1Reader");
2275: }
2276: if (fTempByteBuffer == null) {
2277: fTempByteBuffer = fByteBufferPool.getBuffer();
2278: }
2279: return new Latin1Reader(inputStream, fTempByteBuffer);
2280: }
2281: } else if (javaEncoding.equals("ASCII")) {
2282: if (DEBUG_ENCODINGS) {
2283: System.out.println("$$$ creating ASCIIReader");
2284: }
2285: if (fTempByteBuffer == null) {
2286: fTempByteBuffer = fByteBufferPool.getBuffer();
2287: }
2288: return new ASCIIReader(
2289: inputStream,
2290: fTempByteBuffer,
2291: fErrorReporter
2292: .getMessageFormatter(XMLMessageFormatter.XML_DOMAIN),
2293: fErrorReporter.getLocale());
2294: } else if (javaEncoding.equals("ISO8859_1")) {
2295: if (DEBUG_ENCODINGS) {
2296: System.out.println("$$$ creating Latin1Reader");
2297: }
2298: if (fTempByteBuffer == null) {
2299: fTempByteBuffer = fByteBufferPool.getBuffer();
2300: }
2301: return new Latin1Reader(inputStream, fTempByteBuffer);
2302: }
2303: if (DEBUG_ENCODINGS) {
2304: System.out
2305: .print("$$$ creating Java InputStreamReader: encoding="
2306: + javaEncoding);
2307: if (javaEncoding == encoding) {
2308: System.out.print(" (IANA encoding)");
2309: }
2310: System.out.println();
2311: }
2312: return new InputStreamReader(inputStream, javaEncoding);
2313:
2314: } // createReader(InputStream,String, Boolean): Reader
2315:
2316: //
2317: // Protected static methods
2318: //
2319:
2320: /**
2321: * Fixes a platform dependent filename to standard URI form.
2322: *
2323: * @param str The string to fix.
2324: *
2325: * @return Returns the fixed URI string.
2326: */
2327: protected static String fixURI(String str) {
2328:
2329: // handle platform dependent strings
2330: str = str.replace(java.io.File.separatorChar, '/');
2331:
2332: StringBuffer sb = null;
2333:
2334: // Windows fix
2335: if (str.length() >= 2) {
2336: char ch1 = str.charAt(1);
2337: // change "C:blah" to "file:///C:blah"
2338: if (ch1 == ':') {
2339: char ch0 = Character.toUpperCase(str.charAt(0));
2340: if (ch0 >= 'A' && ch0 <= 'Z') {
2341: sb = new StringBuffer(str.length() + 8);
2342: sb.append("file:///");
2343: }
2344: }
2345: // change "//blah" to "file://blah"
2346: else if (ch1 == '/' && str.charAt(0) == '/') {
2347: sb = new StringBuffer(str.length() + 5);
2348: sb.append("file:");
2349: }
2350: }
2351:
2352: int pos = str.indexOf(' ');
2353: // there is no space in the string
2354: // we just append "str" to the end of sb
2355: if (pos < 0) {
2356: if (sb != null) {
2357: sb.append(str);
2358: str = sb.toString();
2359: }
2360: }
2361: // otherwise, convert all ' ' to "%20".
2362: // Note: the following algorithm might not be very performant,
2363: // but people who want to use invalid URI's have to pay the price.
2364: else {
2365: if (sb == null)
2366: sb = new StringBuffer(str.length());
2367: // put characters before ' ' into the string buffer
2368: for (int i = 0; i < pos; i++)
2369: sb.append(str.charAt(i));
2370: // and %20 for the space
2371: sb.append("%20");
2372: // for the remamining part, also convert ' ' to "%20".
2373: for (int i = pos + 1; i < str.length(); i++) {
2374: if (str.charAt(i) == ' ')
2375: sb.append("%20");
2376: else
2377: sb.append(str.charAt(i));
2378: }
2379: str = sb.toString();
2380: }
2381:
2382: // done
2383: return str;
2384:
2385: } // fixURI(String):String
2386:
2387: //
2388: // Package visible methods
2389: //
2390:
2391: /**
2392: * Returns the hashtable of declared entities.
2393: * <p>
2394: * <strong>REVISIT:</strong>
2395: * This should be done the "right" way by designing a better way to
2396: * enumerate the declared entities. For now, this method is needed
2397: * by the constructor that takes an XMLEntityManager parameter.
2398: */
2399: Hashtable getDeclaredEntities() {
2400: return fEntities;
2401: } // getDeclaredEntities():Hashtable
2402:
2403: /** Prints the contents of the buffer. */
2404: static final void print(ScannedEntity currentEntity) {
2405: if (DEBUG_BUFFER) {
2406: if (currentEntity != null) {
2407: System.out.print('[');
2408: System.out.print(currentEntity.count);
2409: System.out.print(' ');
2410: System.out.print(currentEntity.position);
2411: if (currentEntity.count > 0) {
2412: System.out.print(" \"");
2413: for (int i = 0; i < currentEntity.count; i++) {
2414: if (i == currentEntity.position) {
2415: System.out.print('^');
2416: }
2417: char c = currentEntity.ch[i];
2418: switch (c) {
2419: case '\n': {
2420: System.out.print("\\n");
2421: break;
2422: }
2423: case '\r': {
2424: System.out.print("\\r");
2425: break;
2426: }
2427: case '\t': {
2428: System.out.print("\\t");
2429: break;
2430: }
2431: case '\\': {
2432: System.out.print("\\\\");
2433: break;
2434: }
2435: default: {
2436: System.out.print(c);
2437: }
2438: }
2439: }
2440: if (currentEntity.position == currentEntity.count) {
2441: System.out.print('^');
2442: }
2443: System.out.print('"');
2444: }
2445: System.out.print(']');
2446: System.out.print(" @ ");
2447: System.out.print(currentEntity.lineNumber);
2448: System.out.print(',');
2449: System.out.print(currentEntity.columnNumber);
2450: } else {
2451: System.out.print("*NO CURRENT ENTITY*");
2452: }
2453: }
2454: } // print(ScannedEntity)
2455:
2456: //
2457: // Classes
2458: //
2459:
2460: /**
2461: * Entity information.
2462: *
2463: * @xerces.internal
2464: *
2465: * @author Andy Clark, IBM
2466: */
2467: public static abstract class Entity {
2468:
2469: //
2470: // Data
2471: //
2472:
2473: /** Entity name. */
2474: public String name;
2475:
2476: // whether this entity's declaration was found in the internal
2477: // or external subset
2478: public boolean inExternalSubset;
2479:
2480: //
2481: // Constructors
2482: //
2483:
2484: /** Default constructor. */
2485: public Entity() {
2486: clear();
2487: } // <init>()
2488:
2489: /** Constructs an entity. */
2490: public Entity(String name, boolean inExternalSubset) {
2491: this .name = name;
2492: this .inExternalSubset = inExternalSubset;
2493: } // <init>(String)
2494:
2495: //
2496: // Public methods
2497: //
2498:
2499: /** Returns true if this entity was declared in the external subset. */
2500: public boolean isEntityDeclInExternalSubset() {
2501: return inExternalSubset;
2502: }
2503:
2504: /** Returns true if this is an external entity. */
2505: public abstract boolean isExternal();
2506:
2507: /** Returns true if this is an unparsed entity. */
2508: public abstract boolean isUnparsed();
2509:
2510: /** Clears the entity. */
2511: public void clear() {
2512: name = null;
2513: inExternalSubset = false;
2514: } // clear()
2515:
2516: /** Sets the values of the entity. */
2517: public void setValues(Entity entity) {
2518: name = entity.name;
2519: inExternalSubset = entity.inExternalSubset;
2520: } // setValues(Entity)
2521:
2522: } // class Entity
2523:
2524: /**
2525: * Internal entity.
2526: *
2527: * @xerces.internal
2528: *
2529: * @author Andy Clark, IBM
2530: */
2531: protected static class InternalEntity extends Entity {
2532:
2533: //
2534: // Data
2535: //
2536:
2537: /** Text value of entity. */
2538: public String text;
2539:
2540: //
2541: // Constructors
2542: //
2543:
2544: /** Default constructor. */
2545: public InternalEntity() {
2546: clear();
2547: } // <init>()
2548:
2549: /** Constructs an internal entity. */
2550: public InternalEntity(String name, String text,
2551: boolean inExternalSubset) {
2552: super (name, inExternalSubset);
2553: this .text = text;
2554: } // <init>(String,String)
2555:
2556: //
2557: // Entity methods
2558: //
2559:
2560: /** Returns true if this is an external entity. */
2561: public final boolean isExternal() {
2562: return false;
2563: } // isExternal():boolean
2564:
2565: /** Returns true if this is an unparsed entity. */
2566: public final boolean isUnparsed() {
2567: return false;
2568: } // isUnparsed():boolean
2569:
2570: /** Clears the entity. */
2571: public void clear() {
2572: super .clear();
2573: text = null;
2574: } // clear()
2575:
2576: /** Sets the values of the entity. */
2577: public void setValues(Entity entity) {
2578: super .setValues(entity);
2579: text = null;
2580: } // setValues(Entity)
2581:
2582: /** Sets the values of the entity. */
2583: public void setValues(InternalEntity entity) {
2584: super .setValues(entity);
2585: text = entity.text;
2586: } // setValues(InternalEntity)
2587:
2588: } // class InternalEntity
2589:
2590: /**
2591: * External entity.
2592: *
2593: * @xerces.internal
2594: *
2595: * @author Andy Clark, IBM
2596: */
2597: protected static class ExternalEntity extends Entity {
2598:
2599: //
2600: // Data
2601: //
2602:
2603: /** container for all relevant entity location information. */
2604: public XMLResourceIdentifier entityLocation;
2605:
2606: /** Notation name for unparsed entity. */
2607: public String notation;
2608:
2609: //
2610: // Constructors
2611: //
2612:
2613: /** Default constructor. */
2614: public ExternalEntity() {
2615: clear();
2616: } // <init>()
2617:
2618: /** Constructs an internal entity. */
2619: public ExternalEntity(String name,
2620: XMLResourceIdentifier entityLocation, String notation,
2621: boolean inExternalSubset) {
2622: super (name, inExternalSubset);
2623: this .entityLocation = entityLocation;
2624: this .notation = notation;
2625: } // <init>(String,XMLResourceIdentifier, String)
2626:
2627: //
2628: // Entity methods
2629: //
2630:
2631: /** Returns true if this is an external entity. */
2632: public final boolean isExternal() {
2633: return true;
2634: } // isExternal():boolean
2635:
2636: /** Returns true if this is an unparsed entity. */
2637: public final boolean isUnparsed() {
2638: return notation != null;
2639: } // isUnparsed():boolean
2640:
2641: /** Clears the entity. */
2642: public void clear() {
2643: super .clear();
2644: entityLocation = null;
2645: notation = null;
2646: } // clear()
2647:
2648: /** Sets the values of the entity. */
2649: public void setValues(Entity entity) {
2650: super .setValues(entity);
2651: entityLocation = null;
2652: notation = null;
2653: } // setValues(Entity)
2654:
2655: /** Sets the values of the entity. */
2656: public void setValues(ExternalEntity entity) {
2657: super .setValues(entity);
2658: entityLocation = entity.entityLocation;
2659: notation = entity.notation;
2660: } // setValues(ExternalEntity)
2661:
2662: } // class ExternalEntity
2663:
2664: /**
2665: * Entity state.
2666: *
2667: * @xerces.internal
2668: *
2669: * @author Andy Clark, IBM
2670: */
2671: public class ScannedEntity extends Entity {
2672:
2673: //
2674: // Data
2675: //
2676:
2677: // i/o
2678:
2679: /** Input stream. */
2680: public InputStream stream;
2681:
2682: /** Reader. */
2683: public Reader reader;
2684:
2685: // locator information
2686:
2687: /** entity location information */
2688: public XMLResourceIdentifier entityLocation;
2689:
2690: /** Line number. */
2691: public int lineNumber = 1;
2692:
2693: /** Column number. */
2694: public int columnNumber = 1;
2695:
2696: // encoding
2697:
2698: /** Auto-detected encoding. */
2699: public String encoding;
2700:
2701: /**
2702: * Encoding has been set externally, for example
2703: * using a SAX InputSource or a DOM LSInput.
2704: */
2705: boolean externallySpecifiedEncoding = false;
2706:
2707: // version
2708:
2709: /** XML version. **/
2710: public String xmlVersion = "1.0";
2711:
2712: // status
2713:
2714: /** True if in a literal. */
2715: public boolean literal;
2716:
2717: // whether this is an external or internal scanned entity
2718: public boolean isExternal;
2719:
2720: // buffer
2721:
2722: /** Character buffer. */
2723: public char[] ch = null;
2724:
2725: /** Position in character buffer. */
2726: public int position;
2727:
2728: /** Base character offset for computing absolute character offset. */
2729: public int baseCharOffset;
2730:
2731: /** Start position in character buffer. */
2732: public int startPosition;
2733:
2734: /** Count of characters in buffer. */
2735: public int count;
2736:
2737: // to allow the reader/inputStream to behave efficiently:
2738: public boolean mayReadChunks;
2739:
2740: /** Character buffer container. */
2741: private CharacterBuffer fCharacterBuffer;
2742:
2743: /** Byte buffer. */
2744: private byte[] fByteBuffer;
2745:
2746: //
2747: // Constructors
2748: //
2749:
/**
 * Constructs a scanned entity. The subset flag is taken from the
 * enclosing entity manager's <code>fInExternalSubset</code>, and the
 * character buffer is obtained from the manager's character buffer
 * pool.
 *
 * @param name           the entity name
 * @param entityLocation the entity location information
 * @param stream         the input stream
 * @param reader         the reader
 * @param byteBuffer     the byte buffer kept for this entity; may be null
 * @param encoding       the encoding
 * @param literal        true if in a literal
 * @param mayReadChunks  stored in the <code>mayReadChunks</code> field
 * @param isExternal     whether this is an external scanned entity
 */
public ScannedEntity(String name,
        XMLResourceIdentifier entityLocation,
        InputStream stream, Reader reader, byte[] byteBuffer,
        String encoding, boolean literal,
        boolean mayReadChunks, boolean isExternal) {
    super(name, XMLEntityManager.this.fInExternalSubset);

    // i/o
    this.stream = stream;
    this.reader = reader;
    this.encoding = encoding;

    // location and status
    this.entityLocation = entityLocation;
    this.literal = literal;
    this.isExternal = isExternal;
    this.mayReadChunks = mayReadChunks;

    // buffers
    this.fCharacterBuffer = fCharacterBufferPool.getBuffer(isExternal);
    this.ch = this.fCharacterBuffer.ch;
    this.fByteBuffer = byteBuffer;
} // <init>(String,XMLResourceIdentifier,InputStream,Reader,byte[],String,boolean,boolean,boolean)
2769:
2770: //
2771: // Entity methods
2772: //
2773:
2774: /** Returns true if this is an external entity. */
2775: public final boolean isExternal() {
2776: return isExternal;
2777: } // isExternal():boolean
2778:
2779: /** Returns true if this is an unparsed entity. */
2780: public final boolean isUnparsed() {
2781: return false;
2782: } // isUnparsed():boolean
2783:
2784: public void setReader(InputStream stream, String encoding,
2785: Boolean isBigEndian) throws IOException {
2786: fTempByteBuffer = fByteBuffer;
2787: reader = createReader(stream, encoding, isBigEndian);
2788: fByteBuffer = fTempByteBuffer;
2789: }
2790:
2791: // return the expanded system ID of the
2792: // first external entity on the stack, null
2793: // otherwise.
2794: public String getExpandedSystemId() {
2795:
2796: // search for the first external entity on the stack
2797: int size = fEntityStack.size();
2798: for (int i = size - 1; i >= 0; --i) {
2799: ScannedEntity externalEntity = (ScannedEntity) fEntityStack
2800: .elementAt(i);
2801:
2802: if (externalEntity.entityLocation != null
2803: && externalEntity.entityLocation
2804: .getExpandedSystemId() != null) {
2805: return externalEntity.entityLocation
2806: .getExpandedSystemId();
2807: }
2808: }
2809: return null;
2810: }
2811:
2812: // return literal systemId of
2813: // nearest external entity
2814: public String getLiteralSystemId() {
2815: // search for the first external entity on the stack
2816: int size = fEntityStack.size();
2817: for (int i = size - 1; i >= 0; --i) {
2818: ScannedEntity externalEntity = (ScannedEntity) fEntityStack
2819: .elementAt(i);
2820:
2821: if (externalEntity.entityLocation != null
2822: && externalEntity.entityLocation
2823: .getLiteralSystemId() != null) {
2824: return externalEntity.entityLocation
2825: .getLiteralSystemId();
2826: }
2827: }
2828: return null;
2829: }
2830:
2831: // return line number of position in most
2832: // recent external entity
2833: public int getLineNumber() {
2834: // search for the first external entity on the stack
2835: int size = fEntityStack.size();
2836: for (int i = size - 1; i >= 0; --i) {
2837: ScannedEntity firstExternalEntity = (ScannedEntity) fEntityStack
2838: .elementAt(i);
2839: if (firstExternalEntity.isExternal()) {
2840: return firstExternalEntity.lineNumber;
2841: }
2842: }
2843: return -1;
2844: }
2845:
2846: // return column number of position in most
2847: // recent external entity
2848: public int getColumnNumber() {
2849: // search for the first external entity on the stack
2850: int size = fEntityStack.size();
2851: for (int i = size - 1; i >= 0; --i) {
2852: ScannedEntity firstExternalEntity = (ScannedEntity) fEntityStack
2853: .elementAt(i);
2854: if (firstExternalEntity.isExternal()) {
2855: return firstExternalEntity.columnNumber;
2856: }
2857: }
2858: return -1;
2859: }
2860:
2861: // return character offset of position in most
2862: // recent external entity
2863: public int getCharacterOffset() {
2864: // search for the first external entity on the stack
2865: int size = fEntityStack.size();
2866: for (int i = size - 1; i >= 0; --i) {
2867: ScannedEntity firstExternalEntity = (ScannedEntity) fEntityStack
2868: .elementAt(i);
2869: if (firstExternalEntity.isExternal()) {
2870: return firstExternalEntity.baseCharOffset
2871: + (firstExternalEntity.position - firstExternalEntity.startPosition);
2872: }
2873: }
2874: return -1;
2875: }
2876:
2877: // return encoding of most recent external entity
2878: public String getEncoding() {
2879: // search for the first external entity on the stack
2880: int size = fEntityStack.size();
2881: for (int i = size - 1; i >= 0; --i) {
2882: ScannedEntity firstExternalEntity = (ScannedEntity) fEntityStack
2883: .elementAt(i);
2884: if (firstExternalEntity.isExternal()) {
2885: return firstExternalEntity.encoding;
2886: }
2887: }
2888: return null;
2889: }
2890:
2891: // return xml version of most recent external entity
2892: public String getXMLVersion() {
2893: // search for the first external entity on the stack
2894: int size = fEntityStack.size();
2895: for (int i = size - 1; i >= 0; --i) {
2896: ScannedEntity firstExternalEntity = (ScannedEntity) fEntityStack
2897: .elementAt(i);
2898: if (firstExternalEntity.isExternal()) {
2899: return firstExternalEntity.xmlVersion;
2900: }
2901: }
2902: return null;
2903: }
2904:
2905: /** Returns whether the encoding of this entity was externally specified. **/
2906: public boolean isEncodingExternallySpecified() {
2907: return externallySpecifiedEncoding;
2908: }
2909:
2910: /** Sets whether the encoding of this entity was externally specified. **/
2911: public void setEncodingExternallySpecified(boolean value) {
2912: externallySpecifiedEncoding = value;
2913: }
2914:
2915: //
2916: // Object methods
2917: //
2918:
2919: /** Returns a string representation of this object. */
2920: public String toString() {
2921:
2922: StringBuffer str = new StringBuffer();
2923: str.append("name=\"").append(name).append('"');
2924: str.append(",ch=");
2925: str.append(ch);
2926: str.append(",position=").append(position);
2927: str.append(",count=").append(count);
2928: str.append(",baseCharOffset=").append(baseCharOffset);
2929: str.append(",startPosition=").append(startPosition);
2930: return str.toString();
2931:
2932: } // toString():String
2933:
2934: } // class ScannedEntity
2935:
2936: /**
2937: * Pool of byte buffers for the java.io.Readers.
2938: *
2939: * @xerces.internal
2940: *
2941: * @author Michael Glavassevich, IBM
2942: */
2943: private static final class ByteBufferPool {
2944:
2945: private static final int DEFAULT_POOL_SIZE = 3;
2946:
2947: private int fPoolSize;
2948: private int fBufferSize;
2949: private byte[][] fByteBufferPool;
2950: private int fDepth;
2951:
2952: public ByteBufferPool(int bufferSize) {
2953: this (DEFAULT_POOL_SIZE, bufferSize);
2954: }
2955:
2956: public ByteBufferPool(int poolSize, int bufferSize) {
2957: fPoolSize = poolSize;
2958: fBufferSize = bufferSize;
2959: fByteBufferPool = new byte[fPoolSize][];
2960: fDepth = 0;
2961: }
2962:
2963: /** Retrieves a byte buffer from the pool. **/
2964: public byte[] getBuffer() {
2965: return (fDepth > 0) ? fByteBufferPool[--fDepth]
2966: : new byte[fBufferSize];
2967: }
2968:
2969: /** Returns byte buffer to pool. **/
2970: public void returnBuffer(byte[] buffer) {
2971: if (fDepth < fByteBufferPool.length) {
2972: fByteBufferPool[fDepth++] = buffer;
2973: }
2974: }
2975:
2976: /** Sets the size of the buffers and dumps the old pool. **/
2977: public void setBufferSize(int bufferSize) {
2978: fBufferSize = bufferSize;
2979: fByteBufferPool = new byte[fPoolSize][];
2980: fDepth = 0;
2981: }
2982: }
2983:
2984: /**
2985: * Buffer used in entity manager to reuse character arrays instead
2986: * of creating new ones every time.
2987: *
2988: * @xerces.internal
2989: *
2990: * @author Ankit Pasricha, IBM
2991: */
2992: private static final class CharacterBuffer {
2993:
2994: /** character buffer */
2995: private final char[] ch;
2996:
2997: /** whether the buffer is for an external or internal scanned entity */
2998: private final boolean isExternal;
2999:
3000: public CharacterBuffer(boolean isExternal, int size) {
3001: this .isExternal = isExternal;
3002: ch = new char[size];
3003: }
3004: }
3005:
3006: /**
3007: * Stores a number of character buffers and provides it to the entity
3008: * manager to use when an entity is seen.
3009: *
3010: * @xerces.internal
3011: *
3012: * @author Ankit Pasricha, IBM
3013: */
3014: private static final class CharacterBufferPool {
3015:
3016: private static final int DEFAULT_POOL_SIZE = 3;
3017:
3018: private CharacterBuffer[] fInternalBufferPool;
3019: private CharacterBuffer[] fExternalBufferPool;
3020:
3021: private int fExternalBufferSize;
3022: private int fInternalBufferSize;
3023: private int fPoolSize;
3024:
3025: private int fInternalTop;
3026: private int fExternalTop;
3027:
3028: public CharacterBufferPool(int externalBufferSize,
3029: int internalBufferSize) {
3030: this (DEFAULT_POOL_SIZE, externalBufferSize,
3031: internalBufferSize);
3032: }
3033:
3034: public CharacterBufferPool(int poolSize,
3035: int externalBufferSize, int internalBufferSize) {
3036: fExternalBufferSize = externalBufferSize;
3037: fInternalBufferSize = internalBufferSize;
3038: fPoolSize = poolSize;
3039: init();
3040: }
3041:
3042: /** Initializes buffer pool. **/
3043: private void init() {
3044: fInternalBufferPool = new CharacterBuffer[fPoolSize];
3045: fExternalBufferPool = new CharacterBuffer[fPoolSize];
3046: fInternalTop = -1;
3047: fExternalTop = -1;
3048: }
3049:
3050: /** Retrieves buffer from pool. **/
3051: public CharacterBuffer getBuffer(boolean external) {
3052: if (external) {
3053: if (fExternalTop > -1) {
3054: return (CharacterBuffer) fExternalBufferPool[fExternalTop--];
3055: } else {
3056: return new CharacterBuffer(true,
3057: fExternalBufferSize);
3058: }
3059: } else {
3060: if (fInternalTop > -1) {
3061: return (CharacterBuffer) fInternalBufferPool[fInternalTop--];
3062: } else {
3063: return new CharacterBuffer(false,
3064: fInternalBufferSize);
3065: }
3066: }
3067: }
3068:
3069: /** Returns buffer to pool. **/
3070: public void returnBuffer(CharacterBuffer buffer) {
3071: if (buffer.isExternal) {
3072: if (fExternalTop < fExternalBufferPool.length - 1) {
3073: fExternalBufferPool[++fExternalTop] = buffer;
3074: }
3075: } else if (fInternalTop < fInternalBufferPool.length - 1) {
3076: fInternalBufferPool[++fInternalTop] = buffer;
3077: }
3078: }
3079:
3080: /** Sets the size of external buffers and dumps the old pool. **/
3081: public void setExternalBufferSize(int bufferSize) {
3082: fExternalBufferSize = bufferSize;
3083: fExternalBufferPool = new CharacterBuffer[fPoolSize];
3084: fExternalTop = -1;
3085: }
3086: }
3087:
3088: /**
3089: * This class wraps the byte inputstreams we're presented with.
3090: * We need it because java.io.InputStreams don't provide
3091: * functionality to reread processed bytes, and they have a habit
3092: * of reading more than one character when you call their read()
3093: * methods. This means that, once we discover the true (declared)
3094: * encoding of a document, we can neither backtrack to read the
3095: * whole doc again nor start reading where we are with a new
3096: * reader.
3097: *
3098: * This class allows rewinding an inputStream by allowing a mark
3099: * to be set, and the stream reset to that position. <strong>The
3100: * class assumes that it needs to read one character per
3101: * invocation when it's read() method is inovked, but uses the
3102: * underlying InputStream's read(char[], offset length) method--it
3103: * won't buffer data read this way!</strong>
3104: *
3105: * @xerces.internal
3106: *
3107: * @author Neil Graham, IBM
3108: * @author Glenn Marcy, IBM
3109: */
3110: protected final class RewindableInputStream extends InputStream {
3111:
3112: private InputStream fInputStream;
3113: private byte[] fData;
3114: private int fStartOffset;
3115: private int fEndOffset;
3116: private int fOffset;
3117: private int fLength;
3118: private int fMark;
3119:
3120: public RewindableInputStream(InputStream is) {
3121: fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
3122: fInputStream = is;
3123: fStartOffset = 0;
3124: fEndOffset = -1;
3125: fOffset = 0;
3126: fLength = 0;
3127: fMark = 0;
3128: }
3129:
3130: public void setStartOffset(int offset) {
3131: fStartOffset = offset;
3132: }
3133:
3134: public void rewind() {
3135: fOffset = fStartOffset;
3136: }
3137:
3138: public int read() throws IOException {
3139: int b = 0;
3140: if (fOffset < fLength) {
3141: return fData[fOffset++] & 0xff;
3142: }
3143: if (fOffset == fEndOffset) {
3144: return -1;
3145: }
3146: if (fOffset == fData.length) {
3147: byte[] newData = new byte[fOffset << 1];
3148: System.arraycopy(fData, 0, newData, 0, fOffset);
3149: fData = newData;
3150: }
3151: b = fInputStream.read();
3152: if (b == -1) {
3153: fEndOffset = fOffset;
3154: return -1;
3155: }
3156: fData[fLength++] = (byte) b;
3157: fOffset++;
3158: return b & 0xff;
3159: }
3160:
3161: public int read(byte[] b, int off, int len) throws IOException {
3162: int bytesLeft = fLength - fOffset;
3163: if (bytesLeft == 0) {
3164: if (fOffset == fEndOffset) {
3165: return -1;
3166: }
3167: // better get some more for the voracious reader...
3168: if (fCurrentEntity.mayReadChunks) {
3169: return fInputStream.read(b, off, len);
3170: }
3171: int returnedVal = read();
3172: if (returnedVal == -1) {
3173: fEndOffset = fOffset;
3174: return -1;
3175: }
3176: b[off] = (byte) returnedVal;
3177: return 1;
3178: }
3179: if (len < bytesLeft) {
3180: if (len <= 0) {
3181: return 0;
3182: }
3183: } else {
3184: len = bytesLeft;
3185: }
3186: if (b != null) {
3187: System.arraycopy(fData, fOffset, b, off, len);
3188: }
3189: fOffset += len;
3190: return len;
3191: }
3192:
3193: public long skip(long n) throws IOException {
3194: int bytesLeft;
3195: if (n <= 0) {
3196: return 0;
3197: }
3198: bytesLeft = fLength - fOffset;
3199: if (bytesLeft == 0) {
3200: if (fOffset == fEndOffset) {
3201: return 0;
3202: }
3203: return fInputStream.skip(n);
3204: }
3205: if (n <= bytesLeft) {
3206: fOffset += n;
3207: return n;
3208: }
3209: fOffset += bytesLeft;
3210: if (fOffset == fEndOffset) {
3211: return bytesLeft;
3212: }
3213: n -= bytesLeft;
3214: /*
3215: * In a manner of speaking, when this class isn't permitting more
3216: * than one byte at a time to be read, it is "blocking". The
3217: * available() method should indicate how much can be read without
3218: * blocking, so while we're in this mode, it should only indicate
3219: * that bytes in its buffer are available; otherwise, the result of
3220: * available() on the underlying InputStream is appropriate.
3221: */
3222: return fInputStream.skip(n) + bytesLeft;
3223: }
3224:
3225: public int available() throws IOException {
3226: int bytesLeft = fLength - fOffset;
3227: if (bytesLeft == 0) {
3228: if (fOffset == fEndOffset) {
3229: return -1;
3230: }
3231: return fCurrentEntity.mayReadChunks ? fInputStream
3232: .available() : 0;
3233: }
3234: return bytesLeft;
3235: }
3236:
3237: public void mark(int howMuch) {
3238: fMark = fOffset;
3239: }
3240:
3241: public void reset() {
3242: fOffset = fMark;
3243: }
3244:
3245: public boolean markSupported() {
3246: return true;
3247: }
3248:
3249: public void close() throws IOException {
3250: if (fInputStream != null) {
3251: fInputStream.close();
3252: fInputStream = null;
3253: }
3254: }
3255: } // end of RewindableInputStream class
3256:
3257: } // class XMLEntityManager
|