0001: /*
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: */
0017:
0018: package org.apache.xerces.impl;
0019:
0020: import java.io.CharConversionException;
0021: import java.io.EOFException;
0022: import java.io.IOException;
0023:
0024: import org.apache.xerces.impl.io.MalformedByteSequenceException;
0025: import org.apache.xerces.impl.msg.XMLMessageFormatter;
0026: import org.apache.xerces.util.AugmentationsImpl;
0027: import org.apache.xerces.util.XMLAttributesImpl;
0028: import org.apache.xerces.util.XMLChar;
0029: import org.apache.xerces.util.XMLStringBuffer;
0030: import org.apache.xerces.util.XMLSymbols;
0031: import org.apache.xerces.xni.Augmentations;
0032: import org.apache.xerces.xni.QName;
0033: import org.apache.xerces.xni.XMLAttributes;
0034: import org.apache.xerces.xni.XMLDocumentHandler;
0035: import org.apache.xerces.xni.XMLResourceIdentifier;
0036: import org.apache.xerces.xni.XMLString;
0037: import org.apache.xerces.xni.XNIException;
0038: import org.apache.xerces.xni.parser.XMLComponent;
0039: import org.apache.xerces.xni.parser.XMLComponentManager;
0040: import org.apache.xerces.xni.parser.XMLConfigurationException;
0041: import org.apache.xerces.xni.parser.XMLDocumentScanner;
0042: import org.apache.xerces.xni.parser.XMLInputSource;
0043:
0044: /**
0045: * This class is responsible for scanning the structure and content
0046: * of document fragments. The scanner acts as the source for the
0047: * document information which is communicated to the document handler.
0048: * <p>
0049: * This component requires the following features and properties from the
0050: * component manager that uses it:
0051: * <ul>
0052: * <li>http://xml.org/sax/features/validation</li>
0053: * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
0054: * <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
0055: * <li>http://apache.org/xml/properties/internal/symbol-table</li>
0056: * <li>http://apache.org/xml/properties/internal/error-reporter</li>
0057: * <li>http://apache.org/xml/properties/internal/entity-manager</li>
0058: * </ul>
0059: *
0060: * @xerces.internal
0061: *
0062: * @author Glenn Marcy, IBM
0063: * @author Andy Clark, IBM
0064: * @author Arnaud Le Hors, IBM
0065: * @author Eric Ye, IBM
0066: *
0067: * @version $Id: XMLDocumentFragmentScannerImpl.java 572055 2007-09-02 17:55:43Z mrglavas $
0068: */
0069: public class XMLDocumentFragmentScannerImpl extends XMLScanner
0070: implements XMLDocumentScanner, XMLComponent, XMLEntityHandler {
0071:
0072: //
0073: // Constants
0074: //
0075:
0076: // scanner states
0077:
0078: /** Scanner state: start of markup. */
0079: protected static final int SCANNER_STATE_START_OF_MARKUP = 1;
0080:
0081: /** Scanner state: comment. */
0082: protected static final int SCANNER_STATE_COMMENT = 2;
0083:
0084: /** Scanner state: processing instruction. */
0085: protected static final int SCANNER_STATE_PI = 3;
0086:
0087: /** Scanner state: DOCTYPE. */
0088: protected static final int SCANNER_STATE_DOCTYPE = 4;
0089:
0090: /** Scanner state: root element. */
0091: protected static final int SCANNER_STATE_ROOT_ELEMENT = 6;
0092:
0093: /** Scanner state: content. */
0094: protected static final int SCANNER_STATE_CONTENT = 7;
0095:
0096: /** Scanner state: reference. */
0097: protected static final int SCANNER_STATE_REFERENCE = 8;
0098:
0099: /** Scanner state: end of input. */
0100: protected static final int SCANNER_STATE_END_OF_INPUT = 13;
0101:
0102: /** Scanner state: terminated. */
0103: protected static final int SCANNER_STATE_TERMINATED = 14;
0104:
0105: /** Scanner state: CDATA section. */
0106: protected static final int SCANNER_STATE_CDATA = 15;
0107:
0108: /** Scanner state: Text declaration. */
0109: protected static final int SCANNER_STATE_TEXT_DECL = 16;
0110:
0111: // feature identifiers
0112:
0113: /** Feature identifier: namespaces. */
0114: protected static final String NAMESPACES = Constants.SAX_FEATURE_PREFIX
0115: + Constants.NAMESPACES_FEATURE;
0116:
0117: /** Feature identifier: notify built-in refereces. */
0118: protected static final String NOTIFY_BUILTIN_REFS = Constants.XERCES_FEATURE_PREFIX
0119: + Constants.NOTIFY_BUILTIN_REFS_FEATURE;
0120:
0121: // property identifiers
0122:
0123: /** Property identifier: entity resolver. */
0124: protected static final String ENTITY_RESOLVER = Constants.XERCES_PROPERTY_PREFIX
0125: + Constants.ENTITY_RESOLVER_PROPERTY;
0126:
0127: // recognized features and properties
0128:
0129: /** Recognized features. */
0130: private static final String[] RECOGNIZED_FEATURES = { NAMESPACES,
0131: VALIDATION, NOTIFY_BUILTIN_REFS, NOTIFY_CHAR_REFS, };
0132:
0133: /** Feature defaults. */
0134: private static final Boolean[] FEATURE_DEFAULTS = { null, null,
0135: Boolean.FALSE, Boolean.FALSE, };
0136:
0137: /** Recognized properties. */
0138: private static final String[] RECOGNIZED_PROPERTIES = {
0139: SYMBOL_TABLE, ERROR_REPORTER, ENTITY_MANAGER,
0140: ENTITY_RESOLVER, };
0141:
0142: /** Property defaults. */
0143: private static final Object[] PROPERTY_DEFAULTS = { null, null,
0144: null, null, };
0145:
0146: // debugging
0147:
0148: /** Debug scanner state. */
0149: private static final boolean DEBUG_SCANNER_STATE = false;
0150:
0151: /** Debug dispatcher. */
0152: private static final boolean DEBUG_DISPATCHER = false;
0153:
0154: /** Debug content dispatcher scanning. */
0155: protected static final boolean DEBUG_CONTENT_SCANNING = false;
0156:
0157: //
0158: // Data
0159: //
0160:
0161: // protected data
0162:
0163: /** Document handler. */
0164: protected XMLDocumentHandler fDocumentHandler;
0165:
0166: /** Entity stack. */
0167: protected int[] fEntityStack = new int[4];
0168:
0169: /** Markup depth. */
0170: protected int fMarkupDepth;
0171:
0172: /** Scanner state. */
0173: protected int fScannerState;
0174:
0175: /** SubScanner state: inside scanContent method. */
0176: protected boolean fInScanContent = false;
0177:
0178: /** has external dtd */
0179: protected boolean fHasExternalDTD;
0180:
0181: /** Standalone. */
0182: protected boolean fStandalone;
0183:
0184: /** True if [Entity Declared] is a VC; false if it is a WFC. */
0185: protected boolean fIsEntityDeclaredVC;
0186:
0187: /** External subset resolver. **/
0188: protected ExternalSubsetResolver fExternalSubsetResolver;
0189:
0190: // element information
0191:
0192: /** Current element. */
0193: protected QName fCurrentElement;
0194:
0195: /** Element stack. */
0196: protected final ElementStack fElementStack = new ElementStack();
0197:
0198: // other info
0199:
0200: /** Document system identifier.
0201: * REVISIT: So what's this used for? - NG
0202: * protected String fDocumentSystemId;
0203: ******/
0204:
0205: // features
0206: /** Notify built-in references. */
0207: protected boolean fNotifyBuiltInRefs = false;
0208:
0209: // dispatchers
0210:
0211: /** Active dispatcher. */
0212: protected Dispatcher fDispatcher;
0213:
0214: /** Content dispatcher. */
0215: protected final Dispatcher fContentDispatcher = createContentDispatcher();
0216:
0217: // temporary variables
0218:
0219: /** Element QName. */
0220: protected final QName fElementQName = new QName();
0221:
0222: /** Attribute QName. */
0223: protected final QName fAttributeQName = new QName();
0224:
0225: /** Element attributes. */
0226: protected final XMLAttributesImpl fAttributes = new XMLAttributesImpl();
0227:
0228: /** String. */
0229: protected final XMLString fTempString = new XMLString();
0230:
0231: /** String. */
0232: protected final XMLString fTempString2 = new XMLString();
0233:
0234: /** Array of 3 strings. */
0235: private final String[] fStrings = new String[3];
0236:
0237: /** String buffer. */
0238: private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
0239:
0240: /** String buffer. */
0241: private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
0242:
0243: /** Another QName. */
0244: private final QName fQName = new QName();
0245:
0246: /** Single character array. */
0247: private final char[] fSingleChar = new char[1];
0248:
0249: /**
0250: * Saw spaces after element name or between attributes.
0251: *
0252: * This is reserved for the case where scanning of a start element spans
0253: * several methods, as is the case when scanning the start of a root element
0254: * where a DTD external subset may be read after scanning the element name.
0255: */
0256: private boolean fSawSpace;
0257:
0258: /** Reusable Augmentations. */
0259: private Augmentations fTempAugmentations = null;
0260:
0261: //
0262: // Constructors
0263: //
0264:
/**
 * Default constructor. Performs no work beyond the superclass
 * constructor and this class's field initializers.
 */
public XMLDocumentFragmentScannerImpl() {
    super();
} // <init>()
0268:
0269: //
0270: // XMLDocumentScanner methods
0271: //
0272:
0273: /**
0274: * Sets the input source.
0275: *
0276: * @param inputSource The input source.
0277: *
0278: * @throws IOException Thrown on i/o error.
0279: */
0280: public void setInputSource(XMLInputSource inputSource)
0281: throws IOException {
0282: fEntityManager.setEntityHandler(this );
0283: fEntityManager.startEntity("$fragment$", inputSource, false,
0284: true);
0285: //fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
0286: } // setInputSource(XMLInputSource)
0287:
0288: /**
0289: * Scans a document.
0290: *
0291: * @param complete True if the scanner should scan the document
0292: * completely, pushing all events to the registered
0293: * document handler. A value of false indicates that
0294: * that the scanner should only scan the next portion
0295: * of the document and return. A scanner instance is
0296: * permitted to completely scan a document if it does
0297: * not support this "pull" scanning model.
0298: *
0299: * @return True if there is more to scan, false otherwise.
0300: */
0301: public boolean scanDocument(boolean complete) throws IOException,
0302: XNIException {
0303:
0304: // reset entity scanner
0305: fEntityScanner = fEntityManager.getEntityScanner();
0306:
0307: // keep dispatching "events"
0308: fEntityManager.setEntityHandler(this );
0309: do {
0310: if (!fDispatcher.dispatch(complete)) {
0311: return false;
0312: }
0313: } while (complete);
0314:
0315: // return success
0316: return true;
0317:
0318: } // scanDocument(boolean):boolean
0319:
0320: //
0321: // XMLComponent methods
0322: //
0323:
0324: /**
0325: * Resets the component. The component can query the component manager
0326: * about any features and properties that affect the operation of the
0327: * component.
0328: *
0329: * @param componentManager The component manager.
0330: *
0331: * @throws SAXException Thrown by component on initialization error.
0332: * For example, if a feature or property is
0333: * required for the operation of the component, the
0334: * component manager may throw a
0335: * SAXNotRecognizedException or a
0336: * SAXNotSupportedException.
0337: */
0338: public void reset(XMLComponentManager componentManager)
0339: throws XMLConfigurationException {
0340:
0341: super .reset(componentManager);
0342:
0343: // other settings
0344: //fDocumentSystemId = null;
0345:
0346: // sax features
0347: fAttributes.setNamespaces(fNamespaces);
0348:
0349: // initialize vars
0350: fMarkupDepth = 0;
0351: fCurrentElement = null;
0352: fElementStack.clear();
0353: fHasExternalDTD = false;
0354: fStandalone = false;
0355: fIsEntityDeclaredVC = false;
0356: fInScanContent = false;
0357:
0358: // setup dispatcher
0359: setScannerState(SCANNER_STATE_CONTENT);
0360: setDispatcher(fContentDispatcher);
0361:
0362: if (fParserSettings) {
0363: // parser settings have changed. reset them.
0364:
0365: // xerces features
0366: try {
0367: fNotifyBuiltInRefs = componentManager
0368: .getFeature(NOTIFY_BUILTIN_REFS);
0369: } catch (XMLConfigurationException e) {
0370: fNotifyBuiltInRefs = false;
0371: }
0372:
0373: // xerces properties
0374: try {
0375: Object resolver = componentManager
0376: .getProperty(ENTITY_RESOLVER);
0377: fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? (ExternalSubsetResolver) resolver
0378: : null;
0379: } catch (XMLConfigurationException e) {
0380: fExternalSubsetResolver = null;
0381: }
0382: }
0383:
0384: } // reset(XMLComponentManager)
0385:
0386: /**
0387: * Returns a list of feature identifiers that are recognized by
0388: * this component. This method may return null if no features
0389: * are recognized by this component.
0390: */
0391: public String[] getRecognizedFeatures() {
0392: return (String[]) (RECOGNIZED_FEATURES.clone());
0393: } // getRecognizedFeatures():String[]
0394:
0395: /**
0396: * Sets the state of a feature. This method is called by the component
0397: * manager any time after reset when a feature changes state.
0398: * <p>
0399: * <strong>Note:</strong> Components should silently ignore features
0400: * that do not affect the operation of the component.
0401: *
0402: * @param featureId The feature identifier.
0403: * @param state The state of the feature.
0404: *
0405: * @throws SAXNotRecognizedException The component should not throw
0406: * this exception.
0407: * @throws SAXNotSupportedException The component should not throw
0408: * this exception.
0409: */
0410: public void setFeature(String featureId, boolean state)
0411: throws XMLConfigurationException {
0412:
0413: super .setFeature(featureId, state);
0414:
0415: // Xerces properties
0416: if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
0417: final int suffixLength = featureId.length()
0418: - Constants.XERCES_FEATURE_PREFIX.length();
0419: if (suffixLength == Constants.NOTIFY_BUILTIN_REFS_FEATURE
0420: .length()
0421: && featureId
0422: .endsWith(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) {
0423: fNotifyBuiltInRefs = state;
0424: }
0425: }
0426:
0427: } // setFeature(String,boolean)
0428:
0429: /**
0430: * Returns a list of property identifiers that are recognized by
0431: * this component. This method may return null if no properties
0432: * are recognized by this component.
0433: */
0434: public String[] getRecognizedProperties() {
0435: return (String[]) (RECOGNIZED_PROPERTIES.clone());
0436: } // getRecognizedProperties():String[]
0437:
0438: /**
0439: * Sets the value of a property. This method is called by the component
0440: * manager any time after reset when a property changes value.
0441: * <p>
0442: * <strong>Note:</strong> Components should silently ignore properties
0443: * that do not affect the operation of the component.
0444: *
0445: * @param propertyId The property identifier.
0446: * @param value The value of the property.
0447: *
0448: * @throws SAXNotRecognizedException The component should not throw
0449: * this exception.
0450: * @throws SAXNotSupportedException The component should not throw
0451: * this exception.
0452: */
0453: public void setProperty(String propertyId, Object value)
0454: throws XMLConfigurationException {
0455:
0456: super .setProperty(propertyId, value);
0457:
0458: // Xerces properties
0459: if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
0460: final int suffixLength = propertyId.length()
0461: - Constants.XERCES_PROPERTY_PREFIX.length();
0462: if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY
0463: .length()
0464: && propertyId
0465: .endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
0466: fEntityManager = (XMLEntityManager) value;
0467: return;
0468: }
0469: if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY
0470: .length()
0471: && propertyId
0472: .endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) {
0473: fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? (ExternalSubsetResolver) value
0474: : null;
0475: return;
0476: }
0477: }
0478:
0479: } // setProperty(String,Object)
0480:
0481: /**
0482: * Returns the default state for a feature, or null if this
0483: * component does not want to report a default value for this
0484: * feature.
0485: *
0486: * @param featureId The feature identifier.
0487: *
0488: * @since Xerces 2.2.0
0489: */
0490: public Boolean getFeatureDefault(String featureId) {
0491: for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
0492: if (RECOGNIZED_FEATURES[i].equals(featureId)) {
0493: return FEATURE_DEFAULTS[i];
0494: }
0495: }
0496: return null;
0497: } // getFeatureDefault(String):Boolean
0498:
0499: /**
0500: * Returns the default state for a property, or null if this
0501: * component does not want to report a default value for this
0502: * property.
0503: *
0504: * @param propertyId The property identifier.
0505: *
0506: * @since Xerces 2.2.0
0507: */
0508: public Object getPropertyDefault(String propertyId) {
0509: for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
0510: if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
0511: return PROPERTY_DEFAULTS[i];
0512: }
0513: }
0514: return null;
0515: } // getPropertyDefault(String):Object
0516:
0517: //
0518: // XMLDocumentSource methods
0519: //
0520:
0521: /**
0522: * setDocumentHandler
0523: *
0524: * @param documentHandler
0525: */
0526: public void setDocumentHandler(XMLDocumentHandler documentHandler) {
0527: fDocumentHandler = documentHandler;
0528: } // setDocumentHandler(XMLDocumentHandler)
0529:
0530: /** Returns the document handler */
0531: public XMLDocumentHandler getDocumentHandler() {
0532: return fDocumentHandler;
0533: }
0534:
0535: //
0536: // XMLEntityHandler methods
0537: //
0538:
0539: /**
0540: * This method notifies of the start of an entity. The DTD has the
0541: * pseudo-name of "[dtd]" parameter entity names start with '%'; and
0542: * general entities are just specified by their name.
0543: *
0544: * @param name The name of the entity.
0545: * @param identifier The resource identifier.
0546: * @param encoding The auto-detected IANA encoding name of the entity
0547: * stream. This value will be null in those situations
0548: * where the entity encoding is not auto-detected (e.g.
0549: * internal entities or a document entity that is
0550: * parsed from a java.io.Reader).
0551: * @param augs Additional information that may include infoset augmentations
0552: *
0553: * @throws XNIException Thrown by handler to signal an error.
0554: */
0555: public void startEntity(String name,
0556: XMLResourceIdentifier identifier, String encoding,
0557: Augmentations augs) throws XNIException {
0558:
0559: // keep track of this entity before fEntityDepth is increased
0560: if (fEntityDepth == fEntityStack.length) {
0561: int[] entityarray = new int[fEntityStack.length * 2];
0562: System.arraycopy(fEntityStack, 0, entityarray, 0,
0563: fEntityStack.length);
0564: fEntityStack = entityarray;
0565: }
0566: fEntityStack[fEntityDepth] = fMarkupDepth;
0567:
0568: super .startEntity(name, identifier, encoding, augs);
0569:
0570: // WFC: entity declared in external subset in standalone doc
0571: if (fStandalone
0572: && fEntityManager.isEntityDeclInExternalSubset(name)) {
0573: reportFatalError(
0574: "MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE",
0575: new Object[] { name });
0576: }
0577:
0578: // call handler
0579: if (fDocumentHandler != null && !fScanningAttribute) {
0580: if (!name.equals("[xml]")) {
0581: fDocumentHandler.startGeneralEntity(name, identifier,
0582: encoding, augs);
0583: }
0584: }
0585:
0586: } // startEntity(String,XMLResourceIdentifier,String)
0587:
0588: /**
0589: * This method notifies the end of an entity. The DTD has the pseudo-name
0590: * of "[dtd]" parameter entity names start with '%'; and general entities
0591: * are just specified by their name.
0592: *
0593: * @param name The name of the entity.
0594: * @param augs Additional information that may include infoset augmentations
0595: *
0596: * @throws XNIException Thrown by handler to signal an error.
0597: */
0598: public void endEntity(String name, Augmentations augs)
0599: throws XNIException {
0600:
0601: // flush possible pending output buffer - see scanContent
0602: if (fInScanContent && fStringBuffer.length != 0
0603: && fDocumentHandler != null) {
0604: fDocumentHandler.characters(fStringBuffer, null);
0605: fStringBuffer.length = 0; // make sure we know it's been flushed
0606: }
0607:
0608: super .endEntity(name, augs);
0609:
0610: // make sure markup is properly balanced
0611: if (fMarkupDepth != fEntityStack[fEntityDepth]) {
0612: reportFatalError("MarkupEntityMismatch", null);
0613: }
0614:
0615: // call handler
0616: if (fDocumentHandler != null && !fScanningAttribute) {
0617: if (!name.equals("[xml]")) {
0618: fDocumentHandler.endGeneralEntity(name, augs);
0619: }
0620: }
0621:
0622: } // endEntity(String)
0623:
0624: //
0625: // Protected methods
0626: //
0627:
0628: // dispatcher factory methods
0629:
0630: /** Creates a content dispatcher. */
0631: protected Dispatcher createContentDispatcher() {
0632: return new FragmentContentDispatcher();
0633: } // createContentDispatcher():Dispatcher
0634:
0635: // scanning methods
0636:
0637: /**
0638: * Scans an XML or text declaration.
0639: * <p>
0640: * <pre>
0641: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
0642: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
0643: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
0644: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
0645: * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
0646: * | ('"' ('yes' | 'no') '"'))
0647: *
0648: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
0649: * </pre>
0650: *
0651: * @param scanningTextDecl True if a text declaration is to
0652: * be scanned instead of an XML
0653: * declaration.
0654: */
0655: protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
0656: throws IOException, XNIException {
0657:
0658: // scan decl
0659: super .scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
0660: fMarkupDepth--;
0661:
0662: // pseudo-attribute values
0663: String version = fStrings[0];
0664: String encoding = fStrings[1];
0665: String standalone = fStrings[2];
0666:
0667: // set standalone
0668: fStandalone = standalone != null && standalone.equals("yes");
0669: fEntityManager.setStandalone(fStandalone);
0670:
0671: // set version on reader
0672: fEntityScanner.setXMLVersion(version);
0673:
0674: // call handler
0675: if (fDocumentHandler != null) {
0676: if (scanningTextDecl) {
0677: fDocumentHandler.textDecl(version, encoding, null);
0678: } else {
0679: fDocumentHandler.xmlDecl(version, encoding, standalone,
0680: null);
0681: }
0682: }
0683:
0684: // set encoding on reader
0685: if (encoding != null
0686: && !fEntityScanner.fCurrentEntity
0687: .isEncodingExternallySpecified()) {
0688: fEntityScanner.setEncoding(encoding);
0689: }
0690:
0691: } // scanXMLDeclOrTextDecl(boolean)
0692:
0693: /**
0694: * Scans a processing data. This is needed to handle the situation
0695: * where a document starts with a processing instruction whose
0696: * target name <em>starts with</em> "xml". (e.g. xmlfoo)
0697: *
0698: * @param target The PI target
0699: * @param data The string to fill in with the data
0700: */
0701: protected void scanPIData(String target, XMLString data)
0702: throws IOException, XNIException {
0703:
0704: super .scanPIData(target, data);
0705: fMarkupDepth--;
0706:
0707: // call handler
0708: if (fDocumentHandler != null) {
0709: fDocumentHandler.processingInstruction(target, data, null);
0710: }
0711:
0712: } // scanPIData(String)
0713:
0714: /**
0715: * Scans a comment.
0716: * <p>
0717: * <pre>
0718: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
0719: * </pre>
0720: * <p>
0721: * <strong>Note:</strong> Called after scanning past '<!--'
0722: */
0723: protected void scanComment() throws IOException, XNIException {
0724:
0725: scanComment(fStringBuffer);
0726: fMarkupDepth--;
0727:
0728: // call handler
0729: if (fDocumentHandler != null) {
0730: fDocumentHandler.comment(fStringBuffer, null);
0731: }
0732:
0733: } // scanComment()
0734:
0735: /**
0736: * Scans a start element. This method will handle the binding of
0737: * namespace information and notifying the handler of the start
0738: * of the element.
0739: * <p>
0740: * <pre>
0741: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
0742: * [40] STag ::= '<' Name (S Attribute)* S? '>'
0743: * </pre>
0744: * <p>
0745: * <strong>Note:</strong> This method assumes that the leading
0746: * '<' character has been consumed.
0747: * <p>
0748: * <strong>Note:</strong> This method uses the fElementQName and
0749: * fAttributes variables. The contents of these variables will be
0750: * destroyed. The caller should copy important information out of
0751: * these variables before calling this method.
0752: *
0753: * @return True if element is empty. (i.e. It matches
0754: * production [44].
0755: */
0756: protected boolean scanStartElement() throws IOException,
0757: XNIException {
0758: if (DEBUG_CONTENT_SCANNING)
0759: System.out.println(">>> scanStartElement()");
0760:
0761: // name
0762: if (fNamespaces) {
0763: fEntityScanner.scanQName(fElementQName);
0764: } else {
0765: String name = fEntityScanner.scanName();
0766: fElementQName.setValues(null, name, name, null);
0767: }
0768: String rawname = fElementQName.rawname;
0769:
0770: // push element stack
0771: fCurrentElement = fElementStack.pushElement(fElementQName);
0772:
0773: // attributes
0774: boolean empty = false;
0775: fAttributes.removeAllAttributes();
0776: do {
0777: // spaces
0778: boolean sawSpace = fEntityScanner.skipSpaces();
0779:
0780: // end tag?
0781: int c = fEntityScanner.peekChar();
0782: if (c == '>') {
0783: fEntityScanner.scanChar();
0784: break;
0785: } else if (c == '/') {
0786: fEntityScanner.scanChar();
0787: if (!fEntityScanner.skipChar('>')) {
0788: reportFatalError("ElementUnterminated",
0789: new Object[] { rawname });
0790: }
0791: empty = true;
0792: break;
0793: } else if (!isValidNameStartChar(c) || !sawSpace) {
0794: // Second chance. Check if this character is a high
0795: // surrogate of a valid name start character.
0796: if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
0797: reportFatalError("ElementUnterminated",
0798: new Object[] { rawname });
0799: }
0800: }
0801:
0802: // attributes
0803: scanAttribute(fAttributes);
0804:
0805: } while (true);
0806:
0807: // call handler
0808: if (fDocumentHandler != null) {
0809: if (empty) {
0810:
0811: //decrease the markup depth..
0812: fMarkupDepth--;
0813: // check that this element was opened in the same entity
0814: if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
0815: reportFatalError("ElementEntityMismatch",
0816: new Object[] { fCurrentElement.rawname });
0817: }
0818:
0819: fDocumentHandler.emptyElement(fElementQName,
0820: fAttributes, null);
0821:
0822: //pop the element off the stack..
0823: fElementStack.popElement(fElementQName);
0824: } else {
0825: fDocumentHandler.startElement(fElementQName,
0826: fAttributes, null);
0827: }
0828: }
0829:
0830: if (DEBUG_CONTENT_SCANNING)
0831: System.out.println("<<< scanStartElement(): " + empty);
0832: return empty;
0833:
0834: } // scanStartElement():boolean
0835:
0836: /**
0837: * Scans the name of an element in a start or empty tag.
0838: *
0839: * @see #scanStartElement()
0840: */
0841: protected void scanStartElementName() throws IOException,
0842: XNIException {
0843: // name
0844: if (fNamespaces) {
0845: fEntityScanner.scanQName(fElementQName);
0846: } else {
0847: String name = fEntityScanner.scanName();
0848: fElementQName.setValues(null, name, name, null);
0849: }
0850: // Must skip spaces here because the DTD scanner
0851: // would consume them at the end of the external subset.
0852: fSawSpace = fEntityScanner.skipSpaces();
0853: } // scanStartElementName()
0854:
0855: /**
0856: * Scans the remainder of a start or empty tag after the element name.
0857: *
0858: * @see #scanStartElement
0859: * @return True if element is empty.
0860: */
0861: protected boolean scanStartElementAfterName() throws IOException,
0862: XNIException {
0863: String rawname = fElementQName.rawname;
0864:
0865: // push element stack
0866: fCurrentElement = fElementStack.pushElement(fElementQName);
0867:
0868: // attributes
0869: boolean empty = false;
0870: fAttributes.removeAllAttributes();
0871: do {
0872:
0873: // end tag?
0874: int c = fEntityScanner.peekChar();
0875: if (c == '>') {
0876: fEntityScanner.scanChar();
0877: break;
0878: } else if (c == '/') {
0879: fEntityScanner.scanChar();
0880: if (!fEntityScanner.skipChar('>')) {
0881: reportFatalError("ElementUnterminated",
0882: new Object[] { rawname });
0883: }
0884: empty = true;
0885: break;
0886: } else if (!isValidNameStartChar(c) || !fSawSpace) {
0887: // Second chance. Check if this character is a high
0888: // surrogate of a valid name start character.
0889: if (!isValidNameStartHighSurrogate(c) || !fSawSpace) {
0890: reportFatalError("ElementUnterminated",
0891: new Object[] { rawname });
0892: }
0893: }
0894:
0895: // attributes
0896: scanAttribute(fAttributes);
0897:
0898: // spaces
0899: fSawSpace = fEntityScanner.skipSpaces();
0900:
0901: } while (true);
0902:
0903: // call handler
0904: if (fDocumentHandler != null) {
0905: if (empty) {
0906:
0907: //decrease the markup depth..
0908: fMarkupDepth--;
0909: // check that this element was opened in the same entity
0910: if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
0911: reportFatalError("ElementEntityMismatch",
0912: new Object[] { fCurrentElement.rawname });
0913: }
0914:
0915: fDocumentHandler.emptyElement(fElementQName,
0916: fAttributes, null);
0917:
0918: //pop the element off the stack..
0919: fElementStack.popElement(fElementQName);
0920: } else {
0921: fDocumentHandler.startElement(fElementQName,
0922: fAttributes, null);
0923: }
0924: }
0925:
0926: if (DEBUG_CONTENT_SCANNING)
0927: System.out.println("<<< scanStartElementAfterName(): "
0928: + empty);
0929: return empty;
0930: } // scanStartElementAfterName()
0931:
0932: /**
0933: * Scans an attribute.
0934: * <p>
0935: * <pre>
0936: * [41] Attribute ::= Name Eq AttValue
0937: * </pre>
0938: * <p>
0939: * <strong>Note:</strong> This method assumes that the next
0940: * character on the stream is the first character of the attribute
0941: * name.
0942: * <p>
0943: * <strong>Note:</strong> This method uses the fAttributeQName and
0944: * fQName variables. The contents of these variables will be
0945: * destroyed.
0946: *
0947: * @param attributes The attributes list for the scanned attribute.
0948: */
0949: protected void scanAttribute(XMLAttributes attributes)
0950: throws IOException, XNIException {
0951: if (DEBUG_CONTENT_SCANNING)
0952: System.out.println(">>> scanAttribute()");
0953:
0954: // name
0955: if (fNamespaces) {
0956: fEntityScanner.scanQName(fAttributeQName);
0957: } else {
0958: String name = fEntityScanner.scanName();
0959: fAttributeQName.setValues(null, name, name, null);
0960: }
0961:
0962: // equals
0963: fEntityScanner.skipSpaces();
0964: if (!fEntityScanner.skipChar('=')) {
0965: reportFatalError("EqRequiredInAttribute", new Object[] {
0966: fCurrentElement.rawname, fAttributeQName.rawname });
0967: }
0968: fEntityScanner.skipSpaces();
0969:
0970: // content
0971: int oldLen = attributes.getLength();
0972: int attrIndex = attributes.addAttribute(fAttributeQName,
0973: XMLSymbols.fCDATASymbol, null);
0974:
0975: // WFC: Unique Att Spec
0976: if (oldLen == attributes.getLength()) {
0977: reportFatalError("AttributeNotUnique", new Object[] {
0978: fCurrentElement.rawname, fAttributeQName.rawname });
0979: }
0980:
0981: // Scan attribute value and return true if the un-normalized and normalized value are the same
0982: boolean isSameNormalizedAttr = scanAttributeValue(fTempString,
0983: fTempString2, fAttributeQName.rawname,
0984: fIsEntityDeclaredVC, fCurrentElement.rawname);
0985:
0986: attributes.setValue(attrIndex, fTempString.toString());
0987: // If the non-normalized and normalized value are the same, avoid creating a new string.
0988: if (!isSameNormalizedAttr) {
0989: attributes.setNonNormalizedValue(attrIndex, fTempString2
0990: .toString());
0991: }
0992: attributes.setSpecified(attrIndex, true);
0993:
0994: if (DEBUG_CONTENT_SCANNING)
0995: System.out.println("<<< scanAttribute()");
0996: } // scanAttribute(XMLAttributes)
0997:
0998: /**
0999: * Scans element content.
1000: *
1001: * @return Returns the next character on the stream.
1002: */
1003: protected int scanContent() throws IOException, XNIException {
1004:
1005: XMLString content = fTempString;
1006: int c = fEntityScanner.scanContent(content);
1007: if (c == '\r') {
1008: // happens when there is the character reference
1009: fEntityScanner.scanChar();
1010: fStringBuffer.clear();
1011: fStringBuffer.append(fTempString);
1012: fStringBuffer.append((char) c);
1013: content = fStringBuffer;
1014: c = -1;
1015: }
1016: if (fDocumentHandler != null && content.length > 0) {
1017: fDocumentHandler.characters(content, null);
1018: }
1019:
1020: if (c == ']' && fTempString.length == 0) {
1021: fStringBuffer.clear();
1022: fStringBuffer.append((char) fEntityScanner.scanChar());
1023: // remember where we are in case we get an endEntity before we
1024: // could flush the buffer out - this happens when we're parsing an
1025: // entity which ends with a ]
1026: fInScanContent = true;
1027: //
1028: // We work on a single character basis to handle cases such as:
1029: // ']]]>' which we might otherwise miss.
1030: //
1031: if (fEntityScanner.skipChar(']')) {
1032: fStringBuffer.append(']');
1033: while (fEntityScanner.skipChar(']')) {
1034: fStringBuffer.append(']');
1035: }
1036: if (fEntityScanner.skipChar('>')) {
1037: reportFatalError("CDEndInContent", null);
1038: }
1039: }
1040: if (fDocumentHandler != null && fStringBuffer.length != 0) {
1041: fDocumentHandler.characters(fStringBuffer, null);
1042: }
1043: fInScanContent = false;
1044: c = -1;
1045: }
1046: return c;
1047:
1048: } // scanContent():int
1049:
/**
 * Scans a CDATA section and reports it to the document handler as
 * startCDATA, one or more characters calls, and endCDATA.
 * <p>
 * <strong>Note:</strong> This method uses the fTempString and
 * fStringBuffer variables.
 * <p>
 * <strong>Note:</strong> fMarkupDepth is decremented once when the
 * section has been scanned to its end.
 *
 * @param complete True if the CDATA section is to be scanned
 *                 completely. This implementation does not read the
 *                 parameter; it always scans to the end of the section.
 *
 * @return True if CDATA is completely scanned. This implementation
 *         always returns true.
 */
protected boolean scanCDATASection(boolean complete)
        throws IOException, XNIException {

    // call handler
    if (fDocumentHandler != null) {
        fDocumentHandler.startCDATA(null);
    }

    while (true) {
        fStringBuffer.clear();
        // NOTE(review): scanData presumably returns false once the "]]"
        // delimiter has been reached and consumed, and true when more
        // data remains -- confirm against XMLEntityScanner.scanData.
        if (!fEntityScanner.scanData("]]", fStringBuffer)) {
            // flush the text collected in front of the delimiter
            if (fDocumentHandler != null
                    && fStringBuffer.length > 0) {
                fDocumentHandler.characters(fStringBuffer, null);
            }
            // count every further ']' that directly follows
            int brackets = 0;
            while (fEntityScanner.skipChar(']')) {
                brackets++;
            }
            // the counted brackets are reported as character data
            if (fDocumentHandler != null && brackets > 0) {
                fStringBuffer.clear();
                if (brackets > XMLEntityManager.DEFAULT_BUFFER_SIZE) {
                    // Handle large sequences of ']': fill the buffer
                    // once with DEFAULT_BUFFER_SIZE brackets and report
                    // it repeatedly instead of growing it.
                    int chunks = brackets
                            / XMLEntityManager.DEFAULT_BUFFER_SIZE;
                    int remainder = brackets
                            % XMLEntityManager.DEFAULT_BUFFER_SIZE;
                    for (int i = 0; i < XMLEntityManager.DEFAULT_BUFFER_SIZE; i++) {
                        fStringBuffer.append(']');
                    }
                    for (int i = 0; i < chunks; i++) {
                        fDocumentHandler.characters(fStringBuffer,
                                null);
                    }
                    if (remainder != 0) {
                        // shorten the view onto the same buffer to
                        // report the leftover brackets
                        fStringBuffer.length = remainder;
                        fDocumentHandler.characters(fStringBuffer,
                                null);
                    }
                } else {
                    for (int i = 0; i < brackets; i++) {
                        fStringBuffer.append(']');
                    }
                    fDocumentHandler
                            .characters(fStringBuffer, null);
                }
            }
            // a '>' at this point terminates the CDATA section
            if (fEntityScanner.skipChar('>')) {
                break;
            }
            // no '>' followed, so the "]]" was ordinary character data
            if (fDocumentHandler != null) {
                fStringBuffer.clear();
                fStringBuffer.append("]]");
                fDocumentHandler.characters(fStringBuffer, null);
            }
        } else {
            // report what has been collected so far
            if (fDocumentHandler != null) {
                fDocumentHandler.characters(fStringBuffer, null);
            }
            // inspect the next character without consuming it
            int c = fEntityScanner.peekChar();
            if (c != -1 && isInvalidLiteral(c)) {
                if (XMLChar.isHighSurrogate(c)) {
                    // special case: surrogates
                    fStringBuffer.clear();
                    scanSurrogates(fStringBuffer);
                    if (fDocumentHandler != null) {
                        fDocumentHandler.characters(fStringBuffer,
                                null);
                    }
                } else {
                    // report the character (in hex), then step over it
                    reportFatalError(
                            "InvalidCharInCDSect",
                            new Object[] { Integer.toString(c, 16) });
                    fEntityScanner.scanChar();
                }
            }
        }
    }
    fMarkupDepth--;

    // call handler
    if (fDocumentHandler != null) {
        fDocumentHandler.endCDATA(null);
    }

    return true;

} // scanCDATASection(boolean):boolean
1148:
1149: /**
1150: * Scans an end element.
1151: * <p>
1152: * <pre>
1153: * [42] ETag ::= '</' Name S? '>'
1154: * </pre>
1155: * <p>
1156: * <strong>Note:</strong> This method uses the fElementQName variable.
1157: * The contents of this variable will be destroyed. The caller should
1158: * copy the needed information out of this variable before calling
1159: * this method.
1160: *
1161: * @return The element depth.
1162: */
1163: protected int scanEndElement() throws IOException, XNIException {
1164: if (DEBUG_CONTENT_SCANNING)
1165: System.out.println(">>> scanEndElement()");
1166:
1167: fElementStack.popElement(fElementQName);
1168:
1169: // Take advantage of the fact that next string _should_ be "fElementQName.rawName",
1170: //In scanners most of the time is consumed on checks done for XML characters, we can
1171: // optimize on it and avoid the checks done for endElement,
1172: //we will also avoid symbol table lookup - neeraj.bajaj@sun.com
1173:
1174: // this should work both for namespace processing true or false...
1175:
1176: //REVISIT: if the string is not the same as expected.. we need to do better error handling..
1177: //We can skip this for now... In any case if the string doesn't match -- document is not well formed.
1178: if (!fEntityScanner.skipString(fElementQName.rawname)) {
1179: reportFatalError("ETagRequired",
1180: new Object[] { fElementQName.rawname });
1181: }
1182:
1183: // end
1184: fEntityScanner.skipSpaces();
1185: if (!fEntityScanner.skipChar('>')) {
1186: reportFatalError("ETagUnterminated",
1187: new Object[] { fElementQName.rawname });
1188: }
1189: fMarkupDepth--;
1190:
1191: //we have increased the depth for two markup "<" characters
1192: fMarkupDepth--;
1193:
1194: // check that this element was opened in the same entity
1195: if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
1196: reportFatalError("ElementEntityMismatch",
1197: new Object[] { fCurrentElement.rawname });
1198: }
1199:
1200: // call handler
1201: if (fDocumentHandler != null) {
1202: fDocumentHandler.endElement(fElementQName, null);
1203: }
1204:
1205: return fMarkupDepth;
1206:
1207: } // scanEndElement():int
1208:
1209: /**
1210: * Scans a character reference.
1211: * <p>
1212: * <pre>
1213: * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1214: * </pre>
1215: */
1216: protected void scanCharReference() throws IOException, XNIException {
1217:
1218: fStringBuffer2.clear();
1219: int ch = scanCharReferenceValue(fStringBuffer2, null);
1220: fMarkupDepth--;
1221: if (ch != -1) {
1222: // call handler
1223: if (fDocumentHandler != null) {
1224: if (fNotifyCharRefs) {
1225: fDocumentHandler.startGeneralEntity(
1226: fCharRefLiteral, null, null, null);
1227: }
1228: Augmentations augs = null;
1229: if (fValidation && ch <= 0x20) {
1230: if (fTempAugmentations != null) {
1231: fTempAugmentations.removeAllItems();
1232: } else {
1233: fTempAugmentations = new AugmentationsImpl();
1234: }
1235: augs = fTempAugmentations;
1236: augs.putItem(Constants.CHAR_REF_PROBABLE_WS,
1237: Boolean.TRUE);
1238: }
1239: fDocumentHandler.characters(fStringBuffer2, augs);
1240: if (fNotifyCharRefs) {
1241: fDocumentHandler.endGeneralEntity(fCharRefLiteral,
1242: null);
1243: }
1244: }
1245: }
1246:
1247: } // scanCharReference()
1248:
1249: /**
1250: * Scans an entity reference.
1251: *
1252: * @throws IOException Thrown if i/o error occurs.
1253: * @throws XNIException Thrown if handler throws exception upon
1254: * notification.
1255: */
1256: protected void scanEntityReference() throws IOException,
1257: XNIException {
1258:
1259: // name
1260: String name = fEntityScanner.scanName();
1261: if (name == null) {
1262: reportFatalError("NameRequiredInReference", null);
1263: return;
1264: }
1265:
1266: // end
1267: if (!fEntityScanner.skipChar(';')) {
1268: reportFatalError("SemicolonRequiredInReference",
1269: new Object[] { name });
1270: }
1271: fMarkupDepth--;
1272:
1273: // handle built-in entities
1274: if (name == fAmpSymbol) {
1275: handleCharacter('&', fAmpSymbol);
1276: } else if (name == fLtSymbol) {
1277: handleCharacter('<', fLtSymbol);
1278: } else if (name == fGtSymbol) {
1279: handleCharacter('>', fGtSymbol);
1280: } else if (name == fQuotSymbol) {
1281: handleCharacter('"', fQuotSymbol);
1282: } else if (name == fAposSymbol) {
1283: handleCharacter('\'', fAposSymbol);
1284: }
1285: // start general entity
1286: else if (fEntityManager.isUnparsedEntity(name)) {
1287: reportFatalError("ReferenceToUnparsedEntity",
1288: new Object[] { name });
1289: } else {
1290: if (!fEntityManager.isDeclaredEntity(name)) {
1291: if (fIsEntityDeclaredVC) {
1292: if (fValidation)
1293: fErrorReporter.reportError(
1294: XMLMessageFormatter.XML_DOMAIN,
1295: "EntityNotDeclared",
1296: new Object[] { name },
1297: XMLErrorReporter.SEVERITY_ERROR);
1298: } else {
1299: reportFatalError("EntityNotDeclared",
1300: new Object[] { name });
1301: }
1302: }
1303: fEntityManager.startEntity(name, false);
1304: }
1305:
1306: } // scanEntityReference()
1307:
1308: // utility methods
1309:
1310: /**
1311: * Calls document handler with a single character resulting from
1312: * built-in entity resolution.
1313: *
1314: * @param c
1315: * @param entity built-in name
1316: */
1317: private void handleCharacter(char c, String entity)
1318: throws XNIException {
1319: if (fDocumentHandler != null) {
1320: if (fNotifyBuiltInRefs) {
1321: fDocumentHandler.startGeneralEntity(entity, null, null,
1322: null);
1323: }
1324:
1325: fSingleChar[0] = c;
1326: fTempString.setValues(fSingleChar, 0, 1);
1327: fDocumentHandler.characters(fTempString, null);
1328:
1329: if (fNotifyBuiltInRefs) {
1330: fDocumentHandler.endGeneralEntity(entity, null);
1331: }
1332: }
1333: } // handleCharacter(char)
1334:
1335: /**
1336: * Handles the end element. This method will make sure that
1337: * the end element name matches the current element and notify
1338: * the handler about the end of the element and the end of any
1339: * relevent prefix mappings.
1340: * <p>
1341: * <strong>Note:</strong> This method uses the fQName variable.
1342: * The contents of this variable will be destroyed.
1343: *
1344: * @param element The element.
1345: *
1346: * @return The element depth.
1347: *
1348: * @throws XNIException Thrown if the handler throws a SAX exception
1349: * upon notification.
1350: *
1351: */
1352: // REVISIT: need to remove this method. It's not called anymore, because
1353: // the handling is done when the end tag is scanned. - SG
1354: protected int handleEndElement(QName element, boolean isEmpty)
1355: throws XNIException {
1356:
1357: fMarkupDepth--;
1358: // check that this element was opened in the same entity
1359: if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
1360: reportFatalError("ElementEntityMismatch",
1361: new Object[] { fCurrentElement.rawname });
1362: }
1363: // make sure the elements match
1364: QName startElement = fQName;
1365: fElementStack.popElement(startElement);
1366: if (element.rawname != startElement.rawname) {
1367: reportFatalError("ETagRequired",
1368: new Object[] { startElement.rawname });
1369: }
1370:
1371: // bind namespaces
1372: if (fNamespaces) {
1373: element.uri = startElement.uri;
1374: }
1375:
1376: // call handler
1377: if (fDocumentHandler != null && !isEmpty) {
1378: fDocumentHandler.endElement(element, null);
1379: }
1380:
1381: return fMarkupDepth;
1382:
1383: } // callEndElement(QName,boolean):int
1384:
1385: // helper methods
1386:
1387: /**
1388: * Sets the scanner state.
1389: *
1390: * @param state The new scanner state.
1391: */
1392: protected final void setScannerState(int state) {
1393:
1394: fScannerState = state;
1395: if (DEBUG_SCANNER_STATE) {
1396: System.out.print("### setScannerState: ");
1397: System.out.print(getScannerStateName(state));
1398: System.out.println();
1399: }
1400:
1401: } // setScannerState(int)
1402:
1403: /**
1404: * Sets the dispatcher.
1405: *
1406: * @param dispatcher The new dispatcher.
1407: */
1408: protected final void setDispatcher(Dispatcher dispatcher) {
1409: fDispatcher = dispatcher;
1410: if (DEBUG_DISPATCHER) {
1411: System.out.print("%%% setDispatcher: ");
1412: System.out.print(getDispatcherName(dispatcher));
1413: System.out.println();
1414: }
1415: }
1416:
1417: //
1418: // Private methods
1419: //
1420:
1421: /** Returns the scanner state name. */
1422: protected String getScannerStateName(int state) {
1423:
1424: switch (state) {
1425: case SCANNER_STATE_DOCTYPE:
1426: return "SCANNER_STATE_DOCTYPE";
1427: case SCANNER_STATE_ROOT_ELEMENT:
1428: return "SCANNER_STATE_ROOT_ELEMENT";
1429: case SCANNER_STATE_START_OF_MARKUP:
1430: return "SCANNER_STATE_START_OF_MARKUP";
1431: case SCANNER_STATE_COMMENT:
1432: return "SCANNER_STATE_COMMENT";
1433: case SCANNER_STATE_PI:
1434: return "SCANNER_STATE_PI";
1435: case SCANNER_STATE_CONTENT:
1436: return "SCANNER_STATE_CONTENT";
1437: case SCANNER_STATE_REFERENCE:
1438: return "SCANNER_STATE_REFERENCE";
1439: case SCANNER_STATE_END_OF_INPUT:
1440: return "SCANNER_STATE_END_OF_INPUT";
1441: case SCANNER_STATE_TERMINATED:
1442: return "SCANNER_STATE_TERMINATED";
1443: case SCANNER_STATE_CDATA:
1444: return "SCANNER_STATE_CDATA";
1445: case SCANNER_STATE_TEXT_DECL:
1446: return "SCANNER_STATE_TEXT_DECL";
1447: }
1448:
1449: return "??? (" + state + ')';
1450:
1451: } // getScannerStateName(int):String
1452:
1453: /** Returns the dispatcher name. */
1454: public String getDispatcherName(Dispatcher dispatcher) {
1455:
1456: if (DEBUG_DISPATCHER) {
1457: if (dispatcher != null) {
1458: String name = dispatcher.getClass().getName();
1459: int index = name.lastIndexOf('.');
1460: if (index != -1) {
1461: name = name.substring(index + 1);
1462: index = name.lastIndexOf('$');
1463: if (index != -1) {
1464: name = name.substring(index + 1);
1465: }
1466: }
1467: return name;
1468: }
1469: }
1470: return "null";
1471:
1472: } // getDispatcherName():String
1473:
1474: //
1475: // Classes
1476: //
1477:
1478: /**
1479: * Element stack. This stack operates without synchronization, error
1480: * checking, and it re-uses objects instead of throwing popped items
1481: * away.
1482: *
1483: * @author Andy Clark, IBM
1484: */
1485: protected static class ElementStack {
1486:
1487: //
1488: // Data
1489: //
1490:
1491: /** The stack data. */
1492: protected QName[] fElements;
1493:
1494: /** The size of the stack. */
1495: protected int fSize;
1496:
1497: //
1498: // Constructors
1499: //
1500:
1501: /** Default constructor. */
1502: public ElementStack() {
1503: fElements = new QName[10];
1504: for (int i = 0; i < fElements.length; i++) {
1505: fElements[i] = new QName();
1506: }
1507: } // <init>()
1508:
1509: //
1510: // Public methods
1511: //
1512:
1513: /**
1514: * Pushes an element on the stack.
1515: * <p>
1516: * <strong>Note:</strong> The QName values are copied into the
1517: * stack. In other words, the caller does <em>not</em> orphan
1518: * the element to the stack. Also, the QName object returned
1519: * is <em>not</em> orphaned to the caller. It should be
1520: * considered read-only.
1521: *
1522: * @param element The element to push onto the stack.
1523: *
1524: * @return Returns the actual QName object that stores the
1525: */
1526: public QName pushElement(QName element) {
1527: if (fSize == fElements.length) {
1528: QName[] array = new QName[fElements.length * 2];
1529: System.arraycopy(fElements, 0, array, 0, fSize);
1530: fElements = array;
1531: for (int i = fSize; i < fElements.length; i++) {
1532: fElements[i] = new QName();
1533: }
1534: }
1535: fElements[fSize].setValues(element);
1536: return fElements[fSize++];
1537: } // pushElement(QName):QName
1538:
1539: /**
1540: * Pops an element off of the stack by setting the values of
1541: * the specified QName.
1542: * <p>
1543: * <strong>Note:</strong> The object returned is <em>not</em>
1544: * orphaned to the caller. Therefore, the caller should consider
1545: * the object to be read-only.
1546: */
1547: public void popElement(QName element) {
1548: element.setValues(fElements[--fSize]);
1549: } // popElement(QName)
1550:
1551: /** Clears the stack without throwing away existing QName objects. */
1552: public void clear() {
1553: fSize = 0;
1554: } // clear()
1555:
1556: } // class ElementStack
1557:
1558: /**
1559: * This interface defines an XML "event" dispatching model. Classes
1560: * that implement this interface are responsible for scanning parts
1561: * of the XML document and dispatching callbacks.
1562: *
1563: * @xerces.internal
1564: *
1565: * @author Glenn Marcy, IBM
1566: */
1567: protected interface Dispatcher {
1568:
1569: //
1570: // Dispatcher methods
1571: //
1572:
1573: /**
1574: * Dispatch an XML "event".
1575: *
1576: * @param complete True if this dispatcher is intended to scan
1577: * and dispatch as much as possible.
1578: *
1579: * @return True if there is more to dispatch either from this
1580: * or a another dispatcher.
1581: *
1582: * @throws IOException Thrown on i/o error.
1583: * @throws XNIException Thrown on parse error.
1584: */
1585: public boolean dispatch(boolean complete) throws IOException,
1586: XNIException;
1587:
1588: } // interface Dispatcher
1589:
/**
 * Dispatcher to handle content scanning. It is a state machine driven
 * by fScannerState; the hook methods at the end of the class let a
 * subclass adjust the behavior for full documents.
 *
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 */
protected class FragmentContentDispatcher implements Dispatcher {

    //
    // Dispatcher methods
    //

    /**
     * Dispatch an XML "event". Each pass of the loop handles the
     * current scanner state once; the loop repeats while
     * <code>complete</code> is true or while a state requested an
     * immediate follow-up pass.
     *
     * @param complete True if this dispatcher is intended to scan
     *                 and dispatch as much as possible.
     *
     * @return True if there is more to dispatch either from this
     *         or from another dispatcher. False is returned after an
     *         encoding error or an end of file was handled.
     *
     * @throws IOException  Thrown on i/o error.
     * @throws XNIException Thrown on parse error.
     */
    public boolean dispatch(boolean complete) throws IOException,
            XNIException {
        try {
            // set by states that must be followed by another pass
            // right away, even when "complete" is false
            boolean again;
            do {
                again = false;
                switch (fScannerState) {
                case SCANNER_STATE_CONTENT: {
                    if (fEntityScanner.skipChar('<')) {
                        setScannerState(SCANNER_STATE_START_OF_MARKUP);
                        again = true;
                    } else if (fEntityScanner.skipChar('&')) {
                        setScannerState(SCANNER_STATE_REFERENCE);
                        again = true;
                    } else {
                        // character data: scan until markup or a
                        // reference starts (a single run only when
                        // "complete" is false)
                        do {
                            int c = scanContent();
                            if (c == '<') {
                                fEntityScanner.scanChar();
                                setScannerState(SCANNER_STATE_START_OF_MARKUP);
                                break;
                            } else if (c == '&') {
                                fEntityScanner.scanChar();
                                setScannerState(SCANNER_STATE_REFERENCE);
                                break;
                            } else if (c != -1
                                    && isInvalidLiteral(c)) {
                                if (XMLChar.isHighSurrogate(c)) {
                                    // special case: surrogates
                                    fStringBuffer.clear();
                                    if (scanSurrogates(fStringBuffer)) {
                                        // call handler
                                        if (fDocumentHandler != null) {
                                            fDocumentHandler
                                                    .characters(
                                                            fStringBuffer,
                                                            null);
                                        }
                                    }
                                } else {
                                    // report the character (in hex),
                                    // then step over it
                                    reportFatalError(
                                            "InvalidCharInContent",
                                            new Object[] { Integer
                                                    .toString(c, 16) });
                                    fEntityScanner.scanChar();
                                }
                            }
                        } while (complete);
                    }
                    break;
                }
                case SCANNER_STATE_START_OF_MARKUP: {
                    // a '<' has been consumed
                    fMarkupDepth++;
                    if (fEntityScanner.skipChar('/')) {
                        // end tag; a result of zero is offered to the
                        // hook, which may stop this dispatcher
                        if (scanEndElement() == 0) {
                            if (elementDepthIsZeroHook()) {
                                return true;
                            }
                        }
                        setScannerState(SCANNER_STATE_CONTENT);
                    } else if (isValidNameStartChar(fEntityScanner
                            .peekChar())) {
                        scanStartElement();
                        setScannerState(SCANNER_STATE_CONTENT);
                    } else if (fEntityScanner.skipChar('!')) {
                        // "<!": comment, CDATA section or (via the
                        // hook) a DOCTYPE
                        if (fEntityScanner.skipChar('-')) {
                            if (!fEntityScanner.skipChar('-')) {
                                reportFatalError(
                                        "InvalidCommentStart", null);
                            }
                            setScannerState(SCANNER_STATE_COMMENT);
                            again = true;
                        } else if (fEntityScanner
                                .skipString("[CDATA[")) {
                            setScannerState(SCANNER_STATE_CDATA);
                            again = true;
                        } else if (!scanForDoctypeHook()) {
                            reportFatalError(
                                    "MarkupNotRecognizedInContent",
                                    null);
                        }
                    } else if (fEntityScanner.skipChar('?')) {
                        setScannerState(SCANNER_STATE_PI);
                        again = true;
                    } else if (isValidNameStartHighSurrogate(fEntityScanner
                            .peekChar())) {
                        // element name starting with a high surrogate
                        scanStartElement();
                        setScannerState(SCANNER_STATE_CONTENT);
                    } else {
                        reportFatalError(
                                "MarkupNotRecognizedInContent",
                                null);
                        setScannerState(SCANNER_STATE_CONTENT);
                    }
                    break;
                }
                case SCANNER_STATE_COMMENT: {
                    scanComment();
                    setScannerState(SCANNER_STATE_CONTENT);
                    break;
                }
                case SCANNER_STATE_PI: {
                    scanPI();
                    setScannerState(SCANNER_STATE_CONTENT);
                    break;
                }
                case SCANNER_STATE_CDATA: {
                    scanCDATASection(complete);
                    setScannerState(SCANNER_STATE_CONTENT);
                    break;
                }
                case SCANNER_STATE_REFERENCE: {
                    // an '&' has been consumed
                    fMarkupDepth++;
                    // NOTE: We need to set the state beforehand
                    //       because the XMLEntityHandler#startEntity
                    //       callback could set the state to
                    //       SCANNER_STATE_TEXT_DECL and we don't want
                    //       to override that scanner state.
                    setScannerState(SCANNER_STATE_CONTENT);
                    if (fEntityScanner.skipChar('#')) {
                        scanCharReference();
                    } else {
                        scanEntityReference();
                    }
                    break;
                }
                case SCANNER_STATE_TEXT_DECL: {
                    // scan text decl
                    if (fEntityScanner.skipString("<?xml")) {
                        fMarkupDepth++;
                        // NOTE: special case where entity starts with a PI
                        //       whose name starts with "xml" (e.g. "xmlfoo")
                        if (isValidNameChar(fEntityScanner
                                .peekChar())) {
                            // rebuild the full PI target, starting
                            // with the "xml" that was already skipped
                            fStringBuffer.clear();
                            fStringBuffer.append("xml");
                            if (fNamespaces) {
                                while (isValidNCName(fEntityScanner
                                        .peekChar())) {
                                    fStringBuffer
                                            .append((char) fEntityScanner
                                                    .scanChar());
                                }
                            } else {
                                while (isValidNameChar(fEntityScanner
                                        .peekChar())) {
                                    fStringBuffer
                                            .append((char) fEntityScanner
                                                    .scanChar());
                                }
                            }
                            String target = fSymbolTable.addSymbol(
                                    fStringBuffer.ch,
                                    fStringBuffer.offset,
                                    fStringBuffer.length);
                            scanPIData(target, fTempString);
                        }

                        // standard text declaration
                        else {
                            scanXMLDeclOrTextDecl(true);
                        }
                    }
                    // now that we've straightened out the readers, we can read in chunks:
                    fEntityManager.fCurrentEntity.mayReadChunks = true;
                    setScannerState(SCANNER_STATE_CONTENT);
                    break;
                }
                case SCANNER_STATE_ROOT_ELEMENT: {
                    if (scanRootElementHook()) {
                        return true;
                    }
                    setScannerState(SCANNER_STATE_CONTENT);
                    break;
                }
                case SCANNER_STATE_DOCTYPE: {
                    reportFatalError("DoctypeIllegalInContent",
                            null);
                    setScannerState(SCANNER_STATE_CONTENT);
                    // no break needed: this is the last case
                }
                }
            } while (complete || again);
        }
        // encoding errors
        catch (MalformedByteSequenceException e) {
            fErrorReporter.reportError(e.getDomain(), e.getKey(), e
                    .getArguments(),
                    XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
            return false;
        } catch (CharConversionException e) {
            fErrorReporter.reportError(
                    XMLMessageFormatter.XML_DOMAIN,
                    "CharConversionFailure", null,
                    XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
            return false;
        }
        // premature end of file
        catch (EOFException e) {
            endOfFileHook(e);
            return false;
        }

        return true;

    } // dispatch(boolean):boolean

    //
    // Protected methods
    //

    // hooks

    // NOTE: These hook methods are added so that the full document
    //       scanner can share the majority of code with this class.

    /**
     * Scan for DOCTYPE hook. This method is a hook for subclasses
     * to add code to handle scanning for the "DOCTYPE" string
     * after the string "<!" has been scanned.
     * <p>
     * This implementation scans nothing and returns false.
     *
     * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
     *         was not scanned.
     */
    protected boolean scanForDoctypeHook() throws IOException,
            XNIException {
        return false;
    } // scanForDoctypeHook():boolean

    /**
     * Element depth is zero. This method is a hook for subclasses
     * to add code to handle when the element depth hits zero. When
     * scanning a document fragment, an element depth of zero is
     * normal. However, when scanning a full XML document, the
     * scanner must handle the trailing miscellaneous section of
     * the document after the end of the document's root element.
     * <p>
     * This implementation always returns false.
     *
     * @return True if the caller should stop and return true which
     *         allows the scanner to switch to a new scanning
     *         dispatcher. A return value of false indicates that
     *         the content dispatcher should continue as normal.
     */
    protected boolean elementDepthIsZeroHook() throws IOException,
            XNIException {
        return false;
    } // elementDepthIsZeroHook():boolean

    /**
     * Scan for root element hook. This method is a hook for
     * subclasses to add code that handles scanning for the root
     * element. When scanning a document fragment, there is no
     * "root" element. However, when scanning a full XML document,
     * the scanner must handle the root element specially.
     * <p>
     * This implementation always returns false.
     *
     * @return True if the caller should stop and return true which
     *         allows the scanner to switch to a new scanning
     *         dispatcher. A return value of false indicates that
     *         the content dispatcher should continue as normal.
     */
    protected boolean scanRootElementHook() throws IOException,
            XNIException {
        return false;
    } // scanRootElementHook():boolean

    /**
     * End of file hook. This method is a hook for subclasses to
     * add code that handles the end of file. The end of file in
     * a document fragment is OK if the markup depth is zero.
     * However, when scanning a full XML document, an end of file
     * is always premature.
     *
     * @param e The end-of-file exception that was caught by
     *          dispatch; this implementation does not inspect it.
     */
    protected void endOfFileHook(EOFException e)
            throws IOException, XNIException {

        // NOTE: An end of file is only an error if we were
        //       in the middle of scanning some markup. -Ac
        if (fMarkupDepth != 0) {
            reportFatalError("PrematureEOF", null);
        }

    } // endOfFileHook()

} // class FragmentContentDispatcher
1896:
1897: } // class XMLDocumentFragmentScannerImpl
|