0001: /*
0002: * $Id: XMLDocumentFragmentScannerImpl.java,v 1.5 2006/11/29 22:01:32 spericas Exp $
0003: */
0004:
0005: /*
0006: * The contents of this file are subject to the terms
0007: * of the Common Development and Distribution License
0008: * (the License). You may not use this file except in
0009: * compliance with the License.
0010: *
0011: * You can obtain a copy of the license at
0012: * https://glassfish.dev.java.net/public/CDDLv1.0.html.
0013: * See the License for the specific language governing
0014: * permissions and limitations under the License.
0015: *
0016: * When distributing Covered Code, include this CDDL
0017: * Header Notice in each file and include the License file
0018: * at https://glassfish.dev.java.net/public/CDDLv1.0.html.
0019: * If applicable, add the following below the CDDL Header,
0020: * with the fields enclosed by brackets [] replaced by
0021: * you own identifying information:
0022: * "Portions Copyrighted [year] [name of copyright owner]"
0023: *
0024: * [Name of File] [ver.__] [Date]
0025: *
0026: * Copyright 2006 Sun Microsystems Inc. All Rights Reserved
0027: */
0028:
0029: /*
0030: * The Apache Software License, Version 1.1
0031: *
0032: *
0033: * Copyright (c) 1999-2002 The Apache Software Foundation.
0034: * All rights reserved.
0035: *
0036: * Redistribution and use in source and binary forms, with or without
0037: * modification, are permitted provided that the following conditions
0038: * are met:
0039: *
0040: * 1. Redistributions of source code must retain the above copyright
0041: * notice, this list of conditions and the following disclaimer.
0042: *
0043: * 2. Redistributions in binary form must reproduce the above copyright
0044: * notice, this list of conditions and the following disclaimer in
0045: * the documentation and/or other materials provided with the
0046: * distribution.
0047: *
0048: * 3. The end-user documentation included with the redistribution,
0049: * if any, must include the following acknowledgment:
0050: * "This product includes software developed by the
0051: * Apache Software Foundation (http://www.apache.org/)."
0052: * Alternately, this acknowledgment may appear in the software itself,
0053: * if and wherever such third-party acknowledgments normally appear.
0054: *
0055: * 4. The names "Xerces" and "Apache Software Foundation" must
0056: * not be used to endorse or promote products derived from this
0057: * software without prior written permission. For written
0058: * permission, please contact apache@apache.org.
0059: *
0060: * 5. Products derived from this software may not be called "Apache",
0061: * nor may "Apache" appear in their name, without prior written
0062: * permission of the Apache Software Foundation.
0063: *
0064: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0065: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0066: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0067: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
0068: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0069: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0070: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
0071: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0072: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
0073: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
0074: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0075: * SUCH DAMAGE.
0076: * ====================================================================
0077: *
0078: * This software consists of voluntary contributions made by many
0079: * individuals on behalf of the Apache Software Foundation and was
0080: * originally based on software copyright (c) 1999, International
0081: * Business Machines, Inc., http://www.apache.org. For more
0082: * information on the Apache Software Foundation, please see
0083: * <http://www.apache.org/>.
0084: */
0085:
0086: package com.sun.xml.stream;
0087:
0088: import com.sun.xml.stream.dtd.DTDGrammarUtil;
0089:
0090: import java.io.EOFException;
0091: import java.io.IOException;
0092: import javax.xml.stream.XMLInputFactory;
0093: import javax.xml.stream.events.XMLEvent;
0094: import com.sun.xml.stream.xerces.impl.msg.XMLMessageFormatter;
0095: import com.sun.xml.stream.xerces.util.XMLAttributesIteratorImpl;
0096: import com.sun.xml.stream.xerces.util.XMLChar;
0097: import com.sun.xml.stream.xerces.util.XMLStringBuffer;
0098: import com.sun.xml.stream.xerces.util.XMLSymbols;
0099: import com.sun.xml.stream.xerces.xni.QName;
0100: import com.sun.xml.stream.xerces.xni.XMLAttributes;
0101: import com.sun.xml.stream.xerces.xni.XMLDocumentHandler;
0102: import com.sun.xml.stream.xerces.xni.XMLResourceIdentifier;
0103: import com.sun.xml.stream.xerces.xni.XMLString;
0104: import com.sun.xml.stream.xerces.xni.XNIException;
0105: import com.sun.xml.stream.xerces.xni.parser.XMLComponent;
0106: import com.sun.xml.stream.xerces.xni.parser.XMLComponentManager;
0107: import com.sun.xml.stream.xerces.xni.parser.XMLConfigurationException;
0108: import com.sun.xml.stream.xerces.xni.parser.XMLDocumentScanner;
0109: import com.sun.xml.stream.xerces.xni.parser.XMLInputSource;
0110:
0111: /**
0112: *
0113: * This class is responsible for scanning the structure and content
0114: * of document fragments.
0115: *
0116: * This class has been modified as per the new design which is more suited to
0117: * efficiently build pull parser. Lot of improvements have been done and
0118: * the code has been added to support stax functionality/features.
0119: *
0120: * @author Neeraj Bajaj SUN Microsystems
0121: * @author K.Venugopal SUN Microsystems
0122: * @author Glenn Marcy, IBM
0123: * @author Andy Clark, IBM
0124: * @author Arnaud Le Hors, IBM
0125: * @author Eric Ye, IBM
0126: * @author Sunitha Reddy SUN Microsystems
0127: * @version $Id: XMLDocumentFragmentScannerImpl.java,v 1.5 2006/11/29 22:01:32 spericas Exp $
0128:
0129: */
0130: public class XMLDocumentFragmentScannerImpl extends XMLScanner
0131: implements XMLDocumentScanner, XMLComponent, XMLEntityHandler {
0132:
0133: //
0134: // Constants
0135: //
0136:
0137: // scanner states
0138:
0139: //XXX this should be divided into more states.
/** Scanner state: start of markup. */
protected static final int SCANNER_STATE_START_OF_MARKUP = 21;

/** Scanner state: content. */
protected static final int SCANNER_STATE_CONTENT = 22;

/** Scanner state: processing instruction. */
protected static final int SCANNER_STATE_PI = 23;

/** Scanner state: DOCTYPE. */
protected static final int SCANNER_STATE_DOCTYPE = 24;

/** Scanner state: XML declaration. */
protected static final int SCANNER_STATE_XML_DECL = 25;

/** Scanner state: root element. */
protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;

/** Scanner state: comment. */
protected static final int SCANNER_STATE_COMMENT = 27;

/** Scanner state: reference. */
protected static final int SCANNER_STATE_REFERENCE = 28;

/** Scanner state: attribute name, e.g. reading 'type' in &lt;book type="hard"&gt;. */
protected static final int SCANNER_STATE_ATTRIBUTE = 29;

/** Scanner state: attribute value, e.g. reading 'hard' in &lt;book type="hard"&gt;. */
protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;

// Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL, so it is
// intentionally left undeclared in this class.
//protected static final int SCANNER_STATE_TRAILING_MISC = 32;

/** Scanner state: end of input. */
protected static final int SCANNER_STATE_END_OF_INPUT = 33;

/** Scanner state: terminated. */
protected static final int SCANNER_STATE_TERMINATED = 34;

/** Scanner state: CDATA section. */
protected static final int SCANNER_STATE_CDATA = 35;

/** Scanner state: text declaration. */
protected static final int SCANNER_STATE_TEXT_DECL = 36;

/** Scanner state: character data. */
protected static final int SCANNER_STATE_CHARACTER_DATA = 37;

/** Scanner state: start element tag, e.g. the &lt;book type="hard"&gt; of &lt;book type="hard"&gt;foo&lt;/book&gt;. */
protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;

/** Scanner state: end element tag, e.g. reading &lt;/book&gt; in &lt;book type="hard"&gt;foo&lt;/book&gt;. */
protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;

/** Scanner state: character reference (presumably a numeric reference such as &amp;#38; -- confirm against the driver). */
protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;

/** Scanner state: built-in entity references (presumably &amp;lt; &amp;gt; &amp;amp; etc. -- confirm against the driver). */
protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;
0195:
0196: // feature identifiers
0197:
/** Feature identifier: namespaces. */
protected static final String NAMESPACES = Constants.SAX_FEATURE_PREFIX
        + Constants.NAMESPACES_FEATURE;

/** Feature identifier: notify built-in references. */
protected static final String NOTIFY_BUILTIN_REFS = Constants.XERCES_FEATURE_PREFIX
        + Constants.NOTIFY_BUILTIN_REFS_FEATURE;

// recognized features and properties

/**
 * Recognized features. Parallel to {@link #FEATURE_DEFAULTS}: entry i of
 * that array is the default for entry i of this one.
 */
private static final String[] RECOGNIZED_FEATURES = { NAMESPACES,
        VALIDATION, NOTIFY_BUILTIN_REFS, NOTIFY_CHAR_REFS, };

/**
 * Feature defaults, indexed like {@link #RECOGNIZED_FEATURES}. A null
 * entry means this component reports no default for that feature.
 */
private static final Boolean[] FEATURE_DEFAULTS = { null, null,
        Boolean.FALSE, Boolean.FALSE, };

/**
 * Recognized properties. Parallel to {@link #PROPERTY_DEFAULTS}.
 */
private static final String[] RECOGNIZED_PROPERTIES = {
        SYMBOL_TABLE, ERROR_REPORTER, ENTITY_MANAGER, };

/**
 * Property defaults, indexed like {@link #RECOGNIZED_PROPERTIES}. All
 * entries are null, i.e. no defaults are reported.
 */
private static final Object[] PROPERTY_DEFAULTS = { null, null,
        null, };

/** The characters "[CDATA[" (the part of a CDATA section opener that follows "&lt;!"). */
protected static final char[] cdata = { '[', 'C', 'D', 'A', 'T',
        'A', '[' };
/** The characters "&lt;?xml". */
protected static final char[] xmlDecl = { '<', '?', 'x', 'm', 'l' };
/** The characters "&lt;/" that open an end tag. */
protected static final char[] endTag = { '<', '/' };
0228: // debugging
0229:
/** Debug scanner state. */
private static final boolean DEBUG_SCANNER_STATE = false;

/** Debug driver. */
private static final boolean DEBUG_DISPATCHER = false;

/** Debug content driver scanning. */
protected static final boolean DEBUG_CONTENT_SCANNING = false;

/** Debug driver next. */
protected static final boolean DEBUG_NEXT = false;

/** General debug switch. */
protected static final boolean DEBUG = false;
/** Debug coalescing (presumably of adjacent character events -- confirm at the use sites). */
protected static final boolean DEBUG_COALESCE = false;
0245: //
0246: // Data
0247: //
0248:
0249: // protected data
0250:
/** Document handler that receives XNI callbacks; may be null. */
protected XMLDocumentHandler fDocumentHandler;

/** Entity storage, refreshed from the entity manager on every reset. */
protected XMLEntityStorage fEntityStore;

/**
 * Entity stack: the markup depth recorded when each entity was started,
 * indexed by entity depth. Doubled in size on demand by startEntity.
 */
protected int[] fEntityStack = new int[4];

/** Markup depth. */
protected int fMarkupDepth;

/** Whether the element is empty. */
protected boolean fEmptyElement;

/** Scanner state (one of the SCANNER_STATE_* constants). */
protected int fScannerState;

/** SubScanner state: inside scanContent method. */
protected boolean fInScanContent = false;
/** Whether the last section scanned was a CDATA section. */
protected boolean fLastSectionWasCData = false;
/** Whether the last section scanned was an entity reference. */
protected boolean fLastSectionWasEntityReference = false;
/** Whether the last section scanned was character data. */
protected boolean fLastSectionWasCharacterData = false;

/** Whether the document has an external DTD. */
protected boolean fHasExternalDTD;

/** Standalone: true when the XML declaration said standalone="yes". */
protected boolean fStandalone;
/** XML version (presumably the declared version -- it is not assigned in the visible part of the file). */
protected String fVersion;

// element information

/** Current element. */
protected QName fCurrentElement;

/** Element stack. */
protected ElementStack fElementStack = new ElementStack();
/** Second element stack; NOTE(review): its role relative to fElementStack is not visible here. */
protected ElementStack2 fElementStack2 = new ElementStack2();
0290:
0291: // other info
0292:
/*
 * Document system identifier (disabled).
 * REVISIT: So what's this used for? - NG
 * protected String fDocumentSystemId;
 */

/** Target of the most recently scanned processing instruction. */
protected String fPITarget;

//xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values
/** Processing instruction data holder. */
protected XMLString fPIData = new XMLString();

// features

/** Namespaces. */
protected boolean fNamespaces;

/** Notify built-in references. */
protected boolean fNotifyBuiltInRefs = false;

// STAX related properties, shown with their default values.
/** Whether entity references are replaced; forced to true when coalescing. */
protected boolean fReplaceEntityReferences = true;
/** Whether external entities are supported. */
protected boolean fSupportExternalEntities = false;
/** Whether CDATA sections are reported as their own event; forced to false when coalescing. */
protected boolean fReportCdataEvent = false;
/** Whether the coalescing property is enabled. */
protected boolean fIsCoalesce = false;
/** Encoding named in the XML or text declaration, or null if none was seen. */
protected String fDeclaredEncoding = null;

// drivers

/** Active driver. */
protected Driver fDriver;

/**
 * Content driver. Created through the overridable createContentDriver()
 * while this object is still being constructed.
 */
protected Driver fContentDriver = createContentDriver();
0326:
0327: // temporary variables
0328:
/** Element QName. */
protected QName fElementQName = new QName();

/** Attribute QName. */
protected QName fAttributeQName = new QName();

/**
 * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
 * implements the Iterator interface, so attributes can be handed out directly in
 * the form of an iterator.
 */
protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();

/** Temporary string. */
protected XMLString fTempString = new XMLString();

/** Second temporary string. */
protected XMLString fTempString2 = new XMLString();

/**
 * Array of 3 strings; filled by the XML/text declaration scan with the
 * version, encoding and standalone pseudo-attribute values, in that order.
 */
private String[] fStrings = new String[3];

/** String buffer, made accessible to derived classes. */
protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();

/** Second string buffer, made accessible to derived classes. */
protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

/**
 * Content buffer, made accessible to derived classes. Stores character
 * data, comment text and PI data (it is what getPIData() and getComment()
 * read from).
 */
protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();

/** Single character array. */
private final char[] fSingleChar = new char[1];
/** Name of the current entity; NOTE(review): not assigned in the visible part of the file. */
private String fCurrentEntityName = null;
0364:
// New members
/** NOTE(review): presumably requests scanning through to the end of input -- confirm at the use sites. */
protected boolean fScanToEnd = false;

/** DTD grammar helper; null until one is supplied. */
protected DTDGrammarUtil dtdGrammarUtil = null;

/** Whether default attributes should be added (presumably from the DTD -- confirm at the use sites). */
protected boolean fAddDefaultAttr = false;

/** Whether built-in references were found. */
protected boolean foundBuiltInRefs = false;

// skip element algorithm
/** Pointer information is only stored for element depths below this limit. */
static final short MAX_DEPTH_LIMIT = 5;
/** Capacity of fElementArray. */
static final short ELEMENT_ARRAY_LENGTH = 200;
/** Number of pointer slots kept per depth in fPointerInfo. */
static final short MAX_POINTER_AT_A_DEPTH = 4;
/** Debug the skip algorithm. */
static final boolean DEBUG_SKIP_ALGORITHM = false;
// element array of length ELEMENT_ARRAY_LENGTH holding element raw names
String[] fElementArray = new String[ELEMENT_ARRAY_LENGTH];
// pointer location where the last element was skipped
short fLastPointerLocation = 0;
// next free slot in fElementArray
short fElementPointer = 0;
// 2D array to store pointer info: [depth][slot]
short[][] fPointerInfo = new short[MAX_DEPTH_LIMIT][MAX_POINTER_AT_A_DEPTH];
/** Raw name of the element being handled by the skip algorithm. */
protected String fElementRawname;
/** Set by addElement when the new element's raw name matches the one previously stored at the same depth. */
protected boolean fShouldSkip = false;
/** Skip-algorithm flag; cleared on reset. NOTE(review): exact meaning not visible here. */
protected boolean fAdd = false;
/** Skip-algorithm flag; cleared on reset. NOTE(review): exact meaning not visible here. */
protected boolean fSkip = false;
0390:
0391: //
0392: // Constructors
0393: //
0394:
/**
 * Default constructor.
 * <p>
 * Does no explicit work; all state is set up by the field initializers
 * and later by one of the <code>reset</code> methods.
 */
public XMLDocumentFragmentScannerImpl() {
} // <init>()
0398:
0399: //
0400: // XMLDocumentScanner methods
0401: //
0402:
0403: /**
0404: * Sets the input source.
0405: *
0406: * @param inputSource The input source.
0407: *
0408: * @throws IOException Thrown on i/o error.
0409: */
0410: public void setInputSource(XMLInputSource inputSource)
0411: throws IOException {
0412: fEntityManager.setEntityHandler(this );
0413: fEntityManager.startEntity("$fragment$", inputSource, false,
0414: true);
0415: // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
0416: } // setInputSource(XMLInputSource)
0417:
/**
 * Scans a document.
 * <p>
 * In this implementation the push-style dispatch loop is disabled (see the
 * commented-out code in the body): the method only registers this scanner
 * as the entity handler and returns. Scanning is driven through
 * {@link #next()} instead.
 *
 * @param complete True if the scanner should scan the document
 *                 completely, pushing all events to the registered
 *                 document handler. A value of false indicates that
 *                 the scanner should only scan the next portion
 *                 of the document and return. <strong>Currently
 *                 ignored</strong>, because the dispatch loop is disabled.
 *
 * @return Always true in this implementation.
 */
public boolean scanDocument(boolean complete) throws IOException,
        XNIException {

    // make sure entity callbacks are routed to this scanner
    fEntityManager.setEntityHandler(this );

    /**
     * Disabled dispatch loop, kept for reference:
     * do {
     *     if (!fDriver.dispatch(complete)) {
     *         return false;
     *     }
     * } while (complete);
     */
    // return success
    return true;

} // scanDocument(boolean):boolean
0448:
/**
 * Advances the scanner by delegating to the active driver.
 *
 * @return the value returned by the active driver's <code>next()</code>
 *         (presumably the code of the next event/state on the input --
 *         confirm against the Driver implementations).
 *
 * @throws IOException  Propagated from the driver.
 * @throws XNIException Propagated from the driver.
 */

public int next() throws IOException, XNIException {
    return fDriver.next();
}
0456:
0457: //
0458: // XMLComponent methods
0459: //
0460:
0461: /**
0462: * Resets the component. The component can query the component manager
0463: * about any features and properties that affect the operation of the
0464: * component.
0465: *
0466: * @param componentManager The component manager.
0467: *
0468: * @throws SAXException Thrown by component on initialization error.
0469: * For example, if a feature or property is
0470: * required for the operation of the component, the
0471: * component manager may throw a
0472: * SAXNotRecognizedException or a
0473: * SAXNotSupportedException.
0474: */
0475:
0476: public void reset(XMLComponentManager componentManager)
0477: throws XMLConfigurationException {
0478:
0479: super .reset(componentManager);
0480:
0481: // other settings
0482: // fDocumentSystemId = null;
0483:
0484: // sax features
0485: try {
0486: fNamespaces = componentManager.getFeature(NAMESPACES);
0487: } catch (XMLConfigurationException e) {
0488: fNamespaces = true;
0489: }
0490: //fAttributes.setNamespaces(fNamespaces);
0491:
0492: // xerces features
0493: try {
0494: fNotifyBuiltInRefs = componentManager
0495: .getFeature(NOTIFY_BUILTIN_REFS);
0496: } catch (XMLConfigurationException e) {
0497: fNotifyBuiltInRefs = false;
0498: }
0499:
0500: // initialize vars
0501: fMarkupDepth = 0;
0502: fCurrentElement = null;
0503: fElementStack.clear();
0504: fHasExternalDTD = false;
0505: fStandalone = false;
0506:
0507: // setup Driver
0508: setScannerState(SCANNER_STATE_CONTENT);
0509: setDriver(fContentDriver);
0510: fEntityStore = fEntityManager.getEntityStore();
0511: //fEntityManager.test();
0512: } // reset(XMLComponentManager)
0513:
0514: public void reset(PropertyManager propertyManager) {
0515:
0516: super .reset(propertyManager);
0517:
0518: // other settings
0519: // fDocumentSystemId = null;
0520: fNamespaces = false;
0521: fNotifyBuiltInRefs = false;
0522:
0523: // initialize vars
0524: fMarkupDepth = 0;
0525: fCurrentElement = null;
0526: fShouldSkip = false;
0527: fAdd = false;
0528: fSkip = false;
0529: fElementStack.clear();
0530: //fElementStack2.clear();
0531: fHasExternalDTD = false;
0532: fStandalone = false;
0533: //fReplaceEntityReferences = true;
0534: //fSupportExternalEntities = true;
0535: Boolean bo = (Boolean) propertyManager
0536: .getProperty(ZephyrParserFactory.IS_REPLACING_ENTITY_REFERENCES);
0537: fReplaceEntityReferences = bo.booleanValue();
0538: bo = (Boolean) propertyManager
0539: .getProperty(ZephyrParserFactory.IS_SUPPORTING_EXTERNAL_ENTITIES);
0540: fSupportExternalEntities = bo.booleanValue();
0541: Boolean cdata = (Boolean) propertyManager
0542: .getProperty(Constants.ZEPHYR_PROPERTY_PREFIX
0543: + Constants.STAX_REPORT_CDATA_EVENT);
0544: if (cdata != null)
0545: fReportCdataEvent = cdata.booleanValue();
0546: Boolean coalesce = (Boolean) propertyManager
0547: .getProperty(XMLInputFactory.IS_COALESCING);
0548: if (coalesce != null)
0549: fIsCoalesce = coalesce.booleanValue();
0550: fReportCdataEvent = fIsCoalesce ? false
0551: : (fReportCdataEvent && true);
0552: //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true,
0553: //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application
0554: fReplaceEntityReferences = fIsCoalesce ? true
0555: : fReplaceEntityReferences;
0556: // setup Driver
0557: //we dont need to do this -- nb.
0558: //setScannerState(SCANNER_STATE_CONTENT);
0559: //setDriver(fContentDriver);
0560: fEntityStore = fEntityManager.getEntityStore();
0561: //fEntityManager.test();
0562:
0563: } // reset(XMLComponentManager)
0564:
0565: /**
0566: * Returns a list of feature identifiers that are recognized by
0567: * this component. This method may return null if no features
0568: * are recognized by this component.
0569: */
0570: public String[] getRecognizedFeatures() {
0571: return (String[]) (RECOGNIZED_FEATURES.clone());
0572: } // getRecognizedFeatures():String[]
0573:
0574: /**
0575: * Sets the state of a feature. This method is called by the component
0576: * manager any time after reset when a feature changes state.
0577: * <p>
0578: * <strong>Note:</strong> Components should silently ignore features
0579: * that do not affect the operation of the component.
0580: *
0581: * @param featureId The feature identifier.
0582: * @param state The state of the feature.
0583: *
0584: * @throws SAXNotRecognizedException The component should not throw
0585: * this exception.
0586: * @throws SAXNotSupportedException The component should not throw
0587: * this exception.
0588: */
0589: public void setFeature(String featureId, boolean state)
0590: throws XMLConfigurationException {
0591:
0592: super .setFeature(featureId, state);
0593:
0594: // Xerces properties
0595: if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
0596: String feature = featureId
0597: .substring(Constants.XERCES_FEATURE_PREFIX.length());
0598: if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) {
0599: fNotifyBuiltInRefs = state;
0600: }
0601: }
0602:
0603: } // setFeature(String,boolean)
0604:
0605: /**
0606: * Returns a list of property identifiers that are recognized by
0607: * this component. This method may return null if no properties
0608: * are recognized by this component.
0609: */
0610: public String[] getRecognizedProperties() {
0611: return (String[]) (RECOGNIZED_PROPERTIES.clone());
0612: } // getRecognizedProperties():String[]
0613:
0614: /**
0615: * Sets the value of a property. This method is called by the component
0616: * manager any time after reset when a property changes value.
0617: * <p>
0618: * <strong>Note:</strong> Components should silently ignore properties
0619: * that do not affect the operation of the component.
0620: *
0621: * @param propertyId The property identifier.
0622: * @param value The value of the property.
0623: *
0624: * @throws SAXNotRecognizedException The component should not throw
0625: * this exception.
0626: * @throws SAXNotSupportedException The component should not throw
0627: * this exception.
0628: */
0629: public void setProperty(String propertyId, Object value)
0630: throws XMLConfigurationException {
0631:
0632: super .setProperty(propertyId, value);
0633:
0634: // Xerces properties
0635: if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
0636: String property = propertyId
0637: .substring(Constants.XERCES_PROPERTY_PREFIX
0638: .length());
0639: if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) {
0640: fEntityManager = (XMLEntityManager) value;
0641: }
0642: return;
0643: }
0644:
0645: } // setProperty(String,Object)
0646:
0647: /**
0648: * Returns the default state for a feature, or null if this
0649: * component does not want to report a default value for this
0650: * feature.
0651: *
0652: * @param featureId The feature identifier.
0653: *
0654: * @since Xerces 2.2.0
0655: */
0656: public Boolean getFeatureDefault(String featureId) {
0657: for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
0658: if (RECOGNIZED_FEATURES[i].equals(featureId)) {
0659: return FEATURE_DEFAULTS[i];
0660: }
0661: }
0662: return null;
0663: } // getFeatureDefault(String):Boolean
0664:
0665: /**
0666: * Returns the default state for a property, or null if this
0667: * component does not want to report a default value for this
0668: * property.
0669: *
0670: * @param propertyId The property identifier.
0671: *
0672: * @since Xerces 2.2.0
0673: */
0674: public Object getPropertyDefault(String propertyId) {
0675: for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
0676: if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
0677: return PROPERTY_DEFAULTS[i];
0678: }
0679: }
0680: return null;
0681: } // getPropertyDefault(String):Object
0682:
0683: //
0684: // XMLDocumentSource methods
0685: //
0686:
/**
 * Sets the document handler that receives the XNI callbacks issued by
 * this scanner.
 *
 * @param documentHandler The handler to notify; may be null, in which
 *                        case the callbacks guarded by a null check in
 *                        this class are simply not issued.
 */
public void setDocumentHandler(XMLDocumentHandler documentHandler) {
    fDocumentHandler = documentHandler;
} // setDocumentHandler(XMLDocumentHandler)
0695:
/**
 * Returns the document handler.
 *
 * @return the handler last passed to setDocumentHandler, or null if none
 *         has been set.
 */
public XMLDocumentHandler getDocumentHandler() {
    return fDocumentHandler;
}
0700:
0701: //
0702: // XMLEntityHandler methods
0703: //
0704:
0705: /**
0706: * This method notifies of the start of an entity. The DTD has the
0707: * pseudo-name of "[dtd]" parameter entity names start with '%'; and
0708: * general entities are just specified by their name.
0709: *
0710: * @param name The name of the entity.
0711: * @param identifier The resource identifier.
0712: * @param encoding The auto-detected IANA encoding name of the entity
0713: * stream. This value will be null in those situations
0714: * where the entity encoding is not auto-detected (e.g.
0715: * internal entities or a document entity that is
0716: * parsed from a java.io.Reader).
0717: *
0718: * @throws XNIException Thrown by handler to signal an error.
0719: */
0720: public void startEntity(String name,
0721: XMLResourceIdentifier identifier, String encoding)
0722: throws XNIException {
0723:
0724: // keep track of this entity before fEntityDepth is increased
0725: if (fEntityDepth == fEntityStack.length) {
0726: int[] entityarray = new int[fEntityStack.length * 2];
0727: System.arraycopy(fEntityStack, 0, entityarray, 0,
0728: fEntityStack.length);
0729: fEntityStack = entityarray;
0730: }
0731: fEntityStack[fEntityDepth] = fMarkupDepth;
0732:
0733: super .startEntity(name, identifier, encoding);
0734:
0735: // WFC: entity declared in external subset in standalone doc
0736: if (fStandalone
0737: && fEntityStore.isEntityDeclInExternalSubset(name)) {
0738: reportFatalError(
0739: "MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE",
0740: new Object[] { name });
0741: }
0742:
0743: /** we are not calling the handlers yet.. */
0744: // call handler
0745: if (fDocumentHandler != null && !fScanningAttribute) {
0746: if (!name.equals("[xml]")) {
0747: fDocumentHandler.startGeneralEntity(name, identifier,
0748: encoding, null);
0749: }
0750: }
0751:
0752: } // startEntity(String,XMLResourceIdentifier,String)
0753:
0754: /**
0755: * This method notifies the end of an entity. The DTD has the pseudo-name
0756: * of "[dtd]" parameter entity names start with '%'; and general entities
0757: * are just specified by their name.
0758: *
0759: * @param name The name of the entity.
0760: *
0761: * @throws XNIException Thrown by handler to signal an error.
0762: */
0763: public void endEntity(String name) throws IOException, XNIException {
0764:
0765: /**
0766: * // flush possible pending output buffer - see scanContent
0767: * if (fInScanContent && fStringBuffer.length != 0
0768: * && fDocumentHandler != null) {
0769: * fDocumentHandler.characters(fStringBuffer, null);
0770: * fStringBuffer.length = 0; // make sure we know it's been flushed
0771: * }
0772: */
0773: super .endEntity(name);
0774:
0775: // make sure markup is properly balanced
0776: if (fMarkupDepth != fEntityStack[fEntityDepth]) {
0777: reportFatalError("MarkupEntityMismatch", null);
0778: }
0779:
0780: /**/
0781: // call handler
0782: if (fDocumentHandler != null && !fScanningAttribute) {
0783: if (!name.equals("[xml]")) {
0784: fDocumentHandler.endGeneralEntity(name, null);
0785: }
0786: }
0787:
0788: } // endEntity(String)
0789:
0790: //
0791: // Protected methods
0792: //
0793:
0794: // Driver factory methods
0795:
/**
 * Creates a content Driver.
 * <p>
 * NOTE: this factory method is invoked from the initializer of
 * <code>fContentDriver</code>, i.e. while the instance is still being
 * constructed. An override therefore runs before the subclass's own
 * field initializers and constructor body and must not rely on subclass
 * state.
 *
 * @return a new FragmentContentDriver.
 */
protected Driver createContentDriver() {
    return new FragmentContentDriver();
} // createContentDriver():Driver
0800:
0801: // scanning methods
0802:
0803: /**
0804: * Scans an XML or text declaration.
0805: * <p>
0806: * <pre>
0807: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
0808: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
0809: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
0810: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
0811: * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
0812: * | ('"' ('yes' | 'no') '"'))
0813: *
0814: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
0815: * </pre>
0816: *
0817: * @param scanningTextDecl True if a text declaration is to
0818: * be scanned instead of an XML
0819: * declaration.
0820: */
0821: protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
0822: throws IOException, XNIException {
0823:
0824: // scan decl
0825: super .scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
0826: fMarkupDepth--;
0827:
0828: // pseudo-attribute values
0829: String version = fStrings[0];
0830: String encoding = fStrings[1];
0831: String standalone = fStrings[2];
0832: fDeclaredEncoding = encoding;
0833: // set standalone
0834: fStandalone = standalone != null && standalone.equals("yes");
0835: ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information
0836: //but this information is only related with Document Entity.
0837: fEntityManager.setStandalone(fStandalone);
0838:
0839: // call handler
0840: if (fDocumentHandler != null) {
0841: if (scanningTextDecl) {
0842: fDocumentHandler.textDecl(version, encoding, null);
0843: } else {
0844: fDocumentHandler.xmlDecl(version, encoding, standalone,
0845: null);
0846: }
0847: }
0848:
0849: if (version != null) {
0850: fEntityScanner.setVersion(version);
0851: }
0852: // set encoding on reader
0853: if (encoding != null) {
0854: fEntityScanner.setEncoding(encoding);
0855: }
0856:
0857: } // scanXMLDeclOrTextDecl(boolean)
0858:
/**
 * Returns the target of the most recently scanned processing instruction.
 *
 * @return the PI target recorded by scanPIData, or null if no processing
 *         instruction has been scanned yet.
 */
public String getPITarget() {
    return fPITarget;
}
0862:
/**
 * Returns the buffer that holds processing instruction data.
 * <p>
 * NOTE(review): this returns the shared <code>fContentBuffer</code>, not
 * <code>fPIData</code>. That is only correct if the caller of scanPIData
 * passes <code>fContentBuffer</code> as the data buffer -- confirm against
 * the driver. Because the buffer is shared, its contents may not remain
 * valid once another event is scanned.
 *
 * @return the shared content buffer.
 */
public XMLStringBuffer getPIData() {
    return fContentBuffer;
}
0866:
0867: //XXX: why not this function behave as per the state of the parser?
0868: public XMLString getCharacterData() {
0869: if (fUsebuffer) {
0870: return fContentBuffer;
0871: } else {
0872: return fTempString;
0873: }
0874:
0875: }
0876:
0877: /**
0878: * Scans a processing data. This is needed to handle the situation
0879: * where a document starts with a processing instruction whose
0880: * target name <em>starts with</em> "xml". (e.g. xmlfoo)
0881: *
0882: * @param target The PI target
0883: * @param data The XMLStringBuffer to fill in with the data
0884: */
0885: protected void scanPIData(String target, XMLStringBuffer data)
0886: throws IOException, XNIException {
0887:
0888: super .scanPIData(target, data);
0889:
0890: //set the PI target and values
0891: fPITarget = target;
0892:
0893: fMarkupDepth--;
0894:
0895: // call handler
0896: if (fDocumentHandler != null) {
0897: fDocumentHandler.processingInstruction(target, data, null);
0898: }
0899:
0900: } // scanPIData(String)
0901:
0902: /**
0903: * Scans a comment.
0904: * <p>
0905: * <pre>
0906: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
0907: * </pre>
0908: * <p>
0909: * <strong>Note:</strong> Called after scanning past '<!--'
0910: */
0911: protected void scanComment() throws IOException, XNIException {
0912: fContentBuffer.clear();
0913: scanComment(fContentBuffer);
0914: //getTextCharacters can also be called for reading comments
0915: fUsebuffer = true;
0916: fMarkupDepth--;
0917:
0918: // call handler
0919: if (fDocumentHandler != null) {
0920: fDocumentHandler.comment(fContentBuffer, null);
0921: }
0922:
0923: } // scanComment()
0924:
0925: //xxx value returned by this function may not remain valid if another event is scanned.
0926: public String getComment() {
0927: return fContentBuffer.toString();
0928: }
0929:
0930: void addElement(String rawname) {
0931: if (fElementPointer < ELEMENT_ARRAY_LENGTH) {
0932: //storing element raw name in a linear list of array
0933: fElementArray[fElementPointer] = rawname;
0934: //storing elemnetPointer for particular element depth
0935:
0936: if (DEBUG_SKIP_ALGORITHM) {
0937: StringBuffer sb = new StringBuffer();
0938: sb.append(" Storing element information ");
0939: sb.append(" fElementPointer = " + fElementPointer);
0940: sb
0941: .append(" fElementRawname = "
0942: + fElementQName.rawname);
0943: sb.append(" fElementStack.fDepth = "
0944: + fElementStack.fDepth);
0945: System.out.println(sb.toString());
0946: }
0947:
0948: //store pointer information only when element depth is less MAX_DEPTH_LIMIT
0949: if (fElementStack.fDepth < MAX_DEPTH_LIMIT) {
0950: short column = storePointerForADepth(fElementPointer);
0951: if (column > 0) {
0952: short pointer = getElementPointer(
0953: (short) fElementStack.fDepth,
0954: (short) (column - 1));
0955: //identity comparison shouldn't take much time and we can rely on this
0956: //since its guaranteed to have same object id for same string.
0957: if (rawname == fElementArray[pointer]) {
0958: fShouldSkip = true;
0959: fLastPointerLocation = pointer;
0960: //reset the things and return.
0961: resetPointer((short) fElementStack.fDepth,
0962: column);
0963: fElementArray[fElementPointer] = null;
0964: return;
0965: } else {
0966: fShouldSkip = false;
0967: }
0968: }
0969: }
0970: fElementPointer++;
0971: }
0972: }
0973:
0974: void resetPointer(short depth, short column) {
0975: fPointerInfo[depth][column] = (short) 0;
0976: }
0977:
0978: //returns column information at which pointer was stored.
0979: short storePointerForADepth(short elementPointer) {
0980: short depth = (short) fElementStack.fDepth;
0981:
0982: //Stores element pointer locations at particular depth , only 4 pointer locations
0983: //are stored at particular depth for now.
0984: for (short i = 0; i < MAX_POINTER_AT_A_DEPTH; i++) {
0985:
0986: if (canStore(depth, i)) {
0987: fPointerInfo[depth][i] = elementPointer;
0988: if (DEBUG_SKIP_ALGORITHM) {
0989: StringBuffer sb = new StringBuffer();
0990: sb.append(" Pointer information ");
0991: sb.append(" fElementPointer = " + fElementPointer);
0992: sb.append(" fElementStack.fDepth = "
0993: + fElementStack.fDepth);
0994: sb.append(" column = " + i);
0995: System.out.println(sb.toString());
0996: }
0997: return i;
0998: }
0999: //else
1000: //pointer was not stored because we reached the limit
1001: }
1002: return -1;
1003: }
1004:
1005: boolean canStore(short depth, short column) {
1006: //colum = 0 , means first element at particular depth
1007: //column = 1, means second element at particular depth
1008: // calle should make sure that it doesn't call for value outside allowed co-ordinates
1009: return fPointerInfo[depth][column] == 0 ? true : false;
1010: }
1011:
1012: short getElementPointer(short depth, short column) {
1013: //colum = 0 , means first element at particular depth
1014: //column = 1, means second element at particular depth
1015: // calle should make sure that it doesn't call for value outside allowed co-ordinates
1016: return fPointerInfo[depth][column];
1017: }
1018:
1019: //this function assumes that string passed is not null and skips
1020: //the following string from the buffer this makes sure
1021: boolean skipFromTheBuffer(String rawname) throws IOException {
1022: if (fEntityScanner.skipString(rawname)) {
1023: char c = (char) fEntityScanner.peekChar();
1024: //If the start element was completely skipped we should encounter either ' '(space),
1025: //or '/' (in case of empty element) or '>'
1026: if (c == ' ' || c == '/' || c == '>') {
1027: fElementRawname = rawname;
1028: return true;
1029: } else {
1030: return false;
1031: }
1032: } else
1033: return false;
1034: }
1035:
1036: //this function assumes that string passed is not null and skips
1037: //the following string from the buffer this makes sure
1038: boolean skipQElement(QName name) throws IOException {
1039:
1040: final int c = fEntityScanner.getChar(name.characters.length);
1041: //if this character is still valid element name -- this means string can't match
1042: if (XMLChar.isName(c)) {
1043: return false;
1044: } else {
1045: return fEntityScanner.skipString(name.characters);
1046: }
1047: }
1048:
1049: boolean skipQElement(String rawname) throws IOException {
1050:
1051: final int c = fEntityScanner.getChar(rawname.length());
1052: //if this character is still valid element name -- this means string can't match
1053: if (XMLChar.isName(c)) {
1054: return false;
1055: } else {
1056: return fEntityScanner.skipString(rawname);
1057: }
1058: }
1059:
1060: protected boolean skipElement() throws IOException {
1061:
1062: if (!fShouldSkip)
1063: return false;
1064:
1065: if (fLastPointerLocation != 0) {
1066: //Look at the next element stored in the array list.. we might just get a match.
1067: String rawname = fElementArray[fLastPointerLocation + 1];
1068: if (rawname != null && skipFromTheBuffer(rawname)) {
1069: fLastPointerLocation++;
1070: if (DEBUG_SKIP_ALGORITHM) {
1071: System.out.println("Element " + fElementRawname
1072: + " was SKIPPED at pointer location = "
1073: + fLastPointerLocation);
1074: }
1075: return true;
1076: } else {
1077: //reset it back to zero... we haven't got the correct subset yet.
1078: fLastPointerLocation = 0;
1079:
1080: }
1081: }
1082: //xxx: we can put some logic here as from what column it should start looking
1083: //for now we always start at 0
1084: //fallback to tolerant algorithm, it would look for differnt element stored at different
1085: //depth and get us the pointer location.
1086: return fShouldSkip && skipElement((short) 0);
1087:
1088: }
1089:
1090: //start of the column at which it should try searching
1091: boolean skipElement(short column) throws IOException {
1092: short depth = (short) fElementStack.fDepth;
1093:
1094: if (depth > MAX_DEPTH_LIMIT) {
1095: return fShouldSkip = false;
1096: }
1097: for (short i = column; i < MAX_POINTER_AT_A_DEPTH; i++) {
1098: short pointer = getElementPointer(depth, i);
1099:
1100: if (pointer == 0) {
1101: return fShouldSkip = false;
1102: }
1103:
1104: if (fElementArray[pointer] != null
1105: && skipFromTheBuffer(fElementArray[pointer])) {
1106: if (DEBUG_SKIP_ALGORITHM) {
1107: System.out.println();
1108: System.out.println("Element " + fElementRawname
1109: + " was SKIPPED at depth = "
1110: + fElementStack.fDepth + " column = "
1111: + column);
1112: System.out.println();
1113: }
1114: fLastPointerLocation = pointer;
1115: return fShouldSkip = true;
1116: }
1117: }
1118: return fShouldSkip = false;
1119: }
1120:
1121: /**
1122: * Scans a start element. This method will handle the binding of
1123: * namespace information and notifying the handler of the start
1124: * of the element.
1125: * <p>
1126: * <pre>
1127: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1128: * [40] STag ::= '<' Name (S Attribute)* S? '>'
1129: * </pre>
1130: * <p>
1131: * <strong>Note:</strong> This method assumes that the leading
1132: * '<' character has been consumed.
1133: * <p>
1134: * <strong>Note:</strong> This method uses the fElementQName and
1135: * fAttributes variables. The contents of these variables will be
1136: * destroyed. The caller should copy important information out of
1137: * these variables before calling this method.
1138: * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT
1139: *
1140: * @return True if element is empty. (i.e. It matches
1141: * production [44].
1142: */
1143: // fElementQName will have the details of element just read..
1144: // fAttributes will have the details of all the attributes.
1145: protected boolean scanStartElement() throws IOException,
1146: XNIException {
1147:
1148: if (DEBUG_CONTENT_SCANNING)
1149: System.out.println(">>> scanStartElement()");
1150: //Try the skipping
1151: //Note that fAdd is only set to false, after analyzing data which
1152: //has been parsed
1153: //when skipping is true and no more elements should be added
1154: if (fShouldSkip && !fAdd) {
1155: //get the stored element -- if everything goes right this should match the
1156: //token in the buffer
1157: QName name = fElementStack2.getNext();
1158:
1159: if (DEBUG_SKIP_ALGORITHM) {
1160: System.out.println("Trying to skip String = "
1161: + name.rawname);
1162: }
1163:
1164: //Be conservative -- if skipping fails -- stop.
1165: fShouldSkip = skipQElement(name);
1166:
1167: if (DEBUG_SKIP_ALGORITHM) {
1168: System.out.println("STRING = " + name.rawname
1169: + " SKIPPED = " + fShouldSkip);
1170: }
1171: /**
1172: //this means that skipping was sucessful --
1173: if(fShouldSkip){
1174: //just change the reference
1175: fElementQName = name;
1176: fElementStack.setElement(fElementQName);
1177: }
1178: else{
1179: // name
1180: if (fNamespaces) {
1181: fEntityScanner.scanQName(fElementQName);
1182: }
1183: else {
1184: String element = fEntityScanner.scanName();
1185: fElementQName.setValues(null, element, element, null);
1186: //XXX: THIS IS UGLY -- THIS SHOULD BE CHANGED.
1187: //WE SHOULD DO IT AS PART OF QNAME -- NB.
1188: fElementQName.characters = fEntityScanner.scannedName;
1189: }
1190: }
1191: */
1192: }
1193:
1194: //Should an element be stored in stack2 -- return the reference
1195: //and set the same reference in stack1 too
1196: if (fAdd) {
1197: fElementQName = fElementStack2.nextElement();
1198: //fElementStack.setElement(fElementQName);
1199: } else {
1200: //get the next element from the stack
1201: fElementQName = fElementStack.nextElement();
1202: }
1203:
1204: //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName
1205: fCurrentElement = fElementQName;
1206:
1207: //if at all skipping should be done --
1208: //1. We are still at the stage of adding
1209: //2. Last skipping operation failed and skipping has been stopped
1210: if (!fShouldSkip || fAdd) {
1211: // name
1212: if (fNamespaces) {
1213: fEntityScanner.scanQName(fElementQName);
1214: } else {
1215: String name = fEntityScanner.scanName();
1216: fElementQName.setValues(null, name, name, null);
1217: //XXX: THIS IS UGLY -- THIS SHOULD BE CHANGED.
1218: //WE SHOULD DO IT AS PART OF QNAME -- NB.
1219: fElementQName.characters = fEntityScanner.scannedName;
1220: }
1221: }
1222: //when the elements are being added , we need to check if we are set for skipping the elements
1223: if (fAdd) {
1224: fElementStack2.matchElement(fElementQName);
1225: }
1226:
1227: String rawname = fElementQName.rawname;
1228:
1229: // attributes
1230: boolean empty = false;
1231: //remove all the attributes.
1232: fAttributes.removeAllAttributes();
1233:
1234: // this code will read all the attributes Name, Value pair.
1235: // It will also consume '>' for start element or '/>' if its empty element.
1236: do {
1237: // spaces
1238: boolean sawSpace = fEntityScanner.skipSpaces();
1239:
1240: // end tag?
1241: int c = fEntityScanner.peekChar();
1242: if (c == '>') {
1243: fEntityScanner.scanChar();
1244: break;
1245: } else if (c == '/') {
1246: fEntityScanner.scanChar();
1247: if (!fEntityScanner.skipChar('>')) {
1248: reportFatalError("ElementUnterminated",
1249: new Object[] { rawname });
1250: }
1251: empty = true;
1252: break;
1253: } else if (!isValidNameStartChar(c) || !sawSpace) {
1254: reportFatalError("ElementUnterminated",
1255: new Object[] { rawname });
1256: }
1257:
1258: // attributes
1259: scanAttribute(fAttributes);
1260:
1261: } while (true);
1262:
1263: // call handler
1264: if (empty) {
1265: //decrease the markup depth..
1266: fMarkupDepth--;
1267: // check that this element was opened in the same entity
1268: if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
1269: reportFatalError("ElementEntityMismatch",
1270: new Object[] { fCurrentElement.rawname });
1271: }
1272: //Also pass the information if the document handler is registered.
1273: //Infact this can be layered on top of pull model.
1274: if (fDocumentHandler != null) {
1275: //fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
1276: }
1277: //pop the element off the stack..
1278: fElementStack.popElement();
1279:
1280: } else if (fDocumentHandler != null) {
1281: //Also pass the information if the document handler is registered.
1282: //Infact this can be layered on top of pull model.
1283:
1284: //fDocumentHandler.startElement(fElementQName, fAttributes, null);
1285: }
1286:
1287: if (DEBUG_CONTENT_SCANNING)
1288: System.out.println("<<< scanStartElement(): " + empty);
1289: return empty;
1290:
1291: } // scanStartElement():boolean
1292:
1293: public boolean hasAttributes() {
1294: return fAttributes.getLength() > 0 ? true : false;
1295: }
1296:
1297: /**
1298: * Scans an attribute.
1299: * <p>
1300: * <pre>
1301: * [41] Attribute ::= Name Eq AttValue
1302: * </pre>
1303: * <p>
1304: * <strong>Note:</strong> This method assumes that the next
1305: * character on the stream is the first character of the attribute
1306: * name.
1307: * <p>
1308: * <strong>Note:</strong> This method uses the fAttributeQName and
1309: * fQName variables. The contents of these variables will be
1310: * destroyed.
1311: *
1312: * @param attributes The attributes list for the scanned attribute.
1313: */
1314:
1315: /**
1316: * protected void scanAttribute(AttributeIteratorImpl attributes)
1317: * throws IOException, XNIException {
1318: * if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanAttribute()");
1319: *
1320: *
1321: * // name
1322: * if (fNamespaces) {
1323: * fEntityScanner.scanQName(fAttributeQName);
1324: * }
1325: * else {
1326: * String name = fEntityScanner.scanName();
1327: * fAttributeQName.setValues(null, name, name, null);
1328: * }
1329: *
1330: * // equals
1331: * fEntityScanner.skipSpaces();
1332: * if (!fEntityScanner.skipChar('=')) {
1333: * reportFatalError("EqRequiredInAttribute",
1334: * new Object[]{fAttributeQName.rawname});
1335: * }
1336: * fEntityScanner.skipSpaces();
1337: *
1338: *
1339: * // content
1340: * int oldLen = attributes.getLength();
1341: */
1342: /**xxx there is one check of duplicate attribute that has been removed.
1343: * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null);
1344: *
1345: * // WFC: Unique Att Spec
1346: * if (oldLen == attributes.getLength()) {
1347: * reportFatalError("AttributeNotUnique",
1348: * new Object[]{fCurrentElement.rawname,
1349: * fAttributeQName.rawname});
1350: * }
1351: */
1352:
1353: /*
1354: //REVISIT: one more case needs to be included: external PE and standalone is no
1355: boolean isVC = fHasExternalDTD && !fStandalone;
1356: scanAttributeValue(fTempString, fTempString2,
1357: fAttributeQName.rawname, attributes,
1358: oldLen, isVC);
1359:
1360: //attributes.setValue(oldLen, fTempString.toString());
1361: //attributes.setNonNormalizedValue(oldLen, fTempString2.toString());
1362: //attributes.setSpecified(oldLen, true);
1363:
1364: AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true);
1365: fAttributes.addAttribute(attribute);
1366: if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanAttribute()");
1367: } // scanAttribute(XMLAttributes)
1368:
1369: */
1370:
1371: /** return the attribute iterator implementation */
1372: public XMLAttributesIteratorImpl getAttributeIterator() {
1373: if (dtdGrammarUtil != null && fAddDefaultAttr) {
1374: dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,
1375: fAttributes);
1376: fAddDefaultAttr = false;
1377: }
1378: return fAttributes;
1379: }
1380:
1381: /** return if the doucment is standalone */
1382: public boolean isStandAlone() {
1383: return fStandalone;
1384: }
1385:
1386: /**
1387: * Scans an attribute name value pair.
1388: * <p>
1389: * <pre>
1390: * [41] Attribute ::= Name Eq AttValue
1391: * </pre>
1392: * <p>
1393: * <strong>Note:</strong> This method assumes that the next
1394: * character on the stream is the first character of the attribute
1395: * name.
1396: * <p>
1397: * <strong>Note:</strong> This method uses the fAttributeQName and
1398: * fQName variables. The contents of these variables will be
1399: * destroyed.
1400: *
1401: * @param attributes The attributes list for the scanned attribute.
1402: */
1403:
1404: protected void scanAttribute(XMLAttributes attributes)
1405: throws IOException, XNIException {
1406: if (DEBUG_CONTENT_SCANNING)
1407: System.out.println(">>> scanAttribute()");
1408:
1409: // name
1410: if (fNamespaces) {
1411: fEntityScanner.scanQName(fAttributeQName);
1412: } else {
1413: String name = fEntityScanner.scanName();
1414: fAttributeQName.setValues(null, name, name, null);
1415: }
1416:
1417: // equals
1418: fEntityScanner.skipSpaces();
1419: if (!fEntityScanner.skipChar('=')) {
1420: reportFatalError("EqRequiredInAttribute",
1421: new Object[] { fAttributeQName.rawname });
1422: }
1423: fEntityScanner.skipSpaces();
1424:
1425: // content
1426: int oldLen = attributes.getLength();
1427: //if the attribute name already exists.. new value is replaced with old value
1428: attributes.addAttribute(fAttributeQName,
1429: XMLSymbols.fCDATASymbol, null);
1430:
1431: // WFC: Unique Att Spec
1432: //attributes count will be same if the current attribute name already exists for this element name.
1433: //this means there are two duplicate attributes.
1434: if (oldLen == attributes.getLength()) {
1435: reportFatalError("AttributeNotUnique", new Object[] {
1436: fCurrentElement.rawname, fAttributeQName.rawname });
1437: }
1438: //REVISIT: one more case needs to be included: external PE and standalone is no
1439: boolean isVC = fHasExternalDTD && !fStandalone;
1440: //fTempString would store attribute value
1441: ///fTempString2 would store attribute non-normalized value
1442: scanAttributeValue(fTempString, fTempString2,
1443: fAttributeQName.rawname, attributes, oldLen, isVC);
1444: attributes.setValue(oldLen, fTempString.toString());
1445: //attributes.setNonNormalizedValue(oldLen, fTempString2.toString());
1446: attributes.setSpecified(oldLen, true);
1447:
1448: if (DEBUG_CONTENT_SCANNING)
1449: System.out.println("<<< scanAttribute()");
1450: } // scanAttribute(XMLAttributes)
1451:
1452: /**
1453: * Scans element content.
1454: *
1455: * @return Returns the next character on the stream.
1456: */
1457: //CHANGED:
1458: //EARLIER: scanContent()
1459: //NOW: scanContent(XMLStringBuffer)
1460: //It makes things easy if this functions takes XMLStringBuffer as parameter..
1461: //this function appends the data to the buffer.
1462: protected int scanContent(XMLStringBuffer content)
1463: throws IOException, XNIException {
1464: //set the fTempString length to 0 before passing it on to scanContent
1465: //scanContent sets the correct co-ordinates as per the content read
1466: fTempString.length = 0;
1467: int c = fEntityScanner.scanContent(fTempString);
1468: content.append(fTempString);
1469: fTempString.length = 0;
1470: if (c == '\r') {
1471: // happens when there is the character reference
1472: //xxx: We know the next chracter.. we should just skip it and add ']' directlry
1473: fEntityScanner.scanChar();
1474: content.append((char) c);
1475: c = -1;
1476: } else if (c == ']') {
1477: //fStringBuffer.clear();
1478: //xxx: We know the next chracter.. we should just skip it and add ']' directlry
1479: content.append((char) fEntityScanner.scanChar());
1480: // remember where we are in case we get an endEntity before we
1481: // could flush the buffer out - this happens when we're parsing an
1482: // entity which ends with a ]
1483: fInScanContent = true;
1484: //
1485: // We work on a single character basis to handle cases such as:
1486: // ']]]>' which we might otherwise miss.
1487: //
1488: if (fEntityScanner.skipChar(']')) {
1489: content.append(']');
1490: while (fEntityScanner.skipChar(']')) {
1491: content.append(']');
1492: }
1493: if (fEntityScanner.skipChar('>')) {
1494: reportFatalError("CDEndInContent", null);
1495: }
1496: }
1497: fInScanContent = false;
1498: c = -1;
1499: }
1500: if (fDocumentHandler != null && content.length > 0) {
1501: fDocumentHandler.characters(content, null);
1502: }
1503: return c;
1504:
1505: } // scanContent():int
1506:
1507: /**
1508: * Scans a CDATA section.
1509: * <p>
1510: * <strong>Note:</strong> This method uses the fTempString and
1511: * fStringBuffer variables.
1512: *
1513: * @param complete True if the CDATA section is to be scanned
1514: * completely.
1515: *
1516: * @return True if CDATA is completely scanned.
1517: */
1518: //CHANGED:
1519: protected boolean scanCDATASection(XMLStringBuffer contentBuffer,
1520: boolean complete) throws IOException, XNIException {
1521:
1522: // call handler
1523: if (fDocumentHandler != null) {
1524: fDocumentHandler.startCDATA(null);
1525: }
1526:
1527: while (true) {
1528: //scanData will fill the contentBuffer
1529: if (!fEntityScanner.scanData("]]>", contentBuffer)) {
1530: break;
1531: /** We dont need all this code if we pass ']]>' as delimeter..
1532: int brackets = 2;
1533: while (fEntityScanner.skipChar(']')) {
1534: brackets++;
1535: }
1536:
1537: //When we find more than 2 square brackets
1538: if (fDocumentHandler != null && brackets > 2) {
1539: //we dont need to clear the buffer..
1540: //contentBuffer.clear();
1541: for (int i = 2; i < brackets; i++) {
1542: contentBuffer.append(']');
1543: }
1544: fDocumentHandler.characters(contentBuffer, null);
1545: }
1546:
1547: if (fEntityScanner.skipChar('>')) {
1548: break;
1549: }
1550: if (fDocumentHandler != null) {
1551: //we dont need to clear the buffer now..
1552: //contentBuffer.clear();
1553: contentBuffer.append("]]");
1554: fDocumentHandler.characters(contentBuffer, null);
1555: }
1556: **/
1557: } else {
1558: int c = fEntityScanner.peekChar();
1559: if (c != -1 && isInvalidLiteral(c)) {
1560: if (XMLChar.isHighSurrogate(c)) {
1561: //contentBuffer.clear();
1562: //scan surrogates if any....
1563: scanSurrogates(contentBuffer);
1564: } else {
1565: reportFatalError(
1566: "InvalidCharInCDSect",
1567: new Object[] { Integer.toString(c, 16) });
1568: fEntityScanner.scanChar();
1569: }
1570: }
1571: //by this time we have also read surrogate contents if any...
1572: if (fDocumentHandler != null) {
1573: fDocumentHandler.characters(contentBuffer, null);
1574: }
1575: break;
1576: }
1577: }
1578: fMarkupDepth--;
1579:
1580: if (fDocumentHandler != null && contentBuffer.length > 0) {
1581: fDocumentHandler.characters(contentBuffer, null);
1582: }
1583:
1584: // call handler
1585: if (fDocumentHandler != null) {
1586: fDocumentHandler.endCDATA(null);
1587: }
1588:
1589: return true;
1590:
1591: } // scanCDATASection(XMLStringBuffer, boolean):boolean
1592:
1593: /**
1594: * Scans an end element.
1595: * <p>
1596: * <pre>
1597: * [42] ETag ::= '</' Name S? '>'
1598: * </pre>
1599: * <p>
1600: * <strong>Note:</strong> This method uses the fElementQName variable.
1601: * The contents of this variable will be destroyed. The caller should
1602: * copy the needed information out of this variable before calling
1603: * this method.
1604: *
1605: * @return The element depth.
1606: */
1607: protected int scanEndElement() throws IOException, XNIException {
1608: if (DEBUG_CONTENT_SCANNING)
1609: System.out.println(">>> scanEndElement()");
1610:
1611: String rawname = fElementStack.popElement().rawname;
1612:
1613: // Take advantage of the fact that next string _should_ be "fElementQName.rawName",
1614: //In scanners most of the time is consumed on checks done for XML characters, we can
1615: // optimize on it and avoid the checks done for endElement,
1616: //we will also avoid symbol table lookup - neeraj.bajaj@sun.com
1617:
1618: // this should work both for namespace processing true or false...
1619:
1620: //REVISIT: if the string is not the same as expected.. we need to do better error handling..
1621: //We can skip this for now... In any case if the string doesn't match -- document is not well formed.
1622:
1623: //Pass characters instead of string , this gives better performance than strings.
1624: //Use the character array for rawname present in the symboltable. -Venu
1625: if (!fEntityScanner.skipString(rawname)) {
1626: reportFatalError("ETagRequired", new Object[] { rawname });
1627: }
1628:
1629: // end
1630: fEntityScanner.skipSpaces();
1631: if (!fEntityScanner.skipChar('>')) {
1632: reportFatalError("ETagUnterminated",
1633: new Object[] { rawname });
1634: }
1635: fMarkupDepth--;
1636:
1637: //we have increased the depth for two markup "<" characters
1638: fMarkupDepth--;
1639:
1640: // check that this element was opened in the same entity
1641: if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
1642: reportFatalError("ElementEntityMismatch",
1643: new Object[] { rawname });
1644: }
1645:
1646: /**/
1647: // call handler
1648: if (fDocumentHandler != null) {
1649: //xxx: Commenting this now since we are not passing any information
1650: //along the pipeline. However, we do need to set the values if we
1651: //need to pass the values along the pipeline.
1652:
1653: // fDocumentHandler.endElement(fElementQName, null);
1654: }
1655:
1656: return fMarkupDepth;
1657:
1658: } // scanEndElement():int
1659:
1660: /**
1661: * Scans a character reference.
1662: * <p>
1663: * <pre>
1664: * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1665: * </pre>
1666: */
1667: protected void scanCharReference() throws IOException, XNIException {
1668:
1669: fStringBuffer2.clear();
1670: int ch = scanCharReferenceValue(fStringBuffer2, null);
1671: fMarkupDepth--;
1672: if (ch != -1) {
1673: // call handler
1674: if (fDocumentHandler != null) {
1675: if (fNotifyCharRefs) {
1676: fDocumentHandler.startGeneralEntity(
1677: fCharRefLiteral, null, null, null);
1678: }
1679:
1680: fDocumentHandler.characters(fStringBuffer2, null);
1681: if (fNotifyCharRefs) {
1682: fDocumentHandler.endGeneralEntity(fCharRefLiteral,
1683: null);
1684: }
1685: }
1686: }
1687:
1688: } // scanCharReference()
1689:
1690: /**
1691: * Scans an entity reference.
1692: *
1693: * @return returns true if the new entity is started. If it was built-in entity
1694: * 'false' is returned.
1695: * @throws IOException Thrown if i/o error occurs.
1696: * @throws XNIException Thrown if handler throws exception upon
1697: * notification.
1698: */
1699: protected void scanEntityReference(XMLStringBuffer content)
1700: throws IOException, XNIException {
1701: String name = fEntityScanner.scanName();
1702: if (name == null) {
1703: reportFatalError("NameRequiredInReference", null);
1704: }
1705: if (!fEntityScanner.skipChar(';')) {
1706: reportFatalError("SemicolonRequiredInReference",
1707: new Object[] { name });
1708: }
1709: if (fEntityStore.isUnparsedEntity(name)) {
1710: reportFatalError("ReferenceToUnparsedEntity",
1711: new Object[] { name });
1712: }
1713: fMarkupDepth--;
1714: fCurrentEntityName = name;
1715:
1716: // handle built-in entities
1717: if (name == fAmpSymbol) {
1718: handleCharacter('&', fAmpSymbol, content);
1719: fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1720: return;
1721: } else if (name == fLtSymbol) {
1722: handleCharacter('<', fLtSymbol, content);
1723: fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1724: return;
1725: } else if (name == fGtSymbol) {
1726: handleCharacter('>', fGtSymbol, content);
1727: fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1728: return;
1729: } else if (name == fQuotSymbol) {
1730: handleCharacter('"', fQuotSymbol, content);
1731: fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1732: return;
1733: } else if (name == fAposSymbol) {
1734: handleCharacter('\'', fAposSymbol, content);
1735: fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1736: return;
1737: }
1738:
1739: //1. if the entity is external and support to external entities is not required
1740: // 2. or entities should not be replaced
1741: // set the state to ENTITY_REFERENCE & return
1742:
1743: if ((fEntityStore.isExternalEntity(name) && !fSupportExternalEntities)
1744: || (!fEntityStore.isExternalEntity(name) && !fReplaceEntityReferences)) {
1745: fScannerState = SCANNER_STATE_REFERENCE;
1746: return;
1747: }
1748:
1749: // start general entity
1750: if (!fEntityStore.isDeclaredEntity(name)) {
1751: //REVISIT: one more case needs to be included: external PE and standalone is no
1752: if (fHasExternalDTD && !fStandalone) {
1753: if (fValidation)
1754: fErrorReporter.reportError(fEntityScanner,
1755: XMLMessageFormatter.XML_DOMAIN,
1756: "EntityNotDeclared", new Object[] { name },
1757: XMLErrorReporter.SEVERITY_ERROR);
1758: } else
1759: reportFatalError("EntityNotDeclared",
1760: new Object[] { name });
1761: }
1762: //we are starting the entity even if the entity was not declared
1763: //if that was the case it its taken care in XMLEntityManager.startEntity()
1764: //we immediately call the endEntity. Application gets to know if there was
1765: //any entity that was not declared.
1766: fEntityManager.startEntity(name, false);
1767:
1768: } // scanEntityReference()
1769:
1770: // utility methods
1771:
1772: /**
1773: * Calls document handler with a single character resulting from
1774: * built-in entity resolution.
1775: *
1776: * @param c
1777: * @param entity built-in name
1778: * @param XMLStringBuffer append the character to buffer
1779: *
1780: * we really dont need to call this function -- this function is only required when
1781: * we integrate with rest of Xerces2. SO maintaining the current behavior and still
1782: * calling this function to hanlde built-in entity reference.
1783: *
1784: */
1785: private void handleCharacter(char c, String entity,
1786: XMLStringBuffer content) throws XNIException {
1787: foundBuiltInRefs = true;
1788: content.append(c);
1789: if (fDocumentHandler != null) {
1790: fSingleChar[0] = c;
1791: if (fNotifyBuiltInRefs) {
1792: fDocumentHandler.startGeneralEntity(entity, null, null,
1793: null);
1794: }
1795: fTempString.setValues(fSingleChar, 0, 1);
1796: fDocumentHandler.characters(fTempString, null);
1797:
1798: if (fNotifyBuiltInRefs) {
1799: fDocumentHandler.endGeneralEntity(entity, null);
1800: }
1801: }
1802: } // handleCharacter(char)
1803:
1804: // helper methods
1805:
1806: /**
1807: * Sets the scanner state.
1808: *
1809: * @param state The new scanner state.
1810: */
1811: protected final void setScannerState(int state) {
1812:
1813: fScannerState = state;
1814: if (DEBUG_SCANNER_STATE) {
1815: System.out.print("### setScannerState: ");
1816: //System.out.print(fScannerState);
1817: System.out.print(getScannerStateName(state));
1818: System.out.println();
1819: }
1820:
1821: } // setScannerState(int)
1822:
1823: /**
1824: * Sets the Driver.
1825: *
1826: * @param Driver The new Driver.
1827: */
1828: protected final void setDriver(Driver driver) {
1829: fDriver = driver;
1830: if (DEBUG_DISPATCHER) {
1831: System.out.print("%%% setDriver: ");
1832: System.out.print(getDriverName(driver));
1833: System.out.println();
1834: }
1835: }
1836:
1837: //
1838: // Private methods
1839: //
1840:
1841: /** Returns the scanner state name. */
1842: protected String getScannerStateName(int state) {
1843:
1844: switch (state) {
1845: case SCANNER_STATE_DOCTYPE:
1846: return "SCANNER_STATE_DOCTYPE";
1847: case SCANNER_STATE_ROOT_ELEMENT:
1848: return "SCANNER_STATE_ROOT_ELEMENT";
1849: case SCANNER_STATE_START_OF_MARKUP:
1850: return "SCANNER_STATE_START_OF_MARKUP";
1851: case SCANNER_STATE_COMMENT:
1852: return "SCANNER_STATE_COMMENT";
1853: case SCANNER_STATE_PI:
1854: return "SCANNER_STATE_PI";
1855: case SCANNER_STATE_CONTENT:
1856: return "SCANNER_STATE_CONTENT";
1857: case SCANNER_STATE_REFERENCE:
1858: return "SCANNER_STATE_REFERENCE";
1859: case SCANNER_STATE_END_OF_INPUT:
1860: return "SCANNER_STATE_END_OF_INPUT";
1861: case SCANNER_STATE_TERMINATED:
1862: return "SCANNER_STATE_TERMINATED";
1863: case SCANNER_STATE_CDATA:
1864: return "SCANNER_STATE_CDATA";
1865: case SCANNER_STATE_TEXT_DECL:
1866: return "SCANNER_STATE_TEXT_DECL";
1867: case SCANNER_STATE_ATTRIBUTE:
1868: return "SCANNER_STATE_ATTRIBUTE";
1869: case SCANNER_STATE_ATTRIBUTE_VALUE:
1870: return "SCANNER_STATE_ATTRIBUTE_VALUE";
1871: case SCANNER_STATE_START_ELEMENT_TAG:
1872: return "SCANNER_STATE_START_ELEMENT_TAG";
1873: case SCANNER_STATE_END_ELEMENT_TAG:
1874: return "SCANNER_STATE_END_ELEMENT_TAG";
1875: case SCANNER_STATE_CHARACTER_DATA:
1876: return "SCANNER_STATE_CHARACTER_DATA";
1877: }
1878:
1879: return "??? (" + state + ')';
1880:
1881: } // getScannerStateName(int):String
1882:
1883: public String getEntityName() {
1884: //return the cached name
1885: return fCurrentEntityName;
1886: }
1887:
1888: /** Returns the driver name. */
1889: public String getDriverName(Driver driver) {
1890:
1891: if (DEBUG_DISPATCHER) {
1892: if (driver != null) {
1893: String name = driver.getClass().getName();
1894: int index = name.lastIndexOf('.');
1895: if (index != -1) {
1896: name = name.substring(index + 1);
1897: index = name.lastIndexOf('$');
1898: if (index != -1) {
1899: name = name.substring(index + 1);
1900: }
1901: }
1902: return name;
1903: }
1904: }
1905: return "null";
1906:
1907: } // getDriverName():String
1908:
1909: //
1910: // Classes
1911: //
1912:
1913: /**
1914: * @author Neeraj Bajaj, Sun Microsystems.
1915: */
1916: protected static final class Element {
1917:
1918: //
1919: // Data
1920: //
1921:
1922: /** Symbol. */
1923: public QName qname;
1924:
1925: //raw name stored as characters
1926: public char[] fRawname;
1927:
1928: /** The next Element entry. */
1929: public Element next;
1930:
1931: //
1932: // Constructors
1933: //
1934:
1935: /**
1936: * Constructs a new Element from the given QName and next Element
1937: * reference.
1938: */
1939: public Element(QName qname, Element next) {
1940: this .qname.setValues(qname);
1941: this .fRawname = qname.rawname.toCharArray();
1942: this .next = next;
1943: }
1944:
1945: } // class Element
1946:
1947: /**
1948: * Element stack.
1949: *
1950: * @author Neeraj Bajaj, Sun Microsystems.
1951: */
1952: protected class ElementStack2 {
1953:
1954: //
1955: // Data
1956: //
1957:
1958: /** The stack data. */
1959: protected QName[] fQName = new QName[20];
1960:
1961: //Element depth
1962: protected int fDepth;
1963: //total number of elements
1964: protected int fCount;
1965: //current position
1966: protected int fPosition;
1967: //Mark refers to the position
1968: protected int fMark;
1969:
1970: protected int fLastDepth;
1971:
1972: //
1973: // Constructors
1974: //
1975:
1976: /** Default constructor. */
1977: public ElementStack2() {
1978: for (int i = 0; i < fQName.length; i++) {
1979: fQName[i] = new QName();
1980: }
1981: fMark = fPosition = 1;
1982: } // <init>()
1983:
1984: public void resize() {
1985: /**
1986: int length = fElements.length;
1987: Element [] temp = new Element[length * 2];
1988: System.arraycopy(fElements, 0, temp, 0, length);
1989: fElements = temp;
1990: */
1991: //resize QNames
1992: int oldLength = fQName.length;
1993: QName[] tmp = new QName[oldLength * 2];
1994: System.arraycopy(fQName, 0, tmp, 0, oldLength);
1995: fQName = tmp;
1996:
1997: for (int i = oldLength; i < fQName.length; i++) {
1998: fQName[i] = new QName();
1999: }
2000:
2001: }
2002:
2003: //
2004: // Public methods
2005: //
2006:
2007: /** Check if the element scanned during the start element
2008: *matches the stored element.
2009: *
2010: *@return true if the match suceeds.
2011: */
2012: public boolean matchElement(QName element) {
2013: //last depth is the depth when last elemnt was pushed
2014: //if last depth is greater than current depth
2015: if (DEBUG_SKIP_ALGORITHM) {
2016: System.out.println("fLastDepth = " + fLastDepth);
2017: System.out.println("fDepth = " + fDepth);
2018: }
2019: boolean match = false;
2020: if (fLastDepth > fDepth && fDepth <= 2) {
2021: if (DEBUG_SKIP_ALGORITHM) {
2022: System.out
2023: .println("Checking if the elements match "
2024: + element.rawname + " , "
2025: + fQName[fDepth].rawname);
2026: }
2027: if (element.rawname == fQName[fDepth].rawname) {
2028: fAdd = false;
2029: //mark this position
2030: //decrease the depth by 1 as arrays are 0 based
2031: fMark = fDepth - 1;
2032: //we found the match and from next element skipping will start, add 1
2033: fPosition = fMark + 1;
2034: match = true;
2035: //Once we get match decrease the count -- this was increased by nextElement()
2036: --fCount;
2037: if (DEBUG_SKIP_ALGORITHM) {
2038: System.out
2039: .println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED");
2040: System.out.println("fMark = " + fMark);
2041: System.out.println("fPosition = " + fPosition);
2042: System.out.println("fDepth = " + fDepth);
2043: System.out.println("fCount = " + fCount);
2044: }
2045: } else {
2046: fAdd = true;
2047: if (DEBUG_SKIP_ALGORITHM)
2048: System.out.println("fAdd is " + fAdd);
2049: }
2050: }
2051: //store the last depth
2052: fLastDepth = fDepth++;
2053: return match;
2054: } // pushElement(QName):QName
2055:
2056: /**
2057: * This function doesn't increase depth. The function in this function is
2058: *broken down into two functions for efficiency. <@see>matchElement</see>.
2059: * This function just returns the pointer to the object and its values are set.
2060: *
2061: *@return QName reference to the next element in the list
2062: */
2063: public QName nextElement() {
2064:
2065: //if number of elements becomes equal to the length of array -- stop the skipping
2066: if (fCount == fQName.length) {
2067: fShouldSkip = false;
2068: fAdd = false;
2069: if (DEBUG_SKIP_ALGORITHM)
2070: System.out
2071: .println("SKIPPING STOPPED, fShouldSkip = "
2072: + fShouldSkip);
2073: //xxx: this is not correct, we are returning the last element
2074: //this wont make any difference since flag has been set to 'false'
2075: return fQName[--fCount];
2076: }
2077: if (DEBUG_SKIP_ALGORITHM) {
2078: System.out.println("fCount = " + fCount);
2079: }
2080: return fQName[fCount++];
2081:
2082: }
2083:
2084: /** Note that this function is considerably different than nextElement()
2085: * This function just returns the previously stored elements
2086: */
2087: public QName getNext() {
2088: //when position reaches number of elements in the list..
2089: //set the position back to mark, making it a circular linked list.
2090: if (fPosition == fCount) {
2091: fPosition = fMark;
2092: }
2093: return fQName[fPosition++];
2094: }
2095:
2096: /** returns the current depth
2097: */
2098: public int popElement() {
2099: return fDepth--;
2100: }
2101:
2102: /** Clears the stack without throwing away existing QName objects. */
2103: public void clear() {
2104: fLastDepth = 0;
2105: fDepth = 0;
2106: fCount = 0;
2107: fPosition = fMark = 1;
2108: } // clear()
2109:
2110: } // class ElementStack
2111:
2112: /**
2113: * Element stack. This stack operates without synchronization, error
2114: * checking, and it re-uses objects instead of throwing popped items
2115: * away.
2116: *
2117: * @author Andy Clark, IBM
2118: */
2119: protected class ElementStack {
2120:
2121: //
2122: // Data
2123: //
2124:
2125: /** The stack data. */
2126: protected QName[] fElements;
2127: protected int[] fInt = new int[20];
2128:
2129: //Element depth
2130: protected int fDepth;
2131: //total number of elements
2132: protected int fCount;
2133: //current position
2134: protected int fPosition;
2135: //Mark refers to the position
2136: protected int fMark;
2137:
2138: protected int fLastDepth;
2139:
2140: //
2141: // Constructors
2142: //
2143:
2144: /** Default constructor. */
2145: public ElementStack() {
2146: fElements = new QName[20];
2147: for (int i = 0; i < fElements.length; i++) {
2148: fElements[i] = new QName();
2149: }
2150: } // <init>()
2151:
2152: //
2153: // Public methods
2154: //
2155:
2156: /**
2157: * Pushes an element on the stack.
2158: * <p>
2159: * <strong>Note:</strong> The QName values are copied into the
2160: * stack. In other words, the caller does <em>not</em> orphan
2161: * the element to the stack. Also, the QName object returned
2162: * is <em>not</em> orphaned to the caller. It should be
2163: * considered read-only.
2164: *
2165: * @param element The element to push onto the stack.
2166: *
2167: * @return Returns the actual QName object that stores the
2168: */
2169: //XXX: THIS FUNCTION IS NOT USED
2170: public QName pushElement(QName element) {
2171: if (fDepth == fElements.length) {
2172: QName[] array = new QName[fElements.length * 2];
2173: System.arraycopy(fElements, 0, array, 0, fDepth);
2174: fElements = array;
2175: for (int i = fDepth; i < fElements.length; i++) {
2176: fElements[i] = new QName();
2177: }
2178: }
2179: fElements[fDepth].setValues(element);
2180: return fElements[fDepth++];
2181: } // pushElement(QName):QName
2182:
2183: /** Note that this function is considerably different than nextElement()
2184: * This function just returns the previously stored elements
2185: */
2186: public QName getNext() {
2187: //when position reaches number of elements in the list..
2188: //set the position back to mark, making it a circular linked list.
2189: if (fPosition == fCount) {
2190: fPosition = fMark;
2191: }
2192: //store the position of last opened tag at particular depth
2193: //fInt[++fDepth] = fPosition;
2194: if (DEBUG_SKIP_ALGORITHM) {
2195: System.out.println("Element at fPosition = "
2196: + fPosition + " is "
2197: + fElements[fPosition].rawname);
2198: }
2199: //return fElements[fPosition++];
2200: return fElements[fPosition];
2201: }
2202:
2203: /** This function should be called only when element was skipped sucessfully.
2204: * 1. Increase the depth - because element was sucessfully skipped.
2205: *2. Store the position of the element token in array "last opened tag" at depth.
2206: *3. increase the position counter so as to point to the next element in the array
2207: */
2208: public void push() {
2209:
2210: fInt[++fDepth] = fPosition++;
2211: }
2212:
2213: /** Check if the element scanned during the start element
2214: *matches the stored element.
2215: *
2216: *@return true if the match suceeds.
2217: */
2218: public boolean matchElement(QName element) {
2219: //last depth is the depth when last elemnt was pushed
2220: //if last depth is greater than current depth
2221: //if(DEBUG_SKIP_ALGORITHM){
2222: // System.out.println("Check if the element " + element.rawname + " matches");
2223: // System.out.println("fLastDepth = " + fLastDepth);
2224: // System.out.println("fDepth = " + fDepth);
2225: //}
2226: boolean match = false;
2227: if (fLastDepth > fDepth && fDepth <= 3) {
2228: if (DEBUG_SKIP_ALGORITHM) {
2229: System.out
2230: .println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----");
2231: System.out.println("Depth = " + fDepth
2232: + " Checking if INCOMING element "
2233: + element.rawname
2234: + " match STORED ELEMENT "
2235: + fElements[fDepth - 1].rawname);
2236: }
2237: if (element.rawname == fElements[fDepth - 1].rawname) {
2238: fAdd = false;
2239: //mark this position
2240: //decrease the depth by 1 as arrays are 0 based
2241: fMark = fDepth - 1;
2242: //we found the match
2243: fPosition = fMark;
2244: match = true;
2245: //Once we get match decrease the count -- this was increased by nextElement()
2246: --fCount;
2247: if (DEBUG_SKIP_ALGORITHM) {
2248: System.out
2249: .println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false");
2250: System.out.println("fMark = " + fMark);
2251: System.out.println("fPosition = " + fPosition);
2252: System.out.println("fDepth = " + fDepth);
2253: System.out.println("fCount = " + fCount);
2254: System.out
2255: .println("---------MATCH SUCEEDED-----------------");
2256: System.out.println("");
2257: }
2258: } else {
2259: fAdd = true;
2260: if (DEBUG_SKIP_ALGORITHM)
2261: System.out.println("fAdd is " + fAdd);
2262: }
2263: }
2264: //store the position for the current depth
2265: //when we are adding the elements, when skipping
2266: //starts even then this should be tracked ie. when
2267: //calling getNext()
2268: if (match) {
2269: //from next element skipping will start, add 1
2270: fInt[fDepth] = fPosition++;
2271: } else {
2272: if (DEBUG_SKIP_ALGORITHM) {
2273: System.out.println("At depth = " + fDepth
2274: + "array position is = " + (fCount - 1));
2275: }
2276: //sicne fInt[fDepth] contains pointer to the element array which are 0 based.
2277: fInt[fDepth] = fCount - 1;
2278: }
2279:
2280: //if number of elements becomes equal to the length of array -- stop the skipping
2281: //xxx: should we do "fCount == fInt.length"
2282: if (fCount == fElements.length) {
2283: fSkip = false;
2284: fAdd = false;
2285: //reposition the stack -- it seems to be too complex document and there is no symmerty in structure
2286: reposition();
2287: if (DEBUG_SKIP_ALGORITHM) {
2288: System.out
2289: .println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED");
2290: System.out.println("REPOSITIONING THE STACK");
2291: System.out
2292: .println("-----------SKIPPING STOPPED----------");
2293: System.out.println("");
2294: }
2295: return false;
2296: }
2297: if (DEBUG_SKIP_ALGORITHM) {
2298: if (match) {
2299: System.out.println("Storing fPosition = "
2300: + fInt[fDepth] + " at fDepth = " + fDepth);
2301: } else {
2302: System.out.println("Storing fCount = "
2303: + fInt[fDepth] + " at fDepth = " + fDepth);
2304: }
2305: }
2306: //store the last depth
2307: fLastDepth = fDepth;
2308: return match;
2309: } // matchElement(QName):QName
2310:
2311: /**
2312: * Returns the next element on the stack.
2313: *
2314: * @return Returns the actual QName object. Callee should
2315: * use this object to store the details of next element encountered.
2316: */
2317: public QName nextElement() {
2318: if (fSkip) {
2319: fDepth++;
2320: //boundary checks are done in matchElement()
2321: return fElements[fCount++];
2322: } else if (fDepth == fElements.length) {
2323: QName[] array = new QName[fElements.length * 2];
2324: System.arraycopy(fElements, 0, array, 0, fDepth);
2325: fElements = array;
2326: for (int i = fDepth; i < fElements.length; i++) {
2327: fElements[i] = new QName();
2328: }
2329: }
2330:
2331: return fElements[fDepth++];
2332:
2333: } // pushElement(QName):QName
2334:
2335: /**
2336: * Pops an element off of the stack by setting the values of
2337: * the specified QName.
2338: * <p>
2339: * <strong>Note:</strong> The object returned is <em>not</em>
2340: * orphaned to the caller. Therefore, the caller should consider
2341: * the object to be read-only.
2342: */
2343: public QName popElement() {
2344: //return the same object that was pushed -- this would avoid
2345: //setting the values for every end element.
2346: //STRONG: this object is read only -- this object reference shouldn't be stored.
2347: if (fSkip || fAdd) {
2348: if (DEBUG_SKIP_ALGORITHM) {
2349: System.out.println("POPPING Element, at position "
2350: + fInt[fDepth]
2351: + " element at that count is = "
2352: + fElements[fInt[fDepth]].rawname);
2353: System.out.println("");
2354: }
2355: return fElements[fInt[fDepth--]];
2356: } else {
2357: if (DEBUG_SKIP_ALGORITHM) {
2358: System.out
2359: .println("Retrieveing element at depth = "
2360: + fDepth + " is "
2361: + fElements[fDepth].rawname);
2362: }
2363: return fElements[--fDepth];
2364: }
2365: //element.setValues(fElements[--fDepth]);
2366: }// popElement(QName)
2367:
2368: /** Reposition the stack. fInt [] contains all the opened tags at particular depth.
2369: * Transfer all the opened tags starting from depth '2' to the current depth and reposition them
2370: *as per the depth.
2371: */
2372: public void reposition() {
2373: for (int i = 2; i <= fDepth; i++) {
2374: fElements[i - 1] = fElements[fInt[i]];
2375: }
2376: if (DEBUG_SKIP_ALGORITHM) {
2377: for (int i = 0; i < fDepth; i++) {
2378: System.out.println("fElements[" + i + "]" + " = "
2379: + fElements[i].rawname);
2380: }
2381: }
2382: }
2383:
2384: /** Clears the stack without throwing away existing QName objects. */
2385: public void clear() {
2386: fDepth = 0;
2387: fLastDepth = 0;
2388: fCount = 0;
2389: fPosition = fMark = 1;
2390:
2391: } // clear()
2392:
2393: /**
2394: * This function is as a result of optimization done for endElement --
2395: * we dont need to set the value for every end element encouterd.
2396: * For Well formedness checks we can have the same QName object that was pushed.
2397: * the values will be set only if application need to know about the endElement
2398: * -- neeraj.bajaj@sun.com
2399: */
2400:
2401: public QName getLastPoppedElement() {
2402: return fElements[fDepth];
2403: }
2404: } // class ElementStack
2405:
2406: /**
2407: * Drives the parser to the next state/event on the input. Parser is guaranteed
2408: * to stop at the next state/event.
2409: *
2410: * Internally XML document is divided into several states. Each state represents
2411: * a sections of XML document. When this functions returns normally, it has read
2412: * the section of XML document and returns the state corresponding to section of
2413: * document which has been read. For optimizations, a particular driver
2414: * can read ahead of the section of document (state returned) just read and
2415: * can maintain a different internal state.
2416: *
2417: *
2418: * @author Neeraj Bajaj, Sun Microsystems
2419: */
2420: protected interface Driver {
2421:
2422: /**
2423: * Drives the parser to the next state/event on the input. Parser is guaranteed
2424: * to stop at the next state/event.
2425: *
2426: * Internally XML document is divided into several states. Each state represents
2427: * a sections of XML document. When this functions returns normally, it has read
2428: * the section of XML document and returns the state corresponding to section of
2429: * document which has been read. For optimizations, a particular driver
2430: * can read ahead of the section of document (state returned) just read and
2431: * can maintain a different internal state.
2432: *
2433: * @return state representing the section of document just read.
2434: *
2435: * @throws IOException Thrown on i/o error.
2436: * @throws XNIException Thrown on parse error.
2437: */
2438:
2439: public int next() throws IOException, XNIException;
2440:
2441: } // interface Driver
2442:
2443: /**
2444: * Driver to handle content scanning. This driver is capable of reading
2445: * the fragment of XML document. When it has finished reading fragment
2446: * of XML documents, it can pass the job of reading to another driver.
2447: *
2448: * This class has been modified as per the new design which is more suited to
2449: * efficiently build pull parser. Lot of performance improvements have been done and
2450: * the code has been added to support stax functionality/features.
2451: *
2452: * @author Neeraj Bajaj, Sun Microsystems
2453: *
2454: *
2455: * @author Andy Clark, IBM
2456: * @author Eric Ye, IBM
2457: */
2458: protected class FragmentContentDriver implements Driver {
2459:
2460: //
2461: // Driver methods
2462: //
2463: private boolean fContinueDispatching = true;
2464: private boolean fScanningForMarkup = true;
2465:
2466: /**
2467: * decides the appropriate state of the parser
2468: */
2469: private void startOfMarkup() throws IOException {
2470: fMarkupDepth++;
2471: final int ch = fEntityScanner.peekChar();
2472:
2473: switch (ch) {
2474: case '?': {
2475: setScannerState(SCANNER_STATE_PI);
2476: fEntityScanner.skipChar(ch);
2477: break;
2478: }
2479: case '!': {
2480: fEntityScanner.skipChar(ch);
2481: if (fEntityScanner.skipChar('-')) {
2482: if (!fEntityScanner.skipChar('-')) {
2483: reportFatalError("InvalidCommentStart", null);
2484: }
2485: setScannerState(SCANNER_STATE_COMMENT);
2486: } else if (fEntityScanner.skipString(cdata)) {
2487: setScannerState(SCANNER_STATE_CDATA);
2488: } else if (!scanForDoctypeHook()) {
2489: reportFatalError("MarkupNotRecognizedInContent",
2490: null);
2491: }
2492: break;
2493: }
2494: case '/': {
2495: setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
2496: fEntityScanner.skipChar(ch);
2497: break;
2498: }
2499: default: {
2500: if (isValidNameStartChar(ch)) {
2501: setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
2502: } else {
2503: reportFatalError("MarkupNotRecognizedInContent",
2504: null);
2505: }
2506: }
2507: }
2508:
2509: }//startOfMarkup
2510:
2511: private void startOfContent() throws IOException {
2512: if (fEntityScanner.skipChar('<')) {
2513: setScannerState(SCANNER_STATE_START_OF_MARKUP);
2514: } else if (fEntityScanner.skipChar('&')) {
2515: setScannerState(SCANNER_STATE_REFERENCE); //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE
2516: } else {
2517: //element content is there..
2518: setScannerState(SCANNER_STATE_CHARACTER_DATA);
2519: }
2520: }//startOfContent
2521:
2522: /**
2523: *
2524: * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser.
2525: * At any point of time when in doubt over the current state of the parser, the state should be
2526: * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of
2527: * the parser to one of its sub state.
2528: * sub states are defined in the parser on the basis of different XML component like
2529: * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc..
2530: * These sub states help the parser to have fine control over the parsing. These are the
2531: * different milepost, parser stops at each sub state (milepost). Based on this state it is
2532: * decided if paresr needs to stop at next milepost ??
2533: *
2534: */
2535: public void decideSubState() throws IOException {
2536: while (fScannerState == SCANNER_STATE_CONTENT
2537: || fScannerState == SCANNER_STATE_START_OF_MARKUP) {
2538:
2539: switch (fScannerState) {
2540:
2541: case SCANNER_STATE_CONTENT: {
2542: startOfContent();
2543: break;
2544: }
2545:
2546: case SCANNER_STATE_START_OF_MARKUP: {
2547: startOfMarkup();
2548: break;
2549: }
2550: }
2551: }
2552: }//decideSubState
2553:
2554: /**
2555: * Drives the parser to the next state/event on the input. Parser is guaranteed
2556: * to stop at the next state/event. Internally XML document
2557: * is divided into several states. Each state represents a sections of XML
2558: * document. When this functions returns normally, it has read the section
2559: * of XML document and returns the state corresponding to section of
2560: * document which has been read. For optimizations, a particular driver
2561: * can read ahead of the section of document (state returned) just read and
2562: * can maintain a different internal state.
2563: *
2564: * State returned corresponds to Stax states.
2565: *
2566: * @return state representing the section of document just read.
2567: *
2568: * @throws IOException Thrown on i/o error.
2569: * @throws XNIException Thrown on parse error.
2570: */
2571:
2572: public int next() throws IOException, XNIException {
2573: try {
2574: if (DEBUG_NEXT) {
2575: System.out.println("NOW IN FragmentContentDriver");
2576: System.out
2577: .println("Entering the FragmentContentDriver with = "
2578: + getScannerStateName(fScannerState));
2579: }
2580:
2581: //decide the actual sub state of the scanner.For more information refer to the javadoc of
2582: //decideSubState.
2583:
2584: switch (fScannerState) {
2585: case SCANNER_STATE_CONTENT: {
2586: final int ch = fEntityScanner.peekChar();
2587: if (ch == '<') {
2588: fEntityScanner.scanChar();
2589: setScannerState(SCANNER_STATE_START_OF_MARKUP);
2590: } else if (ch == '&') {
2591: fEntityScanner.scanChar();
2592: setScannerState(SCANNER_STATE_REFERENCE); //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE
2593: break;
2594: } else {
2595: //element content is there..
2596: setScannerState(SCANNER_STATE_CHARACTER_DATA);
2597: break;
2598: }
2599: }
2600:
2601: case SCANNER_STATE_START_OF_MARKUP: {
2602: startOfMarkup();
2603: break;
2604: }//case: SCANNER_STATE_START_OF_MARKUP
2605:
2606: }//end of switch
2607: //decideSubState() ;
2608:
2609: //do some special handling if isCoalesce is set to true.
2610: if (fIsCoalesce) {
2611: fUsebuffer = true;
2612: //if the last section was character data
2613: if (fLastSectionWasCharacterData) {
2614:
2615: //if we dont encounter any CDATA or ENITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA
2616: //return the last scanned charactrer data.
2617: if ((fScannerState != SCANNER_STATE_CDATA)
2618: && (fScannerState != SCANNER_STATE_REFERENCE)
2619: && (fScannerState != SCANNER_STATE_CHARACTER_DATA)) {
2620: fLastSectionWasCharacterData = false;
2621: return XMLEvent.CHARACTERS;
2622: }
2623: }//if last section was CDATA or ENTITY REFERENCE
2624: //xxx: there might be another entity reference or CDATA after this
2625: //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo>
2626: else if ((fLastSectionWasCData || fLastSectionWasEntityReference)) {
2627: //and current state is not SCANNER_STATE_CHARACTER_DATA
2628: //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE
2629: //this means there is nothing more to be coalesced.
2630: //return the CHARACTERS event.
2631: if ((fScannerState != SCANNER_STATE_CDATA)
2632: && (fScannerState != SCANNER_STATE_REFERENCE)
2633: && (fScannerState != SCANNER_STATE_CHARACTER_DATA)) {
2634:
2635: fLastSectionWasCData = false;
2636: fLastSectionWasEntityReference = false;
2637: return XMLEvent.CHARACTERS;
2638: }
2639: }
2640: }
2641:
2642: if (DEBUG_NEXT) {
2643: System.out
2644: .println("Actual scanner state set by decideSubState is = "
2645: + getScannerStateName(fScannerState));
2646: }
2647:
2648: switch (fScannerState) {
2649:
2650: case XMLEvent.START_DOCUMENT:
2651: return XMLEvent.START_DOCUMENT;
2652:
2653: case SCANNER_STATE_START_ELEMENT_TAG: {
2654:
2655: //xxx this function returns true when element is empty.. can be linked to end element event.
2656: //returns true if the element is empty
2657: fEmptyElement = scanStartElement();
2658: //if the element is empty the next event is "end element"
2659: if (fEmptyElement) {
2660: setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
2661: } else {
2662: //set the next possible state
2663: setScannerState(SCANNER_STATE_CONTENT);
2664: }
2665: return XMLEvent.START_ELEMENT;
2666: }
2667:
2668: case SCANNER_STATE_CHARACTER_DATA: {
2669: if (DEBUG_COALESCE) {
2670: System.out.println("fLastSectionWasCData = "
2671: + fLastSectionWasCData);
2672: System.out.println("fIsCoalesce = "
2673: + fIsCoalesce);
2674: }
2675: //if last section was either entity reference or cdata or character data we should be using buffer
2676: fUsebuffer = fLastSectionWasEntityReference
2677: || fLastSectionWasCData
2678: || fLastSectionWasCharacterData;
2679:
2680: //we should not clear the buffer only when last state was REFERENCE or CDATA or CHARACTER_DATA
2681: if (fIsCoalesce
2682: && (fLastSectionWasEntityReference
2683: || fLastSectionWasCData || fLastSectionWasCharacterData)) {
2684: fLastSectionWasEntityReference = false;
2685: fLastSectionWasCData = false;
2686: fLastSectionWasCharacterData = true;
2687: fUsebuffer = true;
2688: } else {
2689: //clear the buffer
2690: fContentBuffer.clear();
2691: }
2692: //set the fTempString length to 0 before passing it on to scanContent
2693: //scanContent sets the correct co-ordinates as per the content read
2694:
2695: fTempString.length = 0;
2696: int c = fEntityScanner.scanContent(fTempString);
2697: if (DEBUG) {
2698: System.out.println("fTempString = "
2699: + fTempString);
2700: }
2701: if (fEntityScanner.skipChar('<')) {
2702: //check if we have reached end of element
2703: if (fEntityScanner.skipChar('/')) {
2704: //increase the mark up depth
2705: fMarkupDepth++;
2706: fLastSectionWasCharacterData = false;
2707: setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
2708: //check if its start of new element
2709: } else if (XMLChar.isNameStart(fEntityScanner
2710: .peekChar())) {
2711: fMarkupDepth++;
2712: fLastSectionWasCharacterData = false;
2713: setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
2714: } else {
2715: setScannerState(SCANNER_STATE_START_OF_MARKUP);
2716: //there can be cdata ahead if coalesce is true we should call again
2717: if (fIsCoalesce) {
2718: fUsebuffer = true;
2719: fLastSectionWasCharacterData = true;
2720: fContentBuffer.append(fTempString);
2721: fTempString.length = 0;
2722: return fDriver.next();
2723: }
2724: }
2725: //in case last section was either entity reference or cdata or character data -- we should be using buffer
2726: if (fUsebuffer) {
2727: fContentBuffer.append(fTempString);
2728: fTempString.length = 0;
2729: }
2730: if (DEBUG) {
2731: System.out
2732: .println("NOT USING THE BUFFER, STRING = "
2733: + fTempString.toString());
2734: }
2735: if (dtdGrammarUtil != null
2736: && dtdGrammarUtil
2737: .isIgnorableWhiteSpace(fContentBuffer)) {
2738: if (DEBUG)
2739: System.out
2740: .println("Return SPACE EVENT");
2741: return XMLEvent.SPACE;
2742: } else
2743: return XMLEvent.CHARACTERS;
2744:
2745: } else {
2746: fUsebuffer = true;
2747: if (DEBUG) {
2748: System.out.println("fContentBuffer = "
2749: + fContentBuffer);
2750: System.out.println("fTempString = "
2751: + fTempString);
2752: }
2753: fContentBuffer.append(fTempString);
2754: fTempString.length = 0;
2755: }
2756: if (c == '\r') {
2757: if (DEBUG) {
2758: System.out.println("'\r' character found");
2759: }
2760: // happens when there is the character reference
2761: //xxx: We know the next chracter.. we should just skip it and add ']' directlry
2762: fEntityScanner.scanChar();
2763: fUsebuffer = true;
2764: fContentBuffer.append((char) c);
2765: c = -1;
2766: } else if (c == ']') {
2767: //fStringBuffer.clear();
2768: //xxx: We know the next chracter.. we should just skip it and add ']' directlry
2769: fUsebuffer = true;
2770: fContentBuffer.append((char) fEntityScanner
2771: .scanChar());
2772: // remember where we are in case we get an endEntity before we
2773: // could flush the buffer out - this happens when we're parsing an
2774: // entity which ends with a ]
2775: fInScanContent = true;
2776:
2777: // We work on a single character basis to handle cases such as:
2778: // ']]]>' which we might otherwise miss.
2779: //
2780: if (fEntityScanner.skipChar(']')) {
2781: fContentBuffer.append(']');
2782: while (fEntityScanner.skipChar(']')) {
2783: fContentBuffer.append(']');
2784: }
2785: if (fEntityScanner.skipChar('>')) {
2786: reportFatalError("CDEndInContent", null);
2787: }
2788: }
2789: c = -1;
2790: fInScanContent = false;
2791: }
2792:
2793: do {
2794: //xxx: we should be using only one buffer..
2795: // we need not to grow the buffer only when isCoalesce() is not true;
2796:
2797: if (c == '<') {
2798: fEntityScanner.scanChar();
2799: setScannerState(SCANNER_STATE_START_OF_MARKUP);
2800: break;
2801: }//xxx what should be the behavior if entity reference is present in the content ?
2802: else if (c == '&') {
2803: fEntityScanner.scanChar();
2804: setScannerState(SCANNER_STATE_REFERENCE);
2805: break;
2806: }///xxx since this part is also characters, it should be merged...
2807: else if (c != -1 && isInvalidLiteral(c)) {
2808: if (XMLChar.isHighSurrogate(c)) {
2809: // special case: surrogates
2810: scanSurrogates(fContentBuffer);
2811: setScannerState(SCANNER_STATE_CONTENT);
2812: } else {
2813: reportFatalError(
2814: "InvalidCharInContent",
2815: new Object[] { Integer
2816: .toString(c, 16) });
2817: fEntityScanner.scanChar();
2818: }
2819: break;
2820: }
2821: //xxx: scanContent also gives character callback.
2822: c = scanContent(fContentBuffer);
2823: //we should not be iterating again if fIsCoalesce is not set to true
2824:
2825: if (!fIsCoalesce) {
2826: setScannerState(SCANNER_STATE_CONTENT);
2827: break;
2828: }
2829:
2830: } while (true);
2831:
2832: //if (fDocumentHandler != null) {
2833: // fDocumentHandler.characters(fContentBuffer, null);
2834: //}
2835: if (DEBUG)
2836: System.out
2837: .println("USING THE BUFFER, STRING START="
2838: + fContentBuffer.toString()
2839: + "=END");
2840: //if fIsCoalesce is true there might be more data so call fDriver.next()
2841: if (fIsCoalesce) {
2842: fLastSectionWasCharacterData = true;
2843: return fDriver.next();
2844: } else {
2845: if (dtdGrammarUtil != null
2846: && dtdGrammarUtil
2847: .isIgnorableWhiteSpace(fContentBuffer)) {
2848: if (DEBUG)
2849: System.out
2850: .println("Return SPACE EVENT");
2851: return XMLEvent.SPACE;
2852: } else
2853: return XMLEvent.CHARACTERS;
2854: }
2855: }
2856:
2857: case SCANNER_STATE_END_ELEMENT_TAG: {
2858: if (fEmptyElement) {
2859: //set it back to false.
2860: fEmptyElement = false;
2861: setScannerState(SCANNER_STATE_CONTENT);
2862: //check the case when there is comment after single element document
2863: //<foo/> and some comment after this
2864: return (fMarkupDepth == 0 && elementDepthIsZeroHook()) ? XMLEvent.END_ELEMENT
2865: : XMLEvent.END_ELEMENT;
2866:
2867: } else if (scanEndElement() == 0) {
2868: //It is last element of the document
2869: if (elementDepthIsZeroHook()) {
2870: //if element depth is zero , it indicates the end of the document
2871: //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function
2872: //xxx understand this point once again..
2873: return XMLEvent.END_ELEMENT;
2874: }
2875:
2876: }
2877: setScannerState(SCANNER_STATE_CONTENT);
2878: return XMLEvent.END_ELEMENT;
2879: }
2880:
2881: case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT:
2882: scanComment();
2883: setScannerState(SCANNER_STATE_CONTENT);
2884: return XMLEvent.COMMENT;
2885: //break;
2886: }
2887: case SCANNER_STATE_PI: { //SCANNER_STATE_PI: {
2888: //clear the buffer first
2889: fContentBuffer.clear();
2890: //xxx: which buffer should be passed. Ideally we shouldn't have
2891: //more than two buffers --
2892: //xxx: where should we add the switch for buffering.
2893: scanPI(fContentBuffer);
2894: setScannerState(SCANNER_STATE_CONTENT);
2895: return XMLEvent.PROCESSING_INSTRUCTION;
2896: //break;
2897: }
2898: case SCANNER_STATE_CDATA: { //SCANNER_STATE_CDATA: {
2899: //xxx: What if CDATA is the first event
2900: //<foo><![CDATA[hello<><>]]>append</foo>
2901:
2902: //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or
2903: //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE
2904: if (fIsCoalesce
2905: && (fLastSectionWasEntityReference
2906: || fLastSectionWasCData || fLastSectionWasCharacterData)) {
2907: fLastSectionWasCData = true;
2908: fLastSectionWasEntityReference = false;
2909: fLastSectionWasCharacterData = false;
2910: }//if we dont need to coalesce clear the buffer
2911: else {
2912: fContentBuffer.clear();
2913: }
2914: fUsebuffer = true;
2915: //CDATA section is completely read in all the case.
2916: scanCDATASection(fContentBuffer, true);
2917: setScannerState(SCANNER_STATE_CONTENT);
2918: //1. if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true
2919: //and just call fDispatche.next(). Since we have set the scanner state to
2920: //SCANNER_STATE_CONTENT (super state) parser will automatically recover and
2921: //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event
2922: //2. Check if application has set for reporting CDATA event
2923: //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent
2924: //return the cdata event as characters.
2925: if (fIsCoalesce) {
2926: fLastSectionWasCData = true;
2927: //there might be more data to coalesce.
2928: return fDriver.next();
2929: } else if (fReportCdataEvent) {
2930: return XMLEvent.CDATA;
2931: } else {
2932: return XMLEvent.CHARACTERS;
2933: }
2934: }
2935:
2936: case SCANNER_STATE_REFERENCE: {
2937: fMarkupDepth++;
2938: foundBuiltInRefs = false;
2939:
2940: //we should not clear the buffer only when the last state was either CDATA or
2941: //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE
2942: if (fIsCoalesce
2943: && (fLastSectionWasEntityReference
2944: || fLastSectionWasCData || fLastSectionWasCharacterData)) {
2945: //fLastSectionWasEntityReference or fLastSectionWasCData are only
2946: //used when fIsCoalesce is set to true.
2947: fLastSectionWasEntityReference = true;
2948: fLastSectionWasCData = false;
2949: fLastSectionWasCharacterData = false;
2950: }//if we dont need to coalesce clear the buffer
2951: else {
2952: fContentBuffer.clear();
2953: }
2954: fUsebuffer = true;
2955: //take care of character reference
2956: if (fEntityScanner.skipChar('#')) {
2957: scanCharReferenceValue(fContentBuffer, null);
2958: fMarkupDepth--;
2959: if (!fIsCoalesce) {
2960: setScannerState(SCANNER_STATE_CONTENT);
2961: return XMLEvent.CHARACTERS;
2962: }
2963: } else {
2964: //this function also starts new entity
2965: scanEntityReference(fContentBuffer);
2966: }
2967:
2968: //if there was built-in entity reference & coalesce is not true
2969: //return CHARACTERS
2970: if (fScannerState == SCANNER_STATE_BUILT_IN_REFS
2971: && !fIsCoalesce) {
2972: setScannerState(SCANNER_STATE_CONTENT);
2973: return XMLEvent.CHARACTERS;
2974: }
2975:
2976: //if there was a text declaration, call next() it will be taken care.
2977: if (fScannerState == SCANNER_STATE_TEXT_DECL) {
2978: fLastSectionWasEntityReference = true;
2979: return fDriver.next();
2980: }
2981:
2982: if (fScannerState == SCANNER_STATE_REFERENCE) {
2983: setScannerState(SCANNER_STATE_CONTENT);
2984: return XMLEvent.ENTITY_REFERENCE;
2985: }
2986: //Wether it was character reference, entity reference or built-in entity
2987: //set the next possible state to SCANNER_STATE_CONTENT
2988: setScannerState(SCANNER_STATE_CONTENT);
2989: fLastSectionWasEntityReference = true;
2990: return fDriver.next();
2991: }
2992:
2993: case SCANNER_STATE_TEXT_DECL: {
2994: // scan text decl
2995: if (fEntityScanner.skipString("<?xml")) {
2996: fMarkupDepth++;
2997: // NOTE: special case where entity starts with a PI
2998: // whose name starts with "xml" (e.g. "xmlfoo")
2999: if (isValidNameChar(fEntityScanner.peekChar())) {
3000: fStringBuffer.clear();
3001: fStringBuffer.append("xml");
3002:
3003: if (fNamespaces) {
3004: while (isValidNCName(fEntityScanner
3005: .peekChar())) {
3006: fStringBuffer
3007: .append((char) fEntityScanner
3008: .scanChar());
3009: }
3010: } else {
3011: while (isValidNameChar(fEntityScanner
3012: .peekChar())) {
3013: fStringBuffer
3014: .append((char) fEntityScanner
3015: .scanChar());
3016: }
3017: }
3018: String target = fSymbolTable.addSymbol(
3019: fStringBuffer.ch,
3020: fStringBuffer.offset,
3021: fStringBuffer.length);
3022: fStringBuffer.clear();
3023: scanPIData(target, fStringBuffer);
3024: }
3025:
3026: // standard text declaration
3027: else {
3028: //xxx: this function gives callback
3029: scanXMLDeclOrTextDecl(true);
3030: }
3031: }
3032: // now that we've straightened out the readers, we can read in chunks:
3033: fEntityManager.fCurrentEntity.mayReadChunks = true;
3034: setScannerState(SCANNER_STATE_CONTENT);
3035: //xxx: we don't return any state, so how do we get to know about TEXT declarations.
3036: //it seems we have to careful when to allow function issue a callback
3037: //and when to allow adapter issue a callback.
3038: return fDriver.next();
3039: }
3040:
3041: case SCANNER_STATE_ROOT_ELEMENT: {
3042: if (scanRootElementHook()) {
3043: //return true;
3044: return -1;
3045: }
3046: setScannerState(SCANNER_STATE_CONTENT);
3047: break;
3048: }
3049: case SCANNER_STATE_CHAR_REFERENCE: {
3050: fContentBuffer.clear();
3051: scanCharReferenceValue(fContentBuffer, null);
3052: fMarkupDepth--;
3053: setScannerState(SCANNER_STATE_CONTENT);
3054: return XMLEvent.CHARACTERS;
3055: }
3056: default:
3057: throw new XNIException("Scanner State "
3058: + fScannerState + " not Recognized ");
3059:
3060: }//switch
3061: }
3062: // premature end of file
3063: catch (EOFException e) {
3064: endOfFileHook(e);
3065: return -1;
3066: }
3067:
3068: return -1;
3069:
3070: }//next
3071:
3072: //
3073: // Protected methods
3074: //
3075:
3076: // hooks
3077:
3078: // NOTE: These hook methods are added so that the full document
3079: // scanner can share the majority of code with this class.
3080:
3081: /**
3082: * Scan for DOCTYPE hook. This method is a hook for subclasses
3083: * to add code to handle scanning for a the "DOCTYPE" string
3084: * after the string "<!" has been scanned.
3085: *
3086: * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
3087: * was not scanned.
3088: */
3089: protected boolean scanForDoctypeHook() throws IOException,
3090: XNIException {
3091: return false;
3092: } // scanForDoctypeHook():boolean
3093:
3094: /**
3095: * Element depth iz zero. This methos is a hook for subclasses
3096: * to add code to handle when the element depth hits zero. When
3097: * scanning a document fragment, an element depth of zero is
3098: * normal. However, when scanning a full XML document, the
3099: * scanner must handle the trailing miscellanous section of
3100: * the document after the end of the document's root element.
3101: *
3102: * @return True if the caller should stop and return true which
3103: * allows the scanner to switch to a new scanning
3104: * driver. A return value of false indicates that
3105: * the content driver should continue as normal.
3106: */
3107: protected boolean elementDepthIsZeroHook() throws IOException,
3108: XNIException {
3109: return false;
3110: } // elementDepthIsZeroHook():boolean
3111:
3112: /**
3113: * Scan for root element hook. This method is a hook for
3114: * subclasses to add code that handles scanning for the root
3115: * element. When scanning a document fragment, there is no
3116: * "root" element. However, when scanning a full XML document,
3117: * the scanner must handle the root element specially.
3118: *
3119: * @return True if the caller should stop and return true which
3120: * allows the scanner to switch to a new scanning
3121: * driver. A return value of false indicates that
3122: * the content driver should continue as normal.
3123: */
3124: protected boolean scanRootElementHook() throws IOException,
3125: XNIException {
3126: return false;
3127: } // scanRootElementHook():boolean
3128:
3129: /**
3130: * End of file hook. This method is a hook for subclasses to
3131: * add code that handles the end of file. The end of file in
3132: * a document fragment is OK if the markup depth is zero.
3133: * However, when scanning a full XML document, an end of file
3134: * is always premature.
3135: */
3136: protected void endOfFileHook(EOFException e)
3137: throws IOException, XNIException {
3138:
3139: // NOTE: An end of file is only only an error if we were
3140: // in the middle of scanning some markup. -Ac
3141: if (fMarkupDepth != 0) {
3142: reportFatalError("PrematureEOF", null);
3143: }
3144:
3145: } // endOfFileHook()
3146:
3147: } // class FragmentContentDriver
3148:
3149: static void pr(String str) {
3150: System.out.println(str);
3151: }
3152:
3153: boolean fUsebuffer;
3154: } // class XMLDocumentFragmentScannerImpl
|