0001: /*
0002: * $Id: XMLScanner.java,v 1.5 2006/11/29 22:01:32 spericas Exp $
0003: */
0004:
0005: /*
0006: * The contents of this file are subject to the terms
0007: * of the Common Development and Distribution License
0008: * (the License). You may not use this file except in
0009: * compliance with the License.
0010: *
0011: * You can obtain a copy of the license at
0012: * https://glassfish.dev.java.net/public/CDDLv1.0.html.
0013: * See the License for the specific language governing
0014: * permissions and limitations under the License.
0015: *
0016: * When distributing Covered Code, include this CDDL
0017: * Header Notice in each file and include the License file
0018: * at https://glassfish.dev.java.net/public/CDDLv1.0.html.
0019: * If applicable, add the following below the CDDL Header,
0020: * with the fields enclosed by brackets [] replaced by
0021: * you own identifying information:
0022: * "Portions Copyrighted [year] [name of copyright owner]"
0023: *
0024: * [Name of File] [ver.__] [Date]
0025: *
0026: * Copyright 2006 Sun Microsystems Inc. All Rights Reserved
0027: */
0028:
0029: /*
0030: * The Apache Software License, Version 1.1
0031: *
0032: *
0033: * Copyright (c) 1999-2002 The Apache Software Foundation.
0034: * All rights reserved.
0035: *
0036: * Redistribution and use in source and binary forms, with or without
0037: * modification, are permitted provided that the following conditions
0038: * are met:
0039: *
0040: * 1. Redistributions of source code must retain the above copyright
0041: * notice, this list of conditions and the following disclaimer.
0042: *
0043: * 2. Redistributions in binary form must reproduce the above copyright
0044: * notice, this list of conditions and the following disclaimer in
0045: * the documentation and/or other materials provided with the
0046: * distribution.
0047: *
0048: * 3. The end-user documentation included with the redistribution,
0049: * if any, must include the following acknowledgment:
0050: * "This product includes software developed by the
0051: * Apache Software Foundation (http://www.apache.org/)."
0052: * Alternately, this acknowledgment may appear in the software itself,
0053: * if and wherever such third-party acknowledgments normally appear.
0054: *
0055: * 4. The names "Xerces" and "Apache Software Foundation" must
0056: * not be used to endorse or promote products derived from this
0057: * software without prior written permission. For written
0058: * permission, please contact apache@apache.org.
0059: *
0060: * 5. Products derived from this software may not be called "Apache",
0061: * nor may "Apache" appear in their name, without prior written
0062: * permission of the Apache Software Foundation.
0063: *
0064: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0065: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0066: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0067: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
0068: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0069: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0070: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
0071: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0072: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
0073: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
0074: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0075: * SUCH DAMAGE.
0076: * ====================================================================
0077: *
0078: * This software consists of voluntary contributions made by many
0079: * individuals on behalf of the Apache Software Foundation and was
0080: * originally based on software copyright (c) 1999, International
0081: * Business Machines, Inc., http://www.apache.org. For more
0082: * information on the Apache Software Foundation, please see
0083: * <http://www.apache.org/>.
0084: */
0085:
0086: package com.sun.xml.stream;
0087:
0088: import java.io.IOException;
0089: import java.util.ArrayList;
0090: import java.util.HashMap;
0091: import javax.xml.stream.events.XMLEvent;
0092: import com.sun.xml.stream.xerces.impl.msg.XMLMessageFormatter;
0093: import com.sun.xml.stream.xerces.util.SymbolTable;
0094: import com.sun.xml.stream.xerces.util.XMLChar;
0095: import com.sun.xml.stream.xerces.util.XMLResourceIdentifierImpl;
0096: import com.sun.xml.stream.xerces.util.XMLStringBuffer;
0097: import com.sun.xml.stream.xerces.xni.XMLAttributes;
0098: import com.sun.xml.stream.xerces.xni.XMLResourceIdentifier;
0099: import com.sun.xml.stream.xerces.xni.XMLString;
0100: import com.sun.xml.stream.xerces.xni.XNIException;
0101: import com.sun.xml.stream.xerces.xni.parser.XMLComponent;
0102: import com.sun.xml.stream.xerces.xni.parser.XMLComponentManager;
0103: import com.sun.xml.stream.xerces.xni.parser.XMLConfigurationException;
0104:
0105: /**
0106: * This class is responsible for holding scanning methods common to
0107: * scanning the XML document structure and content as well as the DTD
0108: * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
0109: * from this base class.
0110: *
0111: * <p>
0112: * This component requires the following features and properties from the
0113: * component manager that uses it:
0114: * <ul>
0115: * <li>http://xml.org/sax/features/validation</li>
0116: * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
0117: * <li>http://apache.org/xml/properties/internal/symbol-table</li>
0118: * <li>http://apache.org/xml/properties/internal/error-reporter</li>
0119: * <li>http://apache.org/xml/properties/internal/entity-manager</li>
0120: * </ul>
0121: *
0122: * @author Andy Clark, IBM
0123: * @author Arnaud Le Hors, IBM
0124: * @author Eric Ye, IBM
0125: * @author K.Venugopal SUN Microsystems
0126: *
0127: * @version $Id: XMLScanner.java,v 1.5 2006/11/29 22:01:32 spericas Exp $
0128: */
0129: public abstract class XMLScanner implements XMLComponent {
0130:
0131: //
0132: // Constants
0133: //
0134:
0135: // feature identifiers
0136:
0137: /** Feature identifier: validation. */
0138: protected static final String VALIDATION = Constants.SAX_FEATURE_PREFIX
0139: + Constants.VALIDATION_FEATURE;
0140:
0141: /** Feature identifier: notify character references. */
0142: protected static final String NOTIFY_CHAR_REFS = Constants.XERCES_FEATURE_PREFIX
0143: + Constants.NOTIFY_CHAR_REFS_FEATURE;
0144:
0145: // property identifiers
0146:
0147: /** Property identifier: symbol table. */
0148: protected static final String SYMBOL_TABLE = Constants.XERCES_PROPERTY_PREFIX
0149: + Constants.SYMBOL_TABLE_PROPERTY;
0150:
0151: /** Property identifier: error reporter. */
0152: protected static final String ERROR_REPORTER = Constants.XERCES_PROPERTY_PREFIX
0153: + Constants.ERROR_REPORTER_PROPERTY;
0154:
0155: /** Property identifier: entity manager. */
0156: protected static final String ENTITY_MANAGER = Constants.XERCES_PROPERTY_PREFIX
0157: + Constants.ENTITY_MANAGER_PROPERTY;
0158:
0159: // debugging
0160:
0161: /** Debug attribute normalization. */
0162: protected static final boolean DEBUG_ATTR_NORMALIZATION = false;
0163:
0164: //xxx: the default value is false, as we don't need to calculate this value;
0165: //we should have a feature that, when set to true, computes this value
0166: private boolean fNeedNonNormalizedValue = false;
0167:
0168: protected ArrayList attributeValueCache = new ArrayList();
0169: protected ArrayList stringBufferCache = new ArrayList();
0170: protected int fStringBufferIndex = 0;
0171: protected boolean fAttributeCacheInitDone = false;
0172: protected int fAttributeCacheUsedCount = 0;
0173:
0174: //
0175: // Data
0176: //
0177:
0178: // features
0179:
0180: /**
0181: * Validation. This feature identifier is:
0182: * http://xml.org/sax/features/validation
0183: */
0184: protected boolean fValidation = false;
0185:
0186: /** Character references notification. */
0187: protected boolean fNotifyCharRefs = false;
0188:
0189: // properties
0190:
0191: protected PropertyManager fPropertyManager = null;
0192: /** Symbol table. */
0193: protected SymbolTable fSymbolTable;
0194:
0195: /** Error reporter. */
0196: protected XMLErrorReporter fErrorReporter;
0197:
0198: /** Entity manager. */
0199: //protected XMLEntityManager fEntityManager = PropertyManager.getEntityManager();
0200: protected XMLEntityManager fEntityManager = null;
0201:
0202: /** xxx this should be available from EntityManager Entity storage */
0203: protected XMLEntityStorage fEntityStore = null;
0204:
0205: // protected data
0206:
0207: /** event type */
0208: protected XMLEvent fEvent;
0209:
0210: /** Entity scanner, this always works on last entity that was opened. */
0211: protected XMLEntityReaderImpl fEntityScanner = null;
0212:
0213: /** Entity depth. */
0214: protected int fEntityDepth;
0215:
0216: /** Literal value of the last character reference scanned. */
0217: protected String fCharRefLiteral = null;
0218:
0219: /** Scanning attribute. */
0220: protected boolean fScanningAttribute;
0221:
0222: /** Report entity boundary. */
0223: protected boolean fReportEntity;
0224:
0225: // symbols
0226:
0227: /** Symbol: "version". */
0228: protected final static String fVersionSymbol = "version".intern();
0229:
0230: /** Symbol: "encoding". */
0231: protected final static String fEncodingSymbol = "encoding".intern();
0232:
0233: /** Symbol: "standalone". */
0234: protected final static String fStandaloneSymbol = "standalone"
0235: .intern();
0236:
0237: /** Symbol: "amp". */
0238: protected final static String fAmpSymbol = "amp".intern();
0239:
0240: /** Symbol: "lt". */
0241: protected final static String fLtSymbol = "lt".intern();
0242:
0243: /** Symbol: "gt". */
0244: protected final static String fGtSymbol = "gt".intern();
0245:
0246: /** Symbol: "quot". */
0247: protected final static String fQuotSymbol = "quot".intern();
0248:
0249: /** Symbol: "apos". */
0250: protected final static String fAposSymbol = "apos".intern();
0251:
0252: // temporary variables
0253:
0254: // NOTE: These objects are private to help prevent accidental modification
0255: // of values by a subclass. If they were protected *and* a subclass
0256: // modified the values, it would be difficult to track down the real
0257: // cause of the bug. By making these private, we avoid this
0258: // possibility.
0259:
0260: /** String. */
0261: private XMLString fString = new XMLString();
0262:
0263: /** String buffer. */
0264: private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
0265:
0266: /** String buffer. */
0267: private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
0268:
0269: /** String buffer. */
0270: private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
0271:
0272: // temporary location for Resource identification information.
0273: protected XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
0274: int initialCacheCount = 6;
0275:
0276: //
0277: // XMLComponent methods
0278: //
0279:
0280: /**
0281: *
0282: *
0283: * @param componentManager The component manager.
0284: *
0285: * @throws SAXException Throws exception if required features and
0286: * properties cannot be found.
0287: */
0288: public void reset(XMLComponentManager componentManager)
0289: throws XMLConfigurationException {
0290:
0291: // Xerces properties
0292: fSymbolTable = (SymbolTable) componentManager
0293: .getProperty(SYMBOL_TABLE);
0294: fErrorReporter = (XMLErrorReporter) componentManager
0295: .getProperty(ERROR_REPORTER);
0296: fEntityManager = (XMLEntityManager) componentManager
0297: .getProperty(ENTITY_MANAGER);
0298:
0299: init();
0300: // sax features
0301: try {
0302: fValidation = componentManager.getFeature(VALIDATION);
0303: } catch (XMLConfigurationException e) {
0304: fValidation = false;
0305: }
0306: try {
0307: fNotifyCharRefs = componentManager
0308: .getFeature(NOTIFY_CHAR_REFS);
0309: } catch (XMLConfigurationException e) {
0310: fNotifyCharRefs = false;
0311: }
0312:
0313: } // reset(XMLComponentManager)
0314:
0315: protected void setPropertyManager(PropertyManager propertyManager) {
0316: fPropertyManager = propertyManager;
0317: }
0318:
0319: /**
0320: * Sets the value of a property during parsing.
0321: *
0322: * @param propertyId
0323: * @param value
0324: */
0325: public void setProperty(String propertyId, Object value)
0326: throws XMLConfigurationException {
0327:
0328: // Xerces properties
0329: if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
0330: String property = propertyId
0331: .substring(Constants.XERCES_PROPERTY_PREFIX
0332: .length());
0333: if (property.equals(Constants.SYMBOL_TABLE_PROPERTY)) {
0334: fSymbolTable = (SymbolTable) value;
0335: } else if (property
0336: .equals(Constants.ERROR_REPORTER_PROPERTY)) {
0337: fErrorReporter = (XMLErrorReporter) value;
0338: } else if (property
0339: .equals(Constants.ENTITY_MANAGER_PROPERTY)) {
0340: fEntityManager = (XMLEntityManager) value;
0341: }
0342: }
0343: /*else if(propertyId.equals(Constants.STAX_PROPERTIES)){
0344: fStaxProperties = (HashMap)value;
0345: //TODO::discuss with neeraj what are his thoughts on passing properties.
0346: //For now use this
0347: }*/
0348:
0349: } // setProperty(String,Object)
0350:
0351: /*
0352: * Sets the feature of the scanner.
0353: */
0354: public void setFeature(String featureId, boolean value)
0355: throws XMLConfigurationException {
0356:
0357: if (VALIDATION.equals(featureId)) {
0358: fValidation = value;
0359: } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
0360: fNotifyCharRefs = value;
0361: }
0362: }
0363:
0364: /*
0365: * Gets the state of the feature of the scanner.
0366: */
0367: public boolean getFeature(String featureId)
0368: throws XMLConfigurationException {
0369:
0370: if (VALIDATION.equals(featureId)) {
0371: return fValidation;
0372: } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
0373: return fNotifyCharRefs;
0374: }
0375: throw new XMLConfigurationException(
0376: XMLConfigurationException.NOT_RECOGNIZED, featureId);
0377: }
0378:
0379: //
0380: // Protected methods
0381: //
0382:
0383: // anybody calling this had better have set Symtoltable!
0384: public void reset(PropertyManager propertyManager) {
0385: init();
0386: // Xerces properties
0387: fSymbolTable = (SymbolTable) propertyManager
0388: .getProperty(Constants.XERCES_PROPERTY_PREFIX
0389: + Constants.SYMBOL_TABLE_PROPERTY);
0390:
0391: fErrorReporter = (XMLErrorReporter) propertyManager
0392: .getProperty(Constants.XERCES_PROPERTY_PREFIX
0393: + Constants.ERROR_REPORTER_PROPERTY);
0394:
0395: fEntityManager = (XMLEntityManager) propertyManager
0396: .getProperty(ENTITY_MANAGER);
0397: fEntityStore = fEntityManager.getEntityStore();
0398: fEntityScanner = (XMLEntityReaderImpl) fEntityManager
0399: .getEntityReader();
0400: //fEntityManager.reset();
0401: // DTD preparsing defaults:
0402: fValidation = false;
0403: fNotifyCharRefs = false;
0404:
0405: }
0406:
0407: // common scanning methods
0408:
0409: /**
0410: * Scans an XML or text declaration.
0411: * <p>
0412: * <pre>
0413: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
0414: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
0415: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
0416: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
0417: * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
0418: * | ('"' ('yes' | 'no') '"'))
0419: *
0420: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
0421: * </pre>
0422: *
0423: * @param scanningTextDecl True if a text declaration is to
0424: * be scanned instead of an XML
0425: * declaration.
0426: * @param pseudoAttributeValues An array of size 3 to return the version,
0427: * encoding and standalone pseudo attribute values
0428: * (in that order).
0429: *
0430: * <strong>Note:</strong> This method uses fString, anything in it
0431: * at the time of calling is lost.
0432: */
0433: protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
0434: String[] pseudoAttributeValues) throws IOException,
0435: XNIException {
0436:
0437: // pseudo-attribute values
0438: String version = null;
0439: String encoding = null;
0440: String standalone = null;
0441:
0442: // scan pseudo-attributes
0443: final int STATE_VERSION = 0;
0444: final int STATE_ENCODING = 1;
0445: final int STATE_STANDALONE = 2;
0446: final int STATE_DONE = 3;
0447: int state = STATE_VERSION;
0448:
0449: boolean dataFoundForTarget = false;
0450: boolean sawSpace = fEntityScanner.skipSpaces();
0451: while (fEntityScanner.peekChar() != '?') {
0452: dataFoundForTarget = true;
0453: String name = scanPseudoAttribute(scanningTextDecl, fString);
0454: switch (state) {
0455: case STATE_VERSION: {
0456: if (name.equals(fVersionSymbol)) {
0457: if (!sawSpace) {
0458: reportFatalError(
0459: scanningTextDecl ? "SpaceRequiredBeforeVersionInTextDecl"
0460: : "SpaceRequiredBeforeVersionInXMLDecl",
0461: null);
0462: }
0463: version = fString.toString();
0464: state = STATE_ENCODING;
0465: if (!versionSupported(version)) {
0466: reportFatalError("VersionNotSupported",
0467: new Object[] { version });
0468: }
0469: } else if (name.equals(fEncodingSymbol)) {
0470: if (!scanningTextDecl) {
0471: reportFatalError("VersionInfoRequired", null);
0472: }
0473: if (!sawSpace) {
0474: reportFatalError(
0475: scanningTextDecl ? "SpaceRequiredBeforeEncodingInTextDecl"
0476: : "SpaceRequiredBeforeEncodingInXMLDecl",
0477: null);
0478: }
0479: encoding = fString.toString();
0480: state = scanningTextDecl ? STATE_DONE
0481: : STATE_STANDALONE;
0482: } else {
0483: if (scanningTextDecl) {
0484: reportFatalError("EncodingDeclRequired", null);
0485: } else {
0486: reportFatalError("VersionInfoRequired", null);
0487: }
0488: }
0489: break;
0490: }
0491: case STATE_ENCODING: {
0492: if (name.equals(fEncodingSymbol)) {
0493: if (!sawSpace) {
0494: reportFatalError(
0495: scanningTextDecl ? "SpaceRequiredBeforeEncodingInTextDecl"
0496: : "SpaceRequiredBeforeEncodingInXMLDecl",
0497: null);
0498: }
0499: encoding = fString.toString();
0500: state = scanningTextDecl ? STATE_DONE
0501: : STATE_STANDALONE;
0502: // TODO: check encoding name; set encoding on
0503: // entity scanner
0504: } else if (!scanningTextDecl
0505: && name.equals(fStandaloneSymbol)) {
0506: if (!sawSpace) {
0507: reportFatalError(
0508: "SpaceRequiredBeforeStandalone", null);
0509: }
0510: standalone = fString.toString();
0511: state = STATE_DONE;
0512: if (!standalone.equals("yes")
0513: && !standalone.equals("no")) {
0514: reportFatalError("SDDeclInvalid", null);
0515: }
0516: } else {
0517: reportFatalError("EncodingDeclRequired", null);
0518: }
0519: break;
0520: }
0521: case STATE_STANDALONE: {
0522: if (name.equals(fStandaloneSymbol)) {
0523: if (!sawSpace) {
0524: reportFatalError(
0525: "SpaceRequiredBeforeStandalone", null);
0526: }
0527: standalone = fString.toString();
0528: state = STATE_DONE;
0529: if (!standalone.equals("yes")
0530: && !standalone.equals("no")) {
0531: reportFatalError("SDDeclInvalid", null);
0532: }
0533: } else {
0534: reportFatalError("EncodingDeclRequired", null);
0535: }
0536: break;
0537: }
0538: default: {
0539: reportFatalError("NoMorePseudoAttributes", null);
0540: }
0541: }
0542: sawSpace = fEntityScanner.skipSpaces();
0543: }
0544: // REVISIT: should we remove this error reporting?
0545: if (scanningTextDecl && state != STATE_DONE) {
0546: reportFatalError("MorePseudoAttributes", null);
0547: }
0548:
0549: // If there is no data in the xml or text decl then we fail to report error
0550: // for version or encoding info above.
0551: if (scanningTextDecl) {
0552: if (!dataFoundForTarget && encoding == null) {
0553: reportFatalError("EncodingDeclRequired", null);
0554: }
0555: } else {
0556: if (!dataFoundForTarget && version == null) {
0557: reportFatalError("VersionInfoRequired", null);
0558: }
0559: }
0560:
0561: // end
0562: if (!fEntityScanner.skipChar('?')) {
0563: reportFatalError("XMLDeclUnterminated", null);
0564: }
0565: if (!fEntityScanner.skipChar('>')) {
0566: reportFatalError("XMLDeclUnterminated", null);
0567:
0568: }
0569:
0570: // fill in return array
0571: pseudoAttributeValues[0] = version;
0572: pseudoAttributeValues[1] = encoding;
0573: pseudoAttributeValues[2] = standalone;
0574:
0575: } // scanXMLDeclOrTextDecl(boolean)
0576:
0577: /**
0578: * Scans a pseudo attribute.
0579: *
0580: * @param scanningTextDecl True if scanning this pseudo-attribute for a
0581: * TextDecl; false if scanning XMLDecl. This
0582: * flag is needed to report the correct type of
0583: * error.
0584: * @param value The string to fill in with the attribute
0585: * value.
0586: *
0587: * @return The name of the attribute
0588: *
0589: * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
0590: * at the time of calling is lost.
0591: */
0592: public String scanPseudoAttribute(boolean scanningTextDecl,
0593: XMLString value) throws IOException, XNIException {
0594:
0595: String name = fEntityScanner.scanName();
0596: // XMLEntityManager.print(fEntityManager.getCurrentEntity());
0597:
0598: if (name == null) {
0599: reportFatalError("PseudoAttrNameExpected", null);
0600: }
0601: fEntityScanner.skipSpaces();
0602: if (!fEntityScanner.skipChar('=')) {
0603: reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
0604: : "EqRequiredInXMLDecl", new Object[] { name });
0605: }
0606: fEntityScanner.skipSpaces();
0607: int quote = fEntityScanner.peekChar();
0608: if (quote != '\'' && quote != '"') {
0609: reportFatalError(
0610: scanningTextDecl ? "QuoteRequiredInTextDecl"
0611: : "QuoteRequiredInXMLDecl",
0612: new Object[] { name });
0613: }
0614: fEntityScanner.scanChar();
0615: int c = fEntityScanner.scanLiteral(quote, value);
0616: if (c != quote) {
0617: fStringBuffer2.clear();
0618: do {
0619: fStringBuffer2.append(value);
0620: if (c != -1) {
0621: if (c == '&' || c == '%' || c == '<' || c == ']') {
0622: fStringBuffer2.append((char) fEntityScanner
0623: .scanChar());
0624: } else if (XMLChar.isHighSurrogate(c)) {
0625: scanSurrogates(fStringBuffer2);
0626: } else if (isInvalidLiteral(c)) {
0627: String key = scanningTextDecl ? "InvalidCharInTextDecl"
0628: : "InvalidCharInXMLDecl";
0629: reportFatalError(key, new Object[] { Integer
0630: .toString(c, 16) });
0631: fEntityScanner.scanChar();
0632: }
0633: }
0634: c = fEntityScanner.scanLiteral(quote, value);
0635: } while (c != quote);
0636: fStringBuffer2.append(value);
0637: value.setValues(fStringBuffer2);
0638: }
0639: if (!fEntityScanner.skipChar(quote)) {
0640: reportFatalError(
0641: scanningTextDecl ? "CloseQuoteMissingInTextDecl"
0642: : "CloseQuoteMissingInXMLDecl",
0643: new Object[] { name });
0644: }
0645:
0646: // return
0647: return name;
0648:
0649: } // scanPseudoAttribute(XMLString):String
0650:
0651: /**
0652: * Scans a processing instruction.
0653: * <p>
0654: * <pre>
0655: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
0656: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
0657: * </pre>
0658: */
0659: //CHANGED:
0660: //EARLIER: scanPI()
0661: //NOW: scanPI(XMLStringBuffer)
0662: //it makes things more easy if XMLStringBUffer is passed. Motivation for this change is same
0663: // as that for scanContent()
0664: protected void scanPI(XMLStringBuffer data) throws IOException,
0665: XNIException {
0666:
0667: // target
0668: fReportEntity = false;
0669: String target = fEntityScanner.scanName();
0670: if (target == null) {
0671: reportFatalError("PITargetRequired", null);
0672: }
0673:
0674: // scan data
0675: scanPIData(target, data);
0676: fReportEntity = true;
0677:
0678: } // scanPI(XMLStringBuffer)
0679:
0680: /**
0681: * Scans a processing data. This is needed to handle the situation
0682: * where a document starts with a processing instruction whose
0683: * target name <em>starts with</em> "xml". (e.g. xmlfoo)
0684: *
0685: * This method would always read the whole data. We have while loop and data is buffered
0686: * until delimeter is encountered.
0687: *
0688: * @param target The PI target
0689: * @param data The string to fill in with the data
0690: */
0691:
0692: //CHANGED:
0693: //Earlier:This method uses the fStringBuffer and later buffer values are set to
0694: //the supplied XMLString....
0695: //Now: Changed the signature of this function to pass XMLStringBuffer.. and data would
0696: //be appended to that buffer
0697: protected void scanPIData(String target, XMLStringBuffer data)
0698: throws IOException, XNIException {
0699:
0700: // check target
0701: if (target.length() == 3) {
0702: char c0 = Character.toLowerCase(target.charAt(0));
0703: char c1 = Character.toLowerCase(target.charAt(1));
0704: char c2 = Character.toLowerCase(target.charAt(2));
0705: if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
0706: reportFatalError("ReservedPITarget", null);
0707: }
0708: }
0709:
0710: // spaces
0711: if (!fEntityScanner.skipSpaces()) {
0712: if (fEntityScanner.skipString("?>")) {
0713: // we found the end, there is no data just return
0714: return;
0715: } else {
0716: // if there is data there should be some space
0717: reportFatalError("SpaceRequiredInPI", null);
0718: }
0719: }
0720:
0721: // since scanData appends the parsed data to the buffer passed
0722: // a while loop would append the whole of parsed data to the buffer(data:XMLStringBuffer)
0723: //until all of the data is buffered.
0724: if (fEntityScanner.scanData("?>", data)) {
0725: do {
0726: int c = fEntityScanner.peekChar();
0727: if (c != -1) {
0728: if (XMLChar.isHighSurrogate(c)) {
0729: scanSurrogates(data);
0730: } else if (isInvalidLiteral(c)) {
0731: reportFatalError("InvalidCharInPI",
0732: new Object[] { Integer.toHexString(c) });
0733: fEntityScanner.scanChar();
0734: }
0735: }
0736: } while (fEntityScanner.scanData("?>", data));
0737: }
0738:
0739: } // scanPIData(String,XMLString)
0740:
0741: /**
0742: * Scans a comment.
0743: * <p>
0744: * <pre>
0745: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
0746: * </pre>
0747: * <p>
0748: * <strong>Note:</strong> Called after scanning past '<!--'
0749: * <strong>Note:</strong> This method uses fString, anything in it
0750: * at the time of calling is lost.
0751: *
0752: * @param text The buffer to fill in with the text.
0753: */
0754: protected void scanComment(XMLStringBuffer text)
0755: throws IOException, XNIException {
0756:
0757: //System.out.println( "XMLScanner#scanComment# In Scan Comment" );
0758: // text
0759: // REVISIT: handle invalid character, eof
0760: text.clear();
0761: while (fEntityScanner.scanData("--", text)) {
0762: int c = fEntityScanner.peekChar();
0763:
0764: //System.out.println( "XMLScanner#scanComment#text.toString() == " + text.toString() );
0765: //System.out.println( "XMLScanner#scanComment#c == " + c );
0766:
0767: if (c != -1) {
0768: if (XMLChar.isHighSurrogate(c)) {
0769: scanSurrogates(text);
0770: }
0771: if (isInvalidLiteral(c)) {
0772: reportFatalError("InvalidCharInComment",
0773: new Object[] { Integer.toHexString(c) });
0774: fEntityScanner.scanChar();
0775: }
0776: }
0777: }
0778: if (!fEntityScanner.skipChar('>')) {
0779: reportFatalError("DashDashInComment", null);
0780: }
0781:
0782: } // scanComment()
0783:
0784: /**
0785: * Scans an attribute value and normalizes whitespace converting all
0786: * whitespace characters to space characters.
0787: *
0788: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
0789: *
0790: * @param value The XMLString to fill in with the value.
0791: * @param nonNormalizedValue The XMLString to fill in with the
0792: * non-normalized value.
0793: * @param atName The name of the attribute being parsed (for error msgs).
0794: * @param attributes The attributes list for the scanned attribute.
0795: * @param attrIndex The index of the attribute to use from the list.
0796: * @param checkEntities true if undeclared entities should be reported as VC violation,
0797: * false if undeclared entities should be reported as WFC violation.
0798: *
0799: * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
0800: * at the time of calling is lost.
0801: **/
0802: protected void scanAttributeValue(XMLString value,
0803: XMLString nonNormalizedValue, String atName,
0804: XMLAttributes attributes, int attrIndex,
0805: boolean checkEntities) throws IOException, XNIException {
0806: XMLStringBuffer stringBuffer = null;
0807: // quote
0808: int quote = fEntityScanner.peekChar();
0809: if (quote != '\'' && quote != '"') {
0810: reportFatalError("OpenQuoteExpected",
0811: new Object[] { atName });
0812: }
0813:
0814: fEntityScanner.scanChar();
0815: int entityDepth = fEntityDepth;
0816:
0817: int c = fEntityScanner.scanLiteral(quote, value);
0818: if (DEBUG_ATTR_NORMALIZATION) {
0819: System.out.println("** scanLiteral -> \""
0820: + value.toString() + "\"");
0821: }
0822: if (fNeedNonNormalizedValue) {
0823: fStringBuffer2.clear();
0824: fStringBuffer2.append(value);
0825: }
0826: if (fEntityScanner.whiteSpaceLen > 0)
0827: normalizeWhitespace(value);
0828: if (DEBUG_ATTR_NORMALIZATION) {
0829: System.out.println("** normalizeWhitespace -> \""
0830: + value.toString() + "\"");
0831: }
0832: if (c != quote) {
0833: fScanningAttribute = true;
0834: stringBuffer = getStringBuffer();
0835: stringBuffer.clear();
0836: do {
0837: stringBuffer.append(value);
0838: if (DEBUG_ATTR_NORMALIZATION) {
0839: System.out.println("** value2: \""
0840: + stringBuffer.toString() + "\"");
0841: }
0842: if (c == '&') {
0843: fEntityScanner.skipChar('&');
0844: if (entityDepth == fEntityDepth
0845: && fNeedNonNormalizedValue) {
0846: fStringBuffer2.append('&');
0847: }
0848: if (fEntityScanner.skipChar('#')) {
0849: if (entityDepth == fEntityDepth
0850: && fNeedNonNormalizedValue) {
0851: fStringBuffer2.append('#');
0852: }
0853: int ch;
0854: if (fNeedNonNormalizedValue)
0855: ch = scanCharReferenceValue(stringBuffer,
0856: fStringBuffer2);
0857: else
0858: ch = scanCharReferenceValue(stringBuffer,
0859: null);
0860:
0861: if (ch != -1) {
0862: if (DEBUG_ATTR_NORMALIZATION) {
0863: System.out.println("** value3: \""
0864: + stringBuffer.toString()
0865: + "\"");
0866: }
0867: }
0868: } else {
0869: String entityName = fEntityScanner.scanName();
0870: if (entityName == null) {
0871: reportFatalError("NameRequiredInReference",
0872: null);
0873: } else if (entityDepth == fEntityDepth
0874: && fNeedNonNormalizedValue) {
0875: fStringBuffer2.append(entityName);
0876: }
0877: if (!fEntityScanner.skipChar(';')) {
0878: reportFatalError(
0879: "SemicolonRequiredInReference",
0880: new Object[] { entityName });
0881: } else if (entityDepth == fEntityDepth
0882: && fNeedNonNormalizedValue) {
0883: fStringBuffer2.append(';');
0884: }
0885: if (entityName == fAmpSymbol) {
0886: stringBuffer.append('&');
0887: if (DEBUG_ATTR_NORMALIZATION) {
0888: System.out.println("** value5: \""
0889: + stringBuffer.toString()
0890: + "\"");
0891: }
0892: } else if (entityName == fAposSymbol) {
0893: stringBuffer.append('\'');
0894: if (DEBUG_ATTR_NORMALIZATION) {
0895: System.out.println("** value7: \""
0896: + stringBuffer.toString()
0897: + "\"");
0898: }
0899: } else if (entityName == fLtSymbol) {
0900: stringBuffer.append('<');
0901: if (DEBUG_ATTR_NORMALIZATION) {
0902: System.out.println("** value9: \""
0903: + stringBuffer.toString()
0904: + "\"");
0905: }
0906: } else if (entityName == fGtSymbol) {
0907: stringBuffer.append('>');
0908: if (DEBUG_ATTR_NORMALIZATION) {
0909: System.out.println("** valueB: \""
0910: + stringBuffer.toString()
0911: + "\"");
0912: }
0913: } else if (entityName == fQuotSymbol) {
0914: stringBuffer.append('"');
0915: if (DEBUG_ATTR_NORMALIZATION) {
0916: System.out.println("** valueD: \""
0917: + stringBuffer.toString()
0918: + "\"");
0919: }
0920: } else {
0921: if (fEntityStore
0922: .isExternalEntity(entityName)) {
0923: reportFatalError(
0924: "ReferenceToExternalEntity",
0925: new Object[] { entityName });
0926: } else {
0927: if (!fEntityStore
0928: .isDeclaredEntity(entityName)) {
0929: //WFC & VC: Entity Declared
0930: if (checkEntities) {
0931: if (fValidation) {
0932: fErrorReporter
0933: .reportError(
0934: fEntityScanner,
0935: XMLMessageFormatter.XML_DOMAIN,
0936: "EntityNotDeclared",
0937: new Object[] { entityName },
0938: XMLErrorReporter.SEVERITY_ERROR);
0939: }
0940: } else {
0941: reportFatalError(
0942: "EntityNotDeclared",
0943: new Object[] { entityName });
0944: }
0945: }
0946: fEntityManager.startEntity(entityName,
0947: true);
0948: }
0949: }
0950: }
0951: } else if (c == '<') {
0952: reportFatalError("LessthanInAttValue",
0953: new Object[] { null, atName });
0954: fEntityScanner.scanChar();
0955: if (entityDepth == fEntityDepth
0956: && fNeedNonNormalizedValue) {
0957: fStringBuffer2.append((char) c);
0958: }
0959: } else if (c == '%' || c == ']') {
0960: fEntityScanner.scanChar();
0961: stringBuffer.append((char) c);
0962: if (entityDepth == fEntityDepth
0963: && fNeedNonNormalizedValue) {
0964: fStringBuffer2.append((char) c);
0965: }
0966: if (DEBUG_ATTR_NORMALIZATION) {
0967: System.out.println("** valueF: \""
0968: + stringBuffer.toString() + "\"");
0969: }
0970: } else if (c == '\n' || c == '\r') {
0971: fEntityScanner.scanChar();
0972: stringBuffer.append(' ');
0973: if (entityDepth == fEntityDepth
0974: && fNeedNonNormalizedValue) {
0975: fStringBuffer2.append('\n');
0976: }
0977: } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
0978: if (scanSurrogates(fStringBuffer3)) {
0979: stringBuffer.append(fStringBuffer3);
0980: if (entityDepth == fEntityDepth
0981: && fNeedNonNormalizedValue) {
0982: fStringBuffer2.append(fStringBuffer3);
0983: }
0984: if (DEBUG_ATTR_NORMALIZATION) {
0985: System.out.println("** valueI: \""
0986: + stringBuffer.toString() + "\"");
0987: }
0988: }
0989: } else if (c != -1 && isInvalidLiteral(c)) {
0990: reportFatalError("InvalidCharInAttValue",
0991: new Object[] { Integer.toString(c, 16) });
0992: fEntityScanner.scanChar();
0993: if (entityDepth == fEntityDepth
0994: && fNeedNonNormalizedValue) {
0995: fStringBuffer2.append((char) c);
0996: }
0997: }
0998: c = fEntityScanner.scanLiteral(quote, value);
0999: if (entityDepth == fEntityDepth
1000: && fNeedNonNormalizedValue) {
1001: fStringBuffer2.append(value);
1002: }
1003: if (fEntityScanner.whiteSpaceLen > 0)
1004: normalizeWhitespace(value);
1005: //Todo ::Move this check to Attributes , do conversion
1006: //only if attribute is being accessed. -Venu
1007: } while (c != quote || entityDepth != fEntityDepth);
1008: stringBuffer.append(value);
1009: if (DEBUG_ATTR_NORMALIZATION) {
1010: System.out.println("** valueN: \""
1011: + stringBuffer.toString() + "\"");
1012: }
1013: value.setValues(stringBuffer);
1014: fScanningAttribute = false;
1015: }
1016: if (fNeedNonNormalizedValue)
1017: nonNormalizedValue.setValues(fStringBuffer2);
1018:
1019: // quote
1020: int cquote = fEntityScanner.scanChar();
1021: if (cquote != quote) {
1022: reportFatalError("CloseQuoteExpected",
1023: new Object[] { atName });
1024: }
1025: } // scanAttributeValue()
1026:
1027: /**
1028: * Scans External ID and return the public and system IDs.
1029: *
1030: * @param identifiers An array of size 2 to return the system id,
1031: * and public id (in that order).
1032: * @param optionalSystemId Specifies whether the system id is optional.
1033: *
1034: * <strong>Note:</strong> This method uses fString and fStringBuffer,
1035: * anything in them at the time of calling is lost.
1036: */
1037: protected void scanExternalID(String[] identifiers,
1038: boolean optionalSystemId) throws IOException, XNIException {
1039:
1040: String systemId = null;
1041: String publicId = null;
1042: if (fEntityScanner.skipString("PUBLIC")) {
1043: if (!fEntityScanner.skipSpaces()) {
1044: reportFatalError("SpaceRequiredAfterPUBLIC", null);
1045: }
1046: scanPubidLiteral(fString);
1047: publicId = fString.toString();
1048:
1049: if (!fEntityScanner.skipSpaces() && !optionalSystemId) {
1050: reportFatalError("SpaceRequiredBetweenPublicAndSystem",
1051: null);
1052: }
1053: }
1054:
1055: if (publicId != null || fEntityScanner.skipString("SYSTEM")) {
1056: if (publicId == null && !fEntityScanner.skipSpaces()) {
1057: reportFatalError("SpaceRequiredAfterSYSTEM", null);
1058: }
1059: int quote = fEntityScanner.peekChar();
1060: if (quote != '\'' && quote != '"') {
1061: if (publicId != null && optionalSystemId) {
1062: // looks like we don't have any system id
1063: // simply return the public id
1064: identifiers[0] = null;
1065: identifiers[1] = publicId;
1066: return;
1067: }
1068: reportFatalError("QuoteRequiredInSystemID", null);
1069: }
1070: fEntityScanner.scanChar();
1071: XMLString ident = fString;
1072: if (fEntityScanner.scanLiteral(quote, ident) != quote) {
1073: fStringBuffer.clear();
1074: do {
1075: fStringBuffer.append(ident);
1076: int c = fEntityScanner.peekChar();
1077: if (XMLChar.isMarkup(c) || c == ']') {
1078: fStringBuffer.append((char) fEntityScanner
1079: .scanChar());
1080: }
1081: } while (fEntityScanner.scanLiteral(quote, ident) != quote);
1082: fStringBuffer.append(ident);
1083: ident = fStringBuffer;
1084: }
1085: systemId = ident.toString();
1086: if (!fEntityScanner.skipChar(quote)) {
1087: reportFatalError("SystemIDUnterminated", null);
1088: }
1089: }
1090:
1091: // store result in array
1092: identifiers[0] = systemId;
1093: identifiers[1] = publicId;
1094: }
1095:
1096: /**
1097: * Scans public ID literal.
1098: *
1099: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1100: * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
1101: *
1102: * The returned string is normalized according to the following rule,
1103: * from http://www.w3.org/TR/REC-xml#dt-pubid:
1104: *
1105: * Before a match is attempted, all strings of white space in the public
1106: * identifier must be normalized to single space characters (#x20), and
1107: * leading and trailing white space must be removed.
1108: *
1109: * @param literal The string to fill in with the public ID literal.
1110: * @return True on success.
1111: *
1112: * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
1113: * the time of calling is lost.
1114: */
1115: protected boolean scanPubidLiteral(XMLString literal)
1116: throws IOException, XNIException {
1117: int quote = fEntityScanner.scanChar();
1118: if (quote != '\'' && quote != '"') {
1119: reportFatalError("QuoteRequiredInPublicID", null);
1120: return false;
1121: }
1122:
1123: fStringBuffer.clear();
1124: // skip leading whitespace
1125: boolean skipSpace = true;
1126: boolean dataok = true;
1127: while (true) {
1128: int c = fEntityScanner.scanChar();
1129: if (c == ' ' || c == '\n' || c == '\r') {
1130: if (!skipSpace) {
1131: // take the first whitespace as a space and skip the others
1132: fStringBuffer.append(' ');
1133: skipSpace = true;
1134: }
1135: } else if (c == quote) {
1136: if (skipSpace) {
1137: // if we finished on a space let's trim it
1138: fStringBuffer.length--;
1139: }
1140: literal.setValues(fStringBuffer);
1141: break;
1142: } else if (XMLChar.isPubid(c)) {
1143: fStringBuffer.append((char) c);
1144: skipSpace = false;
1145: } else if (c == -1) {
1146: reportFatalError("PublicIDUnterminated", null);
1147: return false;
1148: } else {
1149: dataok = false;
1150: reportFatalError("InvalidCharInPublicID",
1151: new Object[] { Integer.toHexString(c) });
1152: }
1153: }
1154: return dataok;
1155: }
1156:
1157: /**
1158: * Normalize whitespace in an XMLString converting all whitespace
1159: * characters to space characters.
1160: */
1161: protected void normalizeWhitespace(XMLString value) {
1162: int i = 0;
1163: int j = 0;
1164: int[] buff = fEntityScanner.whiteSpaceLookup;
1165: int buffLen = fEntityScanner.whiteSpaceLen;
1166: int end = value.offset + value.length;
1167: while (i < buffLen) {
1168: j = buff[i];
1169: if (j < end) {
1170: value.ch[j] = ' ';
1171: }
1172: i++;
1173: }
1174: }
1175:
1176: //
1177: // XMLEntityHandler methods
1178: //
1179:
1180: /**
1181: * This method notifies of the start of an entity. The document entity
1182: * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1183: * parameter entity names start with '%'; and general entities are just
1184: * specified by their name.
1185: *
1186: * @param name The name of the entity.
1187: * @param identifier The resource identifier.
1188: * @param encoding The auto-detected IANA encoding name of the entity
1189: * stream. This value will be null in those situations
1190: * where the entity encoding is not auto-detected (e.g.
1191: * internal entities or a document entity that is
1192: * parsed from a java.io.Reader).
1193: *
1194: * @throws XNIException Thrown by handler to signal an error.
1195: */
1196: public void startEntity(String name,
1197: XMLResourceIdentifier identifier, String encoding)
1198: throws XNIException {
1199:
1200: // keep track of the entity depth
1201: fEntityDepth++;
1202:
1203: } // startEntity(String,XMLResourceIdentifier,String)
1204:
1205: /**
1206: * This method notifies the end of an entity. The document entity has
1207: * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1208: * parameter entity names start with '%'; and general entities are just
1209: * specified by their name.
1210: *
1211: * @param name The name of the entity.
1212: *
1213: * @throws XNIException Thrown by handler to signal an error.
1214: */
1215: public void endEntity(String name) throws IOException, XNIException {
1216:
1217: // keep track of the entity depth
1218: fEntityDepth--;
1219:
1220: } // endEntity(String)
1221:
1222: /**
1223: * Scans a character reference and append the corresponding chars to the
1224: * specified buffer.
1225: *
1226: * <p>
1227: * <pre>
1228: * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1229: * </pre>
1230: *
1231: * <strong>Note:</strong> This method uses fStringBuffer, anything in it
1232: * at the time of calling is lost.
1233: *
1234: * @param buf the character buffer to append chars to
1235: * @param buf2 the character buffer to append non-normalized chars to
1236: *
1237: * @return the character value or (-1) on conversion failure
1238: */
1239: protected int scanCharReferenceValue(XMLStringBuffer buf,
1240: XMLStringBuffer buf2) throws IOException, XNIException {
1241: // scan hexadecimal value
1242: boolean hex = false;
1243: if (fEntityScanner.skipChar('x')) {
1244: if (buf2 != null) {
1245: buf2.append('x');
1246: }
1247: hex = true;
1248: fStringBuffer3.clear();
1249: boolean digit = true;
1250:
1251: int c = fEntityScanner.peekChar();
1252: digit = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')
1253: || (c >= 'A' && c <= 'F');
1254: if (digit) {
1255: if (buf2 != null) {
1256: buf2.append((char) c);
1257: }
1258: fEntityScanner.scanChar();
1259: fStringBuffer3.append((char) c);
1260:
1261: do {
1262: c = fEntityScanner.peekChar();
1263: digit = (c >= '0' && c <= '9')
1264: || (c >= 'a' && c <= 'f')
1265: || (c >= 'A' && c <= 'F');
1266: if (digit) {
1267: if (buf2 != null) {
1268: buf2.append((char) c);
1269: }
1270: fEntityScanner.scanChar();
1271: fStringBuffer3.append((char) c);
1272: }
1273: } while (digit);
1274: } else {
1275: reportFatalError("HexdigitRequiredInCharRef", null);
1276: }
1277: }
1278:
1279: // scan decimal value
1280: else {
1281: fStringBuffer3.clear();
1282: boolean digit = true;
1283:
1284: int c = fEntityScanner.peekChar();
1285: digit = c >= '0' && c <= '9';
1286: if (digit) {
1287: if (buf2 != null) {
1288: buf2.append((char) c);
1289: }
1290: fEntityScanner.scanChar();
1291: fStringBuffer3.append((char) c);
1292:
1293: do {
1294: c = fEntityScanner.peekChar();
1295: digit = c >= '0' && c <= '9';
1296: if (digit) {
1297: if (buf2 != null) {
1298: buf2.append((char) c);
1299: }
1300: fEntityScanner.scanChar();
1301: fStringBuffer3.append((char) c);
1302: }
1303: } while (digit);
1304: } else {
1305: reportFatalError("DigitRequiredInCharRef", null);
1306: }
1307: }
1308:
1309: // end
1310: if (!fEntityScanner.skipChar(';')) {
1311: reportFatalError("SemicolonRequiredInCharRef", null);
1312: }
1313: if (buf2 != null) {
1314: buf2.append(';');
1315: }
1316:
1317: // convert string to number
1318: int value = -1;
1319: try {
1320: value = Integer.parseInt(fStringBuffer3.toString(),
1321: hex ? 16 : 10);
1322:
1323: // character reference must be a valid XML character
1324: if (isInvalid(value)) {
1325: StringBuffer errorBuf = new StringBuffer(
1326: fStringBuffer3.length + 1);
1327: if (hex)
1328: errorBuf.append('x');
1329: errorBuf.append(fStringBuffer3.ch,
1330: fStringBuffer3.offset, fStringBuffer3.length);
1331: reportFatalError("InvalidCharRef",
1332: new Object[] { errorBuf.toString() });
1333: }
1334: } catch (NumberFormatException e) {
1335: // Conversion failed, let -1 value drop through.
1336: // If we end up here, the character reference was invalid.
1337: StringBuffer errorBuf = new StringBuffer(
1338: fStringBuffer3.length + 1);
1339: if (hex)
1340: errorBuf.append('x');
1341: errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset,
1342: fStringBuffer3.length);
1343: reportFatalError("InvalidCharRef", new Object[] { errorBuf
1344: .toString() });
1345: }
1346:
1347: // append corresponding chars to the given buffer
1348: if (!XMLChar.isSupplemental(value)) {
1349: buf.append((char) value);
1350: } else {
1351: // character is supplemental, split it into surrogate chars
1352: buf.append(XMLChar.highSurrogate(value));
1353: buf.append(XMLChar.lowSurrogate(value));
1354: }
1355:
1356: // char refs notification code
1357: if (fNotifyCharRefs && value != -1) {
1358: String literal = "#" + (hex ? "x" : "")
1359: + fStringBuffer3.toString();
1360: if (!fScanningAttribute) {
1361: fCharRefLiteral = literal;
1362: }
1363: }
1364:
1365: return value;
1366: }
1367:
1368: // returns true if the given character is not
1369: // valid with respect to the version of
1370: // XML understood by this scanner.
1371: protected static boolean isInvalid(int value) {
1372: return (XMLChar.isInvalid(value));
1373: } // isInvalid(int): boolean
1374:
1375: // returns true if the given character is not
1376: // valid or may not be used outside a character reference
1377: // with respect to the version of XML understood by this scanner.
1378: protected static boolean isInvalidLiteral(int value) {
1379: return (XMLChar.isInvalid(value));
1380: } // isInvalidLiteral(int): boolean
1381:
1382: // returns true if the given character is
1383: // a valid nameChar with respect to the version of
1384: // XML understood by this scanner.
1385: protected static boolean isValidNameChar(int value) {
1386: return (XMLChar.isName(value));
1387: } // isValidNameChar(int): boolean
1388:
1389: // returns true if the given character is
1390: // a valid NCName character with respect to the version of
1391: // XML understood by this scanner.
1392: protected static boolean isValidNCName(int value) {
1393: return (XMLChar.isNCName(value));
1394: } // isValidNCName(int): boolean
1395:
1396: // returns true if the given character is
1397: // a valid nameStartChar with respect to the version of
1398: // XML understood by this scanner.
1399: protected static boolean isValidNameStartChar(int value) {
1400: return (XMLChar.isNameStart(value));
1401: } // isValidNameStartChar(int): boolean
1402:
1403: protected boolean versionSupported(String version) {
1404: return version.equals("1.0");
1405: } // version Supported
1406:
1407: /**
1408: * Scans surrogates and append them to the specified buffer.
1409: * <p>
1410: * <strong>Note:</strong> This assumes the current char has already been
1411: * identified as a high surrogate.
1412: *
1413: * @param buf The StringBuffer to append the read surrogates to.
1414: * @return True if it succeeded.
1415: */
1416: protected boolean scanSurrogates(XMLStringBuffer buf)
1417: throws IOException, XNIException {
1418:
1419: int high = fEntityScanner.scanChar();
1420: int low = fEntityScanner.peekChar();
1421: if (!XMLChar.isLowSurrogate(low)) {
1422: reportFatalError("InvalidCharInContent",
1423: new Object[] { Integer.toString(high, 16) });
1424: return false;
1425: }
1426: fEntityScanner.scanChar();
1427:
1428: // convert surrogates to supplemental character
1429: int c = XMLChar.supplemental((char) high, (char) low);
1430:
1431: // supplemental character must be a valid XML character
1432: if (isInvalid(c)) {
1433: reportFatalError("InvalidCharInContent",
1434: new Object[] { Integer.toString(c, 16) });
1435: return false;
1436: }
1437:
1438: // fill in the buffer
1439: buf.append((char) high);
1440: buf.append((char) low);
1441:
1442: return true;
1443:
1444: } // scanSurrogates():boolean
1445:
1446: /**
1447: * Convenience function used in all XML scanners.
1448: */
1449: protected void reportFatalError(String msgId, Object[] args)
1450: throws XNIException {
1451: fErrorReporter.reportError(fEntityScanner,
1452: XMLMessageFormatter.XML_DOMAIN, msgId, args,
1453: XMLErrorReporter.SEVERITY_FATAL_ERROR);
1454: }
1455:
1456: // private methods
1457: private void init() {
1458: // initialize scanner
1459: //fEntityScanner = XMLEntityReaderImpl.getEntityScanner();
1460:
1461: // initialize vars
1462: fEntityDepth = 0;
1463: fReportEntity = true;
1464: fResourceIdentifier.clear();
1465: }
1466:
1467: XMLStringBuffer getStringBuffer() {
1468: if ((fStringBufferIndex < initialCacheCount)
1469: || (fStringBufferIndex < stringBufferCache.size())) {
1470: return (XMLStringBuffer) stringBufferCache
1471: .get(fStringBufferIndex++);
1472: } else {
1473: XMLStringBuffer tmpObj = new XMLStringBuffer();
1474: stringBufferCache.add(fStringBufferIndex, tmpObj);
1475: return tmpObj;
1476: }
1477: }
1478:
1479: } // class XMLScanner
|