0001: /*
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: */
0017:
0018: package org.apache.xerces.impl;
0019:
0020: import java.io.IOException;
0021:
0022: import org.apache.xerces.impl.msg.XMLMessageFormatter;
0023: import org.apache.xerces.util.SymbolTable;
0024: import org.apache.xerces.util.XMLChar;
0025: import org.apache.xerces.util.XMLResourceIdentifierImpl;
0026: import org.apache.xerces.util.XMLStringBuffer;
0027: import org.apache.xerces.xni.Augmentations;
0028: import org.apache.xerces.xni.XMLResourceIdentifier;
0029: import org.apache.xerces.xni.XMLString;
0030: import org.apache.xerces.xni.XNIException;
0031: import org.apache.xerces.xni.parser.XMLComponent;
0032: import org.apache.xerces.xni.parser.XMLComponentManager;
0033: import org.apache.xerces.xni.parser.XMLConfigurationException;
0034:
0035: /**
0036: * This class is responsible for holding scanning methods common to
0037: * scanning the XML document structure and content as well as the DTD
0038: * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
0039: * from this base class.
0040: *
0041: * <p>
0042: * This component requires the following features and properties from the
0043: * component manager that uses it:
0044: * <ul>
0045: * <li>http://xml.org/sax/features/validation</li>
0046: * <li>http://xml.org/sax/features/namespaces</li>
0047: * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
0048: * <li>http://apache.org/xml/properties/internal/symbol-table</li>
0049: * <li>http://apache.org/xml/properties/internal/error-reporter</li>
0050: * <li>http://apache.org/xml/properties/internal/entity-manager</li>
0051: * </ul>
0052: *
0053: * @xerces.internal
0054: *
0055: * @author Andy Clark, IBM
0056: * @author Arnaud Le Hors, IBM
0057: * @author Eric Ye, IBM
0058: *
0059: * @version $Id: XMLScanner.java 572055 2007-09-02 17:55:43Z mrglavas $
0060: */
0061: public abstract class XMLScanner implements XMLComponent {
0062:
0063: //
0064: // Constants
0065: //
0066:
0067: // feature identifiers
0068:
/** Feature identifier: validation. */
protected static final String VALIDATION = Constants.SAX_FEATURE_PREFIX
+ Constants.VALIDATION_FEATURE;

/** Feature identifier: namespaces. */
protected static final String NAMESPACES = Constants.SAX_FEATURE_PREFIX
+ Constants.NAMESPACES_FEATURE;

/** Feature identifier: notify character references. */
protected static final String NOTIFY_CHAR_REFS = Constants.XERCES_FEATURE_PREFIX
+ Constants.NOTIFY_CHAR_REFS_FEATURE;

/**
* Feature identifier: parser settings. This feature is queried by
* reset(XMLComponentManager) to decide whether the properties and
* features have to be read from the component manager again.
*/
protected static final String PARSER_SETTINGS = Constants.XERCES_FEATURE_PREFIX
+ Constants.PARSER_SETTINGS;

// property identifiers

/** Property identifier: symbol table. */
protected static final String SYMBOL_TABLE = Constants.XERCES_PROPERTY_PREFIX
+ Constants.SYMBOL_TABLE_PROPERTY;

/** Property identifier: error reporter. */
protected static final String ERROR_REPORTER = Constants.XERCES_PROPERTY_PREFIX
+ Constants.ERROR_REPORTER_PROPERTY;

/** Property identifier: entity manager. */
protected static final String ENTITY_MANAGER = Constants.XERCES_PROPERTY_PREFIX
+ Constants.ENTITY_MANAGER_PROPERTY;

// debugging

/**
* Debug attribute normalization. When set to true, scanAttributeValue
* prints the intermediate attribute values to System.out.
*/
protected static final boolean DEBUG_ATTR_NORMALIZATION = false;
0102:
0103: //
0104: // Data
0105: //
0106:
0107: // features
0108:
/**
* Validation. This feature identifier is:
* http://xml.org/sax/features/validation
*/
protected boolean fValidation = false;

/**
* Namespaces. When true, scanPI scans the target as an NCName instead
* of a Name.
*/
protected boolean fNamespaces;

/** Character references notification. */
protected boolean fNotifyCharRefs = false;

/**
* Internal parser-settings feature. When false,
* reset(XMLComponentManager) only calls init() and does not read the
* properties and features from the component manager again.
*/
protected boolean fParserSettings = true;

// properties

/** Symbol table. */
protected SymbolTable fSymbolTable;

/** Error reporter. */
protected XMLErrorReporter fErrorReporter;

/** Entity manager. */
protected XMLEntityManager fEntityManager;

// protected data

/** Entity scanner. */
protected XMLEntityScanner fEntityScanner;

/**
* Entity depth. scanAttributeValue compares this value against the
* depth at which the attribute value started.
*/
protected int fEntityDepth;

/** Literal value of the last character reference scanned. */
protected String fCharRefLiteral = null;

/**
* Scanning attribute. Set to true by scanAttributeValue while it
* processes a value that needs more than a single literal scan.
*/
protected boolean fScanningAttribute;

/**
* Report entity boundary. Set to false by scanPI for the duration of
* the scan and back to true afterwards.
*/
protected boolean fReportEntity;

// symbols

/** Symbol: "version". */
protected final static String fVersionSymbol = "version".intern();

/** Symbol: "encoding". */
protected final static String fEncodingSymbol = "encoding".intern();

/** Symbol: "standalone". */
protected final static String fStandaloneSymbol = "standalone"
.intern();

/** Symbol: "amp". */
protected final static String fAmpSymbol = "amp".intern();

/** Symbol: "lt". */
protected final static String fLtSymbol = "lt".intern();

/** Symbol: "gt". */
protected final static String fGtSymbol = "gt".intern();

/** Symbol: "quot". */
protected final static String fQuotSymbol = "quot".intern();

/** Symbol: "apos". */
protected final static String fAposSymbol = "apos".intern();

// temporary variables

// NOTE: These objects are private to help prevent accidental modification
// of values by a subclass. If they were protected *and* a subclass
// modified the values, it would be difficult to track down the real
// cause of the bug. By making these private, we avoid this
// possibility.

/** String. Scratch value used by the scanning methods of this class. */
private final XMLString fString = new XMLString();

/** String buffer. Scratch buffer, cleared by the methods that use it. */
private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();

/** String buffer. Scratch buffer, cleared by the methods that use it. */
private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

/** String buffer. Scratch buffer, cleared by the methods that use it. */
private final XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();

// temporary location for Resource identification information.
protected final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
0201:
0202: //
0203: // XMLComponent methods
0204: //
0205:
0206: /**
0207: *
0208: *
0209: * @param componentManager The component manager.
0210: *
0211: * @throws SAXException Throws exception if required features and
0212: * properties cannot be found.
0213: */
0214: public void reset(XMLComponentManager componentManager)
0215: throws XMLConfigurationException {
0216:
0217: try {
0218: fParserSettings = componentManager
0219: .getFeature(PARSER_SETTINGS);
0220: } catch (XMLConfigurationException e) {
0221: fParserSettings = true;
0222: }
0223:
0224: if (!fParserSettings) {
0225: // parser settings have not been changed
0226: init();
0227: return;
0228: }
0229:
0230: // Xerces properties
0231: fSymbolTable = (SymbolTable) componentManager
0232: .getProperty(SYMBOL_TABLE);
0233: fErrorReporter = (XMLErrorReporter) componentManager
0234: .getProperty(ERROR_REPORTER);
0235: fEntityManager = (XMLEntityManager) componentManager
0236: .getProperty(ENTITY_MANAGER);
0237:
0238: // sax features
0239: try {
0240: fValidation = componentManager.getFeature(VALIDATION);
0241: } catch (XMLConfigurationException e) {
0242: fValidation = false;
0243: }
0244: try {
0245: fNamespaces = componentManager.getFeature(NAMESPACES);
0246: } catch (XMLConfigurationException e) {
0247: fNamespaces = true;
0248: }
0249: try {
0250: fNotifyCharRefs = componentManager
0251: .getFeature(NOTIFY_CHAR_REFS);
0252: } catch (XMLConfigurationException e) {
0253: fNotifyCharRefs = false;
0254: }
0255:
0256: init();
0257:
0258: } // reset(XMLComponentManager)
0259:
0260: /**
0261: * Sets the value of a property during parsing.
0262: *
0263: * @param propertyId
0264: * @param value
0265: */
0266: public void setProperty(String propertyId, Object value)
0267: throws XMLConfigurationException {
0268:
0269: // Xerces properties
0270: if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
0271: final int suffixLength = propertyId.length()
0272: - Constants.XERCES_PROPERTY_PREFIX.length();
0273:
0274: if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY
0275: .length()
0276: && propertyId
0277: .endsWith(Constants.SYMBOL_TABLE_PROPERTY)) {
0278: fSymbolTable = (SymbolTable) value;
0279: } else if (suffixLength == Constants.ERROR_REPORTER_PROPERTY
0280: .length()
0281: && propertyId
0282: .endsWith(Constants.ERROR_REPORTER_PROPERTY)) {
0283: fErrorReporter = (XMLErrorReporter) value;
0284: } else if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY
0285: .length()
0286: && propertyId
0287: .endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
0288: fEntityManager = (XMLEntityManager) value;
0289: }
0290: }
0291:
0292: } // setProperty(String,Object)
0293:
0294: /*
0295: * Sets the feature of the scanner.
0296: */
0297: public void setFeature(String featureId, boolean value)
0298: throws XMLConfigurationException {
0299:
0300: if (VALIDATION.equals(featureId)) {
0301: fValidation = value;
0302: } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
0303: fNotifyCharRefs = value;
0304: }
0305: }
0306:
0307: /*
0308: * Gets the state of the feature of the scanner.
0309: */
0310: public boolean getFeature(String featureId)
0311: throws XMLConfigurationException {
0312:
0313: if (VALIDATION.equals(featureId)) {
0314: return fValidation;
0315: } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
0316: return fNotifyCharRefs;
0317: }
0318: throw new XMLConfigurationException(
0319: XMLConfigurationException.NOT_RECOGNIZED, featureId);
0320: }
0321:
0322: //
0323: // Protected methods
0324: //
0325:
0326: // anybody calling this had better have set Symtoltable!
0327: protected void reset() {
0328: init();
0329:
0330: // DTD preparsing defaults:
0331: fValidation = true;
0332: fNotifyCharRefs = false;
0333:
0334: }
0335:
0336: // common scanning methods
0337:
/**
* Scans an XML or text declaration.
* <p>
* <pre>
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
* | ('"' ('yes' | 'no') '"'))
*
* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
* </pre>
* <p>
* The pseudo-attributes are read one at a time until a '?' is peeked.
* A small state machine (version, then encoding, then standalone)
* checks that they appear in the permitted order and that each one is
* preceded by white space. Violations are reported through
* reportFatalError.
*
* @param scanningTextDecl True if a text declaration is to
* be scanned instead of an XML
* declaration.
* @param pseudoAttributeValues An array of size 3 to return the version,
* encoding and standalone pseudo attribute values
* (in that order). An entry is null if the
* corresponding pseudo-attribute was not found.
*
* @throws IOException Declared by the signature; this method does not
* throw it directly.
* @throws XNIException Declared by the signature; this method does not
* throw it directly.
*
* <strong>Note:</strong> This method uses fString, anything in it
* at the time of calling is lost.
*/
protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
String[] pseudoAttributeValues) throws IOException,
XNIException {

// pseudo-attribute values
String version = null;
String encoding = null;
String standalone = null;

// scan pseudo-attributes
// "state" names the pseudo-attribute that is expected next
final int STATE_VERSION = 0;
final int STATE_ENCODING = 1;
final int STATE_STANDALONE = 2;
final int STATE_DONE = 3;
int state = STATE_VERSION;

// becomes true as soon as anything other than '?' follows the target
boolean dataFoundForTarget = false;
// whether white space precedes the pseudo-attribute scanned next
boolean sawSpace = fEntityScanner.skipDeclSpaces();
// since pseudoattributes are *not* attributes,
// their quotes don't need to be preserved in external parameter entities.
// the XMLEntityScanner#scanLiteral method will continue to
// emit -1 in such cases when it finds a quote; this is
// fine for other methods that parse scanned entities,
// but not for the scanning of pseudoattributes. So,
// temporarily, we must mark the current entity as not being "literal"
XMLEntityManager.ScannedEntity currEnt = fEntityManager
.getCurrentEntity();
boolean currLiteral = currEnt.literal;
currEnt.literal = false;
while (fEntityScanner.peekChar() != '?') {
dataFoundForTarget = true;
// the value of the pseudo-attribute is left in fString
String name = scanPseudoAttribute(scanningTextDecl, fString);
switch (state) {
case STATE_VERSION: {
// the symbols are interned, which is why the names are
// compared by reference here
if (name == fVersionSymbol) {
if (!sawSpace) {
reportFatalError(
scanningTextDecl ? "SpaceRequiredBeforeVersionInTextDecl"
: "SpaceRequiredBeforeVersionInXMLDecl",
null);
}
version = fString.toString();
state = STATE_ENCODING;
if (!versionSupported(version)) {
reportFatalError(getVersionNotSupportedKey(),
new Object[] { version });
}
} else if (name == fEncodingSymbol) {
// an encoding without a preceding version is only
// accepted in a text declaration
if (!scanningTextDecl) {
reportFatalError("VersionInfoRequired", null);
}
if (!sawSpace) {
reportFatalError(
scanningTextDecl ? "SpaceRequiredBeforeEncodingInTextDecl"
: "SpaceRequiredBeforeEncodingInXMLDecl",
null);
}
encoding = fString.toString();
// a text declaration has no standalone pseudo-attribute
state = scanningTextDecl ? STATE_DONE
: STATE_STANDALONE;
} else {
if (scanningTextDecl) {
reportFatalError("EncodingDeclRequired", null);
} else {
reportFatalError("VersionInfoRequired", null);
}
}
break;
}
case STATE_ENCODING: {
if (name == fEncodingSymbol) {
if (!sawSpace) {
reportFatalError(
scanningTextDecl ? "SpaceRequiredBeforeEncodingInTextDecl"
: "SpaceRequiredBeforeEncodingInXMLDecl",
null);
}
encoding = fString.toString();
state = scanningTextDecl ? STATE_DONE
: STATE_STANDALONE;
// TODO: check encoding name; set encoding on
// entity scanner
} else if (!scanningTextDecl
&& name == fStandaloneSymbol) {
// in an XML declaration the encoding may be omitted, so
// standalone is accepted directly after the version
if (!sawSpace) {
reportFatalError(
"SpaceRequiredBeforeStandalone", null);
}
standalone = fString.toString();
state = STATE_DONE;
if (!standalone.equals("yes")
&& !standalone.equals("no")) {
reportFatalError("SDDeclInvalid",
new Object[] { standalone });
}
} else {
reportFatalError("EncodingDeclRequired", null);
}
break;
}
case STATE_STANDALONE: {
if (name == fStandaloneSymbol) {
if (!sawSpace) {
reportFatalError(
"SpaceRequiredBeforeStandalone", null);
}
standalone = fString.toString();
state = STATE_DONE;
if (!standalone.equals("yes")
&& !standalone.equals("no")) {
reportFatalError("SDDeclInvalid",
new Object[] { standalone });
}
} else {
reportFatalError("EncodingDeclRequired", null);
}
break;
}
default: {
// STATE_DONE: nothing else may follow
reportFatalError("NoMorePseudoAttributes", null);
}
}
sawSpace = fEntityScanner.skipDeclSpaces();
}
// restore original literal value
// (the flag was cleared above, so it only has to be set again if it
// was true to begin with)
if (currLiteral)
currEnt.literal = true;
// REVISIT: should we remove this error reporting?
// a text declaration only reaches STATE_DONE once its encoding
// pseudo-attribute has been scanned
if (scanningTextDecl && state != STATE_DONE) {
reportFatalError("MorePseudoAttributes", null);
}

// If there is no data in the xml or text decl then we fail to report error
// for version or encoding info above.
if (scanningTextDecl) {
if (!dataFoundForTarget && encoding == null) {
reportFatalError("EncodingDeclRequired", null);
}
} else {
if (!dataFoundForTarget && version == null) {
reportFatalError("VersionInfoRequired", null);
}
}

// end
// the declaration has to be closed by "?>"
if (!fEntityScanner.skipChar('?')) {
reportFatalError("XMLDeclUnterminated", null);
}
if (!fEntityScanner.skipChar('>')) {
reportFatalError("XMLDeclUnterminated", null);

}

// fill in return array
pseudoAttributeValues[0] = version;
pseudoAttributeValues[1] = encoding;
pseudoAttributeValues[2] = standalone;

} // scanXMLDeclOrTextDecl(boolean)
0521:
0522: /**
0523: * Scans a pseudo attribute.
0524: *
0525: * @param scanningTextDecl True if scanning this pseudo-attribute for a
0526: * TextDecl; false if scanning XMLDecl. This
0527: * flag is needed to report the correct type of
0528: * error.
0529: * @param value The string to fill in with the attribute
0530: * value.
0531: *
0532: * @return The name of the attribute
0533: *
0534: * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
0535: * at the time of calling is lost.
0536: */
0537: public String scanPseudoAttribute(boolean scanningTextDecl,
0538: XMLString value) throws IOException, XNIException {
0539:
0540: // REVISIT: This method is used for generic scanning of
0541: // pseudo attributes, but since there are only three such
0542: // attributes: version, encoding, and standalone there are
0543: // for performant ways of scanning them. Every decl must
0544: // have a version, and in TextDecls this version must
0545: // be followed by an encoding declaration. Also the
0546: // methods we invoke on the scanners allow non-ASCII
0547: // characters to be parsed in the decls, but since
0548: // we don't even know what the actual encoding of the
0549: // document is until we scan the encoding declaration
0550: // you cannot reliably read any characters outside
0551: // of the ASCII range here. -- mrglavas
0552: String name = fEntityScanner.scanName();
0553: XMLEntityManager.print(fEntityManager.getCurrentEntity());
0554: if (name == null) {
0555: reportFatalError("PseudoAttrNameExpected", null);
0556: }
0557: fEntityScanner.skipDeclSpaces();
0558: if (!fEntityScanner.skipChar('=')) {
0559: reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
0560: : "EqRequiredInXMLDecl", new Object[] { name });
0561: }
0562: fEntityScanner.skipDeclSpaces();
0563: int quote = fEntityScanner.peekChar();
0564: if (quote != '\'' && quote != '"') {
0565: reportFatalError(
0566: scanningTextDecl ? "QuoteRequiredInTextDecl"
0567: : "QuoteRequiredInXMLDecl",
0568: new Object[] { name });
0569: }
0570: fEntityScanner.scanChar();
0571: int c = fEntityScanner.scanLiteral(quote, value);
0572: if (c != quote) {
0573: fStringBuffer2.clear();
0574: do {
0575: fStringBuffer2.append(value);
0576: if (c != -1) {
0577: if (c == '&' || c == '%' || c == '<' || c == ']') {
0578: fStringBuffer2.append((char) fEntityScanner
0579: .scanChar());
0580: }
0581: // REVISIT: Even if you could reliably read non-ASCII chars
0582: // why bother scanning for surrogates here? Only ASCII chars
0583: // match the productions in XMLDecls and TextDecls. -- mrglavas
0584: else if (XMLChar.isHighSurrogate(c)) {
0585: scanSurrogates(fStringBuffer2);
0586: } else if (isInvalidLiteral(c)) {
0587: String key = scanningTextDecl ? "InvalidCharInTextDecl"
0588: : "InvalidCharInXMLDecl";
0589: reportFatalError(key, new Object[] { Integer
0590: .toString(c, 16) });
0591: fEntityScanner.scanChar();
0592: }
0593: }
0594: c = fEntityScanner.scanLiteral(quote, value);
0595: } while (c != quote);
0596: fStringBuffer2.append(value);
0597: value.setValues(fStringBuffer2);
0598: }
0599: if (!fEntityScanner.skipChar(quote)) {
0600: reportFatalError(
0601: scanningTextDecl ? "CloseQuoteMissingInTextDecl"
0602: : "CloseQuoteMissingInXMLDecl",
0603: new Object[] { name });
0604: }
0605:
0606: // return
0607: return name;
0608:
0609: } // scanPseudoAttribute(XMLString):String
0610:
0611: /**
0612: * Scans a processing instruction.
0613: * <p>
0614: * <pre>
0615: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
0616: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
0617: * </pre>
0618: * <strong>Note:</strong> This method uses fString, anything in it
0619: * at the time of calling is lost.
0620: */
0621: protected void scanPI() throws IOException, XNIException {
0622:
0623: // target
0624: fReportEntity = false;
0625: String target = null;
0626: if (fNamespaces) {
0627: target = fEntityScanner.scanNCName();
0628: } else {
0629: target = fEntityScanner.scanName();
0630: }
0631: if (target == null) {
0632: reportFatalError("PITargetRequired", null);
0633: }
0634:
0635: // scan data
0636: scanPIData(target, fString);
0637: fReportEntity = true;
0638:
0639: } // scanPI()
0640:
0641: /**
0642: * Scans a processing data. This is needed to handle the situation
0643: * where a document starts with a processing instruction whose
0644: * target name <em>starts with</em> "xml". (e.g. xmlfoo)
0645: *
0646: * <strong>Note:</strong> This method uses fStringBuffer, anything in it
0647: * at the time of calling is lost.
0648: *
0649: * @param target The PI target
0650: * @param data The string to fill in with the data
0651: */
0652: protected void scanPIData(String target, XMLString data)
0653: throws IOException, XNIException {
0654:
0655: // check target
0656: if (target.length() == 3) {
0657: char c0 = Character.toLowerCase(target.charAt(0));
0658: char c1 = Character.toLowerCase(target.charAt(1));
0659: char c2 = Character.toLowerCase(target.charAt(2));
0660: if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
0661: reportFatalError("ReservedPITarget", null);
0662: }
0663: }
0664:
0665: // spaces
0666: if (!fEntityScanner.skipSpaces()) {
0667: if (fEntityScanner.skipString("?>")) {
0668: // we found the end, there is no data
0669: data.clear();
0670: return;
0671: } else {
0672: if (fNamespaces && fEntityScanner.peekChar() == ':') {
0673: fEntityScanner.scanChar();
0674: XMLStringBuffer colonName = new XMLStringBuffer(
0675: target);
0676: colonName.append(":");
0677: String str = fEntityScanner.scanName();
0678: if (str != null)
0679: colonName.append(str);
0680: reportFatalError("ColonNotLegalWithNS",
0681: new Object[] { colonName.toString() });
0682: fEntityScanner.skipSpaces();
0683: } else {
0684: // if there is data there should be some space
0685: reportFatalError("SpaceRequiredInPI", null);
0686: }
0687: }
0688: }
0689:
0690: fStringBuffer.clear();
0691: // data
0692: if (fEntityScanner.scanData("?>", fStringBuffer)) {
0693: do {
0694: int c = fEntityScanner.peekChar();
0695: if (c != -1) {
0696: if (XMLChar.isHighSurrogate(c)) {
0697: scanSurrogates(fStringBuffer);
0698: } else if (isInvalidLiteral(c)) {
0699: reportFatalError("InvalidCharInPI",
0700: new Object[] { Integer.toHexString(c) });
0701: fEntityScanner.scanChar();
0702: }
0703: }
0704: } while (fEntityScanner.scanData("?>", fStringBuffer));
0705: }
0706: data.setValues(fStringBuffer);
0707:
0708: } // scanPIData(String,XMLString)
0709:
0710: /**
0711: * Scans a comment.
0712: * <p>
0713: * <pre>
0714: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
0715: * </pre>
0716: * <p>
0717: * <strong>Note:</strong> Called after scanning past '<!--'
0718: * <strong>Note:</strong> This method uses fString, anything in it
0719: * at the time of calling is lost.
0720: *
0721: * @param text The buffer to fill in with the text.
0722: */
0723: protected void scanComment(XMLStringBuffer text)
0724: throws IOException, XNIException {
0725:
0726: // text
0727: // REVISIT: handle invalid character, eof
0728: text.clear();
0729: while (fEntityScanner.scanData("--", text)) {
0730: int c = fEntityScanner.peekChar();
0731: if (c != -1) {
0732: if (XMLChar.isHighSurrogate(c)) {
0733: scanSurrogates(text);
0734: } else if (isInvalidLiteral(c)) {
0735: reportFatalError("InvalidCharInComment",
0736: new Object[] { Integer.toHexString(c) });
0737: fEntityScanner.scanChar();
0738: }
0739: }
0740: }
0741: if (!fEntityScanner.skipChar('>')) {
0742: reportFatalError("DashDashInComment", null);
0743: }
0744:
0745: } // scanComment()
0746:
/**
* Scans an attribute value and normalizes whitespace converting all
* whitespace characters to space characters.
*
* [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
*
* Two values are built in parallel: the normalized value (collected in
* fStringBuffer) and the non-normalized value (collected in
* fStringBuffer2). Text is only added to the non-normalized value while
* the entity depth equals the depth at which the attribute value
* started.
*
* @param value The XMLString to fill in with the value.
* @param nonNormalizedValue The XMLString to fill in with the
* non-normalized value.
* @param atName The name of the attribute being parsed (for error msgs).
* @param checkEntities true if undeclared entities should be reported as VC violation,
* false if undeclared entities should be reported as WFC violation.
* @param eleName The name of element to which this attribute belongs.
*
* @return true if the non-normalized and normalized value are the same
*
* <strong>Note:</strong> This method uses fStringBuffer, fStringBuffer2
* and fStringBuffer3, anything in them at the time of calling may be
* lost.
**/
protected boolean scanAttributeValue(XMLString value,
XMLString nonNormalizedValue, String atName,
boolean checkEntities, String eleName) throws IOException,
XNIException {
// quote
int quote = fEntityScanner.peekChar();
if (quote != '\'' && quote != '"') {
reportFatalError("OpenQuoteExpected", new Object[] {
eleName, atName });
}

fEntityScanner.scanChar();
// remember the depth at which the value starts; the closing quote is
// only accepted at this depth (see the loop condition below)
int entityDepth = fEntityDepth;

// NOTE(review): c appears to be the character at which the literal
// scan stopped -- confirm against XMLEntityScanner#scanLiteral
int c = fEntityScanner.scanLiteral(quote, value);
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** scanLiteral -> \""
+ value.toString() + "\"");
}

// fast path: the first scan reached the closing quote and
// isUnchangedByNormalization returned -1 for the scanned text
int fromIndex = 0;
if (c == quote
&& (fromIndex = isUnchangedByNormalization(value)) == -1) {
/** Both the non-normalized and normalized attribute values are equal. **/
nonNormalizedValue.setValues(value);
int cquote = fEntityScanner.scanChar();
if (cquote != quote) {
reportFatalError("CloseQuoteExpected", new Object[] {
eleName, atName });
}
return true;
}
// keep a copy of the text as scanned before it is normalized in place
fStringBuffer2.clear();
fStringBuffer2.append(value);
normalizeWhitespace(value, fromIndex);
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** normalizeWhitespace -> \""
+ value.toString() + "\"");
}
if (c != quote) {
fScanningAttribute = true;
fStringBuffer.clear();
do {
// add the (already normalized) text of the previous scan
fStringBuffer.append(value);
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** value2: \""
+ fStringBuffer.toString() + "\"");
}
if (c == '&') {
// character or entity reference
fEntityScanner.skipChar('&');
if (entityDepth == fEntityDepth) {
fStringBuffer2.append('&');
}
if (fEntityScanner.skipChar('#')) {
if (entityDepth == fEntityDepth) {
fStringBuffer2.append('#');
}
// both buffers are handed to scanCharReferenceValue
int ch = scanCharReferenceValue(fStringBuffer,
fStringBuffer2);
if (ch != -1) {
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** value3: \""
+ fStringBuffer.toString()
+ "\"");
}
}
} else {
String entityName = fEntityScanner.scanName();
if (entityName == null) {
reportFatalError("NameRequiredInReference",
null);
} else if (entityDepth == fEntityDepth) {
fStringBuffer2.append(entityName);
}
if (!fEntityScanner.skipChar(';')) {
reportFatalError(
"SemicolonRequiredInReference",
new Object[] { entityName });
} else if (entityDepth == fEntityDepth) {
fStringBuffer2.append(';');
}
// the five predefined entities are replaced directly; the
// name is compared by reference against the interned symbols
if (entityName == fAmpSymbol) {
fStringBuffer.append('&');
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** value5: \""
+ fStringBuffer.toString()
+ "\"");
}
} else if (entityName == fAposSymbol) {
fStringBuffer.append('\'');
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** value7: \""
+ fStringBuffer.toString()
+ "\"");
}
} else if (entityName == fLtSymbol) {
fStringBuffer.append('<');
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** value9: \""
+ fStringBuffer.toString()
+ "\"");
}
} else if (entityName == fGtSymbol) {
fStringBuffer.append('>');
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** valueB: \""
+ fStringBuffer.toString()
+ "\"");
}
} else if (entityName == fQuotSymbol) {
fStringBuffer.append('"');
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** valueD: \""
+ fStringBuffer.toString()
+ "\"");
}
} else {
// any other entity: external entities are rejected here,
// everything else is started through the entity manager
if (fEntityManager
.isExternalEntity(entityName)) {
reportFatalError(
"ReferenceToExternalEntity",
new Object[] { entityName });
} else {
if (!fEntityManager
.isDeclaredEntity(entityName)) {
//WFC & VC: Entity Declared
if (checkEntities) {
// validity constraint: only reported when validating
if (fValidation) {
fErrorReporter
.reportError(
XMLMessageFormatter.XML_DOMAIN,
"EntityNotDeclared",
new Object[] { entityName },
XMLErrorReporter.SEVERITY_ERROR);
}
} else {
reportFatalError(
"EntityNotDeclared",
new Object[] { entityName });
}
}
fEntityManager.startEntity(entityName,
true);
}
}
}
} else if (c == '<') {
// reported as an error; the character is only kept in the
// non-normalized value
reportFatalError("LessthanInAttValue",
new Object[] { eleName, atName });
fEntityScanner.scanChar();
if (entityDepth == fEntityDepth) {
fStringBuffer2.append((char) c);
}
} else if (c == '%' || c == ']') {
// taken over as plain characters
fEntityScanner.scanChar();
fStringBuffer.append((char) c);
if (entityDepth == fEntityDepth) {
fStringBuffer2.append((char) c);
}
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** valueF: \""
+ fStringBuffer.toString() + "\"");
}
} else if (c == '\n' || c == '\r') {
// a line break becomes a space in the normalized value and a
// '\n' in the non-normalized value
fEntityScanner.scanChar();
fStringBuffer.append(' ');
if (entityDepth == fEntityDepth) {
fStringBuffer2.append('\n');
}
} else if (c != -1 && XMLChar.isHighSurrogate(c)) {
// the surrogate pair is collected in fStringBuffer3 first so
// that it can be appended to both values
fStringBuffer3.clear();
if (scanSurrogates(fStringBuffer3)) {
fStringBuffer.append(fStringBuffer3);
if (entityDepth == fEntityDepth) {
fStringBuffer2.append(fStringBuffer3);
}
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** valueI: \""
+ fStringBuffer.toString() + "\"");
}
}
} else if (c != -1 && isInvalidLiteral(c)) {
reportFatalError("InvalidCharInAttValue",
new Object[] { eleName, atName,
Integer.toString(c, 16) });
fEntityScanner.scanChar();
if (entityDepth == fEntityDepth) {
fStringBuffer2.append((char) c);
}
}
// scan the next run of text; it is recorded as-is for the
// non-normalized value and normalized in place afterwards
c = fEntityScanner.scanLiteral(quote, value);
if (entityDepth == fEntityDepth) {
fStringBuffer2.append(value);
}
normalizeWhitespace(value);
// a quote only ends the value at the depth where the value started
} while (c != quote || entityDepth != fEntityDepth);
fStringBuffer.append(value);
if (DEBUG_ATTR_NORMALIZATION) {
System.out.println("** valueN: \""
+ fStringBuffer.toString() + "\"");
}
value.setValues(fStringBuffer);
fScanningAttribute = false;
}
nonNormalizedValue.setValues(fStringBuffer2);

// quote
int cquote = fEntityScanner.scanChar();
if (cquote != quote) {
reportFatalError("CloseQuoteExpected", new Object[] {
eleName, atName });
}
// compare the non-normalized value with the normalized characters
return nonNormalizedValue.equals(value.ch, value.offset,
value.length);

} // scanAttributeValue()
0982:
0983: /**
0984: * Scans External ID and return the public and system IDs.
0985: *
0986: * @param identifiers An array of size 2 to return the system id,
0987: * and public id (in that order).
0988: * @param optionalSystemId Specifies whether the system id is optional.
0989: *
0990: * <strong>Note:</strong> This method uses fString and fStringBuffer,
0991: * anything in them at the time of calling is lost.
0992: */
0993: protected void scanExternalID(String[] identifiers,
0994: boolean optionalSystemId) throws IOException, XNIException {
0995:
0996: String systemId = null;
0997: String publicId = null;
0998: if (fEntityScanner.skipString("PUBLIC")) {
0999: if (!fEntityScanner.skipSpaces()) {
1000: reportFatalError("SpaceRequiredAfterPUBLIC", null);
1001: }
1002: scanPubidLiteral(fString);
1003: publicId = fString.toString();
1004:
1005: if (!fEntityScanner.skipSpaces() && !optionalSystemId) {
1006: reportFatalError("SpaceRequiredBetweenPublicAndSystem",
1007: null);
1008: }
1009: }
1010:
1011: if (publicId != null || fEntityScanner.skipString("SYSTEM")) {
1012: if (publicId == null && !fEntityScanner.skipSpaces()) {
1013: reportFatalError("SpaceRequiredAfterSYSTEM", null);
1014: }
1015: int quote = fEntityScanner.peekChar();
1016: if (quote != '\'' && quote != '"') {
1017: if (publicId != null && optionalSystemId) {
1018: // looks like we don't have any system id
1019: // simply return the public id
1020: identifiers[0] = null;
1021: identifiers[1] = publicId;
1022: return;
1023: }
1024: reportFatalError("QuoteRequiredInSystemID", null);
1025: }
1026: fEntityScanner.scanChar();
1027: XMLString ident = fString;
1028: if (fEntityScanner.scanLiteral(quote, ident) != quote) {
1029: fStringBuffer.clear();
1030: do {
1031: fStringBuffer.append(ident);
1032: int c = fEntityScanner.peekChar();
1033: if (XMLChar.isMarkup(c) || c == ']') {
1034: fStringBuffer.append((char) fEntityScanner
1035: .scanChar());
1036: }
1037: } while (fEntityScanner.scanLiteral(quote, ident) != quote);
1038: fStringBuffer.append(ident);
1039: ident = fStringBuffer;
1040: }
1041: systemId = ident.toString();
1042: if (!fEntityScanner.skipChar(quote)) {
1043: reportFatalError("SystemIDUnterminated", null);
1044: }
1045: }
1046:
1047: // store result in array
1048: identifiers[0] = systemId;
1049: identifiers[1] = publicId;
1050: }
1051:
1052: /**
1053: * Scans public ID literal.
1054: *
1055: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1056: * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
1057: *
1058: * The returned string is normalized according to the following rule,
1059: * from http://www.w3.org/TR/REC-xml#dt-pubid:
1060: *
1061: * Before a match is attempted, all strings of white space in the public
1062: * identifier must be normalized to single space characters (#x20), and
1063: * leading and trailing white space must be removed.
1064: *
1065: * @param literal The string to fill in with the public ID literal.
1066: * @return True on success.
1067: *
1068: * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
1069: * the time of calling is lost.
1070: */
1071: protected boolean scanPubidLiteral(XMLString literal)
1072: throws IOException, XNIException {
1073: int quote = fEntityScanner.scanChar();
1074: if (quote != '\'' && quote != '"') {
1075: reportFatalError("QuoteRequiredInPublicID", null);
1076: return false;
1077: }
1078:
1079: fStringBuffer.clear();
1080: // skip leading whitespace
1081: boolean skipSpace = true;
1082: boolean dataok = true;
1083: while (true) {
1084: int c = fEntityScanner.scanChar();
1085: if (c == ' ' || c == '\n' || c == '\r') {
1086: if (!skipSpace) {
1087: // take the first whitespace as a space and skip the others
1088: fStringBuffer.append(' ');
1089: skipSpace = true;
1090: }
1091: } else if (c == quote) {
1092: if (skipSpace) {
1093: // if we finished on a space let's trim it
1094: fStringBuffer.length--;
1095: }
1096: literal.setValues(fStringBuffer);
1097: break;
1098: } else if (XMLChar.isPubid(c)) {
1099: fStringBuffer.append((char) c);
1100: skipSpace = false;
1101: } else if (c == -1) {
1102: reportFatalError("PublicIDUnterminated", null);
1103: return false;
1104: } else {
1105: dataok = false;
1106: reportFatalError("InvalidCharInPublicID",
1107: new Object[] { Integer.toHexString(c) });
1108: }
1109: }
1110: return dataok;
1111: }
1112:
1113: /**
1114: * Normalize whitespace in an XMLString converting all whitespace
1115: * characters to space characters.
1116: */
1117: protected void normalizeWhitespace(XMLString value) {
1118: int end = value.offset + value.length;
1119: for (int i = value.offset; i < end; ++i) {
1120: int c = value.ch[i];
1121: // Performance: For XML 1.0 documents take advantage of
1122: // the fact that the only legal characters below 0x20
1123: // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
1124: // already determined the well-formedness of these
1125: // characters it is sufficient (and safe) to check
1126: // against 0x20. -- mrglavas
1127: if (c < 0x20) {
1128: value.ch[i] = ' ';
1129: }
1130: }
1131: }
1132:
1133: /**
1134: * Normalize whitespace in an XMLString converting all whitespace
1135: * characters to space characters.
1136: */
1137: protected void normalizeWhitespace(XMLString value, int fromIndex) {
1138: int end = value.offset + value.length;
1139: for (int i = value.offset + fromIndex; i < end; ++i) {
1140: int c = value.ch[i];
1141: // Performance: For XML 1.0 documents take advantage of
1142: // the fact that the only legal characters below 0x20
1143: // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
1144: // already determined the well-formedness of these
1145: // characters it is sufficient (and safe) to check
1146: // against 0x20. -- mrglavas
1147: if (c < 0x20) {
1148: value.ch[i] = ' ';
1149: }
1150: }
1151: }
1152:
1153: /**
1154: * Checks whether this string would be unchanged by normalization.
1155: *
1156: * @return -1 if the value would be unchanged by normalization,
1157: * otherwise the index of the first whitespace character which
1158: * would be transformed.
1159: */
1160: protected int isUnchangedByNormalization(XMLString value) {
1161: int end = value.offset + value.length;
1162: for (int i = value.offset; i < end; ++i) {
1163: int c = value.ch[i];
1164: // Performance: For XML 1.0 documents take advantage of
1165: // the fact that the only legal characters below 0x20
1166: // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
1167: // already determined the well-formedness of these
1168: // characters it is sufficient (and safe) to check
1169: // against 0x20. -- mrglavas
1170: if (c < 0x20) {
1171: return i - value.offset;
1172: }
1173: }
1174: return -1;
1175: }
1176:
1177: //
1178: // XMLEntityHandler methods
1179: //
1180:
1181: /**
1182: * This method notifies of the start of an entity. The document entity
1183: * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1184: * parameter entity names start with '%'; and general entities are just
1185: * specified by their name.
1186: *
1187: * @param name The name of the entity.
1188: * @param identifier The resource identifier.
1189: * @param encoding The auto-detected IANA encoding name of the entity
1190: * stream. This value will be null in those situations
1191: * where the entity encoding is not auto-detected (e.g.
1192: * internal entities or a document entity that is
1193: * parsed from a java.io.Reader).
1194: * @param augs Additional information that may include infoset augmentations
1195: *
1196: * @throws XNIException Thrown by handler to signal an error.
1197: */
1198: public void startEntity(String name,
1199: XMLResourceIdentifier identifier, String encoding,
1200: Augmentations augs) throws XNIException {
1201:
1202: // keep track of the entity depth
1203: fEntityDepth++;
1204: // must reset entity scanner
1205: fEntityScanner = fEntityManager.getEntityScanner();
1206:
1207: } // startEntity(String,XMLResourceIdentifier,String)
1208:
1209: /**
1210: * This method notifies the end of an entity. The document entity has
1211: * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1212: * parameter entity names start with '%'; and general entities are just
1213: * specified by their name.
1214: *
1215: * @param name The name of the entity.
1216: * @param augs Additional information that may include infoset augmentations
1217: *
1218: * @throws XNIException Thrown by handler to signal an error.
1219: */
1220: public void endEntity(String name, Augmentations augs)
1221: throws XNIException {
1222:
1223: // keep track of the entity depth
1224: fEntityDepth--;
1225:
1226: } // endEntity(String)
1227:
1228: /**
1229: * Scans a character reference and append the corresponding chars to the
1230: * specified buffer.
1231: *
1232: * <p>
1233: * <pre>
1234: * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1235: * </pre>
1236: *
1237: * <strong>Note:</strong> This method uses fStringBuffer, anything in it
1238: * at the time of calling is lost.
1239: *
1240: * @param buf the character buffer to append chars to
1241: * @param buf2 the character buffer to append non-normalized chars to
1242: *
1243: * @return the character value or (-1) on conversion failure
1244: */
1245: protected int scanCharReferenceValue(XMLStringBuffer buf,
1246: XMLStringBuffer buf2) throws IOException, XNIException {
1247:
1248: // scan hexadecimal value
1249: boolean hex = false;
1250: if (fEntityScanner.skipChar('x')) {
1251: if (buf2 != null) {
1252: buf2.append('x');
1253: }
1254: hex = true;
1255: fStringBuffer3.clear();
1256: boolean digit = true;
1257:
1258: int c = fEntityScanner.peekChar();
1259: digit = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')
1260: || (c >= 'A' && c <= 'F');
1261: if (digit) {
1262: if (buf2 != null) {
1263: buf2.append((char) c);
1264: }
1265: fEntityScanner.scanChar();
1266: fStringBuffer3.append((char) c);
1267:
1268: do {
1269: c = fEntityScanner.peekChar();
1270: digit = (c >= '0' && c <= '9')
1271: || (c >= 'a' && c <= 'f')
1272: || (c >= 'A' && c <= 'F');
1273: if (digit) {
1274: if (buf2 != null) {
1275: buf2.append((char) c);
1276: }
1277: fEntityScanner.scanChar();
1278: fStringBuffer3.append((char) c);
1279: }
1280: } while (digit);
1281: } else {
1282: reportFatalError("HexdigitRequiredInCharRef", null);
1283: }
1284: }
1285:
1286: // scan decimal value
1287: else {
1288: fStringBuffer3.clear();
1289: boolean digit = true;
1290:
1291: int c = fEntityScanner.peekChar();
1292: digit = c >= '0' && c <= '9';
1293: if (digit) {
1294: if (buf2 != null) {
1295: buf2.append((char) c);
1296: }
1297: fEntityScanner.scanChar();
1298: fStringBuffer3.append((char) c);
1299:
1300: do {
1301: c = fEntityScanner.peekChar();
1302: digit = c >= '0' && c <= '9';
1303: if (digit) {
1304: if (buf2 != null) {
1305: buf2.append((char) c);
1306: }
1307: fEntityScanner.scanChar();
1308: fStringBuffer3.append((char) c);
1309: }
1310: } while (digit);
1311: } else {
1312: reportFatalError("DigitRequiredInCharRef", null);
1313: }
1314: }
1315:
1316: // end
1317: if (!fEntityScanner.skipChar(';')) {
1318: reportFatalError("SemicolonRequiredInCharRef", null);
1319: }
1320: if (buf2 != null) {
1321: buf2.append(';');
1322: }
1323:
1324: // convert string to number
1325: int value = -1;
1326: try {
1327: value = Integer.parseInt(fStringBuffer3.toString(),
1328: hex ? 16 : 10);
1329:
1330: // character reference must be a valid XML character
1331: if (isInvalid(value)) {
1332: StringBuffer errorBuf = new StringBuffer(
1333: fStringBuffer3.length + 1);
1334: if (hex)
1335: errorBuf.append('x');
1336: errorBuf.append(fStringBuffer3.ch,
1337: fStringBuffer3.offset, fStringBuffer3.length);
1338: reportFatalError("InvalidCharRef",
1339: new Object[] { errorBuf.toString() });
1340: }
1341: } catch (NumberFormatException e) {
1342: // Conversion failed, let -1 value drop through.
1343: // If we end up here, the character reference was invalid.
1344: StringBuffer errorBuf = new StringBuffer(
1345: fStringBuffer3.length + 1);
1346: if (hex)
1347: errorBuf.append('x');
1348: errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset,
1349: fStringBuffer3.length);
1350: reportFatalError("InvalidCharRef", new Object[] { errorBuf
1351: .toString() });
1352: }
1353:
1354: // append corresponding chars to the given buffer
1355: if (!XMLChar.isSupplemental(value)) {
1356: buf.append((char) value);
1357: } else {
1358: // character is supplemental, split it into surrogate chars
1359: buf.append(XMLChar.highSurrogate(value));
1360: buf.append(XMLChar.lowSurrogate(value));
1361: }
1362:
1363: // char refs notification code
1364: if (fNotifyCharRefs && value != -1) {
1365: String literal = "#" + (hex ? "x" : "")
1366: + fStringBuffer3.toString();
1367: if (!fScanningAttribute) {
1368: fCharRefLiteral = literal;
1369: }
1370: }
1371:
1372: return value;
1373: }
1374:
1375: // returns true if the given character is not
1376: // valid with respect to the version of
1377: // XML understood by this scanner.
1378: protected boolean isInvalid(int value) {
1379: return (XMLChar.isInvalid(value));
1380: } // isInvalid(int): boolean
1381:
1382: // returns true if the given character is not
1383: // valid or may not be used outside a character reference
1384: // with respect to the version of XML understood by this scanner.
1385: protected boolean isInvalidLiteral(int value) {
1386: return (XMLChar.isInvalid(value));
1387: } // isInvalidLiteral(int): boolean
1388:
1389: // returns true if the given character is
1390: // a valid nameChar with respect to the version of
1391: // XML understood by this scanner.
1392: protected boolean isValidNameChar(int value) {
1393: return (XMLChar.isName(value));
1394: } // isValidNameChar(int): boolean
1395:
1396: // returns true if the given character is
1397: // a valid nameStartChar with respect to the version of
1398: // XML understood by this scanner.
1399: protected boolean isValidNameStartChar(int value) {
1400: return (XMLChar.isNameStart(value));
1401: } // isValidNameStartChar(int): boolean
1402:
1403: // returns true if the given character is
1404: // a valid NCName character with respect to the version of
1405: // XML understood by this scanner.
1406: protected boolean isValidNCName(int value) {
1407: return (XMLChar.isNCName(value));
1408: } // isValidNCName(int): boolean
1409:
1410: // returns true if the given character is
1411: // a valid high surrogate for a nameStartChar
1412: // with respect to the version of XML understood
1413: // by this scanner.
1414: protected boolean isValidNameStartHighSurrogate(int value) {
1415: return false;
1416: } // isValidNameStartHighSurrogate(int): boolean
1417:
1418: protected boolean versionSupported(String version) {
1419: return version.equals("1.0");
1420: } // version Supported
1421:
1422: // returns the error message key for unsupported
1423: // versions of XML with respect to the version of
1424: // XML understood by this scanner.
1425: protected String getVersionNotSupportedKey() {
1426: return "VersionNotSupported";
1427: } // getVersionNotSupportedKey: String
1428:
1429: /**
1430: * Scans surrogates and append them to the specified buffer.
1431: * <p>
1432: * <strong>Note:</strong> This assumes the current char has already been
1433: * identified as a high surrogate.
1434: *
1435: * @param buf The StringBuffer to append the read surrogates to.
1436: * @return True if it succeeded.
1437: */
1438: protected boolean scanSurrogates(XMLStringBuffer buf)
1439: throws IOException, XNIException {
1440:
1441: int high = fEntityScanner.scanChar();
1442: int low = fEntityScanner.peekChar();
1443: if (!XMLChar.isLowSurrogate(low)) {
1444: reportFatalError("InvalidCharInContent",
1445: new Object[] { Integer.toString(high, 16) });
1446: return false;
1447: }
1448: fEntityScanner.scanChar();
1449:
1450: // convert surrogates to supplemental character
1451: int c = XMLChar.supplemental((char) high, (char) low);
1452:
1453: // supplemental character must be a valid XML character
1454: if (isInvalid(c)) {
1455: reportFatalError("InvalidCharInContent",
1456: new Object[] { Integer.toString(c, 16) });
1457: return false;
1458: }
1459:
1460: // fill in the buffer
1461: buf.append((char) high);
1462: buf.append((char) low);
1463:
1464: return true;
1465:
1466: } // scanSurrogates():boolean
1467:
1468: /**
1469: * Convenience function used in all XML scanners.
1470: */
1471: protected void reportFatalError(String msgId, Object[] args)
1472: throws XNIException {
1473: fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1474: msgId, args, XMLErrorReporter.SEVERITY_FATAL_ERROR);
1475: }
1476:
1477: // private methods
1478: private void init() {
1479: fEntityScanner = null;
1480: // initialize vars
1481: fEntityDepth = 0;
1482: fReportEntity = true;
1483: fResourceIdentifier.clear();
1484: }
1485:
1486: } // class XMLScanner
|