0001: /*
0002: * The Apache Software License, Version 1.1
0003: *
0004: *
0005: * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
0006: * reserved.
0007: *
0008: * Redistribution and use in source and binary forms, with or without
0009: * modification, are permitted provided that the following conditions
0010: * are met:
0011: *
0012: * 1. Redistributions of source code must retain the above copyright
0013: * notice, this list of conditions and the following disclaimer.
0014: *
0015: * 2. Redistributions in binary form must reproduce the above copyright
0016: * notice, this list of conditions and the following disclaimer in
0017: * the documentation and/or other materials provided with the
0018: * distribution.
0019: *
0020: * 3. The end-user documentation included with the redistribution,
0021: * if any, must include the following acknowledgment:
0022: * "This product includes software developed by the
0023: * Apache Software Foundation (http://www.apache.org/)."
0024: * Alternately, this acknowledgment may appear in the software itself,
0025: * if and wherever such third-party acknowledgments normally appear.
0026: *
0027: * 4. The names "Xerces" and "Apache Software Foundation" must
0028: * not be used to endorse or promote products derived from this
0029: * software without prior written permission. For written
0030: * permission, please contact apache@apache.org.
0031: *
0032: * 5. Products derived from this software may not be called "Apache",
0033: * nor may "Apache" appear in their name, without prior written
0034: * permission of the Apache Software Foundation.
0035: *
0036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
0040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
0043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
0045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
0046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0047: * SUCH DAMAGE.
0048: * ====================================================================
0049: *
0050: * This software consists of voluntary contributions made by many
0051: * individuals on behalf of the Apache Software Foundation and was
0052: * originally based on software copyright (c) 1999, International
0053: * Business Machines, Inc., http://www.apache.org. For more
0054: * information on the Apache Software Foundation, please see
0055: * <http://www.apache.org/>.
0056: */
0057:
0058: package org.apache.xerces.framework;
0059:
0060: import org.apache.xerces.readers.XMLEntityHandler;
0061: import org.apache.xerces.readers.DefaultEntityHandler;
0062: import org.apache.xerces.utils.ChunkyCharArray;
0063: import org.apache.xerces.utils.QName;
0064: import org.apache.xerces.utils.StringPool;
0065: import org.apache.xerces.utils.XMLCharacterProperties;
0066: import org.apache.xerces.utils.XMLMessages;
0067: import org.apache.xerces.validators.common.GrammarResolver;
0068:
0069: import org.xml.sax.Locator;
0070: import org.xml.sax.SAXParseException;
0071:
0072: /**
0073: * This class recognizes most of the grammar for an XML processor.
0074: * Additional support is provided by the XMLEntityHandler, via the
0075: * XMLEntityReader instances it creates, which are used to process
0076: * simple constructs like string literals and character data between
0077: * markup. The XMLDTDScanner class contains the remaining support
0078: * for the grammar of DTD declarations. When a <!DOCTYPE ...> is
0079: * found in the document, the scanDoctypeDecl method will then be
0080: * called and the XMLDocumentScanner subclass is responsible for
0081: * "connecting" that method to the corresponding method provided
0082: * by the XMLDTDScanner class.
0083: *
0084: * @version $Id: XMLDocumentScanner.java,v 1.12 2001/08/08 18:20:19 neilg Exp $
0085: */
0086: public final class XMLDocumentScanner {
0087: //
0088: // Constants
0089: //
0090:
0091: //
0092: // These character arrays are used as parameters for calls to the
0093: // XMLEntityHandler.EntityReader skippedString() method. Some have
0094: // package access for use by the inner dispatcher classes.
0095: //
0096:
0097: //
0098: // [19] CDStart ::= '<![CDATA['
0099: //
0100: static final char[] cdata_string = { '[', 'C', 'D', 'A', 'T', 'A',
0101: '[' };
0102: //
0103: // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
0104: // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
0105: //
0106: static final char[] xml_string = { 'x', 'm', 'l' };
0107: //
0108: // [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
0109: //
0110: private static final char[] version_string = { 'v', 'e', 'r', 's',
0111: 'i', 'o', 'n' };
0112: //
0113: // [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
0114: // ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
0115: //
0116: static final char[] doctype_string = { 'D', 'O', 'C', 'T', 'Y',
0117: 'P', 'E' };
0118: //
0119: // [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
0120: // | ('"' ('yes' | 'no') '"'))
0121: //
0122: private static final char[] standalone_string = { 's', 't', 'a',
0123: 'n', 'd', 'a', 'l', 'o', 'n', 'e' };
0124: //
0125: // [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
0126: //
0127: private static final char[] encoding_string = { 'e', 'n', 'c', 'o',
0128: 'd', 'i', 'n', 'g' };
0129:
0130: /*
0131: * Return values for the EventHandler scanAttValue method.
0132: */
0133: public static final int RESULT_SUCCESS = 0, RESULT_FAILURE = -1,
0134: RESULT_DUPLICATE_ATTR = -2;
0135:
0136: /** Scanner states */
0137: static final int SCANNER_STATE_XML_DECL = 0,
0138: SCANNER_STATE_START_OF_MARKUP = 1,
0139: SCANNER_STATE_COMMENT = 2, SCANNER_STATE_PI = 3,
0140: SCANNER_STATE_DOCTYPE = 4, SCANNER_STATE_PROLOG = 5,
0141: SCANNER_STATE_ROOT_ELEMENT = 6, SCANNER_STATE_CONTENT = 7,
0142: SCANNER_STATE_REFERENCE = 8,
0143: SCANNER_STATE_ATTRIBUTE_LIST = 9,
0144: SCANNER_STATE_ATTRIBUTE_NAME = 10,
0145: SCANNER_STATE_ATTRIBUTE_VALUE = 11,
0146: SCANNER_STATE_TRAILING_MISC = 12,
0147: SCANNER_STATE_END_OF_INPUT = 13,
0148: SCANNER_STATE_TERMINATED = 14;
0149:
0150: //
0151: // Instance Variables
0152: //
0153: /***/
0154: // NOTE: Used by old implementation of scanElementType method. -Ac
0155: private StringPool.CharArrayRange fCurrentElementCharArrayRange = null;
0156: /***/
0157: int fAttrListHandle = -1;
0158: XMLAttrList fAttrList = null;
0159: GrammarResolver fGrammarResolver = null;
0160: XMLDTDScanner fDTDScanner = null;
0161: boolean fNamespacesEnabled = false;
0162: boolean fValidationEnabled = false;
0163: boolean fLoadExternalDTD = true;
0164: QName fElementQName = new QName();
0165: QName fAttributeQName = new QName();
0166: QName fCurrentElementQName = new QName();
0167: ScannerDispatcher fDispatcher = null;
0168: EventHandler fEventHandler = null;
0169: XMLDocumentHandler.DTDHandler fDTDHandler = null;
0170: StringPool fStringPool = null;
0171: XMLErrorReporter fErrorReporter = null;
0172: XMLEntityHandler fEntityHandler = null;
0173: XMLEntityHandler.EntityReader fEntityReader = null;
0174: XMLEntityHandler.CharBuffer fLiteralData = null;
0175: boolean fSeenRootElement = false;
0176: boolean fSeenDoctypeDecl = false;
0177: boolean fStandalone = false;
0178: boolean fParseTextDecl = false;
0179: boolean fScanningDTD = false;
0180: int fScannerState = SCANNER_STATE_XML_DECL;
0181: int fReaderId = -1;
0182: int fAttValueReader = -1;
0183: int fAttValueElementType = -1;
0184: int fAttValueAttrName = -1;
0185: int fAttValueOffset = -1;
0186: int fAttValueMark = -1;
0187: int fScannerMarkupDepth = 0;
0188:
0189: //
0190: // Interfaces
0191: //
0192:
0193: /**
0194: * This interface must be implemented by the users of the XMLDocumentScanner class.
0195: * These methods form the abstraction between the implementation semantics and the
0196: * more generic task of scanning the XML non-DTD grammar.
0197: */
0198: public interface EventHandler {
0199: /**
0200: * Signal standalone = "yes"
0201: *
0202: * @exception java.lang.Exception
0203: */
0204: public void callStandaloneIsYes() throws Exception;
0205:
0206: /**
0207: * Signal the start of a document
0208: *
0209: * @exception java.lang.Exception
0210: */
0211: public void callStartDocument() throws Exception;
0212:
0213: /**
0214: * Signal the end of a document
0215: *
0216: * @exception java.lang.Exception
0217: */
0218: public void callEndDocument() throws Exception;
0219:
0220: /**
0221: * Signal the XML declaration of a document
0222: *
0223: * @param version the handle in the string pool for the version number
0224: * @param encoding the handle in the string pool for the encoding
0225: * @param standalong the handle in the string pool for the standalone value
0226: * @exception java.lang.Exception
0227: */
0228: public void callXMLDecl(int version, int encoding,
0229: int standalone) throws Exception;
0230:
0231: /**
0232: * Signal the Text declaration of an external entity.
0233: *
0234: * @param version the handle in the string pool for the version number
0235: * @param encoding the handle in the string pool for the encoding
0236: * @exception java.lang.Exception
0237: */
0238: public void callTextDecl(int version, int encoding)
0239: throws Exception;
0240:
0241: /**
0242: * signal the scanning of a start element tag
0243: *
0244: * @param element Element name scanned.
0245: * @exception java.lang.Exception
0246: */
0247: public void callStartElement(QName element) throws Exception;
0248:
0249: /**
0250: * Signal the scanning of an element name in a start element tag.
0251: *
0252: * @param element Element name scanned.
0253: */
0254: public void element(QName element) throws Exception;
0255:
0256: /**
0257: * Signal the scanning of an attribute associated to the previous
0258: * start element tag.
0259: *
0260: * @param element Element name scanned.
0261: * @param attrName Attribute name scanned.
0262: * @param attrValue The string pool index of the attribute value.
0263: */
0264: public boolean attribute(QName element, QName attrName,
0265: int attrValue) throws Exception;
0266:
0267: /**
0268: * signal the scanning of an end element tag
0269: *
0270: * @param readerId the Id of the reader being used to scan the end tag.
0271: * @exception java.lang.Exception
0272: */
0273: public void callEndElement(int readerId) throws Exception;
0274:
0275: /**
0276: * Signal the start of a CDATA section
0277: * @exception java.lang.Exception
0278: */
0279: public void callStartCDATA() throws Exception;
0280:
0281: /**
0282: * Signal the end of a CDATA section
0283: * @exception java.lang.Exception
0284: */
0285: public void callEndCDATA() throws Exception;
0286:
0287: /**
0288: * Report the scanning of character data
0289: *
0290: * @param ch the handle in the string pool of the character data that was scanned
0291: * @exception java.lang.Exception
0292: */
0293: public void callCharacters(int ch) throws Exception;
0294:
0295: /**
0296: * Report the scanning of a processing instruction
0297: *
0298: * @param piTarget the handle in the string pool of the processing instruction targe
0299: * @param piData the handle in the string pool of the processing instruction data
0300: * @exception java.lang.Exception
0301: */
0302: public void callProcessingInstruction(int piTarget, int piData)
0303: throws Exception;
0304:
0305: /**
0306: * Report the scanning of a comment
0307: *
0308: * @param data the handle in the string pool of the comment text
0309: * @exception java.lang.Exception
0310: */
0311: public void callComment(int data) throws Exception;
0312: }
0313:
/**
 * Creates a scanner positioned at the XML declaration stage: the initial
 * dispatcher is an XMLDeclDispatcher and a fresh attribute list is bound to
 * the supplied string pool.
 *
 * @param stringPool    string pool used for all handles produced by the scanner
 * @param errorReporter receiver of the errors raised while scanning
 * @param entityHandler entity handler used to start reading referenced entities
 * @param literalData   buffer used to assemble attribute values
 */
public XMLDocumentScanner(StringPool stringPool,
        XMLErrorReporter errorReporter,
        XMLEntityHandler entityHandler,
        XMLEntityHandler.CharBuffer literalData) {
    this.fStringPool = stringPool;
    this.fErrorReporter = errorReporter;
    this.fEntityHandler = entityHandler;
    this.fLiteralData = literalData;
    this.fDispatcher = new XMLDeclDispatcher();
    this.fAttrList = new XMLAttrList(stringPool);
}
0328:
0329: /**
0330: * Set the event handler
0331: *
0332: * @param eventHandler The place to send our callbacks.
0333: */
0334: public void setEventHandler(
0335: XMLDocumentScanner.EventHandler eventHandler) {
0336: fEventHandler = eventHandler;
0337: }
0338:
0339: /** Set the DTD handler. */
0340: public void setDTDHandler(XMLDocumentHandler.DTDHandler dtdHandler) {
0341: fDTDHandler = dtdHandler;
0342: }
0343:
0344: /** Sets the grammar resolver. */
0345: public void setGrammarResolver(GrammarResolver resolver) {
0346: fGrammarResolver = resolver;
0347: }
0348:
0349: /**
0350: * reset the parser so that the instance can be reused
0351: *
0352: * @param stringPool the string pool instance to be used by the reset parser
0353: */
0354: public void reset(StringPool stringPool,
0355: XMLEntityHandler.CharBuffer literalData) {
0356: fStringPool = stringPool;
0357: fLiteralData = literalData;
0358: fParseTextDecl = false;
0359: fSeenRootElement = false;
0360: fSeenDoctypeDecl = false;
0361: fStandalone = false;
0362: fScanningDTD = false;
0363: fDispatcher = new XMLDeclDispatcher();
0364: fScannerState = SCANNER_STATE_XML_DECL;
0365: fScannerMarkupDepth = 0;
0366: fAttrList = new XMLAttrList(fStringPool);
0367: }
0368:
0369: //
0370: // From the standard:
0371: //
0372: // [1] document ::= prolog element Misc*
0373: //
0374: // [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
0375: // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
0376: // [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
0377: //
0378: // The beginning of XMLDecl simplifies to:
0379: // '<?xml' S ...
0380: //
0381: // [27] Misc ::= Comment | PI | S
0382: // [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
0383: // [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
0384: // [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
0385: //
0386: // [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
0387: // ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
0388: //
0389: /**
0390: * Entry point for parsing
0391: *
0392: * @param doItAll if true the entire document is parsed otherwise just
0393: * the next segment of the document is parsed
0394: */
0395: public boolean parseSome(boolean doItAll) throws Exception {
0396: do {
0397: if (!fDispatcher.dispatch(doItAll))
0398: return false;
0399: } while (doItAll);
0400: return true;
0401: }
0402:
0403: /**
0404: * Change readers
0405: *
0406: * @param nextReader the new reader that the scanner will use
0407: * @param nextReaderId id of the reader to change to
0408: * @exception throws java.lang.Exception
0409: */
0410: public void readerChange(XMLEntityHandler.EntityReader nextReader,
0411: int nextReaderId) throws Exception {
0412: fEntityReader = nextReader;
0413: fReaderId = nextReaderId;
0414: if (fScannerState == SCANNER_STATE_ATTRIBUTE_VALUE) {
0415: fAttValueOffset = fEntityReader.currentOffset();
0416: fAttValueMark = fAttValueOffset;
0417: }
0418:
0419: //also propagate the change to DTDScanner if there is one
0420: if (fDTDScanner != null && fScanningDTD)
0421: fDTDScanner.readerChange(nextReader, nextReaderId);
0422: }
0423:
0424: /**
0425: * Handle the end of input
0426: *
0427: * @param entityName the handle in the string pool of the name of the entity which has reached end of input
0428: * @param moreToFollow if true, there is still input left to process in other readers
0429: * @exception java.lang.Exception
0430: */
0431: public void endOfInput(int entityName, boolean moreToFollow)
0432: throws Exception {
0433: if (fDTDScanner != null && fScanningDTD) {
0434: fDTDScanner.endOfInput(entityName, moreToFollow);
0435: }
0436: fDispatcher.endOfInput(entityName, moreToFollow);
0437: }
0438:
0439: /**
0440: * Tell if scanner has reached end of input
0441: * @return true if scanner has reached end of input.
0442: */
0443: public boolean atEndOfInput() {
0444: return fScannerState == SCANNER_STATE_END_OF_INPUT;
0445: }
0446:
0447: //
0448: // [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
0449: //
0450: /**
0451: * Scan an attribute value
0452: *
0453: * @param elementType handle to the element whose attribute value is being scanned
0454: * @param attrName handle in the string pool of the name of attribute being scanned
0455: * @param asSymbol controls whether the value is a string (duplicates allowed) or a symbol (duplicates not allowed)
0456: * @return handle in the string pool of the scanned value
0457: * @exception java.lang.Exception
0458: */
0459: public int scanAttValue(QName element, QName attribute,
0460: boolean asSymbol) throws Exception {
0461: boolean single;
0462: if (!(single = fEntityReader.lookingAtChar('\'', true))
0463: && !fEntityReader.lookingAtChar('\"', true)) {
0464: reportFatalXMLError(
0465: XMLMessages.MSG_QUOTE_REQUIRED_IN_ATTVALUE,
0466: XMLMessages.P10_QUOTE_REQUIRED, element.rawname,
0467: attribute.rawname);
0468: return -1;
0469: }
0470: char qchar = single ? '\'' : '\"';
0471: fAttValueMark = fEntityReader.currentOffset();
0472: int attValue = fEntityReader.scanAttValue(qchar, asSymbol);
0473: if (attValue >= 0)
0474: return attValue;
0475: int previousState = setScannerState(SCANNER_STATE_ATTRIBUTE_VALUE);
0476: fAttValueReader = fReaderId;
0477: // REVISIT: What should this be?
0478: fAttValueElementType = element.rawname;
0479: // REVISIT: What should this be?
0480: fAttValueAttrName = attribute.rawname;
0481: fAttValueOffset = fEntityReader.currentOffset();
0482: int dataOffset = fLiteralData.length();
0483: if (fAttValueOffset - fAttValueMark > 0)
0484: fEntityReader.append(fLiteralData, fAttValueMark,
0485: fAttValueOffset - fAttValueMark);
0486: fAttValueMark = fAttValueOffset;
0487: boolean setMark = false;
0488: boolean skippedCR;
0489: while (true) {
0490: if (fEntityReader.lookingAtChar(qchar, true)) {
0491: if (fReaderId == fAttValueReader)
0492: break;
0493: } else if (fEntityReader.lookingAtChar(' ', true)) {
0494: //
0495: // no action required
0496: //
0497: } else if ((skippedCR = fEntityReader.lookingAtChar(
0498: (char) 0x0D, true))
0499: || fEntityReader.lookingAtSpace(true)) {
0500: if (fAttValueOffset - fAttValueMark > 0)
0501: fEntityReader.append(fLiteralData, fAttValueMark,
0502: fAttValueOffset - fAttValueMark);
0503: setMark = true;
0504: fLiteralData.append(' ');
0505: if (skippedCR) {
0506: //
0507: // REVISIT - HACK !!! code changed to pass incorrect OASIS test 'valid-sa-110'
0508: // Uncomment the next line to conform to the spec...
0509: //
0510: //fEntityReader.lookingAtChar((char)0x0A, true);
0511: }
0512: } else if (fEntityReader.lookingAtChar('&', true)) {
0513: if (fAttValueOffset - fAttValueMark > 0)
0514: fEntityReader.append(fLiteralData, fAttValueMark,
0515: fAttValueOffset - fAttValueMark);
0516: setMark = true;
0517: //
0518: // Check for character reference first.
0519: //
0520: if (fEntityReader.lookingAtChar('#', true)) {
0521: int ch = scanCharRef();
0522: if (ch != -1) {
0523: if (ch < 0x10000)
0524: fLiteralData.append((char) ch);
0525: else {
0526: fLiteralData
0527: .append((char) (((ch - 0x00010000) >> 10) + 0xd800));
0528: fLiteralData
0529: .append((char) (((ch - 0x00010000) & 0x3ff) + 0xdc00));
0530: }
0531: }
0532: } else {
0533: //
0534: // Entity reference
0535: //
0536: int nameOffset = fEntityReader.currentOffset();
0537: fEntityReader.skipPastName(';');
0538: int nameLength = fEntityReader.currentOffset()
0539: - nameOffset;
0540: if (nameLength == 0) {
0541: reportFatalXMLError(
0542: XMLMessages.MSG_NAME_REQUIRED_IN_REFERENCE,
0543: XMLMessages.P68_NAME_REQUIRED);
0544: } else if (!fEntityReader.lookingAtChar(';', true)) {
0545: reportFatalXMLError(
0546: XMLMessages.MSG_SEMICOLON_REQUIRED_IN_REFERENCE,
0547: XMLMessages.P68_SEMICOLON_REQUIRED,
0548: fEntityReader.addString(nameOffset,
0549: nameLength));
0550: } else {
0551: int entityName = fEntityReader.addSymbol(
0552: nameOffset, nameLength);
0553: fEntityHandler.startReadingFromEntity(
0554: entityName, fScannerMarkupDepth,
0555: XMLEntityHandler.ENTITYREF_IN_ATTVALUE);
0556: }
0557: }
0558: } else if (fEntityReader.lookingAtChar('<', true)) {
0559: if (fAttValueOffset - fAttValueMark > 0)
0560: fEntityReader.append(fLiteralData, fAttValueMark,
0561: fAttValueOffset - fAttValueMark);
0562: setMark = true;
0563: reportFatalXMLError(
0564: XMLMessages.MSG_LESSTHAN_IN_ATTVALUE,
0565: XMLMessages.WFC_NO_LESSTHAN_IN_ATTVALUE,
0566: element.rawname, attribute.rawname);
0567: } else if (!fEntityReader.lookingAtValidChar(true)) {
0568: if (fAttValueOffset - fAttValueMark > 0)
0569: fEntityReader.append(fLiteralData, fAttValueMark,
0570: fAttValueOffset - fAttValueMark);
0571: setMark = true;
0572: int invChar = fEntityReader.scanInvalidChar();
0573: if (fScannerState == SCANNER_STATE_END_OF_INPUT)
0574: return -1;
0575: if (invChar >= 0) {
0576: reportFatalXMLError(
0577: XMLMessages.MSG_INVALID_CHAR_IN_ATTVALUE,
0578: XMLMessages.P10_INVALID_CHARACTER,
0579: fStringPool.toString(element.rawname),
0580: fStringPool.toString(attribute.rawname),
0581: Integer.toHexString(invChar));
0582: }
0583: }
0584: fAttValueOffset = fEntityReader.currentOffset();
0585: if (setMark) {
0586: fAttValueMark = fAttValueOffset;
0587: setMark = false;
0588: }
0589: }
0590: restoreScannerState(previousState);
0591: int dataLength = fLiteralData.length() - dataOffset;
0592: if (dataLength == 0) {
0593: return fEntityReader.addString(fAttValueMark,
0594: fAttValueOffset - fAttValueMark);
0595: }
0596: if (fAttValueOffset - fAttValueMark > 0) {
0597: fEntityReader.append(fLiteralData, fAttValueMark,
0598: fAttValueOffset - fAttValueMark);
0599: dataLength = fLiteralData.length() - dataOffset;
0600: }
0601: int value = fLiteralData.addString(dataOffset, dataLength);
0602: return value;
0603: }
0604:
0605: //
0606: //
0607: //
0608: void reportFatalXMLError(int majorCode, int minorCode)
0609: throws Exception {
0610: fErrorReporter.reportError(fErrorReporter.getLocator(),
0611: XMLMessages.XML_DOMAIN, majorCode, minorCode, null,
0612: XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
0613: }
0614:
0615: void reportFatalXMLError(int majorCode, int minorCode,
0616: int stringIndex1) throws Exception {
0617: Object[] args = { fStringPool.toString(stringIndex1) };
0618: fErrorReporter.reportError(fErrorReporter.getLocator(),
0619: XMLMessages.XML_DOMAIN, majorCode, minorCode, args,
0620: XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
0621: }
0622:
0623: void reportFatalXMLError(int majorCode, int minorCode,
0624: String string1) throws Exception {
0625: Object[] args = { string1 };
0626: fErrorReporter.reportError(fErrorReporter.getLocator(),
0627: XMLMessages.XML_DOMAIN, majorCode, minorCode, args,
0628: XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
0629: }
0630:
0631: void reportFatalXMLError(int majorCode, int minorCode,
0632: int stringIndex1, int stringIndex2) throws Exception {
0633: Object[] args = { fStringPool.toString(stringIndex1),
0634: fStringPool.toString(stringIndex2) };
0635: fErrorReporter.reportError(fErrorReporter.getLocator(),
0636: XMLMessages.XML_DOMAIN, majorCode, minorCode, args,
0637: XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
0638: }
0639:
0640: void reportFatalXMLError(int majorCode, int minorCode,
0641: String string1, String string2) throws Exception {
0642: Object[] args = { string1, string2 };
0643: fErrorReporter.reportError(fErrorReporter.getLocator(),
0644: XMLMessages.XML_DOMAIN, majorCode, minorCode, args,
0645: XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
0646: }
0647:
0648: void reportFatalXMLError(int majorCode, int minorCode,
0649: String string1, String string2, String string3)
0650: throws Exception {
0651: Object[] args = { string1, string2, string3 };
0652: fErrorReporter.reportError(fErrorReporter.getLocator(),
0653: XMLMessages.XML_DOMAIN, majorCode, minorCode, args,
0654: XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
0655: }
0656:
0657: void abortMarkup(int majorCode, int minorCode) throws Exception {
0658: reportFatalXMLError(majorCode, minorCode);
0659: skipPastEndOfCurrentMarkup();
0660: }
0661:
0662: void abortMarkup(int majorCode, int minorCode, int stringIndex1)
0663: throws Exception {
0664: reportFatalXMLError(majorCode, minorCode, stringIndex1);
0665: skipPastEndOfCurrentMarkup();
0666: }
0667:
0668: void abortMarkup(int majorCode, int minorCode, String string1)
0669: throws Exception {
0670: reportFatalXMLError(majorCode, minorCode, string1);
0671: skipPastEndOfCurrentMarkup();
0672: }
0673:
0674: void abortMarkup(int majorCode, int minorCode, int stringIndex1,
0675: int stringIndex2) throws Exception {
0676: reportFatalXMLError(majorCode, minorCode, stringIndex1,
0677: stringIndex2);
0678: skipPastEndOfCurrentMarkup();
0679: }
0680:
0681: void skipPastEndOfCurrentMarkup() throws Exception {
0682: fEntityReader.skipToChar('>');
0683: if (fEntityReader.lookingAtChar('>', true))
0684: fScannerMarkupDepth--;
0685: }
0686:
0687: //
0688: //
0689: //
0690: int setScannerState(int state) {
0691: int oldState = fScannerState;
0692: fScannerState = state;
0693: return oldState;
0694: }
0695:
0696: void restoreScannerState(int state) {
0697: if (fScannerState != SCANNER_STATE_END_OF_INPUT)
0698: fScannerState = state;
0699: }
0700:
0701: //
0702: //
0703: //
/**
 * The main loop of the scanner calls the dispatch method of the current
 * ScannerDispatcher, passing a flag that tells the dispatcher whether to
 * continue or return. The scanner logic is split into one dispatcher per
 * syntactic component of XML. //REVISIT more rationale needed
 */
interface ScannerDispatcher {

    /**
     * Scans one XML syntactic component.
     *
     * @param keepgoing if true continue on to the next dispatcher, otherwise
     *                  return
     * @return true if scanning was successful //REVISIT - does it ever return
     *         false or does it just throw?
     * @exception java.lang.Exception
     */
    boolean dispatch(boolean keepgoing) throws Exception;

    /**
     * Performs this dispatcher's end-of-entity handling.
     *
     * @param entityName   StringPool handle of the entity that has reached
     *                     the end
     * @param moreToFollow true if there is more input to be read
     * @exception java.lang.Exception
     */
    void endOfInput(int entityName, boolean moreToFollow)
            throws Exception;
}
0730:
0731: final class XMLDeclDispatcher implements ScannerDispatcher {
0732: public boolean dispatch(boolean keepgoing) throws Exception {
0733: fEventHandler.callStartDocument();
0734: if (fEntityReader.lookingAtChar('<', true)) {
0735: fScannerMarkupDepth++;
0736: setScannerState(SCANNER_STATE_START_OF_MARKUP);
0737: if (fEntityReader.lookingAtChar('?', true)) {
0738: int piTarget = fEntityReader.scanName(' ');
0739: if (piTarget == -1) {
0740: abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED,
0741: XMLMessages.P16_PITARGET_REQUIRED);
0742: } else if ("xml".equals(fStringPool
0743: .toString(piTarget))) {
0744: if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
0745: scanXMLDeclOrTextDecl(false);
0746: } else { // a PI target matching 'xml'
0747: abortMarkup(
0748: XMLMessages.MSG_RESERVED_PITARGET,
0749: XMLMessages.P17_RESERVED_PITARGET);
0750: }
0751: } else { // PI
0752: scanPI(piTarget);
0753: }
0754: fDispatcher = new PrologDispatcher();
0755: restoreScannerState(SCANNER_STATE_PROLOG);
0756: return true;
0757: }
0758: if (fEntityReader.lookingAtChar('!', true)) {
0759: if (fEntityReader.lookingAtChar('-', true)) { // comment ?
0760: if (fEntityReader.lookingAtChar('-', true)) {
0761: scanComment(); // scan through the closing '-->'
0762: } else {
0763: abortMarkup(
0764: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
0765: XMLMessages.P22_NOT_RECOGNIZED);
0766: }
0767: } else {
0768: if (fEntityReader.skippedString(doctype_string)) {
0769: setScannerState(SCANNER_STATE_DOCTYPE);
0770: fSeenDoctypeDecl = true;
0771: scanDoctypeDecl(fStandalone); // scan through the closing '>'
0772: fScannerMarkupDepth--;
0773: fDispatcher = new PrologDispatcher();
0774: restoreScannerState(SCANNER_STATE_PROLOG);
0775: return true;
0776: } else {
0777: abortMarkup(
0778: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
0779: XMLMessages.P22_NOT_RECOGNIZED);
0780: }
0781: }
0782: } else {
0783: fDispatcher = new ContentDispatcher();
0784: restoreScannerState(SCANNER_STATE_ROOT_ELEMENT);
0785: return true;
0786: }
0787: } else {
0788: if (fEntityReader.lookingAtSpace(true)) {
0789: fEntityReader.skipPastSpaces();
0790: } else if (!fEntityReader.lookingAtValidChar(false)) {
0791: int invChar = fEntityReader.scanInvalidChar();
0792: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
0793: if (invChar >= 0) {
0794: String arg = Integer.toHexString(invChar);
0795: reportFatalXMLError(
0796: XMLMessages.MSG_INVALID_CHAR_IN_PROLOG,
0797: XMLMessages.P22_INVALID_CHARACTER,
0798: arg);
0799: }
0800: } else {
0801: fDispatcher = new EndOfInputDispatcher();
0802: setScannerState(SCANNER_STATE_END_OF_INPUT);
0803: return true;
0804: }
0805: } else {
0806: reportFatalXMLError(
0807: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
0808: XMLMessages.P22_NOT_RECOGNIZED);
0809: fEntityReader.lookingAtValidChar(true);
0810: }
0811: }
0812: fDispatcher = new PrologDispatcher();
0813: restoreScannerState(SCANNER_STATE_PROLOG);
0814: return true;
0815: }
0816:
0817: public void endOfInput(int entityName, boolean moreToFollow)
0818: throws Exception {
0819: switch (fScannerState) {
0820: case SCANNER_STATE_XML_DECL:
0821: case SCANNER_STATE_START_OF_MARKUP:
0822: case SCANNER_STATE_DOCTYPE:
0823: break;
0824: case SCANNER_STATE_COMMENT:
0825: if (!moreToFollow) {
0826: reportFatalXMLError(
0827: XMLMessages.MSG_COMMENT_UNTERMINATED,
0828: XMLMessages.P15_UNTERMINATED);
0829: } else {
0830: reportFatalXMLError(
0831: XMLMessages.MSG_COMMENT_NOT_IN_ONE_ENTITY,
0832: XMLMessages.P78_NOT_WELLFORMED);
0833: }
0834: break;
0835: case SCANNER_STATE_PI:
0836: if (!moreToFollow) {
0837: reportFatalXMLError(
0838: XMLMessages.MSG_PI_UNTERMINATED,
0839: XMLMessages.P16_UNTERMINATED);
0840: } else {
0841: reportFatalXMLError(
0842: XMLMessages.MSG_PI_NOT_IN_ONE_ENTITY,
0843: XMLMessages.P78_NOT_WELLFORMED);
0844: }
0845: break;
0846: default:
0847: throw new RuntimeException("FWK001 1] ScannerState="
0848: + fScannerState + "\n" + "1\t" + fScannerState);
0849: }
0850: if (!moreToFollow) {
0851: reportFatalXMLError(
0852: XMLMessages.MSG_ROOT_ELEMENT_REQUIRED,
0853: XMLMessages.P1_ELEMENT_REQUIRED);
0854: fDispatcher = new EndOfInputDispatcher();
0855: setScannerState(SCANNER_STATE_END_OF_INPUT);
0856: }
0857: }
0858: }
0859:
0860: final class PrologDispatcher implements ScannerDispatcher {
0861: public boolean dispatch(boolean keepgoing) throws Exception {
0862: do {
0863: if (fEntityReader.lookingAtChar('<', true)) {
0864: fScannerMarkupDepth++;
0865: setScannerState(SCANNER_STATE_START_OF_MARKUP);
0866: if (fEntityReader.lookingAtChar('?', true)) {
0867: int piTarget = fEntityReader.scanName(' ');
0868: if (piTarget == -1) {
0869: abortMarkup(
0870: XMLMessages.MSG_PITARGET_REQUIRED,
0871: XMLMessages.P16_PITARGET_REQUIRED);
0872: } else if ("xml".equals(fStringPool
0873: .toString(piTarget))) {
0874: if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
0875: abortMarkup(
0876: XMLMessages.MSG_XMLDECL_MUST_BE_FIRST,
0877: XMLMessages.P22_XMLDECL_MUST_BE_FIRST);
0878: } else { // a PI target matching 'xml'
0879: abortMarkup(
0880: XMLMessages.MSG_RESERVED_PITARGET,
0881: XMLMessages.P17_RESERVED_PITARGET);
0882: }
0883: } else { // PI
0884: scanPI(piTarget);
0885: }
0886: } else if (fEntityReader.lookingAtChar('!', true)) {
0887: if (fEntityReader.lookingAtChar('-', true)) { // comment ?
0888: if (fEntityReader.lookingAtChar('-', true)) {
0889: scanComment(); // scan through the closing '-->'
0890: } else {
0891: abortMarkup(
0892: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
0893: XMLMessages.P22_NOT_RECOGNIZED);
0894: }
0895: } else {
0896: if (!fSeenDoctypeDecl
0897: && fEntityReader
0898: .skippedString(doctype_string)) {
0899: setScannerState(SCANNER_STATE_DOCTYPE);
0900: fSeenDoctypeDecl = true;
0901: scanDoctypeDecl(fStandalone); // scan through the closing '>'
0902: fScannerMarkupDepth--;
0903: } else {
0904: abortMarkup(
0905: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
0906: XMLMessages.P22_NOT_RECOGNIZED);
0907: }
0908: }
0909: } else {
0910: fDispatcher = new ContentDispatcher();
0911: restoreScannerState(SCANNER_STATE_ROOT_ELEMENT);
0912: return true;
0913: }
0914: restoreScannerState(SCANNER_STATE_PROLOG);
0915: } else if (fEntityReader.lookingAtSpace(true)) {
0916: fEntityReader.skipPastSpaces();
0917: } else if (!fEntityReader.lookingAtValidChar(false)) {
0918: int invChar = fEntityReader.scanInvalidChar();
0919: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
0920: if (invChar >= 0) {
0921: String arg = Integer.toHexString(invChar);
0922: reportFatalXMLError(
0923: XMLMessages.MSG_INVALID_CHAR_IN_PROLOG,
0924: XMLMessages.P22_INVALID_CHARACTER,
0925: arg);
0926: }
0927: }
0928: } else {
0929: reportFatalXMLError(
0930: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
0931: XMLMessages.P22_NOT_RECOGNIZED);
0932: fEntityReader.lookingAtValidChar(true);
0933: }
0934: } while (fScannerState != SCANNER_STATE_END_OF_INPUT
0935: && keepgoing);
0936: return true;
0937: }
0938:
0939: public void endOfInput(int entityName, boolean moreToFollow)
0940: throws Exception {
0941: switch (fScannerState) {
0942: case SCANNER_STATE_PROLOG:
0943: case SCANNER_STATE_START_OF_MARKUP:
0944: case SCANNER_STATE_DOCTYPE:
0945: break;
0946: case SCANNER_STATE_COMMENT:
0947: if (!moreToFollow) {
0948: reportFatalXMLError(
0949: XMLMessages.MSG_COMMENT_UNTERMINATED,
0950: XMLMessages.P15_UNTERMINATED);
0951: } else {
0952: reportFatalXMLError(
0953: XMLMessages.MSG_COMMENT_NOT_IN_ONE_ENTITY,
0954: XMLMessages.P78_NOT_WELLFORMED);
0955: }
0956: break;
0957: case SCANNER_STATE_PI:
0958: if (!moreToFollow) {
0959: reportFatalXMLError(
0960: XMLMessages.MSG_PI_UNTERMINATED,
0961: XMLMessages.P16_UNTERMINATED);
0962: } else {
0963: reportFatalXMLError(
0964: XMLMessages.MSG_PI_NOT_IN_ONE_ENTITY,
0965: XMLMessages.P78_NOT_WELLFORMED);
0966: }
0967: break;
0968: default:
0969: throw new RuntimeException("FWK001 2] ScannerState="
0970: + fScannerState + "\n" + "2\t" + fScannerState);
0971: }
0972: if (!moreToFollow) {
0973: reportFatalXMLError(
0974: XMLMessages.MSG_ROOT_ELEMENT_REQUIRED,
0975: XMLMessages.P1_ELEMENT_REQUIRED);
0976: fDispatcher = new EndOfInputDispatcher();
0977: setScannerState(SCANNER_STATE_END_OF_INPUT);
0978: }
0979: }
0980: }
0981:
0982: int fCurrentElementType = -1;
0983:
0984: public int getCurrentElementType() {
0985: return fCurrentElementType;
0986: }
0987:
0988: final class ContentDispatcher implements ScannerDispatcher {
0989: private int fContentReader = -1;
0990: private int fElementDepth = 0;
0991: private int[] fElementTypeStack = new int[8];
0992:
0993: void popElementType() {
0994: if (fElementDepth-- == 0) {
0995: throw new RuntimeException(
0996: "FWK002 popElementType: fElementDepth-- == 0.");
0997: }
0998: if (fElementDepth == 0) {
0999: fCurrentElementType = -1;
1000: } else {
1001: fCurrentElementType = fElementTypeStack[fElementDepth - 1];
1002: }
1003: }
1004:
1005: public boolean dispatch(boolean keepgoing) throws Exception {
1006: do {
1007: switch (fScannerState) {
1008: case SCANNER_STATE_ROOT_ELEMENT: {
1009: scanElementType(fEntityReader, '>', fElementQName);
1010: if (fElementQName.rawname != -1) {
1011: //
1012: // root element
1013: //
1014: fContentReader = fReaderId;
1015: fSeenRootElement = true;
1016: //
1017: // scan element
1018: //
1019: if (fEntityReader.lookingAtChar('>', true)) {
1020: //
1021: // we have more content
1022: //
1023: fEventHandler
1024: .callStartElement(fElementQName);
1025: fScannerMarkupDepth--;
1026: if (fElementDepth == fElementTypeStack.length) {
1027: int[] newStack = new int[fElementDepth * 2];
1028: System.arraycopy(fElementTypeStack, 0,
1029: newStack, 0, fElementDepth);
1030: fElementTypeStack = newStack;
1031: }
1032: fCurrentElementType = fElementQName.rawname;
1033: fElementTypeStack[fElementDepth] = fElementQName.rawname;
1034: fElementDepth++;
1035: restoreScannerState(SCANNER_STATE_CONTENT);
1036: } else if (scanElement(fElementQName)) {
1037: //
1038: // we have more content
1039: //
1040: if (fElementDepth == fElementTypeStack.length) {
1041: int[] newStack = new int[fElementDepth * 2];
1042: System.arraycopy(fElementTypeStack, 0,
1043: newStack, 0, fElementDepth);
1044: fElementTypeStack = newStack;
1045: }
1046: fCurrentElementType = fElementQName.rawname;
1047: fElementTypeStack[fElementDepth] = fElementQName.rawname;
1048: fElementDepth++;
1049: restoreScannerState(SCANNER_STATE_CONTENT);
1050: } else {
1051: fDispatcher = new TrailingMiscDispatcher();
1052: restoreScannerState(SCANNER_STATE_TRAILING_MISC);
1053: return true;
1054: }
1055: } else {
1056: reportFatalXMLError(
1057: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG,
1058: XMLMessages.P22_NOT_RECOGNIZED);
1059: fDispatcher = new PrologDispatcher();
1060: restoreScannerState(SCANNER_STATE_PROLOG);
1061: return true;
1062: }
1063: break;
1064: }
1065: case SCANNER_STATE_START_OF_MARKUP:
1066: if (fEntityReader.lookingAtChar('?', true)) {
1067: int piTarget = fEntityReader.scanName(' ');
1068: if (piTarget == -1) {
1069: abortMarkup(
1070: XMLMessages.MSG_PITARGET_REQUIRED,
1071: XMLMessages.P16_PITARGET_REQUIRED);
1072: } else if ("xml".equals(fStringPool
1073: .toString(piTarget))) {
1074: if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
1075: if (fParseTextDecl) {
1076: scanXMLDeclOrTextDecl(true);
1077: fParseTextDecl = false;
1078: } else {
1079: abortMarkup(
1080: XMLMessages.MSG_TEXTDECL_MUST_BE_FIRST,
1081: XMLMessages.P30_TEXTDECL_MUST_BE_FIRST);
1082: }
1083: } else { // a PI target matching 'xml'
1084: abortMarkup(
1085: XMLMessages.MSG_RESERVED_PITARGET,
1086: XMLMessages.P17_RESERVED_PITARGET);
1087: }
1088: } else { // PI
1089: scanPI(piTarget);
1090: }
1091: restoreScannerState(SCANNER_STATE_CONTENT);
1092: } else if (fEntityReader.lookingAtChar('!', true)) {
1093: if (fEntityReader.lookingAtChar('-', true)) { // comment ?
1094: if (fEntityReader.lookingAtChar('-', true)) {
1095: scanComment(); // scan through the closing '-->'
1096: } else {
1097: abortMarkup(
1098: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1099: XMLMessages.P43_NOT_RECOGNIZED);
1100: }
1101: } else {
1102: if (fEntityReader
1103: .skippedString(cdata_string)) {
1104: fEntityReader.setInCDSect(true);
1105: fEventHandler.callStartCDATA();
1106: } else {
1107: abortMarkup(
1108: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1109: XMLMessages.P43_NOT_RECOGNIZED);
1110: }
1111: }
1112: } else {
1113: if (fEntityReader.lookingAtChar('/', true)) {
1114: //
1115: // [42] ETag ::= '</' Name S? '>'
1116: //
1117: if (!scanExpectedElementType(fEntityReader,
1118: '>', fCurrentElementType)) {
1119: abortMarkup(
1120: XMLMessages.MSG_ETAG_REQUIRED,
1121: XMLMessages.P39_UNTERMINATED,
1122: fCurrentElementType);
1123: } else {
1124: if (!fEntityReader.lookingAtChar('>',
1125: true)) {
1126: fEntityReader.skipPastSpaces();
1127: if (!fEntityReader.lookingAtChar(
1128: '>', true)) {
1129: reportFatalXMLError(
1130: XMLMessages.MSG_ETAG_UNTERMINATED,
1131: XMLMessages.P42_UNTERMINATED,
1132: fCurrentElementType);
1133: }
1134: }
1135: fScannerMarkupDepth--;
1136: fEventHandler.callEndElement(fReaderId);
1137: if (fElementDepth-- == 0) {
1138: throw new RuntimeException(
1139: "FWK002 popElementType: fElementDepth-- == 0.");
1140: }
1141: if (fElementDepth == 0) {
1142: fCurrentElementType = -1;
1143: fDispatcher = new TrailingMiscDispatcher();
1144: restoreScannerState(SCANNER_STATE_TRAILING_MISC);
1145: return true;
1146: } else {
1147: fCurrentElementType = fElementTypeStack[fElementDepth - 1];
1148: }
1149: }
1150: } else {
1151: scanElementType(fEntityReader, '>',
1152: fElementQName);
1153: if (fElementQName.rawname != -1) {
1154: //
1155: // element
1156: //
1157: if (fEntityReader.lookingAtChar('>',
1158: true)) {
1159: fEventHandler
1160: .callStartElement(fElementQName);
1161: fScannerMarkupDepth--;
1162: if (fElementDepth == fElementTypeStack.length) {
1163: int[] newStack = new int[fElementDepth * 2];
1164: System.arraycopy(
1165: fElementTypeStack, 0,
1166: newStack, 0,
1167: fElementDepth);
1168: fElementTypeStack = newStack;
1169: }
1170: fCurrentElementType = fElementQName.rawname;
1171: fElementTypeStack[fElementDepth] = fElementQName.rawname;
1172: fElementDepth++;
1173: } else {
1174: if (scanElement(fElementQName)) {
1175: if (fElementDepth == fElementTypeStack.length) {
1176: int[] newStack = new int[fElementDepth * 2];
1177: System.arraycopy(
1178: fElementTypeStack,
1179: 0, newStack, 0,
1180: fElementDepth);
1181: fElementTypeStack = newStack;
1182: }
1183: fCurrentElementType = fElementQName.rawname;
1184: fElementTypeStack[fElementDepth] = fElementQName.rawname;
1185: fElementDepth++;
1186: }
1187: }
1188: } else {
1189: abortMarkup(
1190: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1191: XMLMessages.P43_NOT_RECOGNIZED);
1192: }
1193: }
1194: }
1195: restoreScannerState(SCANNER_STATE_CONTENT);
1196: break;
1197: case SCANNER_STATE_CONTENT:
1198: if (fParseTextDecl
1199: && fEntityReader.lookingAtChar('<', true)) {
1200: fScannerMarkupDepth++;
1201: setScannerState(SCANNER_STATE_START_OF_MARKUP);
1202: continue;
1203: }
1204: // REVISIT: Is this the right thing to do? Do we need to
1205: // save more information on the stack?
1206: fCurrentElementQName.setValues(-1, -1,
1207: fCurrentElementType);
1208: switch (fEntityReader
1209: .scanContent(fCurrentElementQName)) {
1210: case XMLEntityHandler.CONTENT_RESULT_START_OF_PI:
1211: fScannerMarkupDepth++;
1212: int piTarget = fEntityReader.scanName(' ');
1213: if (piTarget == -1) {
1214: abortMarkup(
1215: XMLMessages.MSG_PITARGET_REQUIRED,
1216: XMLMessages.P16_PITARGET_REQUIRED);
1217: } else if ("xml".equals(fStringPool
1218: .toString(piTarget))) {
1219: if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
1220: if (fReaderId == fContentReader) {
1221: abortMarkup(
1222: XMLMessages.MSG_XMLDECL_MUST_BE_FIRST,
1223: XMLMessages.P22_XMLDECL_MUST_BE_FIRST);
1224: } else {
1225: abortMarkup(
1226: XMLMessages.MSG_TEXTDECL_MUST_BE_FIRST,
1227: XMLMessages.P30_TEXTDECL_MUST_BE_FIRST);
1228: }
1229: } else { // a PI target matching 'xml'
1230: abortMarkup(
1231: XMLMessages.MSG_RESERVED_PITARGET,
1232: XMLMessages.P17_RESERVED_PITARGET);
1233: }
1234: } else { // PI
1235: scanPI(piTarget);
1236: }
1237: break;
1238: case XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT:
1239: fScannerMarkupDepth++;
1240: fParseTextDecl = false;
1241: scanComment(); // scan through the closing '-->'
1242: break;
1243: case XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT:
1244: fScannerMarkupDepth++;
1245: fParseTextDecl = false;
1246: fEntityReader.setInCDSect(true);
1247: fEventHandler.callStartCDATA();
1248: break;
1249: case XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG:
1250: fScannerMarkupDepth++;
1251: fParseTextDecl = false;
1252: //
1253: // [42] ETag ::= '</' Name S? '>'
1254: //
1255: if (!scanExpectedElementType(fEntityReader,
1256: '>', fCurrentElementType)) {
1257: abortMarkup(XMLMessages.MSG_ETAG_REQUIRED,
1258: XMLMessages.P39_UNTERMINATED,
1259: fCurrentElementType);
1260: } else {
1261: if (!fEntityReader.lookingAtChar('>', true)) {
1262: fEntityReader.skipPastSpaces();
1263: if (!fEntityReader.lookingAtChar('>',
1264: true)) {
1265: reportFatalXMLError(
1266: XMLMessages.MSG_ETAG_UNTERMINATED,
1267: XMLMessages.P42_UNTERMINATED,
1268: fCurrentElementType);
1269: }
1270: }
1271: fScannerMarkupDepth--;
1272: fEventHandler.callEndElement(fReaderId);
1273: if (fElementDepth-- == 0) {
1274: throw new RuntimeException(
1275: "FWK002 popElementType: fElementDepth-- == 0.");
1276: }
1277: if (fElementDepth == 0) {
1278: fCurrentElementType = -1;
1279: fDispatcher = new TrailingMiscDispatcher();
1280: restoreScannerState(SCANNER_STATE_TRAILING_MISC);
1281: return true;
1282: } else {
1283: fCurrentElementType = fElementTypeStack[fElementDepth - 1];
1284: }
1285: }
1286: restoreScannerState(SCANNER_STATE_CONTENT);
1287: break;
1288: case XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT: {
1289: fScannerMarkupDepth++;
1290: fParseTextDecl = false;
1291: scanElementType(fEntityReader, '>',
1292: fElementQName);
1293: if (fElementQName.rawname != -1) {
1294: if (fEntityReader.lookingAtChar('>', true)) {
1295: fEventHandler
1296: .callStartElement(fElementQName);
1297: fScannerMarkupDepth--;
1298: if (fElementDepth == fElementTypeStack.length) {
1299: int[] newStack = new int[fElementDepth * 2];
1300: System.arraycopy(fElementTypeStack,
1301: 0, newStack, 0,
1302: fElementDepth);
1303: fElementTypeStack = newStack;
1304: }
1305: fCurrentElementType = fElementQName.rawname;
1306: fElementTypeStack[fElementDepth] = fElementQName.rawname;
1307: fElementDepth++;
1308: } else {
1309: if (scanElement(fElementQName)) {
1310: if (fElementDepth == fElementTypeStack.length) {
1311: int[] newStack = new int[fElementDepth * 2];
1312: System.arraycopy(
1313: fElementTypeStack, 0,
1314: newStack, 0,
1315: fElementDepth);
1316: fElementTypeStack = newStack;
1317: }
1318: fCurrentElementType = fElementQName.rawname;
1319: fElementTypeStack[fElementDepth] = fElementQName.rawname;
1320: fElementDepth++;
1321: }
1322: }
1323: } else {
1324: abortMarkup(
1325: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1326: XMLMessages.P43_NOT_RECOGNIZED);
1327: }
1328: if (fScannerState != SCANNER_STATE_END_OF_INPUT)
1329: fScannerState = SCANNER_STATE_CONTENT;
1330: break;
1331: }
1332: case XMLEntityHandler.CONTENT_RESULT_MATCHING_ETAG: {
1333: fParseTextDecl = false;
1334: fEventHandler.callEndElement(fReaderId);
1335: if (fElementDepth-- == 0) {
1336: throw new RuntimeException(
1337: "FWK002 popElementType: fElementDepth-- == 0.");
1338: }
1339: if (fElementDepth == 0) {
1340: fCurrentElementType = -1;
1341: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1342: fDispatcher = new TrailingMiscDispatcher();
1343: fScannerState = SCANNER_STATE_TRAILING_MISC;
1344: }
1345: return true;
1346: } else {
1347: fCurrentElementType = fElementTypeStack[fElementDepth - 1];
1348: }
1349: if (fScannerState != SCANNER_STATE_END_OF_INPUT)
1350: fScannerState = SCANNER_STATE_CONTENT;
1351: break;
1352: }
1353: case XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF:
1354: fParseTextDecl = false;
1355: //
1356: // [67] Reference ::= EntityRef | CharRef
1357: // [68] EntityRef ::= '&' Name ';'
1358: // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1359: //
1360: setScannerState(SCANNER_STATE_REFERENCE);
1361: int num = scanCharRef();
1362: // if (num == -1) num = 0xfffd; // REVISIT - alternative is to use Unicode replacement char
1363: if (num != -1)
1364: fEventHandler.callCharacters(num);
1365: restoreScannerState(SCANNER_STATE_CONTENT);
1366: break;
1367: case XMLEntityHandler.CONTENT_RESULT_REFERENCE_END_OF_INPUT:
1368: // REVISIT - This should hopefully get us the "reference not
1369: // contained in one entity" error when endOfInput is called.
1370: // Test that this is so...
1371: //
1372: // fall through...
1373: //
1374: case XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF:
1375: fParseTextDecl = false;
1376: //
1377: // [68] EntityRef ::= '&' Name ';'
1378: //
1379: setScannerState(SCANNER_STATE_REFERENCE);
1380: int nameOffset = fEntityReader.currentOffset();
1381: fEntityReader.skipPastName(';');
1382: int nameLength = fEntityReader.currentOffset()
1383: - nameOffset;
1384: if (nameLength == 0) {
1385: reportFatalXMLError(
1386: XMLMessages.MSG_NAME_REQUIRED_IN_REFERENCE,
1387: XMLMessages.P68_NAME_REQUIRED);
1388: restoreScannerState(SCANNER_STATE_CONTENT);
1389: } else if (!fEntityReader.lookingAtChar(';',
1390: true)) {
1391: reportFatalXMLError(
1392: XMLMessages.MSG_SEMICOLON_REQUIRED_IN_REFERENCE,
1393: XMLMessages.P68_SEMICOLON_REQUIRED,
1394: fEntityReader.addString(nameOffset,
1395: nameLength));
1396: restoreScannerState(SCANNER_STATE_CONTENT);
1397: } else {
1398: restoreScannerState(SCANNER_STATE_CONTENT);
1399: int entityName = fEntityReader.addSymbol(
1400: nameOffset, nameLength);
1401: fParseTextDecl = fEntityHandler
1402: .startReadingFromEntity(
1403: entityName,
1404: fElementDepth,
1405: XMLEntityHandler.ENTITYREF_IN_CONTENT);
1406: }
1407: break;
1408: case XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT:
1409: fParseTextDecl = false;
1410: //
1411: // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1412: // [21] CDEnd ::= ']]>'
1413: //
1414: if (fEntityReader.getInCDSect()) {
1415: fEntityReader.setInCDSect(false);
1416: fEventHandler.callEndCDATA();
1417: fScannerMarkupDepth--;
1418: } else {
1419: reportFatalXMLError(
1420: XMLMessages.MSG_CDEND_IN_CONTENT,
1421: XMLMessages.P14_INVALID);
1422: }
1423: restoreScannerState(SCANNER_STATE_CONTENT);
1424: break;
1425: case XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR:
1426: fParseTextDecl = false;
1427: //
1428: // The reader will also use this state if it
1429: // encounters the end of input while reading
1430: // content. We need to check for this case.
1431: //
1432: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1433: if (!fEntityReader
1434: .lookingAtValidChar(false)) {
1435: //
1436: // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] // any Unicode character, excluding the
1437: // | [#xE000-#xFFFD] | [#x10000-#x10FFFF] // surrogate blocks, FFFE, and FFFF.
1438: //
1439: int invChar = fEntityReader
1440: .scanInvalidChar();
1441: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1442: if (invChar >= 0) {
1443: if (fEntityReader.getInCDSect()) {
1444: reportFatalXMLError(
1445: XMLMessages.MSG_INVALID_CHAR_IN_CDSECT,
1446: XMLMessages.P20_INVALID_CHARACTER,
1447: Integer
1448: .toHexString(invChar));
1449: } else {
1450: reportFatalXMLError(
1451: XMLMessages.MSG_INVALID_CHAR_IN_CONTENT,
1452: XMLMessages.P43_INVALID_CHARACTER,
1453: Integer
1454: .toHexString(invChar));
1455: }
1456: }
1457: }
1458: }
1459: restoreScannerState(SCANNER_STATE_CONTENT);
1460: }
1461: break;
1462: case XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED:
1463: fParseTextDecl = false;
1464: abortMarkup(
1465: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_CONTENT,
1466: XMLMessages.P43_NOT_RECOGNIZED);
1467: break;
1468: case XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT:
1469: // REVISIT - This should hopefully get us the "markup not
1470: // contained in one entity" error when endOfInput is called.
1471: // Test that this is so...
1472: fScannerMarkupDepth++;
1473: fParseTextDecl = false;
1474: fScannerState = SCANNER_STATE_START_OF_MARKUP;
1475: break;
1476: default:
1477: throw new RuntimeException(
1478: "FWK001 3] ScannerState="
1479: + fScannerState + "\n" + "3\t"
1480: + fScannerState); // should not happen
1481: }
1482: break;
1483: default:
1484: throw new RuntimeException(
1485: "FWK001 4] ScannerState=" + fScannerState
1486: + "\n" + "4\t" + fScannerState);
1487: }
1488: } while (fScannerState != SCANNER_STATE_END_OF_INPUT
1489: && keepgoing);
1490: return true;
1491: }
1492:
1493: public void endOfInput(int entityName, boolean moreToFollow)
1494: throws Exception {
1495: switch (fScannerState) {
1496: case SCANNER_STATE_ROOT_ELEMENT:
1497: case SCANNER_STATE_START_OF_MARKUP:
1498: break;
1499: case SCANNER_STATE_CONTENT:
1500: if (fEntityReader.getInCDSect()) {
1501: reportFatalXMLError(
1502: XMLMessages.MSG_CDSECT_UNTERMINATED,
1503: XMLMessages.P18_UNTERMINATED);
1504: }
1505: break;
1506: case SCANNER_STATE_ATTRIBUTE_LIST:
1507: if (!moreToFollow) {
1508: // REVISIT reportFatalXMLError(XMLMessages.MSG_TAG1);
1509: } else {
1510: // REVISIT reportFatalXMLError(XMLMessages.MSG_TAG1);
1511: }
1512: break;
1513: case SCANNER_STATE_ATTRIBUTE_NAME:
1514: if (!moreToFollow) {
1515: // REVISIT reportFatalXMLError(XMLMessages.MSG_ATTVAL0);
1516: } else {
1517: // REVISIT reportFatalXMLError(XMLMessages.MSG_ATTVAL0);
1518: }
1519: break;
1520: case SCANNER_STATE_ATTRIBUTE_VALUE:
1521: if (!moreToFollow) {
1522: reportFatalXMLError(
1523: XMLMessages.MSG_ATTRIBUTE_VALUE_UNTERMINATED,
1524: XMLMessages.P10_UNTERMINATED,
1525: fAttValueElementType, fAttValueAttrName);
1526: } else if (fReaderId == fAttValueReader) {
1527: // REVISIT reportFatalXMLError(XMLMessages.MSG_ATTVAL0);
1528: } else {
1529: fEntityReader.append(fLiteralData, fAttValueMark,
1530: fAttValueOffset - fAttValueMark);
1531: }
1532: break;
1533: case SCANNER_STATE_COMMENT:
1534: if (!moreToFollow) {
1535: reportFatalXMLError(
1536: XMLMessages.MSG_COMMENT_UNTERMINATED,
1537: XMLMessages.P15_UNTERMINATED);
1538: } else {
1539: reportFatalXMLError(
1540: XMLMessages.MSG_COMMENT_NOT_IN_ONE_ENTITY,
1541: XMLMessages.P78_NOT_WELLFORMED);
1542: }
1543: break;
1544: case SCANNER_STATE_PI:
1545: if (!moreToFollow) {
1546: reportFatalXMLError(
1547: XMLMessages.MSG_PI_UNTERMINATED,
1548: XMLMessages.P16_UNTERMINATED);
1549: } else {
1550: reportFatalXMLError(
1551: XMLMessages.MSG_PI_NOT_IN_ONE_ENTITY,
1552: XMLMessages.P78_NOT_WELLFORMED);
1553: }
1554: break;
1555: case SCANNER_STATE_REFERENCE:
1556: if (!moreToFollow) {
1557: reportFatalXMLError(
1558: XMLMessages.MSG_REFERENCE_UNTERMINATED,
1559: XMLMessages.P67_UNTERMINATED);
1560: } else {
1561: reportFatalXMLError(
1562: XMLMessages.MSG_REFERENCE_NOT_IN_ONE_ENTITY,
1563: XMLMessages.P78_NOT_WELLFORMED);
1564: }
1565: break;
1566: default:
1567: throw new RuntimeException("FWK001 5] ScannerState="
1568: + fScannerState + "\n" + "5\t" + fScannerState);
1569: }
1570: if (!moreToFollow) {
1571: if (fElementDepth > 0) {
1572: reportFatalXMLError(XMLMessages.MSG_ETAG_REQUIRED,
1573: XMLMessages.P39_UNTERMINATED,
1574: fCurrentElementType);
1575: } else {
1576: reportFatalXMLError(
1577: XMLMessages.MSG_ROOT_ELEMENT_REQUIRED,
1578: XMLMessages.P1_ELEMENT_REQUIRED, null);
1579: }
1580: fDispatcher = new EndOfInputDispatcher();
1581: setScannerState(SCANNER_STATE_END_OF_INPUT);
1582: }
1583: }
1584: }
1585:
1586: final class TrailingMiscDispatcher implements ScannerDispatcher {
1587: public boolean dispatch(boolean keepgoing) throws Exception {
1588: do {
1589: if (fEntityReader.lookingAtChar('<', true)) {
1590: fScannerMarkupDepth++;
1591: setScannerState(SCANNER_STATE_START_OF_MARKUP);
1592: if (fEntityReader.lookingAtChar('?', true)) {
1593: int piTarget = fEntityReader.scanName(' ');
1594: if (piTarget == -1) {
1595: abortMarkup(
1596: XMLMessages.MSG_PITARGET_REQUIRED,
1597: XMLMessages.P16_PITARGET_REQUIRED);
1598: } else if ("xml".equals(fStringPool
1599: .toString(piTarget))) {
1600: if (fEntityReader.lookingAtSpace(true)) { // an XMLDecl looks like a PI with the target 'xml'
1601: abortMarkup(
1602: XMLMessages.MSG_XMLDECL_MUST_BE_FIRST,
1603: XMLMessages.P22_XMLDECL_MUST_BE_FIRST);
1604: } else { // a PI target matching 'xml'
1605: abortMarkup(
1606: XMLMessages.MSG_RESERVED_PITARGET,
1607: XMLMessages.P17_RESERVED_PITARGET);
1608: }
1609: } else { // PI
1610: scanPI(piTarget);
1611: }
1612: } else if (fEntityReader.lookingAtChar('!', true)) {
1613: if (fEntityReader.lookingAtChar('-', true)
1614: && fEntityReader.lookingAtChar('-',
1615: true)) { // comment ?
1616: scanComment(); // scan through the closing '-->'
1617: } else {
1618: abortMarkup(
1619: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_MISC,
1620: XMLMessages.P27_NOT_RECOGNIZED);
1621: }
1622: } else {
1623: abortMarkup(
1624: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_MISC,
1625: XMLMessages.P27_NOT_RECOGNIZED);
1626: }
1627: restoreScannerState(SCANNER_STATE_TRAILING_MISC);
1628: } else if (fEntityReader.lookingAtSpace(true)) {
1629: fEntityReader.skipPastSpaces();
1630: } else if (!fEntityReader.lookingAtValidChar(false)) {
1631: int invChar = fEntityReader.scanInvalidChar();
1632: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1633: if (invChar >= 0) {
1634: String arg = Integer.toHexString(invChar);
1635: reportFatalXMLError(
1636: XMLMessages.MSG_INVALID_CHAR_IN_MISC,
1637: XMLMessages.P27_INVALID_CHARACTER,
1638: arg);
1639: }
1640: }
1641: } else {
1642: reportFatalXMLError(
1643: XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_MISC,
1644: XMLMessages.P27_NOT_RECOGNIZED);
1645: fEntityReader.lookingAtValidChar(true);
1646: }
1647: } while (fScannerState != SCANNER_STATE_END_OF_INPUT
1648: && keepgoing);
1649: return true;
1650: }
1651:
1652: public void endOfInput(int entityName, boolean moreToFollow)
1653: throws Exception {
1654: if (moreToFollow)
1655: throw new RuntimeException(
1656: "FWK003 TrailingMiscDispatcher.endOfInput moreToFollow");
1657: switch (fScannerState) {
1658: case SCANNER_STATE_TRAILING_MISC:
1659: case SCANNER_STATE_START_OF_MARKUP:
1660: break;
1661: case SCANNER_STATE_COMMENT:
1662: reportFatalXMLError(
1663: XMLMessages.MSG_COMMENT_UNTERMINATED,
1664: XMLMessages.P15_UNTERMINATED);
1665: break;
1666: case SCANNER_STATE_PI:
1667: reportFatalXMLError(XMLMessages.MSG_PI_UNTERMINATED,
1668: XMLMessages.P16_UNTERMINATED);
1669: break;
1670: default:
1671: throw new RuntimeException("FWK001 6] ScannerState="
1672: + fScannerState + "\n" + "6\t" + fScannerState);
1673: }
1674: fDispatcher = new EndOfInputDispatcher();
1675: setScannerState(SCANNER_STATE_END_OF_INPUT);
1676: }
1677: }
1678:
1679: final class EndOfInputDispatcher implements ScannerDispatcher {
1680: public boolean dispatch(boolean keepgoing) throws Exception {
1681: if (fScannerState != SCANNER_STATE_TERMINATED)
1682: fEventHandler.callEndDocument();
1683: setScannerState(SCANNER_STATE_TERMINATED);
1684: return false;
1685: }
1686:
1687: public void endOfInput(int entityName, boolean moreToFollow)
1688: throws Exception {
1689: throw new RuntimeException("FWK001 7] ScannerState="
1690: + fScannerState + "\n" + "7\t" + fScannerState);
1691: }
1692: }
1693:
1694: //
1695: // From the standard:
1696: //
1697: // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1698: // [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1699: // [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
1700: // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1701: // [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1702: // | ('"' ('yes' | 'no') '"'))
1703: //
1704: // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1705: //
1706: void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
1707: throws Exception {
1708: int version = -1;
1709: int encoding = -1;
1710: int standalone = -1;
1711: final int XMLDECL_START = 0;
1712: final int XMLDECL_VERSION = 1;
1713: final int XMLDECL_ENCODING = 2;
1714: final int XMLDECL_STANDALONE = 3;
1715: final int XMLDECL_FINISHED = 4;
1716: int state = XMLDECL_START;
1717: do {
1718: fEntityReader.skipPastSpaces();
1719: int offset = fEntityReader.currentOffset();
1720: if (scanningTextDecl) {
1721: if (state == XMLDECL_START
1722: && fEntityReader.skippedString(version_string)) {
1723: state = XMLDECL_VERSION;
1724: } else if (fEntityReader.skippedString(encoding_string)) {
1725: state = XMLDECL_ENCODING;
1726: } else {
1727: abortMarkup(XMLMessages.MSG_ENCODINGDECL_REQUIRED,
1728: XMLMessages.P77_ENCODINGDECL_REQUIRED);
1729: return;
1730: }
1731: } else {
1732: if (state == XMLDECL_START) {
1733: if (!fEntityReader.skippedString(version_string)) {
1734: abortMarkup(
1735: XMLMessages.MSG_VERSIONINFO_REQUIRED,
1736: XMLMessages.P23_VERSIONINFO_REQUIRED);
1737: return;
1738: }
1739: state = XMLDECL_VERSION;
1740: } else {
1741: if (state == XMLDECL_VERSION) {
1742: if (fEntityReader
1743: .skippedString(encoding_string))
1744: state = XMLDECL_ENCODING;
1745: else
1746: state = XMLDECL_STANDALONE;
1747: } else
1748: state = XMLDECL_STANDALONE;
1749: if (state == XMLDECL_STANDALONE
1750: && !fEntityReader
1751: .skippedString(standalone_string))
1752: break;
1753: }
1754: }
1755: int length = fEntityReader.currentOffset() - offset;
1756: fEntityReader.skipPastSpaces();
1757: if (!fEntityReader.lookingAtChar('=', true)) {
1758: int majorCode = scanningTextDecl ? XMLMessages.MSG_EQ_REQUIRED_IN_TEXTDECL
1759: : XMLMessages.MSG_EQ_REQUIRED_IN_XMLDECL;
1760: int minorCode = state == XMLDECL_VERSION ? XMLMessages.P24_EQ_REQUIRED
1761: : (state == XMLDECL_ENCODING ? XMLMessages.P80_EQ_REQUIRED
1762: : XMLMessages.P32_EQ_REQUIRED);
1763: abortMarkup(majorCode, minorCode, fEntityReader
1764: .addString(offset, length));
1765: return;
1766: }
1767: fEntityReader.skipPastSpaces();
1768: int result = fEntityReader.scanStringLiteral();
1769: switch (result) {
1770: case XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED: {
1771: int majorCode = scanningTextDecl ? XMLMessages.MSG_QUOTE_REQUIRED_IN_TEXTDECL
1772: : XMLMessages.MSG_QUOTE_REQUIRED_IN_XMLDECL;
1773: int minorCode = state == XMLDECL_VERSION ? XMLMessages.P24_QUOTE_REQUIRED
1774: : (state == XMLDECL_ENCODING ? XMLMessages.P80_QUOTE_REQUIRED
1775: : XMLMessages.P32_QUOTE_REQUIRED);
1776: abortMarkup(majorCode, minorCode, fEntityReader
1777: .addString(offset, length));
1778: return;
1779: }
1780: case XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR:
1781: int invChar = fEntityReader.scanInvalidChar();
1782: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1783: if (invChar >= 0) {
1784: int majorCode = scanningTextDecl ? XMLMessages.MSG_INVALID_CHAR_IN_TEXTDECL
1785: : XMLMessages.MSG_INVALID_CHAR_IN_XMLDECL;
1786: int minorCode = state == XMLDECL_VERSION ? XMLMessages.P26_INVALID_CHARACTER
1787: : (state == XMLDECL_ENCODING ? XMLMessages.P81_INVALID_CHARACTER
1788: : XMLMessages.P32_INVALID_CHARACTER);
1789: reportFatalXMLError(majorCode, minorCode,
1790: Integer.toHexString(invChar));
1791: }
1792: skipPastEndOfCurrentMarkup();
1793: }
1794: return;
1795: default:
1796: break;
1797: }
1798: switch (state) {
1799: case XMLDECL_VERSION:
1800: //
1801: // version="..."
1802: //
1803: version = result;
1804: String versionString = fStringPool.toString(version);
1805: if (!"1.0".equals(versionString)) {
1806: if (!validVersionNum(versionString)) {
1807: abortMarkup(
1808: XMLMessages.MSG_VERSIONINFO_INVALID,
1809: XMLMessages.P26_INVALID_VALUE,
1810: versionString);
1811: return;
1812: }
1813: // NOTE: RECOVERABLE ERROR
1814: Object[] args = { versionString };
1815: fErrorReporter
1816: .reportError(
1817: fErrorReporter.getLocator(),
1818: XMLMessages.XML_DOMAIN,
1819: XMLMessages.MSG_VERSION_NOT_SUPPORTED,
1820: XMLMessages.P26_NOT_SUPPORTED,
1821: args,
1822: XMLErrorReporter.ERRORTYPE_RECOVERABLE_ERROR);
1823: // REVISIT - hope it is compatible...
1824: // skipPastEndOfCurrentMarkup();
1825: // return;
1826: }
1827: if (!fEntityReader.lookingAtSpace(true)) {
1828: if (scanningTextDecl) {
1829: abortMarkup(
1830: XMLMessages.MSG_SPACE_REQUIRED_IN_TEXTDECL,
1831: XMLMessages.P80_WHITESPACE_REQUIRED);
1832: return;
1833: }
1834: state = XMLDECL_FINISHED;
1835: }
1836: break;
1837: case XMLDECL_ENCODING:
1838: //
1839: // encoding = "..."
1840: //
1841: encoding = result;
1842: String encodingString = fStringPool.toString(encoding);
1843: if (!validEncName(encodingString)) {
1844: abortMarkup(XMLMessages.MSG_ENCODINGDECL_INVALID,
1845: XMLMessages.P81_INVALID_VALUE,
1846: encodingString);
1847: return;
1848: }
1849: if (!fEntityReader.lookingAtSpace(true)) {
1850: state = XMLDECL_FINISHED;
1851: } else if (scanningTextDecl) {
1852: fEntityReader.skipPastSpaces();
1853: state = XMLDECL_FINISHED;
1854: }
1855: break;
1856: case XMLDECL_STANDALONE:
1857: //
1858: // standalone="..."
1859: //
1860: standalone = result;
1861: String standaloneString = fStringPool
1862: .toString(standalone);
1863: boolean yes = "yes".equals(standaloneString);
1864: if (!yes && !"no".equals(standaloneString)) {
1865: abortMarkup(XMLMessages.MSG_SDDECL_INVALID,
1866: XMLMessages.P32_INVALID_VALUE,
1867: standaloneString);
1868: return;
1869: }
1870: fStandalone = yes;
1871: fEntityReader.skipPastSpaces();
1872: state = XMLDECL_FINISHED;
1873: break;
1874: }
1875: } while (state != XMLDECL_FINISHED);
1876: if (!fEntityReader.lookingAtChar('?', true)
1877: || !fEntityReader.lookingAtChar('>', true)) {
1878: int majorCode, minorCode;
1879: if (scanningTextDecl) {
1880: majorCode = XMLMessages.MSG_TEXTDECL_UNTERMINATED;
1881: minorCode = XMLMessages.P77_UNTERMINATED;
1882: } else {
1883: majorCode = XMLMessages.MSG_XMLDECL_UNTERMINATED;
1884: minorCode = XMLMessages.P23_UNTERMINATED;
1885: }
1886: abortMarkup(majorCode, minorCode);
1887: return;
1888: }
1889: fScannerMarkupDepth--;
1890: if (scanningTextDecl) {
1891: fEventHandler.callTextDecl(version, encoding);
1892: } else {
1893: //
1894: // Now that we have hit '?>' we are done with XML decl. Call the
1895: // handler before returning.
1896: //
1897: fEventHandler.callXMLDecl(version, encoding, standalone);
1898: // if we see standalone = 'yes', call the eventHandler - XMLValidator
1899: if (fStandalone) {
1900: fEventHandler.callStandaloneIsYes();
1901: }
1902: }
1903: }
1904:
1905: //
1906: // From the standard:
1907: //
1908: // [39] element ::= EmptyElemTag | STag content ETag
1909: // [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1910: // [40] STag ::= '<' Name (S Attribute)* S? '>'
1911: // [41] Attribute ::= Name Eq AttValue
1912: // [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1913: // [67] Reference ::= EntityRef | CharRef
1914: // [68] EntityRef ::= '&' Name ';'
1915: // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1916: // [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1917: // [42] ETag ::= '</' Name S? '>'
1918: //
1919: // Note: We have already scanned Name.
1920: //
1921: boolean scanElement(QName element) throws Exception {
1922: //
1923: // Scan for attributes
1924: //
1925: boolean greater = false;
1926: boolean slash = false;
1927: if (greater = fEntityReader.lookingAtChar('>', true)) {
1928: // no attributes
1929: } else if (fEntityReader.lookingAtSpace(true)) {
1930: int previousState = setScannerState(SCANNER_STATE_ATTRIBUTE_LIST);
1931: while (true) {
1932: fEntityReader.skipPastSpaces();
1933: //
1934: // [41] Attribute ::= Name Eq AttValue
1935: //
1936: if ((greater = fEntityReader.lookingAtChar('>', true))
1937: || (slash = fEntityReader.lookingAtChar('/',
1938: true)))
1939: break;
1940: //
1941: // Name
1942: //
1943: setScannerState(SCANNER_STATE_ATTRIBUTE_NAME);
1944: scanAttributeName(fEntityReader, element,
1945: fAttributeQName);
1946: if (fAttributeQName.rawname == -1) {
1947: break;
1948: }
1949: //
1950: // Eq
1951: //
1952: fEntityReader.skipPastSpaces();
1953: if (!fEntityReader.lookingAtChar('=', true)) {
1954: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1955: abortMarkup(
1956: XMLMessages.MSG_EQ_REQUIRED_IN_ATTRIBUTE,
1957: XMLMessages.P41_EQ_REQUIRED,
1958: element.rawname,
1959: fAttributeQName.rawname);
1960: restoreScannerState(previousState);
1961: }
1962: return false;
1963: }
1964: fEntityReader.skipPastSpaces();
1965: int result = scanAttValue(element, fAttributeQName,
1966: false);
1967: if (result == RESULT_FAILURE) {
1968: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
1969: skipPastEndOfCurrentMarkup();
1970: restoreScannerState(previousState);
1971: }
1972: return false;
1973: } else if (result == RESULT_DUPLICATE_ATTR) {
1974: reportFatalXMLError(
1975: XMLMessages.MSG_ATTRIBUTE_NOT_UNIQUE,
1976: XMLMessages.WFC_UNIQUE_ATT_SPEC,
1977: element.rawname, fAttributeQName.rawname);
1978: }
1979: //The validator will check whether we have a duplicate attr in the start tag.
1980: if (fEventHandler.attribute(element, fAttributeQName,
1981: result)) {
1982: reportFatalXMLError(
1983: XMLMessages.MSG_ATTRIBUTE_NOT_UNIQUE,
1984: XMLMessages.WFC_UNIQUE_ATT_SPEC,
1985: element.rawname, fAttributeQName.rawname);
1986: }
1987: restoreScannerState(SCANNER_STATE_ATTRIBUTE_LIST);
1988: if (!fEntityReader.lookingAtSpace(true)) {
1989: if (!(greater = fEntityReader.lookingAtChar('>',
1990: true)))
1991: slash = fEntityReader.lookingAtChar('/', true);
1992: break;
1993: }
1994: }
1995: restoreScannerState(previousState);
1996: } else {
1997: slash = fEntityReader.lookingAtChar('/', true);
1998: }
1999: if (!greater
2000: && (!slash || !fEntityReader.lookingAtChar('>', true))) { // '>' or '/>'
2001: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
2002: abortMarkup(XMLMessages.MSG_ELEMENT_UNTERMINATED,
2003: XMLMessages.P40_UNTERMINATED, element.rawname);
2004: }
2005: return false;
2006: }
2007: fEventHandler.callStartElement(element);
2008: fScannerMarkupDepth--;
2009: if (slash) { // '/>'
2010: fEventHandler.callEndElement(fReaderId);
2011: return false;
2012: } else {
2013: return true;
2014: }
2015: }
2016:
2017: //
2018: // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2019: //
2020: int scanCharRef() throws Exception {
2021: int valueOffset = fEntityReader.currentOffset();
2022: boolean hex = fEntityReader.lookingAtChar('x', true);
2023: int num = fEntityReader.scanCharRef(hex);
2024: if (num < 0) {
2025: switch (num) {
2026: case XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED:
2027: reportFatalXMLError(
2028: XMLMessages.MSG_SEMICOLON_REQUIRED_IN_CHARREF,
2029: XMLMessages.P66_SEMICOLON_REQUIRED);
2030: return -1;
2031: case XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR:
2032: int majorCode = hex ? XMLMessages.MSG_HEXDIGIT_REQUIRED_IN_CHARREF
2033: : XMLMessages.MSG_DIGIT_REQUIRED_IN_CHARREF;
2034: int minorCode = hex ? XMLMessages.P66_HEXDIGIT_REQUIRED
2035: : XMLMessages.P66_DIGIT_REQUIRED;
2036: reportFatalXMLError(majorCode, minorCode);
2037: return -1;
2038: case XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE:
2039: num = 0x110000; // this will cause the right error to be reported below...
2040: break;
2041: }
2042: }
2043: //
2044: // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] // any Unicode character, excluding the
2045: // | [#xE000-#xFFFD] | [#x10000-#x10FFFF] // surrogate blocks, FFFE, and FFFF.
2046: //
2047: if (num < 0x20) {
2048: if (num == 0x09 || num == 0x0A || num == 0x0D) {
2049: return num;
2050: }
2051: } else if (num <= 0xD7FF
2052: || (num >= 0xE000 && (num <= 0xFFFD || (num >= 0x10000 && num <= 0x10FFFF)))) {
2053: return num;
2054: }
2055: int valueLength = fEntityReader.currentOffset() - valueOffset;
2056: reportFatalXMLError(XMLMessages.MSG_INVALID_CHARREF,
2057: XMLMessages.WFC_LEGAL_CHARACTER, fEntityReader
2058: .addString(valueOffset, valueLength));
2059: return -1;
2060: }
2061:
2062: //
2063: // From the standard:
2064: //
2065: // [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2066: //
2067: // Called after scanning past '<!--'
2068: //
2069: void scanComment() throws Exception {
2070: int commentOffset = fEntityReader.currentOffset();
2071: boolean sawDashDash = false;
2072: int previousState = setScannerState(SCANNER_STATE_COMMENT);
2073: while (fScannerState == SCANNER_STATE_COMMENT) {
2074: if (fEntityReader.lookingAtChar('-', false)) {
2075: int nextEndOffset = fEntityReader.currentOffset();
2076: int endOffset = 0;
2077: fEntityReader.lookingAtChar('-', true);
2078: int offset = fEntityReader.currentOffset();
2079: int count = 1;
2080: while (fEntityReader.lookingAtChar('-', true)) {
2081: count++;
2082: endOffset = nextEndOffset;
2083: nextEndOffset = offset;
2084: offset = fEntityReader.currentOffset();
2085: }
2086: if (count > 1) {
2087: if (fEntityReader.lookingAtChar('>', true)) {
2088: if (!sawDashDash && count > 2) {
2089: reportFatalXMLError(
2090: XMLMessages.MSG_DASH_DASH_IN_COMMENT,
2091: XMLMessages.P15_DASH_DASH);
2092: sawDashDash = true;
2093: }
2094: fScannerMarkupDepth--;
2095: fEventHandler.callComment(fEntityReader
2096: .addString(commentOffset, endOffset
2097: - commentOffset));
2098: restoreScannerState(previousState);
2099: return;
2100: } else if (!sawDashDash) {
2101: reportFatalXMLError(
2102: XMLMessages.MSG_DASH_DASH_IN_COMMENT,
2103: XMLMessages.P15_DASH_DASH);
2104: sawDashDash = true;
2105: }
2106: }
2107: } else {
2108: if (!fEntityReader.lookingAtValidChar(true)) {
2109: int invChar = fEntityReader.scanInvalidChar();
2110: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
2111: if (invChar >= 0) {
2112: reportFatalXMLError(
2113: XMLMessages.MSG_INVALID_CHAR_IN_COMMENT,
2114: XMLMessages.P15_INVALID_CHARACTER,
2115: Integer.toHexString(invChar));
2116: }
2117: }
2118: }
2119: }
2120: }
2121: restoreScannerState(previousState);
2122: }
2123:
2124: //
2125: // From the standard:
2126: //
2127: // [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2128: // [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2129: //
2130: void scanPI(int piTarget) throws Exception {
2131: String piTargetString = fStringPool.toString(piTarget);
2132: if (piTargetString.length() == 3
2133: && (piTargetString.charAt(0) == 'X' || piTargetString
2134: .charAt(0) == 'x')
2135: && (piTargetString.charAt(1) == 'M' || piTargetString
2136: .charAt(1) == 'm')
2137: && (piTargetString.charAt(2) == 'L' || piTargetString
2138: .charAt(2) == 'l')) {
2139: abortMarkup(XMLMessages.MSG_RESERVED_PITARGET,
2140: XMLMessages.P17_RESERVED_PITARGET);
2141: return;
2142: }
2143: int prevState = setScannerState(SCANNER_STATE_PI);
2144: int piDataOffset = -1;
2145: int piDataLength = -1;
2146: if (!fEntityReader.lookingAtSpace(true)) {
2147: if (!fEntityReader.lookingAtChar('?', true)
2148: || !fEntityReader.lookingAtChar('>', true)) {
2149: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
2150: abortMarkup(XMLMessages.MSG_SPACE_REQUIRED_IN_PI,
2151: XMLMessages.P16_WHITESPACE_REQUIRED);
2152: restoreScannerState(prevState);
2153: }
2154: return;
2155: }
2156: piDataLength = 0;
2157: } else {
2158: fEntityReader.skipPastSpaces();
2159: piDataOffset = fEntityReader.currentOffset();
2160: while (fScannerState == SCANNER_STATE_PI) {
2161: while (fEntityReader.lookingAtChar('?', false)) {
2162: int offset = fEntityReader.currentOffset();
2163: fEntityReader.lookingAtChar('?', true);
2164: if (fEntityReader.lookingAtChar('>', true)) {
2165: piDataLength = offset - piDataOffset;
2166: break;
2167: }
2168: }
2169: if (piDataLength >= 0)
2170: break;
2171: if (!fEntityReader.lookingAtValidChar(true)) {
2172: int invChar = fEntityReader.scanInvalidChar();
2173: if (fScannerState != SCANNER_STATE_END_OF_INPUT) {
2174: if (invChar >= 0) {
2175: reportFatalXMLError(
2176: XMLMessages.MSG_INVALID_CHAR_IN_PI,
2177: XMLMessages.P16_INVALID_CHARACTER,
2178: Integer.toHexString(invChar));
2179: }
2180: skipPastEndOfCurrentMarkup();
2181: restoreScannerState(prevState);
2182: }
2183: return;
2184: }
2185: }
2186: }
2187: fScannerMarkupDepth--;
2188: restoreScannerState(prevState);
2189: int piData = piDataLength == 0 ? StringPool.EMPTY_STRING
2190: : fEntityReader.addString(piDataOffset, piDataLength);
2191: fEventHandler.callProcessingInstruction(piTarget, piData);
2192: }
2193:
2194: /** Sets whether the parser preprocesses namespaces. */
2195: public void setNamespacesEnabled(boolean enabled) {
2196: fNamespacesEnabled = enabled;
2197: }
2198:
2199: /** Returns whether the parser processes namespaces. */
2200: public boolean getNamespacesEnabled() {
2201: return fNamespacesEnabled;
2202: }
2203:
2204: /** Sets whether the parser validates. */
2205: public void setValidationEnabled(boolean enabled) {
2206: fValidationEnabled = enabled;
2207: if (fDTDScanner != null) {
2208: fDTDScanner.setValidationEnabled(enabled);
2209: }
2210: }
2211:
2212: /** Returns true if validation is turned on. */
2213: public boolean getValidationEnabled() {
2214: return fValidationEnabled;
2215: }
2216:
2217: /** Sets whether the parser loads the external DTD. */
2218: public void setLoadExternalDTD(boolean enabled) {
2219: fLoadExternalDTD = enabled;
2220: if (fDTDScanner != null) {
2221: fDTDScanner.setLoadExternalDTD(enabled);
2222: }
2223: }
2224:
2225: /** Returns true if loading the external DTD is turned on. */
2226: public boolean getLoadExternalDTD() {
2227: return fLoadExternalDTD;
2228: }
2229:
2230: // old EventHandler methods pushed back into scanner
2231:
2232: /** Scans element type. */
2233: private void scanElementType(
2234: XMLEntityHandler.EntityReader entityReader, char fastchar,
2235: QName element) throws Exception {
2236:
2237: if (!fNamespacesEnabled) {
2238: element.clear();
2239: element.localpart = entityReader.scanName(fastchar);
2240: element.rawname = element.localpart;
2241: } else {
2242: entityReader.scanQName(fastchar, element);
2243: if (entityReader.lookingAtChar(':', false)) {
2244: fErrorReporter.reportError(fErrorReporter.getLocator(),
2245: XMLMessages.XML_DOMAIN,
2246: XMLMessages.MSG_TWO_COLONS_IN_QNAME,
2247: XMLMessages.P5_INVALID_CHARACTER, null,
2248: XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
2249: entityReader.skipPastNmtoken(' ');
2250: }
2251: }
2252:
2253: fEventHandler.element(element);
2254:
2255: } // scanElementType(XMLEntityHandler.EntityReader,char,QName)
2256:
2257: /** Scans expected element type. */
2258: private boolean scanExpectedElementType(
2259: XMLEntityHandler.EntityReader entityReader, char fastchar,
2260: int elementType) throws Exception {
2261:
2262: /***/
2263: // REVISIT: Why aren't we using the 'element' parameter? -Ac
2264: // REVISIT: I replaced the 'fCurrentElement' with 'element' parameter, still working,
2265: // just wondering Why are we using CharArrayRange in the first place? -ericye
2266: if (fCurrentElementCharArrayRange == null) {
2267: fCurrentElementCharArrayRange = fStringPool
2268: .createCharArrayRange();
2269: }
2270: fStringPool.getCharArrayRange(elementType,
2271: fCurrentElementCharArrayRange);
2272: return entityReader.scanExpectedName(fastchar,
2273: fCurrentElementCharArrayRange);
2274: /***
2275: entityReader.scanQName(fastchar, element);
2276: return true;
2277: /***/
2278:
2279: } // scanExpectedElementType(XMLEntityHandler.EntityReader,char,QName)
2280:
2281: /** Scans attribute name. */
2282: private void scanAttributeName(
2283: XMLEntityHandler.EntityReader entityReader, QName element,
2284: QName attribute) throws Exception {
2285:
2286: /***
2287: // REVISIT: What's this check for?
2288: if (!fSeenRootElement) {
2289: fSeenRootElement = true;
2290: rootElementSpecified(element);
2291: fStringPool.resetShuffleCount();
2292: }
2293: /***/
2294:
2295: if (!fNamespacesEnabled) {
2296: attribute.clear();
2297: attribute.localpart = entityReader.scanName('=');
2298: attribute.rawname = attribute.localpart;
2299: } else {
2300: entityReader.scanQName('=', attribute);
2301: if (entityReader.lookingAtChar(':', false)) {
2302: fErrorReporter.reportError(fErrorReporter.getLocator(),
2303: XMLMessages.XML_DOMAIN,
2304: XMLMessages.MSG_TWO_COLONS_IN_QNAME,
2305: XMLMessages.P5_INVALID_CHARACTER, null,
2306: XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
2307: entityReader.skipPastNmtoken(' ');
2308: }
2309: }
2310:
2311: } // scanAttributeName(XMLEntityHandler.EntityReader,QName,QName)
2312:
2313: /** Scan doctype declaration. */
2314: private void scanDoctypeDecl(boolean standalone) throws Exception {
2315:
2316: fScanningDTD = true;
2317:
2318: /***
2319: fScanningDTD = true;
2320: fCheckedForSchema = true;
2321: /***/
2322: fSeenDoctypeDecl = true;
2323: /***
2324: fStandaloneReader = standalone ? fEntityHandler.getReaderId() : -1;
2325: fDeclsAreExternal = false;
2326: if (fDTDImporter == null) {
2327: fDTDImporter = new DTDImporter(fStringPool, fErrorReporter, fEntityHandler, this);
2328: }
2329: else {
2330: fDTDImporter.reset(fStringPool);
2331: }
2332: fDTDImporter.initHandlers(fDTDHandler);
2333: fDTDImporter.setValidating(fValidating);
2334: fDTDImporter.setNamespacesEnabled(fNamespacesEnabled);
2335: if (fDTDImporter.scanDoctypeDecl(standalone) && fValidating) {
2336: // check declared elements
2337: if (fWarningOnUndeclaredElements) {
2338: // REVISIT: comment out because won't compile
2339: // checkDeclaredElements();
2340: }
2341:
2342: // check required notations
2343: fEntityHandler.checkRequiredNotations();
2344: }
2345: fScanningDTD = false;
2346: /***/
2347: if (fDTDScanner == null) {
2348: fDTDScanner = new XMLDTDScanner(fStringPool,
2349: fErrorReporter, fEntityHandler,
2350: new ChunkyCharArray(fStringPool));
2351: fDTDScanner.setValidationEnabled(fValidationEnabled);
2352: fDTDScanner.setNamespacesEnabled(fNamespacesEnabled);
2353: fDTDScanner.setLoadExternalDTD(fLoadExternalDTD);
2354: } else {
2355: fDTDScanner.reset(fStringPool, new ChunkyCharArray(
2356: fStringPool));
2357: }
2358: fDTDScanner.setDTDHandler(fDTDHandler);
2359: fDTDScanner.setGrammarResolver(fGrammarResolver);
2360: // REVISIT: What about standalone?
2361: if (fDTDScanner.scanDoctypeDecl()) {
2362: if (fDTDScanner.getReadingExternalEntity()) {
2363: fDTDScanner.scanDecls(true);
2364: }
2365: // REVISIT: What about validation and checking stuff?
2366: }
2367: //VC_NOTATION_DECLARED
2368: if (fValidationEnabled) {
2369: ((DefaultEntityHandler) fEntityHandler)
2370: .checkRequiredNotations();
2371: }
2372: /***/
2373: fScanningDTD = false;
2374:
2375: } // scanDoctypeDecl(boolean)
2376:
2377: /** Scan attribute value. */
2378: private int scanAttValue(QName element, QName attribute)
2379: throws Exception {
2380:
2381: //fAttrNameLocator = getLocatorImpl(fAttrNameLocator);
2382: int attValue = scanAttValue(element, attribute,
2383: fValidationEnabled);
2384: if (attValue == -1) {
2385: return XMLDocumentScanner.RESULT_FAILURE;
2386: }
2387:
2388: /***
2389: // REVISIT: This is validation related.
2390: if (!fValidating && fAttDefCount == 0) {
2391: int attType = fCDATASymbol;
2392: if (fAttrListHandle == -1)
2393: fAttrListHandle = fAttrList.startAttrList();
2394: // REVISIT: Should this be localpart or rawname?
2395: if (fAttrList.addAttr(attribute, attValue, attType, true, true) == -1) {
2396: return XMLDocumentScanner.RESULT_DUPLICATE_ATTR;
2397: }
2398: return XMLDocumentScanner.RESULT_SUCCESS;
2399: }
2400: /****/
2401:
2402: /****
2403: // REVISIT: Validation. What should these be?
2404: int attDefIndex = getAttDef(element, attribute);
2405: if (attDefIndex == -1) {
2406:
2407: if (fValidating) {
2408: // REVISIT - cache the elem/attr tuple so that we only give
2409: // this error once for each unique occurrence
2410: Object[] args = { fStringPool.toString(element.rawname),
2411: fStringPool.toString(attribute.rawname) };
2412: fErrorReporter.reportError(fAttrNameLocator,
2413: XMLMessages.XML_DOMAIN,
2414: XMLMessages.MSG_ATTRIBUTE_NOT_DECLARED,
2415: XMLMessages.VC_ATTRIBUTE_VALUE_TYPE,
2416: args,
2417: XMLErrorReporter.ERRORTYPE_RECOVERABLE_ERROR);
2418: }
2419:
2420: int attType = fCDATASymbol;
2421: if (fAttrListHandle == -1) {
2422: fAttrListHandle = fAttrList.startAttrList();
2423: }
2424: // REVISIT: Validation. What should the name be?
2425: if (fAttrList.addAttr(attribute, attValue, attType, true, true) == -1) {
2426: return XMLDocumentScanner.RESULT_DUPLICATE_ATTR;
2427: }
2428: return XMLDocumentScanner.RESULT_SUCCESS;
2429: }
2430: /****/
2431:
2432: /****
2433: int attType = getAttType(attDefIndex);
2434: if (attType != fCDATASymbol) {
2435: AttributeValidator av = getAttributeValidator(attDefIndex);
2436: int enumHandle = getEnumeration(attDefIndex);
2437: // REVISIT: Validation. What should these be?
2438: attValue = av.normalize(element, attribute,
2439: attValue, attType, enumHandle);
2440: }
2441:
2442: if (fAttrListHandle == -1) {
2443: fAttrListHandle = fAttrList.startAttrList();
2444: }
2445: // REVISIT: Validation. What should the name be?
2446: if (fAttrList.addAttr(attribute, attValue, attType, true, true) == -1) {
2447: return XMLDocumentScanner.RESULT_DUPLICATE_ATTR;
2448: }
2449: /***/
2450:
2451: return XMLDocumentScanner.RESULT_SUCCESS;
2452:
2453: } // scanAttValue(QName,QName):int
2454:
2455: /** Returns true if the version number is valid. */
2456: private boolean validVersionNum(String version) {
2457: return XMLCharacterProperties.validVersionNum(version);
2458: }
2459:
2460: /** Returns true if the encoding name is valid. */
2461: private boolean validEncName(String encoding) {
2462: return XMLCharacterProperties.validEncName(encoding);
2463: }
2464:
2465: } // class XMLDocumentScanner
|