0001: /*
0002: * The Apache Software License, Version 1.1
0003: *
0004: *
0005: * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
0006: * reserved.
0007: *
0008: * Redistribution and use in source and binary forms, with or without
0009: * modification, are permitted provided that the following conditions
0010: * are met:
0011: *
0012: * 1. Redistributions of source code must retain the above copyright
0013: * notice, this list of conditions and the following disclaimer.
0014: *
0015: * 2. Redistributions in binary form must reproduce the above copyright
0016: * notice, this list of conditions and the following disclaimer in
0017: * the documentation and/or other materials provided with the
0018: * distribution.
0019: *
0020: * 3. The end-user documentation included with the redistribution,
0021: * if any, must include the following acknowledgment:
0022: * "This product includes software developed by the
0023: * Apache Software Foundation (http://www.apache.org/)."
0024: * Alternately, this acknowledgment may appear in the software itself,
0025: * if and wherever such third-party acknowledgments normally appear.
0026: *
0027: * 4. The names "Xerces" and "Apache Software Foundation" must
0028: * not be used to endorse or promote products derived from this
0029: * software without prior written permission. For written
0030: * permission, please contact apache@apache.org.
0031: *
0032: * 5. Products derived from this software may not be called "Apache",
0033: * nor may "Apache" appear in their name, without prior written
0034: * permission of the Apache Software Foundation.
0035: *
0036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
0040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
0043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
0045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
0046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0047: * SUCH DAMAGE.
0048: * ====================================================================
0049: *
0050: * This software consists of voluntary contributions made by many
0051: * individuals on behalf of the Apache Software Foundation and was
0052: * originally based on software copyright (c) 1999, International
0053: * Business Machines, Inc., http://www.apache.org. For more
0054: * information on the Apache Software Foundation, please see
0055: * <http://www.apache.org/>.
0056: */
0057:
0058: package org.apache.xerces.readers;
0059:
0060: import org.apache.xerces.framework.XMLErrorReporter;
0061: import org.apache.xerces.utils.QName;
0062: import org.apache.xerces.utils.StringPool;
0063: import org.apache.xerces.utils.XMLCharacterProperties;
0064:
0065: import org.xml.sax.Locator;
0066: import org.xml.sax.InputSource;
0067: import java.io.IOException;
0068:
0069: /**
0070: * Reader for processing internal entity replacement text.
0071: * <p>
0072: * This reader processes data contained within strings kept
0073: * in the string pool. It provides the support for both
0074: * general and parameter entities. The location support
0075: * as we are processing the replacement text is somewhat
0076: * poor and needs to be updated when "nested locations"
0077: * have been implemented.
0078: * <p>
0079: * For efficiency, we return instances of this class to a
0080: * free list and reuse those instances to process other
0081: * strings.
0082: *
0083: * @version $id$
0084: */
0085: final class StringReader extends XMLEntityReader {
0086: /**
0087: * Allocate a string reader
0088: *
0089: * @param entityHandler The current entity handler.
0090: * @param errorReporter The current error reporter.
0091: * @param sendCharDataAsCharArray true if char data should be reported using
0092: * char arrays instead of string handles.
0093: * @param lineNumber The line number to return as our position.
0094: * @param columnNumber The column number to return as our position.
0095: * @param stringHandle The StringPool handle for the data to process.
0096: * @param stringPool The string pool.
0097: * @param addEnclosingSpaces If true, treat the data to process as if
0098: * there were a leading and trailing space
0099: * character enclosing the string data.
0100: * @return The reader that will process the string data.
0101: */
0102: public static StringReader createStringReader(
0103: XMLEntityHandler entityHandler,
0104: XMLErrorReporter errorReporter,
0105: boolean sendCharDataAsCharArray, int lineNumber,
0106: int columnNumber, int stringHandle, StringPool stringPool,
0107: boolean addEnclosingSpaces) {
0108: StringReader reader = null;
0109: synchronized (StringReader.class) {
0110: reader = fgFreeReaders;
0111: if (reader == null) {
0112: return new StringReader(entityHandler, errorReporter,
0113: sendCharDataAsCharArray, lineNumber,
0114: columnNumber, stringHandle, stringPool,
0115: addEnclosingSpaces);
0116: }
0117: fgFreeReaders = reader.fNextFreeReader;
0118: }
0119: reader.init(entityHandler, errorReporter,
0120: sendCharDataAsCharArray, lineNumber, columnNumber,
0121: stringHandle, stringPool, addEnclosingSpaces);
0122: return reader;
0123: }
0124:
//
// Construct a fresh reader over the string identified by stringHandle.
// The parameters have the same meaning as for createStringReader().
//
private StringReader(XMLEntityHandler entityHandler,
        XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray,
        int lineNumber, int columnNumber, int stringHandle,
        StringPool stringPool, boolean addEnclosingSpaces) {
    super(entityHandler, errorReporter, sendCharDataAsCharArray,
            lineNumber, columnNumber);
    fStringPool = stringPool;
    fData = stringPool.toString(stringHandle);
    fEndOffset = fData.length();
    if (!addEnclosingSpaces) {
        fCurrentOffset = 0;
        // -1 marks "no more data"; an empty string is exhausted immediately.
        fMostRecentChar = (fEndOffset == 0) ? -1 : fData.charAt(0);
        return;
    }
    // The virtual leading space sits one position before the real data.
    fCurrentOffset = -1;
    fMostRecentChar = ' ';
    hadTrailingSpace = true;
    oweTrailingSpace = true;
}
0147:
0148: private void init(XMLEntityHandler entityHandler,
0149: XMLErrorReporter errorReporter,
0150: boolean sendCharDataAsCharArray, int lineNumber,
0151: int columnNumber, int stringHandle, StringPool stringPool,
0152: boolean addEnclosingSpaces) {
0153: super .init(entityHandler, errorReporter,
0154: sendCharDataAsCharArray, lineNumber, columnNumber);
0155: fStringPool = stringPool;
0156: fData = fStringPool.toString(stringHandle);
0157: fCurrentOffset = 0;
0158: fEndOffset = fData.length();
0159: fNextFreeReader = null;
0160: if (addEnclosingSpaces) {
0161: fMostRecentChar = ' ';
0162: fCurrentOffset--;
0163: oweTrailingSpace = hadTrailingSpace = true;
0164: } else {
0165: fMostRecentChar = fEndOffset == 0 ? -1 : fData.charAt(0);
0166: oweTrailingSpace = hadTrailingSpace = false;
0167: }
0168: }
0169:
0170: //
0171: //
0172: //
0173: public int addString(int offset, int length) {
0174: if (length == 0)
0175: return 0;
0176: return fStringPool.addString(fData.substring(offset, offset
0177: + length));
0178: }
0179:
0180: //
0181: //
0182: //
0183: public int addSymbol(int offset, int length) {
0184: if (length == 0)
0185: return 0;
0186: return fStringPool.addSymbol(fData.substring(offset, offset
0187: + length));
0188: }
0189:
0190: //
0191: //
0192: //
0193: public void append(XMLEntityHandler.CharBuffer charBuffer,
0194: int offset, int length) {
0195: boolean addSpace = false;
0196: for (int i = 0; i < length; i++) {
0197: try {
0198: charBuffer.append(fData.charAt(offset++));
0199: } catch (StringIndexOutOfBoundsException ex) {
0200: if (offset == fEndOffset + 1 && hadTrailingSpace) {
0201: charBuffer.append(' ');
0202: } else {
0203: System.err.println("StringReader.append()");
0204: throw ex;
0205: }
0206: }
0207: }
0208: }
0209:
0210: //
0211: //
0212: //
0213: private int loadNextChar() {
0214: if (++fCurrentOffset >= fEndOffset) {
0215: if (oweTrailingSpace) {
0216: oweTrailingSpace = false;
0217: fMostRecentChar = ' ';
0218: } else {
0219: fMostRecentChar = -1;
0220: }
0221: } else {
0222: fMostRecentChar = fData.charAt(fCurrentOffset);
0223: }
0224: return fMostRecentChar;
0225: }
0226:
0227: //
0228: //
0229: //
0230: public XMLEntityHandler.EntityReader changeReaders()
0231: throws Exception {
0232: XMLEntityHandler.EntityReader nextReader = super
0233: .changeReaders();
0234: synchronized (StringReader.class) {
0235: fNextFreeReader = fgFreeReaders;
0236: fgFreeReaders = this ;
0237: // Allow these following two fields to be GC-ed.
0238: fStringPool = null;
0239: fData = null;
0240: }
0241: return nextReader;
0242: }
0243:
0244: //
0245: //
0246: //
0247: public boolean lookingAtChar(char chr, boolean skipPastChar)
0248: throws Exception {
0249: int ch = fMostRecentChar;
0250: if (ch != chr) {
0251: if (ch == -1) {
0252: return changeReaders().lookingAtChar(chr, skipPastChar);
0253: }
0254: return false;
0255: }
0256: if (skipPastChar) {
0257: if (++fCurrentOffset >= fEndOffset) {
0258: if (oweTrailingSpace) {
0259: oweTrailingSpace = false;
0260: fMostRecentChar = ' ';
0261: } else {
0262: fMostRecentChar = -1;
0263: }
0264: } else {
0265: fMostRecentChar = fData.charAt(fCurrentOffset);
0266: }
0267: }
0268: return true;
0269: }
0270:
0271: //
0272: //
0273: //
0274: public boolean lookingAtValidChar(boolean skipPastChar)
0275: throws Exception {
0276: int ch = fMostRecentChar;
0277: if (ch < 0xD800) {
0278: if (ch < 0x20 && ch != 0x09 && ch != 0x0A && ch != 0x0D) {
0279: if (ch == -1)
0280: return changeReaders().lookingAtValidChar(
0281: skipPastChar);
0282: return false;
0283: }
0284: if (skipPastChar) {
0285: if (++fCurrentOffset >= fEndOffset) {
0286: if (oweTrailingSpace) {
0287: oweTrailingSpace = false;
0288: fMostRecentChar = ' ';
0289: } else {
0290: fMostRecentChar = -1;
0291: }
0292: } else {
0293: fMostRecentChar = fData.charAt(fCurrentOffset);
0294: }
0295: }
0296: return true;
0297: }
0298: if (ch > 0xFFFD) {
0299: return false;
0300: }
0301: if (ch < 0xDC00) {
0302: if (fCurrentOffset + 1 >= fEndOffset) {
0303: return false;
0304: }
0305: ch = fData.charAt(fCurrentOffset + 1);
0306: if (ch < 0xDC00 || ch >= 0xE000) {
0307: return false;
0308: } else if (!skipPastChar) {
0309: return true;
0310: } else {
0311: fCurrentOffset++;
0312: }
0313: } else if (ch < 0xE000) {
0314: return false;
0315: }
0316: if (skipPastChar) {
0317: if (++fCurrentOffset >= fEndOffset) {
0318: if (oweTrailingSpace) {
0319: oweTrailingSpace = false;
0320: fMostRecentChar = ' ';
0321: } else {
0322: fMostRecentChar = -1;
0323: }
0324: } else {
0325: fMostRecentChar = fData.charAt(fCurrentOffset);
0326: }
0327: }
0328: return true;
0329: }
0330:
0331: //
0332: //
0333: //
0334: public boolean lookingAtSpace(boolean skipPastChar)
0335: throws Exception {
0336: int ch = fMostRecentChar;
0337: if (ch > 0x20)
0338: return false;
0339: if (ch == 0x20 || ch == 0x0A || ch == 0x0D || ch == 0x09) {
0340: if (skipPastChar) {
0341: loadNextChar();
0342: }
0343: return true;
0344: }
0345: if (ch == -1) {
0346: return changeReaders().lookingAtSpace(skipPastChar);
0347: }
0348: return false;
0349: }
0350:
0351: //
0352: //
0353: //
0354: public void skipToChar(char chr) throws Exception {
0355: //
0356: // REVISIT - this will skip invalid characters without reporting them.
0357: //
0358: int ch = fMostRecentChar;
0359: while (true) {
0360: if (ch == chr)
0361: return;
0362: if (ch == -1) {
0363: changeReaders().skipToChar(chr);
0364: return;
0365: }
0366: ch = loadNextChar();
0367: }
0368: }
0369:
0370: //
0371: //
0372: //
0373: public void skipPastSpaces() throws Exception {
0374: int ch = fMostRecentChar;
0375: if (ch == -1) {
0376: changeReaders().skipPastSpaces();
0377: return;
0378: }
0379: while (true) {
0380: if (ch > 0x20
0381: || (ch != 0x20 && ch != 0x0A && ch != 0x09 && ch != 0x0D)) {
0382: fMostRecentChar = ch;
0383: return;
0384: }
0385: if (++fCurrentOffset >= fEndOffset) {
0386: changeReaders().skipPastSpaces();
0387: return;
0388: }
0389: ch = fData.charAt(fCurrentOffset);
0390: }
0391: }
0392:
0393: //
0394: //
0395: //
0396: public void skipPastName(char fastcheck) throws Exception {
0397: int ch = fMostRecentChar;
0398: if (ch < 0x80) {
0399: if (ch == -1
0400: || XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0)
0401: return;
0402: } else {
0403: if (!fCalledCharPropInit) {
0404: XMLCharacterProperties.initCharFlags();
0405: fCalledCharPropInit = true;
0406: }
0407: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0)
0408: return;
0409: }
0410: while (true) {
0411: ch = loadNextChar();
0412: if (fastcheck == ch)
0413: return;
0414: if (ch < 0x80) {
0415: if (ch == -1
0416: || XMLCharacterProperties.fgAsciiNameChar[ch] == 0)
0417: return;
0418: } else {
0419: if (!fCalledCharPropInit) {
0420: XMLCharacterProperties.initCharFlags();
0421: fCalledCharPropInit = true;
0422: }
0423: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
0424: return;
0425: }
0426: }
0427: }
0428:
0429: //
0430: //
0431: //
0432: public void skipPastNmtoken(char fastcheck) throws Exception {
0433: int ch = fMostRecentChar;
0434: while (true) {
0435: if (fastcheck == ch)
0436: return;
0437: if (ch < 0x80) {
0438: if (ch == -1
0439: || XMLCharacterProperties.fgAsciiNameChar[ch] == 0)
0440: return;
0441: } else {
0442: if (!fCalledCharPropInit) {
0443: XMLCharacterProperties.initCharFlags();
0444: fCalledCharPropInit = true;
0445: }
0446: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
0447: return;
0448: }
0449: ch = loadNextChar();
0450: }
0451: }
0452:
0453: //
0454: //
0455: //
0456: public boolean skippedString(char[] s) throws Exception {
0457: int ch = fMostRecentChar;
0458: if (ch != s[0]) {
0459: if (ch == -1)
0460: return changeReaders().skippedString(s);
0461: return false;
0462: }
0463: if (fCurrentOffset + s.length > fEndOffset)
0464: return false;
0465: for (int i = 1; i < s.length; i++) {
0466: if (fData.charAt(fCurrentOffset + i) != s[i])
0467: return false;
0468: }
0469: fCurrentOffset += (s.length - 1);
0470: loadNextChar();
0471: return true;
0472: }
0473:
0474: //
0475: //
0476: //
0477: public int scanInvalidChar() throws Exception {
0478: int ch = fMostRecentChar;
0479: if (ch == -1)
0480: return changeReaders().scanInvalidChar();
0481: loadNextChar();
0482: return ch;
0483: }
0484:
0485: //
0486: //
0487: //
0488: public int scanCharRef(boolean hex) throws Exception {
0489: int ch = fMostRecentChar;
0490: if (ch == -1)
0491: return changeReaders().scanCharRef(hex);
0492: int num = 0;
0493: if (hex) {
0494: if (ch > 'f'
0495: || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0)
0496: return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
0497: num = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
0498: } else {
0499: if (ch < '0' || ch > '9')
0500: return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
0501: num = ch - '0';
0502: }
0503: boolean toobig = false;
0504: while (true) {
0505: ch = loadNextChar();
0506: if (ch == -1)
0507: return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED;
0508: if (hex) {
0509: if (ch > 'f'
0510: || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0)
0511: break;
0512: } else {
0513: if (ch < '0' || ch > '9')
0514: break;
0515: }
0516: if (hex) {
0517: int dig = ch
0518: - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
0519: num = (num << 4) + dig;
0520: } else {
0521: int dig = ch - '0';
0522: num = (num * 10) + dig;
0523: }
0524: if (num > 0x10FFFF) {
0525: toobig = true;
0526: num = 0;
0527: }
0528: }
0529: if (ch != ';')
0530: return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED;
0531: loadNextChar();
0532: if (toobig)
0533: return XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE;
0534: return num;
0535: }
0536:
0537: //
0538: //
0539: //
0540: public int scanStringLiteral() throws Exception {
0541: boolean single;
0542: if (!(single = lookingAtChar('\'', true))
0543: && !lookingAtChar('\"', true)) {
0544: return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
0545: }
0546: int offset = fCurrentOffset;
0547: char qchar = single ? '\'' : '\"';
0548: while (!lookingAtChar(qchar, false)) {
0549: if (!lookingAtValidChar(true)) {
0550: return XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR;
0551: }
0552: }
0553: int stringIndex = addString(offset, fCurrentOffset - offset);
0554: lookingAtChar(qchar, true); // move past qchar
0555: return stringIndex;
0556: }
0557:
0558: //
0559: // [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
0560: // | "'" ([^<&'] | Reference)* "'"
0561: //
0562: public int scanAttValue(char qchar, boolean asSymbol)
0563: throws Exception {
0564: int offset = fCurrentOffset;
0565: while (true) {
0566: if (lookingAtChar(qchar, false)) {
0567: break;
0568: }
0569: if (lookingAtChar(' ', true)) {
0570: continue;
0571: }
0572: if (lookingAtSpace(false)) {
0573: return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX;
0574: }
0575: if (lookingAtChar('&', false)) {
0576: return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX;
0577: }
0578: if (lookingAtChar('<', false)) {
0579: return XMLEntityHandler.ATTVALUE_RESULT_LESSTHAN;
0580: }
0581: if (!lookingAtValidChar(true)) {
0582: return XMLEntityHandler.ATTVALUE_RESULT_INVALID_CHAR;
0583: }
0584: }
0585: int result = asSymbol ? addSymbol(offset, fCurrentOffset
0586: - offset) : addString(offset, fCurrentOffset - offset);
0587: lookingAtChar(qchar, true);
0588: return result;
0589: }
0590:
0591: //
0592: // [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
0593: // | "'" ([^%&'] | PEReference | Reference)* "'"
0594: //
0595: // The values in the following table are defined as:
0596: //
0597: // 0 - not special
0598: // 1 - quote character
0599: // 2 - reference
0600: // 3 - peref
0601: // 4 - invalid
0602: //
0603: public static final byte fgAsciiEntityValueChar[] = { 4, 4, 4, 4,
0604: 4, 4, 4, 4, 4, 0, 0, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0605: 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 1, 0, 0, 3, 2, 1, 0, 0, 0, 0,
0606: 0,
0607: 0,
0608: 0,
0609: 0, // '\"', '%', '&', '\''
0610: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0611: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0612: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0613: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
0614:
0615: public int scanEntityValue(int qchar, boolean createString)
0616: throws Exception {
0617: int offset = fCurrentOffset;
0618: int ch = fMostRecentChar;
0619: while (true) {
0620: if (ch == -1) {
0621: changeReaders(); // do not call next reader, our caller may need to change the parameters
0622: return XMLEntityHandler.ENTITYVALUE_RESULT_END_OF_INPUT;
0623: }
0624: if (ch < 0x80) {
0625: switch (fgAsciiEntityValueChar[ch]) {
0626: case 1: // quote char
0627: if (ch == qchar) {
0628: if (!createString)
0629: return XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED;
0630: int length = fCurrentOffset - offset;
0631: int result = length == 0 ? StringPool.EMPTY_STRING
0632: : addString(offset, length);
0633: loadNextChar();
0634: return result;
0635: }
0636: // the other quote character is not special
0637: // fall through
0638: case 0: // non-special char
0639: if (++fCurrentOffset >= fEndOffset) {
0640: if (oweTrailingSpace) {
0641: oweTrailingSpace = false;
0642: ch = fMostRecentChar = ' ';
0643: } else {
0644: ch = fMostRecentChar = -1;
0645: }
0646: } else {
0647: ch = fMostRecentChar = fData
0648: .charAt(fCurrentOffset);
0649: }
0650: continue;
0651: case 2: // reference
0652: return XMLEntityHandler.ENTITYVALUE_RESULT_REFERENCE;
0653: case 3: // peref
0654: return XMLEntityHandler.ENTITYVALUE_RESULT_PEREF;
0655: case 4: // invalid
0656: return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR;
0657: }
0658: } else if (ch < 0xD800) {
0659: ch = loadNextChar();
0660: } else if (ch >= 0xE000
0661: && (ch <= 0xFFFD || (ch >= 0x10000 && ch <= 0x10FFFF))) {
0662: //
0663: // REVISIT - needs more code to check surrogates.
0664: //
0665: ch = loadNextChar();
0666: } else {
0667: return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR;
0668: }
0669: }
0670: }
0671:
0672: //
0673: //
0674: //
0675: public boolean scanExpectedName(char fastcheck,
0676: StringPool.CharArrayRange expectedName) throws Exception {
0677: int ch = fMostRecentChar;
0678: if (ch == -1) {
0679: return changeReaders().scanExpectedName(fastcheck,
0680: expectedName);
0681: }
0682: if (!fCalledCharPropInit) {
0683: XMLCharacterProperties.initCharFlags();
0684: fCalledCharPropInit = true;
0685: }
0686: int nameOffset = fCurrentOffset;
0687: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0)
0688: return false;
0689: while (true) {
0690: ch = loadNextChar();
0691: if (fastcheck == ch)
0692: break;
0693: if (ch == -1)
0694: break;
0695: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
0696: break;
0697: }
0698: int nameIndex = fStringPool.addSymbol(fData.substring(
0699: nameOffset, fCurrentOffset));
0700: // DEFECT !! check name against expected name
0701:
0702: return true;
0703: }
0704:
0705: //
0706: //
0707: //
0708: public void scanQName(char fastcheck, QName qname) throws Exception {
0709: int ch = fMostRecentChar;
0710: if (ch == -1) {
0711: changeReaders().scanQName(fastcheck, qname);
0712: return;
0713: }
0714: if (!fCalledCharPropInit) {
0715: XMLCharacterProperties.initCharFlags();
0716: fCalledCharPropInit = true;
0717: }
0718: int nameOffset = fCurrentOffset;
0719: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) {
0720: qname.clear();
0721: return;
0722: }
0723: while (true) {
0724: ch = loadNextChar();
0725: if (fastcheck == ch)
0726: break;
0727: if (ch == -1)
0728: break;
0729: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
0730: break;
0731: }
0732:
0733: qname.clear();
0734: qname.rawname = fStringPool.addSymbol(fData.substring(
0735: nameOffset, fCurrentOffset));
0736:
0737: int index = fData.indexOf(':', nameOffset);
0738: if (index != -1 && index < fCurrentOffset) {
0739: qname.prefix = fStringPool.addSymbol(fData.substring(
0740: nameOffset, index));
0741: int indexOfSpaceChar = fData.indexOf(' ', index + 1);//one past : look for blank
0742: String localPart;
0743: if (indexOfSpaceChar != -1) {//found one
0744: localPart = fData
0745: .substring(index + 1, indexOfSpaceChar);
0746: qname.localpart = fStringPool.addSymbol(localPart);
0747: } else {//then get up to end of String
0748: int lenfData = fData.length();
0749: localPart = fData.substring(index + 1, lenfData);
0750: qname.localpart = fStringPool.addSymbol(localPart);
0751: }
0752: qname.localpart = fStringPool.addSymbol(localPart);
0753: } else {
0754: qname.localpart = qname.rawname;
0755: }
0756:
0757: } // scanQName(char,QName)
0758:
0759: //
0760: //
0761: //
0762: public int scanName(char fastcheck) throws Exception {
0763: int ch = fMostRecentChar;
0764: if (ch == -1) {
0765: return changeReaders().scanName(fastcheck);
0766: }
0767: if (!fCalledCharPropInit) {
0768: XMLCharacterProperties.initCharFlags();
0769: fCalledCharPropInit = true;
0770: }
0771: int nameOffset = fCurrentOffset;
0772: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0)
0773: return -1;
0774: while (true) {
0775: if (++fCurrentOffset >= fEndOffset) {
0776: if (oweTrailingSpace) {
0777: oweTrailingSpace = false;
0778: fMostRecentChar = ' ';
0779: } else {
0780: fMostRecentChar = -1;
0781: }
0782: break;
0783: }
0784: ch = fMostRecentChar = fData.charAt(fCurrentOffset);
0785: if (fastcheck == ch)
0786: break;
0787: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
0788: break;
0789: }
0790: int nameIndex = fStringPool.addSymbol(fData.substring(
0791: nameOffset, fCurrentOffset));
0792: return nameIndex;
0793: }
0794:
0795: //
0796: // There are no leading/trailing space checks here because scanContent cannot
0797: // be called on a parameter entity reference value.
0798: //
0799: private int recognizeMarkup(int ch) throws Exception {
0800: if (ch == -1) {
0801: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
0802: }
0803: switch (ch) {
0804: case '?':
0805: loadNextChar();
0806: return XMLEntityHandler.CONTENT_RESULT_START_OF_PI;
0807: case '!':
0808: ch = loadNextChar();
0809: if (ch == -1) {
0810: fCurrentOffset -= 2;
0811: loadNextChar();
0812: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
0813: }
0814: if (ch == '-') {
0815: ch = loadNextChar();
0816: if (ch == -1) {
0817: fCurrentOffset -= 3;
0818: loadNextChar();
0819: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
0820: }
0821: if (ch == '-') {
0822: loadNextChar();
0823: return XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT;
0824: }
0825: break;
0826: }
0827: if (ch == '[') {
0828: for (int i = 0; i < 6; i++) {
0829: ch = loadNextChar();
0830: if (ch == -1) {
0831: fCurrentOffset -= (3 + i);
0832: loadNextChar();
0833: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
0834: }
0835: if (ch != cdata_string[i]) {
0836: return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
0837: }
0838: }
0839: loadNextChar();
0840: return XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT;
0841: }
0842: break;
0843: case '/':
0844: loadNextChar();
0845: return XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG;
0846: default:
0847: return XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT;
0848: }
0849: return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
0850: }
0851:
0852: private int recognizeReference(int ch) throws Exception {
0853: if (ch == -1) {
0854: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
0855: }
0856: //
0857: // [67] Reference ::= EntityRef | CharRef
0858: // [68] EntityRef ::= '&' Name ';'
0859: // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
0860: //
0861: if (ch == '#') {
0862: loadNextChar();
0863: return XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF;
0864: } else {
0865: return XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF;
0866: }
0867: }
0868:
0869: public int scanContent(QName element) throws Exception {
0870: int ch = fMostRecentChar;
0871: if (ch == -1) {
0872: return changeReaders().scanContent(element);
0873: }
0874: int offset = fCurrentOffset;
0875: if (ch < 0x80) {
0876: switch (XMLCharacterProperties.fgAsciiWSCharData[ch]) {
0877: case 0:
0878: ch = loadNextChar();
0879: break;
0880: case 1:
0881: ch = loadNextChar();
0882: if (!fInCDSect) {
0883: return recognizeMarkup(ch);
0884: }
0885: break;
0886: case 2:
0887: ch = loadNextChar();
0888: if (!fInCDSect) {
0889: return recognizeReference(ch);
0890: }
0891: break;
0892: case 3:
0893: ch = loadNextChar();
0894: if (ch == ']' && fCurrentOffset + 1 < fEndOffset
0895: && fData.charAt(fCurrentOffset + 1) == '>') {
0896: loadNextChar();
0897: loadNextChar();
0898: return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
0899: }
0900: break;
0901: case 4:
0902: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
0903: case 5:
0904: do {
0905: ch = loadNextChar();
0906: if (ch == -1) {
0907: callCharDataHandler(offset, fEndOffset, true);
0908: return changeReaders().scanContent(element);
0909: }
0910: } while (ch == 0x20 || ch == 0x0A || ch == 0x0D
0911: || ch == 0x09);
0912: if (ch < 0x80) {
0913: switch (XMLCharacterProperties.fgAsciiCharData[ch]) {
0914: case 0:
0915: ch = loadNextChar();
0916: break;
0917: case 1:
0918: ch = loadNextChar();
0919: if (!fInCDSect) {
0920: callCharDataHandler(offset,
0921: fCurrentOffset - 1, true);
0922: return recognizeMarkup(ch);
0923: }
0924: break;
0925: case 2:
0926: ch = loadNextChar();
0927: if (!fInCDSect) {
0928: callCharDataHandler(offset,
0929: fCurrentOffset - 1, true);
0930: return recognizeReference(ch);
0931: }
0932: break;
0933: case 3:
0934: ch = loadNextChar();
0935: if (ch == ']'
0936: && fCurrentOffset + 1 < fEndOffset
0937: && fData.charAt(fCurrentOffset + 1) == '>') {
0938: callCharDataHandler(offset,
0939: fCurrentOffset - 1, true);
0940: loadNextChar();
0941: loadNextChar();
0942: return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
0943: }
0944: break;
0945: case 4:
0946: callCharDataHandler(offset, fCurrentOffset,
0947: true);
0948: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
0949: }
0950: } else {
0951: if (ch == 0xFFFE || ch == 0xFFFF) {
0952: callCharDataHandler(offset, fCurrentOffset,
0953: true);
0954: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
0955: }
0956: ch = loadNextChar();
0957: }
0958: }
0959: } else {
0960: if (ch == 0xFFFE || ch == 0xFFFF) {
0961: callCharDataHandler(offset, fCurrentOffset, false);
0962: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
0963: }
0964: ch = loadNextChar();
0965: }
0966: while (true) {
0967: if (ch == -1) {
0968: callCharDataHandler(offset, fEndOffset, false);
0969: return changeReaders().scanContent(element);
0970: }
0971: if (ch >= 0x80)
0972: break;
0973: if (XMLCharacterProperties.fgAsciiCharData[ch] != 0)
0974: break;
0975: ch = loadNextChar();
0976: }
0977: while (true) { // REVISIT - EOF check ?
0978: if (ch < 0x80) {
0979: switch (XMLCharacterProperties.fgAsciiCharData[ch]) {
0980: case 0:
0981: ch = loadNextChar();
0982: break;
0983: case 1:
0984: ch = loadNextChar();
0985: if (!fInCDSect) {
0986: callCharDataHandler(offset, fCurrentOffset - 1,
0987: false);
0988: return recognizeMarkup(ch);
0989: }
0990: break;
0991: case 2:
0992: ch = loadNextChar();
0993: if (!fInCDSect) {
0994: callCharDataHandler(offset, fCurrentOffset - 1,
0995: false);
0996: return recognizeReference(ch);
0997: }
0998: break;
0999: case 3:
1000: ch = loadNextChar();
1001: if (ch == ']' && fCurrentOffset + 1 < fEndOffset
1002: && fData.charAt(fCurrentOffset + 1) == '>') {
1003: callCharDataHandler(offset, fCurrentOffset - 1,
1004: false);
1005: loadNextChar();
1006: loadNextChar();
1007: return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
1008: }
1009: break;
1010: case 4:
1011: callCharDataHandler(offset, fCurrentOffset, false);
1012: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
1013: }
1014: } else {
1015: if (ch == 0xFFFE || ch == 0xFFFF) {
1016: callCharDataHandler(offset, fCurrentOffset, false);
1017: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
1018: }
1019: ch = loadNextChar();
1020: }
1021: if (ch == -1) {
1022: callCharDataHandler(offset, fCurrentOffset, false);
1023: return changeReaders().scanContent(element);
1024: }
1025: }
1026: }
1027:
1028: //
1029: //
1030: //
1031: private void callCharDataHandler(int offset, int endOffset,
1032: boolean isWhitespace) throws Exception {
1033: int length = endOffset - offset;
1034: if (!fSendCharDataAsCharArray) {
1035: int stringIndex = addString(offset, length);
1036: if (isWhitespace)
1037: fCharDataHandler.processWhitespace(stringIndex);
1038: else
1039: fCharDataHandler.processCharacters(stringIndex);
1040: return;
1041: }
1042: if (isWhitespace)
1043: fCharDataHandler.processWhitespace(fData.toCharArray(),
1044: offset, length);
1045: else
1046: fCharDataHandler.processCharacters(fData.toCharArray(),
1047: offset, length);
1048: }
1049:
1050: //
1051: //
1052: //
1053: private static final char[] cdata_string = { 'C', 'D', 'A', 'T',
1054: 'A', '[' };
1055: //
1056: //
1057: //
1058: private StringPool fStringPool = null;
1059: private String fData = null;
1060: private int fEndOffset;
1061: private boolean hadTrailingSpace = false;
1062: private boolean oweTrailingSpace = false;
1063: private int fMostRecentChar;
1064: private StringReader fNextFreeReader = null;
1065: private static StringReader fgFreeReaders = null;
1066: private boolean fCalledCharPropInit = false;
1067: }
|