0001: /*
0002: * The Apache Software License, Version 1.1
0003: *
0004: *
0005: * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
0006: * reserved.
0007: *
0008: * Redistribution and use in source and binary forms, with or without
0009: * modification, are permitted provided that the following conditions
0010: * are met:
0011: *
0012: * 1. Redistributions of source code must retain the above copyright
0013: * notice, this list of conditions and the following disclaimer.
0014: *
0015: * 2. Redistributions in binary form must reproduce the above copyright
0016: * notice, this list of conditions and the following disclaimer in
0017: * the documentation and/or other materials provided with the
0018: * distribution.
0019: *
0020: * 3. The end-user documentation included with the redistribution,
0021: * if any, must include the following acknowledgment:
0022: * "This product includes software developed by the
0023: * Apache Software Foundation (http://www.apache.org/)."
0024: * Alternately, this acknowledgment may appear in the software itself,
0025: * if and wherever such third-party acknowledgments normally appear.
0026: *
0027: * 4. The names "Xerces" and "Apache Software Foundation" must
0028: * not be used to endorse or promote products derived from this
0029: * software without prior written permission. For written
0030: * permission, please contact apache@apache.org.
0031: *
0032: * 5. Products derived from this software may not be called "Apache",
0033: * nor may "Apache" appear in their name, without prior written
0034: * permission of the Apache Software Foundation.
0035: *
0036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
0040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
0043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
0045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
0046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0047: * SUCH DAMAGE.
0048: * ====================================================================
0049: *
0050: * This software consists of voluntary contributions made by many
0051: * individuals on behalf of the Apache Software Foundation and was
0052: * originally based on software copyright (c) 1999, International
0053: * Business Machines, Inc., http://www.apache.org. For more
0054: * information on the Apache Software Foundation, please see
0055: * <http://www.apache.org/>.
0056: */
0057:
0058: package org.apache.xerces.readers;
0059:
0060: import org.apache.xerces.framework.XMLErrorReporter;
0061: import org.apache.xerces.utils.CharDataChunk;
0062: import org.apache.xerces.utils.QName;
0063: import org.apache.xerces.utils.StringHasher;
0064: import org.apache.xerces.utils.StringPool;
0065: import org.apache.xerces.utils.XMLCharacterProperties;
0066: import org.apache.xerces.utils.ImplementationMessages;
0067: import org.xml.sax.SAXParseException;
0068: import java.util.Vector;
0069:
0070: /**
0071: * An abstract class for readers that process input data as characters.
0072: * <p>
0073: * This is the base class of the CharReader and UTF8CharReader classes.
0074: * The CharReader class fills in the "chunks" of data to process from
0075: * a character stream (java.io.Reader). The UTF8CharReader performs
0076: * its own UTF-8 transcoding to fill in the data from an InputStream.
0077: *
0078: * @version $Id: AbstractCharReader.java,v 1.9 2001/02/01 09:58:21 andyc Exp $
0079: */
0080: abstract class AbstractCharReader extends XMLEntityReader {
0081:
/**
 * Subclass constructor.
 * <p>
 * Forwards the handler, reporter and char-data flag to the superclass
 * constructor, stores the string pool, and creates the initial data chunk
 * (passing <code>null</code> as the second argument to
 * <code>CharDataChunk.createChunk</code>).
 *
 * @param entityHandler The entity handler.
 * @param errorReporter The error reporter.
 * @param sendCharDataAsCharArray true if char data should be reported using
 *                                char arrays instead of string handles.
 * @param stringPool The string pool.
 */
protected AbstractCharReader(XMLEntityHandler entityHandler,
                             XMLErrorReporter errorReporter,
                             boolean sendCharDataAsCharArray,
                             StringPool stringPool) {
    super(entityHandler, errorReporter, sendCharDataAsCharArray);
    this.fStringPool = stringPool;
    this.fCurrentChunk = CharDataChunk.createChunk(this.fStringPool, null);
}
0098:
0099: //
0100: // These instance variables are referenced and updated directly
0101: // by our subclass from the fillCurrentChunk() method.
0102: //
0103: protected CharDataChunk fCurrentChunk = null; // chunk being scanned; created in the constructor, released and set to null in changeReaders()
0104: protected int fCurrentIndex = 0; // index of the current character within fMostRecentData
0105: protected char[] fMostRecentData = null; // char array the scanning methods index into; re-read via toCharArray() whenever a saved chunk is restored
0106: protected int fMostRecentChar = 0; // current character as an int (fMostRecentData[fCurrentIndex] & 0xFFFF); a value of 0 triggers the atEOF() checks
0107: protected int fLength = 0; // not referenced in the visible part of this file; presumably maintained by subclasses in fillCurrentChunk() - TODO confirm
0108:
0109: /**
0110: * Fill the current chunk with the next buffer's worth of data.
0111: *
0112: * This method should replace any 0xD,0xA sequence with a single
0113: * 0xA character, and replace single 0xD characters with a 0xA
0114: * character. This is described in the spec under section 2.11,
0115: * "End-of-Line Handling".
0116: *
0117: * @return The value of the first character available for processing.
0118: * @exception java.lang.Exception May be thrown by the implementing subclass.
0119: */
0120: protected abstract int fillCurrentChunk() throws Exception;
0121:
0122: /**
0123: * Delay reporting an error message.
0124: *
0125: * If there is an error detected in the underlying input stream during
0126: * the fillCurrentChunk method, the error is described here and will be
0127: * reported when we reach that offset during normal processing. The
0128: * subclass should place a character with a value of zero at that offset,
0129: * which will be detected here as an invalid character. When the invalid
0130: * character is scanned, we will generate the deferred exception.
0131: *
0132: * @param errorCode the errorCode to report
0133: * @param args an array of arguments needed to generate a good error message
0134: * @param offset the position in the reader where the error occured
0135: */
0136: protected void deferException(int errorCode, Object[] args,
0137: int offset) {
0138: if (fDeferredErrors == null)
0139: fDeferredErrors = new Vector();
0140: DeferredError de = new DeferredError(errorCode, args, offset);
0141: fDeferredErrors.addElement(de);
0142: }
0143:
0144: /**
0145: * Change readers at end of input.
0146: *
0147: * We override our superclass method to release the final chunk
0148: * of the input data before handing off to the next reader.
0149: *
0150: * @return The next reader used to continue processing the document.
0151: */
0152: protected XMLEntityHandler.EntityReader changeReaders()
0153: throws Exception {
0154: XMLEntityHandler.EntityReader nextReader = super
0155: .changeReaders();
0156: fCurrentChunk.releaseChunk();
0157: fCurrentChunk = null;
0158: return nextReader;
0159: }
0160:
0161: //
0162: // XMLEntityHandler.EntityReader implementation
0163: //
0164: // The first five methods of the interface are implemented
0165: // in the XMLEntityReader base class for us, namely
0166: //
0167: // public int currentOffset();
0168: // public int getLineNumber();
0169: // public int getColumnNumber();
0170: // public void setInCDSect(boolean inCDSect);
0171: // public boolean getInCDSect();
0172: //
0173:
0174: /**
0175: * Append the characters processed by this reader associated with <code>offset</code> and
0176: * <code>length</code> to the <code>CharBuffer</code>.
0177: *
0178: * @param charBuffer The <code>CharBuffer</code> to append the characters to.
0179: * @param offset The offset within this reader where the copy should start.
0180: * @param length The length within this reader where the copy should stop.
0181: */
0182: public void append(XMLEntityHandler.CharBuffer charBuffer,
0183: int offset, int length) {
0184: fCurrentChunk.append(charBuffer, offset, length);
0185: }
0186:
0187: /**
0188: * Add a string to the <code>StringPool</code> from the characters scanned using this
0189: * reader as described by <code>offset</code> and <code>length</code>.
0190: *
0191: * @param offset The offset within this reader where the characters start.
0192: * @param length The length within this reader where the characters end.
0193: * @return The <code>StringPool</code> handle for the string.
0194: */
0195: public int addString(int offset, int length) {
0196: if (length == 0)
0197: return 0;
0198: return fCurrentChunk.addString(offset, length);
0199: }
0200:
0201: /**
0202: * Add a symbol to the <code>StringPool</code> from the characters scanned using this
0203: * reader as described by <code>offset</code> and <code>length</code>.
0204: *
0205: * @param offset The offset within this reader where the characters start.
0206: * @param length The length within this reader where the characters end.
0207: * @return The <code>StringPool</code> handle for the symbol.
0208: */
0209: public int addSymbol(int offset, int length) {
0210: if (length == 0)
0211: return 0;
0212: return fCurrentChunk.addSymbol(offset, length, 0);
0213: }
0214:
0215: /**
0216: *
0217: */
0218: public boolean lookingAtChar(char chr, boolean skipPastChar)
0219: throws Exception {
0220: int ch = fMostRecentChar;
0221: if (ch != chr) {
0222: if (ch == 0) {
0223: if (atEOF(fCurrentOffset + 1)) {
0224: return changeReaders().lookingAtChar(chr,
0225: skipPastChar);
0226: }
0227: }
0228: return false;
0229: }
0230: if (skipPastChar) {
0231: fCharacterCounter++;
0232: fCurrentOffset++;
0233: if (++fCurrentIndex == CharDataChunk.CHUNK_SIZE)
0234: slowLoadNextChar();
0235: else
0236: fMostRecentChar = fMostRecentData[fCurrentIndex] & 0xFFFF;
0237: }
0238: return true;
0239: }
0240:
0241: /**
0242: *
0243: */
0244: public boolean lookingAtValidChar(boolean skipPastChar)
0245: throws Exception {
0246: int ch = fMostRecentChar;
0247: if (ch < 0xD800) {
0248: if (ch >= 0x20 || ch == 0x09) {
0249: if (skipPastChar) {
0250: fCharacterCounter++;
0251: loadNextChar();
0252: }
0253: return true;
0254: }
0255: if (ch == 0x0A) {
0256: if (skipPastChar) {
0257: fLinefeedCounter++;
0258: fCharacterCounter = 1;
0259: loadNextChar();
0260: }
0261: return true;
0262: }
0263: if (ch == 0) {
0264: if (atEOF(fCurrentOffset + 1)) {
0265: return changeReaders().lookingAtValidChar(
0266: skipPastChar);
0267: }
0268: }
0269: return false;
0270: }
0271: if (ch > 0xFFFD) {
0272: return false;
0273: }
0274: if (ch < 0xDC00) {
0275: CharDataChunk savedChunk = fCurrentChunk;
0276: int savedIndex = fCurrentIndex;
0277: int savedOffset = fCurrentOffset;
0278: ch = loadNextChar();
0279: boolean valid = (ch >= 0xDC00 && ch < 0xE000);
0280: if (!valid || !skipPastChar) {
0281: fCurrentChunk = savedChunk;
0282: fCurrentIndex = savedIndex;
0283: fCurrentOffset = savedOffset;
0284: fMostRecentData = savedChunk.toCharArray();
0285: fMostRecentChar = fMostRecentData[savedIndex] & 0xFFFF;
0286: return valid;
0287: }
0288: } else if (ch < 0xE000) {
0289: return false;
0290: }
0291: if (skipPastChar) {
0292: fCharacterCounter++;
0293: loadNextChar();
0294: }
0295: return true;
0296: }
0297:
0298: /**
0299: *
0300: */
0301: public boolean lookingAtSpace(boolean skipPastChar)
0302: throws Exception {
0303: int ch = fMostRecentChar;
0304: if (ch > 0x20)
0305: return false;
0306: if (ch == 0x20 || ch == 0x09) {
0307: if (!skipPastChar)
0308: return true;
0309: fCharacterCounter++;
0310: } else if (ch == 0x0A) {
0311: if (!skipPastChar)
0312: return true;
0313: fLinefeedCounter++;
0314: fCharacterCounter = 1;
0315: } else {
0316: if (ch == 0) { // REVISIT - should we be checking this here ?
0317: if (atEOF(fCurrentOffset + 1)) {
0318: return changeReaders().lookingAtSpace(skipPastChar);
0319: }
0320: }
0321: return false;
0322: }
0323: fCurrentOffset++;
0324: if (++fCurrentIndex == CharDataChunk.CHUNK_SIZE)
0325: slowLoadNextChar();
0326: else
0327: fMostRecentChar = fMostRecentData[fCurrentIndex] & 0xFFFF;
0328: return true;
0329: }
0330:
0331: /**
0332: *
0333: */
0334: public void skipToChar(char chr) throws Exception {
0335: //
0336: // REVISIT - this will skip invalid characters without reporting them.
0337: //
0338: int ch = fMostRecentChar;
0339: while (true) {
0340: if (ch == chr)
0341: return;
0342: if (ch == 0) {
0343: if (atEOF(fCurrentOffset + 1)) {
0344: changeReaders().skipToChar(chr);
0345: return;
0346: }
0347: fCharacterCounter++;
0348: } else if (ch == 0x0A) {
0349: fLinefeedCounter++;
0350: fCharacterCounter = 1;
0351: } else if (ch >= 0xD800 && ch < 0xDC00) {
0352: fCharacterCounter++;
0353: ch = loadNextChar();
0354: if (ch < 0xDC00 || ch >= 0xE000)
0355: continue;
0356: } else
0357: fCharacterCounter++;
0358: ch = loadNextChar();
0359: }
0360: }
0361:
0362: /**
0363: *
0364: */
0365: public void skipPastSpaces() throws Exception {
0366: int ch = fMostRecentChar;
0367: while (true) {
0368: if (ch == 0x20 || ch == 0x09) {
0369: fCharacterCounter++;
0370: } else if (ch == 0x0A) {
0371: fLinefeedCounter++;
0372: fCharacterCounter = 1;
0373: } else {
0374: if (ch == 0 && atEOF(fCurrentOffset + 1))
0375: changeReaders().skipPastSpaces();
0376: return;
0377: }
0378: ch = loadNextChar();
0379: }
0380: }
0381:
0382: /**
0383: *
0384: */
0385: public void skipPastName(char fastcheck) throws Exception {
0386: int ch = fMostRecentChar;
0387: if (ch < 0x80) {
0388: if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0)
0389: return;
0390: } else {
0391: if (!fCalledCharPropInit) {
0392: XMLCharacterProperties.initCharFlags();
0393: fCalledCharPropInit = true;
0394: }
0395: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0)
0396: return;
0397: }
0398: while (true) {
0399: fCharacterCounter++;
0400: ch = loadNextChar();
0401: if (fastcheck == ch)
0402: return;
0403: if (ch < 0x80) {
0404: if (XMLCharacterProperties.fgAsciiNameChar[ch] == 0)
0405: return;
0406: } else {
0407: if (!fCalledCharPropInit) {
0408: XMLCharacterProperties.initCharFlags();
0409: fCalledCharPropInit = true;
0410: }
0411: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
0412: return;
0413: }
0414: }
0415: }
0416:
0417: /**
0418: *
0419: */
0420: public void skipPastNmtoken(char fastcheck) throws Exception {
0421: int ch = fMostRecentChar;
0422: while (true) {
0423: if (fastcheck == ch)
0424: return;
0425: if (ch < 0x80) {
0426: if (XMLCharacterProperties.fgAsciiNameChar[ch] == 0)
0427: return;
0428: } else {
0429: if (!fCalledCharPropInit) {
0430: XMLCharacterProperties.initCharFlags();
0431: fCalledCharPropInit = true;
0432: }
0433: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
0434: return;
0435: }
0436: fCharacterCounter++;
0437: ch = loadNextChar();
0438: }
0439: }
0440:
0441: /**
0442: *
0443: */
0444: public boolean skippedString(char[] s) throws Exception {
0445: int length = s.length;
0446: char[] data = fMostRecentData;
0447: int index = fCurrentIndex;
0448: if (index + length <= CharDataChunk.CHUNK_SIZE) {
0449: for (int i = 0; i < length; i++) {
0450: if (data[index++] != s[i])
0451: return false;
0452: }
0453: fCharacterCounter += length;
0454: fCurrentOffset += length;
0455: fCurrentIndex = index;
0456: if (index == CharDataChunk.CHUNK_SIZE)
0457: slowLoadNextChar();
0458: else
0459: fMostRecentChar = data[index] & 0xFFFF;
0460: return true;
0461: }
0462: CharDataChunk dataChunk = fCurrentChunk;
0463: int offset = fCurrentOffset;
0464: int savedIndex = index;
0465: int i = 0;
0466: while (index < CharDataChunk.CHUNK_SIZE) {
0467: if (data[index++] != s[i++])
0468: return false;
0469: }
0470: slowLoadNextChar();
0471: data = fMostRecentData;
0472: index = 0;
0473: while (i < length) {
0474: if (data[index++] != s[i++]) {
0475: fCurrentChunk = dataChunk;
0476: fCurrentIndex = savedIndex;
0477: fCurrentOffset = offset;
0478: fMostRecentData = dataChunk.toCharArray();
0479: fMostRecentChar = fMostRecentData[savedIndex] & 0xFFFF;
0480: return false;
0481: }
0482: }
0483: fCharacterCounter += length;
0484: fCurrentOffset += length;
0485: fCurrentIndex = index;
0486: if (index == CharDataChunk.CHUNK_SIZE)
0487: slowLoadNextChar();
0488: else
0489: fMostRecentChar = data[index] & 0xFFFF;
0490: return true;
0491: }
0492:
0493: /**
0494: *
0495: */
0496: public int scanInvalidChar() throws Exception {
0497: int ch = fMostRecentChar;
0498: if (ch == 0x0A) {
0499: fLinefeedCounter++;
0500: fCharacterCounter = 1;
0501: loadNextChar();
0502: } else if (ch == 0) {
0503: if (atEOF(fCurrentOffset + 1)) {
0504: return changeReaders().scanInvalidChar();
0505: }
0506: if (fDeferredErrors != null) {
0507: for (int i = 0; i < fDeferredErrors.size(); i++) {
0508: DeferredError de = (DeferredError) fDeferredErrors
0509: .elementAt(i);
0510: if (de.offset == fCurrentIndex) {
0511: fErrorReporter
0512: .reportError(
0513: fErrorReporter.getLocator(),
0514: ImplementationMessages.XERCES_IMPLEMENTATION_DOMAIN,
0515: de.errorCode,
0516: 0,
0517: de.args,
0518: XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
0519: fDeferredErrors.removeElementAt(i);
0520: fCharacterCounter++;
0521: loadNextChar();
0522: return -1;
0523: }
0524: }
0525: }
0526: fCharacterCounter++;
0527: loadNextChar();
0528: } else {
0529: fCharacterCounter++;
0530: if (ch >= 0xD800 && ch < 0xDC00) {
0531: int ch2 = loadNextChar();
0532: if (ch2 >= 0xDC00 && ch2 < 0xE000) {
0533: ch = ((ch - 0xD800) << 10) + (ch2 - 0xDC00)
0534: + 0x10000;
0535: loadNextChar();
0536: }
0537: } else
0538: loadNextChar();
0539: }
0540: return ch;
0541: }
0542:
0543: /**
0544: *
0545: */
0546: public int scanCharRef(boolean hex) throws Exception {
0547: int ch = fMostRecentChar;
0548: if (ch == 0) {
0549: if (atEOF(fCurrentOffset + 1)) {
0550: return changeReaders().scanCharRef(hex);
0551: }
0552: return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
0553: }
0554: int num = 0;
0555: if (hex) {
0556: if (ch > 'f'
0557: || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0)
0558: return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
0559: num = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
0560: } else {
0561: if (ch < '0' || ch > '9')
0562: return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
0563: num = ch - '0';
0564: }
0565: fCharacterCounter++;
0566: loadNextChar();
0567: boolean toobig = false;
0568: while (true) {
0569: ch = fMostRecentChar;
0570: if (ch == 0)
0571: break;
0572: if (hex) {
0573: if (ch > 'f'
0574: || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0)
0575: break;
0576: } else {
0577: if (ch < '0' || ch > '9')
0578: break;
0579: }
0580: fCharacterCounter++;
0581: loadNextChar();
0582: if (hex) {
0583: int dig = ch
0584: - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
0585: num = (num << 4) + dig;
0586: } else {
0587: int dig = ch - '0';
0588: num = (num * 10) + dig;
0589: }
0590: if (num > 0x10FFFF) {
0591: toobig = true;
0592: num = 0;
0593: }
0594: }
0595: if (ch != ';')
0596: return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED;
0597: fCharacterCounter++;
0598: loadNextChar();
0599: if (toobig)
0600: return XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE;
0601: return num;
0602: }
0603:
0604: /**
0605: *
0606: */
0607: public int scanStringLiteral() throws Exception {
0608: boolean single;
0609: if (!(single = lookingAtChar('\'', true))
0610: && !lookingAtChar('\"', true)) {
0611: return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
0612: }
0613: int offset = fCurrentOffset;
0614: char qchar = single ? '\'' : '\"';
0615: while (!lookingAtChar(qchar, false)) {
0616: if (!lookingAtValidChar(true)) {
0617: return XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR;
0618: }
0619: }
0620: int stringIndex = addString(offset, fCurrentOffset - offset);
0621: lookingAtChar(qchar, true); // move past qchar
0622: return stringIndex;
0623: }
0624:
0625: //
0626: // [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
0627: // | "'" ([^<&'] | Reference)* "'"
0628: //
0629: /**
0630: *
0631: */
0632: public int scanAttValue(char qchar, boolean asSymbol)
0633: throws Exception {
0634: int offset = fCurrentOffset;
0635: while (true) {
0636: if (lookingAtChar(qchar, false)) {
0637: break;
0638: }
0639: if (lookingAtChar(' ', true)) {
0640: continue;
0641: }
0642: if (lookingAtSpace(false)) {
0643: return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX;
0644: }
0645: if (lookingAtChar('&', false)) {
0646: return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX;
0647: }
0648: if (lookingAtChar('<', false)) {
0649: return XMLEntityHandler.ATTVALUE_RESULT_LESSTHAN;
0650: }
0651: if (!lookingAtValidChar(true)) {
0652: return XMLEntityHandler.ATTVALUE_RESULT_INVALID_CHAR;
0653: }
0654: }
0655: int result = asSymbol ? addSymbol(offset, fCurrentOffset
0656: - offset) : addString(offset, fCurrentOffset - offset);
0657: lookingAtChar(qchar, true);
0658: return result;
0659: }
0660:
0661: //
0662: // [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
0663: // | "'" ([^%&'] | PEReference | Reference)* "'"
0664: //
0665: /**
0666: *
0667: */
0668: public int scanEntityValue(int qchar, boolean createString)
0669: throws Exception {
0670: int offset = fCurrentOffset;
0671: while (true) {
0672: if (atEOF(fCurrentOffset + 1)) {
0673: changeReaders();
0674: return XMLEntityHandler.ENTITYVALUE_RESULT_END_OF_INPUT;
0675: }
0676: if (qchar != -1 && lookingAtChar((char) qchar, false)) {
0677: if (!createString)
0678: return XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED;
0679: break;
0680: }
0681: if (lookingAtChar('&', false)) {
0682: return XMLEntityHandler.ENTITYVALUE_RESULT_REFERENCE;
0683: }
0684: if (lookingAtChar('%', false)) {
0685: return XMLEntityHandler.ENTITYVALUE_RESULT_PEREF;
0686: }
0687: if (!lookingAtValidChar(true)) {
0688: return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR;
0689: }
0690: }
0691: int result = addString(offset, fCurrentOffset - offset);
0692: lookingAtChar((char) qchar, true);
0693: return result;
0694: }
0695:
0696: /**
0697: *
0698: */
0699: public int scanName(char fastcheck) throws Exception {
0700: int ch = fMostRecentChar;
0701: if (ch < 0x80) {
0702: if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0)
0703: return -1;
0704: } else {
0705: if (!fCalledCharPropInit) {
0706: XMLCharacterProperties.initCharFlags();
0707: fCalledCharPropInit = true;
0708: }
0709: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0)
0710: return -1;
0711: }
0712: int offset = fCurrentOffset;
0713: int index = fCurrentIndex;
0714: char[] data = fMostRecentData;
0715: if (++index == CharDataChunk.CHUNK_SIZE) {
0716: slowLoadNextChar();
0717: index = 0;
0718: data = fMostRecentData;
0719: }
0720: fCharacterCounter++;
0721: fCurrentOffset++;
0722: int hashcode = 0;
0723: while (true) {
0724: hashcode = StringHasher.hashChar(hashcode, ch);
0725: ch = data[index] & 0xFFFF;
0726: if (fastcheck == ch)
0727: break;
0728: if (ch < 0x80) {
0729: if (XMLCharacterProperties.fgAsciiNameChar[ch] == 0)
0730: break;
0731: } else {
0732: if (!fCalledCharPropInit) {
0733: XMLCharacterProperties.initCharFlags();
0734: fCalledCharPropInit = true;
0735: }
0736: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
0737: break;
0738: }
0739: if (++index == CharDataChunk.CHUNK_SIZE) {
0740: slowLoadNextChar();
0741: index = 0;
0742: data = fMostRecentData;
0743: }
0744: fCharacterCounter++;
0745: fCurrentOffset++;
0746: }
0747: fCurrentIndex = index;
0748: fMostRecentChar = ch;
0749: hashcode = StringHasher.finishHash(hashcode);
0750: int length = fCurrentOffset - offset;
0751: int nameIndex = fCurrentChunk.addSymbol(offset, length,
0752: hashcode);
0753: return nameIndex;
0754: }
0755:
0756: /**
0757: *
0758: */
0759: public boolean scanExpectedName(char fastcheck,
0760: StringPool.CharArrayRange expectedName) throws Exception {
0761: char[] expected = expectedName.chars;
0762: int offset = expectedName.offset;
0763: int len = expectedName.length;
0764: int ch = fMostRecentChar;
0765: for (int i = 0; i < len; i++) {
0766: if (ch != expected[offset++]) {
0767: skipPastNmtoken(fastcheck);
0768: return false;
0769: }
0770: fCharacterCounter++;
0771: fCurrentOffset++;
0772: if (++fCurrentIndex == CharDataChunk.CHUNK_SIZE)
0773: ch = slowLoadNextChar();
0774: else
0775: ch = (fMostRecentChar = fMostRecentData[fCurrentIndex] & 0xFFFF);
0776: }
0777: if (ch == fastcheck)
0778: return true;
0779: if (ch < 0x80) {
0780: if (XMLCharacterProperties.fgAsciiNameChar[ch] == 0)
0781: return true;
0782: } else {
0783: if (!fCalledCharPropInit) {
0784: XMLCharacterProperties.initCharFlags();
0785: fCalledCharPropInit = true;
0786: }
0787: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
0788: return true;
0789: }
0790: skipPastNmtoken(fastcheck);
0791: return false;
0792: }
0793:
0794: /**
0795: *
0796: */
0797: public void scanQName(char fastcheck, QName qname) throws Exception {
0798: int ch = fMostRecentChar;
0799: if (ch < 0x80) {
0800: if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0) {
0801: qname.clear();
0802: return;
0803: }
0804: if (ch == ':') {
0805: qname.clear();
0806: return;
0807: }
0808: } else {
0809: if (!fCalledCharPropInit) {
0810: XMLCharacterProperties.initCharFlags();
0811: fCalledCharPropInit = true;
0812: }
0813: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) {
0814: qname.clear();
0815: return;
0816: }
0817: }
0818: int offset = fCurrentOffset;
0819: int index = fCurrentIndex;
0820: char[] data = fMostRecentData;
0821: if (++index == CharDataChunk.CHUNK_SIZE) {
0822: slowLoadNextChar();
0823: index = 0;
0824: data = fMostRecentData;
0825: }
0826: fCharacterCounter++;
0827: fCurrentOffset++;
0828: int hashcode = 0;
0829: int prefixend = -1;
0830: while (true) {
0831: hashcode = StringHasher.hashChar(hashcode, ch);
0832: ch = data[index] & 0xFFFF;
0833: if (fastcheck == ch)
0834: break;
0835: if (ch < 0x80) {
0836: if (XMLCharacterProperties.fgAsciiNameChar[ch] == 0)
0837: break;
0838: if (ch == ':') {
0839: if (prefixend != -1)
0840: break;
0841: prefixend = fCurrentOffset;
0842: //
0843: // We need to peek ahead one character. If the next character is not a
0844: // valid initial name character, or is another colon, then we cannot meet
0845: // both the Prefix and LocalPart productions for the QName production,
0846: // which means that there is no Prefix and we need to terminate the QName
0847: // at the first colon.
0848: //
0849: if (index + 1 == CharDataChunk.CHUNK_SIZE) {
0850: CharDataChunk savedChunk = fCurrentChunk;
0851: int savedOffset = fCurrentOffset;
0852: ch = slowLoadNextChar();
0853: fCurrentChunk = savedChunk;
0854: fCurrentOffset = savedOffset;
0855: fMostRecentData = savedChunk.toCharArray();
0856: } else
0857: ch = data[index + 1] & 0xFFFF;
0858: boolean lpok = true;
0859: if (ch < 0x80) {
0860: if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0
0861: || ch == ':')
0862: lpok = false;
0863: } else {
0864: if (!fCalledCharPropInit) {
0865: XMLCharacterProperties.initCharFlags();
0866: fCalledCharPropInit = true;
0867: }
0868: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0)
0869: lpok = false;
0870: }
0871: ch = ':';
0872: if (!lpok) {
0873: prefixend = -1;
0874: break;
0875: }
0876: }
0877: } else {
0878: if (!fCalledCharPropInit) {
0879: XMLCharacterProperties.initCharFlags();
0880: fCalledCharPropInit = true;
0881: }
0882: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
0883: break;
0884: }
0885: if (++index == CharDataChunk.CHUNK_SIZE) {
0886: slowLoadNextChar();
0887: index = 0;
0888: data = fMostRecentData;
0889: }
0890: fCharacterCounter++;
0891: fCurrentOffset++;
0892: }
0893: fCurrentIndex = index;
0894: fMostRecentChar = ch;
0895: hashcode = StringHasher.finishHash(hashcode);
0896: int length = fCurrentOffset - offset;
0897: qname.rawname = fCurrentChunk.addSymbol(offset, length,
0898: hashcode);
0899: qname.prefix = prefixend == -1 ? -1 : addSymbol(offset,
0900: prefixend - offset);
0901: qname.localpart = prefixend == -1 ? qname.rawname : addSymbol(
0902: prefixend + 1, fCurrentOffset - (prefixend + 1));
0903: qname.uri = StringPool.EMPTY_STRING;
0904:
0905: } // scanQName(char,QName)
0906:
0907: //
0908: // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
0909: //
0910: /**
0911: *
0912: */
0913: public int scanContent(QName element) throws Exception {
0914: if (fCallClearPreviousChunk
0915: && fCurrentChunk.clearPreviousChunk())
0916: fCallClearPreviousChunk = false;
0917: int charDataOffset = fCurrentOffset;
0918: int ch = fMostRecentChar;
0919: if (ch < 0x80) {
0920: switch (XMLCharacterProperties.fgAsciiWSCharData[ch]) {
0921: case 0:
0922: fCharacterCounter++;
0923: ch = loadNextChar();
0924: break;
0925: case 1: // '<'
0926: fCharacterCounter++;
0927: ch = loadNextChar();
0928: if (!fInCDSect) {
0929: return recognizeMarkup(ch);
0930: }
0931: break;
0932: case 2: // '&'
0933: fCharacterCounter++;
0934: ch = loadNextChar();
0935: if (!fInCDSect) {
0936: return recognizeReference(ch);
0937: }
0938: break;
0939: case 3: // ']'
0940: fCharacterCounter++;
0941: ch = loadNextChar();
0942: if (ch != ']')
0943: break;
0944: if (fCurrentIndex + 1 == CharDataChunk.CHUNK_SIZE) {
0945: CharDataChunk dataChunk = fCurrentChunk;
0946: int index = fCurrentIndex;
0947: int offset = fCurrentOffset;
0948: if (loadNextChar() != '>') {
0949: fCurrentChunk = dataChunk;
0950: fCurrentIndex = index;
0951: fCurrentOffset = offset;
0952: fMostRecentData = dataChunk.toCharArray();
0953: fMostRecentChar = ']';
0954: break;
0955: }
0956: } else {
0957: if (fMostRecentData[fCurrentIndex + 1] != '>')
0958: break;
0959: fCurrentIndex++;
0960: fCurrentOffset++;
0961: }
0962: loadNextChar();
0963: fCharacterCounter += 2;
0964: return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
0965: case 4: // invalid char
0966: if (ch == 0 && atEOF(fCurrentOffset + 1)) {
0967: changeReaders();
0968: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; // REVISIT - not quite...
0969: }
0970: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
0971: case 5:
0972: do {
0973: if (ch == 0x0A) {
0974: fLinefeedCounter++;
0975: fCharacterCounter = 1;
0976: } else
0977: fCharacterCounter++;
0978: ch = loadNextChar();
0979: } while (ch == 0x20 || ch == 0x09 || ch == 0x0A);
0980: if (ch < 0x80) {
0981: switch (XMLCharacterProperties.fgAsciiCharData[ch]) {
0982: case 0:
0983: fCharacterCounter++;
0984: ch = loadNextChar();
0985: break;
0986: case 1: // '<'
0987: if (!fInCDSect) {
0988: callCharDataHandler(charDataOffset,
0989: fCurrentOffset, true);
0990: fCharacterCounter++;
0991: ch = loadNextChar();
0992: return recognizeMarkup(ch);
0993: }
0994: fCharacterCounter++;
0995: ch = loadNextChar();
0996: break;
0997: case 2: // '&'
0998: if (!fInCDSect) {
0999: callCharDataHandler(charDataOffset,
1000: fCurrentOffset, true);
1001: fCharacterCounter++;
1002: ch = loadNextChar();
1003: return recognizeReference(ch);
1004: }
1005: fCharacterCounter++;
1006: ch = loadNextChar();
1007: break;
1008: case 3: // ']'
1009: int endOffset = fCurrentOffset;
1010: ch = loadNextChar();
1011: if (ch != ']') {
1012: fCharacterCounter++;
1013: break;
1014: }
1015: if (fCurrentIndex + 1 == CharDataChunk.CHUNK_SIZE) {
1016: CharDataChunk dataChunk = fCurrentChunk;
1017: int index = fCurrentIndex;
1018: int offset = fCurrentOffset;
1019: if (loadNextChar() != '>') {
1020: fCurrentChunk = dataChunk;
1021: fCurrentIndex = index;
1022: fCurrentOffset = offset;
1023: fMostRecentData = dataChunk
1024: .toCharArray();
1025: fMostRecentChar = ']';
1026: fCharacterCounter++;
1027: break;
1028: }
1029: } else {
1030: if (fMostRecentData[fCurrentIndex + 1] != '>') {
1031: fCharacterCounter++;
1032: break;
1033: }
1034: fCurrentIndex++;
1035: fCurrentOffset++;
1036: }
1037: loadNextChar();
1038: callCharDataHandler(charDataOffset, endOffset,
1039: true);
1040: fCharacterCounter += 3;
1041: return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
1042: case 4: // invalid char
1043: callCharDataHandler(charDataOffset,
1044: fCurrentOffset, true);
1045: if (ch == 0 && atEOF(fCurrentOffset + 1)) {
1046: changeReaders();
1047: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; // REVISIT - not quite...
1048: }
1049: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
1050: }
1051: } else if (!skipMultiByteCharData(ch)) {
1052: callCharDataHandler(charDataOffset, fCurrentOffset,
1053: true);
1054: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
1055: }
1056: break;
1057: }
1058: } else if (!skipMultiByteCharData(ch)) {
1059: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
1060: }
1061: ch = skipAsciiCharData();
1062: while (true) {
1063: if (ch < 0x80) {
1064: switch (XMLCharacterProperties.fgAsciiCharData[ch]) {
1065: case 0:
1066: fCharacterCounter++;
1067: ch = loadNextChar();
1068: break;
1069: case 1: // '<'
1070: if (!fInCDSect) {
1071: callCharDataHandler(charDataOffset,
1072: fCurrentOffset, false);
1073: fCharacterCounter++;
1074: ch = loadNextChar();
1075: return recognizeMarkup(ch);
1076: }
1077: fCharacterCounter++;
1078: ch = loadNextChar();
1079: break;
1080: case 2: // '&'
1081: if (!fInCDSect) {
1082: callCharDataHandler(charDataOffset,
1083: fCurrentOffset, false);
1084: fCharacterCounter++;
1085: ch = loadNextChar();
1086: return recognizeReference(ch);
1087: }
1088: fCharacterCounter++;
1089: ch = loadNextChar();
1090: break;
1091: case 3: // ']'
1092: int endOffset = fCurrentOffset;
1093: ch = loadNextChar();
1094: if (ch != ']') {
1095: fCharacterCounter++;
1096: break;
1097: }
1098: if (fCurrentIndex + 1 == CharDataChunk.CHUNK_SIZE) {
1099: CharDataChunk dataChunk = fCurrentChunk;
1100: int index = fCurrentIndex;
1101: int offset = fCurrentOffset;
1102: if (loadNextChar() != '>') {
1103: fCurrentChunk = dataChunk;
1104: fCurrentIndex = index;
1105: fCurrentOffset = offset;
1106: fMostRecentData = dataChunk.toCharArray();
1107: fMostRecentChar = ']';
1108: fCharacterCounter++;
1109: break;
1110: }
1111: } else {
1112: if (fMostRecentData[fCurrentIndex + 1] != '>') {
1113: fCharacterCounter++;
1114: break;
1115: }
1116: fCurrentIndex++;
1117: fCurrentOffset++;
1118: }
1119: loadNextChar();
1120: callCharDataHandler(charDataOffset, endOffset,
1121: false);
1122: fCharacterCounter += 3;
1123: return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
1124: case 4: // invalid char
1125: if (ch == 0x0A) {
1126: fLinefeedCounter++;
1127: fCharacterCounter = 1;
1128: ch = loadNextChar();
1129: break;
1130: }
1131: callCharDataHandler(charDataOffset, fCurrentOffset,
1132: false);
1133: if (ch == 0 && atEOF(fCurrentOffset + 1)) {
1134: changeReaders();
1135: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; // REVISIT - not quite...
1136: }
1137: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
1138: }
1139: } else {
1140: if (!skipMultiByteCharData(ch)) {
1141: callCharDataHandler(charDataOffset, fCurrentOffset,
1142: false);
1143: return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
1144: }
1145: ch = fMostRecentChar;
1146: }
1147: }
1148: }
1149:
1150: //
1151: // Private data members
1152: //
1153: private static final char[] cdata_string = { 'C', 'D', 'A', 'T',
1154: 'A', '[' };
1155: private StringPool fStringPool = null;
1156: private boolean fCalledCharPropInit = false;
1157: private boolean fCallClearPreviousChunk = true;
1158: private Vector fDeferredErrors = null;
1159:
1160: //
1161: // Private classes
1162: //
1163: private class DeferredError {
1164: int errorCode;
1165: Object[] args;
1166: int offset;
1167:
1168: DeferredError(int ec, Object[] a, int o) {
1169: errorCode = ec;
1170: args = a;
1171: offset = o;
1172: }
1173: }
1174:
1175: //
1176: // Private methods
1177: //
1178:
1179: /*
1180: * Return a result code for scanContent when the character data
1181: * ends with a less-than character.
1182: */
1183: private int recognizeMarkup(int ch) throws Exception {
1184: switch (ch) {
1185: case 0:
1186: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
1187: case '?':
1188: fCharacterCounter++;
1189: loadNextChar();
1190: return XMLEntityHandler.CONTENT_RESULT_START_OF_PI;
1191: case '!':
1192: fCharacterCounter++;
1193: ch = loadNextChar();
1194: if (ch == 0) {
1195: fCharacterCounter--;
1196: fCurrentOffset--;
1197: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
1198: }
1199: if (ch == '-') {
1200: fCharacterCounter++;
1201: ch = loadNextChar();
1202: if (ch == 0) {
1203: fCharacterCounter -= 2;
1204: fCurrentOffset -= 2;
1205: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
1206: }
1207: if (ch == '-') {
1208: fCharacterCounter++;
1209: loadNextChar();
1210: return XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT;
1211: }
1212: break;
1213: }
1214: if (ch == '[') {
1215: for (int i = 0; i < 6; i++) {
1216: fCharacterCounter++;
1217: ch = loadNextChar();
1218: if (ch == 0) {
1219: fCharacterCounter -= (2 + i);
1220: fCurrentOffset -= (2 + i);
1221: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
1222: }
1223: if (ch != cdata_string[i]) {
1224: return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
1225: }
1226: }
1227: fCharacterCounter++;
1228: loadNextChar();
1229: return XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT;
1230: }
1231: break;
1232: case '/':
1233: fCharacterCounter++;
1234: loadNextChar();
1235: return XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG;
1236: default:
1237: return XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT;
1238: }
1239: return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
1240: }
1241:
1242: /*
1243: * Return a result code for scanContent when the character data
1244: * ends with an ampersand character.
1245: */
1246: private int recognizeReference(int ch) throws Exception {
1247: if (ch == 0) {
1248: return XMLEntityHandler.CONTENT_RESULT_REFERENCE_END_OF_INPUT;
1249: }
1250: //
1251: // [67] Reference ::= EntityRef | CharRef
1252: // [68] EntityRef ::= '&' Name ';'
1253: // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1254: //
1255: if (ch == '#') {
1256: fCharacterCounter++;
1257: loadNextChar();
1258: return XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF;
1259: } else {
1260: return XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF;
1261: }
1262: }
1263:
1264: /*
1265: * Skip over a multi-byte character.
1266: */
1267: private boolean skipMultiByteCharData(int ch) throws Exception {
1268: if (ch < 0xD800) {
1269: loadNextChar();
1270: return true;
1271: }
1272: if (ch > 0xFFFD)
1273: return false;
1274: if (ch >= 0xDC00 && ch < 0xE000)
1275: return false;
1276: if (ch >= 0xD800 && ch < 0xDC00) {
1277: CharDataChunk savedChunk = fCurrentChunk;
1278: int savedIndex = fCurrentIndex;
1279: int savedOffset = fCurrentOffset;
1280: ch = loadNextChar();
1281: if (ch < 0xDC00 || ch >= 0xE000) {
1282: fCurrentChunk = savedChunk;
1283: fCurrentIndex = savedIndex;
1284: fCurrentOffset = savedOffset;
1285: fMostRecentData = savedChunk.toCharArray();
1286: fMostRecentChar = fMostRecentData[savedIndex] & 0xFFFF;
1287: return false;
1288: }
1289: }
1290: loadNextChar();
1291: return true;
1292: }
1293:
1294: /*
1295: * Skip over contiguous ascii character data.
1296: *
1297: * @return the character skipped
1298: * @exception java.lang.Exception
1299: */
1300: private int skipAsciiCharData() throws Exception {
1301: int index = fCurrentIndex;
1302: int offset = fCurrentOffset - index;
1303: while (true) {
1304: char[] data = fMostRecentData;
1305: while (index < CharDataChunk.CHUNK_SIZE) {
1306: int ch = data[index] & 0xFFFF;
1307: if (ch >= 0x80) {
1308: fCurrentOffset = offset + index;
1309: fCurrentIndex = index;
1310: fMostRecentChar = ch;
1311: return ch;
1312: }
1313: if (XMLCharacterProperties.fgAsciiCharData[ch] == 0) {
1314: fCharacterCounter++;
1315: } else if (ch == 0x0A) {
1316: fLinefeedCounter++;
1317: fCharacterCounter = 1;
1318: } else {
1319: fCurrentOffset = offset + index;
1320: fCurrentIndex = index;
1321: fMostRecentChar = ch;
1322: return ch;
1323: }
1324: index++;
1325: }
1326: offset += index;
1327: slowLoadNextChar();
1328: index = 0;
1329: }
1330: }
1331:
1332: /*
1333: * Report character data to the parser through the entity handler interface.
1334: *
1335: * @param offset the offset of the start of the character data
1336: * @param endOffset the offset of the end of the character data
1337: * @param isWhitespace true if the character data is whitespace
1338: * @exception java.lang.Exception
1339: */
1340: private void callCharDataHandler(int offset, int endOffset,
1341: boolean isWhitespace) throws Exception {
1342:
1343: int length = endOffset - offset;
1344: if (!fSendCharDataAsCharArray) {
1345: int stringIndex = addString(offset, length);
1346: if (isWhitespace)
1347: fCharDataHandler.processWhitespace(stringIndex);
1348: else
1349: fCharDataHandler.processCharacters(stringIndex);
1350: return;
1351: }
1352:
1353: CharDataChunk dataChunk = fCurrentChunk.chunkFor(offset);
1354: int index = offset & CharDataChunk.CHUNK_MASK;
1355: if (index + length <= CharDataChunk.CHUNK_SIZE) {
1356: //
1357: // All the chars are in the same chunk
1358: //
1359: if (length != 0) {
1360: if (isWhitespace)
1361: fCharDataHandler.processWhitespace(dataChunk
1362: .toCharArray(), index, length);
1363: else
1364: fCharDataHandler.processCharacters(dataChunk
1365: .toCharArray(), index, length);
1366: }
1367: return;
1368: }
1369:
1370: //
1371: // The data is spread across chunks.
1372: //
1373: int count = length;
1374: int nbytes = CharDataChunk.CHUNK_SIZE - index;
1375: if (isWhitespace)
1376: fCharDataHandler.processWhitespace(dataChunk.toCharArray(),
1377: index, nbytes);
1378: else
1379: fCharDataHandler.processCharacters(dataChunk.toCharArray(),
1380: index, nbytes);
1381: count -= nbytes;
1382:
1383: //
1384: // Use each Chunk in turn until we are done.
1385: //
1386: do {
1387: dataChunk = dataChunk.nextChunk();
1388: if (dataChunk == null) {
1389: throw new RuntimeException(
1390: new ImplementationMessages().createMessage(
1391: null, ImplementationMessages.INT_DCN,
1392: 0, null));
1393: }
1394: nbytes = count <= CharDataChunk.CHUNK_SIZE ? count
1395: : CharDataChunk.CHUNK_SIZE;
1396: if (isWhitespace)
1397: fCharDataHandler.processWhitespace(dataChunk
1398: .toCharArray(), 0, nbytes);
1399: else
1400: fCharDataHandler.processCharacters(dataChunk
1401: .toCharArray(), 0, nbytes);
1402: count -= nbytes;
1403: } while (count > 0);
1404: }
1405:
1406: /*
1407: * Advance the reader's notion of where it is, moving on to the next chunk.
1408: *
1409: * @return The next character that will be processed.
1410: * @exception java.lang.Exception
1411: */
1412: private int slowLoadNextChar() throws Exception {
1413: fCallClearPreviousChunk = true;
1414: if (fCurrentChunk.nextChunk() != null) {
1415: fCurrentChunk = fCurrentChunk.nextChunk();
1416: fCurrentIndex = 0;
1417: fMostRecentData = fCurrentChunk.toCharArray();
1418: return (fMostRecentChar = fMostRecentData[fCurrentIndex] & 0xFFFF);
1419: } else {
1420: fCurrentChunk = CharDataChunk.createChunk(fStringPool,
1421: fCurrentChunk);
1422: return fillCurrentChunk();
1423: }
1424: }
1425:
1426: /*
1427: * Advance the reader's notion of where it is
1428: *
1429: * @return The next character that will be processed.
1430: * @exception java.lang.Exception
1431: */
1432: private int loadNextChar() throws Exception {
1433: fCurrentOffset++;
1434: if (++fCurrentIndex == CharDataChunk.CHUNK_SIZE)
1435: return slowLoadNextChar();
1436: return (fMostRecentChar = fMostRecentData[fCurrentIndex] & 0xFFFF);
1437: }
1438:
1439: /*
1440: * Would the reader be at end of file at a given offset?
1441: *
1442: * @param offset the offset to test for being at EOF
1443: * @return true if being at offset would mean being at or beyond EOF
1444: */
1445: private boolean atEOF(int offset) {
1446: return (offset > fLength);
1447: }
1448:
1449: }
|