0001: /*
0002: * The Apache Software License, Version 1.1
0003: *
0004: *
0005: * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
0006: * reserved.
0007: *
0008: * Redistribution and use in source and binary forms, with or without
0009: * modification, are permitted provided that the following conditions
0010: * are met:
0011: *
0012: * 1. Redistributions of source code must retain the above copyright
0013: * notice, this list of conditions and the following disclaimer.
0014: *
0015: * 2. Redistributions in binary form must reproduce the above copyright
0016: * notice, this list of conditions and the following disclaimer in
0017: * the documentation and/or other materials provided with the
0018: * distribution.
0019: *
0020: * 3. The end-user documentation included with the redistribution,
0021: * if any, must include the following acknowledgment:
0022: * "This product includes software developed by the
0023: * Apache Software Foundation (http://www.apache.org/)."
0024: * Alternately, this acknowledgment may appear in the software itself,
0025: * if and wherever such third-party acknowledgments normally appear.
0026: *
0027: * 4. The names "Xerces" and "Apache Software Foundation" must
0028: * not be used to endorse or promote products derived from this
0029: * software without prior written permission. For written
0030: * permission, please contact apache@apache.org.
0031: *
0032: * 5. Products derived from this software may not be called "Apache",
0033: * nor may "Apache" appear in their name, without prior written
0034: * permission of the Apache Software Foundation.
0035: *
0036: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0037: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0038: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0039: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
0040: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0041: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0042: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
0043: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0044: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
0045: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
0046: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0047: * SUCH DAMAGE.
0048: * ====================================================================
0049: *
0050: * This software consists of voluntary contributions made by many
0051: * individuals on behalf of the Apache Software Foundation and was
0052: * originally based on software copyright (c) 1999, International
0053: * Business Machines, Inc., http://www.apache.org. For more
0054: * information on the Apache Software Foundation, please see
0055: * <http://www.apache.org/>.
0056: */
0057:
0058: package org.apache.xerces.readers;
0059:
0060: import org.apache.xerces.framework.XMLErrorReporter;
0061: import org.apache.xerces.utils.QName;
0062: import org.apache.xerces.utils.StringPool;
0063: import org.apache.xerces.utils.SymbolCache;
0064: import org.apache.xerces.utils.UTF8DataChunk;
0065: import org.apache.xerces.utils.XMLCharacterProperties;
0066: import org.xml.sax.SAXParseException;
0067: import org.xml.sax.helpers.LocatorImpl;
0068: import java.io.InputStream;
0069: import java.util.Vector;
0070:
0071: /**
0072: * This is the primary reader used for UTF-8 encoded byte streams.
0073: * <p>
0074: * This reader processes requests from the scanners against the
0075: * underlying UTF-8 byte stream, avoiding when possible any up-front
0076: * transcoding. When the StringPool handle interfaces are used,
0077: * the information in the data stream will be added to the string
0078: * pool and lazy-evaluated until asked for.
0079: * <p>
0080: * We use the SymbolCache to match expected names (element types in
0081: * end tags) and walk the data structures of that class directly.
0082: * <p>
0083: * There is a significant amount of hand-inlining and some blatant
0084: * violation of good object oriented programming rules, ignoring
0085: * boundaries of modularity, etc., in the name of good performance.
0086: * <p>
0087: * There are also some places where the code here frequently crashes
0088: * the SUN java runtime compiler (JIT) and the code here has been
0089: * carefully "crafted" to avoid those problems.
0090: *
0091: * @version $Id: UTF8Reader.java,v 1.12 2001/02/01 09:58:23 andyc Exp $
0092: */
0093: final class UTF8Reader extends XMLEntityReader {
0094: //
0095: //
0096: //
0097: private final static boolean USE_OUT_OF_LINE_LOAD_NEXT_BYTE = false;
0098: private final static boolean USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE = true;
0099:
/**
 * Creates a reader over the given byte stream.
 * <p>
 * The constructor records the stream and string pool, obtains a char
 * array range from the pool, creates the first data chunk (with no
 * predecessor) and then calls {@code fillCurrentChunk()}.
 *
 * @param entityHandler           passed through to the superclass
 * @param errorReporter           passed through to the superclass
 * @param sendCharDataAsCharArray passed through to the superclass
 * @param dataStream              the byte stream to read from
 * @param stringPool              the string pool used for chunks and ranges
 * @throws Exception if the superclass constructor or the initial fill fails
 */
public UTF8Reader(XMLEntityHandler entityHandler,
        XMLErrorReporter errorReporter,
        boolean sendCharDataAsCharArray, InputStream dataStream,
        StringPool stringPool) throws Exception {
    super(entityHandler, errorReporter, sendCharDataAsCharArray);
    this.fStringPool = stringPool;
    this.fInputStream = dataStream;
    this.fCharArrayRange = stringPool.createCharArrayRange();
    this.fCurrentChunk = UTF8DataChunk.createChunk(stringPool, null);
    fillCurrentChunk();
}
0114:
0115: /**
0116: *
0117: */
0118: public int addString(int offset, int length) {
0119: if (length == 0)
0120: return 0;
0121: return fCurrentChunk.addString(offset, length);
0122: }
0123:
0124: /**
0125: *
0126: */
0127: public int addSymbol(int offset, int length) {
0128: if (length == 0)
0129: return 0;
0130: return fCurrentChunk.addSymbol(offset, length, 0);
0131: }
0132:
0133: /**
0134: *
0135: */
0136: private int addSymbol(int offset, int length, int hashcode) {
0137: if (length == 0)
0138: return 0;
0139: return fCurrentChunk.addSymbol(offset, length, hashcode);
0140: }
0141:
0142: /**
0143: *
0144: */
0145: public void append(XMLEntityHandler.CharBuffer charBuffer,
0146: int offset, int length) {
0147: fCurrentChunk.append(charBuffer, offset, length);
0148: }
0149:
0150: //
0151: //
0152: //
0153: private int slowLoadNextByte() throws Exception {
0154: fCallClearPreviousChunk = true;
0155: if (fCurrentChunk.nextChunk() != null) {
0156: fCurrentChunk = fCurrentChunk.nextChunk();
0157: fCurrentIndex = 0;
0158: fMostRecentData = fCurrentChunk.toByteArray();
0159: return (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
0160: } else {
0161: fCurrentChunk = UTF8DataChunk.createChunk(fStringPool,
0162: fCurrentChunk);
0163: return fillCurrentChunk();
0164: }
0165: }
0166:
0167: private int loadNextByte() throws Exception {
0168: fCurrentOffset++;
0169: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
0170: fCurrentIndex++;
0171: try {
0172: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0173: return fMostRecentByte;
0174: } catch (ArrayIndexOutOfBoundsException ex) {
0175: return slowLoadNextByte();
0176: }
0177: } else {
0178: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
0179: return slowLoadNextByte();
0180: else
0181: return (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
0182: }
0183: }
0184:
0185: //
0186: //
0187: //
0188: private boolean atEOF(int offset) {
0189: return (offset > fLength);
0190: }
0191:
0192: //
0193: //
0194: //
0195: public XMLEntityHandler.EntityReader changeReaders()
0196: throws Exception {
0197: XMLEntityHandler.EntityReader nextReader = super
0198: .changeReaders();
0199: fCurrentChunk.releaseChunk();
0200: fCurrentChunk = null;
0201: fMostRecentData = null;
0202: fMostRecentByte = 0;
0203: return nextReader;
0204: }
0205:
0206: //
0207: //
0208: //
0209: public boolean lookingAtChar(char ch, boolean skipPastChar)
0210: throws Exception {
0211: int b0 = fMostRecentByte;
0212: if (b0 != ch) {
0213: if (b0 == 0) {
0214: if (atEOF(fCurrentOffset + 1)) {
0215: return changeReaders().lookingAtChar(ch,
0216: skipPastChar);
0217: }
0218: }
0219: if (ch == 0x0A && b0 == 0x0D) {
0220: if (skipPastChar) {
0221: fCarriageReturnCounter++;
0222: fCharacterCounter = 1;
0223: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
0224: b0 = loadNextByte();
0225: } else {
0226: fCurrentOffset++;
0227: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
0228: fCurrentIndex++;
0229: try {
0230: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0231: b0 = fMostRecentByte;
0232: } catch (ArrayIndexOutOfBoundsException ex) {
0233: b0 = slowLoadNextByte();
0234: }
0235: } else {
0236: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
0237: b0 = slowLoadNextByte();
0238: else
0239: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
0240: }
0241: }
0242: if (b0 == 0x0A) {
0243: fLinefeedCounter++;
0244: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
0245: loadNextByte();
0246: } else {
0247: fCurrentOffset++;
0248: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
0249: fCurrentIndex++;
0250: try {
0251: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0252: } catch (ArrayIndexOutOfBoundsException ex) {
0253: slowLoadNextByte();
0254: }
0255: } else {
0256: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
0257: slowLoadNextByte();
0258: else
0259: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0260: }
0261: }
0262: }
0263: }
0264: return true;
0265: }
0266: return false;
0267: }
0268: if (ch == 0x0D)
0269: return false;
0270: if (skipPastChar) {
0271: fCharacterCounter++;
0272: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
0273: loadNextByte();
0274: } else {
0275: fCurrentOffset++;
0276: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
0277: fCurrentIndex++;
0278: try {
0279: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0280: } catch (ArrayIndexOutOfBoundsException ex) {
0281: slowLoadNextByte();
0282: }
0283: } else {
0284: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
0285: slowLoadNextByte();
0286: else
0287: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0288: }
0289: }
0290: }
0291: return true;
0292: }
0293:
0294: //
0295: //
0296: //
0297: public boolean lookingAtValidChar(boolean skipPastChar)
0298: throws Exception {
0299: int b0 = fMostRecentByte;
0300: if (b0 < 0x80) { // 0xxxxxxx
0301: if (b0 >= 0x20 || b0 == 0x09) {
0302: if (skipPastChar) {
0303: fCharacterCounter++;
0304: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
0305: loadNextByte();
0306: } else {
0307: fCurrentOffset++;
0308: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
0309: fCurrentIndex++;
0310: try {
0311: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0312: } catch (ArrayIndexOutOfBoundsException ex) {
0313: slowLoadNextByte();
0314: }
0315: } else {
0316: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
0317: slowLoadNextByte();
0318: else
0319: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0320: }
0321: }
0322: }
0323: return true;
0324: }
0325: if (b0 == 0x0A) {
0326: if (skipPastChar) {
0327: fLinefeedCounter++;
0328: fCharacterCounter = 1;
0329: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
0330: loadNextByte();
0331: } else {
0332: fCurrentOffset++;
0333: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
0334: fCurrentIndex++;
0335: try {
0336: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0337: } catch (ArrayIndexOutOfBoundsException ex) {
0338: slowLoadNextByte();
0339: }
0340: } else {
0341: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
0342: slowLoadNextByte();
0343: else
0344: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0345: }
0346: }
0347: }
0348: return true;
0349: }
0350: if (b0 == 0x0D) {
0351: if (skipPastChar) {
0352: fCarriageReturnCounter++;
0353: fCharacterCounter = 1;
0354: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
0355: b0 = loadNextByte();
0356: } else {
0357: fCurrentOffset++;
0358: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
0359: fCurrentIndex++;
0360: try {
0361: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0362: b0 = fMostRecentByte;
0363: } catch (ArrayIndexOutOfBoundsException ex) {
0364: b0 = slowLoadNextByte();
0365: }
0366: } else {
0367: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
0368: b0 = slowLoadNextByte();
0369: else
0370: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
0371: }
0372: }
0373: if (b0 == 0x0A) {
0374: fLinefeedCounter++;
0375: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
0376: loadNextByte();
0377: } else {
0378: fCurrentOffset++;
0379: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
0380: fCurrentIndex++;
0381: try {
0382: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0383: } catch (ArrayIndexOutOfBoundsException ex) {
0384: slowLoadNextByte();
0385: }
0386: } else {
0387: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
0388: slowLoadNextByte();
0389: else
0390: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0391: }
0392: }
0393: }
0394: }
0395: return true;
0396: }
0397: if (b0 == 0) {
0398: if (atEOF(fCurrentOffset + 1)) {
0399: return changeReaders().lookingAtValidChar(
0400: skipPastChar);
0401: }
0402: }
0403: return false;
0404: }
0405: //
0406: // REVISIT - optimize this with in-buffer lookahead.
0407: //
0408: UTF8DataChunk saveChunk = fCurrentChunk;
0409: int saveIndex = fCurrentIndex;
0410: int saveOffset = fCurrentOffset;
0411: int b1 = loadNextByte();
0412: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx (0x80 to 0x7ff)
0413: if (skipPastChar) {
0414: fCharacterCounter++;
0415: loadNextByte();
0416: } else {
0417: fCurrentChunk = saveChunk;
0418: fCurrentIndex = saveIndex;
0419: fCurrentOffset = saveOffset;
0420: fMostRecentData = saveChunk.toByteArray();
0421: fMostRecentByte = b0;
0422: }
0423: return true; // [#x20-#xD7FF]
0424: }
0425: int b2 = loadNextByte();
0426: if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
0427: // ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
0428: // if (!((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE))
0429: // if ((ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD))
0430: boolean result = false;
0431: if (!((b0 == 0xED && b1 >= 0xA0) || (b0 == 0xEF
0432: && b1 == 0xBF && b2 >= 0xBE))) { // [#x20-#xD7FF] | [#xE000-#xFFFD]
0433: if (skipPastChar) {
0434: fCharacterCounter++;
0435: loadNextByte();
0436: return true;
0437: }
0438: result = true;
0439: }
0440: fCurrentChunk = saveChunk;
0441: fCurrentIndex = saveIndex;
0442: fCurrentOffset = saveOffset;
0443: fMostRecentData = saveChunk.toByteArray();
0444: fMostRecentByte = b0;
0445: return result;
0446: }
0447: int b3 = loadNextByte(); // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
0448: // ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3); // u uuuu zzzz yyyy yyxx xxxx (0x10000 to 0x1ffff)
0449: // if (ch >= 0x110000)
0450: boolean result = false;
0451:
0452: //if (( 0xf8 & b0 ) == 0xf0 ) {
0453: //if (!(b0 > 0xF4 || (b0 == 0xF4 && b1 >= 0x90))) { // [#x10000-#x10FFFF]
0454: if (((b0 & 0xf8) == 0xf0) && ((b1 & 0xc0) == 0x80)
0455: && ((b2 & 0xc0) == 0x80) && ((b3 & 0xc0) == 0x80)) {
0456: if (!(b0 > 0xF4 || (b0 == 0xF4 && b1 >= 0x90))) { // [#x10000-#x10FFFF]
0457:
0458: if (skipPastChar) {
0459: fCharacterCounter++;
0460: loadNextByte();
0461: return true;
0462: }
0463: result = true;
0464: }
0465: fCurrentChunk = saveChunk;
0466: fCurrentIndex = saveIndex;
0467: fCurrentOffset = saveOffset;
0468: fMostRecentData = saveChunk.toByteArray();
0469: fMostRecentByte = b0;
0470: return result;
0471: } else {
0472: fCurrentChunk = saveChunk;
0473: fCurrentIndex = saveIndex;
0474: fCurrentOffset = saveOffset;
0475: fMostRecentData = saveChunk.toByteArray();
0476: fMostRecentByte = b0;
0477: return result;
0478: }
0479: }
0480:
0481: //
0482: //
0483: //
0484: public boolean lookingAtSpace(boolean skipPastChar)
0485: throws Exception {
0486: int ch = fMostRecentByte;
0487: if (ch > 0x20)
0488: return false;
0489: if (ch == 0x20 || ch == 0x09) {
0490: if (!skipPastChar)
0491: return true;
0492: fCharacterCounter++;
0493: } else if (ch == 0x0A) {
0494: if (!skipPastChar)
0495: return true;
0496: fLinefeedCounter++;
0497: fCharacterCounter = 1;
0498: } else if (ch == 0x0D) {
0499: if (!skipPastChar)
0500: return true;
0501: fCarriageReturnCounter++;
0502: fCharacterCounter = 1;
0503: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
0504: ch = loadNextByte();
0505: } else {
0506: fCurrentOffset++;
0507: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
0508: fCurrentIndex++;
0509: try {
0510: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0511: ch = fMostRecentByte;
0512: } catch (ArrayIndexOutOfBoundsException ex) {
0513: ch = slowLoadNextByte();
0514: }
0515: } else {
0516: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
0517: ch = slowLoadNextByte();
0518: else
0519: ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
0520: }
0521: }
0522: if (ch != 0x0A)
0523: return true;
0524: fLinefeedCounter++;
0525: } else {
0526: if (ch == 0) { // REVISIT - should we be checking this here ?
0527: if (atEOF(fCurrentOffset + 1)) {
0528: return changeReaders().lookingAtSpace(skipPastChar);
0529: }
0530: }
0531: return false;
0532: }
0533: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
0534: loadNextByte();
0535: } else {
0536: fCurrentOffset++;
0537: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
0538: fCurrentIndex++;
0539: try {
0540: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0541: } catch (ArrayIndexOutOfBoundsException ex) {
0542: slowLoadNextByte();
0543: }
0544: } else {
0545: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
0546: slowLoadNextByte();
0547: else
0548: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0549: }
0550: }
0551: return true;
0552: }
0553:
0554: //
0555: //
0556: //
0557: public void skipToChar(char ch) throws Exception {
0558: //
0559: // REVISIT - this will skip invalid characters without reporting them.
0560: //
0561: int b0 = fMostRecentByte;
0562: while (true) {
0563: if (b0 == ch) // ch will always be an ascii character
0564: return;
0565: if (b0 == 0) {
0566: if (atEOF(fCurrentOffset + 1)) {
0567: changeReaders().skipToChar(ch);
0568: return;
0569: }
0570: fCharacterCounter++;
0571: } else if (b0 == 0x0A) {
0572: fLinefeedCounter++;
0573: fCharacterCounter = 1;
0574: } else if (b0 == 0x0D) {
0575: fCarriageReturnCounter++;
0576: fCharacterCounter = 1;
0577: b0 = loadNextByte();
0578: if (b0 != 0x0A)
0579: continue;
0580: fLinefeedCounter++;
0581: } else if (b0 < 0x80) { // 0xxxxxxx
0582: fCharacterCounter++;
0583: } else {
0584: fCharacterCounter++;
0585: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
0586: loadNextByte();
0587: } else if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
0588: loadNextByte();
0589: loadNextByte();
0590: } else { // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
0591: loadNextByte();
0592: loadNextByte();
0593: loadNextByte();
0594: }
0595: }
0596: b0 = loadNextByte();
0597: }
0598: }
0599:
0600: //
0601: //
0602: //
0603: public void skipPastSpaces() throws Exception {
0604: int ch = fMostRecentByte;
0605: while (true) {
0606: if (ch == 0x20 || ch == 0x09) {
0607: fCharacterCounter++;
0608: } else if (ch == 0x0A) {
0609: fLinefeedCounter++;
0610: fCharacterCounter = 1;
0611: } else if (ch == 0x0D) {
0612: fCarriageReturnCounter++;
0613: fCharacterCounter = 1;
0614: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
0615: ch = loadNextByte();
0616: } else {
0617: fCurrentOffset++;
0618: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
0619: fCurrentIndex++;
0620: try {
0621: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0622: ch = fMostRecentByte;
0623: } catch (ArrayIndexOutOfBoundsException ex) {
0624: ch = slowLoadNextByte();
0625: }
0626: } else {
0627: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
0628: ch = slowLoadNextByte();
0629: else
0630: ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
0631: }
0632: }
0633: if (ch != 0x0A)
0634: continue;
0635: fLinefeedCounter++;
0636: } else {
0637: if (ch == 0 && atEOF(fCurrentOffset + 1))
0638: changeReaders().skipPastSpaces();
0639: return;
0640: }
0641: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
0642: ch = loadNextByte();
0643: } else {
0644: fCurrentOffset++;
0645: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
0646: fCurrentIndex++;
0647: try {
0648: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
0649: ch = fMostRecentByte;
0650: } catch (ArrayIndexOutOfBoundsException ex) {
0651: ch = slowLoadNextByte();
0652: }
0653: } else {
0654: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
0655: ch = slowLoadNextByte();
0656: else
0657: ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
0658: }
0659: }
0660: }
0661: }
0662:
0663: //
0664: //
0665: //
0666: protected boolean skippedMultiByteCharWithFlag(int b0, int flag)
0667: throws Exception {
0668: UTF8DataChunk saveChunk = fCurrentChunk;
0669: int saveOffset = fCurrentOffset;
0670: int saveIndex = fCurrentIndex;
0671: if (!fCalledCharPropInit) {
0672: XMLCharacterProperties.initCharFlags();
0673: fCalledCharPropInit = true;
0674: }
0675: int b1 = loadNextByte();
0676: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
0677: if ((XMLCharacterProperties.fgCharFlags[((0x1f & b0) << 6)
0678: + (0x3f & b1)] & flag) == 0) { // yyy yyxx xxxx (0x80 to 0x7ff)
0679: fCurrentChunk = saveChunk;
0680: fCurrentIndex = saveIndex;
0681: fCurrentOffset = saveOffset;
0682: fMostRecentData = saveChunk.toByteArray();
0683: fMostRecentByte = b0;
0684: return false;
0685: }
0686: return true;
0687: }
0688: int b2 = loadNextByte();
0689: if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
0690: // if ((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE)
0691: if ((b0 == 0xED && b1 >= 0xA0)
0692: || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
0693: fCurrentChunk = saveChunk;
0694: fCurrentIndex = saveIndex;
0695: fCurrentOffset = saveOffset;
0696: fMostRecentData = saveChunk.toByteArray();
0697: fMostRecentByte = b0;
0698: return false;
0699: }
0700: if ((XMLCharacterProperties.fgCharFlags[((0x0f & b0) << 12)
0701: + ((0x3f & b1) << 6) + (0x3f & b2)] & flag) == 0) { // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
0702: fCurrentChunk = saveChunk;
0703: fCurrentIndex = saveIndex;
0704: fCurrentOffset = saveOffset;
0705: fMostRecentData = saveChunk.toByteArray();
0706: fMostRecentByte = b0;
0707: return false;
0708: }
0709: return true;
0710: } else { // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
0711: fCurrentChunk = saveChunk;
0712: fCurrentIndex = saveIndex;
0713: fCurrentOffset = saveOffset;
0714: fMostRecentData = saveChunk.toByteArray();
0715: fMostRecentByte = b0;
0716: return false;
0717: }
0718: }
0719:
0720: public void skipPastName(char fastcheck) throws Exception {
0721: int b0 = fMostRecentByte;
0722: if (b0 < 0x80) {
0723: if (XMLCharacterProperties.fgAsciiInitialNameChar[b0] == 0)
0724: return;
0725: } else {
0726: if (!fCalledCharPropInit) {
0727: XMLCharacterProperties.initCharFlags();
0728: fCalledCharPropInit = true;
0729: }
0730: if (!skippedMultiByteCharWithFlag(b0,
0731: XMLCharacterProperties.E_InitialNameCharFlag))
0732: return;
0733: }
0734: while (true) {
0735: fCharacterCounter++;
0736: b0 = loadNextByte();
0737: if (fastcheck == b0)
0738: return;
0739: if (b0 < 0x80) {
0740: if (XMLCharacterProperties.fgAsciiNameChar[b0] == 0)
0741: return;
0742: } else {
0743: if (!fCalledCharPropInit) {
0744: XMLCharacterProperties.initCharFlags();
0745: fCalledCharPropInit = true;
0746: }
0747: if (!skippedMultiByteCharWithFlag(b0,
0748: XMLCharacterProperties.E_NameCharFlag))
0749: return;
0750: }
0751: }
0752: }
0753:
0754: //
0755: //
0756: //
0757: public void skipPastNmtoken(char fastcheck) throws Exception {
0758: int b0 = fMostRecentByte;
0759: while (true) {
0760: if (fastcheck == b0)
0761: return;
0762: if (b0 < 0x80) {
0763: if (XMLCharacterProperties.fgAsciiNameChar[b0] == 0)
0764: return;
0765: } else {
0766: if (!skippedMultiByteCharWithFlag(b0,
0767: XMLCharacterProperties.E_NameCharFlag))
0768: return;
0769: }
0770: fCharacterCounter++;
0771: b0 = loadNextByte();
0772: }
0773: }
0774:
0775: //
0776: //
0777: //
0778: public boolean skippedString(char[] s) throws Exception {
0779: int length = s.length;
0780: byte[] data = fMostRecentData;
0781: int index = fCurrentIndex + length;
0782: int sindex = length;
0783: try {
0784: while (sindex-- > 0) {
0785: if (data[--index] != s[sindex])
0786: return false;
0787: }
0788: fCurrentIndex += length;
0789: } catch (ArrayIndexOutOfBoundsException ex) {
0790: int i = 0;
0791: index = fCurrentIndex;
0792: while (index < UTF8DataChunk.CHUNK_SIZE) {
0793: if (data[index++] != s[i++])
0794: return false;
0795: }
0796: UTF8DataChunk dataChunk = fCurrentChunk;
0797: int savedOffset = fCurrentOffset;
0798: int savedIndex = fCurrentIndex;
0799: slowLoadNextByte();
0800: data = fMostRecentData;
0801: index = 0;
0802: while (i < length) {
0803: if (data[index++] != s[i++]) {
0804: fCurrentChunk = dataChunk;
0805: fCurrentIndex = savedIndex;
0806: fCurrentOffset = savedOffset;
0807: fMostRecentData = fCurrentChunk.toByteArray();
0808: fMostRecentByte = fMostRecentData[savedIndex] & 0xFF;
0809: return false;
0810: }
0811: }
0812: fCurrentIndex = index;
0813: }
0814: fCharacterCounter += length;
0815: fCurrentOffset += length;
0816: try {
0817: fMostRecentByte = data[fCurrentIndex] & 0xFF;
0818: } catch (ArrayIndexOutOfBoundsException ex) {
0819: slowLoadNextByte();
0820: }
0821: return true;
0822: }
0823:
0824: //
0825: //
0826: //
0827: public int scanInvalidChar() throws Exception {
0828: int b0 = fMostRecentByte;
0829: int ch = b0;
0830: if (ch == 0x0A) {
0831: fLinefeedCounter++;
0832: fCharacterCounter = 1;
0833: } else if (ch == 0x0D) {
0834: fCarriageReturnCounter++;
0835: fCharacterCounter = 1;
0836: ch = loadNextByte();
0837: if (ch != 0x0A)
0838: return 0x0A;
0839: fLinefeedCounter++;
0840: } else if (ch == 0) {
0841: if (atEOF(fCurrentOffset + 1)) {
0842: return changeReaders().scanInvalidChar();
0843: }
0844: fCharacterCounter++;
0845: } else if (b0 >= 0x80) {
0846: fCharacterCounter++;
0847: int b1 = loadNextByte();
0848: int b2 = 0;
0849: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
0850: ch = ((0x1f & b0) << 6) + (0x3f & b1);
0851: } else if ((0xf0 & b0) == 0xe0) {
0852: b2 = loadNextByte();
0853: ch = ((0x0f & b0) << 12) + ((0x3f & b1) << 6)
0854: + (0x3f & b2);
0855: } else if ((0xf8 & b0) == 0xf0) {
0856: b2 = loadNextByte();
0857: int b3 = loadNextByte(); // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
0858: ch = ((0x0f & b0) << 18) + ((0x3f & b1) << 12)
0859: + ((0x3f & b2) << 6) + (0x3f & b3);
0860: }
0861: }
0862: loadNextByte();
0863: return ch;
0864: }
0865:
0866: //
0867: //
0868: //
0869: public int scanCharRef(boolean hex) throws Exception {
0870: int ch = fMostRecentByte;
0871: if (ch == 0) {
0872: if (atEOF(fCurrentOffset + 1)) {
0873: return changeReaders().scanCharRef(hex);
0874: }
0875: return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
0876: }
0877: int num = 0;
0878: if (hex) {
0879: if (ch > 'f'
0880: || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0)
0881: return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
0882: num = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
0883: } else {
0884: if (ch < '0' || ch > '9')
0885: return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
0886: num = ch - '0';
0887: }
0888: fCharacterCounter++;
0889: loadNextByte();
0890: boolean toobig = false;
0891: while (true) {
0892: ch = fMostRecentByte;
0893: if (ch == 0)
0894: break;
0895: if (hex) {
0896: if (ch > 'f'
0897: || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0)
0898: break;
0899: } else {
0900: if (ch < '0' || ch > '9')
0901: break;
0902: }
0903: fCharacterCounter++;
0904: loadNextByte();
0905: if (hex) {
0906: int dig = ch
0907: - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
0908: num = (num << 4) + dig;
0909: } else {
0910: int dig = ch - '0';
0911: num = (num * 10) + dig;
0912: }
0913: if (num > 0x10FFFF) {
0914: toobig = true;
0915: num = 0;
0916: }
0917: }
0918: if (ch != ';')
0919: return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED;
0920: fCharacterCounter++;
0921: loadNextByte();
0922: if (toobig)
0923: return XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE;
0924: return num;
0925: }
0926:
0927: //
0928: //
0929: //
0930: public int scanStringLiteral() throws Exception {
0931: boolean single;
0932: if (!(single = lookingAtChar('\'', true))
0933: && !lookingAtChar('\"', true)) {
0934: return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
0935: }
0936: int offset = fCurrentOffset;
0937: char qchar = single ? '\'' : '\"';
0938: while (!lookingAtChar(qchar, false)) {
0939: if (!lookingAtValidChar(true)) {
0940: return XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR;
0941: }
0942: }
0943: int stringIndex = fCurrentChunk.addString(offset,
0944: fCurrentOffset - offset);
0945: lookingAtChar(qchar, true); // move past qchar
0946: return stringIndex;
0947: }
0948:
0949: //
0950: // [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
0951: // | "'" ([^<&'] | Reference)* "'"
0952: //
0953: // The values in the following table are defined as:
0954: //
0955: // 0 - not special
0956: // 1 - quote character
0957: // 2 - complex
0958: // 3 - less than
0959: // 4 - invalid
0960: //
0961: public static final byte fgAsciiAttValueChar[] = { 4, 4, 4, 4, 4,
0962: 4, 4, 4, 4, 2, 2, 4,
0963: 4,
0964: 2,
0965: 4,
0966: 4, // tab is 0x09, LF is 0x0A, CR is 0x0D
0967: 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 1, 0,
0968: 0, 0, 2, 1, 0, 0, 0, 0, 0,
0969: 0,
0970: 0,
0971: 0, // '\"' is 0x22, '&' is 0x26, '\'' is 0x27
0972: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
0973: 0,
0974: 0,
0975: 0, // '<' is 0x3C
0976: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0977: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0978: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0979: 0, 0, 0, 0 };
0980:
0981: public int scanAttValue(char qchar, boolean asSymbol)
0982: throws Exception {
0983: int offset = fCurrentOffset;
0984: int b0 = fMostRecentByte;
0985: while (true) {
0986: if (b0 < 0x80) {
0987: switch (fgAsciiAttValueChar[b0]) {
0988: case 1: // quote char
0989: if (b0 == qchar) {
0990: int length = fCurrentOffset - offset;
0991: int result = length == 0 ? StringPool.EMPTY_STRING
0992: : (asSymbol ? fCurrentChunk.addSymbol(
0993: offset, length, 0)
0994: : fCurrentChunk.addString(
0995: offset, length));
0996: fCharacterCounter++;
0997: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
0998: loadNextByte();
0999: } else {
1000: fCurrentOffset++;
1001: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1002: fCurrentIndex++;
1003: try {
1004: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
1005: } catch (ArrayIndexOutOfBoundsException ex) {
1006: slowLoadNextByte();
1007: }
1008: } else {
1009: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1010: slowLoadNextByte();
1011: else
1012: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
1013: }
1014: }
1015: return result;
1016: }
1017: // the other quote character is not special
1018: // fall through
1019: case 0: // non-special char
1020: fCharacterCounter++;
1021: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1022: b0 = loadNextByte();
1023: } else {
1024: fCurrentOffset++;
1025: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1026: fCurrentIndex++;
1027: try {
1028: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1029: } catch (ArrayIndexOutOfBoundsException ex) {
1030: b0 = slowLoadNextByte();
1031: }
1032: } else {
1033: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1034: b0 = slowLoadNextByte();
1035: else
1036: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1037: }
1038: }
1039: continue;
1040: case 2: // complex
1041: return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX;
1042: case 3: // less than
1043: return XMLEntityHandler.ATTVALUE_RESULT_LESSTHAN;
1044: case 4: // invalid
1045: return XMLEntityHandler.ATTVALUE_RESULT_INVALID_CHAR;
1046: }
1047: } else {
1048: if (!skipMultiByteCharData(b0))
1049: return XMLEntityHandler.ATTVALUE_RESULT_INVALID_CHAR;
1050: b0 = fMostRecentByte;
1051: }
1052: }
1053: }
1054:
1055: //
1056: // [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
1057: // | "'" ([^%&'] | PEReference | Reference)* "'"
1058: //
1059: // The values in the following table are defined as:
1060: //
1061: // 0 - not special
1062: // 1 - quote character
1063: // 2 - reference
1064: // 3 - peref
1065: // 4 - invalid
1066: // 5 - linefeed
1067: // 6 - carriage-return
1068: // 7 - end of input
1069: //
1070: public static final byte fgAsciiEntityValueChar[] = { 7, 4, 4, 4,
1071: 4, 4, 4, 4, 4, 0, 5, 4,
1072: 4,
1073: 6,
1074: 4,
1075: 4, // tab is 0x09, LF is 0x0A, CR is 0x0D
1076: 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 1, 0,
1077: 0, 3, 2, 1, 0, 0, 0, 0, 0,
1078: 0,
1079: 0,
1080: 0, // '\"', '%', '&', '\''
1081: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1082: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1083: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1084: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1085:
1086: public int scanEntityValue(int qchar, boolean createString)
1087: throws Exception {
1088: int offset = fCurrentOffset;
1089: int b0 = fMostRecentByte;
1090: while (true) {
1091: if (b0 < 0x80) {
1092: switch (fgAsciiEntityValueChar[b0]) {
1093: case 1: // quote char
1094: if (b0 == qchar) {
1095: if (!createString)
1096: return XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED;
1097: int length = fCurrentOffset - offset;
1098: int result = length == 0 ? StringPool.EMPTY_STRING
1099: : fCurrentChunk.addString(offset,
1100: length);
1101: fCharacterCounter++;
1102: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1103: loadNextByte();
1104: } else {
1105: fCurrentOffset++;
1106: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1107: fCurrentIndex++;
1108: try {
1109: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
1110: } catch (ArrayIndexOutOfBoundsException ex) {
1111: slowLoadNextByte();
1112: }
1113: } else {
1114: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1115: slowLoadNextByte();
1116: else
1117: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
1118: }
1119: }
1120: return result;
1121: }
1122: // the other quote character is not special
1123: // fall through
1124: case 0: // non-special char
1125: fCharacterCounter++;
1126: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1127: b0 = loadNextByte();
1128: } else {
1129: fCurrentOffset++;
1130: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1131: fCurrentIndex++;
1132: try {
1133: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1134: } catch (ArrayIndexOutOfBoundsException ex) {
1135: b0 = slowLoadNextByte();
1136: }
1137: } else {
1138: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1139: b0 = slowLoadNextByte();
1140: else
1141: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1142: }
1143: }
1144: continue;
1145: case 5: // linefeed
1146: fLinefeedCounter++;
1147: fCharacterCounter = 1;
1148: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1149: b0 = loadNextByte();
1150: } else {
1151: fCurrentOffset++;
1152: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1153: fCurrentIndex++;
1154: try {
1155: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1156: } catch (ArrayIndexOutOfBoundsException ex) {
1157: b0 = slowLoadNextByte();
1158: }
1159: } else {
1160: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1161: b0 = slowLoadNextByte();
1162: else
1163: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1164: }
1165: }
1166: continue;
1167: case 6: // carriage-return
1168: fCarriageReturnCounter++;
1169: fCharacterCounter = 1;
1170: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1171: b0 = loadNextByte();
1172: } else {
1173: fCurrentOffset++;
1174: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1175: fCurrentIndex++;
1176: try {
1177: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1178: } catch (ArrayIndexOutOfBoundsException ex) {
1179: b0 = slowLoadNextByte();
1180: }
1181: } else {
1182: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1183: b0 = slowLoadNextByte();
1184: else
1185: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1186: }
1187: }
1188: if (b0 != 0x0A) {
1189: continue;
1190: }
1191: fLinefeedCounter++;
1192: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1193: b0 = loadNextByte();
1194: } else {
1195: fCurrentOffset++;
1196: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1197: fCurrentIndex++;
1198: try {
1199: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1200: } catch (ArrayIndexOutOfBoundsException ex) {
1201: b0 = slowLoadNextByte();
1202: }
1203: } else {
1204: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1205: b0 = slowLoadNextByte();
1206: else
1207: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1208: }
1209: }
1210: continue;
1211: case 2: // reference
1212: return XMLEntityHandler.ENTITYVALUE_RESULT_REFERENCE;
1213: case 3: // peref
1214: return XMLEntityHandler.ENTITYVALUE_RESULT_PEREF;
1215: case 7:
1216: if (atEOF(fCurrentOffset + 1)) {
1217: changeReaders(); // do not call next reader, our caller may need to change the parameters
1218: return XMLEntityHandler.ENTITYVALUE_RESULT_END_OF_INPUT;
1219: }
1220: // fall into...
1221: case 4: // invalid
1222: return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR;
1223: }
1224: } else {
1225: if (!skipMultiByteCharData(b0))
1226: return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR;
1227: b0 = fMostRecentByte;
1228: }
1229: }
1230: }
1231:
1232: //
1233: //
1234: //
1235: public boolean scanExpectedName(char fastcheck,
1236: StringPool.CharArrayRange expectedName) throws Exception {
1237: char[] expected = expectedName.chars;
1238: int offset = expectedName.offset;
1239: int len = expectedName.length;
1240: int b0 = fMostRecentByte;
1241: int ch = 0;
1242: int i = 0;
1243: while (true) {
1244: if (b0 < 0x80) {
1245: ch = b0;
1246: if (i == len)
1247: break;
1248: if (ch != expected[offset]) {
1249: skipPastNmtoken(fastcheck);
1250: return false;
1251: }
1252: } else {
1253: //
1254: // REVISIT - optimize this with in-buffer lookahead.
1255: //
1256: UTF8DataChunk saveChunk = fCurrentChunk;
1257: int saveIndex = fCurrentIndex;
1258: int saveOffset = fCurrentOffset;
1259: int b1;
1260: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1261: b1 = loadNextByte();
1262: } else {
1263: fCurrentOffset++;
1264: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1265: fCurrentIndex++;
1266: try {
1267: b1 = fMostRecentData[fCurrentIndex] & 0xFF;
1268: } catch (ArrayIndexOutOfBoundsException ex) {
1269: b1 = slowLoadNextByte();
1270: }
1271: } else {
1272: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1273: b1 = slowLoadNextByte();
1274: else
1275: b1 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1276: }
1277: }
1278: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
1279: ch = ((0x1f & b0) << 6) + (0x3f & b1);
1280: if (i == len)
1281: break;
1282: if (ch != expected[offset]) {
1283: fCurrentChunk = saveChunk;
1284: fCurrentIndex = saveIndex;
1285: fCurrentOffset = saveOffset;
1286: fMostRecentData = saveChunk.toByteArray();
1287: fMostRecentByte = b0;
1288: skipPastNmtoken(fastcheck);
1289: return false;
1290: }
1291: } else {
1292: int b2;
1293: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1294: b2 = loadNextByte();
1295: } else {
1296: fCurrentOffset++;
1297: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1298: fCurrentIndex++;
1299: try {
1300: b2 = fMostRecentData[fCurrentIndex] & 0xFF;
1301: } catch (ArrayIndexOutOfBoundsException ex) {
1302: b2 = slowLoadNextByte();
1303: }
1304: } else {
1305: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1306: b2 = slowLoadNextByte();
1307: else
1308: b2 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1309: }
1310: }
1311: if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
1312: // if ((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE)
1313: if ((b0 == 0xED && b1 >= 0xA0)
1314: || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
1315: fCurrentChunk = saveChunk;
1316: fCurrentIndex = saveIndex;
1317: fCurrentOffset = saveOffset;
1318: fMostRecentData = saveChunk.toByteArray();
1319: fMostRecentByte = b0;
1320: return false;
1321: }
1322: ch = ((0x0f & b0) << 12) + ((0x3f & b1) << 6)
1323: + (0x3f & b2);
1324: if (i == len)
1325: break;
1326: if (ch != expected[offset]) {
1327: fCurrentChunk = saveChunk;
1328: fCurrentIndex = saveIndex;
1329: fCurrentOffset = saveOffset;
1330: fMostRecentData = saveChunk.toByteArray();
1331: fMostRecentByte = b0;
1332: skipPastNmtoken(fastcheck);
1333: return false;
1334: }
1335: } else { // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
1336: fCurrentChunk = saveChunk;
1337: fCurrentIndex = saveIndex;
1338: fCurrentOffset = saveOffset;
1339: fMostRecentData = saveChunk.toByteArray();
1340: fMostRecentByte = b0;
1341: return false;
1342: }
1343: }
1344: }
1345: i++;
1346: offset++;
1347: fCharacterCounter++;
1348: fCurrentOffset++;
1349: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1350: fCurrentIndex++;
1351: try {
1352: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1353: } catch (ArrayIndexOutOfBoundsException ex) {
1354: b0 = slowLoadNextByte();
1355: }
1356: } else {
1357: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1358: b0 = slowLoadNextByte();
1359: else
1360: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1361: }
1362: }
1363: if (ch == fastcheck)
1364: return true;
1365: if (ch < 0x80) {
1366: if (XMLCharacterProperties.fgAsciiNameChar[ch] == 0)
1367: return true;
1368: } else {
1369: if (!fCalledCharPropInit) {
1370: XMLCharacterProperties.initCharFlags();
1371: fCalledCharPropInit = true;
1372: }
1373: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
1374: return true;
1375: }
1376: skipPastNmtoken(fastcheck);
1377: return false;
1378: }
1379:
1380: public void scanQName(char fastcheck, QName qname) throws Exception {
1381: int offset = fCurrentOffset;
1382: int ch = fMostRecentByte;
1383: if (ch < 0x80) {
1384: if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0) {
1385: qname.clear();
1386: return;
1387: }
1388: if (ch == ':') {
1389: qname.clear();
1390: return;
1391: }
1392: } else {
1393: if (!fCalledCharPropInit) {
1394: XMLCharacterProperties.initCharFlags();
1395: fCalledCharPropInit = true;
1396: }
1397: ch = getMultiByteSymbolChar(ch);
1398: fCurrentIndex--;
1399: fCurrentOffset--;
1400: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) {
1401: qname.clear();
1402: return;
1403: }
1404: }
1405: int index = fCurrentIndex;
1406: byte[] data = fMostRecentData;
1407: int prefixend = -1;
1408: while (true) {
1409: fCharacterCounter++;
1410: fCurrentOffset++;
1411: index++;
1412: try {
1413: ch = data[index] & 0xFF;
1414: } catch (ArrayIndexOutOfBoundsException ex) {
1415: ch = slowLoadNextByte();
1416: index = 0;
1417: data = fMostRecentData;
1418: }
1419: if (fastcheck == ch)
1420: break;
1421: if (ch < 0x80) {
1422: if (XMLCharacterProperties.fgAsciiNameChar[ch] == 0)
1423: break;
1424: if (ch == ':') {
1425: if (prefixend != -1)
1426: break;
1427: prefixend = fCurrentOffset;
1428: //
1429: // We need to peek ahead one character. If the next character is not a
1430: // valid initial name character, or is another colon, then we cannot meet
1431: // both the Prefix and LocalPart productions for the QName production,
1432: // which means that there is no Prefix and we need to terminate the QName
1433: // at the first colon.
1434: //
1435: try {
1436: ch = data[index + 1] & 0xFF;
1437: } catch (ArrayIndexOutOfBoundsException ex) {
1438: UTF8DataChunk savedChunk = fCurrentChunk;
1439: int savedOffset = fCurrentOffset;
1440: ch = slowLoadNextByte();
1441: fCurrentChunk = savedChunk;
1442: fCurrentOffset = savedOffset;
1443: fMostRecentData = fCurrentChunk.toByteArray();
1444: }
1445: boolean lpok = true;
1446: if (ch < 0x80) {
1447: if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0
1448: || ch == ':')
1449: lpok = false;
1450: } else {
1451: if (!fCalledCharPropInit) {
1452: XMLCharacterProperties.initCharFlags();
1453: fCalledCharPropInit = true;
1454: }
1455: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0)
1456: lpok = false;
1457: }
1458: ch = ':';
1459: if (!lpok) {
1460: prefixend = -1;
1461: break;
1462: }
1463: }
1464: } else {
1465: if (!fCalledCharPropInit) {
1466: XMLCharacterProperties.initCharFlags();
1467: fCalledCharPropInit = true;
1468: }
1469: fCurrentIndex = index;
1470: fMostRecentByte = ch;
1471: ch = getMultiByteSymbolChar(ch);
1472: fCurrentIndex--;
1473: fCurrentOffset--;
1474: index = fCurrentIndex;
1475: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0)
1476: break;
1477: }
1478: }
1479: fCurrentIndex = index;
1480: fMostRecentByte = ch;
1481: int length = fCurrentOffset - offset;
1482: qname.rawname = addSymbol(offset, length);
1483: qname.prefix = prefixend == -1 ? -1 : addSymbol(offset,
1484: prefixend - offset);
1485: qname.localpart = prefixend == -1 ? qname.rawname : addSymbol(
1486: prefixend + 1, fCurrentOffset - (prefixend + 1));
1487: qname.uri = StringPool.EMPTY_STRING;
1488:
1489: } // scanQName(char,QName)
1490:
1491: private int getMultiByteSymbolChar(int b0) throws Exception {
1492: //
1493: // REVISIT - optimize this with in-buffer lookahead.
1494: //
1495: UTF8DataChunk saveChunk = fCurrentChunk;
1496: int saveIndex = fCurrentIndex;
1497: int saveOffset = fCurrentOffset;
1498: if (!fCalledCharPropInit) {
1499: XMLCharacterProperties.initCharFlags();
1500: fCalledCharPropInit = true;
1501: }
1502: int b1;
1503: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1504: b1 = loadNextByte();
1505: } else {
1506: fCurrentOffset++;
1507: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1508: fCurrentIndex++;
1509: try {
1510: b1 = fMostRecentData[fCurrentIndex] & 0xFF;
1511: } catch (ArrayIndexOutOfBoundsException ex) {
1512: b1 = slowLoadNextByte();
1513: }
1514: } else {
1515: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1516: b1 = slowLoadNextByte();
1517: else
1518: b1 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1519: }
1520: }
1521: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
1522: int ch = ((0x1f & b0) << 6) + (0x3f & b1);
1523: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) { // yyy yyxx xxxx (0x80 to 0x7ff)
1524: fCurrentChunk = saveChunk;
1525: fCurrentIndex = saveIndex;
1526: fCurrentOffset = saveOffset;
1527: fMostRecentData = saveChunk.toByteArray();
1528: fMostRecentByte = b0;
1529: return -1;
1530: }
1531: loadNextByte();
1532: return ch;
1533: }
1534: int b2;
1535: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1536: b2 = loadNextByte();
1537: } else {
1538: fCurrentOffset++;
1539: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1540: fCurrentIndex++;
1541: try {
1542: b2 = fMostRecentData[fCurrentIndex] & 0xFF;
1543: } catch (ArrayIndexOutOfBoundsException ex) {
1544: b2 = slowLoadNextByte();
1545: }
1546: } else {
1547: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1548: b2 = slowLoadNextByte();
1549: else
1550: b2 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1551: }
1552: }
1553: if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
1554: // if ((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE)
1555: if ((b0 == 0xED && b1 >= 0xA0)
1556: || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
1557: fCurrentChunk = saveChunk;
1558: fCurrentIndex = saveIndex;
1559: fCurrentOffset = saveOffset;
1560: fMostRecentData = saveChunk.toByteArray();
1561: fMostRecentByte = b0;
1562: return -1;
1563: }
1564: int ch = ((0x0f & b0) << 12) + ((0x3f & b1) << 6)
1565: + (0x3f & b2);
1566: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) { // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
1567: fCurrentChunk = saveChunk;
1568: fCurrentIndex = saveIndex;
1569: fCurrentOffset = saveOffset;
1570: fMostRecentData = saveChunk.toByteArray();
1571: fMostRecentByte = b0;
1572: return -1;
1573: }
1574: loadNextByte();
1575: return ch;
1576: }
1577: // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
1578: fCurrentChunk = saveChunk;
1579: fCurrentIndex = saveIndex;
1580: fCurrentOffset = saveOffset;
1581: fMostRecentData = saveChunk.toByteArray();
1582: fMostRecentByte = b0;
1583: return -1;
1584: }
1585:
1586: public int scanName(char fastcheck) throws Exception {
1587: int b0 = fMostRecentByte;
1588: int ch;
1589: if (b0 < 0x80) {
1590: if (XMLCharacterProperties.fgAsciiInitialNameChar[b0] == 0) {
1591: if (b0 == 0 && atEOF(fCurrentOffset + 1)) {
1592: return changeReaders().scanName(fastcheck);
1593: }
1594: return -1;
1595: }
1596: ch = b0;
1597: } else {
1598: //
1599: // REVISIT - optimize this with in-buffer lookahead.
1600: //
1601: UTF8DataChunk saveChunk = fCurrentChunk;
1602: int saveIndex = fCurrentIndex;
1603: int saveOffset = fCurrentOffset;
1604: if (!fCalledCharPropInit) {
1605: XMLCharacterProperties.initCharFlags();
1606: fCalledCharPropInit = true;
1607: }
1608: int b1;
1609: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1610: b1 = loadNextByte();
1611: } else {
1612: fCurrentOffset++;
1613: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1614: fCurrentIndex++;
1615: try {
1616: b1 = fMostRecentData[fCurrentIndex] & 0xFF;
1617: } catch (ArrayIndexOutOfBoundsException ex) {
1618: b1 = slowLoadNextByte();
1619: }
1620: } else {
1621: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1622: b1 = slowLoadNextByte();
1623: else
1624: b1 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1625: }
1626: }
1627: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx
1628: ch = ((0x1f & b0) << 6) + (0x3f & b1);
1629: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) { // yyy yyxx xxxx (0x80 to 0x7ff)
1630: fCurrentChunk = saveChunk;
1631: fCurrentIndex = saveIndex;
1632: fCurrentOffset = saveOffset;
1633: fMostRecentData = saveChunk.toByteArray();
1634: fMostRecentByte = b0;
1635: return -1;
1636: }
1637: } else {
1638: int b2;
1639: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1640: b2 = loadNextByte();
1641: } else {
1642: fCurrentOffset++;
1643: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1644: fCurrentIndex++;
1645: try {
1646: b2 = fMostRecentData[fCurrentIndex] & 0xFF;
1647: } catch (ArrayIndexOutOfBoundsException ex) {
1648: b2 = slowLoadNextByte();
1649: }
1650: } else {
1651: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1652: b2 = slowLoadNextByte();
1653: else
1654: b2 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1655: }
1656: }
1657: if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
1658: // if ((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE)
1659: if ((b0 == 0xED && b1 >= 0xA0)
1660: || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
1661: fCurrentChunk = saveChunk;
1662: fCurrentIndex = saveIndex;
1663: fCurrentOffset = saveOffset;
1664: fMostRecentData = saveChunk.toByteArray();
1665: fMostRecentByte = b0;
1666: return -1;
1667: }
1668: ch = ((0x0f & b0) << 12) + ((0x3f & b1) << 6)
1669: + (0x3f & b2);
1670: if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) { // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
1671: fCurrentChunk = saveChunk;
1672: fCurrentIndex = saveIndex;
1673: fCurrentOffset = saveOffset;
1674: fMostRecentData = saveChunk.toByteArray();
1675: fMostRecentByte = b0;
1676: return -1;
1677: }
1678: } else { // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
1679: fCurrentChunk = saveChunk;
1680: fCurrentIndex = saveIndex;
1681: fCurrentOffset = saveOffset;
1682: fMostRecentData = saveChunk.toByteArray();
1683: fMostRecentByte = b0;
1684: return -1;
1685: }
1686: }
1687: }
1688: fCharacterCounter++;
1689: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1690: b0 = loadNextByte();
1691: } else {
1692: fCurrentOffset++;
1693: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1694: fCurrentIndex++;
1695: try {
1696: b0 = fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
1697: } catch (ArrayIndexOutOfBoundsException ex) {
1698: b0 = slowLoadNextByte();
1699: }
1700: } else {
1701: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1702: b0 = slowLoadNextByte();
1703: else
1704: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1705: }
1706: }
1707: return scanMatchingName(ch, b0, fastcheck);
1708: }
1709:
/**
 * Finishes scanning a name whose first character has already been consumed
 * and returns its symbol handle, looking the name up in (and extending) the
 * string pool's symbol cache as it goes.
 *
 * The cache is walked one character at a time. Each cache line
 * (cacheLines[entry]) keeps a record count in slot 0 followed by records of
 * SymbolCache.CACHE_RECORD_SIZE ints. A record whose CHAR_OFFSET slot
 * equals the current character either carries a symbol index
 * (INDEX_OFFSET) or links to the cache line used for the next character
 * (NEXT_OFFSET). The characters read are also appended to
 * cache.fSymbolChars so that a new symbol can be created from them when the
 * cache has no complete match.
 *
 * The name ends at a byte equal to fastcheck, at an ASCII byte that is not
 * a name character, or where getMultiByteSymbolChar() returns -1; that
 * terminating character is not consumed.
 *
 * @param ch        first character of the name
 * @param b0        the byte that follows the first character
 * @param fastcheck character that terminates the name
 * @return the symbol index stored in the cache for this name, or the value
 *         returned by fStringPool.createNonMatchingSymbol() when the cache
 *         had no complete entry for it
 * @throws Exception propagated from the byte-loading helpers
 */
1710: private int scanMatchingName(int ch, int b0, int fastcheck)
1711: throws Exception {
1712: SymbolCache cache = fStringPool.getSymbolCache();
1713: int[][] cacheLines = cache.fCacheLines;
1714: char[] symbolChars = cache.fSymbolChars;
// True when the byte after the first character is already the fastcheck
// terminator, i.e. the name is a single character long.
1715: boolean lengthOfOne = fastcheck == fMostRecentByte;
// Remember where this name's characters start in the shared buffer.
1716: int startOffset = cache.fSymbolCharsOffset;
1717: int entry = 0;
1718: int[] entries = cacheLines[entry];
// Position of the last record in the line; the searches below run from the
// last record towards the first and stop once the position is no longer
// positive.
1719: int offset = 1 + ((entries[0] - 1) * SymbolCache.CACHE_RECORD_SIZE);
1720: int totalMisses = 0;
1721: if (lengthOfOne) {
// Single-character name: only the first cache line is consulted.
1722: while (offset > 0) {
1723: if (entries[offset + SymbolCache.CHAR_OFFSET] == ch) {
1724: if (entries[offset + SymbolCache.INDEX_OFFSET] != -1) {
1725: int symbolIndex = entries[offset
1726: + SymbolCache.INDEX_OFFSET];
// Report the hit to the pool only after more than three misses
// (presumably so the pool can reorder the line - confirm in StringPool).
1727: if (totalMisses > 3)
1728: fStringPool.updateCacheLine(symbolIndex,
1729: totalMisses, 1);
1730: return symbolIndex;
1731: }
// A record exists for ch but has no symbol yet: reuse it below.
1732: break;
1733: }
1734: offset -= SymbolCache.CACHE_RECORD_SIZE;
1735: totalMisses++;
1736: }
// Append ch to the character buffer; when the buffer is full the catch
// block doubles it and retries.
1737: try {
1738: symbolChars[cache.fSymbolCharsOffset] = (char) ch;
1739: } catch (ArrayIndexOutOfBoundsException ex) {
1740: symbolChars = new char[cache.fSymbolCharsOffset * 2];
1741: System.arraycopy(cache.fSymbolChars, 0, symbolChars, 0,
1742: cache.fSymbolCharsOffset);
1743: cache.fSymbolChars = symbolChars;
1744: symbolChars[cache.fSymbolCharsOffset] = (char) ch;
1745: }
1746: cache.fSymbolCharsOffset++;
// No record for ch was found: append a new one at the end of the line,
// growing the line when it is full.
1747: if (offset < 0) {
1748: offset = 1 + (entries[0] * SymbolCache.CACHE_RECORD_SIZE);
1749: entries[0]++;
1750: try {
1751: entries[offset + SymbolCache.CHAR_OFFSET] = ch;
1752: } catch (ArrayIndexOutOfBoundsException ex) {
1753: int newSize = 1 + ((offset - 1) * 2);
1754: entries = new int[newSize];
1755: System.arraycopy(cacheLines[entry], 0, entries, 0,
1756: offset);
1757: cacheLines[entry] = entries;
1758: entries[offset + SymbolCache.CHAR_OFFSET] = ch;
1759: }
1760: entries[offset + SymbolCache.NEXT_OFFSET] = -1;
1761: }
1762: int result = fStringPool.createNonMatchingSymbol(
1763: startOffset, entry, entries, offset);
1764: return result;
1765: }
// Longer name: buffer the first character, then follow the cache for as
// long as each character has a matching record.
1766: try {
1767: symbolChars[cache.fSymbolCharsOffset] = (char) ch;
1768: } catch (ArrayIndexOutOfBoundsException ex) {
1769: symbolChars = new char[cache.fSymbolCharsOffset * 2];
1770: System.arraycopy(cache.fSymbolChars, 0, symbolChars, 0,
1771: cache.fSymbolCharsOffset);
1772: cache.fSymbolChars = symbolChars;
1773: symbolChars[cache.fSymbolCharsOffset] = (char) ch;
1774: }
1775: cache.fSymbolCharsOffset++;
// depth counts how many characters of the name have been matched so far.
1776: int depth = 1;
1777: while (true) {
// Ran off the front of the line: the cache has no record for ch here.
1778: if (offset < 0)
1779: break;
1780: if (entries[offset + SymbolCache.CHAR_OFFSET] != ch) {
1781: offset -= SymbolCache.CACHE_RECORD_SIZE;
1782: totalMisses++;
1783: continue;
1784: }
// Record for ch found. Fetch the next character of the name; ch becomes
// -1 when the name has ended.
1785: if (b0 >= 0x80) {
1786: ch = getMultiByteSymbolChar(b0);
1787: b0 = fMostRecentByte;
1788: } else if (b0 == fastcheck
1789: || XMLCharacterProperties.fgAsciiNameChar[b0] == 0) {
1790: ch = -1;
1791: } else {
1792: ch = b0;
1793: fCharacterCounter++;
1794: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1795: b0 = loadNextByte();
1796: } else {
1797: fCurrentOffset++;
1798: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1799: fCurrentIndex++;
1800: try {
1801: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1802: } catch (ArrayIndexOutOfBoundsException ex) {
1803: b0 = slowLoadNextByte();
1804: }
1805: } else {
1806: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1807: b0 = slowLoadNextByte();
1808: else
1809: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1810: }
1811: }
1812: }
1813: if (ch == -1) {
// End of name on an existing record. Without a symbol index the name is
// new and a symbol is created from the buffered characters.
1814: if (entries[offset + SymbolCache.INDEX_OFFSET] == -1) {
1815: return fStringPool.createNonMatchingSymbol(
1816: startOffset, entry, entries, offset);
1817: }
// Cache hit: drop the buffered characters again and return the symbol.
1818: cache.fSymbolCharsOffset = startOffset;
1819: int symbolIndex = entries[offset
1820: + SymbolCache.INDEX_OFFSET];
1821: if (totalMisses > (depth * 3))
1822: fStringPool.updateCacheLine(symbolIndex,
1823: totalMisses, depth);
1824: return symbolIndex;
1825: }
// The name continues: buffer the new character (growing the buffer when
// full) and move to the cache line linked from this record.
1826: try {
1827: symbolChars[cache.fSymbolCharsOffset] = (char) ch;
1828: } catch (ArrayIndexOutOfBoundsException ex) {
1829: symbolChars = new char[cache.fSymbolCharsOffset * 2];
1830: System.arraycopy(cache.fSymbolChars, 0, symbolChars, 0,
1831: cache.fSymbolCharsOffset);
1832: cache.fSymbolChars = symbolChars;
1833: symbolChars[cache.fSymbolCharsOffset] = (char) ch;
1834: }
1835: cache.fSymbolCharsOffset++;
1836: entry = entries[offset + SymbolCache.NEXT_OFFSET];
// A link of -1 means no next line exists yet; indexing cacheLines with it
// throws, and the handler below allocates the line.
1837: try {
1838: entries = cacheLines[entry];
1839: } catch (ArrayIndexOutOfBoundsException ex) {
1840: if (entry == -1) {
1841: entry = cache.fCacheLineCount++;
1842: entries[offset + SymbolCache.NEXT_OFFSET] = entry;
1843: entries = new int[1 + (SymbolCache.INITIAL_CACHE_RECORD_COUNT * SymbolCache.CACHE_RECORD_SIZE)];
// Store the new line, doubling the cacheLines table when it is full.
1844: try {
1845: cacheLines[entry] = entries;
1846: } catch (ArrayIndexOutOfBoundsException ex2) {
1847: cacheLines = new int[entry * 2][];
1848: System.arraycopy(cache.fCacheLines, 0,
1849: cacheLines, 0, entry);
1850: cache.fCacheLines = cacheLines;
1851: cacheLines[entry] = entries;
1852: }
1853: } else {
// NOTE(review): this repeats the access that just threw, so the
// RuntimeException below looks unreachable - confirm the intent.
1854: entries = cacheLines[entry];
1855: throw new RuntimeException("RDR001 untested"); // REVISIT
1856: }
1857: }
1858: offset = 1 + ((entries[0] - 1) * SymbolCache.CACHE_RECORD_SIZE);
1859: depth++;
1860: }
// The cache has no record for the current character. From here on every
// remaining character gets a new record, and every character but the last
// also gets a new cache line.
1861: if (offset < 0)
1862: offset = 1 + (entries[0] * SymbolCache.CACHE_RECORD_SIZE);
1863: while (true) {
1864: entries[0]++;
1865: try {
1866: entries[offset + SymbolCache.CHAR_OFFSET] = ch;
1867: } catch (ArrayIndexOutOfBoundsException ex) {
1868: int newSize = 1 + ((offset - 1) * 2);
1869: entries = new int[newSize];
1870: System.arraycopy(cacheLines[entry], 0, entries, 0,
1871: offset);
1872: cacheLines[entry] = entries;
1873: entries[offset + SymbolCache.CHAR_OFFSET] = ch;
1874: }
// Fetch the next character of the name (-1 at the end of the name), using
// the same three cases as in the matching loop above.
1875: if (b0 >= 0x80) {
1876: ch = getMultiByteSymbolChar(b0);
1877: b0 = fMostRecentByte;
1878: } else if (b0 == fastcheck
1879: || XMLCharacterProperties.fgAsciiNameChar[b0] == 0) {
1880: ch = -1;
1881: } else {
1882: ch = b0;
1883: fCharacterCounter++;
1884: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1885: b0 = loadNextByte();
1886: } else {
1887: fCurrentOffset++;
1888: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1889: fCurrentIndex++;
1890: try {
1891: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1892: } catch (ArrayIndexOutOfBoundsException ex) {
1893: b0 = slowLoadNextByte();
1894: }
1895: } else {
1896: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
1897: b0 = slowLoadNextByte();
1898: else
1899: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
1900: }
1901: }
1902: }
// Last character of the name: its record links to no further line.
1903: if (ch == -1) {
1904: entries[offset + SymbolCache.NEXT_OFFSET] = -1;
1905: break;
1906: }
// More characters follow: mark this record as an interior one (no symbol)
// and link it to a freshly allocated cache line.
1907: entry = cache.fCacheLineCount++;
1908: entries[offset + SymbolCache.INDEX_OFFSET] = -1;
1909: entries[offset + SymbolCache.NEXT_OFFSET] = entry;
1910: entries = new int[1 + (SymbolCache.INITIAL_CACHE_RECORD_COUNT * SymbolCache.CACHE_RECORD_SIZE)];
1911: try {
1912: cacheLines[entry] = entries;
1913: } catch (ArrayIndexOutOfBoundsException ex) {
1914: cacheLines = new int[entry * 2][];
1915: System.arraycopy(cache.fCacheLines, 0, cacheLines, 0,
1916: entry);
1917: cache.fCacheLines = cacheLines;
1918: cacheLines[entry] = entries;
1919: }
// The new line is empty, so its first record starts at slot 1.
1920: offset = 1;
1921: try {
1922: symbolChars[cache.fSymbolCharsOffset] = (char) ch;
1923: } catch (ArrayIndexOutOfBoundsException ex) {
1924: symbolChars = new char[cache.fSymbolCharsOffset * 2];
1925: System.arraycopy(cache.fSymbolChars, 0, symbolChars, 0,
1926: cache.fSymbolCharsOffset);
1927: cache.fSymbolChars = symbolChars;
1928: symbolChars[cache.fSymbolCharsOffset] = (char) ch;
1929: }
1930: cache.fSymbolCharsOffset++;
1931: }
1932:
// Create the symbol for the new name, anchored at the record of its last
// character.
1933: int result = fStringPool.createNonMatchingSymbol(startOffset,
1934: entry, entries, offset);
1935: return result;
1936: }
1937:
1938: //
1939: // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1940: //
1941: private int recognizeMarkup(int b0, QName element) throws Exception {
1942: switch (b0) {
1943: case 0:
1944: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
1945: case '?':
1946: fCharacterCounter++;
1947: loadNextByte();
1948: return XMLEntityHandler.CONTENT_RESULT_START_OF_PI;
1949: case '!':
1950: fCharacterCounter++;
1951: b0 = loadNextByte();
1952: if (b0 == 0) {
1953: fCharacterCounter--;
1954: fCurrentOffset--;
1955: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
1956: }
1957: if (b0 == '-') {
1958: fCharacterCounter++;
1959: b0 = loadNextByte();
1960: if (b0 == 0) {
1961: fCharacterCounter -= 2;
1962: fCurrentOffset -= 2;
1963: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
1964: }
1965: if (b0 == '-') {
1966: fCharacterCounter++;
1967: b0 = loadNextByte();
1968: return XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT;
1969: }
1970: break;
1971: }
1972: if (b0 == '[') {
1973: for (int i = 0; i < 6; i++) {
1974: fCharacterCounter++;
1975: b0 = loadNextByte();
1976: if (b0 == 0) {
1977: fCharacterCounter -= (2 + i);
1978: fCurrentOffset -= (2 + i);
1979: return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
1980: }
1981: if (b0 != cdata_string[i]) {
1982: return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
1983: }
1984: }
1985: fCharacterCounter++;
1986: loadNextByte();
1987: return XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT;
1988: }
1989: break;
1990: case '/':
1991: fCharacterCounter++;
1992: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
1993: b0 = loadNextByte();
1994: } else {
1995: fCurrentOffset++;
1996: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
1997: fCurrentIndex++;
1998: try {
1999: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2000: } catch (ArrayIndexOutOfBoundsException ex) {
2001: b0 = slowLoadNextByte();
2002: }
2003: } else {
2004: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
2005: b0 = slowLoadNextByte();
2006: else
2007: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2008: }
2009: }
2010: int expectedName = element.rawname;
2011: fStringPool
2012: .getCharArrayRange(expectedName, fCharArrayRange);
2013: char[] expected = fCharArrayRange.chars;
2014: int offset = fCharArrayRange.offset;
2015: int len = fCharArrayRange.length;
2016: //
2017: // DEFECT !! - needs UTF8 multibyte support...
2018: //
2019: if (b0 == expected[offset++]) {
2020: UTF8DataChunk savedChunk = fCurrentChunk;
2021: int savedIndex = fCurrentIndex;
2022: int savedOffset = fCurrentOffset;
2023: for (int i = 1; i < len; i++) {
2024: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
2025: b0 = loadNextByte();
2026: } else {
2027: fCurrentOffset++;
2028: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
2029: fCurrentIndex++;
2030: try {
2031: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2032: } catch (ArrayIndexOutOfBoundsException ex) {
2033: b0 = slowLoadNextByte();
2034: }
2035: } else {
2036: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
2037: b0 = slowLoadNextByte();
2038: else
2039: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2040: }
2041: }
2042: //
2043: // DEFECT !! - needs UTF8 multibyte support...
2044: //
2045: if (b0 != expected[offset++]) {
2046: fCurrentChunk = savedChunk;
2047: fCurrentIndex = savedIndex;
2048: fCurrentOffset = savedOffset;
2049: fMostRecentData = fCurrentChunk.toByteArray();
2050: fMostRecentByte = fMostRecentData[savedIndex] & 0xFF;
2051: return XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG;
2052: }
2053: }
2054: fCharacterCounter += len; // REVISIT - double check this...
2055: fCharacterCounter++;
2056: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
2057: b0 = loadNextByte();
2058: } else {
2059: fCurrentOffset++;
2060: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
2061: fCurrentIndex++;
2062: try {
2063: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2064: } catch (ArrayIndexOutOfBoundsException ex) {
2065: b0 = slowLoadNextByte();
2066: }
2067: } else {
2068: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
2069: b0 = slowLoadNextByte();
2070: else
2071: b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
2072: }
2073: }
2074: if (b0 == '>') {
2075: fCharacterCounter++;
2076: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
2077: loadNextByte();
2078: } else {
2079: fCurrentOffset++;
2080: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
2081: fCurrentIndex++;
2082: try {
2083: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
2084: } catch (ArrayIndexOutOfBoundsException ex) {
2085: slowLoadNextByte();
2086: }
2087: } else {
2088: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
2089: slowLoadNextByte();
2090: else
2091: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
2092: }
2093: }
2094: return XMLEntityHandler.CONTENT_RESULT_MATCHING_ETAG;
2095: }
2096: while (b0 == 0x20 || b0 == 0x09 || b0 == 0x0A
2097: || b0 == 0x0D) {
2098: if (b0 == 0x0A) {
2099: fLinefeedCounter++;
2100: fCharacterCounter = 1;
2101: b0 = loadNextByte();
2102: } else if (b0 == 0x0D) {
2103: fCarriageReturnCounter++;
2104: fCharacterCounter = 1;
2105: b0 = loadNextByte();
2106: if (b0 == 0x0A) {
2107: fLinefeedCounter++;
2108: b0 = loadNextByte();
2109: }
2110: } else {
2111: fCharacterCounter++;
2112: b0 = loadNextByte();
2113: }
2114: if (b0 == '>') {
2115: fCharacterCounter++;
2116: if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
2117: loadNextByte();
2118: } else {
2119: fCurrentOffset++;
2120: if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
2121: fCurrentIndex++;
2122: try {
2123: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
2124: } catch (ArrayIndexOutOfBoundsException ex) {
2125: slowLoadNextByte();
2126: }
2127: } else {
2128: if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
2129: slowLoadNextByte();
2130: else
2131: fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF;
2132: }
2133: }
2134: return XMLEntityHandler.CONTENT_RESULT_MATCHING_ETAG;
2135: }
2136: }
2137: fCurrentChunk = savedChunk;
2138: fCurrentIndex = savedIndex;
2139: fCurrentOffset = savedOffset;
2140: fMostRecentData = fCurrentChunk.toByteArray();
2141: fMostRecentByte = fMostRecentData[savedIndex] & 0xFF;
2142: }
2143: return XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG;
2144: default:
2145: return XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT;
2146: }
2147: return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
2148: }
2149:
2150: private int recognizeReference(int ch) throws Exception {
2151: if (ch == 0) {
2152: return XMLEntityHandler.CONTENT_RESULT_REFERENCE_END_OF_INPUT;
2153: }
2154: //
2155: // [67] Reference ::= EntityRef | CharRef
2156: // [68] EntityRef ::= '&' Name ';'
2157: // [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2158: //
2159: if (ch == '#') {
2160: fCharacterCounter++;
2161: loadNextByte();
2162: return XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF;
2163: } else {
2164: return XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF;
2165: }
2166: }
2167:
/**
 * Scans character data starting at the most recently loaded byte and
 * reports it to fCharDataHandler, then returns a code describing what
 * stopped the scan.
 *
 * Data is delivered either as a slice of fCharacters (when
 * fSendCharDataAsCharArray is set) or as a string index obtained from
 * addString() over the byte offsets [charDataOffset, end). A run that
 * consists only of whitespace up to the terminating construct is reported
 * through processWhitespace(); anything else through processCharacters().
 * A carriage return is copied as 0x0A, and a 0x0A directly following a
 * carriage return is not copied a second time.
 *
 * The long if/else blocks keyed on USE_OUT_OF_LINE_LOAD_NEXT_BYTE and
 * USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE are hand-expanded variants of "load the
 * next byte into ch" (apparently an inlined form of loadNextByte() --
 * TODO confirm against its definition).
 *
 * @param element passed through unchanged to recognizeMarkup() when a '<'
 *                is found outside a CDATA section
 * @return the result of recognizeMarkup() or recognizeReference() when a
 *         '<' or '&' ends the data outside a CDATA section,
 *         CONTENT_RESULT_END_OF_CDSECT when "]]>" is found, or
 *         CONTENT_RESULT_INVALID_CHAR when an invalid byte or an invalid
 *         multi-byte sequence is found
 * @throws Exception propagated from the byte-loading and handler calls
 */
public int scanContent(QName element) throws Exception {
    // clearPreviousChunk() is retried on every call until it reports true.
    if (fCallClearPreviousChunk
            && fCurrentChunk.clearPreviousChunk())
        fCallClearPreviousChunk = false;
    // Start a fresh run of character data at the current position.
    fCharDataLength = 0;
    int charDataOffset = fCurrentOffset;
    int ch = fMostRecentByte;
    //
    // Phase 1: classify the first byte. The fgAsciiWSCharData table is used
    // here so that leading whitespace (class 5) can be told apart from
    // other character data.
    //
    if (ch < 0x80) {
        switch (XMLCharacterProperties.fgAsciiWSCharData[ch]) {
        case 0: // ordinary character data
            if (fSendCharDataAsCharArray) {
                // Same logic as appendCharData(), expanded in place.
                try {
                    fCharacters[fCharDataLength] = (char) ch;
                    fCharDataLength++;
                } catch (ArrayIndexOutOfBoundsException ex) {
                    slowAppendCharData(ch);
                }
            }
            fCharacterCounter++;
            // Load the next byte into ch.
            if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
                ch = loadNextByte();
            } else {
                fCurrentOffset++;
                if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
                    fCurrentIndex++;
                    try {
                        ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
                    } catch (ArrayIndexOutOfBoundsException ex) {
                        ch = slowLoadNextByte();
                    }
                } else {
                    if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
                        ch = slowLoadNextByte();
                    else
                        ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
                }
            }
            break;
        case 1: // '<'
            fCharacterCounter++;
            // Load the next byte into ch.
            if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
                ch = loadNextByte();
            } else {
                fCurrentOffset++;
                if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
                    fCurrentIndex++;
                    try {
                        ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
                    } catch (ArrayIndexOutOfBoundsException ex) {
                        ch = slowLoadNextByte();
                    }
                } else {
                    if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
                        ch = slowLoadNextByte();
                    else
                        ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
                }
            }
            // No data has been collected yet, so nothing is reported to the
            // handler before markup recognition.
            if (!fInCDSect) {
                return recognizeMarkup(ch, element);
            }
            // Inside a CDATA section '<' is plain character data.
            if (fSendCharDataAsCharArray)
                appendCharData('<');
            break;
        case 2: // '&'
            fCharacterCounter++;
            ch = loadNextByte();
            if (!fInCDSect) {
                return recognizeReference(ch);
            }
            // Inside a CDATA section '&' is plain character data.
            if (fSendCharDataAsCharArray)
                appendCharData('&');
            break;
        case 3: // ']'
            fCharacterCounter++;
            ch = loadNextByte();
            if (ch != ']') {
                // A lone ']' is ordinary data.
                if (fSendCharDataAsCharArray)
                    appendCharData(']');
                break;
            }
            // Saw "]]"; look one byte ahead for '>'.
            if (fCurrentIndex + 1 == UTF8DataChunk.CHUNK_SIZE) {
                // The look-ahead byte is past the end of this chunk's
                // array: save the position, load the byte, and restore the
                // position if it is not '>'.
                UTF8DataChunk saveChunk = fCurrentChunk;
                int saveIndex = fCurrentIndex;
                int saveOffset = fCurrentOffset;
                if (loadNextByte() != '>') {
                    fCurrentChunk = saveChunk;
                    fCurrentIndex = saveIndex;
                    fCurrentOffset = saveOffset;
                    fMostRecentData = fCurrentChunk.toByteArray();
                    fMostRecentByte = ']';
                    if (fSendCharDataAsCharArray)
                        appendCharData(']');
                    break;
                }
            } else {
                // The look-ahead byte is in the current array: peek at it
                // without moving.
                if (fMostRecentData[fCurrentIndex + 1] != '>') {
                    if (fSendCharDataAsCharArray)
                        appendCharData(']');
                    break;
                }
                fCurrentIndex++;
                fCurrentOffset++;
            }
            // "]]>" found: step past the '>' and count the remaining two
            // characters (the first ']' was counted above).
            loadNextByte();
            fCharacterCounter += 2;
            return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
        case 4: // invalid char
            // A zero byte at the end of the input switches readers; the
            // result code is the same either way.
            if (ch == 0 && atEOF(fCurrentOffset + 1)) {
                changeReaders();
                return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; // REVISIT - not quite...
            }
            return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
        case 5: // whitespace (0x20, 0x09, 0x0A or 0x0D, per the loop condition below)
            //
            // Consume the whole run of leading whitespace, maintaining the
            // line/column counters.
            //
            do {
                if (ch == 0x0A) {
                    fLinefeedCounter++;
                    fCharacterCounter = 1;
                } else if (ch == 0x0D) {
                    fCarriageReturnCounter++;
                    fCharacterCounter = 1;
                    // Load the byte after the CR into ch.
                    if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
                        ch = loadNextByte();
                    } else {
                        fCurrentOffset++;
                        if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
                            fCurrentIndex++;
                            try {
                                ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
                            } catch (ArrayIndexOutOfBoundsException ex) {
                                ch = slowLoadNextByte();
                            }
                        } else {
                            if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
                                ch = slowLoadNextByte();
                            else
                                ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
                        }
                    }
                    if (ch != 0x0A) {
                        // CR not followed by LF: emit a single LF for it.
                        // ch already holds the following byte, so either
                        // re-enter the loop (more whitespace) or leave it.
                        if (fSendCharDataAsCharArray)
                            appendCharData(0x0A);
                        if (ch == 0x20 || ch == 0x09 || ch == 0x0D)
                            continue;
                        break;
                    }
                    // CR LF: the LF is appended once by the code below.
                    fLinefeedCounter++;
                } else {
                    fCharacterCounter++;
                }
                if (fSendCharDataAsCharArray) {
                    // Same logic as appendCharData(), expanded in place.
                    try {
                        fCharacters[fCharDataLength] = (char) ch;
                        fCharDataLength++;
                    } catch (ArrayIndexOutOfBoundsException ex) {
                        slowAppendCharData(ch);
                    }
                }
                // Load the next byte into ch.
                if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
                    ch = loadNextByte();
                } else {
                    fCurrentOffset++;
                    if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
                        fCurrentIndex++;
                        try {
                            ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
                        } catch (ArrayIndexOutOfBoundsException ex) {
                            ch = slowLoadNextByte();
                        }
                    } else {
                        if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
                            ch = slowLoadNextByte();
                        else
                            ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
                    }
                }
            } while (ch == 0x20 || ch == 0x09 || ch == 0x0A
                    || ch == 0x0D);
            //
            // Classify the byte that ended the whitespace run. If it ends
            // the data, everything collected so far is whitespace only and
            // is reported through the whitespace callbacks.
            //
            if (ch < 0x80) {
                switch (XMLCharacterProperties.fgAsciiCharData[ch]) {
                case 0: // ordinary character: the run is not whitespace-only
                    if (fSendCharDataAsCharArray)
                        appendCharData(ch);
                    fCharacterCounter++;
                    ch = loadNextByte();
                    break;
                case 1: // '<'
                    if (!fInCDSect) {
                        // Same reporting as whitespace(charDataOffset,
                        // fCurrentOffset), expanded in place.
                        if (fSendCharDataAsCharArray) {
                            fCharDataHandler
                                    .processWhitespace(fCharacters,
                                            0, fCharDataLength);
                        } else {
                            int stringIndex = addString(
                                    charDataOffset, fCurrentOffset
                                            - charDataOffset);
                            fCharDataHandler
                                    .processWhitespace(stringIndex);
                        }
                        fCharacterCounter++;
                        // Load the next byte into ch.
                        if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
                            ch = loadNextByte();
                        } else {
                            fCurrentOffset++;
                            if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
                                fCurrentIndex++;
                                try {
                                    ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
                                } catch (ArrayIndexOutOfBoundsException ex) {
                                    ch = slowLoadNextByte();
                                }
                            } else {
                                if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
                                    ch = slowLoadNextByte();
                                else
                                    ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
                            }
                        }
                        return recognizeMarkup(ch, element);
                    }
                    // Inside a CDATA section '<' is plain character data.
                    if (fSendCharDataAsCharArray)
                        appendCharData('<');
                    fCharacterCounter++;
                    ch = loadNextByte();
                    break;
                case 2: // '&'
                    if (!fInCDSect) {
                        whitespace(charDataOffset, fCurrentOffset);
                        fCharacterCounter++;
                        ch = loadNextByte();
                        return recognizeReference(ch);
                    }
                    // Inside a CDATA section '&' is plain character data.
                    if (fSendCharDataAsCharArray)
                        appendCharData('&');
                    fCharacterCounter++;
                    ch = loadNextByte();
                    break;
                case 3: // ']'
                    // Remember where the data ends in case this is "]]>".
                    int endOffset = fCurrentOffset;
                    ch = loadNextByte();
                    if (ch != ']') {
                        fCharacterCounter++;
                        if (fSendCharDataAsCharArray)
                            appendCharData(']');
                        break;
                    }
                    // Saw "]]"; look one byte ahead for '>' (same two-way
                    // look-ahead as in phase 1).
                    if (fCurrentIndex + 1 == UTF8DataChunk.CHUNK_SIZE) {
                        UTF8DataChunk saveChunk = fCurrentChunk;
                        int saveIndex = fCurrentIndex;
                        int saveOffset = fCurrentOffset;
                        if (loadNextByte() != '>') {
                            fCurrentChunk = saveChunk;
                            fCurrentIndex = saveIndex;
                            fCurrentOffset = saveOffset;
                            fMostRecentData = fCurrentChunk
                                    .toByteArray();
                            fMostRecentByte = ']';
                            fCharacterCounter++;
                            if (fSendCharDataAsCharArray)
                                appendCharData(']');
                            break;
                        }
                    } else {
                        if (fMostRecentData[fCurrentIndex + 1] != '>') {
                            fCharacterCounter++;
                            if (fSendCharDataAsCharArray)
                                appendCharData(']');
                            break;
                        }
                        fCurrentIndex++;
                        fCurrentOffset++;
                    }
                    // "]]>" found: report the whitespace that preceded it.
                    loadNextByte();
                    whitespace(charDataOffset, endOffset);
                    fCharacterCounter += 3;
                    return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
                case 4: // invalid char
                    whitespace(charDataOffset, fCurrentOffset);
                    if (ch == 0 && atEOF(fCurrentOffset + 1)) {
                        changeReaders();
                        return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; // REVISIT - not quite...
                    }
                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
                }
            } else {
                // Non-ASCII lead byte after the whitespace run.
                if (fSendCharDataAsCharArray) {
                    if (!copyMultiByteCharData(ch)) {
                        whitespace(charDataOffset, fCurrentOffset);
                        return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
                    }
                } else if (!skipMultiByteCharData(ch)) {
                    whitespace(charDataOffset, fCurrentOffset);
                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
                }
            }
            break;
        }
    } else {
        // The first byte is a non-ASCII lead byte. Nothing has been
        // collected yet, so a failure reports no data.
        if (fSendCharDataAsCharArray) {
            if (!copyMultiByteCharData(ch)) {
                return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
            }
        } else {
            if (!skipMultiByteCharData(ch)) {
                return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
            }
        }
    }
    //
    // Phase 2: the data is known not to be whitespace-only. Consume runs
    // of plain ASCII in bulk, then handle whatever byte stopped the run.
    // The loop only exits through a return.
    //
    if (fSendCharDataAsCharArray)
        ch = copyAsciiCharData();
    else
        ch = skipAsciiCharData();
    while (true) {
        if (ch < 0x80) {
            switch (XMLCharacterProperties.fgAsciiCharData[ch]) {
            case 0: // ordinary character data
                if (fSendCharDataAsCharArray)
                    appendCharData(ch);
                fCharacterCounter++;
                ch = loadNextByte();
                break;
            case 1: // '<'
                if (!fInCDSect) {
                    // Same reporting as characters(charDataOffset,
                    // fCurrentOffset), expanded in place.
                    if (fSendCharDataAsCharArray) {
                        fCharDataHandler.processCharacters(
                                fCharacters, 0, fCharDataLength);
                    } else {
                        int stringIndex = addString(charDataOffset,
                                fCurrentOffset - charDataOffset);
                        fCharDataHandler
                                .processCharacters(stringIndex);
                    }
                    fCharacterCounter++;
                    // Load the next byte into ch.
                    if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) {
                        ch = loadNextByte();
                    } else {
                        fCurrentOffset++;
                        if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) {
                            fCurrentIndex++;
                            try {
                                ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
                            } catch (ArrayIndexOutOfBoundsException ex) {
                                ch = slowLoadNextByte();
                            }
                        } else {
                            if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE)
                                ch = slowLoadNextByte();
                            else
                                ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF);
                        }
                    }
                    return recognizeMarkup(ch, element);
                }
                // Inside a CDATA section '<' is plain character data.
                if (fSendCharDataAsCharArray)
                    appendCharData('<');
                fCharacterCounter++;
                ch = loadNextByte();
                break;
            case 2: // '&'
                if (!fInCDSect) {
                    characters(charDataOffset, fCurrentOffset);
                    fCharacterCounter++;
                    ch = loadNextByte();
                    return recognizeReference(ch);
                }
                // Inside a CDATA section '&' is plain character data.
                if (fSendCharDataAsCharArray)
                    appendCharData('&');
                fCharacterCounter++;
                ch = loadNextByte();
                break;
            case 3: // ']'
                // Remember where the data ends in case this is "]]>".
                int endOffset = fCurrentOffset;
                ch = loadNextByte();
                if (ch != ']') {
                    fCharacterCounter++;
                    if (fSendCharDataAsCharArray)
                        appendCharData(']');
                    break;
                }
                // Saw "]]"; look one byte ahead for '>' (same two-way
                // look-ahead as in phase 1).
                if (fCurrentIndex + 1 == UTF8DataChunk.CHUNK_SIZE) {
                    UTF8DataChunk saveChunk = fCurrentChunk;
                    int saveIndex = fCurrentIndex;
                    int saveOffset = fCurrentOffset;
                    if (loadNextByte() != '>') {
                        fCurrentChunk = saveChunk;
                        fCurrentIndex = saveIndex;
                        fCurrentOffset = saveOffset;
                        fMostRecentData = fCurrentChunk
                                .toByteArray();
                        fMostRecentByte = ']';
                        fCharacterCounter++;
                        if (fSendCharDataAsCharArray)
                            appendCharData(']');
                        break;
                    }
                } else {
                    if (fMostRecentData[fCurrentIndex + 1] != '>') {
                        fCharacterCounter++;
                        if (fSendCharDataAsCharArray)
                            appendCharData(']');
                        break;
                    }
                    fCurrentIndex++;
                    fCurrentOffset++;
                }
                // "]]>" found: report the data that preceded it.
                loadNextByte();
                characters(charDataOffset, endOffset);
                fCharacterCounter += 3;
                return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
            case 4: // invalid char
                // In the fgAsciiCharData table LF and CR also land in this
                // class; they are valid data and are handled first.
                if (ch == 0x0A) {
                    if (fSendCharDataAsCharArray)
                        appendCharData(ch);
                    fLinefeedCounter++;
                    fCharacterCounter = 1;
                    ch = loadNextByte();
                    break;
                }
                if (ch == 0x0D) {
                    // CR is copied as LF; a directly following LF is
                    // counted but not copied again.
                    if (fSendCharDataAsCharArray)
                        appendCharData(0x0A);
                    fCarriageReturnCounter++;
                    fCharacterCounter = 1;
                    ch = loadNextByte();
                    if (ch == 0x0A) {
                        fLinefeedCounter++;
                        ch = loadNextByte();
                    }
                    break;
                }
                // Genuinely invalid: report what was collected, then fail.
                characters(charDataOffset, fCurrentOffset);
                if (ch == 0 && atEOF(fCurrentOffset + 1)) {
                    changeReaders();
                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; // REVISIT - not quite...
                }
                return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
            }
        } else {
            // Non-ASCII lead byte: consume one multi-byte sequence, or
            // report the collected data and fail if it is invalid.
            if (fSendCharDataAsCharArray) {
                if (!copyMultiByteCharData(ch)) {
                    characters(charDataOffset, fCurrentOffset);
                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
                }
            } else if (!skipMultiByteCharData(ch)) {
                characters(charDataOffset, fCurrentOffset);
                return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
            }
            // Continue with the byte that follows the sequence.
            ch = fMostRecentByte;
        }
    }
}
2619:
2620: private boolean copyMultiByteCharData(int b0) throws Exception {
2621: UTF8DataChunk saveChunk = fCurrentChunk;
2622: int saveOffset = fCurrentOffset;
2623: int saveIndex = fCurrentIndex;
2624: int b1 = loadNextByte();
2625: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx (0x80 to 0x7ff)
2626: int ch = ((0x1f & b0) << 6) + (0x3f & b1);
2627: appendCharData(ch); // yyy yyxx xxxx (0x80 to 0x7ff)
2628: loadNextByte();
2629: return true;
2630: }
2631: int b2 = loadNextByte();
2632: if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
2633: // ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
2634: // if ((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE)
2635: if ((b0 == 0xED && b1 >= 0xA0)
2636: || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
2637: fCurrentChunk = saveChunk;
2638: fCurrentIndex = saveIndex;
2639: fCurrentOffset = saveOffset;
2640: fMostRecentData = saveChunk.toByteArray();
2641: fMostRecentByte = b0;
2642: return false;
2643: }
2644: int ch = ((0x0f & b0) << 12) + ((0x3f & b1) << 6)
2645: + (0x3f & b2);
2646: appendCharData(ch); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
2647: loadNextByte();
2648: return true;
2649: }
2650:
2651: int b3 = loadNextByte(); // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
2652: // ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3); // u uuuu zzzz yyyy yyxx xxxx (0x10000 to 0x1ffff)
2653: // if (ch >= 0x110000)
2654: if ((0xf8 & b0) == 0xf0) {
2655: if (b0 > 0xF4 || (b0 == 0xF4 && b1 >= 0x90)) {
2656: fCurrentChunk = saveChunk;
2657: fCurrentIndex = saveIndex;
2658: fCurrentOffset = saveOffset;
2659: fMostRecentData = saveChunk.toByteArray();
2660: fMostRecentByte = b0;
2661: return false;
2662: }
2663: int ch = ((0x0f & b0) << 18) + ((0x3f & b1) << 12)
2664: + ((0x3f & b2) << 6) + (0x3f & b3);
2665: if (ch < 0x10000) {
2666: appendCharData(ch);
2667: } else {
2668: appendCharData(((ch - 0x00010000) >> 10) + 0xd800);
2669: appendCharData(((ch - 0x00010000) & 0x3ff) + 0xdc00);
2670: }
2671: loadNextByte();
2672: return true;
2673: } else {
2674: fCurrentChunk = saveChunk;
2675: fCurrentIndex = saveIndex;
2676: fCurrentOffset = saveOffset;
2677: fMostRecentData = saveChunk.toByteArray();
2678: fMostRecentByte = b0;
2679: return false;
2680: }
2681: }
2682:
2683: private boolean skipMultiByteCharData(int b0) throws Exception {
2684: UTF8DataChunk saveChunk = fCurrentChunk;
2685: int saveOffset = fCurrentOffset;
2686: int saveIndex = fCurrentIndex;
2687: int b1 = loadNextByte();
2688: if ((0xe0 & b0) == 0xc0) { // 110yyyyy 10xxxxxx (0x80 to 0x7ff)
2689: loadNextByte();
2690: return true;
2691: }
2692: int b2 = loadNextByte();
2693: if ((0xf0 & b0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
2694: // ch = ((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2); // zzzz yyyy yyxx xxxx (0x800 to 0xffff)
2695: // if ((ch >= 0xD800 && ch <= 0xDFFF) || ch >= 0xFFFE)
2696: if ((b0 == 0xED && b1 >= 0xA0)
2697: || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) {
2698: fCurrentChunk = saveChunk;
2699: fCurrentIndex = saveIndex;
2700: fCurrentOffset = saveOffset;
2701: fMostRecentData = saveChunk.toByteArray();
2702: fMostRecentByte = b0;
2703: return false;
2704: }
2705: loadNextByte();
2706: return true;
2707: }
2708: int b3 = loadNextByte(); // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
2709: // ch = ((0x0f & b0)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3); // u uuuu zzzz yyyy yyxx xxxx (0x10000 to 0x1ffff)
2710: // if (ch >= 0x110000)
2711: if (b0 > 0xF4 || (b0 == 0xF4 && b1 >= 0x90)) {
2712: fCurrentChunk = saveChunk;
2713: fCurrentIndex = saveIndex;
2714: fCurrentOffset = saveOffset;
2715: fMostRecentData = saveChunk.toByteArray();
2716: fMostRecentByte = b0;
2717: return false;
2718: }
2719: loadNextByte();
2720: return true;
2721: }
2722:
2723: private int copyAsciiCharData() throws Exception {
2724: int srcIndex = fCurrentIndex;
2725: int offset = fCurrentOffset - srcIndex;
2726: byte[] data = fMostRecentData;
2727: int dstIndex = fCharDataLength;
2728: boolean skiplf = false;
2729: while (true) {
2730: int ch;
2731: try {
2732: ch = data[srcIndex] & 0xFF;
2733: } catch (ArrayIndexOutOfBoundsException ex) {
2734: offset += srcIndex;
2735: slowLoadNextByte();
2736: srcIndex = 0;
2737: data = fMostRecentData;
2738: ch = data[srcIndex] & 0xFF;
2739: }
2740: if (ch >= 0x80) {
2741: fCurrentOffset = offset + srcIndex;
2742: fCurrentIndex = srcIndex;
2743: fMostRecentByte = ch;
2744: return ch;
2745: }
2746: if (XMLCharacterProperties.fgAsciiCharData[ch] == 0) {
2747: fCharacterCounter++;
2748: skiplf = false;
2749: } else if (ch == 0x0A) {
2750: fLinefeedCounter++;
2751: if (skiplf) {
2752: skiplf = false;
2753: srcIndex++;
2754: continue;
2755: }
2756: fCharacterCounter = 1;
2757: } else if (ch == 0x0D) {
2758: fCarriageReturnCounter++;
2759: fCharacterCounter = 1;
2760: skiplf = true;
2761: ch = 0x0A;
2762: } else {
2763: fCurrentOffset = offset + srcIndex;
2764: fCurrentIndex = srcIndex;
2765: fMostRecentByte = ch;
2766: return ch;
2767: }
2768: srcIndex++;
2769: try {
2770: fCharacters[fCharDataLength] = (char) ch;
2771: fCharDataLength++;
2772: } catch (ArrayIndexOutOfBoundsException ex) {
2773: slowAppendCharData(ch);
2774: }
2775: }
2776: }
2777:
2778: private int skipAsciiCharData() throws Exception {
2779: int srcIndex = fCurrentIndex;
2780: int offset = fCurrentOffset - srcIndex;
2781: byte[] data = fMostRecentData;
2782: while (true) {
2783: int ch;
2784: try {
2785: ch = data[srcIndex] & 0xFF;
2786: } catch (ArrayIndexOutOfBoundsException ex) {
2787: offset += srcIndex;
2788: slowLoadNextByte();
2789: srcIndex = 0;
2790: data = fMostRecentData;
2791: ch = data[srcIndex] & 0xFF;
2792: }
2793: if (ch >= 0x80) {
2794: fCurrentOffset = offset + srcIndex;
2795: fCurrentIndex = srcIndex;
2796: fMostRecentByte = ch;
2797: return ch;
2798: }
2799: if (XMLCharacterProperties.fgAsciiCharData[ch] == 0) {
2800: fCharacterCounter++;
2801: } else if (ch == 0x0A) {
2802: fLinefeedCounter++;
2803: fCharacterCounter = 1;
2804: } else if (ch == 0x0D) {
2805: fCarriageReturnCounter++;
2806: fCharacterCounter = 1;
2807: } else {
2808: fCurrentOffset = offset + srcIndex;
2809: fCurrentIndex = srcIndex;
2810: fMostRecentByte = ch;
2811: return ch;
2812: }
2813: srcIndex++;
2814: }
2815: }
2816:
// Buffer for decoded character data; appendCharData() writes into it and
// characters()/whitespace() hand it to fCharDataHandler.
private char[] fCharacters = new char[UTF8DataChunk.CHUNK_SIZE];
// Number of chars currently held in fCharacters (next write position).
private int fCharDataLength = 0;
2819:
2820: private void appendCharData(int ch) throws Exception {
2821: try {
2822: fCharacters[fCharDataLength] = (char) ch;
2823: fCharDataLength++;
2824: } catch (ArrayIndexOutOfBoundsException ex) {
2825: slowAppendCharData(ch);
2826: }
2827: }
2828:
2829: private void slowAppendCharData(int ch) throws Exception {
2830: // flush the buffer...
2831: characters(0, fCharDataLength); /* DEFECT !! whitespace this long is unlikely, but possible */
2832: fCharDataLength = 0;
2833: fCharacters[fCharDataLength++] = (char) ch;
2834: }
2835:
2836: private void characters(int offset, int endOffset) throws Exception {
2837: //
2838: // REVISIT - need more up front bounds checking code of params...
2839: //
2840: if (!fSendCharDataAsCharArray) {
2841: int stringIndex = addString(offset, endOffset - offset);
2842: fCharDataHandler.processCharacters(stringIndex);
2843: return;
2844: }
2845: fCharDataHandler.processCharacters(fCharacters, 0,
2846: fCharDataLength);
2847: }
2848:
2849: private void whitespace(int offset, int endOffset) throws Exception {
2850: //
2851: // REVISIT - need more up front bounds checking code of params...
2852: //
2853: if (!fSendCharDataAsCharArray) {
2854: int stringIndex = addString(offset, endOffset - offset);
2855: fCharDataHandler.processWhitespace(stringIndex);
2856: return;
2857: }
2858: fCharDataHandler.processWhitespace(fCharacters, 0,
2859: fCharDataLength);
2860: }
2861:
//
// Reader state
//
// The characters "CDATA["; presumably matched after "<![" when a CDATA
// section start is recognized -- TODO confirm at the point of use.
private static final char[] cdata_string = { 'C', 'D', 'A', 'T',
        'A', '[' };
// Scratch range filled in by fStringPool.getCharArrayRange().
private StringPool.CharArrayRange fCharArrayRange = null;
// Source of raw bytes; fillCurrentChunk() sets it to null once the end of
// the stream has been reached.
private InputStream fInputStream = null;
// String pool used for name lookups via getCharArrayRange().
private StringPool fStringPool = null;
// Chunk that is currently being scanned.
private UTF8DataChunk fCurrentChunk = null;
// Index into fMostRecentData of the current byte.
private int fCurrentIndex = 0;
// Byte array of the chunk that is currently being scanned.
private byte[] fMostRecentData = null;
// Value of the current byte, masked to 0..255.
private int fMostRecentByte = 0;
// Running total of bytes that fillCurrentChunk() has read from the stream.
private int fLength = 0;
// NOTE(review): not referenced in this part of the file; purpose to be
// confirmed at the point of use.
private boolean fCalledCharPropInit = false;
// True until fCurrentChunk.clearPreviousChunk() has returned true in
// scanContent().
private boolean fCallClearPreviousChunk = true;
2877:
2878: //
2879: //
2880: //
2881: private int fillCurrentChunk() throws Exception {
2882: byte[] buf = fCurrentChunk.toByteArray();
2883: if (fInputStream == null) {
2884: if (buf == null)
2885: buf = new byte[1];
2886: buf[0] = 0;
2887: fMostRecentData = buf;
2888: fCurrentIndex = 0;
2889: fCurrentChunk.setByteArray(fMostRecentData);
2890: return (fMostRecentByte = fMostRecentData[0] & 0xFF);
2891: }
2892: if (buf == null)
2893: buf = new byte[UTF8DataChunk.CHUNK_SIZE];
2894: int offset = 0;
2895: int capacity = UTF8DataChunk.CHUNK_SIZE;
2896: int result = 0;
2897: do {
2898: try {
2899: result = fInputStream.read(buf, offset, capacity);
2900: } catch (java.io.IOException ex) {
2901: result = -1;
2902: }
2903: if (result == -1) {
2904: //
2905: // We have reached the end of the stream.
2906: //
2907: fInputStream.close();
2908: fInputStream = null;
2909: try {
2910: buf[offset] = 0;
2911: } catch (ArrayIndexOutOfBoundsException ex) {
2912: }
2913: break;
2914: }
2915: if (result > 0) {
2916: offset += result;
2917: capacity -= result;
2918: }
2919: } while (capacity > 0);
2920: fMostRecentData = buf;
2921: fLength += offset;
2922: fCurrentIndex = 0;
2923: fCurrentChunk.setByteArray(fMostRecentData);
2924: return (fMostRecentByte = fMostRecentData[0] & 0xFF);
2925: }
2926: }
|