0001: /*
0002: * $Id: XMLEntityReaderImpl.java,v 1.4 2006/11/29 22:01:31 spericas Exp $
0003: */
0004:
0005: /*
0006: * The contents of this file are subject to the terms
0007: * of the Common Development and Distribution License
0008: * (the License). You may not use this file except in
0009: * compliance with the License.
0010: *
0011: * You can obtain a copy of the license at
0012: * https://glassfish.dev.java.net/public/CDDLv1.0.html.
0013: * See the License for the specific language governing
0014: * permissions and limitations under the License.
0015: *
0016: * When distributing Covered Code, include this CDDL
0017: * Header Notice in each file and include the License file
0018: * at https://glassfish.dev.java.net/public/CDDLv1.0.html.
0019: * If applicable, add the following below the CDDL Header,
0020: * with the fields enclosed by brackets [] replaced by
0021: * you own identifying information:
0022: * "Portions Copyrighted [year] [name of copyright owner]"
0023: *
0024: * [Name of File] [ver.__] [Date]
0025: *
0026: * Copyright 2006 Sun Microsystems Inc. All Rights Reserved
0027: */
0028:
0029: /*
0030: * The Apache Software License, Version 1.1
0031: *
0032: *
0033: * Copyright (c) 1999-2002 The Apache Software Foundation.
0034: * All rights reserved.
0035: *
0036: * Redistribution and use in source and binary forms, with or without
0037: * modification, are permitted provided that the following conditions
0038: * are met:
0039: *
0040: * 1. Redistributions of source code must retain the above copyright
0041: * notice, this list of conditions and the following disclaimer.
0042: *
0043: * 2. Redistributions in binary form must reproduce the above copyright
0044: * notice, this list of conditions and the following disclaimer in
0045: * the documentation and/or other materials provided with the
0046: * distribution.
0047: *
0048: * 3. The end-user documentation included with the redistribution,
0049: * if any, must include the following acknowledgment:
0050: * "This product includes software developed by the
0051: * Apache Software Foundation (http://www.apache.org/)."
0052: * Alternately, this acknowledgment may appear in the software itself,
0053: * if and wherever such third-party acknowledgments normally appear.
0054: *
0055: * 4. The names "Xerces" and "Apache Software Foundation" must
0056: * not be used to endorse or promote products derived from this
0057: * software without prior written permission. For written
0058: * permission, please contact apache@apache.org.
0059: *
0060: * 5. Products derived from this software may not be called "Apache",
0061: * nor may "Apache" appear in their name, without prior written
0062: * permission of the Apache Software Foundation.
0063: *
0064: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
0065: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0066: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0067: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
0068: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0069: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0070: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
0071: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0072: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
0073: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
0074: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0075: * SUCH DAMAGE.
0076: */
0077: /*
0078: * XMLEntityReaderImpl.java
0079: *
0080: * Created on December 2, 2002, 3:06 PM
0081: */
0082:
0083: package com.sun.xml.stream;
0084:
0085: import java.io.IOException;
0086: import java.io.InputStream;
0087: import java.io.InputStreamReader;
0088: import java.io.Reader;
0089:
0090: import java.util.Locale;
0091: import java.util.Vector;
0092: import com.sun.xml.stream.xerces.impl.io.ASCIIReader;
0093: import com.sun.xml.stream.xerces.impl.io.UCSReader;
0094: import com.sun.xml.stream.xerces.impl.io.UTF8Reader;
0095:
0096: import com.sun.xml.stream.xerces.impl.msg.XMLMessageFormatter;
0097: import com.sun.xml.stream.xerces.util.EncodingMap;
0098:
0099: import com.sun.xml.stream.xerces.util.SymbolTable;
0100: import com.sun.xml.stream.xerces.util.XMLChar;
0101: import com.sun.xml.stream.xerces.util.XMLStringBuffer;
0102: import com.sun.xml.stream.xerces.xni.QName;
0103: import com.sun.xml.stream.xerces.xni.XMLString;
0104: import com.sun.xml.stream.xerces.xni.parser.XMLComponentManager;
0105: import com.sun.xml.stream.xerces.xni.parser.XMLConfigurationException;
0106:
0107: /**
0108: * Implements the entity scanner methods.
0109: *
0110: * @author Neeraj Bajaj, Sun Microsystems
0111: * @author Andy Clark, IBM
0112: * @author Arnaud Le Hors, IBM
0113: * @author K.Venugopal Sun Microsystems
0114: *
0115: */
0116: public class XMLEntityReaderImpl extends XMLEntityReader {
0117:
0118: //fields
0119: //kvr : Below fields should be made non static.
0120: protected Entity.ScannedEntity fCurrentEntity = null;
0121:
0122: protected XMLEntityManager fEntityManager;
0123:
0124: /** Debug switching readers for encodings. */
0125: private static final boolean DEBUG_ENCODINGS = false;
0126: private Vector listeners = new Vector();
0127: public static final boolean[] validContent = new boolean[127];
0128: public static final boolean[] validNames = new boolean[127];
0129:
0130: /**
0131: * Debug printing of buffer. This debugging flag works best when you
0132: * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
0133: * 64 characters.
0134: */
0135: private static final boolean DEBUG_BUFFER = false;
0136: private static final boolean DEBUG_SKIP_STRING = false;
0137:
0138: protected SymbolTable fSymbolTable = null;
0139: protected XMLErrorReporter fErrorReporter = null;
0140: int[] whiteSpaceLookup = new int[100];
0141: //Fix me: Handle case where spaces in literal are more than 100.
0142: //a rare case.
0143: int whiteSpaceLen = 0;
0144: boolean whiteSpaceInfoNeeded = true;
0145: char[] scannedName = null;
0146: /**
0147: * Allow Java encoding names. This feature identifier is:
0148: * http://apache.org/xml/features/allow-java-encodings
0149: */
0150: protected boolean fAllowJavaEncodings;
0151:
0152: //Will be used only during internal subsets.
0153: //for appending data.
0154:
0155: /** Property identifier: symbol table. */
0156: protected static final String SYMBOL_TABLE = Constants.XERCES_PROPERTY_PREFIX
0157: + Constants.SYMBOL_TABLE_PROPERTY;
0158:
0159: /** Property identifier: error reporter. */
0160: protected static final String ERROR_REPORTER = Constants.XERCES_PROPERTY_PREFIX
0161: + Constants.ERROR_REPORTER_PROPERTY;
0162:
0163: /** Feature identifier: allow Java encodings. */
0164: protected static final String ALLOW_JAVA_ENCODINGS = Constants.XERCES_FEATURE_PREFIX
0165: + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;
0166:
0167: protected PropertyManager fPropertyManager = null;
0168:
0169: boolean isExternal = false;
static {
    // Character data fast path: TAB and every code from SPACE up to 126 is
    // accepted, except the two markup starters '&' and '<'.
    for (int code = ' '; code < 127; code++) {
        validContent[code] = true;
    }
    validContent['\t'] = true;
    validContent['&'] = false;
    validContent['<'] = false;

    // Name fast path: ASCII letters and digits ...
    for (char upper = 'A'; upper <= 'Z'; upper++) {
        validNames[upper] = true;
    }
    for (char lower = 'a'; lower <= 'z'; lower++) {
        validNames[lower] = true;
    }
    for (char digit = '0'; digit <= '9'; digit++) {
        validNames[digit] = true;
    }
    // ... plus the four ASCII punctuation marks allowed inside names.
    validNames['-'] = true;
    validNames['.'] = true;
    validNames[':'] = true;
    validNames['_'] = true;
}
0192:
0193: //
0194: // Constructors
0195: //
0196:
/**
 * Creates a reader that keeps a reference to the given entity manager.
 * <p>
 * The symbol table and error reporter are not set here; they are
 * assigned by one of the <code>reset</code> methods.
 *
 * @param entityManager the entity manager to remember in fEntityManager
 */
public XMLEntityReaderImpl(XMLEntityManager entityManager) {
    this.fEntityManager = entityManager;
} // <init>(XMLEntityManager)
0205:
/**
 * Creates a reader for the given entity manager and immediately
 * configures it from the supplied property manager.
 *
 * @param propertyManager source of the symbol table and error reporter,
 *                        passed on to {@link #reset(PropertyManager)}
 * @param entityManager   the entity manager to remember in fEntityManager
 */
public XMLEntityReaderImpl(PropertyManager propertyManager,
        XMLEntityManager entityManager) {
    this(entityManager);
    reset(propertyManager);
} // <init>(PropertyManager, XMLEntityManager)
0216:
0217: /**
0218: * Resets the components.
0219: */
0220: public void reset(PropertyManager propertyManager) {
0221: fSymbolTable = (SymbolTable) propertyManager
0222: .getProperty(SYMBOL_TABLE);
0223: fErrorReporter = (XMLErrorReporter) propertyManager
0224: .getProperty(ERROR_REPORTER);
0225: fCurrentEntity = null;
0226: whiteSpaceLen = 0;
0227: whiteSpaceInfoNeeded = true;
0228: scannedName = null;
0229: listeners.clear();
0230: }
0231:
0232: /**
0233: * Resets the component. The component can query the component manager
0234: * about any features and properties that affect the operation of the
0235: * component.
0236: *
0237: * @param componentManager The component manager.
0238: *
0239: * @throws SAXException Thrown by component on initialization error.
0240: * For example, if a feature or property is
0241: * required for the operation of the component, the
0242: * component manager may throw a
0243: * SAXNotRecognizedException or a
0244: * SAXNotSupportedException.
0245: */
0246: public void reset(XMLComponentManager componentManager)
0247: throws XMLConfigurationException {
0248: // xerces features
0249: try {
0250: fAllowJavaEncodings = componentManager
0251: .getFeature(ALLOW_JAVA_ENCODINGS);
0252: } catch (XMLConfigurationException e) {
0253: fAllowJavaEncodings = false;
0254: }
0255:
0256: //xerces properties
0257: fSymbolTable = (SymbolTable) componentManager
0258: .getProperty(SYMBOL_TABLE);
0259: fErrorReporter = (XMLErrorReporter) componentManager
0260: .getProperty(ERROR_REPORTER);
0261:
0262: } // reset(XMLComponentManager)
0263:
0264: /** set the instance of current scanned entity.
0265: * @param ScannedEntity
0266: */
0267:
0268: public void setCurrentEntity(Entity.ScannedEntity scannedEntity) {
0269: fCurrentEntity = scannedEntity;
0270: if (fCurrentEntity != null) {
0271: isExternal = fCurrentEntity.isExternal();
0272: if (DEBUG_BUFFER)
0273: System.out.println("Current Entity is "
0274: + scannedEntity.name);
0275: }
0276: }
0277:
0278: public Entity.ScannedEntity getCurrentEntity() {
0279: return fCurrentEntity;
0280: }
0281:
0282: //
0283: // XMLEntityReader methods
0284: //
0285:
0286: /**
0287: * Returns the base system identifier of the currently scanned
0288: * entity, or null if none is available.
0289: */
0290: public String getBaseSystemId() {
0291: return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation
0292: .getExpandedSystemId()
0293: : null;
0294: } // getBaseSystemId():String
0295:
0296: ///////////// Locator methods start.
0297: public int getLineNumber() {
0298: //if the entity is closed, we should return -1
0299: //xxx at first place why such call should be there...
0300: return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1;
0301: }
0302:
0303: public int getColumnNumber() {
0304: //if the entity is closed, we should return -1
0305: //xxx at first place why such call should be there...
0306: return fCurrentEntity != null ? fCurrentEntity.columnNumber
0307: : -1;
0308: }
0309:
0310: public int getCharacterOffset() {
0311: return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad
0312: + fCurrentEntity.position
0313: : -1;
0314: }
0315:
0316: /** Returns the expanded system identifier. */
0317: public String getExpandedSystemId() {
0318: return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation
0319: .getExpandedSystemId()
0320: : null;
0321: }
0322:
0323: /** Returns the literal system identifier. */
0324: public String getLiteralSystemId() {
0325: return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation
0326: .getLiteralSystemId()
0327: : null;
0328: }
0329:
0330: /** Returns the public identifier. */
0331: public String getPublicId() {
0332: return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation
0333: .getPublicId()
0334: : null;
0335: }
0336:
0337: ///////////////// Locator methods finished.
0338:
0339: /** the version of the current entity being scanned */
0340: public void setVersion(String version) {
0341: fCurrentEntity.version = version;
0342: }
0343:
0344: public String getVersion() {
0345: return fCurrentEntity.version;
0346: }
0347:
0348: public String getEncoding() {
0349: return fCurrentEntity.encoding;
0350: }
0351:
0352: /**
0353: * Sets the encoding of the scanner. This method is used by the
0354: * scanners if the XMLDecl or TextDecl line contains an encoding
0355: * pseudo-attribute.
0356: * <p>
0357: * <strong>Note:</strong> The underlying character reader on the
0358: * current entity will be changed to accomodate the new encoding.
0359: * However, the new encoding is ignored if the current reader was
0360: * not constructed from an input stream (e.g. an external entity
0361: * that is resolved directly to the appropriate java.io.Reader
0362: * object).
0363: *
0364: * @param encoding The IANA encoding name of the new encoding.
0365: *
0366: * @throws IOException Thrown if the new encoding is not supported.
0367: *
0368: * @see com.sun.xml.stream.xerces.util.EncodingMap
0369: */
0370: public void setEncoding(String encoding) throws IOException {
0371:
0372: if (DEBUG_ENCODINGS) {
0373: System.out.println("$$$ setEncoding: " + encoding);
0374: }
0375:
0376: if (fCurrentEntity.stream != null) {
0377: // if the encoding is the same, don't change the reader and
0378: // re-use the original reader used by the OneCharReader
0379: // NOTE: Besides saving an object, this overcomes deficiencies
0380: // in the UTF-16 reader supplied with the standard Java
0381: // distribution (up to and including 1.3). The UTF-16
0382: // decoder buffers 8K blocks even when only asked to read
0383: // a single char! -Ac
0384: if (fCurrentEntity.encoding == null
0385: || !fCurrentEntity.encoding.equals(encoding)) {
0386: // UTF-16 is a bit of a special case. If the encoding is UTF-16,
0387: // and we know the endian-ness, we shouldn't change readers.
0388: // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
0389: // the endian-ness from the encoding we presently have.
0390: if (fCurrentEntity.encoding != null
0391: && fCurrentEntity.encoding.startsWith("UTF-16")) {
0392: String ENCODING = encoding
0393: .toUpperCase(Locale.ENGLISH);
0394: if (ENCODING.equals("UTF-16"))
0395: return;
0396: if (ENCODING.equals("ISO-10646-UCS-4")) {
0397: if (fCurrentEntity.encoding.equals("UTF-16BE")) {
0398: fCurrentEntity.reader = new UCSReader(
0399: fCurrentEntity.stream,
0400: UCSReader.UCS4BE);
0401: } else {
0402: fCurrentEntity.reader = new UCSReader(
0403: fCurrentEntity.stream,
0404: UCSReader.UCS4LE);
0405: }
0406: return;
0407: }
0408: if (ENCODING.equals("ISO-10646-UCS-2")) {
0409: if (fCurrentEntity.encoding.equals("UTF-16BE")) {
0410: fCurrentEntity.reader = new UCSReader(
0411: fCurrentEntity.stream,
0412: UCSReader.UCS2BE);
0413: } else {
0414: fCurrentEntity.reader = new UCSReader(
0415: fCurrentEntity.stream,
0416: UCSReader.UCS2LE);
0417: }
0418: return;
0419: }
0420: }
0421: // wrap a new reader around the input stream, changing
0422: // the encoding
0423: if (DEBUG_ENCODINGS) {
0424: System.out
0425: .println("$$$ creating new reader from stream: "
0426: + fCurrentEntity.stream);
0427: }
0428: //fCurrentEntity.stream.reset();
0429: fCurrentEntity.reader = createReader(
0430: fCurrentEntity.stream, encoding, null);
0431: fCurrentEntity.encoding = encoding;
0432: } else {
0433: if (DEBUG_ENCODINGS)
0434: System.out
0435: .println("$$$ reusing old reader on stream");
0436: }
0437: }
0438:
0439: } // setEncoding(String)
0440:
0441: /** Returns true if the current entity being scanned is external. */
0442: public boolean isExternal() {
0443: return fCurrentEntity.isExternal();
0444: } // isExternal():boolean
0445:
0446: public int getChar(int relative) throws IOException {
0447: if (arrangeCapacity(relative + 1, false)) {
0448: return fCurrentEntity.ch[fCurrentEntity.position + relative];
0449: } else {
0450: return -1;
0451: }
0452: }//getChar()
0453:
0454: /**
0455: * Returns the next character on the input.
0456: * <p>
0457: * <strong>Note:</strong> The character is <em>not</em> consumed.
0458: *
0459: * @throws IOException Thrown if i/o error occurs.
0460: * @throws EOFException Thrown on end of file.
0461: */
0462: public int peekChar() throws IOException {
0463: if (DEBUG_BUFFER) {
0464: System.out.print("(peekChar: ");
0465: print();
0466: System.out.println();
0467: }
0468:
0469: // load more characters, if needed
0470: if (fCurrentEntity.position == fCurrentEntity.count) {
0471: invokeListeners(0);
0472: load(0, true);
0473: }
0474:
0475: // peek at character
0476: int c = fCurrentEntity.ch[fCurrentEntity.position];
0477:
0478: // return peeked character
0479: if (DEBUG_BUFFER) {
0480: System.out.print(")peekChar: ");
0481: print();
0482: if (isExternal) {
0483: System.out.println(" -> '"
0484: + (c != '\r' ? (char) c : '\n') + "'");
0485: } else {
0486: System.out.println(" -> '" + (char) c + "'");
0487: }
0488: }
0489: if (isExternal) {
0490: return c != '\r' ? c : '\n';
0491: } else {
0492: return c;
0493: }
0494:
0495: } // peekChar():int
0496:
0497: /**
0498: * Returns the next character on the input.
0499: * <p>
0500: * <strong>Note:</strong> The character is consumed.
0501: *
0502: * @throws IOException Thrown if i/o error occurs.
0503: * @throws EOFException Thrown on end of file.
0504: */
0505: public int scanChar() throws IOException {
0506: if (DEBUG_BUFFER) {
0507: System.out.print("(scanChar: ");
0508: print();
0509: System.out.println();
0510: }
0511:
0512: // load more characters, if needed
0513: if (fCurrentEntity.position == fCurrentEntity.count) {
0514: invokeListeners(0);
0515: load(0, true);
0516: }
0517:
0518: // scan character
0519: int c = fCurrentEntity.ch[fCurrentEntity.position++];
0520: if (c == '\n' || (c == '\r' && isExternal)) {
0521: fCurrentEntity.lineNumber++;
0522: fCurrentEntity.columnNumber = 1;
0523: if (fCurrentEntity.position == fCurrentEntity.count) {
0524: invokeListeners(1);
0525: fCurrentEntity.ch[0] = (char) c;
0526: load(1, false);
0527: }
0528: if (c == '\r' && isExternal) {
0529: if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
0530: fCurrentEntity.position--;
0531: }
0532: c = '\n';
0533: }
0534: }
0535:
0536: // return character that was scanned
0537: if (DEBUG_BUFFER) {
0538: System.out.print(")scanChar: ");
0539: print();
0540: System.out.println(" -> '" + (char) c + "'");
0541: }
0542: fCurrentEntity.columnNumber++;
0543: return c;
0544:
0545: } // scanChar():int
0546:
0547: /**
0548: * Returns a string matching the NMTOKEN production appearing immediately
0549: * on the input as a symbol, or null if NMTOKEN Name string is present.
0550: * <p>
0551: * <strong>Note:</strong> The NMTOKEN characters are consumed.
0552: * <p>
0553: * <strong>Note:</strong> The string returned must be a symbol. The
0554: * SymbolTable can be used for this purpose.
0555: *
0556: * @throws IOException Thrown if i/o error occurs.
0557: * @throws EOFException Thrown on end of file.
0558: *
0559: * @see com.sun.xml.stream.xerces.util.SymbolTable
0560: * @see com.sun.xml.stream.xerces.util.XMLChar#isName
0561: */
0562: public String scanNmtoken() throws IOException {
0563: if (DEBUG_BUFFER) {
0564: System.out.print("(scanNmtoken: ");
0565: print();
0566: System.out.println();
0567: }
0568:
0569: // load more characters, if needed
0570: if (fCurrentEntity.position == fCurrentEntity.count) {
0571: invokeListeners(0);
0572: load(0, true);
0573: }
0574:
0575: // scan nmtoken
0576: int offset = fCurrentEntity.position;
0577: boolean vc = false;
0578: char c;
0579: while (true) {
0580: //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
0581: c = fCurrentEntity.ch[fCurrentEntity.position];
0582: if (c < 127) {
0583: vc = validNames[c];
0584: } else {
0585: vc = XMLChar.isName(c);
0586: }
0587: if (!vc)
0588: break;
0589:
0590: if (++fCurrentEntity.position == fCurrentEntity.count) {
0591: int length = fCurrentEntity.position - offset;
0592: invokeListeners(length);
0593: if (length == fCurrentEntity.fBufferSize) {
0594: // bad luck we have to resize our buffer
0595: char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
0596: System.arraycopy(fCurrentEntity.ch, offset, tmp, 0,
0597: length);
0598: fCurrentEntity.ch = tmp;
0599: fCurrentEntity.fBufferSize *= 2;
0600: } else {
0601: System.arraycopy(fCurrentEntity.ch, offset,
0602: fCurrentEntity.ch, 0, length);
0603: }
0604: offset = 0;
0605: if (load(length, false)) {
0606: break;
0607: }
0608: }
0609: }
0610: int length = fCurrentEntity.position - offset;
0611: fCurrentEntity.columnNumber += length;
0612:
0613: // return nmtoken
0614: String symbol = null;
0615: if (length > 0) {
0616: symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset,
0617: length);
0618: }
0619: if (DEBUG_BUFFER) {
0620: System.out.print(")scanNmtoken: ");
0621: print();
0622: System.out.println(" -> " + String.valueOf(symbol));
0623: }
0624: return symbol;
0625:
0626: } // scanNmtoken():String
0627:
0628: /**
0629: * Returns a string matching the Name production appearing immediately
0630: * on the input as a symbol, or null if no Name string is present.
0631: * <p>
0632: * <strong>Note:</strong> The Name characters are consumed.
0633: * <p>
0634: * <strong>Note:</strong> The string returned must be a symbol. The
0635: * SymbolTable can be used for this purpose.
0636: *
0637: * @throws IOException Thrown if i/o error occurs.
0638: * @throws EOFException Thrown on end of file.
0639: *
0640: * @see com.sun.xml.stream.xerces.util.SymbolTable
0641: * @see com.sun.xml.stream.xerces.util.XMLChar#isName
0642: * @see com.sun.xml.stream.xerces.util.XMLChar#isNameStart
0643: */
0644: public String scanName() throws IOException {
0645: if (DEBUG_BUFFER) {
0646: System.out.print("(scanName: ");
0647: print();
0648: System.out.println();
0649: }
0650:
0651: // load more characters, if needed
0652: if (fCurrentEntity.position == fCurrentEntity.count) {
0653: invokeListeners(0);
0654: load(0, true);
0655: }
0656:
0657: // scan name
0658: int offset = fCurrentEntity.position;
0659: if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
0660: if (++fCurrentEntity.position == fCurrentEntity.count) {
0661: invokeListeners(1);
0662: fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
0663: offset = 0;
0664: if (load(1, false)) {
0665: fCurrentEntity.columnNumber++;
0666: String symbol = fSymbolTable.addSymbol(
0667: fCurrentEntity.ch, 0, 1);
0668: scannedName = fSymbolTable.getCharArray();
0669: if (DEBUG_BUFFER) {
0670: System.out.print(")scanName: ");
0671: print();
0672: System.out.println(" -> "
0673: + String.valueOf(symbol));
0674: }
0675: return symbol;
0676: }
0677: }
0678: boolean vc = false;
0679: while (true) {
0680: //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
0681: char c = fCurrentEntity.ch[fCurrentEntity.position];
0682: if (c < 127) {
0683: vc = validNames[c];
0684: } else {
0685: vc = XMLChar.isName(c);
0686: }
0687: if (!vc)
0688: break;
0689: if (++fCurrentEntity.position == fCurrentEntity.count) {
0690: int length = fCurrentEntity.position - offset;
0691: invokeListeners(length);
0692: if (length == fCurrentEntity.fBufferSize) {
0693: // bad luck we have to resize our buffer
0694: char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
0695: System.arraycopy(fCurrentEntity.ch, offset,
0696: tmp, 0, length);
0697: fCurrentEntity.ch = tmp;
0698: fCurrentEntity.fBufferSize *= 2;
0699: } else {
0700: System.arraycopy(fCurrentEntity.ch, offset,
0701: fCurrentEntity.ch, 0, length);
0702: }
0703: offset = 0;
0704: if (load(length, false)) {
0705: break;
0706: }
0707: }
0708: }
0709: }
0710: int length = fCurrentEntity.position - offset;
0711: fCurrentEntity.columnNumber += length;
0712:
0713: // return name
0714: String symbol = null;
0715: if (length > 0) {
0716: symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset,
0717: length);
0718: scannedName = fSymbolTable.getCharArray();
0719: }
0720: if (DEBUG_BUFFER) {
0721: System.out.print(")scanName: ");
0722: print();
0723: System.out.println(" -> " + String.valueOf(symbol));
0724: }
0725: return symbol;
0726:
0727: } // scanName():String
0728:
0729: /**
0730: * Scans a qualified name from the input, setting the fields of the
0731: * QName structure appropriately.
0732: * <p>
0733: * <strong>Note:</strong> The qualified name characters are consumed.
0734: * <p>
0735: * <strong>Note:</strong> The strings used to set the values of the
0736: * QName structure must be symbols. The SymbolTable can be used for
0737: * this purpose.
0738: *
0739: * @param qname The qualified name structure to fill.
0740: *
0741: * @return Returns true if a qualified name appeared immediately on
0742: * the input and was scanned, false otherwise.
0743: *
0744: * @throws IOException Thrown if i/o error occurs.
0745: * @throws EOFException Thrown on end of file.
0746: *
0747: * @see com.sun.xml.stream.xerces.util.SymbolTable
0748: * @see com.sun.xml.stream.xerces.util.XMLChar#isName
0749: * @see com.sun.xml.stream.xerces.util.XMLChar#isNameStart
0750: */
0751: public boolean scanQName(QName qname) throws IOException {
0752: if (DEBUG_BUFFER) {
0753: System.out.print("(scanQName, " + qname + ": ");
0754: print();
0755: System.out.println();
0756: }
0757:
0758: // load more characters, if needed
0759: if (fCurrentEntity.position == fCurrentEntity.count) {
0760: invokeListeners(0);
0761: load(0, true);
0762: }
0763:
0764: // scan qualified name
0765: int offset = fCurrentEntity.position;
0766:
0767: //making a check if if the specified character is a valid name start character
0768: //as defined by production [5] in the XML 1.0 specification.
0769: // Name ::= (Letter | '_' | ':') (NameChar)*
0770:
0771: if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
0772: if (++fCurrentEntity.position == fCurrentEntity.count) {
0773: invokeListeners(1);
0774: fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
0775: offset = 0;
0776:
0777: if (load(1, false)) {
0778: fCurrentEntity.columnNumber++;
0779: //adding into symbol table.
0780: //XXX We are trying to add single character in SymbolTable??????
0781: String name = fSymbolTable.addSymbol(
0782: fCurrentEntity.ch, 0, 1);
0783: qname.setValues(null, name, name, null);
0784: qname.characters = fSymbolTable.getCharArray();
0785: if (DEBUG_BUFFER) {
0786: System.out.print(")scanQName, " + qname + ": ");
0787: print();
0788: System.out.println(" -> true");
0789: }
0790: return true;
0791: }
0792: }
0793: int index = -1;
0794: boolean vc = false;
0795: while (true) {
0796:
0797: //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
0798: char c = fCurrentEntity.ch[fCurrentEntity.position];
0799: if (c < 127) {
0800: vc = validNames[c];
0801: } else {
0802: vc = XMLChar.isName(c);
0803: }
0804: if (!vc)
0805: break;
0806: if (c == ':') {
0807: if (index != -1) {
0808: break;
0809: }
0810: index = fCurrentEntity.position;
0811: }
0812: if (++fCurrentEntity.position == fCurrentEntity.count) {
0813: int length = fCurrentEntity.position - offset;
0814: invokeListeners(length);
0815: if (length == fCurrentEntity.fBufferSize) {
0816: // bad luck we have to resize our buffer
0817: char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
0818: System.arraycopy(fCurrentEntity.ch, offset,
0819: tmp, 0, length);
0820: fCurrentEntity.ch = tmp;
0821: fCurrentEntity.fBufferSize *= 2;
0822: } else {
0823: System.arraycopy(fCurrentEntity.ch, offset,
0824: fCurrentEntity.ch, 0, length);
0825: }
0826: if (index != -1) {
0827: index = index - offset;
0828: }
0829: offset = 0;
0830: if (load(length, false)) {
0831: break;
0832: }
0833: }
0834: }
0835: int length = fCurrentEntity.position - offset;
0836: fCurrentEntity.columnNumber += length;
0837: if (length > 0) {
0838: String prefix = null;
0839: String localpart = null;
0840: String rawname = fSymbolTable.addSymbol(
0841: fCurrentEntity.ch, offset, length);
0842: qname.characters = fSymbolTable.getCharArray();
0843: if (index != -1) {
0844: int prefixLength = index - offset;
0845: prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
0846: offset, prefixLength);
0847: int len = length - prefixLength - 1;
0848: localpart = fSymbolTable.addSymbol(
0849: fCurrentEntity.ch, index + 1, len);
0850:
0851: } else {
0852: localpart = rawname;
0853: }
0854: qname.setValues(prefix, localpart, rawname, null);
0855: if (DEBUG_BUFFER) {
0856: System.out.print(")scanQName, " + qname + ": ");
0857: print();
0858: System.out.println(" -> true");
0859: }
0860: return true;
0861: }
0862: }
0863:
0864: // no qualified name found
0865: if (DEBUG_BUFFER) {
0866: System.out.print(")scanQName, " + qname + ": ");
0867: print();
0868: System.out.println(" -> false");
0869: }
0870: return false;
0871:
0872: } // scanQName(QName):boolean
0873:
0874: /**
0875: * CHANGED:
0876: * Scans a range of parsed character data, This function appends the character data to
0877: * the supplied buffer.
0878: * <p>
0879: * <strong>Note:</strong> The characters are consumed.
0880: * <p>
0881: * <strong>Note:</strong> This method does not guarantee to return
0882: * the longest run of parsed character data. This method may return
0883: * before markup due to reaching the end of the input buffer or any
0884: * other reason.
0885: * <p>
0886: *
0887: * @param content The content structure to fill.
0888: *
 * @return Returns the next character on the input, if known. This
 * value may be -1 but this does <em>not</em> designate
 * end of file.
0892: *
0893: * @throws IOException Thrown if i/o error occurs.
0894: * @throws EOFException Thrown on end of file.
0895: */
0896: public int scanContent(XMLString content) throws IOException {
0897: if (DEBUG_BUFFER) {
0898: System.out.print("(scanContent: ");
0899: print();
0900: System.out.println();
0901: }
0902:
0903: // load more characters, if needed
0904: if (fCurrentEntity.position == fCurrentEntity.count) {
0905: invokeListeners(0);
0906: load(0, true);
0907: } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
0908: invokeListeners(0);
0909: fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
0910: load(1, false);
0911: fCurrentEntity.position = 0;
0912: }
0913:
0914: // normalize newlines
0915: int offset = fCurrentEntity.position;
0916: int c = fCurrentEntity.ch[offset];
0917: int newlines = 0;
0918: if (c == '\n' || (c == '\r' && isExternal)) {
0919: if (DEBUG_BUFFER) {
0920: System.out.print("[newline, " + offset + ", "
0921: + fCurrentEntity.position + ": ");
0922: print();
0923: System.out.println();
0924: }
0925: do {
0926: c = fCurrentEntity.ch[fCurrentEntity.position++];
0927: if (c == '\r' && isExternal) {
0928: newlines++;
0929: fCurrentEntity.lineNumber++;
0930: fCurrentEntity.columnNumber = 1;
0931: if (fCurrentEntity.position == fCurrentEntity.count) {
0932: offset = 0;
0933: invokeListeners(newlines);
0934: fCurrentEntity.position = newlines;
0935: if (load(newlines, false)) {
0936: break;
0937: }
0938: }
0939: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
0940: fCurrentEntity.position++;
0941: offset++;
0942: }
0943: /*** NEWLINE NORMALIZATION ***/
0944: else {
0945: newlines++;
0946: }
0947: } else if (c == '\n') {
0948: newlines++;
0949: fCurrentEntity.lineNumber++;
0950: fCurrentEntity.columnNumber = 1;
0951: if (fCurrentEntity.position == fCurrentEntity.count) {
0952: offset = 0;
0953: invokeListeners(newlines);
0954: fCurrentEntity.position = newlines;
0955: if (load(newlines, false)) {
0956: break;
0957: }
0958: }
0959: } else {
0960: fCurrentEntity.position--;
0961: break;
0962: }
0963: } while (fCurrentEntity.position < fCurrentEntity.count - 1);
0964: for (int i = offset; i < fCurrentEntity.position; i++) {
0965: fCurrentEntity.ch[i] = '\n';
0966: }
0967: int length = fCurrentEntity.position - offset;
0968: if (fCurrentEntity.position == fCurrentEntity.count - 1) {
0969: //CHANGED: dont replace the value.. append to the buffer. This gives control to the callee
0970: //on buffering the data..
0971: content.setValues(fCurrentEntity.ch, offset, length);
0972: //content.append(fCurrentEntity.ch, offset, length);
0973: if (DEBUG_BUFFER) {
0974: System.out.print("]newline, " + offset + ", "
0975: + fCurrentEntity.position + ": ");
0976: print();
0977: System.out.println();
0978: }
0979: return -1;
0980: }
0981: if (DEBUG_BUFFER) {
0982: System.out.print("]newline, " + offset + ", "
0983: + fCurrentEntity.position + ": ");
0984: print();
0985: System.out.println();
0986: }
0987: }
0988:
0989: // inner loop, scanning for content
0990: boolean vc = false;
0991: while (fCurrentEntity.position < fCurrentEntity.count) {
0992: c = fCurrentEntity.ch[fCurrentEntity.position++];
0993: if (c < 127)
0994: vc = validContent[c];
0995: else
0996: vc = XMLChar.isContent(c);
0997: if (!vc) {
0998: fCurrentEntity.position--;
0999: break;
1000: }
1001: }
1002: int length = fCurrentEntity.position - offset;
1003: fCurrentEntity.columnNumber += length - newlines;
1004:
1005: content.setValues(fCurrentEntity.ch, offset, length);
1006:
1007: // return next character
1008: if (fCurrentEntity.position != fCurrentEntity.count) {
1009: c = fCurrentEntity.ch[fCurrentEntity.position];
1010: // REVISIT: Does this need to be updated to fix the
1011: // #x0D ^#x0A newline normalization problem? -Ac
1012: if (c == '\r' && isExternal) {
1013: c = '\n';
1014: }
1015: } else {
1016: c = -1;
1017: }
1018: if (DEBUG_BUFFER) {
1019: System.out.print(")scanContent: ");
1020: print();
1021: System.out.println(" -> '" + (char) c + "'");
1022: }
1023: return c;
1024:
1025: } // scanContent(XMLString):int
1026:
1027: /**
1028: * Scans a range of attribute value data, setting the fields of the
1029: * XMLString structure, appropriately.
1030: * <p>
1031: * <strong>Note:</strong> The characters are consumed.
1032: * <p>
1033: * <strong>Note:</strong> This method does not guarantee to return
1034: * the longest run of attribute value data. This method may return
1035: * before the quote character due to reaching the end of the input
1036: * buffer or any other reason.
1037: * <p>
1038: * <strong>Note:</strong> The fields contained in the XMLString
1039: * structure are not guaranteed to remain valid upon subsequent calls
1040: * to the entity scanner. Therefore, the caller is responsible for
1041: * immediately using the returned character data or making a copy of
1042: * the character data.
1043: *
1044: * @param quote The quote character that signifies the end of the
1045: * attribute value data.
1046: * @param content The content structure to fill.
1047: *
1048: * @return Returns the next character on the input, if known. This
1049: * value may be -1 but this does <em>note</em> designate
1050: * end of file.
1051: *
1052: * @throws IOException Thrown if i/o error occurs.
1053: * @throws EOFException Thrown on end of file.
1054: */
1055: public int scanLiteral(int quote, XMLString content)
1056: throws IOException {
1057: if (DEBUG_BUFFER) {
1058: System.out.print("(scanLiteral, '" + (char) quote + "': ");
1059: print();
1060: System.out.println();
1061: }
1062: // load more characters, if needed
1063: if (fCurrentEntity.position == fCurrentEntity.count) {
1064: invokeListeners(0);
1065: load(0, true);
1066: } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1067: invokeListeners(0);
1068: fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
1069:
1070: load(1, false);
1071: fCurrentEntity.position = 0;
1072: }
1073:
1074: // normalize newlines
1075: int offset = fCurrentEntity.position;
1076: int c = fCurrentEntity.ch[offset];
1077: int newlines = 0;
1078: if (whiteSpaceInfoNeeded)
1079: whiteSpaceLen = 0;
1080: if (c == '\n' || (c == '\r' && isExternal)) {
1081: if (DEBUG_BUFFER) {
1082: System.out.print("[newline, " + offset + ", "
1083: + fCurrentEntity.position + ": ");
1084: print();
1085: System.out.println();
1086: }
1087: do {
1088: c = fCurrentEntity.ch[fCurrentEntity.position++];
1089: if (c == '\r' && isExternal) {
1090: newlines++;
1091: fCurrentEntity.lineNumber++;
1092: fCurrentEntity.columnNumber = 1;
1093: if (fCurrentEntity.position == fCurrentEntity.count) {
1094: invokeListeners(newlines);
1095: offset = 0;
1096: fCurrentEntity.position = newlines;
1097: if (load(newlines, false)) {
1098: break;
1099: }
1100: }
1101: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1102: fCurrentEntity.position++;
1103: offset++;
1104: }
1105: /*** NEWLINE NORMALIZATION ***/
1106: else {
1107: newlines++;
1108: }
1109: /***/
1110: } else if (c == '\n') {
1111: newlines++;
1112: fCurrentEntity.lineNumber++;
1113: fCurrentEntity.columnNumber = 1;
1114: if (fCurrentEntity.position == fCurrentEntity.count) {
1115: offset = 0;
1116: invokeListeners(newlines);
1117: fCurrentEntity.position = newlines;
1118: if (load(newlines, false)) {
1119: break;
1120: }
1121: }
1122: /*** NEWLINE NORMALIZATION ***
1123: * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
1124: * && external) {
1125: * fCurrentEntity.position++;
1126: * offset++;
1127: * }
1128: * /***/
1129: } else {
1130: fCurrentEntity.position--;
1131: break;
1132: }
1133: } while (fCurrentEntity.position < fCurrentEntity.count - 1);
1134: int i = 0;
1135: for (i = offset; i < fCurrentEntity.position; i++) {
1136: fCurrentEntity.ch[i] = '\n';
1137: whiteSpaceLookup[whiteSpaceLen++] = i;
1138: }
1139:
1140: int length = fCurrentEntity.position - offset;
1141: if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1142: content.setValues(fCurrentEntity.ch, offset, length);
1143: if (DEBUG_BUFFER) {
1144: System.out.print("]newline, " + offset + ", "
1145: + fCurrentEntity.position + ": ");
1146: print();
1147: System.out.println();
1148: }
1149: return -1;
1150: }
1151: if (DEBUG_BUFFER) {
1152: System.out.print("]newline, " + offset + ", "
1153: + fCurrentEntity.position + ": ");
1154: print();
1155: System.out.println();
1156: }
1157: }
1158:
1159: // scan literal value
1160: boolean vc = true;
1161: while (fCurrentEntity.position < fCurrentEntity.count) {
1162: c = fCurrentEntity.ch[fCurrentEntity.position++];
1163: if ((c == quote && (!fCurrentEntity.literal || isExternal))
1164: || c == '%') {
1165: fCurrentEntity.position--;
1166: break;
1167: } else {
1168: if (c < 127)
1169: vc = validContent[c];
1170: else
1171: vc = XMLChar.isContent(c);
1172: if (!vc) {
1173: fCurrentEntity.position--;
1174: break;
1175: }
1176: }
1177: if (whiteSpaceInfoNeeded) {
1178: if (c == 0x20 || c == 0x9) {
1179: if (whiteSpaceLen < whiteSpaceLookup.length) {
1180: whiteSpaceLookup[whiteSpaceLen++] = fCurrentEntity.position - 1;
1181: } else {
1182: int[] tmp = new int[whiteSpaceLookup.length + 20];
1183: System.arraycopy(whiteSpaceLookup, 0, tmp, 0,
1184: whiteSpaceLookup.length);
1185: whiteSpaceLookup = tmp;
1186: whiteSpaceLookup[whiteSpaceLen++] = fCurrentEntity.position - 1;
1187: }
1188: }
1189: }
1190: }
1191: int length = fCurrentEntity.position - offset;
1192: fCurrentEntity.columnNumber += length - newlines;
1193: content.setValues(fCurrentEntity.ch, offset, length);
1194:
1195: // return next character
1196: if (fCurrentEntity.position != fCurrentEntity.count) {
1197: c = fCurrentEntity.ch[fCurrentEntity.position];
1198: // NOTE: We don't want to accidentally signal the
1199: // end of the literal if we're expanding an
1200: // entity appearing in the literal. -Ac
1201: if (c == quote && fCurrentEntity.literal) {
1202: c = -1;
1203: }
1204: } else {
1205: c = -1;
1206: }
1207: if (DEBUG_BUFFER) {
1208: System.out.print(")scanLiteral, '" + (char) quote + "': ");
1209: print();
1210: System.out.println(" -> '" + (char) c + "'");
1211: }
1212: return c;
1213:
1214: } // scanLiteral(int,XMLString):int
1215:
1216: //CHANGED:
1217: /**
1218: * Scans a range of character data up to the specified delimiter,
1219: * setting the fields of the XMLString structure, appropriately.
1220: * <p>
1221: * <strong>Note:</strong> The characters are consumed.
1222: * <p>
1223: * <strong>Note:</strong> This assumes that the length of the delimiter
1224: * and that the delimiter contains at least one character.
1225: * <p>
1226: * <strong>Note:</strong> This method does not guarantee to return
1227: * the longest run of character data. This method may return before
1228: * the delimiter due to reaching the end of the input buffer or any
1229: * other reason.
1230: * <p>
1231: * @param delimiter The string that signifies the end of the character
1232: * data to be scanned.
1233: * @param data The data structure to fill. Data will be appendd to the current buffer.
1234: *
1235: * @return Returns true if there is more data to scan, false otherwise.
1236: *
1237: * @throws IOException Thrown if i/o error occurs.
1238: * @throws EOFException Thrown on end of file.
1239: */
1240: public boolean scanData(String delimiter, XMLStringBuffer buffer)
1241: throws IOException {
1242:
1243: boolean done = false;
1244: int delimLen = delimiter.length();
1245: char charAt0 = delimiter.charAt(0);
1246: do {
1247: if (DEBUG_BUFFER) {
1248: System.out.print("(scanData: ");
1249: print();
1250: System.out.println();
1251: }
1252:
1253: // load more characters, if needed
1254:
1255: if (fCurrentEntity.position == fCurrentEntity.count) {
1256: invokeListeners(0);
1257: load(0, true);
1258: } else if (fCurrentEntity.position >= fCurrentEntity.count
1259: - delimLen) {
1260: invokeListeners(fCurrentEntity.count
1261: - fCurrentEntity.position);
1262: System.arraycopy(fCurrentEntity.ch,
1263: fCurrentEntity.position, fCurrentEntity.ch, 0,
1264: fCurrentEntity.count - fCurrentEntity.position);
1265: load(fCurrentEntity.count - fCurrentEntity.position,
1266: false);
1267: fCurrentEntity.position = 0;
1268: }
1269: if (fCurrentEntity.position >= fCurrentEntity.count
1270: - delimLen) {
1271: // something must be wrong with the input: e.g., file ends an unterminated comment
1272: invokeListeners(0);
1273: int length = fCurrentEntity.count
1274: - fCurrentEntity.position;
1275: buffer.append(fCurrentEntity.ch,
1276: fCurrentEntity.position, length);
1277: fCurrentEntity.columnNumber += fCurrentEntity.count;
1278: fCurrentEntity.position = fCurrentEntity.count;
1279: load(0, true);
1280: return false;
1281: }
1282:
1283: // normalize newlines
1284: int offset = fCurrentEntity.position;
1285: int c = fCurrentEntity.ch[offset];
1286: int newlines = 0;
1287: if (c == '\n' || (c == '\r' && isExternal)) {
1288: if (DEBUG_BUFFER) {
1289: System.out.print("[newline, " + offset + ", "
1290: + fCurrentEntity.position + ": ");
1291: print();
1292: System.out.println();
1293: }
1294: do {
1295: c = fCurrentEntity.ch[fCurrentEntity.position++];
1296: if (c == '\r' && isExternal) {
1297: newlines++;
1298: fCurrentEntity.lineNumber++;
1299: fCurrentEntity.columnNumber = 1;
1300: if (fCurrentEntity.position == fCurrentEntity.count) {
1301: offset = 0;
1302: invokeListeners(newlines);
1303: fCurrentEntity.position = newlines;
1304: if (load(newlines, false)) {
1305: break;
1306: }
1307: }
1308: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1309: fCurrentEntity.position++;
1310: offset++;
1311: }
1312: /*** NEWLINE NORMALIZATION ***/
1313: else {
1314: newlines++;
1315: }
1316: } else if (c == '\n') {
1317: newlines++;
1318: fCurrentEntity.lineNumber++;
1319: fCurrentEntity.columnNumber = 1;
1320: if (fCurrentEntity.position == fCurrentEntity.count) {
1321: offset = 0;
1322: invokeListeners(newlines);
1323: fCurrentEntity.position = newlines;
1324: fCurrentEntity.count = newlines;
1325: if (load(newlines, false)) {
1326: break;
1327: }
1328: }
1329: } else {
1330: fCurrentEntity.position--;
1331: break;
1332: }
1333: } while (fCurrentEntity.position < fCurrentEntity.count - 1);
1334: for (int i = offset; i < fCurrentEntity.position; i++) {
1335: fCurrentEntity.ch[i] = '\n';
1336: }
1337: int length = fCurrentEntity.position - offset;
1338: if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1339: buffer.append(fCurrentEntity.ch, offset, length);
1340: if (DEBUG_BUFFER) {
1341: System.out.print("]newline, " + offset + ", "
1342: + fCurrentEntity.position + ": ");
1343: print();
1344: System.out.println();
1345: }
1346: return true;
1347: }
1348: if (DEBUG_BUFFER) {
1349: System.out.print("]newline, " + offset + ", "
1350: + fCurrentEntity.position + ": ");
1351: print();
1352: System.out.println();
1353: }
1354: }
1355:
1356: // iterate over buffer looking for delimiter
1357: OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
1358: c = fCurrentEntity.ch[fCurrentEntity.position++];
1359: if (c == charAt0) {
1360: // looks like we just hit the delimiter
1361: int delimOffset = fCurrentEntity.position - 1;
1362: for (int i = 1; i < delimLen; i++) {
1363: if (fCurrentEntity.position == fCurrentEntity.count) {
1364: fCurrentEntity.position -= i;
1365: break OUTER;
1366: }
1367: c = fCurrentEntity.ch[fCurrentEntity.position++];
1368: if (delimiter.charAt(i) != c) {
1369: fCurrentEntity.position -= i;
1370: break;
1371: }
1372: }
1373: if (fCurrentEntity.position == delimOffset
1374: + delimLen) {
1375: done = true;
1376: break;
1377: }
1378: } else if (c == '\n' || (isExternal && c == '\r')) {
1379: fCurrentEntity.position--;
1380: break;
1381: } else if (XMLChar.isInvalid(c)) {
1382: fCurrentEntity.position--;
1383: int length = fCurrentEntity.position - offset;
1384: fCurrentEntity.columnNumber += length - newlines;
1385: buffer.append(fCurrentEntity.ch, offset, length);
1386: return true;
1387: }
1388: }
1389: int length = fCurrentEntity.position - offset;
1390: fCurrentEntity.columnNumber += length - newlines;
1391: if (done) {
1392: length -= delimLen;
1393: }
1394: buffer.append(fCurrentEntity.ch, offset, length);
1395:
1396: // return true if string was skipped
1397: if (DEBUG_BUFFER) {
1398: System.out.print(")scanData: ");
1399: print();
1400: System.out.println(" -> " + done);
1401: }
1402: } while (!done);
1403: return !done;
1404:
1405: } // scanData(String,XMLString)
1406:
1407: /**
1408: * Skips a character appearing immediately on the input.
1409: * <p>
1410: * <strong>Note:</strong> The character is consumed only if it matches
1411: * the specified character.
1412: *
1413: * @param c The character to skip.
1414: *
1415: * @return Returns true if the character was skipped.
1416: *
1417: * @throws IOException Thrown if i/o error occurs.
1418: * @throws EOFException Thrown on end of file.
1419: */
1420: public boolean skipChar(int c) throws IOException {
1421: if (DEBUG_BUFFER) {
1422: System.out.print("(skipChar, '" + (char) c + "': ");
1423: print();
1424: System.out.println();
1425: }
1426:
1427: // load more characters, if needed
1428: if (fCurrentEntity.position == fCurrentEntity.count) {
1429: invokeListeners(0);
1430: load(0, true);
1431: }
1432:
1433: // skip character
1434: int cc = fCurrentEntity.ch[fCurrentEntity.position];
1435: if (cc == c) {
1436: fCurrentEntity.position++;
1437: if (c == '\n') {
1438: fCurrentEntity.lineNumber++;
1439: fCurrentEntity.columnNumber = 1;
1440: } else {
1441: fCurrentEntity.columnNumber++;
1442: }
1443: if (DEBUG_BUFFER) {
1444: System.out.print(")skipChar, '" + (char) c + "': ");
1445: print();
1446: System.out.println(" -> true");
1447: }
1448: return true;
1449: } else if (c == '\n' && cc == '\r' && isExternal) {
1450: // handle newlines
1451: if (fCurrentEntity.position == fCurrentEntity.count) {
1452: invokeListeners(1);
1453: fCurrentEntity.ch[0] = (char) cc;
1454: load(1, false);
1455: }
1456: fCurrentEntity.position++;
1457: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1458: fCurrentEntity.position++;
1459: }
1460: fCurrentEntity.lineNumber++;
1461: fCurrentEntity.columnNumber = 1;
1462: if (DEBUG_BUFFER) {
1463: System.out.print(")skipChar, '" + (char) c + "': ");
1464: print();
1465: System.out.println(" -> true");
1466: }
1467: return true;
1468: }
1469:
1470: // character was not skipped
1471: if (DEBUG_BUFFER) {
1472: System.out.print(")skipChar, '" + (char) c + "': ");
1473: print();
1474: System.out.println(" -> false");
1475: }
1476: return false;
1477:
1478: } // skipChar(int):boolean
1479:
1480: public boolean isSpace(char ch) {
1481: return (ch == ' ') || (ch == '\n') || (ch == '\t')
1482: || (ch == '\r');
1483: }
1484:
1485: /**
1486: * Skips space characters appearing immediately on the input.
1487: * <p>
1488: * <strong>Note:</strong> The characters are consumed only if they are
1489: * space characters.
1490: *
1491: * @return Returns true if at least one space character was skipped.
1492: *
1493: * @throws IOException Thrown if i/o error occurs.
1494: * @throws EOFException Thrown on end of file.
1495: *
1496: * @see com.sun.xml.stream.xerces.util.XMLChar#isSpace
1497: */
1498: public boolean skipSpaces() throws IOException {
1499: if (DEBUG_BUFFER) {
1500: System.out.print("(skipSpaces: ");
1501: print();
1502: System.out.println();
1503: }
1504: //boolean entityChanged = false;
1505: // load more characters, if needed
1506: if (fCurrentEntity.position == fCurrentEntity.count) {
1507: invokeListeners(0);
1508: load(0, true);
1509: }
1510:
1511: //we are doing this check only in skipSpace() because it is called by
1512: //fMiscDispatcher and we want the parser to exit gracefully when document
1513: //is well-formed.
1514: //it is possible that end of document is reached and
1515: //fCurrentEntity becomes null
1516: //nothing was read so entity changed 'false' should be returned.
1517: if (fCurrentEntity == null) {
1518: return false;
1519: }
1520:
1521: // skip spaces
1522: int c = fCurrentEntity.ch[fCurrentEntity.position];
1523: if (XMLChar.isSpace(c)) {
1524: do {
1525: boolean entityChanged = false;
1526: // handle newlines
1527: if (c == '\n' || (isExternal && c == '\r')) {
1528: fCurrentEntity.lineNumber++;
1529: fCurrentEntity.columnNumber = 1;
1530: if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1531: invokeListeners(0);
1532: fCurrentEntity.ch[0] = (char) c;
1533: entityChanged = load(1, true);
1534: if (!entityChanged) {
1535: // the load change the position to be 1,
1536: // need to restore it when entity not changed
1537: fCurrentEntity.position = 0;
1538: } else if (fCurrentEntity == null) {
1539: return true;
1540: }
1541: }
1542: if (c == '\r' && isExternal) {
1543: // REVISIT: Does this need to be updated to fix the
1544: // #x0D ^#x0A newline normalization problem? -Ac
1545: if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
1546: fCurrentEntity.position--;
1547: }
1548: }
1549: } else {
1550: fCurrentEntity.columnNumber++;
1551: }
1552: // load more characters, if needed
1553: if (!entityChanged) {
1554: fCurrentEntity.position++;
1555: }
1556:
1557: if (fCurrentEntity.position == fCurrentEntity.count) {
1558: invokeListeners(0);
1559: load(0, true);
1560:
1561: //we are doing this check only in skipSpace() because it is called by
1562: //fMiscDispatcher and we want the parser to exit gracefully when document
1563: //is well-formed.
1564:
1565: //it is possible that end of document is reached and
1566: //fCurrentEntity becomes null
1567: //nothing was read so entity changed 'false' should be returned.
1568: if (fCurrentEntity == null) {
1569: return true;
1570: }
1571:
1572: }
1573: } while (XMLChar
1574: .isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
1575: if (DEBUG_BUFFER) {
1576: System.out.print(")skipSpaces: ");
1577: print();
1578: System.out.println(" -> true");
1579: }
1580: return true;
1581: }
1582:
1583: // no spaces were found
1584: if (DEBUG_BUFFER) {
1585: System.out.print(")skipSpaces: ");
1586: print();
1587: System.out.println(" -> false");
1588: }
1589: return false;
1590:
1591: } // skipSpaces():boolean
1592:
1593: /**
1594: * @param legnth This function checks that following number of characters are available.
1595: * to the underlying buffer.
1596: * @return This function returns true if capacity asked is available.
1597: */
1598: public boolean arrangeCapacity(int length) throws IOException {
1599: return arrangeCapacity(length, false);
1600: }
1601:
1602: /**
1603: * @param legnth This function checks that following number of characters are available.
1604: * to the underlying buffer.
1605: * @param if the underlying function should change the entity
1606: * @return This function returns true if capacity asked is available.
1607: *
1608: */
1609: public boolean arrangeCapacity(int length, boolean changeEntity)
1610: throws IOException {
1611: //check if the capacity is availble in the current buffer
1612: //count is no. of characters in the buffer [x][m][l]
1613: //position is '0' based
1614:
1615: if ((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1616: return true;
1617: }
1618: if (DEBUG_SKIP_STRING) {
1619: System.out.println("fCurrentEntity.count = "
1620: + fCurrentEntity.count);
1621: System.out.println("fCurrentEntity.position = "
1622: + fCurrentEntity.position);
1623: System.out.println("length = " + length);
1624: }
1625:
1626: boolean entityChanged = false;
1627: //load more characters -- this function shouldn't change the entity
1628: while ((fCurrentEntity.count - fCurrentEntity.position) < length) {
1629: if ((fCurrentEntity.ch.length - fCurrentEntity.position) < length) {
1630: invokeListeners(0);
1631: System.arraycopy(fCurrentEntity.ch,
1632: fCurrentEntity.position, fCurrentEntity.ch, 0,
1633: fCurrentEntity.count - fCurrentEntity.position);
1634: fCurrentEntity.count -= fCurrentEntity.position;
1635: fCurrentEntity.position = 0;
1636: }
1637:
1638: if ((fCurrentEntity.count - fCurrentEntity.position) < length) {
1639: int pos = fCurrentEntity.position;
1640: invokeListeners(pos);
1641: entityChanged = load(fCurrentEntity.count, changeEntity);
1642: fCurrentEntity.position = pos;
1643: if (entityChanged)
1644: break;
1645: }
1646: if (DEBUG_SKIP_STRING) {
1647: System.out.println("fCurrentEntity.count = "
1648: + fCurrentEntity.count);
1649: System.out.println("fCurrentEntity.position = "
1650: + fCurrentEntity.position);
1651: System.out.println("length = " + length);
1652: }
1653:
1654: }
1655: //load changes the position.. set it back to the point where we started.
1656:
1657: //after loading check again.
1658: if ((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1659: return true;
1660: } else {
1661: return false;
1662: }
1663: }
1664:
1665: /**
1666: * Skips the specified string appearing immediately on the input.
1667: * <p>
1668: * <strong>Note:</strong> The characters are consumed only if all
1669: * the characters are skipped.
1670: *
1671: * @param s The string to skip.
1672: *
1673: * @return Returns true if the string was skipped.
1674: *
1675: * @throws IOException Thrown if i/o error occurs.
1676: * @throws EOFException Thrown on end of file.
1677: */
1678: public boolean skipString(String s) throws IOException {
1679:
1680: final int length = s.length();
1681:
1682: //first make sure that required capacity is avaible
1683: if (arrangeCapacity(length, false)) {
1684: final int beforeSkip = fCurrentEntity.position;
1685: int afterSkip = fCurrentEntity.position + length - 1;
1686: if (DEBUG_SKIP_STRING) {
1687: System.out.println("skipString,length = " + s + ","
1688: + length);
1689: System.out.println("Buffer string to be skipped = "
1690: + new String(fCurrentEntity.ch, beforeSkip,
1691: length));
1692: }
1693:
1694: //s.charAt() indexes are 0 to 'Length -1' based.
1695: int i = length - 1;
1696: //check from reverse
1697: while (s.charAt(i--) == fCurrentEntity.ch[afterSkip]) {
1698: if (afterSkip-- == beforeSkip) {
1699: fCurrentEntity.position = fCurrentEntity.position
1700: + length;
1701: fCurrentEntity.columnNumber += length;
1702: return true;
1703: }
1704: }
1705: }
1706:
1707: return false;
1708: } // skipString(String):boolean
1709:
1710: public boolean skipString(char[] s) throws IOException {
1711:
1712: final int length = s.length;
1713:
1714: //first make sure that required capacity is avaible
1715: if (arrangeCapacity(length, false)) {
1716: int beforeSkip = fCurrentEntity.position;
1717: int afterSkip = fCurrentEntity.position + length;
1718: if (DEBUG_SKIP_STRING) {
1719: System.out.println("skipString,length = "
1720: + new String(s) + "," + length);
1721: System.out.println("skipString,length = "
1722: + new String(s) + "," + length);
1723: }
1724:
1725: for (int i = 0; i < length; i++) {
1726: if (!(fCurrentEntity.ch[beforeSkip++] == s[i])) {
1727: return false;
1728: }
1729: }
1730: fCurrentEntity.position = fCurrentEntity.position + length;
1731: fCurrentEntity.columnNumber += length;
1732: return true;
1733:
1734: }
1735:
1736: return false;
1737: }
1738:
1739: //
1740: // Locator methods
1741: //
1742: //
1743: // Private methods
1744: //
1745:
1746: /**
1747: * Loads a chunk of text.
1748: *
1749: * @param offset The offset into the character buffer to
1750: * read the next batch of characters.
1751: * @param changeEntity True if the load should change entities
1752: * at the end of the entity, otherwise leave
1753: * the current entity in place and the entity
1754: * boundary will be signaled by the return
1755: * value.
1756: *
1757: * @returns Returns true if the entity changed as a result of this
1758: * load operation.
1759: */
1760: final boolean load(int offset, boolean changeEntity)
1761: throws IOException {
1762: if (DEBUG_BUFFER) {
1763: System.out.print("(load, " + offset + ": ");
1764: print();
1765: System.out.println();
1766: }
1767:
1768: //maintaing the count till last load
1769: fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad
1770: + fCurrentEntity.fLastCount;
1771:
1772: // read characters
1773: int length = fCurrentEntity.mayReadChunks ? (fCurrentEntity.ch.length - offset)
1774: : (fCurrentEntity.DEFAULT_XMLDECL_BUFFER_SIZE);
1775: if (DEBUG_BUFFER)
1776: System.out.println(" length to try to read: " + length);
1777: int count = fCurrentEntity.reader.read(fCurrentEntity.ch,
1778: offset, length);
1779: if (DEBUG_BUFFER)
1780: System.out.println(" length actually read: " + count);
1781:
1782: // reset count and position
1783: boolean entityChanged = false;
1784: if (count != -1) {
1785: if (count != 0) {
1786: //record the last count
1787: fCurrentEntity.fLastCount = count;
1788: fCurrentEntity.count = count + offset;
1789: fCurrentEntity.position = offset;
1790: }
1791: }
1792: // end of this entity
1793: else {
1794: fCurrentEntity.count = offset;
1795: fCurrentEntity.position = offset;
1796: entityChanged = true;
1797:
1798: if (changeEntity) {
1799: //notify the entity manager about the end of entity
1800: fEntityManager.endEntity();
1801: //return if the current entity becomes null
1802: if (fCurrentEntity == null) {
1803: return true;
1804: }
1805: // handle the trailing edges
1806: if (fCurrentEntity.position == fCurrentEntity.count) {
1807: load(0, true);
1808: }
1809: }
1810:
1811: }
1812: if (DEBUG_BUFFER) {
1813: System.out.print(")load, " + offset + ": ");
1814: print();
1815: System.out.println();
1816: }
1817:
1818: return entityChanged;
1819:
1820: } // load(int, boolean):boolean
1821:
1822: /**
1823: * Creates a reader capable of reading the given input stream in
1824: * the specified encoding.
1825: *
1826: * @param inputStream The input stream.
1827: * @param encoding The encoding name that the input stream is
1828: * encoded using. If the user has specified that
1829: * Java encoding names are allowed, then the
1830: * encoding name may be a Java encoding name;
1831: * otherwise, it is an ianaEncoding name.
1832: * @param isBigEndian For encodings (like uCS-4), whose names cannot
1833: * specify a byte order, this tells whether the order is bigEndian. null menas
1834: * unknown or not relevant.
1835: *
1836: * @return Returns a reader.
1837: */
1838: protected Reader createReader(InputStream inputStream,
1839: String encoding, Boolean isBigEndian) throws IOException {
1840:
1841: // normalize encoding name
1842: if (encoding == null) {
1843: encoding = "UTF-8";
1844: }
1845:
1846: // try to use an optimized reader
1847: String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
1848: if (ENCODING.equals("UTF-8")) {
1849: if (DEBUG_ENCODINGS) {
1850: System.out.println("$$$ creating UTF8Reader");
1851: }
1852: return new UTF8Reader(
1853: inputStream,
1854: fCurrentEntity.fBufferSize,
1855: fErrorReporter
1856: .getMessageFormatter(XMLMessageFormatter.XML_DOMAIN),
1857: fErrorReporter.getLocale());
1858: }
1859: if (ENCODING.equals("US-ASCII")) {
1860: if (DEBUG_ENCODINGS) {
1861: System.out.println("$$$ creating ASCIIReader");
1862: }
1863: return new ASCIIReader(
1864: inputStream,
1865: fCurrentEntity.fBufferSize,
1866: fErrorReporter
1867: .getMessageFormatter(XMLMessageFormatter.XML_DOMAIN),
1868: fErrorReporter.getLocale());
1869: }
1870: if (ENCODING.equals("ISO-10646-UCS-4")) {
1871: if (isBigEndian != null) {
1872: boolean isBE = isBigEndian.booleanValue();
1873: if (isBE) {
1874: return new UCSReader(inputStream, UCSReader.UCS4BE);
1875: } else {
1876: return new UCSReader(inputStream, UCSReader.UCS4LE);
1877: }
1878: } else {
1879: fErrorReporter.reportError(
1880: XMLMessageFormatter.XML_DOMAIN,
1881: "EncodingByteOrderUnsupported",
1882: new Object[] { encoding },
1883: XMLErrorReporter.SEVERITY_FATAL_ERROR);
1884: }
1885: }
1886: if (ENCODING.equals("ISO-10646-UCS-2")) {
1887: if (isBigEndian != null) { // sould never happen with this encoding...
1888: boolean isBE = isBigEndian.booleanValue();
1889: if (isBE) {
1890: return new UCSReader(inputStream, UCSReader.UCS2BE);
1891: } else {
1892: return new UCSReader(inputStream, UCSReader.UCS2LE);
1893: }
1894: } else {
1895: fErrorReporter.reportError(
1896: XMLMessageFormatter.XML_DOMAIN,
1897: "EncodingByteOrderUnsupported",
1898: new Object[] { encoding },
1899: XMLErrorReporter.SEVERITY_FATAL_ERROR);
1900: }
1901: }
1902:
1903: // check for valid name
1904: boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
1905: boolean validJava = XMLChar.isValidJavaEncoding(encoding);
1906: if (!validIANA || (fAllowJavaEncodings && !validJava)) {
1907: fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1908: "EncodingDeclInvalid", new Object[] { encoding },
1909: XMLErrorReporter.SEVERITY_FATAL_ERROR);
1910: // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
1911: // because every byte is a valid ISO Latin 1 character.
1912: // It may not translate correctly but if we failed on
1913: // the encoding anyway, then we're expecting the content
1914: // of the document to be bad. This will just prevent an
1915: // invalid UTF-8 sequence to be detected. This is only
1916: // important when continue-after-fatal-error is turned
1917: // on. -Ac
1918: encoding = "ISO-8859-1";
1919: }
1920:
1921: // try to use a Java reader
1922: String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
1923: if (javaEncoding == null) {
1924: if (fAllowJavaEncodings) {
1925: javaEncoding = encoding;
1926: } else {
1927: fErrorReporter.reportError(
1928: XMLMessageFormatter.XML_DOMAIN,
1929: "EncodingDeclInvalid",
1930: new Object[] { encoding },
1931: XMLErrorReporter.SEVERITY_FATAL_ERROR);
1932: // see comment above.
1933: javaEncoding = "ISO8859_1";
1934: }
1935: }
1936: if (DEBUG_ENCODINGS) {
1937: System.out
1938: .print("$$$ creating Java InputStreamReader: encoding="
1939: + javaEncoding);
1940: if (javaEncoding == encoding) {
1941: System.out.print(" (IANA encoding)");
1942: }
1943: System.out.println();
1944: }
1945: return new InputStreamReader(inputStream, javaEncoding);
1946:
1947: } // createReader(InputStream,String, Boolean): Reader
1948:
1949: /**
1950: * Returns the IANA encoding name that is auto-detected from
1951: * the bytes specified, with the endian-ness of that encoding where appropriate.
1952: *
1953: * @param b4 The first four bytes of the input.
1954: * @param count The number of bytes actually read.
1955: * @return a 2-element array: the first element, an IANA-encoding string,
1956: * the second element a Boolean which is true iff the document is big endian, false
1957: * if it's little-endian, and null if the distinction isn't relevant.
1958: */
1959: protected Object[] getEncodingName(byte[] b4, int count) {
1960:
1961: if (count < 2) {
1962: return new Object[] { "UTF-8", null };
1963: }
1964:
1965: // UTF-16, with BOM
1966: int b0 = b4[0] & 0xFF;
1967: int b1 = b4[1] & 0xFF;
1968: if (b0 == 0xFE && b1 == 0xFF) {
1969: // UTF-16, big-endian
1970: return new Object[] { "UTF-16BE", new Boolean(true) };
1971: }
1972: if (b0 == 0xFF && b1 == 0xFE) {
1973: // UTF-16, little-endian
1974: return new Object[] { "UTF-16LE", new Boolean(false) };
1975: }
1976:
1977: // default to UTF-8 if we don't have enough bytes to make a
1978: // good determination of the encoding
1979: if (count < 3) {
1980: return new Object[] { "UTF-8", null };
1981: }
1982:
1983: // UTF-8 with a BOM
1984: int b2 = b4[2] & 0xFF;
1985: if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
1986: return new Object[] { "UTF-8", null };
1987: }
1988:
1989: // default to UTF-8 if we don't have enough bytes to make a
1990: // good determination of the encoding
1991: if (count < 4) {
1992: return new Object[] { "UTF-8", null };
1993: }
1994:
1995: // other encodings
1996: int b3 = b4[3] & 0xFF;
1997: if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
1998: // UCS-4, big endian (1234)
1999: return new Object[] { "ISO-10646-UCS-4", new Boolean(true) };
2000: }
2001: if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
2002: // UCS-4, little endian (4321)
2003: return new Object[] { "ISO-10646-UCS-4", new Boolean(false) };
2004: }
2005: if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
2006: // UCS-4, unusual octet order (2143)
2007: // REVISIT: What should this be?
2008: return new Object[] { "ISO-10646-UCS-4", null };
2009: }
2010: if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
2011: // UCS-4, unusual octect order (3412)
2012: // REVISIT: What should this be?
2013: return new Object[] { "ISO-10646-UCS-4", null };
2014: }
2015: if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
2016: // UTF-16, big-endian, no BOM
2017: // (or could turn out to be UCS-2...
2018: // REVISIT: What should this be?
2019: return new Object[] { "UTF-16BE", new Boolean(true) };
2020: }
2021: if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
2022: // UTF-16, little-endian, no BOM
2023: // (or could turn out to be UCS-2...
2024: return new Object[] { "UTF-16LE", new Boolean(false) };
2025: }
2026: if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
2027: // EBCDIC
2028: // a la xerces1, return CP037 instead of EBCDIC here
2029: return new Object[] { "CP037", null };
2030: }
2031:
2032: // default encoding
2033: return new Object[] { "UTF-8", null };
2034:
2035: } // getEncodingName(byte[],int):Object[]
2036:
/*
 * XXX: The documentation of endEntity() is deliberately kept here as a
 * reminder that the method still needs to be implemented.
 *
 * Ends an entity.
 *
 * @throws XNIException Thrown by entity handler to signal an error.
 */
2044: /** Prints the contents of the buffer. */
2045: final void print() {
2046: if (DEBUG_BUFFER) {
2047: if (fCurrentEntity != null) {
2048: System.out.print('[');
2049: System.out.print(fCurrentEntity.count);
2050: System.out.print(' ');
2051: System.out.print(fCurrentEntity.position);
2052: if (fCurrentEntity.count > 0) {
2053: System.out.print(" \"");
2054: for (int i = 0; i < fCurrentEntity.count; i++) {
2055: if (i == fCurrentEntity.position) {
2056: System.out.print('^');
2057: }
2058: char c = fCurrentEntity.ch[i];
2059: switch (c) {
2060: case '\n': {
2061: System.out.print("\\n");
2062: break;
2063: }
2064: case '\r': {
2065: System.out.print("\\r");
2066: break;
2067: }
2068: case '\t': {
2069: System.out.print("\\t");
2070: break;
2071: }
2072: case '\\': {
2073: System.out.print("\\\\");
2074: break;
2075: }
2076: default: {
2077: System.out.print(c);
2078: }
2079: }
2080: }
2081: if (fCurrentEntity.position == fCurrentEntity.count) {
2082: System.out.print('^');
2083: }
2084: System.out.print('"');
2085: }
2086: System.out.print(']');
2087: System.out.print(" @ ");
2088: System.out.print(fCurrentEntity.lineNumber);
2089: System.out.print(',');
2090: System.out.print(fCurrentEntity.columnNumber);
2091: } else {
2092: System.out.print("*NO CURRENT ENTITY*");
2093: }
2094: }
2095: }
2096:
2097: /**
2098: * Registers the listener object and provides callback.
2099: * @param listener listener to which call back should be provided when scanner buffer
2100: * is being changed.
2101: */
2102: public void registerListener(XMLBufferListener listener) {
2103: if (!listeners.contains(listener))
2104: listeners.add(listener);
2105: }
2106:
2107: /**
2108: *
2109: * @param loadPos Starting position from which new data is being loaded into scanner buffer.
2110: */
2111: private void invokeListeners(int loadPos) {
2112: for (int i = 0; i < listeners.size(); i++) {
2113: XMLBufferListener listener = (XMLBufferListener) listeners
2114: .get(i);
2115: listener.refresh(loadPos);
2116: }
2117: }
2118:
2119: } // class XMLEntityReaderImpl
|