0001: /*
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: */
0017:
0018: package org.apache.xerces.impl;
0019:
0020: import java.io.EOFException;
0021: import java.io.IOException;
0022: import java.util.Locale;
0023:
0024: import org.apache.xerces.impl.io.UCSReader;
0025: import org.apache.xerces.impl.msg.XMLMessageFormatter;
0026: import org.apache.xerces.util.SymbolTable;
0027: import org.apache.xerces.util.XMLChar;
0028: import org.apache.xerces.util.XMLStringBuffer;
0029: import org.apache.xerces.xni.QName;
0030: import org.apache.xerces.xni.XMLLocator;
0031: import org.apache.xerces.xni.XMLString;
0032:
0033: /**
0034: * Implements the entity scanner methods.
0035: *
0036: * @xerces.internal
0037: *
0038: * @author Andy Clark, IBM
0039: * @author Neil Graham, IBM
0040: * @version $Id: XMLEntityScanner.java 568411 2007-08-22 04:34:13Z mrglavas $
0041: */
0042: public class XMLEntityScanner implements XMLLocator {
0043:
0044: // constants
0045: private static final boolean DEBUG_ENCODINGS = false;
0046: private static final boolean DEBUG_BUFFER = false;
0047:
0048: /**
0049: * To signal the end of the document entity, this exception will be thrown.
0050: */
0051: private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
0052: private static final long serialVersionUID = 980337771224675268L;
0053:
0054: public Throwable fillInStackTrace() {
0055: return this ;
0056: }
0057: };
0058:
0059: //
0060: // Data
0061: //
0062:
0063: private XMLEntityManager fEntityManager = null;
0064: protected XMLEntityManager.ScannedEntity fCurrentEntity = null;
0065:
0066: protected SymbolTable fSymbolTable = null;
0067:
0068: protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;
0069:
0070: /**
0071: * Error reporter. This property identifier is:
0072: * http://apache.org/xml/properties/internal/error-reporter
0073: */
0074: protected XMLErrorReporter fErrorReporter;
0075:
0076: //
0077: // Constructors
0078: //
0079:
/**
 * Default constructor. Performs no initialisation beyond the field
 * initialisers of this class.
 */
public XMLEntityScanner() {
    super();
} // <init>()
0083:
0084: //
0085: // XMLEntityScanner methods
0086: //
0087:
0088: /**
0089: * Returns the base system identifier of the currently scanned
0090: * entity, or null if none is available.
0091: */
0092: public final String getBaseSystemId() {
0093: return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation
0094: .getExpandedSystemId()
0095: : null;
0096: } // getBaseSystemId():String
0097:
0098: /**
0099: * Sets the encoding of the scanner. This method is used by the
0100: * scanners if the XMLDecl or TextDecl line contains an encoding
0101: * pseudo-attribute.
0102: * <p>
0103: * <strong>Note:</strong> The underlying character reader on the
0104: * current entity will be changed to accomodate the new encoding.
0105: * However, the new encoding is ignored if the current reader was
0106: * not constructed from an input stream (e.g. an external entity
0107: * that is resolved directly to the appropriate java.io.Reader
0108: * object).
0109: *
0110: * @param encoding The IANA encoding name of the new encoding.
0111: *
0112: * @throws IOException Thrown if the new encoding is not supported.
0113: *
0114: * @see org.apache.xerces.util.EncodingMap
0115: */
0116: public final void setEncoding(String encoding) throws IOException {
0117:
0118: if (DEBUG_ENCODINGS) {
0119: System.out.println("$$$ setEncoding: " + encoding);
0120: }
0121:
0122: if (fCurrentEntity.stream != null) {
0123: // if the encoding is the same, don't change the reader and
0124: // re-use the original reader used by the OneCharReader
0125: // NOTE: Besides saving an object, this overcomes deficiencies
0126: // in the UTF-16 reader supplied with the standard Java
0127: // distribution (up to and including 1.3). The UTF-16
0128: // decoder buffers 8K blocks even when only asked to read
0129: // a single char! -Ac
0130: if (fCurrentEntity.encoding == null
0131: || !fCurrentEntity.encoding.equals(encoding)) {
0132: // UTF-16 is a bit of a special case. If the encoding is UTF-16,
0133: // and we know the endian-ness, we shouldn't change readers.
0134: // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
0135: // the endian-ness from the encoding we presently have.
0136: if (fCurrentEntity.encoding != null
0137: && fCurrentEntity.encoding.startsWith("UTF-16")) {
0138: String ENCODING = encoding
0139: .toUpperCase(Locale.ENGLISH);
0140: if (ENCODING.equals("UTF-16"))
0141: return;
0142: if (ENCODING.equals("ISO-10646-UCS-4")) {
0143: if (fCurrentEntity.encoding.equals("UTF-16BE")) {
0144: fCurrentEntity.reader = new UCSReader(
0145: fCurrentEntity.stream,
0146: UCSReader.UCS4BE);
0147: } else {
0148: fCurrentEntity.reader = new UCSReader(
0149: fCurrentEntity.stream,
0150: UCSReader.UCS4LE);
0151: }
0152: return;
0153: }
0154: if (ENCODING.equals("ISO-10646-UCS-2")) {
0155: if (fCurrentEntity.encoding.equals("UTF-16BE")) {
0156: fCurrentEntity.reader = new UCSReader(
0157: fCurrentEntity.stream,
0158: UCSReader.UCS2BE);
0159: } else {
0160: fCurrentEntity.reader = new UCSReader(
0161: fCurrentEntity.stream,
0162: UCSReader.UCS2LE);
0163: }
0164: return;
0165: }
0166: }
0167: // wrap a new reader around the input stream, changing
0168: // the encoding
0169: if (DEBUG_ENCODINGS) {
0170: System.out
0171: .println("$$$ creating new reader from stream: "
0172: + fCurrentEntity.stream);
0173: }
0174: //fCurrentEntity.stream.reset();
0175: fCurrentEntity.setReader(fCurrentEntity.stream,
0176: encoding, null);
0177: fCurrentEntity.encoding = encoding;
0178: } else {
0179: if (DEBUG_ENCODINGS)
0180: System.out
0181: .println("$$$ reusing old reader on stream");
0182: }
0183: }
0184:
0185: } // setEncoding(String)
0186:
0187: /**
0188: * Sets the XML version. This method is used by the
0189: * scanners to report the value of the version pseudo-attribute
0190: * in an XML or text declaration.
0191: *
0192: * @param xmlVersion the XML version of the current entity
0193: */
0194: public final void setXMLVersion(String xmlVersion) {
0195: fCurrentEntity.xmlVersion = xmlVersion;
0196: } // setXMLVersion(String)
0197:
0198: /** Returns true if the current entity being scanned is external. */
0199: public final boolean isExternal() {
0200: return fCurrentEntity.isExternal();
0201: } // isExternal():boolean
0202:
0203: /**
0204: * Returns the next character on the input.
0205: * <p>
0206: * <strong>Note:</strong> The character is <em>not</em> consumed.
0207: *
0208: * @throws IOException Thrown if i/o error occurs.
0209: * @throws EOFException Thrown on end of file.
0210: */
0211: public int peekChar() throws IOException {
0212: if (DEBUG_BUFFER) {
0213: System.out.print("(peekChar: ");
0214: XMLEntityManager.print(fCurrentEntity);
0215: System.out.println();
0216: }
0217:
0218: // load more characters, if needed
0219: if (fCurrentEntity.position == fCurrentEntity.count) {
0220: load(0, true);
0221: }
0222:
0223: // peek at character
0224: int c = fCurrentEntity.ch[fCurrentEntity.position];
0225:
0226: // return peeked character
0227: if (DEBUG_BUFFER) {
0228: System.out.print(")peekChar: ");
0229: XMLEntityManager.print(fCurrentEntity);
0230: if (fCurrentEntity.isExternal()) {
0231: System.out.println(" -> '"
0232: + (c != '\r' ? (char) c : '\n') + "'");
0233: } else {
0234: System.out.println(" -> '" + (char) c + "'");
0235: }
0236: }
0237: if (fCurrentEntity.isExternal()) {
0238: return c != '\r' ? c : '\n';
0239: } else {
0240: return c;
0241: }
0242:
0243: } // peekChar():int
0244:
0245: /**
0246: * Returns the next character on the input.
0247: * <p>
0248: * <strong>Note:</strong> The character is consumed.
0249: *
0250: * @throws IOException Thrown if i/o error occurs.
0251: * @throws EOFException Thrown on end of file.
0252: */
0253: public int scanChar() throws IOException {
0254: if (DEBUG_BUFFER) {
0255: System.out.print("(scanChar: ");
0256: XMLEntityManager.print(fCurrentEntity);
0257: System.out.println();
0258: }
0259:
0260: // load more characters, if needed
0261: if (fCurrentEntity.position == fCurrentEntity.count) {
0262: load(0, true);
0263: }
0264:
0265: // scan character
0266: int c = fCurrentEntity.ch[fCurrentEntity.position++];
0267: boolean external = false;
0268: if (c == '\n'
0269: || (c == '\r' && (external = fCurrentEntity
0270: .isExternal()))) {
0271: fCurrentEntity.lineNumber++;
0272: fCurrentEntity.columnNumber = 1;
0273: if (fCurrentEntity.position == fCurrentEntity.count) {
0274: fCurrentEntity.ch[0] = (char) c;
0275: load(1, false);
0276: }
0277: if (c == '\r' && external) {
0278: if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
0279: fCurrentEntity.position--;
0280: }
0281: c = '\n';
0282: }
0283: }
0284:
0285: // return character that was scanned
0286: if (DEBUG_BUFFER) {
0287: System.out.print(")scanChar: ");
0288: XMLEntityManager.print(fCurrentEntity);
0289: System.out.println(" -> '" + (char) c + "'");
0290: }
0291: fCurrentEntity.columnNumber++;
0292: return c;
0293:
0294: } // scanChar():int
0295:
0296: /**
0297: * Returns a string matching the NMTOKEN production appearing immediately
0298: * on the input as a symbol, or null if NMTOKEN Name string is present.
0299: * <p>
0300: * <strong>Note:</strong> The NMTOKEN characters are consumed.
0301: * <p>
0302: * <strong>Note:</strong> The string returned must be a symbol. The
0303: * SymbolTable can be used for this purpose.
0304: *
0305: * @throws IOException Thrown if i/o error occurs.
0306: * @throws EOFException Thrown on end of file.
0307: *
0308: * @see org.apache.xerces.util.SymbolTable
0309: * @see org.apache.xerces.util.XMLChar#isName
0310: */
0311: public String scanNmtoken() throws IOException {
0312: if (DEBUG_BUFFER) {
0313: System.out.print("(scanNmtoken: ");
0314: XMLEntityManager.print(fCurrentEntity);
0315: System.out.println();
0316: }
0317:
0318: // load more characters, if needed
0319: if (fCurrentEntity.position == fCurrentEntity.count) {
0320: load(0, true);
0321: }
0322:
0323: // scan nmtoken
0324: int offset = fCurrentEntity.position;
0325: while (XMLChar
0326: .isName(fCurrentEntity.ch[fCurrentEntity.position])) {
0327: if (++fCurrentEntity.position == fCurrentEntity.count) {
0328: int length = fCurrentEntity.position - offset;
0329: if (length == fCurrentEntity.ch.length) {
0330: // bad luck we have to resize our buffer
0331: char[] tmp = new char[fCurrentEntity.ch.length << 1];
0332: System.arraycopy(fCurrentEntity.ch, offset, tmp, 0,
0333: length);
0334: fCurrentEntity.ch = tmp;
0335: } else {
0336: System.arraycopy(fCurrentEntity.ch, offset,
0337: fCurrentEntity.ch, 0, length);
0338: }
0339: offset = 0;
0340: if (load(length, false)) {
0341: break;
0342: }
0343: }
0344: }
0345: int length = fCurrentEntity.position - offset;
0346: fCurrentEntity.columnNumber += length;
0347:
0348: // return nmtoken
0349: String symbol = null;
0350: if (length > 0) {
0351: symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset,
0352: length);
0353: }
0354: if (DEBUG_BUFFER) {
0355: System.out.print(")scanNmtoken: ");
0356: XMLEntityManager.print(fCurrentEntity);
0357: System.out.println(" -> " + String.valueOf(symbol));
0358: }
0359: return symbol;
0360:
0361: } // scanNmtoken():String
0362:
0363: /**
0364: * Returns a string matching the Name production appearing immediately
0365: * on the input as a symbol, or null if no Name string is present.
0366: * <p>
0367: * <strong>Note:</strong> The Name characters are consumed.
0368: * <p>
0369: * <strong>Note:</strong> The string returned must be a symbol. The
0370: * SymbolTable can be used for this purpose.
0371: *
0372: * @throws IOException Thrown if i/o error occurs.
0373: * @throws EOFException Thrown on end of file.
0374: *
0375: * @see org.apache.xerces.util.SymbolTable
0376: * @see org.apache.xerces.util.XMLChar#isName
0377: * @see org.apache.xerces.util.XMLChar#isNameStart
0378: */
0379: public String scanName() throws IOException {
0380: if (DEBUG_BUFFER) {
0381: System.out.print("(scanName: ");
0382: XMLEntityManager.print(fCurrentEntity);
0383: System.out.println();
0384: }
0385:
0386: // load more characters, if needed
0387: if (fCurrentEntity.position == fCurrentEntity.count) {
0388: load(0, true);
0389: }
0390:
0391: // scan name
0392: int offset = fCurrentEntity.position;
0393: if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
0394: if (++fCurrentEntity.position == fCurrentEntity.count) {
0395: fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
0396: offset = 0;
0397: if (load(1, false)) {
0398: fCurrentEntity.columnNumber++;
0399: String symbol = fSymbolTable.addSymbol(
0400: fCurrentEntity.ch, 0, 1);
0401: if (DEBUG_BUFFER) {
0402: System.out.print(")scanName: ");
0403: XMLEntityManager.print(fCurrentEntity);
0404: System.out.println(" -> "
0405: + String.valueOf(symbol));
0406: }
0407: return symbol;
0408: }
0409: }
0410: while (XMLChar
0411: .isName(fCurrentEntity.ch[fCurrentEntity.position])) {
0412: if (++fCurrentEntity.position == fCurrentEntity.count) {
0413: int length = fCurrentEntity.position - offset;
0414: if (length == fCurrentEntity.ch.length) {
0415: // bad luck we have to resize our buffer
0416: char[] tmp = new char[fCurrentEntity.ch.length << 1];
0417: System.arraycopy(fCurrentEntity.ch, offset,
0418: tmp, 0, length);
0419: fCurrentEntity.ch = tmp;
0420: } else {
0421: System.arraycopy(fCurrentEntity.ch, offset,
0422: fCurrentEntity.ch, 0, length);
0423: }
0424: offset = 0;
0425: if (load(length, false)) {
0426: break;
0427: }
0428: }
0429: }
0430: }
0431: int length = fCurrentEntity.position - offset;
0432: fCurrentEntity.columnNumber += length;
0433:
0434: // return name
0435: String symbol = null;
0436: if (length > 0) {
0437: symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset,
0438: length);
0439: }
0440: if (DEBUG_BUFFER) {
0441: System.out.print(")scanName: ");
0442: XMLEntityManager.print(fCurrentEntity);
0443: System.out.println(" -> " + String.valueOf(symbol));
0444: }
0445: return symbol;
0446:
0447: } // scanName():String
0448:
0449: /**
0450: * Returns a string matching the NCName production appearing immediately
0451: * on the input as a symbol, or null if no NCName string is present.
0452: * <p>
0453: * <strong>Note:</strong> The NCName characters are consumed.
0454: * <p>
0455: * <strong>Note:</strong> The string returned must be a symbol. The
0456: * SymbolTable can be used for this purpose.
0457: *
0458: * @throws IOException Thrown if i/o error occurs.
0459: * @throws EOFException Thrown on end of file.
0460: *
0461: * @see org.apache.xerces.util.SymbolTable
0462: * @see org.apache.xerces.util.XMLChar#isNCName
0463: * @see org.apache.xerces.util.XMLChar#isNCNameStart
0464: */
0465: public String scanNCName() throws IOException {
0466: if (DEBUG_BUFFER) {
0467: System.out.print("(scanNCName: ");
0468: XMLEntityManager.print(fCurrentEntity);
0469: System.out.println();
0470: }
0471:
0472: // load more characters, if needed
0473: if (fCurrentEntity.position == fCurrentEntity.count) {
0474: load(0, true);
0475: }
0476:
0477: // scan name
0478: int offset = fCurrentEntity.position;
0479: if (XMLChar.isNCNameStart(fCurrentEntity.ch[offset])) {
0480: if (++fCurrentEntity.position == fCurrentEntity.count) {
0481: fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
0482: offset = 0;
0483: if (load(1, false)) {
0484: fCurrentEntity.columnNumber++;
0485: String symbol = fSymbolTable.addSymbol(
0486: fCurrentEntity.ch, 0, 1);
0487: if (DEBUG_BUFFER) {
0488: System.out.print(")scanNCName: ");
0489: XMLEntityManager.print(fCurrentEntity);
0490: System.out.println(" -> "
0491: + String.valueOf(symbol));
0492: }
0493: return symbol;
0494: }
0495: }
0496: while (XMLChar
0497: .isNCName(fCurrentEntity.ch[fCurrentEntity.position])) {
0498: if (++fCurrentEntity.position == fCurrentEntity.count) {
0499: int length = fCurrentEntity.position - offset;
0500: if (length == fCurrentEntity.ch.length) {
0501: // bad luck we have to resize our buffer
0502: char[] tmp = new char[fCurrentEntity.ch.length << 1];
0503: System.arraycopy(fCurrentEntity.ch, offset,
0504: tmp, 0, length);
0505: fCurrentEntity.ch = tmp;
0506: } else {
0507: System.arraycopy(fCurrentEntity.ch, offset,
0508: fCurrentEntity.ch, 0, length);
0509: }
0510: offset = 0;
0511: if (load(length, false)) {
0512: break;
0513: }
0514: }
0515: }
0516: }
0517: int length = fCurrentEntity.position - offset;
0518: fCurrentEntity.columnNumber += length;
0519:
0520: // return name
0521: String symbol = null;
0522: if (length > 0) {
0523: symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset,
0524: length);
0525: }
0526: if (DEBUG_BUFFER) {
0527: System.out.print(")scanNCName: ");
0528: XMLEntityManager.print(fCurrentEntity);
0529: System.out.println(" -> " + String.valueOf(symbol));
0530: }
0531: return symbol;
0532:
0533: } // scanNCName():String
0534:
0535: /**
0536: * Scans a qualified name from the input, setting the fields of the
0537: * QName structure appropriately.
0538: * <p>
0539: * <strong>Note:</strong> The qualified name characters are consumed.
0540: * <p>
0541: * <strong>Note:</strong> The strings used to set the values of the
0542: * QName structure must be symbols. The SymbolTable can be used for
0543: * this purpose.
0544: *
0545: * @param qname The qualified name structure to fill.
0546: *
0547: * @return Returns true if a qualified name appeared immediately on
0548: * the input and was scanned, false otherwise.
0549: *
0550: * @throws IOException Thrown if i/o error occurs.
0551: * @throws EOFException Thrown on end of file.
0552: *
0553: * @see org.apache.xerces.util.SymbolTable
0554: * @see org.apache.xerces.util.XMLChar#isName
0555: * @see org.apache.xerces.util.XMLChar#isNameStart
0556: */
0557: public boolean scanQName(QName qname) throws IOException {
0558: if (DEBUG_BUFFER) {
0559: System.out.print("(scanQName, " + qname + ": ");
0560: XMLEntityManager.print(fCurrentEntity);
0561: System.out.println();
0562: }
0563:
0564: // load more characters, if needed
0565: if (fCurrentEntity.position == fCurrentEntity.count) {
0566: load(0, true);
0567: }
0568:
0569: // scan qualified name
0570: int offset = fCurrentEntity.position;
0571: if (XMLChar.isNCNameStart(fCurrentEntity.ch[offset])) {
0572: if (++fCurrentEntity.position == fCurrentEntity.count) {
0573: fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
0574: offset = 0;
0575: if (load(1, false)) {
0576: fCurrentEntity.columnNumber++;
0577: String name = fSymbolTable.addSymbol(
0578: fCurrentEntity.ch, 0, 1);
0579: qname.setValues(null, name, name, null);
0580: if (DEBUG_BUFFER) {
0581: System.out.print(")scanQName, " + qname + ": ");
0582: XMLEntityManager.print(fCurrentEntity);
0583: System.out.println(" -> true");
0584: }
0585: return true;
0586: }
0587: }
0588: int index = -1;
0589: while (XMLChar
0590: .isName(fCurrentEntity.ch[fCurrentEntity.position])) {
0591: char c = fCurrentEntity.ch[fCurrentEntity.position];
0592:
0593: if (c == ':') {
0594: if (index != -1) {
0595: break;
0596: }
0597: index = fCurrentEntity.position;
0598: }
0599: if (++fCurrentEntity.position == fCurrentEntity.count) {
0600: int length = fCurrentEntity.position - offset;
0601: if (length == fCurrentEntity.ch.length) {
0602: // bad luck we have to resize our buffer
0603: char[] tmp = new char[fCurrentEntity.ch.length << 1];
0604: System.arraycopy(fCurrentEntity.ch, offset,
0605: tmp, 0, length);
0606: fCurrentEntity.ch = tmp;
0607: } else {
0608: System.arraycopy(fCurrentEntity.ch, offset,
0609: fCurrentEntity.ch, 0, length);
0610: }
0611: if (index != -1) {
0612: index = index - offset;
0613: }
0614: offset = 0;
0615: if (load(length, false)) {
0616: break;
0617: }
0618: }
0619: }
0620: int length = fCurrentEntity.position - offset;
0621: fCurrentEntity.columnNumber += length;
0622: if (length > 0) {
0623: String prefix = null;
0624: String localpart = null;
0625: String rawname = fSymbolTable.addSymbol(
0626: fCurrentEntity.ch, offset, length);
0627: if (index != -1) {
0628: int prefixLength = index - offset;
0629: prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
0630: offset, prefixLength);
0631: int len = length - prefixLength - 1;
0632: int startLocal = index + 1;
0633: if (!XMLChar
0634: .isNCNameStart(fCurrentEntity.ch[startLocal])) {
0635: fErrorReporter.reportError(
0636: XMLMessageFormatter.XML_DOMAIN,
0637: "IllegalQName", null,
0638: XMLErrorReporter.SEVERITY_FATAL_ERROR);
0639: }
0640: localpart = fSymbolTable.addSymbol(
0641: fCurrentEntity.ch, startLocal, len);
0642:
0643: } else {
0644: localpart = rawname;
0645: }
0646: qname.setValues(prefix, localpart, rawname, null);
0647: if (DEBUG_BUFFER) {
0648: System.out.print(")scanQName, " + qname + ": ");
0649: XMLEntityManager.print(fCurrentEntity);
0650: System.out.println(" -> true");
0651: }
0652: return true;
0653: }
0654: }
0655:
0656: // no qualified name found
0657: if (DEBUG_BUFFER) {
0658: System.out.print(")scanQName, " + qname + ": ");
0659: XMLEntityManager.print(fCurrentEntity);
0660: System.out.println(" -> false");
0661: }
0662: return false;
0663:
0664: } // scanQName(QName):boolean
0665:
0666: /**
0667: * Scans a range of parsed character data, setting the fields of the
0668: * XMLString structure, appropriately.
0669: * <p>
0670: * <strong>Note:</strong> The characters are consumed.
0671: * <p>
0672: * <strong>Note:</strong> This method does not guarantee to return
0673: * the longest run of parsed character data. This method may return
0674: * before markup due to reaching the end of the input buffer or any
0675: * other reason.
0676: * <p>
0677: * <strong>Note:</strong> The fields contained in the XMLString
0678: * structure are not guaranteed to remain valid upon subsequent calls
0679: * to the entity scanner. Therefore, the caller is responsible for
0680: * immediately using the returned character data or making a copy of
0681: * the character data.
0682: *
0683: * @param content The content structure to fill.
0684: *
0685: * @return Returns the next character on the input, if known. This
0686: * value may be -1 but this does <em>note</em> designate
0687: * end of file.
0688: *
0689: * @throws IOException Thrown if i/o error occurs.
0690: * @throws EOFException Thrown on end of file.
0691: */
0692: public int scanContent(XMLString content) throws IOException {
0693: if (DEBUG_BUFFER) {
0694: System.out.print("(scanContent: ");
0695: XMLEntityManager.print(fCurrentEntity);
0696: System.out.println();
0697: }
0698:
0699: // load more characters, if needed
0700: if (fCurrentEntity.position == fCurrentEntity.count) {
0701: load(0, true);
0702: } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
0703: fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
0704: load(1, false);
0705: fCurrentEntity.position = 0;
0706: fCurrentEntity.startPosition = 0;
0707: }
0708:
0709: // normalize newlines
0710: int offset = fCurrentEntity.position;
0711: int c = fCurrentEntity.ch[offset];
0712: int newlines = 0;
0713: boolean external = fCurrentEntity.isExternal();
0714: if (c == '\n' || (c == '\r' && external)) {
0715: if (DEBUG_BUFFER) {
0716: System.out.print("[newline, " + offset + ", "
0717: + fCurrentEntity.position + ": ");
0718: XMLEntityManager.print(fCurrentEntity);
0719: System.out.println();
0720: }
0721: do {
0722: c = fCurrentEntity.ch[fCurrentEntity.position++];
0723: if (c == '\r' && external) {
0724: newlines++;
0725: fCurrentEntity.lineNumber++;
0726: fCurrentEntity.columnNumber = 1;
0727: if (fCurrentEntity.position == fCurrentEntity.count) {
0728: offset = 0;
0729: fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
0730: fCurrentEntity.position = newlines;
0731: fCurrentEntity.startPosition = newlines;
0732: if (load(newlines, false)) {
0733: break;
0734: }
0735: }
0736: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
0737: fCurrentEntity.position++;
0738: offset++;
0739: }
0740: /*** NEWLINE NORMALIZATION ***/
0741: else {
0742: newlines++;
0743: }
0744: } else if (c == '\n') {
0745: newlines++;
0746: fCurrentEntity.lineNumber++;
0747: fCurrentEntity.columnNumber = 1;
0748: if (fCurrentEntity.position == fCurrentEntity.count) {
0749: offset = 0;
0750: fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
0751: fCurrentEntity.position = newlines;
0752: fCurrentEntity.startPosition = newlines;
0753: if (load(newlines, false)) {
0754: break;
0755: }
0756: }
0757: } else {
0758: fCurrentEntity.position--;
0759: break;
0760: }
0761: } while (fCurrentEntity.position < fCurrentEntity.count - 1);
0762: for (int i = offset; i < fCurrentEntity.position; i++) {
0763: fCurrentEntity.ch[i] = '\n';
0764: }
0765: int length = fCurrentEntity.position - offset;
0766: if (fCurrentEntity.position == fCurrentEntity.count - 1) {
0767: content.setValues(fCurrentEntity.ch, offset, length);
0768: if (DEBUG_BUFFER) {
0769: System.out.print("]newline, " + offset + ", "
0770: + fCurrentEntity.position + ": ");
0771: XMLEntityManager.print(fCurrentEntity);
0772: System.out.println();
0773: }
0774: return -1;
0775: }
0776: if (DEBUG_BUFFER) {
0777: System.out.print("]newline, " + offset + ", "
0778: + fCurrentEntity.position + ": ");
0779: XMLEntityManager.print(fCurrentEntity);
0780: System.out.println();
0781: }
0782: }
0783:
0784: // inner loop, scanning for content
0785: while (fCurrentEntity.position < fCurrentEntity.count) {
0786: c = fCurrentEntity.ch[fCurrentEntity.position++];
0787: if (!XMLChar.isContent(c)) {
0788: fCurrentEntity.position--;
0789: break;
0790: }
0791: }
0792: int length = fCurrentEntity.position - offset;
0793: fCurrentEntity.columnNumber += length - newlines;
0794: content.setValues(fCurrentEntity.ch, offset, length);
0795:
0796: // return next character
0797: if (fCurrentEntity.position != fCurrentEntity.count) {
0798: c = fCurrentEntity.ch[fCurrentEntity.position];
0799: // REVISIT: Does this need to be updated to fix the
0800: // #x0D ^#x0A newline normalization problem? -Ac
0801: if (c == '\r' && external) {
0802: c = '\n';
0803: }
0804: } else {
0805: c = -1;
0806: }
0807: if (DEBUG_BUFFER) {
0808: System.out.print(")scanContent: ");
0809: XMLEntityManager.print(fCurrentEntity);
0810: System.out.println(" -> '" + (char) c + "'");
0811: }
0812: return c;
0813:
0814: } // scanContent(XMLString):int
0815:
0816: /**
0817: * Scans a range of attribute value data, setting the fields of the
0818: * XMLString structure, appropriately.
0819: * <p>
0820: * <strong>Note:</strong> The characters are consumed.
0821: * <p>
0822: * <strong>Note:</strong> This method does not guarantee to return
0823: * the longest run of attribute value data. This method may return
0824: * before the quote character due to reaching the end of the input
0825: * buffer or any other reason.
0826: * <p>
0827: * <strong>Note:</strong> The fields contained in the XMLString
0828: * structure are not guaranteed to remain valid upon subsequent calls
0829: * to the entity scanner. Therefore, the caller is responsible for
0830: * immediately using the returned character data or making a copy of
0831: * the character data.
0832: *
0833: * @param quote The quote character that signifies the end of the
0834: * attribute value data.
0835: * @param content The content structure to fill.
0836: *
0837: * @return Returns the next character on the input, if known. This
0838: * value may be -1 but this does <em>note</em> designate
0839: * end of file.
0840: *
0841: * @throws IOException Thrown if i/o error occurs.
0842: * @throws EOFException Thrown on end of file.
0843: */
0844: public int scanLiteral(int quote, XMLString content)
0845: throws IOException {
0846: if (DEBUG_BUFFER) {
0847: System.out.print("(scanLiteral, '" + (char) quote + "': ");
0848: XMLEntityManager.print(fCurrentEntity);
0849: System.out.println();
0850: }
0851:
0852: // load more characters, if needed
0853: if (fCurrentEntity.position == fCurrentEntity.count) {
0854: load(0, true);
0855: } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
0856: fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
0857: load(1, false);
0858: fCurrentEntity.position = 0;
0859: fCurrentEntity.startPosition = 0;
0860: }
0861:
0862: // normalize newlines
0863: int offset = fCurrentEntity.position;
0864: int c = fCurrentEntity.ch[offset];
0865: int newlines = 0;
0866: boolean external = fCurrentEntity.isExternal();
0867: if (c == '\n' || (c == '\r' && external)) {
0868: if (DEBUG_BUFFER) {
0869: System.out.print("[newline, " + offset + ", "
0870: + fCurrentEntity.position + ": ");
0871: XMLEntityManager.print(fCurrentEntity);
0872: System.out.println();
0873: }
0874: do {
0875: c = fCurrentEntity.ch[fCurrentEntity.position++];
0876: if (c == '\r' && external) {
0877: newlines++;
0878: fCurrentEntity.lineNumber++;
0879: fCurrentEntity.columnNumber = 1;
0880: if (fCurrentEntity.position == fCurrentEntity.count) {
0881: offset = 0;
0882: fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
0883: fCurrentEntity.position = newlines;
0884: fCurrentEntity.startPosition = newlines;
0885: if (load(newlines, false)) {
0886: break;
0887: }
0888: }
0889: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
0890: fCurrentEntity.position++;
0891: offset++;
0892: }
0893: /*** NEWLINE NORMALIZATION ***/
0894: else {
0895: newlines++;
0896: }
0897: /***/
0898: } else if (c == '\n') {
0899: newlines++;
0900: fCurrentEntity.lineNumber++;
0901: fCurrentEntity.columnNumber = 1;
0902: if (fCurrentEntity.position == fCurrentEntity.count) {
0903: offset = 0;
0904: fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
0905: fCurrentEntity.position = newlines;
0906: fCurrentEntity.startPosition = newlines;
0907: if (load(newlines, false)) {
0908: break;
0909: }
0910: }
0911: } else {
0912: fCurrentEntity.position--;
0913: break;
0914: }
0915: } while (fCurrentEntity.position < fCurrentEntity.count - 1);
0916: for (int i = offset; i < fCurrentEntity.position; i++) {
0917: fCurrentEntity.ch[i] = '\n';
0918: }
0919: int length = fCurrentEntity.position - offset;
0920: if (fCurrentEntity.position == fCurrentEntity.count - 1) {
0921: content.setValues(fCurrentEntity.ch, offset, length);
0922: if (DEBUG_BUFFER) {
0923: System.out.print("]newline, " + offset + ", "
0924: + fCurrentEntity.position + ": ");
0925: XMLEntityManager.print(fCurrentEntity);
0926: System.out.println();
0927: }
0928: return -1;
0929: }
0930: if (DEBUG_BUFFER) {
0931: System.out.print("]newline, " + offset + ", "
0932: + fCurrentEntity.position + ": ");
0933: XMLEntityManager.print(fCurrentEntity);
0934: System.out.println();
0935: }
0936: }
0937:
0938: // scan literal value
0939: while (fCurrentEntity.position < fCurrentEntity.count) {
0940: c = fCurrentEntity.ch[fCurrentEntity.position++];
0941: if ((c == quote && (!fCurrentEntity.literal || external))
0942: || c == '%' || !XMLChar.isContent(c)) {
0943: fCurrentEntity.position--;
0944: break;
0945: }
0946: }
0947: int length = fCurrentEntity.position - offset;
0948: fCurrentEntity.columnNumber += length - newlines;
0949: content.setValues(fCurrentEntity.ch, offset, length);
0950:
0951: // return next character
0952: if (fCurrentEntity.position != fCurrentEntity.count) {
0953: c = fCurrentEntity.ch[fCurrentEntity.position];
0954: // NOTE: We don't want to accidentally signal the
0955: // end of the literal if we're expanding an
0956: // entity appearing in the literal. -Ac
0957: if (c == quote && fCurrentEntity.literal) {
0958: c = -1;
0959: }
0960: } else {
0961: c = -1;
0962: }
0963: if (DEBUG_BUFFER) {
0964: System.out.print(")scanLiteral, '" + (char) quote + "': ");
0965: XMLEntityManager.print(fCurrentEntity);
0966: System.out.println(" -> '" + (char) c + "'");
0967: }
0968: return c;
0969:
0970: } // scanLiteral(int,XMLString):int
0971:
0972: /**
0973: * Scans a range of character data up to the specified delimiter,
0974: * appending the scanned characters to the supplied XMLStringBuffer.
0975: * <p>
0976: * <strong>Note:</strong> The characters are consumed.
0977: * <p>
0978: * <strong>Note:</strong> This assumes that the internal buffer is
0979: * at least the same size, or bigger, than the length of the delimiter
0980: * and that the delimiter contains at least one character.
0981: * <p>
0982: * <strong>Note:</strong> This method does not guarantee to return
0983: * the longest run of character data. This method may return before
0984: * the delimiter due to reaching the end of the input buffer or any
0985: * other reason.
0986: * <p>
0987: * <strong>Note:</strong> The fields contained in the XMLString
0988: * structure are not guaranteed to remain valid upon subsequent calls
0989: * to the entity scanner. Therefore, the caller is responsible for
0990: * immediately using the returned character data or making a copy of
0991: * the character data.
0992: *
0993: * @param delimiter The string that signifies the end of the character
0994: * data to be scanned.
0995: * @param buffer The XMLStringBuffer to fill.
0996: *
0997: * @return Returns true if the delimiter was not found in the data scanned
* by this call (there may be more data to scan); false if the
* delimiter was matched and consumed (it is not appended to the
* buffer), or if the input ended before a whole delimiter could
* be buffered.
0998: *
0999: * @throws IOException Thrown if i/o error occurs.
1000: * @throws EOFException Thrown on end of file.
1001: */
1002: public boolean scanData(String delimiter, XMLStringBuffer buffer)
1003: throws IOException {
1004:
1005: // REVISIT: This method does not need to use a string buffer.
1006: // The change would avoid the array copies and increase
1007: // performance. -Ac
1008: //
1009: // Currently, this method is called for scanning CDATA
1010: // sections, comments, and processing instruction data.
1011: // So if this code is updated to NOT buffer, the scanning
1012: // code for comments and processing instructions will
1013: // need to be updated to do its own buffering. The code
1014: // for CDATA sections is safe as-is. -Ac
1015:
1016: boolean found = false;
1017: int delimLen = delimiter.length();
1018: char charAt0 = delimiter.charAt(0);
1019: boolean external = fCurrentEntity.isExternal();
1020: if (DEBUG_BUFFER) {
1021: System.out.print("(scanData: ");
1022: XMLEntityManager.print(fCurrentEntity);
1023: System.out.println();
1024: }
1025:
1026: // load more characters, if needed
1027:
1028: if (fCurrentEntity.position == fCurrentEntity.count) {
1029: load(0, true);
1030: }
1031:
1032: boolean bNextEntity = false;
1033:
// Fewer than delimLen characters remain buffered: slide the unread tail
// to the front of the buffer and refill behind it, until a whole
// delimiter fits or load() reports the end of the entity.
1034: while ((fCurrentEntity.position > fCurrentEntity.count
1035: - delimLen)
1036: && (!bNextEntity)) {
1037: System.arraycopy(fCurrentEntity.ch,
1038: fCurrentEntity.position, fCurrentEntity.ch, 0,
1039: fCurrentEntity.count - fCurrentEntity.position);
1040:
1041: bNextEntity = load(fCurrentEntity.count
1042: - fCurrentEntity.position, false);
1043: fCurrentEntity.position = 0;
1044: fCurrentEntity.startPosition = 0;
1045: }
1046:
1047: if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
1048: // something must be wrong with the input: e.g., file ends in an unterminated comment
// Flush whatever is left into the buffer, mark the buffer as fully
// consumed and let load() switch entities; the delimiter was not found.
1049: int length = fCurrentEntity.count - fCurrentEntity.position;
1050: buffer.append(fCurrentEntity.ch, fCurrentEntity.position,
1051: length);
// NOTE(review): this adds the whole buffer count, not just `length`,
// to the column number -- confirm that is intended.
1052: fCurrentEntity.columnNumber += fCurrentEntity.count;
1053: fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1054: fCurrentEntity.position = fCurrentEntity.count;
1055: fCurrentEntity.startPosition = fCurrentEntity.count;
1056: load(0, true);
1057: return false;
1058: }
1059:
1060: // normalize newlines
// offset marks the first character that will be appended to the buffer;
// newlines counts line-break characters so they can be excluded from
// the column-number update at the end.
1061: int offset = fCurrentEntity.position;
1062: int c = fCurrentEntity.ch[offset];
1063: int newlines = 0;
1064: if (c == '\n' || (c == '\r' && external)) {
1065: if (DEBUG_BUFFER) {
1066: System.out.print("[newline, " + offset + ", "
1067: + fCurrentEntity.position + ": ");
1068: XMLEntityManager.print(fCurrentEntity);
1069: System.out.println();
1070: }
// Consume the leading run of line-break characters, bumping the line
// number as we go; the consumed span [offset, position) is overwritten
// with '\n' after the loop.
1071: do {
1072: c = fCurrentEntity.ch[fCurrentEntity.position++];
1073: if (c == '\r' && external) {
1074: newlines++;
1075: fCurrentEntity.lineNumber++;
1076: fCurrentEntity.columnNumber = 1;
1077: if (fCurrentEntity.position == fCurrentEntity.count) {
// Buffer exhausted inside the run: restart the output span at index 0
// and read new data in after the first `newlines` slots.
1078: offset = 0;
1079: fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1080: fCurrentEntity.position = newlines;
1081: fCurrentEntity.startPosition = newlines;
1082: if (load(newlines, false)) {
1083: break;
1084: }
1085: }
// A '\n' directly after the '\r' belongs to the same line break: skip it
// and move offset forward so the pair yields one output character.
1086: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1087: fCurrentEntity.position++;
1088: offset++;
1089: }
1090: /*** NEWLINE NORMALIZATION ***/
1091: else {
1092: newlines++;
1093: }
1094: } else if (c == '\n') {
1095: newlines++;
1096: fCurrentEntity.lineNumber++;
1097: fCurrentEntity.columnNumber = 1;
1098: if (fCurrentEntity.position == fCurrentEntity.count) {
1099: offset = 0;
1100: fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1101: fCurrentEntity.position = newlines;
1102: fCurrentEntity.startPosition = newlines;
// NOTE(review): only this '\n' branch resets count before reloading; the
// '\r' branch above does not -- confirm the asymmetry is intentional.
1103: fCurrentEntity.count = newlines;
1104: if (load(newlines, false)) {
1105: break;
1106: }
1107: }
1108: } else {
// First non-newline character: un-read it and leave the run.
1109: fCurrentEntity.position--;
1110: break;
1111: }
1112: } while (fCurrentEntity.position < fCurrentEntity.count - 1);
1113: for (int i = offset; i < fCurrentEntity.position; i++) {
1114: fCurrentEntity.ch[i] = '\n';
1115: }
1116: int length = fCurrentEntity.position - offset;
// The run stopped on the last buffered character: hand back the
// normalized newlines now and report that the delimiter is still pending.
1117: if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1118: buffer.append(fCurrentEntity.ch, offset, length);
1119: if (DEBUG_BUFFER) {
1120: System.out.print("]newline, " + offset + ", "
1121: + fCurrentEntity.position + ": ");
1122: XMLEntityManager.print(fCurrentEntity);
1123: System.out.println();
1124: }
1125: return true;
1126: }
1127: if (DEBUG_BUFFER) {
1128: System.out.print("]newline, " + offset + ", "
1129: + fCurrentEntity.position + ": ");
1130: XMLEntityManager.print(fCurrentEntity);
1131: System.out.println();
1132: }
1133: }
1134:
1135: // iterate over buffer looking for delimiter
1136: OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
1137: c = fCurrentEntity.ch[fCurrentEntity.position++];
1138: if (c == charAt0) {
1139: // looks like we just hit the delimiter
1140: int delimOffset = fCurrentEntity.position - 1;
1141: for (int i = 1; i < delimLen; i++) {
// Buffer ran out in the middle of a candidate delimiter: back up to its
// first character and stop, so a later call can re-examine it.
1142: if (fCurrentEntity.position == fCurrentEntity.count) {
1143: fCurrentEntity.position -= i;
1144: break OUTER;
1145: }
1146: c = fCurrentEntity.ch[fCurrentEntity.position++];
1147: if (delimiter.charAt(i) != c) {
// Mismatch: un-read this character so the outer loop inspects it again.
1148: fCurrentEntity.position--;
1149: break;
1150: }
1151: }
// All delimLen characters matched only if position advanced exactly
// delimLen places past the start of the candidate.
1152: if (fCurrentEntity.position == delimOffset + delimLen) {
1153: found = true;
1154: break;
1155: }
1156: } else if (c == '\n' || (external && c == '\r')) {
// Stop in front of a line break; it is normalized at the top of this
// method on the next call.
1157: fCurrentEntity.position--;
1158: break;
1159: } else if (XMLChar.isInvalid(c)) {
// Stop in front of an invalid character and return what was scanned so
// far; the scanner is left positioned on the invalid character.
1160: fCurrentEntity.position--;
1161: int length = fCurrentEntity.position - offset;
1162: fCurrentEntity.columnNumber += length - newlines;
1163: buffer.append(fCurrentEntity.ch, offset, length);
1164: return true;
1165: }
1166: }
1167: int length = fCurrentEntity.position - offset;
1168: fCurrentEntity.columnNumber += length - newlines;
// The delimiter has been consumed but must not be part of the data.
1169: if (found) {
1170: length -= delimLen;
1171: }
1172: buffer.append(fCurrentEntity.ch, offset, length);
1173:
1174: // return true if the delimiter has not been found yet
1175: if (DEBUG_BUFFER) {
1176: System.out.print(")scanData: ");
1177: XMLEntityManager.print(fCurrentEntity);
1178: System.out.println(" -> " + !found);
1179: }
1180: return !found;
1181:
1182: } // scanData(String,XMLStringBuffer):boolean
1183:
1184: /**
1185: * Skips a character appearing immediately on the input.
1186: * <p>
1187: * <strong>Note:</strong> The character is consumed only if it matches
1188: * the specified character.
1189: *
1190: * @param c The character to skip.
1191: *
1192: * @return Returns true if the character was skipped.
1193: *
1194: * @throws IOException Thrown if i/o error occurs.
1195: * @throws EOFException Thrown on end of file.
1196: */
1197: public boolean skipChar(int c) throws IOException {
1198: if (DEBUG_BUFFER) {
1199: System.out.print("(skipChar, '" + (char) c + "': ");
1200: XMLEntityManager.print(fCurrentEntity);
1201: System.out.println();
1202: }
1203:
1204: // load more characters, if needed
1205: if (fCurrentEntity.position == fCurrentEntity.count) {
1206: load(0, true);
1207: }
1208:
1209: // skip character
1210: int cc = fCurrentEntity.ch[fCurrentEntity.position];
1211: if (cc == c) {
1212: fCurrentEntity.position++;
1213: if (c == '\n') {
1214: fCurrentEntity.lineNumber++;
1215: fCurrentEntity.columnNumber = 1;
1216: } else {
1217: fCurrentEntity.columnNumber++;
1218: }
1219: if (DEBUG_BUFFER) {
1220: System.out.print(")skipChar, '" + (char) c + "': ");
1221: XMLEntityManager.print(fCurrentEntity);
1222: System.out.println(" -> true");
1223: }
1224: return true;
1225: } else if (c == '\n' && cc == '\r'
1226: && fCurrentEntity.isExternal()) {
1227: // handle newlines
1228: if (fCurrentEntity.position == fCurrentEntity.count) {
1229: fCurrentEntity.ch[0] = (char) cc;
1230: load(1, false);
1231: }
1232: fCurrentEntity.position++;
1233: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1234: fCurrentEntity.position++;
1235: }
1236: fCurrentEntity.lineNumber++;
1237: fCurrentEntity.columnNumber = 1;
1238: if (DEBUG_BUFFER) {
1239: System.out.print(")skipChar, '" + (char) c + "': ");
1240: XMLEntityManager.print(fCurrentEntity);
1241: System.out.println(" -> true");
1242: }
1243: return true;
1244: }
1245:
1246: // character was not skipped
1247: if (DEBUG_BUFFER) {
1248: System.out.print(")skipChar, '" + (char) c + "': ");
1249: XMLEntityManager.print(fCurrentEntity);
1250: System.out.println(" -> false");
1251: }
1252: return false;
1253:
1254: } // skipChar(int):boolean
1255:
1256: /**
1257: * Skips space characters appearing immediately on the input.
1258: * <p>
1259: * <strong>Note:</strong> The characters are consumed only if they are
1260: * space characters.
1261: *
1262: * @return Returns true if at least one space character was skipped.
1263: *
1264: * @throws IOException Thrown if i/o error occurs.
1265: * @throws EOFException Thrown on end of file.
1266: *
1267: * @see org.apache.xerces.util.XMLChar#isSpace
1268: */
1269: public boolean skipSpaces() throws IOException {
1270: if (DEBUG_BUFFER) {
1271: System.out.print("(skipSpaces: ");
1272: XMLEntityManager.print(fCurrentEntity);
1273: System.out.println();
1274: }
1275:
1276: // load more characters, if needed
1277: if (fCurrentEntity.position == fCurrentEntity.count) {
1278: load(0, true);
1279: }
1280:
1281: // skip spaces
1282: int c = fCurrentEntity.ch[fCurrentEntity.position];
1283: if (XMLChar.isSpace(c)) {
1284: boolean external = fCurrentEntity.isExternal();
1285: do {
1286: boolean entityChanged = false;
1287: // handle newlines
1288: if (c == '\n' || (external && c == '\r')) {
1289: fCurrentEntity.lineNumber++;
1290: fCurrentEntity.columnNumber = 1;
1291: if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1292: fCurrentEntity.ch[0] = (char) c;
1293: entityChanged = load(1, true);
1294: if (!entityChanged) {
1295: // the load change the position to be 1,
1296: // need to restore it when entity not changed
1297: fCurrentEntity.position = 0;
1298: fCurrentEntity.startPosition = 0;
1299: }
1300: }
1301: if (c == '\r' && external) {
1302: // REVISIT: Does this need to be updated to fix the
1303: // #x0D ^#x0A newline normalization problem? -Ac
1304: if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
1305: fCurrentEntity.position--;
1306: }
1307: }
1308: /*** NEWLINE NORMALIZATION ***
1309: else {
1310: if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
1311: && external) {
1312: fCurrentEntity.position++;
1313: }
1314: }
1315: /***/
1316: } else {
1317: fCurrentEntity.columnNumber++;
1318: }
1319: // load more characters, if needed
1320: if (!entityChanged)
1321: fCurrentEntity.position++;
1322: if (fCurrentEntity.position == fCurrentEntity.count) {
1323: load(0, true);
1324: }
1325: } while (XMLChar
1326: .isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
1327: if (DEBUG_BUFFER) {
1328: System.out.print(")skipSpaces: ");
1329: XMLEntityManager.print(fCurrentEntity);
1330: System.out.println(" -> true");
1331: }
1332: return true;
1333: }
1334:
1335: // no spaces were found
1336: if (DEBUG_BUFFER) {
1337: System.out.print(")skipSpaces: ");
1338: XMLEntityManager.print(fCurrentEntity);
1339: System.out.println(" -> false");
1340: }
1341: return false;
1342:
1343: } // skipSpaces():boolean
1344:
1345: /**
1346: * Skips space characters appearing immediately on the input that would
1347: * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
1348: * normalization is performed. This is useful when scanning structures
1349: * such as the XMLDecl and TextDecl that can only contain US-ASCII
1350: * characters.
1351: * <p>
1352: * <strong>Note:</strong> The characters are consumed only if they would
1353: * match non-terminal S before end of line normalization is performed.
1354: *
1355: * @return Returns true if at least one space character was skipped.
1356: *
1357: * @throws IOException Thrown if i/o error occurs.
1358: * @throws EOFException Thrown on end of file.
1359: *
1360: * @see org.apache.xerces.util.XMLChar#isSpace
1361: */
1362: public final boolean skipDeclSpaces() throws IOException {
1363: if (DEBUG_BUFFER) {
1364: System.out.print("(skipDeclSpaces: ");
1365: XMLEntityManager.print(fCurrentEntity);
1366: System.out.println();
1367: }
1368:
1369: // load more characters, if needed
1370: if (fCurrentEntity.position == fCurrentEntity.count) {
1371: load(0, true);
1372: }
1373:
1374: // skip spaces
1375: int c = fCurrentEntity.ch[fCurrentEntity.position];
1376: if (XMLChar.isSpace(c)) {
1377: boolean external = fCurrentEntity.isExternal();
1378: do {
1379: boolean entityChanged = false;
1380: // handle newlines
1381: if (c == '\n' || (external && c == '\r')) {
1382: fCurrentEntity.lineNumber++;
1383: fCurrentEntity.columnNumber = 1;
1384: if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1385: fCurrentEntity.ch[0] = (char) c;
1386: entityChanged = load(1, true);
1387: if (!entityChanged) {
1388: // the load change the position to be 1,
1389: // need to restore it when entity not changed
1390: fCurrentEntity.position = 0;
1391: fCurrentEntity.startPosition = 0;
1392: }
1393: }
1394: if (c == '\r' && external) {
1395: // REVISIT: Does this need to be updated to fix the
1396: // #x0D ^#x0A newline normalization problem? -Ac
1397: if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
1398: fCurrentEntity.position--;
1399: }
1400: }
1401: /*** NEWLINE NORMALIZATION ***
1402: else {
1403: if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
1404: && external) {
1405: fCurrentEntity.position++;
1406: }
1407: }
1408: /***/
1409: } else {
1410: fCurrentEntity.columnNumber++;
1411: }
1412: // load more characters, if needed
1413: if (!entityChanged)
1414: fCurrentEntity.position++;
1415: if (fCurrentEntity.position == fCurrentEntity.count) {
1416: load(0, true);
1417: }
1418: } while (XMLChar
1419: .isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
1420: if (DEBUG_BUFFER) {
1421: System.out.print(")skipDeclSpaces: ");
1422: XMLEntityManager.print(fCurrentEntity);
1423: System.out.println(" -> true");
1424: }
1425: return true;
1426: }
1427:
1428: // no spaces were found
1429: if (DEBUG_BUFFER) {
1430: System.out.print(")skipDeclSpaces: ");
1431: XMLEntityManager.print(fCurrentEntity);
1432: System.out.println(" -> false");
1433: }
1434: return false;
1435:
1436: } // skipDeclSpaces():boolean
1437:
1438: /**
1439: * Skips the specified string appearing immediately on the input.
1440: * <p>
1441: * <strong>Note:</strong> The characters are consumed only if they are
1442: * space characters.
1443: *
1444: * @param s The string to skip.
1445: *
1446: * @return Returns true if the string was skipped.
1447: *
1448: * @throws IOException Thrown if i/o error occurs.
1449: * @throws EOFException Thrown on end of file.
1450: */
1451: public boolean skipString(String s) throws IOException {
1452: if (DEBUG_BUFFER) {
1453: System.out.print("(skipString, \"" + s + "\": ");
1454: XMLEntityManager.print(fCurrentEntity);
1455: System.out.println();
1456: }
1457:
1458: // load more characters, if needed
1459: if (fCurrentEntity.position == fCurrentEntity.count) {
1460: load(0, true);
1461: }
1462:
1463: // skip string
1464: final int length = s.length();
1465: for (int i = 0; i < length; i++) {
1466: char c = fCurrentEntity.ch[fCurrentEntity.position++];
1467: if (c != s.charAt(i)) {
1468: fCurrentEntity.position -= i + 1;
1469: if (DEBUG_BUFFER) {
1470: System.out.print(")skipString, \"" + s + "\": ");
1471: XMLEntityManager.print(fCurrentEntity);
1472: System.out.println(" -> false");
1473: }
1474: return false;
1475: }
1476: if (i < length - 1
1477: && fCurrentEntity.position == fCurrentEntity.count) {
1478: System.arraycopy(fCurrentEntity.ch,
1479: fCurrentEntity.count - i - 1,
1480: fCurrentEntity.ch, 0, i + 1);
1481: // REVISIT: Can a string to be skipped cross an
1482: // entity boundary? -Ac
1483: if (load(i + 1, false)) {
1484: fCurrentEntity.startPosition -= i + 1;
1485: fCurrentEntity.position -= i + 1;
1486: if (DEBUG_BUFFER) {
1487: System.out
1488: .print(")skipString, \"" + s + "\": ");
1489: XMLEntityManager.print(fCurrentEntity);
1490: System.out.println(" -> false");
1491: }
1492: return false;
1493: }
1494: }
1495: }
1496: if (DEBUG_BUFFER) {
1497: System.out.print(")skipString, \"" + s + "\": ");
1498: XMLEntityManager.print(fCurrentEntity);
1499: System.out.println(" -> true");
1500: }
1501: fCurrentEntity.columnNumber += length;
1502: return true;
1503:
1504: } // skipString(String):boolean
1505:
1506: //
1507: // Locator methods
1508: //
1509:
1510: /**
1511: * Return the public identifier for the current document event.
1512: * <p>
1513: * The return value is the public identifier of the document
1514: * entity or of the external parsed entity in which the markup
1515: * triggering the event appears.
1516: *
1517: * @return A string containing the public identifier, or
1518: * null if none is available.
1519: */
1520: public final String getPublicId() {
1521: return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation
1522: .getPublicId()
1523: : null;
1524: } // getPublicId():String
1525:
1526: /**
1527: * Return the expanded system identifier for the current document event.
1528: * <p>
1529: * The return value is the expanded system identifier of the document
1530: * entity or of the external parsed entity in which the markup
1531: * triggering the event appears.
1532: * <p>
1533: * If the system identifier is a URL, the parser must resolve it
1534: * fully before passing it to the application.
1535: *
1536: * @return A string containing the expanded system identifier, or null
1537: * if none is available.
1538: */
1539: public final String getExpandedSystemId() {
1540: if (fCurrentEntity != null) {
1541: if (fCurrentEntity.entityLocation != null
1542: && fCurrentEntity.entityLocation
1543: .getExpandedSystemId() != null) {
1544: return fCurrentEntity.entityLocation
1545: .getExpandedSystemId();
1546: } else {
1547: // get the current entity to return something appropriate:
1548: return fCurrentEntity.getExpandedSystemId();
1549: }
1550: }
1551: return null;
1552: } // getExpandedSystemId():String
1553:
1554: /**
1555: * Return the literal system identifier for the current document event.
1556: * <p>
1557: * The return value is the literal system identifier of the document
1558: * entity or of the external parsed entity in which the markup
1559: * triggering the event appears.
1560: * <p>
1561: * @return A string containing the literal system identifier, or null
1562: * if none is available.
1563: */
1564: public final String getLiteralSystemId() {
1565: if (fCurrentEntity != null) {
1566: if (fCurrentEntity.entityLocation != null
1567: && fCurrentEntity.entityLocation
1568: .getLiteralSystemId() != null) {
1569: return fCurrentEntity.entityLocation
1570: .getLiteralSystemId();
1571: } else {
1572: // get the current entity to do it:
1573: return fCurrentEntity.getLiteralSystemId();
1574: }
1575: }
1576: return null;
1577: } // getLiteralSystemId():String
1578:
1579: /**
1580: * Returns the line number where the current document event ends.
1581: * <p>
1582: * <strong>Warning:</strong> The return value from the method
1583: * is intended only as an approximation for the sake of error
1584: * reporting; it is not intended to provide sufficient information
1585: * to edit the character content of the original XML document.
1586: * <p>
1587: * The return value is an approximation of the line number
1588: * in the document entity or external parsed entity where the
1589: * markup triggering the event appears.
1590: * <p>
1591: * If possible, the line position of the first character after the
1592: * text associated with the document event should be provided.
1593: * The first line in the document is line 1.
1594: *
1595: * @return The line number, or -1 if none is available.
1596: */
1597: public final int getLineNumber() {
1598: if (fCurrentEntity != null) {
1599: if (fCurrentEntity.isExternal()) {
1600: return fCurrentEntity.lineNumber;
1601: } else {
1602: // ask the current entity to return something appropriate:
1603: return fCurrentEntity.getLineNumber();
1604: }
1605: }
1606:
1607: return -1;
1608:
1609: } // getLineNumber():int
1610:
1611: /**
1612: * Returns the column number where the current document event ends.
1613: * <p>
1614: * <strong>Warning:</strong> The return value from the method
1615: * is intended only as an approximation for the sake of error
1616: * reporting; it is not intended to provide sufficient information
1617: * to edit the character content of the original XML document.
1618: * <p>
1619: * The return value is an approximation of the column number
1620: * in the document entity or external parsed entity where the
1621: * markup triggering the event appears.
1622: * <p>
1623: * If possible, the line position of the first character after the
1624: * text associated with the document event should be provided.
1625: * The first column in each line is column 1.
1626: *
1627: * @return The column number, or -1 if none is available.
1628: */
1629: public final int getColumnNumber() {
1630: if (fCurrentEntity != null) {
1631: if (fCurrentEntity.isExternal()) {
1632: return fCurrentEntity.columnNumber;
1633: } else {
1634: // ask current entity to find appropriate column number
1635: return fCurrentEntity.getColumnNumber();
1636: }
1637: }
1638:
1639: return -1;
1640: } // getColumnNumber():int
1641:
1642: /**
1643: * Returns the character offset where the current document event ends.
1644: * <p>
1645: * <strong>Warning:</strong> The return value from the method
1646: * is intended only as an approximation for the sake of error
1647: * reporting; it is not intended to provide sufficient information
1648: * to edit the character content of the original XML document.
1649: * <p>
1650: * The return value is an approximation of the character offset
1651: * in the document entity or external parsed entity where the
1652: * markup triggering the event appears.
1653: * <p>
1654: * If possible, the character offset of the first character after the
1655: * text associated with the document event should be provided.
1656: *
1657: * @return The character offset, or -1 if none is available.
1658: */
1659: public final int getCharacterOffset() {
1660: if (fCurrentEntity != null) {
1661: if (fCurrentEntity.isExternal()) {
1662: return fCurrentEntity.baseCharOffset
1663: + (fCurrentEntity.position - fCurrentEntity.startPosition);
1664: } else {
1665: // ask current entity to find appropriate character offset
1666: return fCurrentEntity.getCharacterOffset();
1667: }
1668: }
1669:
1670: return -1;
1671: } // getCharacterOffset():int
1672:
1673: /**
1674: * Returns the encoding of the current entity.
1675: * Note that, for a given entity, this value can only be
1676: * considered final once the encoding declaration has been read (or once it
1677: * has been determined that there is no such declaration) since, no encoding
1678: * having been specified on the XMLInputSource, the parser
1679: * will make an initial "guess" which could be in error.
1680: */
1681: public final String getEncoding() {
1682: if (fCurrentEntity != null) {
1683: if (fCurrentEntity.isExternal()) {
1684: return fCurrentEntity.encoding;
1685: } else {
1686: // ask current entity to find appropriate encoding
1687: return fCurrentEntity.getEncoding();
1688: }
1689: }
1690: return null;
1691: } // getEncoding():String
1692:
1693: /**
1694: * Returns the XML version of the current entity. This will normally be the
1695: * value from the XML or text declaration or defaulted by the parser. Note that
1696: * that this value may be different than the version of the processing rules
1697: * applied to the current entity. For instance, an XML 1.1 document may refer to
1698: * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire
1699: * document. Also note that, for a given entity, this value can only be considered
1700: * final once the XML or text declaration has been read or once it has been
1701: * determined that there is no such declaration.
1702: */
1703: public final String getXMLVersion() {
1704: if (fCurrentEntity != null) {
1705: if (fCurrentEntity.isExternal()) {
1706: return fCurrentEntity.xmlVersion;
1707: } else {
1708: // ask current entity to find the appropriate XML version
1709: return fCurrentEntity.getXMLVersion();
1710: }
1711: }
1712: return null;
1713: } // getXMLVersion():String
1714:
1715: // allow entity manager to tell us what the current entityis:
1716: public final void setCurrentEntity(
1717: XMLEntityManager.ScannedEntity ent) {
1718: fCurrentEntity = ent;
1719: }
1720:
1721: // set buffer size:
1722: public final void setBufferSize(int size) {
1723: // REVISIT: Buffer size passed to entity scanner
1724: // was not being kept in synch with the actual size
1725: // of the buffers in each scanned entity. If any
1726: // of the buffers were actually resized, it was possible
1727: // that the parser would throw an ArrayIndexOutOfBoundsException
1728: // for documents which contained names which are longer than
1729: // the current buffer size. Conceivably the buffer size passed
1730: // to entity scanner could be used to determine a minimum size
1731: // for resizing, if doubling its size is smaller than this
1732: // minimum. -- mrglavas
1733: fBufferSize = size;
1734: }
1735:
1736: // reset what little state we have...
1737: public final void reset(SymbolTable symbolTable,
1738: XMLEntityManager entityManager, XMLErrorReporter reporter) {
1739: fCurrentEntity = null;
1740: fSymbolTable = symbolTable;
1741: fEntityManager = entityManager;
1742: fErrorReporter = reporter;
1743: }
1744:
1745: //
1746: // Private methods
1747: //
1748:
1749: /**
1750: * Loads a chunk of text.
1751: *
1752: * @param offset The offset into the character buffer to
1753: * read the next batch of characters.
1754: * @param changeEntity True if the load should change entities
1755: * at the end of the entity, otherwise leave
1756: * the current entity in place and the entity
1757: * boundary will be signaled by the return
1758: * value.
1759: *
1760: * @returns Returns true if the entity changed as a result of this
1761: * load operation.
1762: */
1763: final boolean load(int offset, boolean changeEntity)
1764: throws IOException {
1765: if (DEBUG_BUFFER) {
1766: System.out.print("(load, " + offset + ": ");
1767: XMLEntityManager.print(fCurrentEntity);
1768: System.out.println();
1769: }
1770:
1771: fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1772: // read characters
1773: int length = fCurrentEntity.mayReadChunks ? (fCurrentEntity.ch.length - offset)
1774: : (XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE);
1775: if (DEBUG_BUFFER)
1776: System.out.println(" length to try to read: " + length);
1777: int count = fCurrentEntity.reader.read(fCurrentEntity.ch,
1778: offset, length);
1779: if (DEBUG_BUFFER)
1780: System.out.println(" length actually read: " + count);
1781:
1782: // reset count and position
1783: boolean entityChanged = false;
1784: if (count != -1) {
1785: if (count != 0) {
1786: fCurrentEntity.count = count + offset;
1787: fCurrentEntity.position = offset;
1788: fCurrentEntity.startPosition = offset;
1789: }
1790: }
1791:
1792: // end of this entity
1793: else {
1794: fCurrentEntity.count = offset;
1795: fCurrentEntity.position = offset;
1796: fCurrentEntity.startPosition = offset;
1797: entityChanged = true;
1798: if (changeEntity) {
1799: fEntityManager.endEntity();
1800: if (fCurrentEntity == null) {
1801: throw END_OF_DOCUMENT_ENTITY;
1802: }
1803: // handle the trailing edges
1804: if (fCurrentEntity.position == fCurrentEntity.count) {
1805: load(0, true);
1806: }
1807: }
1808: }
1809: if (DEBUG_BUFFER) {
1810: System.out.print(")load, " + offset + ": ");
1811: XMLEntityManager.print(fCurrentEntity);
1812: System.out.println();
1813: }
1814:
1815: return entityChanged;
1816:
1817: } // load(int, boolean):boolean
1818:
1819: } // class XMLEntityScanner
|