0001: /*
0002: * Copyright 1999,2004 The Apache Software Foundation.
0003: *
0004: * Licensed under the Apache License, Version 2.0 (the "License");
0005: * you may not use this file except in compliance with the License.
0006: * You may obtain a copy of the License at
0007: *
0008: * http://www.apache.org/licenses/LICENSE-2.0
0009: *
0010: * Unless required by applicable law or agreed to in writing, software
0011: * distributed under the License is distributed on an "AS IS" BASIS,
0012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013: * See the License for the specific language governing permissions and
0014: * limitations under the License.
0015: */
0016:
0017: package org.apache.jasper.xmlparser;
0018:
0019: import java.io.EOFException;
0020: import java.io.InputStream;
0021: import java.io.InputStreamReader;
0022: import java.io.IOException;
0023: import java.io.Reader;
0024: import java.util.Locale;
0025: import java.util.jar.JarFile;
0026:
0027: import org.apache.jasper.JasperException;
0028: import org.apache.jasper.JspCompilationContext;
0029: import org.apache.jasper.compiler.ErrorDispatcher;
0030: import org.apache.jasper.compiler.JspUtil;
0031:
0032: import org.apache.xerces.util.EncodingMap;
0033: import org.apache.xerces.util.SymbolTable;
0034: import org.apache.xerces.util.XMLChar;
0035: import org.apache.xerces.util.XMLStringBuffer;
0036: import org.apache.xerces.xni.XMLString;
0037:
0038: public class XercesEncodingDetector extends XMLEncodingDetector {
0039:
0040: private InputStream stream;
0041: private String encoding;
0042: private boolean isEncodingSetInProlog;
0043: private Boolean isBigEndian;
0044: private Reader reader;
0045:
0046: // org.apache.xerces.impl.XMLEntityManager fields
0047: public static final int DEFAULT_BUFFER_SIZE = 2048;
0048: public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
0049: private boolean fAllowJavaEncodings;
0050: private SymbolTable fSymbolTable;
0051: private XercesEncodingDetector fCurrentEntity;
0052: private int fBufferSize = DEFAULT_BUFFER_SIZE;
0053:
0054: // org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields
0055: private int lineNumber = 1;
0056: private int columnNumber = 1;
0057: private boolean literal;
0058: private char[] ch = new char[DEFAULT_BUFFER_SIZE];
0059: private int position;
0060: private int count;
0061: private boolean mayReadChunks = false;
0062:
0063: // org.apache.xerces.impl.XMLScanner fields
0064: private XMLString fString = new XMLString();
0065: private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
0066: private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
0067: private final static String fVersionSymbol = "version";
0068: private final static String fEncodingSymbol = "encoding";
0069: private final static String fStandaloneSymbol = "standalone";
0070:
0071: // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields
0072: private int fMarkupDepth = 0;
0073: private String[] fStrings = new String[3];
0074:
0075: private ErrorDispatcher err;
0076:
/**
 * Creates a detector that scans itself as the current entity and interns
 * scanned names in a fresh symbol table.
 */
public XercesEncodingDetector() {
    this.fCurrentEntity = this;
    this.fSymbolTable = new SymbolTable();
}
0084:
0085: /**
0086: * Autodetects the encoding of the XML document supplied by the given
0087: * input stream.
0088: *
0089: * Encoding autodetection is done according to the XML 1.0 specification,
0090: * Appendix F.1: Detection Without External Encoding Information.
0091: *
0092: * @param in The input stream to read
0093: * @param err The error dispatcher
0094: *
0095: * @return Two-element array, where the first element (of type
0096: * java.lang.String) contains the name of the (auto)detected encoding, and
0097: * the second element (of type java.lang.Boolean) specifies whether the
0098: * encoding was specified using the 'encoding' attribute of an XML prolog
0099: * (TRUE) or autodetected (FALSE).
0100: */
0101: public Object[] getEncoding(InputStream in, ErrorDispatcher err)
0102: throws IOException, JasperException {
0103: XercesEncodingDetector detector = this ;
0104: this .stream = in;
0105: this .err = err;
0106: detector.createInitialReader();
0107: detector.scanXMLDecl();
0108:
0109: return new Object[] { detector.encoding,
0110: new Boolean(detector.isEncodingSetInProlog) };
0111: }
0112:
0113: public Object[] getEncodingMethod(String fname, JarFile jarFile,
0114: JspCompilationContext ctxt, ErrorDispatcher err)
0115: throws IOException, JasperException {
0116: InputStream inStream = JspUtil.getInputStream(fname, jarFile,
0117: ctxt, err);
0118: Object[] ret = getEncoding(inStream, err);
0119: inStream.close();
0120:
0121: return ret;
0122: }
0123:
0124: // stub method
0125: void endEntity() {
0126: }
0127:
0128: // Adapted from:
0129: // org.apache.xerces.impl.XMLEntityManager.startEntity()
0130: private void createInitialReader() throws IOException,
0131: JasperException {
0132:
0133: // wrap this stream in RewindableInputStream
0134: stream = new RewindableInputStream(stream);
0135:
0136: // perform auto-detect of encoding if necessary
0137: if (encoding == null) {
0138: // read first four bytes and determine encoding
0139: final byte[] b4 = new byte[4];
0140: int count = 0;
0141: for (; count < 4; count++) {
0142: b4[count] = (byte) stream.read();
0143: }
0144: if (count == 4) {
0145: Object[] encodingDesc = getEncodingName(b4, count);
0146: encoding = (String) (encodingDesc[0]);
0147: isBigEndian = (Boolean) (encodingDesc[1]);
0148:
0149: stream.reset();
0150: // Special case UTF-8 files with BOM created by Microsoft
0151: // tools. It's more efficient to consume the BOM than make
0152: // the reader perform extra checks. -Ac
0153: if (count > 2 && encoding.equals("UTF-8")) {
0154: int b0 = b4[0] & 0xFF;
0155: int b1 = b4[1] & 0xFF;
0156: int b2 = b4[2] & 0xFF;
0157: if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
0158: // ignore first three bytes...
0159: stream.skip(3);
0160: }
0161: }
0162: reader = createReader(stream, encoding, isBigEndian);
0163: } else {
0164: reader = createReader(stream, encoding, isBigEndian);
0165: }
0166: }
0167: }
0168:
0169: // Adapted from:
0170: // org.apache.xerces.impl.XMLEntityManager.createReader
0171: /**
0172: * Creates a reader capable of reading the given input stream in
0173: * the specified encoding.
0174: *
0175: * @param inputStream The input stream.
0176: * @param encoding The encoding name that the input stream is
0177: * encoded using. If the user has specified that
0178: * Java encoding names are allowed, then the
0179: * encoding name may be a Java encoding name;
0180: * otherwise, it is an ianaEncoding name.
0181: * @param isBigEndian For encodings (like uCS-4), whose names cannot
0182: * specify a byte order, this tells whether the order
0183: * is bigEndian. null means unknown or not relevant.
0184: *
0185: * @return Returns a reader.
0186: */
0187: private Reader createReader(InputStream inputStream,
0188: String encoding, Boolean isBigEndian) throws IOException,
0189: JasperException {
0190:
0191: // normalize encoding name
0192: if (encoding == null) {
0193: encoding = "UTF-8";
0194: }
0195:
0196: // try to use an optimized reader
0197: String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
0198: if (ENCODING.equals("UTF-8")) {
0199: return new UTF8Reader(inputStream, fBufferSize);
0200: }
0201: if (ENCODING.equals("US-ASCII")) {
0202: return new ASCIIReader(inputStream, fBufferSize);
0203: }
0204: if (ENCODING.equals("ISO-10646-UCS-4")) {
0205: if (isBigEndian != null) {
0206: boolean isBE = isBigEndian.booleanValue();
0207: if (isBE) {
0208: return new UCSReader(inputStream, UCSReader.UCS4BE);
0209: } else {
0210: return new UCSReader(inputStream, UCSReader.UCS4LE);
0211: }
0212: } else {
0213: err.jspError(
0214: "jsp.error.xml.encodingByteOrderUnsupported",
0215: encoding);
0216: }
0217: }
0218: if (ENCODING.equals("ISO-10646-UCS-2")) {
0219: if (isBigEndian != null) { // sould never happen with this encoding...
0220: boolean isBE = isBigEndian.booleanValue();
0221: if (isBE) {
0222: return new UCSReader(inputStream, UCSReader.UCS2BE);
0223: } else {
0224: return new UCSReader(inputStream, UCSReader.UCS2LE);
0225: }
0226: } else {
0227: err.jspError(
0228: "jsp.error.xml.encodingByteOrderUnsupported",
0229: encoding);
0230: }
0231: }
0232:
0233: // check for valid name
0234: boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
0235: boolean validJava = XMLChar.isValidJavaEncoding(encoding);
0236: if (!validIANA || (fAllowJavaEncodings && !validJava)) {
0237: err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
0238: // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
0239: // because every byte is a valid ISO Latin 1 character.
0240: // It may not translate correctly but if we failed on
0241: // the encoding anyway, then we're expecting the content
0242: // of the document to be bad. This will just prevent an
0243: // invalid UTF-8 sequence to be detected. This is only
0244: // important when continue-after-fatal-error is turned
0245: // on. -Ac
0246: encoding = "ISO-8859-1";
0247: }
0248:
0249: // try to use a Java reader
0250: String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
0251: if (javaEncoding == null) {
0252: if (fAllowJavaEncodings) {
0253: javaEncoding = encoding;
0254: } else {
0255: err.jspError("jsp.error.xml.encodingDeclInvalid",
0256: encoding);
0257: // see comment above.
0258: javaEncoding = "ISO8859_1";
0259: }
0260: }
0261: return new InputStreamReader(inputStream, javaEncoding);
0262:
0263: } // createReader(InputStream,String, Boolean): Reader
0264:
0265: // Adapted from:
0266: // org.apache.xerces.impl.XMLEntityManager.getEncodingName
0267: /**
0268: * Returns the IANA encoding name that is auto-detected from
0269: * the bytes specified, with the endian-ness of that encoding where
0270: * appropriate.
0271: *
0272: * @param b4 The first four bytes of the input.
0273: * @param count The number of bytes actually read.
0274: * @return a 2-element array: the first element, an IANA-encoding string,
0275: * the second element a Boolean which is true iff the document is big
0276: * endian, false if it's little-endian, and null if the distinction isn't
0277: * relevant.
0278: */
0279: private Object[] getEncodingName(byte[] b4, int count) {
0280:
0281: if (count < 2) {
0282: return new Object[] { "UTF-8", null };
0283: }
0284:
0285: // UTF-16, with BOM
0286: int b0 = b4[0] & 0xFF;
0287: int b1 = b4[1] & 0xFF;
0288: if (b0 == 0xFE && b1 == 0xFF) {
0289: // UTF-16, big-endian
0290: return new Object[] { "UTF-16BE", new Boolean(true) };
0291: }
0292: if (b0 == 0xFF && b1 == 0xFE) {
0293: // UTF-16, little-endian
0294: return new Object[] { "UTF-16LE", new Boolean(false) };
0295: }
0296:
0297: // default to UTF-8 if we don't have enough bytes to make a
0298: // good determination of the encoding
0299: if (count < 3) {
0300: return new Object[] { "UTF-8", null };
0301: }
0302:
0303: // UTF-8 with a BOM
0304: int b2 = b4[2] & 0xFF;
0305: if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
0306: return new Object[] { "UTF-8", null };
0307: }
0308:
0309: // default to UTF-8 if we don't have enough bytes to make a
0310: // good determination of the encoding
0311: if (count < 4) {
0312: return new Object[] { "UTF-8", null };
0313: }
0314:
0315: // other encodings
0316: int b3 = b4[3] & 0xFF;
0317: if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
0318: // UCS-4, big endian (1234)
0319: return new Object[] { "ISO-10646-UCS-4", new Boolean(true) };
0320: }
0321: if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
0322: // UCS-4, little endian (4321)
0323: return new Object[] { "ISO-10646-UCS-4", new Boolean(false) };
0324: }
0325: if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
0326: // UCS-4, unusual octet order (2143)
0327: // REVISIT: What should this be?
0328: return new Object[] { "ISO-10646-UCS-4", null };
0329: }
0330: if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
0331: // UCS-4, unusual octect order (3412)
0332: // REVISIT: What should this be?
0333: return new Object[] { "ISO-10646-UCS-4", null };
0334: }
0335: if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
0336: // UTF-16, big-endian, no BOM
0337: // (or could turn out to be UCS-2...
0338: // REVISIT: What should this be?
0339: return new Object[] { "UTF-16BE", new Boolean(true) };
0340: }
0341: if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
0342: // UTF-16, little-endian, no BOM
0343: // (or could turn out to be UCS-2...
0344: return new Object[] { "UTF-16LE", new Boolean(false) };
0345: }
0346: if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
0347: // EBCDIC
0348: // a la xerces1, return CP037 instead of EBCDIC here
0349: return new Object[] { "CP037", null };
0350: }
0351:
0352: // default encoding
0353: return new Object[] { "UTF-8", null };
0354:
0355: }
0356:
0357: // Adapted from:
0358: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal
0359: /** Returns true if the current entity being scanned is external. */
0360: public boolean isExternal() {
0361: return true;
0362: }
0363:
0364: // Adapted from:
0365: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar
0366: /**
0367: * Returns the next character on the input.
0368: * <p>
0369: * <strong>Note:</strong> The character is <em>not</em> consumed.
0370: *
0371: * @throws IOException Thrown if i/o error occurs.
0372: * @throws EOFException Thrown on end of file.
0373: */
0374: public int peekChar() throws IOException {
0375:
0376: // load more characters, if needed
0377: if (fCurrentEntity.position == fCurrentEntity.count) {
0378: load(0, true);
0379: }
0380:
0381: // peek at character
0382: int c = fCurrentEntity.ch[fCurrentEntity.position];
0383:
0384: // return peeked character
0385: if (fCurrentEntity.isExternal()) {
0386: return c != '\r' ? c : '\n';
0387: } else {
0388: return c;
0389: }
0390:
0391: } // peekChar():int
0392:
0393: // Adapted from:
0394: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar
0395: /**
0396: * Returns the next character on the input.
0397: * <p>
0398: * <strong>Note:</strong> The character is consumed.
0399: *
0400: * @throws IOException Thrown if i/o error occurs.
0401: * @throws EOFException Thrown on end of file.
0402: */
0403: public int scanChar() throws IOException {
0404:
0405: // load more characters, if needed
0406: if (fCurrentEntity.position == fCurrentEntity.count) {
0407: load(0, true);
0408: }
0409:
0410: // scan character
0411: int c = fCurrentEntity.ch[fCurrentEntity.position++];
0412: boolean external = false;
0413: if (c == '\n'
0414: || (c == '\r' && (external = fCurrentEntity
0415: .isExternal()))) {
0416: fCurrentEntity.lineNumber++;
0417: fCurrentEntity.columnNumber = 1;
0418: if (fCurrentEntity.position == fCurrentEntity.count) {
0419: fCurrentEntity.ch[0] = (char) c;
0420: load(1, false);
0421: }
0422: if (c == '\r' && external) {
0423: if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
0424: fCurrentEntity.position--;
0425: }
0426: c = '\n';
0427: }
0428: }
0429:
0430: // return character that was scanned
0431: fCurrentEntity.columnNumber++;
0432: return c;
0433:
0434: }
0435:
0436: // Adapted from:
0437: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName
0438: /**
0439: * Returns a string matching the Name production appearing immediately
0440: * on the input as a symbol, or null if no Name string is present.
0441: * <p>
0442: * <strong>Note:</strong> The Name characters are consumed.
0443: * <p>
0444: * <strong>Note:</strong> The string returned must be a symbol. The
0445: * SymbolTable can be used for this purpose.
0446: *
0447: * @return the scanned Name as added to the symbol table, or null if the
0448: * next character cannot start a Name.
0449: *
0450: * @throws IOException Thrown if i/o error occurs.
0451: * @throws EOFException Thrown on end of file.
0452: *
0453: * @see SymbolTable
0453: * @see XMLChar#isName
0453: * @see XMLChar#isNameStart
0453: */
0454: public String scanName() throws IOException {
0455:
0456: // load more characters, if needed
0457: if (fCurrentEntity.position == fCurrentEntity.count) {
0458: load(0, true);
0459: }
0460:
0461: // scan name; offset is where the candidate name starts in the buffer
0462: int offset = fCurrentEntity.position;
0463: if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
0464: if (++fCurrentEntity.position == fCurrentEntity.count) { // first character was the last one buffered
0465: fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; // keep it at the front before loading more
0466: offset = 0;
0467: if (load(1, false)) { // load reported an entity change: the name is that single character
0468: fCurrentEntity.columnNumber++;
0469: String symbol = fSymbolTable.addSymbol(
0470: fCurrentEntity.ch, 0, 1);
0471: return symbol;
0472: }
0473: }
0474: while (XMLChar
0475: .isName(fCurrentEntity.ch[fCurrentEntity.position])) {
0476: if (++fCurrentEntity.position == fCurrentEntity.count) { // buffer exhausted in the middle of the name
0477: int length = fCurrentEntity.position - offset; // characters of the name scanned so far
0478: if (length == fBufferSize) {
0479: // bad luck we have to resize our buffer: the partial name
0479: // already fills it, so double the size and copy the name over
0480: char[] tmp = new char[fBufferSize * 2];
0481: System.arraycopy(fCurrentEntity.ch, offset,
0482: tmp, 0, length);
0483: fCurrentEntity.ch = tmp;
0484: fBufferSize *= 2;
0485: } else { // enough room: shift the partial name to the front of the buffer
0486: System.arraycopy(fCurrentEntity.ch, offset,
0487: fCurrentEntity.ch, 0, length);
0488: }
0489: offset = 0;
0490: if (load(length, false)) { // load behind the partial name; stop if the entity changed
0491: break;
0492: }
0493: }
0494: }
0495: }
0496: int length = fCurrentEntity.position - offset; // 0 when the first character could not start a Name
0497: fCurrentEntity.columnNumber += length;
0498:
0499: // return name
0500: String symbol = null;
0501: if (length > 0) {
0502: symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset,
0503: length);
0504: }
0505: return symbol;
0506:
0507: }
0508:
0509: // Adapted from:
0510: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral
0511: /**
0512: * Scans a range of attribute value data, setting the fields of the
0513: * XMLString structure, appropriately.
0514: * <p>
0515: * <strong>Note:</strong> The characters are consumed.
0516: * <p>
0517: * <strong>Note:</strong> This method does not guarantee to return
0518: * the longest run of attribute value data. This method may return
0519: * before the quote character due to reaching the end of the input
0520: * buffer or any other reason.
0521: * <p>
0522: * <strong>Note:</strong> The fields contained in the XMLString
0523: * structure are not guaranteed to remain valid upon subsequent calls
0524: * to the entity scanner. Therefore, the caller is responsible for
0525: * immediately using the returned character data or making a copy of
0526: * the character data.
0527: *
0528: * @param quote The quote character that signifies the end of the
0529: * attribute value data.
0530: * @param content The content structure to fill.
0531: *
0532: * @return Returns the next character on the input, if known. This
0533: * value may be -1 but this does <em>not</em> designate
0534: * end of file.
0535: *
0536: * @throws IOException Thrown if i/o error occurs.
0537: * @throws EOFException Thrown on end of file.
0538: */
0539: public int scanLiteral(int quote, XMLString content)
0540: throws IOException {
0541:
0542: // load more characters, if needed
0543: if (fCurrentEntity.position == fCurrentEntity.count) {
0544: load(0, true);
0545: } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { // a single character is left in the buffer
0546: fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; // move it to the front
0547: load(1, false); // and load more input behind it
0548: fCurrentEntity.position = 0;
0549: }
0550:
0551: // normalize newlines
0552: int offset = fCurrentEntity.position; // start of the data reported through content
0553: int c = fCurrentEntity.ch[offset];
0554: int newlines = 0; // line breaks seen; left out of the column advance at the end
0555: boolean external = fCurrentEntity.isExternal();
0556: if (c == '\n' || (c == '\r' && external)) {
0557: do {
0558: c = fCurrentEntity.ch[fCurrentEntity.position++];
0559: if (c == '\r' && external) {
0560: newlines++;
0561: fCurrentEntity.lineNumber++;
0562: fCurrentEntity.columnNumber = 1;
0563: if (fCurrentEntity.position == fCurrentEntity.count) { // buffer exhausted: restart at the front, keeping room for the newlines counted so far
0564: offset = 0;
0565: fCurrentEntity.position = newlines;
0566: if (load(newlines, false)) {
0567: break;
0568: }
0569: }
0570: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { // CR LF pair: consume the LF and drop it from the reported range
0571: fCurrentEntity.position++;
0572: offset++;
0573: }
0574: /*** NEWLINE NORMALIZATION ***/
0575: else {
0576: newlines++;
0577: }
0578: /***/
0579: } else if (c == '\n') {
0580: newlines++;
0581: fCurrentEntity.lineNumber++;
0582: fCurrentEntity.columnNumber = 1;
0583: if (fCurrentEntity.position == fCurrentEntity.count) { // buffer exhausted: same restart as in the CR branch
0584: offset = 0;
0585: fCurrentEntity.position = newlines;
0586: if (load(newlines, false)) {
0587: break;
0588: }
0589: }
0590: /*** NEWLINE NORMALIZATION *** (disabled: everything up to the closing marker is commented out)
0591: if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
0592: && external) {
0593: fCurrentEntity.position++;
0594: offset++;
0595: }
0596: /***/
0597: } else { // not a line break: un-read it and leave the newline run
0598: fCurrentEntity.position--;
0599: break;
0600: }
0601: } while (fCurrentEntity.position < fCurrentEntity.count - 1);
0602: for (int i = offset; i < fCurrentEntity.position; i++) { // overwrite the consumed run with plain line feeds
0603: fCurrentEntity.ch[i] = '\n';
0604: }
0605: int length = fCurrentEntity.position - offset;
0606: if (fCurrentEntity.position == fCurrentEntity.count - 1) { // one character left: report the newlines now, next character unknown
0607: content.setValues(fCurrentEntity.ch, offset, length);
0608: return -1;
0609: }
0610: }
0611:
0612: // scan literal value: stop at the quote (unless inside a non-external
0612: // literal entity), at '%', or at a character XMLChar.isContent rejects
0613: while (fCurrentEntity.position < fCurrentEntity.count) {
0614: c = fCurrentEntity.ch[fCurrentEntity.position++];
0615: if ((c == quote && (!fCurrentEntity.literal || external))
0616: || c == '%' || !XMLChar.isContent(c)) {
0617: fCurrentEntity.position--; // leave the terminating character unread
0618: break;
0619: }
0620: }
0621: int length = fCurrentEntity.position - offset;
0622: fCurrentEntity.columnNumber += length - newlines;
0623: content.setValues(fCurrentEntity.ch, offset, length);
0624:
0625: // return next character
0626: if (fCurrentEntity.position != fCurrentEntity.count) {
0627: c = fCurrentEntity.ch[fCurrentEntity.position];
0628: // NOTE: We don't want to accidentally signal the
0629: // end of the literal if we're expanding an
0630: // entity appearing in the literal. -Ac
0631: if (c == quote && fCurrentEntity.literal) {
0632: c = -1;
0633: }
0634: } else { // buffer exhausted: the next character is not known yet
0635: c = -1;
0636: }
0637: return c;
0638:
0639: }
0640:
0641: /**
0642: * Scans a range of character data up to the specified delimiter,
0643: * setting the fields of the XMLString structure, appropriately.
0644: * <p>
0645: * <strong>Note:</strong> The characters are consumed.
0646: * <p>
0647: * <strong>Note:</strong> This assumes that the internal buffer is
0648: * at least the same size, or bigger, than the length of the delimiter
0649: * and that the delimiter contains at least one character.
0650: * <p>
0651: * <strong>Note:</strong> This method does not guarantee to return
0652: * the longest run of character data. This method may return before
0653: * the delimiter due to reaching the end of the input buffer or any
0654: * other reason.
0655: * <p>
0656: * <strong>Note:</strong> The fields contained in the XMLString
0657: * structure are not guaranteed to remain valid upon subsequent calls
0658: * to the entity scanner. Therefore, the caller is responsible for
0659: * immediately using the returned character data or making a copy of
0660: * the character data.
0661: *
0662: * @param delimiter The string that signifies the end of the character
0663: * data to be scanned.
0664: * @param buffer The data structure to fill; scanned data is appended
0664: * to it, without the delimiter.
0665: *
0666: * @return Returns true if there is more data to scan, false otherwise.
0667: *
0668: * @throws IOException Thrown if i/o error occurs.
0669: * @throws EOFException Thrown on end of file.
0670: */
0671: public boolean scanData(String delimiter, XMLStringBuffer buffer)
0672: throws IOException {
0673:
0674: boolean done = false; // becomes true once the whole delimiter has been matched
0675: int delimLen = delimiter.length();
0676: char charAt0 = delimiter.charAt(0);
0677: boolean external = fCurrentEntity.isExternal();
0678: do {
0679:
0680: // load more characters, if needed
0681:
0682: if (fCurrentEntity.position == fCurrentEntity.count) {
0683: load(0, true);
0684: } else if (fCurrentEntity.position >= fCurrentEntity.count
0685: - delimLen) { // at most delimLen characters left: shift them to the front and load behind them
0686: System.arraycopy(fCurrentEntity.ch,
0687: fCurrentEntity.position, fCurrentEntity.ch, 0,
0688: fCurrentEntity.count - fCurrentEntity.position);
0689: load(fCurrentEntity.count - fCurrentEntity.position,
0690: false);
0691: fCurrentEntity.position = 0;
0692: }
0693: if (fCurrentEntity.position >= fCurrentEntity.count
0694: - delimLen) {
0695: // something must be wrong with the input: e.g., file ends an
0696: // unterminated comment; hand back what is left and give up
0697: int length = fCurrentEntity.count
0698: - fCurrentEntity.position;
0699: buffer.append(fCurrentEntity.ch,
0700: fCurrentEntity.position, length);
0701: fCurrentEntity.columnNumber += fCurrentEntity.count; // NOTE(review): advances by count, not by the appended length -- confirm intended
0702: fCurrentEntity.position = fCurrentEntity.count;
0703: load(0, true);
0704: return false;
0705: }
0706:
0707: // normalize newlines
0708: int offset = fCurrentEntity.position; // start of the data appended to buffer in this pass
0709: int c = fCurrentEntity.ch[offset];
0710: int newlines = 0; // line breaks seen; left out of the column advance below
0711: if (c == '\n' || (c == '\r' && external)) {
0712: do {
0713: c = fCurrentEntity.ch[fCurrentEntity.position++];
0714: if (c == '\r' && external) {
0715: newlines++;
0716: fCurrentEntity.lineNumber++;
0717: fCurrentEntity.columnNumber = 1;
0718: if (fCurrentEntity.position == fCurrentEntity.count) { // buffer exhausted: restart at the front, keeping room for the newlines counted so far
0719: offset = 0;
0720: fCurrentEntity.position = newlines;
0721: if (load(newlines, false)) {
0722: break;
0723: }
0724: }
0725: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { // CR LF pair: consume the LF and drop it from the reported range
0726: fCurrentEntity.position++;
0727: offset++;
0728: }
0729: /*** NEWLINE NORMALIZATION ***/
0730: else {
0731: newlines++;
0732: }
0733: } else if (c == '\n') {
0734: newlines++;
0735: fCurrentEntity.lineNumber++;
0736: fCurrentEntity.columnNumber = 1;
0737: if (fCurrentEntity.position == fCurrentEntity.count) { // buffer exhausted: same restart as in the CR branch
0738: offset = 0;
0739: fCurrentEntity.position = newlines;
0740: fCurrentEntity.count = newlines;
0741: if (load(newlines, false)) {
0742: break;
0743: }
0744: }
0745: } else { // not a line break: un-read it and leave the newline run
0746: fCurrentEntity.position--;
0747: break;
0748: }
0749: } while (fCurrentEntity.position < fCurrentEntity.count - 1);
0750: for (int i = offset; i < fCurrentEntity.position; i++) { // overwrite the consumed run with plain line feeds
0751: fCurrentEntity.ch[i] = '\n';
0752: }
0753: int length = fCurrentEntity.position - offset;
0754: if (fCurrentEntity.position == fCurrentEntity.count - 1) { // one character left: append the newlines and let the caller call again
0755: buffer.append(fCurrentEntity.ch, offset, length);
0756: return true;
0757: }
0758: }
0759:
0760: // iterate over buffer looking for delimiter
0761: OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
0762: c = fCurrentEntity.ch[fCurrentEntity.position++];
0763: if (c == charAt0) {
0764: // looks like we just hit the delimiter
0765: int delimOffset = fCurrentEntity.position - 1; // index of the delimiter's first character
0766: for (int i = 1; i < delimLen; i++) {
0767: if (fCurrentEntity.position == fCurrentEntity.count) { // ran out of buffer mid-match: un-read the partial match and stop this pass
0768: fCurrentEntity.position -= i;
0769: break OUTER;
0770: }
0771: c = fCurrentEntity.ch[fCurrentEntity.position++];
0772: if (delimiter.charAt(i) != c) { // mismatch: un-read this character and keep scanning
0773: fCurrentEntity.position--;
0774: break;
0775: }
0776: }
0777: if (fCurrentEntity.position == delimOffset
0778: + delimLen) { // every delimiter character matched
0779: done = true;
0780: break;
0781: }
0782: } else if (c == '\n' || (external && c == '\r')) { // line break: stop so the next pass normalizes it
0783: fCurrentEntity.position--;
0784: break;
0785: } else if (XMLChar.isInvalid(c)) { // invalid character: append what was scanned and leave it unread
0786: fCurrentEntity.position--;
0787: int length = fCurrentEntity.position - offset;
0788: fCurrentEntity.columnNumber += length - newlines;
0789: buffer.append(fCurrentEntity.ch, offset, length);
0790: return true;
0791: }
0792: }
0793: int length = fCurrentEntity.position - offset;
0794: fCurrentEntity.columnNumber += length - newlines;
0795: if (done) { // the delimiter itself is consumed but not appended
0796: length -= delimLen;
0797: }
0798: buffer.append(fCurrentEntity.ch, offset, length);
0799:
0800: // return true if string was skipped
0801: } while (!done);
0802: return !done; // the loop only exits with done == true, so this returns false
0803:
0804: }
0805:
0806: // Adapted from:
0807: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar
0808: /**
0809: * Skips a character appearing immediately on the input.
0810: * <p>
0811: * <strong>Note:</strong> The character is consumed only if it matches
0812: * the specified character.
0813: *
0814: * @param c The character to skip.
0815: *
0816: * @return Returns true if the character was skipped.
0817: *
0818: * @throws IOException Thrown if i/o error occurs.
0819: * @throws EOFException Thrown on end of file.
0820: */
0821: public boolean skipChar(int c) throws IOException {
0822:
0823: // load more characters, if needed
0824: if (fCurrentEntity.position == fCurrentEntity.count) {
0825: load(0, true);
0826: }
0827:
0828: // skip character
0829: int cc = fCurrentEntity.ch[fCurrentEntity.position];
0830: if (cc == c) {
0831: fCurrentEntity.position++;
0832: if (c == '\n') {
0833: fCurrentEntity.lineNumber++;
0834: fCurrentEntity.columnNumber = 1;
0835: } else {
0836: fCurrentEntity.columnNumber++;
0837: }
0838: return true;
0839: } else if (c == '\n' && cc == '\r'
0840: && fCurrentEntity.isExternal()) {
0841: // handle newlines
0842: if (fCurrentEntity.position == fCurrentEntity.count) {
0843: fCurrentEntity.ch[0] = (char) cc;
0844: load(1, false);
0845: }
0846: fCurrentEntity.position++;
0847: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
0848: fCurrentEntity.position++;
0849: }
0850: fCurrentEntity.lineNumber++;
0851: fCurrentEntity.columnNumber = 1;
0852: return true;
0853: }
0854:
0855: // character was not skipped
0856: return false;
0857:
0858: }
0859:
0860: // Adapted from:
0861: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces
0862: /**
0863: * Skips space characters appearing immediately on the input.
0864: * <p>
0865: * <strong>Note:</strong> The characters are consumed only if they are
0866: * space characters. Line and column counters are updated for every
0866: * line break that is skipped.
0867: *
0868: * @return Returns true if at least one space character was skipped.
0869: *
0870: * @throws IOException Thrown if i/o error occurs.
0871: * @throws EOFException Thrown on end of file.
0872: *
0873: * @see XMLChar#isSpace
0874: */
0875: public boolean skipSpaces() throws IOException {
0876:
0877: // load more characters, if needed
0878: if (fCurrentEntity.position == fCurrentEntity.count) {
0879: load(0, true);
0880: }
0881:
0882: // skip spaces
0883: int c = fCurrentEntity.ch[fCurrentEntity.position];
0884: if (XMLChar.isSpace(c)) {
0885: boolean external = fCurrentEntity.isExternal();
0886: do {
0887: boolean entityChanged = false; // set when the load below reports an entity change
0888: // handle newlines
0889: if (c == '\n' || (external && c == '\r')) {
0890: fCurrentEntity.lineNumber++;
0891: fCurrentEntity.columnNumber = 1;
0892: if (fCurrentEntity.position == fCurrentEntity.count - 1) { // line break is the last buffered character: keep it at the front and load behind it
0893: fCurrentEntity.ch[0] = (char) c;
0894: entityChanged = load(1, true);
0895: if (!entityChanged)
0896: // the load change the position to be 1,
0897: // need to restore it when entity not changed
0898: fCurrentEntity.position = 0;
0899: }
0900: if (c == '\r' && external) {
0901: // REVISIT: Does this need to be updated to fix the
0902: // #x0D ^#x0A newline normalization problem? -Ac
0903: if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { // step onto a following LF, otherwise step back to the CR
0904: fCurrentEntity.position--;
0905: }
0906: }
0907: /*** NEWLINE NORMALIZATION *** (disabled: everything up to the closing marker is commented out)
0908: else {
0909: if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
0910: && external) {
0911: fCurrentEntity.position++;
0912: }
0913: }
0914: /***/
0915: } else { // ordinary space character
0916: fCurrentEntity.columnNumber++;
0917: }
0918: // load more characters, if needed
0919: if (!entityChanged) // move past the character just handled
0920: fCurrentEntity.position++;
0921: if (fCurrentEntity.position == fCurrentEntity.count) {
0922: load(0, true);
0923: }
0924: } while (XMLChar
0925: .isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
0926: return true;
0927: }
0928:
0929: // no spaces were found
0930: return false;
0931:
0932: }
0933:
0934: /**
0935: * Skips the specified string appearing immediately on the input.
0936: * <p>
0937: * <strong>Note:</strong> The characters are consumed only if they are
0938: * space characters.
0939: *
0940: * @param s The string to skip.
0941: *
0942: * @return Returns true if the string was skipped.
0943: *
0944: * @throws IOException Thrown if i/o error occurs.
0945: * @throws EOFException Thrown on end of file.
0946: */
0947: public boolean skipString(String s) throws IOException {
0948:
0949: // load more characters, if needed
0950: if (fCurrentEntity.position == fCurrentEntity.count) {
0951: load(0, true);
0952: }
0953:
0954: // skip string
0955: final int length = s.length();
0956: for (int i = 0; i < length; i++) {
0957: char c = fCurrentEntity.ch[fCurrentEntity.position++];
0958: if (c != s.charAt(i)) {
0959: fCurrentEntity.position -= i + 1;
0960: return false;
0961: }
0962: if (i < length - 1
0963: && fCurrentEntity.position == fCurrentEntity.count) {
0964: System.arraycopy(fCurrentEntity.ch,
0965: fCurrentEntity.count - i - 1,
0966: fCurrentEntity.ch, 0, i + 1);
0967: // REVISIT: Can a string to be skipped cross an
0968: // entity boundary? -Ac
0969: if (load(i + 1, false)) {
0970: fCurrentEntity.position -= i + 1;
0971: return false;
0972: }
0973: }
0974: }
0975: fCurrentEntity.columnNumber += length;
0976: return true;
0977:
0978: }
0979:
0980: // Adapted from:
0981: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.load
0982: /**
0983: * Loads a chunk of text.
0984: *
0985: * @param offset The offset into the character buffer to
0986: * read the next batch of characters.
0987: * @param changeEntity True if the load should change entities
0988: * at the end of the entity, otherwise leave
0989: * the current entity in place and the entity
0990: * boundary will be signaled by the return
0991: * value.
0992: *
0993: * @returns Returns true if the entity changed as a result of this
0994: * load operation.
0995: */
0996: final boolean load(int offset, boolean changeEntity)
0997: throws IOException {
0998:
0999: // read characters
1000: int length = fCurrentEntity.mayReadChunks ? (fCurrentEntity.ch.length - offset)
1001: : (DEFAULT_XMLDECL_BUFFER_SIZE);
1002: int count = fCurrentEntity.reader.read(fCurrentEntity.ch,
1003: offset, length);
1004:
1005: // reset count and position
1006: boolean entityChanged = false;
1007: if (count != -1) {
1008: if (count != 0) {
1009: fCurrentEntity.count = count + offset;
1010: fCurrentEntity.position = offset;
1011: }
1012: }
1013:
1014: // end of this entity
1015: else {
1016: fCurrentEntity.count = offset;
1017: fCurrentEntity.position = offset;
1018: entityChanged = true;
1019: if (changeEntity) {
1020: endEntity();
1021: if (fCurrentEntity == null) {
1022: throw new EOFException();
1023: }
1024: // handle the trailing edges
1025: if (fCurrentEntity.position == fCurrentEntity.count) {
1026: load(0, false);
1027: }
1028: }
1029: }
1030:
1031: return entityChanged;
1032:
1033: }
1034:
1035: // Adapted from:
1036: // org.apache.xerces.impl.XMLEntityManager.RewindableInputStream
1037: /**
1038: * This class wraps the byte inputstreams we're presented with.
1039: * We need it because java.io.InputStreams don't provide
1040: * functionality to reread processed bytes, and they have a habit
1041: * of reading more than one character when you call their read()
1042: * methods. This means that, once we discover the true (declared)
1043: * encoding of a document, we can neither backtrack to read the
1044: * whole doc again nor start reading where we are with a new
1045: * reader.
1046: *
1047: * This class allows rewinding an inputStream by allowing a mark
1048: * to be set, and the stream reset to that position. <strong>The
1049: * class assumes that it needs to read one character per
1050: * invocation when it's read() method is inovked, but uses the
1051: * underlying InputStream's read(char[], offset length) method--it
1052: * won't buffer data read this way!</strong>
1053: *
1054: * @author Neil Graham, IBM
1055: * @author Glenn Marcy, IBM
1056: */
1057: private final class RewindableInputStream extends InputStream {
1058:
1059: private InputStream fInputStream;
1060: private byte[] fData;
1061: private int fStartOffset;
1062: private int fEndOffset;
1063: private int fOffset;
1064: private int fLength;
1065: private int fMark;
1066:
1067: public RewindableInputStream(InputStream is) {
1068: fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
1069: fInputStream = is;
1070: fStartOffset = 0;
1071: fEndOffset = -1;
1072: fOffset = 0;
1073: fLength = 0;
1074: fMark = 0;
1075: }
1076:
1077: public void setStartOffset(int offset) {
1078: fStartOffset = offset;
1079: }
1080:
1081: public void rewind() {
1082: fOffset = fStartOffset;
1083: }
1084:
1085: public int read() throws IOException {
1086: int b = 0;
1087: if (fOffset < fLength) {
1088: return fData[fOffset++] & 0xff;
1089: }
1090: if (fOffset == fEndOffset) {
1091: return -1;
1092: }
1093: if (fOffset == fData.length) {
1094: byte[] newData = new byte[fOffset << 1];
1095: System.arraycopy(fData, 0, newData, 0, fOffset);
1096: fData = newData;
1097: }
1098: b = fInputStream.read();
1099: if (b == -1) {
1100: fEndOffset = fOffset;
1101: return -1;
1102: }
1103: fData[fLength++] = (byte) b;
1104: fOffset++;
1105: return b & 0xff;
1106: }
1107:
1108: public int read(byte[] b, int off, int len) throws IOException {
1109: int bytesLeft = fLength - fOffset;
1110: if (bytesLeft == 0) {
1111: if (fOffset == fEndOffset) {
1112: return -1;
1113: }
1114: // better get some more for the voracious reader...
1115: if (fCurrentEntity.mayReadChunks) {
1116: return fInputStream.read(b, off, len);
1117: }
1118: int returnedVal = read();
1119: if (returnedVal == -1) {
1120: fEndOffset = fOffset;
1121: return -1;
1122: }
1123: b[off] = (byte) returnedVal;
1124: return 1;
1125: }
1126: if (len < bytesLeft) {
1127: if (len <= 0) {
1128: return 0;
1129: }
1130: } else {
1131: len = bytesLeft;
1132: }
1133: if (b != null) {
1134: System.arraycopy(fData, fOffset, b, off, len);
1135: }
1136: fOffset += len;
1137: return len;
1138: }
1139:
1140: public long skip(long n) throws IOException {
1141: int bytesLeft;
1142: if (n <= 0) {
1143: return 0;
1144: }
1145: bytesLeft = fLength - fOffset;
1146: if (bytesLeft == 0) {
1147: if (fOffset == fEndOffset) {
1148: return 0;
1149: }
1150: return fInputStream.skip(n);
1151: }
1152: if (n <= bytesLeft) {
1153: fOffset += n;
1154: return n;
1155: }
1156: fOffset += bytesLeft;
1157: if (fOffset == fEndOffset) {
1158: return bytesLeft;
1159: }
1160: n -= bytesLeft;
1161: /*
1162: * In a manner of speaking, when this class isn't permitting more
1163: * than one byte at a time to be read, it is "blocking". The
1164: * available() method should indicate how much can be read without
1165: * blocking, so while we're in this mode, it should only indicate
1166: * that bytes in its buffer are available; otherwise, the result of
1167: * available() on the underlying InputStream is appropriate.
1168: */
1169: return fInputStream.skip(n) + bytesLeft;
1170: }
1171:
1172: public int available() throws IOException {
1173: int bytesLeft = fLength - fOffset;
1174: if (bytesLeft == 0) {
1175: if (fOffset == fEndOffset) {
1176: return -1;
1177: }
1178: return fCurrentEntity.mayReadChunks ? fInputStream
1179: .available() : 0;
1180: }
1181: return bytesLeft;
1182: }
1183:
1184: public void mark(int howMuch) {
1185: fMark = fOffset;
1186: }
1187:
1188: public void reset() {
1189: fOffset = fMark;
1190: }
1191:
1192: public boolean markSupported() {
1193: return true;
1194: }
1195:
1196: public void close() throws IOException {
1197: if (fInputStream != null) {
1198: fInputStream.close();
1199: fInputStream = null;
1200: }
1201: }
1202: } // end of RewindableInputStream class
1203:
1204: // Adapted from:
1205: // org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch
1206: private void scanXMLDecl() throws IOException, JasperException {
1207:
1208: if (skipString("<?xml")) {
1209: fMarkupDepth++;
1210: // NOTE: special case where document starts with a PI
1211: // whose name starts with "xml" (e.g. "xmlfoo")
1212: if (XMLChar.isName(peekChar())) {
1213: fStringBuffer.clear();
1214: fStringBuffer.append("xml");
1215: while (XMLChar.isName(peekChar())) {
1216: fStringBuffer.append((char) scanChar());
1217: }
1218: String target = fSymbolTable.addSymbol(
1219: fStringBuffer.ch, fStringBuffer.offset,
1220: fStringBuffer.length);
1221: scanPIData(target, fString);
1222: }
1223:
1224: // standard XML declaration
1225: else {
1226: scanXMLDeclOrTextDecl(false);
1227: }
1228: }
1229: }
1230:
1231: // Adapted from:
1232: // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl
1233: /**
1234: * Scans an XML or text declaration.
1235: * <p>
1236: * <pre>
1237: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1238: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1239: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
1240: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1241: * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1242: * | ('"' ('yes' | 'no') '"'))
1243: *
1244: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1245: * </pre>
1246: *
1247: * @param scanningTextDecl True if a text declaration is to
1248: * be scanned instead of an XML
1249: * declaration.
1250: */
1251: private void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
1252: throws IOException, JasperException {
1253:
1254: // scan decl
1255: scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
1256: fMarkupDepth--;
1257:
1258: // pseudo-attribute values
1259: String encodingPseudoAttr = fStrings[1];
1260:
1261: // set encoding on reader
1262: if (encodingPseudoAttr != null) {
1263: isEncodingSetInProlog = true;
1264: encoding = encodingPseudoAttr;
1265: }
1266: }
1267:
1268: // Adapted from:
1269: // org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl
1270: /**
1271: * Scans an XML or text declaration.
1272: * <p>
1273: * <pre>
1274: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1275: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1276: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
1277: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1278: * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1279: * | ('"' ('yes' | 'no') '"'))
1280: *
1281: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1282: * </pre>
1283: *
1284: * @param scanningTextDecl True if a text declaration is to
1285: * be scanned instead of an XML
1286: * declaration.
1287: * @param pseudoAttributeValues An array of size 3 to return the version,
1288: * encoding and standalone pseudo attribute values
1289: * (in that order).
1290: *
1291: * <strong>Note:</strong> This method uses fString, anything in it
1292: * at the time of calling is lost.
1293: */
1294: private void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
1295: String[] pseudoAttributeValues) throws IOException,
1296: JasperException {
1297:
1298: // pseudo-attribute values
1299: String version = null;
1300: String encoding = null;
1301: String standalone = null;
1302:
1303: // scan pseudo-attributes
1304: final int STATE_VERSION = 0;
1305: final int STATE_ENCODING = 1;
1306: final int STATE_STANDALONE = 2;
1307: final int STATE_DONE = 3;
1308: int state = STATE_VERSION;
1309:
1310: boolean dataFoundForTarget = false;
1311: boolean sawSpace = skipSpaces();
1312: while (peekChar() != '?') {
1313: dataFoundForTarget = true;
1314: String name = scanPseudoAttribute(scanningTextDecl, fString);
1315: switch (state) {
1316: case STATE_VERSION: {
1317: if (name == fVersionSymbol) {
1318: if (!sawSpace) {
1319: reportFatalError(
1320: scanningTextDecl ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
1321: : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
1322: null);
1323: }
1324: version = fString.toString();
1325: state = STATE_ENCODING;
1326: if (!version.equals("1.0")) {
1327: // REVISIT: XML REC says we should throw an error
1328: // in such cases.
1329: // some may object the throwing of fatalError.
1330: err.jspError(
1331: "jsp.error.xml.versionNotSupported",
1332: version);
1333: }
1334: } else if (name == fEncodingSymbol) {
1335: if (!scanningTextDecl) {
1336: err
1337: .jspError("jsp.error.xml.versionInfoRequired");
1338: }
1339: if (!sawSpace) {
1340: reportFatalError(
1341: scanningTextDecl ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1342: : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1343: null);
1344: }
1345: encoding = fString.toString();
1346: state = scanningTextDecl ? STATE_DONE
1347: : STATE_STANDALONE;
1348: } else {
1349: if (scanningTextDecl) {
1350: err
1351: .jspError("jsp.error.xml.encodingDeclRequired");
1352: } else {
1353: err
1354: .jspError("jsp.error.xml.versionInfoRequired");
1355: }
1356: }
1357: break;
1358: }
1359: case STATE_ENCODING: {
1360: if (name == fEncodingSymbol) {
1361: if (!sawSpace) {
1362: reportFatalError(
1363: scanningTextDecl ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1364: : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1365: null);
1366: }
1367: encoding = fString.toString();
1368: state = scanningTextDecl ? STATE_DONE
1369: : STATE_STANDALONE;
1370: // TODO: check encoding name; set encoding on
1371: // entity scanner
1372: } else if (!scanningTextDecl
1373: && name == fStandaloneSymbol) {
1374: if (!sawSpace) {
1375: err
1376: .jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1377: }
1378: standalone = fString.toString();
1379: state = STATE_DONE;
1380: if (!standalone.equals("yes")
1381: && !standalone.equals("no")) {
1382: err.jspError("jsp.error.xml.sdDeclInvalid");
1383: }
1384: } else {
1385: err.jspError("jsp.error.xml.encodingDeclRequired");
1386: }
1387: break;
1388: }
1389: case STATE_STANDALONE: {
1390: if (name == fStandaloneSymbol) {
1391: if (!sawSpace) {
1392: err
1393: .jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1394: }
1395: standalone = fString.toString();
1396: state = STATE_DONE;
1397: if (!standalone.equals("yes")
1398: && !standalone.equals("no")) {
1399: err.jspError("jsp.error.xml.sdDeclInvalid");
1400: }
1401: } else {
1402: err.jspError("jsp.error.xml.encodingDeclRequired");
1403: }
1404: break;
1405: }
1406: default: {
1407: err.jspError("jsp.error.xml.noMorePseudoAttributes");
1408: }
1409: }
1410: sawSpace = skipSpaces();
1411: }
1412: // REVISIT: should we remove this error reporting?
1413: if (scanningTextDecl && state != STATE_DONE) {
1414: err.jspError("jsp.error.xml.morePseudoAttributes");
1415: }
1416:
1417: // If there is no data in the xml or text decl then we fail to report
1418: // error for version or encoding info above.
1419: if (scanningTextDecl) {
1420: if (!dataFoundForTarget && encoding == null) {
1421: err.jspError("jsp.error.xml.encodingDeclRequired");
1422: }
1423: } else {
1424: if (!dataFoundForTarget && version == null) {
1425: err.jspError("jsp.error.xml.versionInfoRequired");
1426: }
1427: }
1428:
1429: // end
1430: if (!skipChar('?')) {
1431: err.jspError("jsp.error.xml.xmlDeclUnterminated");
1432: }
1433: if (!skipChar('>')) {
1434: err.jspError("jsp.error.xml.xmlDeclUnterminated");
1435:
1436: }
1437:
1438: // fill in return array
1439: pseudoAttributeValues[0] = version;
1440: pseudoAttributeValues[1] = encoding;
1441: pseudoAttributeValues[2] = standalone;
1442: }
1443:
1444: // Adapted from:
1445: // org.apache.xerces.impl.XMLScanner.scanPseudoAttribute
1446: /**
1447: * Scans a pseudo attribute.
1448: *
1449: * @param scanningTextDecl True if scanning this pseudo-attribute for a
1450: * TextDecl; false if scanning XMLDecl. This
1451: * flag is needed to report the correct type of
1452: * error.
1453: * @param value The string to fill in with the attribute
1454: * value.
1455: *
1456: * @return The name of the attribute
1457: *
1458: * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
1459: * at the time of calling is lost.
1460: */
1461: public String scanPseudoAttribute(boolean scanningTextDecl,
1462: XMLString value) throws IOException, JasperException {
1463:
1464: String name = scanName();
1465: if (name == null) {
1466: err.jspError("jsp.error.xml.pseudoAttrNameExpected");
1467: }
1468: skipSpaces();
1469: if (!skipChar('=')) {
1470: reportFatalError(
1471: scanningTextDecl ? "jsp.error.xml.eqRequiredInTextDecl"
1472: : "jsp.error.xml.eqRequiredInXMLDecl", name);
1473: }
1474: skipSpaces();
1475: int quote = peekChar();
1476: if (quote != '\'' && quote != '"') {
1477: reportFatalError(
1478: scanningTextDecl ? "jsp.error.xml.quoteRequiredInTextDecl"
1479: : "jsp.error.xml.quoteRequiredInXMLDecl",
1480: name);
1481: }
1482: scanChar();
1483: int c = scanLiteral(quote, value);
1484: if (c != quote) {
1485: fStringBuffer2.clear();
1486: do {
1487: fStringBuffer2.append(value);
1488: if (c != -1) {
1489: if (c == '&' || c == '%' || c == '<' || c == ']') {
1490: fStringBuffer2.append((char) scanChar());
1491: } else if (XMLChar.isHighSurrogate(c)) {
1492: scanSurrogates(fStringBuffer2);
1493: } else if (XMLChar.isInvalid(c)) {
1494: String key = scanningTextDecl ? "jsp.error.xml.invalidCharInTextDecl"
1495: : "jsp.error.xml.invalidCharInXMLDecl";
1496: reportFatalError(key, Integer.toString(c, 16));
1497: scanChar();
1498: }
1499: }
1500: c = scanLiteral(quote, value);
1501: } while (c != quote);
1502: fStringBuffer2.append(value);
1503: value.setValues(fStringBuffer2);
1504: }
1505: if (!skipChar(quote)) {
1506: reportFatalError(
1507: scanningTextDecl ? "jsp.error.xml.closeQuoteMissingInTextDecl"
1508: : "jsp.error.xml.closeQuoteMissingInXMLDecl",
1509: name);
1510: }
1511:
1512: // return
1513: return name;
1514:
1515: }
1516:
1517: // Adapted from:
1518: // org.apache.xerces.impl.XMLScanner.scanPIData
1519: /**
1520: * Scans a processing data. This is needed to handle the situation
1521: * where a document starts with a processing instruction whose
1522: * target name <em>starts with</em> "xml". (e.g. xmlfoo)
1523: *
1524: * <strong>Note:</strong> This method uses fStringBuffer, anything in it
1525: * at the time of calling is lost.
1526: *
1527: * @param target The PI target
1528: * @param data The string to fill in with the data
1529: */
1530: private void scanPIData(String target, XMLString data)
1531: throws IOException, JasperException {
1532:
1533: // check target
1534: if (target.length() == 3) {
1535: char c0 = Character.toLowerCase(target.charAt(0));
1536: char c1 = Character.toLowerCase(target.charAt(1));
1537: char c2 = Character.toLowerCase(target.charAt(2));
1538: if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
1539: err.jspError("jsp.error.xml.reservedPITarget");
1540: }
1541: }
1542:
1543: // spaces
1544: if (!skipSpaces()) {
1545: if (skipString("?>")) {
1546: // we found the end, there is no data
1547: data.clear();
1548: return;
1549: } else {
1550: // if there is data there should be some space
1551: err.jspError("jsp.error.xml.spaceRequiredInPI");
1552: }
1553: }
1554:
1555: fStringBuffer.clear();
1556: // data
1557: if (scanData("?>", fStringBuffer)) {
1558: do {
1559: int c = peekChar();
1560: if (c != -1) {
1561: if (XMLChar.isHighSurrogate(c)) {
1562: scanSurrogates(fStringBuffer);
1563: } else if (XMLChar.isInvalid(c)) {
1564: err.jspError("jsp.error.xml.invalidCharInPI",
1565: Integer.toHexString(c));
1566: scanChar();
1567: }
1568: }
1569: } while (scanData("?>", fStringBuffer));
1570: }
1571: data.setValues(fStringBuffer);
1572:
1573: }
1574:
1575: // Adapted from:
1576: // org.apache.xerces.impl.XMLScanner.scanSurrogates
1577: /**
1578: * Scans surrogates and append them to the specified buffer.
1579: * <p>
1580: * <strong>Note:</strong> This assumes the current char has already been
1581: * identified as a high surrogate.
1582: *
1583: * @param buf The StringBuffer to append the read surrogates to.
1584: * @returns True if it succeeded.
1585: */
1586: private boolean scanSurrogates(XMLStringBuffer buf)
1587: throws IOException, JasperException {
1588:
1589: int high = scanChar();
1590: int low = peekChar();
1591: if (!XMLChar.isLowSurrogate(low)) {
1592: err.jspError("jsp.error.xml.invalidCharInContent", Integer
1593: .toString(high, 16));
1594: return false;
1595: }
1596: scanChar();
1597:
1598: // convert surrogates to supplemental character
1599: int c = XMLChar.supplemental((char) high, (char) low);
1600:
1601: // supplemental character must be a valid XML character
1602: if (!XMLChar.isValid(c)) {
1603: err.jspError("jsp.error.xml.invalidCharInContent", Integer
1604: .toString(c, 16));
1605: return false;
1606: }
1607:
1608: // fill in the buffer
1609: buf.append((char) high);
1610: buf.append((char) low);
1611:
1612: return true;
1613:
1614: }
1615:
1616: // Adapted from:
1617: // org.apache.xerces.impl.XMLScanner.reportFatalError
1618: /**
1619: * Convenience function used in all XML scanners.
1620: */
1621: private void reportFatalError(String msgId, String arg)
1622: throws JasperException {
1623: err.jspError(msgId, arg);
1624: }
1625:
1626: }
|