0001: /*
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: * ====================================================================
0017: *
0018: * This software consists of voluntary contributions made by many
0019: * individuals on behalf of the Apache Software Foundation and was
0020: * originally based on software copyright (c) 1999, International
0021: * Business Machines, Inc., http://www.apache.org. For more
0022: * information on the Apache Software Foundation, please see
0023: * <http://www.apache.org/>.
0024: */
0025:
0026: package org.apache.jasper.xmlparser;
0027:
0028: import java.io.EOFException;
0029: import java.io.InputStream;
0030: import java.io.InputStreamReader;
0031: import java.io.IOException;
0032: import java.io.Reader;
0033: import java.util.Locale;
0034: import java.util.jar.JarFile;
0035:
0036: import org.apache.jasper.JasperException;
0037: import org.apache.jasper.JspCompilationContext;
0038: import org.apache.jasper.compiler.ErrorDispatcher;
0039: import org.apache.jasper.compiler.JspUtil;
0040:
0041: public class XMLEncodingDetector {
0042:
0043: private InputStream stream; // input being examined; wrapped in a RewindableInputStream by createInitialReader()
0044: private String encoding; // encoding name; filled in from the byte-signature descriptor when still null
0045: private boolean isEncodingSetInProlog; // returned as element [1] of the getEncoding() result
0046: private boolean isBomPresent; // returned as element [2] of the getEncoding() result
0047: private int skip; // skip count taken from the byte-signature descriptor; returned as element [3]
0048: private Boolean isBigEndian; // byte order from the descriptor; null when unknown or not relevant
0049: private Reader reader; // reader produced by createReader() for the detected encoding
0050:
0051: // org.apache.xerces.impl.XMLEntityManager fields
0052: public static final int DEFAULT_BUFFER_SIZE = 2048;
0053: public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
0054: private boolean fAllowJavaEncodings; // consulted by createReader(); never assigned in the code visible here
0055: private SymbolTable fSymbolTable; // table through which scanName() turns names into symbols
0056: private XMLEncodingDetector fCurrentEntity; // set to this instance by the constructor
0057: private int fBufferSize = DEFAULT_BUFFER_SIZE; // reader buffer size; doubled by scanName() when a name fills the buffer
0058:
0059: // org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields
0060: private int lineNumber = 1; // advanced by the scan/skip methods on every newline
0061: private int columnNumber = 1; // reset to 1 on newline, otherwise advanced per consumed character
0062: private boolean literal; // read by scanLiteral() when deciding whether a quote ends the value
0063: private char[] ch = new char[DEFAULT_BUFFER_SIZE]; // character buffer the scan/skip methods work on
0064: private int position; // index in ch of the next character to look at
0065: private int count; // scanners call load() when position reaches this value
0066: private boolean mayReadChunks = false;
0067:
0068: // org.apache.xerces.impl.XMLScanner fields
0069: private XMLString fString = new XMLString();
0070: private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
0071: private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
0072: private final static String fVersionSymbol = "version";
0073: private final static String fEncodingSymbol = "encoding";
0074: private final static String fStandaloneSymbol = "standalone";
0075:
0076: // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields
0077: private int fMarkupDepth = 0;
0078: private String[] fStrings = new String[3];
0079:
0080: private ErrorDispatcher err; // error sink handed to getEncoding(); used by createReader() to report bad encodings
0081:
/**
 * Creates a detector that acts as its own current entity and owns a
 * freshly created symbol table.
 */
public XMLEncodingDetector() {
    this.fCurrentEntity = this;
    this.fSymbolTable = new SymbolTable();
}
0089:
0090: /**
0091: * Autodetects the encoding of the XML document supplied by the given
0092: * input stream.
0093: *
0094: * Encoding autodetection is done according to the XML 1.0 specification,
0095: * Appendix F.1: Detection Without External Encoding Information.
0096: *
0097: * @return Two-element array, where the first element (of type
0098: * java.lang.String) contains the name of the (auto)detected encoding, and
0099: * the second element (of type java.lang.Boolean) specifies whether the
0100: * encoding was specified using the 'encoding' attribute of an XML prolog
0101: * (TRUE) or autodetected (FALSE).
0102: */
0103: public static Object[] getEncoding(String fname, JarFile jarFile,
0104: JspCompilationContext ctxt, ErrorDispatcher err)
0105: throws IOException, JasperException {
0106: InputStream inStream = JspUtil.getInputStream(fname, jarFile,
0107: ctxt, err);
0108: XMLEncodingDetector detector = new XMLEncodingDetector();
0109: Object[] ret = detector.getEncoding(inStream, err);
0110: inStream.close();
0111:
0112: return ret;
0113: }
0114:
0115: private Object[] getEncoding(InputStream in, ErrorDispatcher err)
0116: throws IOException, JasperException {
0117: this .stream = in;
0118: this .err = err;
0119: createInitialReader();
0120: scanXMLDecl();
0121:
0122: return new Object[] { this .encoding,
0123: Boolean.valueOf(this .isEncodingSetInProlog),
0124: Boolean.valueOf(this .isBomPresent),
0125: Integer.valueOf(this .skip) };
0126: }
0127:
0128: // stub method
0129: void endEntity() {
0130: }
0131:
0132: // Adapted from:
0133: // org.apache.xerces.impl.XMLEntityManager.startEntity()
0134: private void createInitialReader() throws IOException,
0135: JasperException {
0136:
0137: // wrap this stream in RewindableInputStream
0138: stream = new RewindableInputStream(stream);
0139:
0140: // perform auto-detect of encoding if necessary
0141: if (encoding == null) {
0142: // read first four bytes and determine encoding
0143: final byte[] b4 = new byte[4];
0144: int count = 0;
0145: for (; count < 4; count++) {
0146: b4[count] = (byte) stream.read();
0147: }
0148: if (count == 4) {
0149: Object[] encodingDesc = getEncodingName(b4, count);
0150: encoding = (String) (encodingDesc[0]);
0151: isBigEndian = (Boolean) (encodingDesc[1]);
0152:
0153: if (encodingDesc.length > 3) {
0154: isBomPresent = (Boolean) (encodingDesc[2]);
0155: skip = (Integer) (encodingDesc[3]);
0156: } else {
0157: isBomPresent = true;
0158: skip = (Integer) (encodingDesc[2]);
0159: }
0160:
0161: stream.reset();
0162: // Special case UTF-8 files with BOM created by Microsoft
0163: // tools. It's more efficient to consume the BOM than make
0164: // the reader perform extra checks. -Ac
0165: if (count > 2 && encoding.equals("UTF-8")) {
0166: int b0 = b4[0] & 0xFF;
0167: int b1 = b4[1] & 0xFF;
0168: int b2 = b4[2] & 0xFF;
0169: if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
0170: // ignore first three bytes...
0171: stream.skip(3);
0172: }
0173: }
0174: reader = createReader(stream, encoding, isBigEndian);
0175: } else {
0176: reader = createReader(stream, encoding, isBigEndian);
0177: }
0178: }
0179: }
0180:
0181: // Adapted from:
0182: // org.apache.xerces.impl.XMLEntityManager.createReader
0183: /**
0184: * Creates a reader capable of reading the given input stream in
0185: * the specified encoding.
0186: *
0187: * @param inputStream The input stream.
0188: * @param encoding The encoding name that the input stream is
0189: * encoded using. If the user has specified that
0190: * Java encoding names are allowed, then the
0191: * encoding name may be a Java encoding name;
0192: * otherwise, it is an ianaEncoding name.
0193: * @param isBigEndian For encodings (like uCS-4), whose names cannot
0194: * specify a byte order, this tells whether the order
0195: * is bigEndian. null means unknown or not relevant.
0196: *
0197: * @return Returns a reader.
0198: */
0199: private Reader createReader(InputStream inputStream,
0200: String encoding, Boolean isBigEndian) throws IOException,
0201: JasperException {
0202:
0203: // normalize encoding name
0204: if (encoding == null) {
0205: encoding = "UTF-8";
0206: }
0207:
0208: // try to use an optimized reader
0209: String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
0210: if (ENCODING.equals("UTF-8")) {
0211: return new UTF8Reader(inputStream, fBufferSize);
0212: }
0213: if (ENCODING.equals("US-ASCII")) {
0214: return new ASCIIReader(inputStream, fBufferSize);
0215: }
0216: if (ENCODING.equals("ISO-10646-UCS-4")) {
0217: if (isBigEndian != null) {
0218: boolean isBE = isBigEndian.booleanValue();
0219: if (isBE) {
0220: return new UCSReader(inputStream, UCSReader.UCS4BE);
0221: } else {
0222: return new UCSReader(inputStream, UCSReader.UCS4LE);
0223: }
0224: } else {
0225: err.jspError(
0226: "jsp.error.xml.encodingByteOrderUnsupported",
0227: encoding);
0228: }
0229: }
0230: if (ENCODING.equals("ISO-10646-UCS-2")) {
0231: if (isBigEndian != null) { // sould never happen with this encoding...
0232: boolean isBE = isBigEndian.booleanValue();
0233: if (isBE) {
0234: return new UCSReader(inputStream, UCSReader.UCS2BE);
0235: } else {
0236: return new UCSReader(inputStream, UCSReader.UCS2LE);
0237: }
0238: } else {
0239: err.jspError(
0240: "jsp.error.xml.encodingByteOrderUnsupported",
0241: encoding);
0242: }
0243: }
0244:
0245: // check for valid name
0246: boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
0247: boolean validJava = XMLChar.isValidJavaEncoding(encoding);
0248: if (!validIANA || (fAllowJavaEncodings && !validJava)) {
0249: err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
0250: // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
0251: // because every byte is a valid ISO Latin 1 character.
0252: // It may not translate correctly but if we failed on
0253: // the encoding anyway, then we're expecting the content
0254: // of the document to be bad. This will just prevent an
0255: // invalid UTF-8 sequence to be detected. This is only
0256: // important when continue-after-fatal-error is turned
0257: // on. -Ac
0258: encoding = "ISO-8859-1";
0259: }
0260:
0261: // try to use a Java reader
0262: String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
0263: if (javaEncoding == null) {
0264: if (fAllowJavaEncodings) {
0265: javaEncoding = encoding;
0266: } else {
0267: err.jspError("jsp.error.xml.encodingDeclInvalid",
0268: encoding);
0269: // see comment above.
0270: javaEncoding = "ISO8859_1";
0271: }
0272: }
0273: return new InputStreamReader(inputStream, javaEncoding);
0274:
0275: } // createReader(InputStream,String, Boolean): Reader
0276:
0277: // Adapted from:
0278: // org.apache.xerces.impl.XMLEntityManager.getEncodingName
0279: /**
0280: * Returns the IANA encoding name that is auto-detected from
0281: * the bytes specified, with the endian-ness of that encoding where
0282: * appropriate.
0283: *
0284: * @param b4 The first four bytes of the input.
0285: * @param count The number of bytes actually read.
0286: * @return a 2-element array: the first element, an IANA-encoding string,
0287: * the second element a Boolean which is true iff the document is big
0288: * endian, false if it's little-endian, and null if the distinction isn't
0289: * relevant.
0290: */
0291: private Object[] getEncodingName(byte[] b4, int count) {
0292:
0293: if (count < 2) {
0294: return new Object[] { "UTF-8", null, Boolean.FALSE,
0295: Integer.valueOf(0) };
0296: }
0297:
0298: // UTF-16, with BOM
0299: int b0 = b4[0] & 0xFF;
0300: int b1 = b4[1] & 0xFF;
0301: if (b0 == 0xFE && b1 == 0xFF) {
0302: // UTF-16, big-endian
0303: return new Object[] { "UTF-16BE", Boolean.TRUE,
0304: Integer.valueOf(2) };
0305: }
0306: if (b0 == 0xFF && b1 == 0xFE) {
0307: // UTF-16, little-endian
0308: return new Object[] { "UTF-16LE", Boolean.FALSE,
0309: Integer.valueOf(2) };
0310: }
0311:
0312: // default to UTF-8 if we don't have enough bytes to make a
0313: // good determination of the encoding
0314: if (count < 3) {
0315: return new Object[] { "UTF-8", null, Boolean.FALSE,
0316: Integer.valueOf(0) };
0317: }
0318:
0319: // UTF-8 with a BOM
0320: int b2 = b4[2] & 0xFF;
0321: if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
0322: return new Object[] { "UTF-8", null, Integer.valueOf(3) };
0323: }
0324:
0325: // default to UTF-8 if we don't have enough bytes to make a
0326: // good determination of the encoding
0327: if (count < 4) {
0328: return new Object[] { "UTF-8", null, Integer.valueOf(0) };
0329: }
0330:
0331: // other encodings
0332: int b3 = b4[3] & 0xFF;
0333: if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
0334: // UCS-4, big endian (1234)
0335: return new Object[] { "ISO-10646-UCS-4", new Boolean(true),
0336: Integer.valueOf(4) };
0337: }
0338: if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
0339: // UCS-4, little endian (4321)
0340: return new Object[] { "ISO-10646-UCS-4",
0341: new Boolean(false), Integer.valueOf(4) };
0342: }
0343: if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
0344: // UCS-4, unusual octet order (2143)
0345: // REVISIT: What should this be?
0346: return new Object[] { "ISO-10646-UCS-4", null,
0347: Integer.valueOf(4) };
0348: }
0349: if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
0350: // UCS-4, unusual octect order (3412)
0351: // REVISIT: What should this be?
0352: return new Object[] { "ISO-10646-UCS-4", null,
0353: Integer.valueOf(4) };
0354: }
0355: if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
0356: // UTF-16, big-endian, no BOM
0357: // (or could turn out to be UCS-2...
0358: // REVISIT: What should this be?
0359: return new Object[] { "UTF-16BE", new Boolean(true),
0360: Integer.valueOf(4) };
0361: }
0362: if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
0363: // UTF-16, little-endian, no BOM
0364: // (or could turn out to be UCS-2...
0365: return new Object[] { "UTF-16LE", new Boolean(false),
0366: Integer.valueOf(4) };
0367: }
0368: if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
0369: // EBCDIC
0370: // a la xerces1, return CP037 instead of EBCDIC here
0371: return new Object[] { "CP037", null, Integer.valueOf(4) };
0372: }
0373:
0374: // default encoding
0375: return new Object[] { "UTF-8", null, Boolean.FALSE,
0376: Integer.valueOf(0) };
0377:
0378: }
0379:
0380: // Adapted from:
0381: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal
0382: /** Returns true if the current entity being scanned is external. */
0383: public boolean isExternal() {
0384: return true;
0385: }
0386:
0387: // Adapted from:
0388: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar
0389: /**
0390: * Returns the next character on the input.
0391: * <p>
0392: * <strong>Note:</strong> The character is <em>not</em> consumed.
0393: *
0394: * @throws IOException Thrown if i/o error occurs.
0395: * @throws EOFException Thrown on end of file.
0396: */
0397: public int peekChar() throws IOException {
0398:
0399: // load more characters, if needed
0400: if (fCurrentEntity.position == fCurrentEntity.count) {
0401: load(0, true);
0402: }
0403:
0404: // peek at character
0405: int c = fCurrentEntity.ch[fCurrentEntity.position];
0406:
0407: // return peeked character
0408: if (fCurrentEntity.isExternal()) {
0409: return c != '\r' ? c : '\n';
0410: } else {
0411: return c;
0412: }
0413:
0414: } // peekChar():int
0415:
0416: // Adapted from:
0417: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar
0418: /**
0419: * Returns the next character on the input.
0420: * <p>
0421: * <strong>Note:</strong> The character is consumed.
0422: *
0423: * @throws IOException Thrown if i/o error occurs.
0424: * @throws EOFException Thrown on end of file.
0425: */
0426: public int scanChar() throws IOException {
0427:
0428: // load more characters, if needed
0429: if (fCurrentEntity.position == fCurrentEntity.count) {
0430: load(0, true);
0431: }
0432:
0433: // scan character
0434: int c = fCurrentEntity.ch[fCurrentEntity.position++];
0435: boolean external = false;
0436: if (c == '\n'
0437: || (c == '\r' && (external = fCurrentEntity
0438: .isExternal()))) {
0439: fCurrentEntity.lineNumber++;
0440: fCurrentEntity.columnNumber = 1;
0441: if (fCurrentEntity.position == fCurrentEntity.count) {
0442: fCurrentEntity.ch[0] = (char) c;
0443: load(1, false);
0444: }
0445: if (c == '\r' && external) {
0446: if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
0447: fCurrentEntity.position--;
0448: }
0449: c = '\n';
0450: }
0451: }
0452:
0453: // return character that was scanned
0454: fCurrentEntity.columnNumber++;
0455: return c;
0456:
0457: }
0458:
0459: // Adapted from:
0460: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName
0461: /**
0462: * Scans a string matching the Name production that starts at the current
0463: * input position and returns it as a symbol, or null if no Name is present.
0464: * <p>
0465: * <strong>Note:</strong> The Name characters are consumed.
0466: * <p>
0467: * <strong>Note:</strong> The returned string is produced by
0468: * fSymbolTable.addSymbol(), i.e. it is a symbol of this object's table.
0469: * @return the scanned Name as a symbol, or null when no Name character was found
0470: * @throws IOException Thrown if i/o error occurs.
0471: * @throws EOFException Thrown on end of file.
0472: *
0473: * @see SymbolTable
0474: * @see XMLChar#isName
0475: * @see XMLChar#isNameStart
0476: */
0477: public String scanName() throws IOException {
0478:
0479: // refill the buffer once everything buffered has been consumed
0480: if (fCurrentEntity.position == fCurrentEntity.count) {
0481: load(0, true);
0482: }
0483:
0484: // scan name; offset is the buffer index at which the name begins
0485: int offset = fCurrentEntity.position;
0486: if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
0487: if (++fCurrentEntity.position == fCurrentEntity.count) { // the start character was the last buffered one
0488: fCurrentEntity.ch[0] = fCurrentEntity.ch[offset]; // keep it at the front of the buffer
0489: offset = 0;
0490: if (load(1, false)) { // NOTE(review): a true result is treated as "nothing follows" - confirm against load()
0491: fCurrentEntity.columnNumber++;
0492: String symbol = fSymbolTable.addSymbol(
0493: fCurrentEntity.ch, 0, 1);
0494: return symbol; // one-character name
0495: }
0496: }
0497: while (XMLChar
0498: .isName(fCurrentEntity.ch[fCurrentEntity.position])) {
0499: if (++fCurrentEntity.position == fCurrentEntity.count) { // buffer end reached inside the name
0500: int length = fCurrentEntity.position - offset;
0501: if (length == fBufferSize) {
0502: // the name fills the whole buffer: double the buffer and move the name to its start
0503: char[] tmp = new char[fBufferSize * 2];
0504: System.arraycopy(fCurrentEntity.ch, offset,
0505: tmp, 0, length);
0506: fCurrentEntity.ch = tmp;
0507: fBufferSize *= 2;
0508: } else { // enough room: just shift the partial name to the start of the buffer
0509: System.arraycopy(fCurrentEntity.ch, offset,
0510: fCurrentEntity.ch, 0, length);
0511: }
0512: offset = 0;
0513: if (load(length, false)) { // load further characters behind the partial name
0514: break;
0515: }
0516: }
0517: }
0518: }
0519: int length = fCurrentEntity.position - offset; // 0 when the first character could not start a name
0520: fCurrentEntity.columnNumber += length;
0521:
0522: // return name, or null when nothing was scanned
0523: String symbol = null;
0524: if (length > 0) {
0525: symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset,
0526: length);
0527: }
0528: return symbol;
0529:
0530: }
0531:
0532: // Adapted from:
0533: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral
0534: /**
0535: * Scans a range of attribute value data, setting the fields of the
0536: * XMLString structure, appropriately.
0537: * <p>
0538: * <strong>Note:</strong> The characters are consumed.
0539: * <p>
0540: * <strong>Note:</strong> This method does not guarantee to return
0541: * the longest run of attribute value data. This method may return
0542: * before the quote character due to reaching the end of the input
0543: * buffer or any other reason.
0544: * <p>
0545: * <strong>Note:</strong> The fields contained in the XMLString
0546: * structure are not guaranteed to remain valid upon subsequent calls
0547: * to the entity scanner. Therefore, the caller is responsible for
0548: * immediately using the returned character data or making a copy of
0549: * the character data.
0550: *
0551: * @param quote The quote character that signifies the end of the
0552: * attribute value data.
0553: * @param content The content structure to fill.
0554: *
0555: * @return Returns the next character on the input, if known. This
0556: * value may be -1 but this does <em>not</em> designate
0557: * end of file.
0558: *
0559: * @throws IOException Thrown if i/o error occurs.
0560: * @throws EOFException Thrown on end of file.
0561: */
0562: public int scanLiteral(int quote, XMLString content)
0563: throws IOException {
0564:
0565: // load more characters, if needed
0566: if (fCurrentEntity.position == fCurrentEntity.count) {
0567: load(0, true);
0568: } else if (fCurrentEntity.position == fCurrentEntity.count - 1) { // exactly one buffered character is left
0569: fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1]; // keep it at the front of the buffer
0570: load(1, false); // and load further characters behind it
0571: fCurrentEntity.position = 0;
0572: }
0573:
0574: // normalize newlines: a leading run of line breaks is rewritten to '\n' characters
0575: int offset = fCurrentEntity.position; // start of the data handed to content
0576: int c = fCurrentEntity.ch[offset];
0577: int newlines = 0; // number of normalized newlines in the leading run
0578: boolean external = fCurrentEntity.isExternal();
0579: if (c == '\n' || (c == '\r' && external)) {
0580: do {
0581: c = fCurrentEntity.ch[fCurrentEntity.position++];
0582: if (c == '\r' && external) {
0583: newlines++;
0584: fCurrentEntity.lineNumber++;
0585: fCurrentEntity.columnNumber = 1;
0586: if (fCurrentEntity.position == fCurrentEntity.count) { // buffer exhausted inside the newline run
0587: offset = 0;
0588: fCurrentEntity.position = newlines;
0589: if (load(newlines, false)) { // load behind the slots reserved for the newlines seen so far
0590: break;
0591: }
0592: }
0593: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { // "\r\n" pair: drop one of the two characters
0594: fCurrentEntity.position++;
0595: offset++;
0596: }
0597: /*** NEWLINE NORMALIZATION ***/
0598: else {
0599: newlines++;
0600: }
0601: /***/
0602: } else if (c == '\n') {
0603: newlines++;
0604: fCurrentEntity.lineNumber++;
0605: fCurrentEntity.columnNumber = 1;
0606: if (fCurrentEntity.position == fCurrentEntity.count) { // buffer exhausted inside the newline run
0607: offset = 0;
0608: fCurrentEntity.position = newlines;
0609: if (load(newlines, false)) {
0610: break;
0611: }
0612: }
0613: /*** NEWLINE NORMALIZATION ***
0614: if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
0615: && external) {
0616: fCurrentEntity.position++;
0617: offset++;
0618: }
0619: /***/
0620: } else { // first character that is not part of the newline run: un-read it
0621: fCurrentEntity.position--;
0622: break;
0623: }
0624: } while (fCurrentEntity.position < fCurrentEntity.count - 1);
0625: for (int i = offset; i < fCurrentEntity.position; i++) { // overwrite the scanned run with plain '\n' characters
0626: fCurrentEntity.ch[i] = '\n';
0627: }
0628: int length = fCurrentEntity.position - offset;
0629: if (fCurrentEntity.position == fCurrentEntity.count - 1) { // only one character left: hand back the newlines alone
0630: content.setValues(fCurrentEntity.ch, offset, length);
0631: return -1;
0632: }
0633: }
0634:
0635: // scan literal value up to the quote, a '%' or a character that is not content
0636: while (fCurrentEntity.position < fCurrentEntity.count) {
0637: c = fCurrentEntity.ch[fCurrentEntity.position++];
0638: if ((c == quote && (!fCurrentEntity.literal || external))
0639: || c == '%' || !XMLChar.isContent(c)) {
0640: fCurrentEntity.position--; // leave the terminating character unconsumed
0641: break;
0642: }
0643: }
0644: int length = fCurrentEntity.position - offset;
0645: fCurrentEntity.columnNumber += length - newlines; // newlines already reset the column counter
0646: content.setValues(fCurrentEntity.ch, offset, length);
0647:
0648: // return next character, or -1 when the buffer end was reached
0649: if (fCurrentEntity.position != fCurrentEntity.count) {
0650: c = fCurrentEntity.ch[fCurrentEntity.position];
0651: // NOTE: We don't want to accidentally signal the
0652: // end of the literal if we're expanding an
0653: // entity appearing in the literal. -Ac
0654: if (c == quote && fCurrentEntity.literal) {
0655: c = -1;
0656: }
0657: } else {
0658: c = -1;
0659: }
0660: return c;
0661:
0662: }
0663:
0664: /**
0665: * Scans a range of character data up to the specified delimiter,
0666: * setting the fields of the XMLString structure, appropriately.
0667: * <p>
0668: * <strong>Note:</strong> The characters are consumed.
0669: * <p>
0670: * <strong>Note:</strong> This assumes that the internal buffer is
0671: * at least the same size, or bigger, than the length of the delimiter
0672: * and that the delimiter contains at least one character.
0673: * <p>
0674: * <strong>Note:</strong> This method does not guarantee to return
0675: * the longest run of character data. This method may return before
0676: * the delimiter due to reaching the end of the input buffer or any
0677: * other reason.
0678: * <p>
0679: * <strong>Note:</strong> The fields contained in the XMLString
0680: * structure are not guaranteed to remain valid upon subsequent calls
0681: * to the entity scanner. Therefore, the caller is responsible for
0682: * immediately using the returned character data or making a copy of
0683: * the character data.
0684: *
0685: * @param delimiter The string that signifies the end of the character
0686: * data to be scanned.
0687: * @param buffer The data structure to fill; scanned data is appended to it.
0688: *
0689: * @return Returns true if there is more data to scan, false otherwise.
0690: *
0691: * @throws IOException Thrown if i/o error occurs.
0692: * @throws EOFException Thrown on end of file.
0693: */
0694: public boolean scanData(String delimiter, XMLStringBuffer buffer)
0695: throws IOException {
0696:
0697: boolean done = false; // set once the complete delimiter has been consumed
0698: int delimLen = delimiter.length();
0699: char charAt0 = delimiter.charAt(0);
0700: boolean external = fCurrentEntity.isExternal();
0701: do {
0702:
0703: // load more characters, if needed
0704:
0705: if (fCurrentEntity.position == fCurrentEntity.count) {
0706: load(0, true);
0707: } else if (fCurrentEntity.position >= fCurrentEntity.count
0708: - delimLen) { // not more than delimLen characters left: move them to the front and load behind them
0709: System.arraycopy(fCurrentEntity.ch,
0710: fCurrentEntity.position, fCurrentEntity.ch, 0,
0711: fCurrentEntity.count - fCurrentEntity.position);
0712: load(fCurrentEntity.count - fCurrentEntity.position,
0713: false);
0714: fCurrentEntity.position = 0;
0715: }
0716: if (fCurrentEntity.position >= fCurrentEntity.count
0717: - delimLen) {
0718: // something must be wrong with the input: e.g., file ends an
0719: // unterminated comment; hand over what is left and report "no more data"
0720: int length = fCurrentEntity.count
0721: - fCurrentEntity.position;
0722: buffer.append(fCurrentEntity.ch,
0723: fCurrentEntity.position, length);
0724: fCurrentEntity.columnNumber += fCurrentEntity.count;
0725: fCurrentEntity.position = fCurrentEntity.count;
0726: load(0, true);
0727: return false;
0728: }
0729:
0730: // normalize newlines: a leading run of line breaks is rewritten to '\n' characters
0731: int offset = fCurrentEntity.position; // start of the data appended in this round
0732: int c = fCurrentEntity.ch[offset];
0733: int newlines = 0; // number of normalized newlines in the leading run
0734: if (c == '\n' || (c == '\r' && external)) {
0735: do {
0736: c = fCurrentEntity.ch[fCurrentEntity.position++];
0737: if (c == '\r' && external) {
0738: newlines++;
0739: fCurrentEntity.lineNumber++;
0740: fCurrentEntity.columnNumber = 1;
0741: if (fCurrentEntity.position == fCurrentEntity.count) { // buffer exhausted inside the newline run
0742: offset = 0;
0743: fCurrentEntity.position = newlines;
0744: if (load(newlines, false)) { // load behind the slots reserved for the newlines seen so far
0745: break;
0746: }
0747: }
0748: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') { // "\r\n" pair: drop one of the two characters
0749: fCurrentEntity.position++;
0750: offset++;
0751: }
0752: /*** NEWLINE NORMALIZATION ***/
0753: else {
0754: newlines++;
0755: }
0756: } else if (c == '\n') {
0757: newlines++;
0758: fCurrentEntity.lineNumber++;
0759: fCurrentEntity.columnNumber = 1;
0760: if (fCurrentEntity.position == fCurrentEntity.count) { // buffer exhausted inside the newline run
0761: offset = 0;
0762: fCurrentEntity.position = newlines;
0763: fCurrentEntity.count = newlines; // NOTE(review): only this branch also resets count - confirm this is intended
0764: if (load(newlines, false)) {
0765: break;
0766: }
0767: }
0768: } else { // first character that is not part of the newline run: un-read it
0769: fCurrentEntity.position--;
0770: break;
0771: }
0772: } while (fCurrentEntity.position < fCurrentEntity.count - 1);
0773: for (int i = offset; i < fCurrentEntity.position; i++) { // overwrite the scanned run with plain '\n' characters
0774: fCurrentEntity.ch[i] = '\n';
0775: }
0776: int length = fCurrentEntity.position - offset;
0777: if (fCurrentEntity.position == fCurrentEntity.count - 1) { // only one character left: hand over the newlines alone
0778: buffer.append(fCurrentEntity.ch, offset, length);
0779: return true;
0780: }
0781: }
0782:
0783: // iterate over buffer looking for delimiter
0784: OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
0785: c = fCurrentEntity.ch[fCurrentEntity.position++];
0786: if (c == charAt0) {
0787: // looks like we just hit the delimiter
0788: int delimOffset = fCurrentEntity.position - 1;
0789: for (int i = 1; i < delimLen; i++) {
0790: if (fCurrentEntity.position == fCurrentEntity.count) { // buffer ends inside a possible delimiter: un-read the matched part
0791: fCurrentEntity.position -= i;
0792: break OUTER;
0793: }
0794: c = fCurrentEntity.ch[fCurrentEntity.position++];
0795: if (delimiter.charAt(i) != c) { // mismatch: this was not the delimiter
0796: fCurrentEntity.position--;
0797: break;
0798: }
0799: }
0800: if (fCurrentEntity.position == delimOffset
0801: + delimLen) { // every delimiter character matched
0802: done = true;
0803: break;
0804: }
0805: } else if (c == '\n' || (external && c == '\r')) { // newline: leave it for the normalization of the next round
0806: fCurrentEntity.position--;
0807: break;
0808: } else if (XMLChar.isInvalid(c)) { // invalid character: hand over the data before it and stop
0809: fCurrentEntity.position--;
0810: int length = fCurrentEntity.position - offset;
0811: fCurrentEntity.columnNumber += length - newlines;
0812: buffer.append(fCurrentEntity.ch, offset, length);
0813: return true;
0814: }
0815: }
0816: int length = fCurrentEntity.position - offset;
0817: fCurrentEntity.columnNumber += length - newlines; // newlines already reset the column counter
0818: if (done) {
0819: length -= delimLen; // the delimiter itself is not part of the data
0820: }
0821: buffer.append(fCurrentEntity.ch, offset, length);
0822:
0823: // keep scanning until the delimiter has been consumed
0824: } while (!done);
0825: return !done;
0826:
0827: }
0828:
0829: // Adapted from:
0830: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar
0831: /**
0832: * Skips a character appearing immediately on the input.
0833: * <p>
0834: * <strong>Note:</strong> The character is consumed only if it matches
0835: * the specified character.
0836: *
0837: * @param c The character to skip.
0838: *
0839: * @return Returns true if the character was skipped.
0840: *
0841: * @throws IOException Thrown if i/o error occurs.
0842: * @throws EOFException Thrown on end of file.
0843: */
0844: public boolean skipChar(int c) throws IOException {
0845:
0846: // load more characters, if needed
0847: if (fCurrentEntity.position == fCurrentEntity.count) {
0848: load(0, true);
0849: }
0850:
0851: // skip character
0852: int cc = fCurrentEntity.ch[fCurrentEntity.position];
0853: if (cc == c) {
0854: fCurrentEntity.position++;
0855: if (c == '\n') {
0856: fCurrentEntity.lineNumber++;
0857: fCurrentEntity.columnNumber = 1;
0858: } else {
0859: fCurrentEntity.columnNumber++;
0860: }
0861: return true;
0862: } else if (c == '\n' && cc == '\r'
0863: && fCurrentEntity.isExternal()) {
0864: // handle newlines
0865: if (fCurrentEntity.position == fCurrentEntity.count) {
0866: fCurrentEntity.ch[0] = (char) cc;
0867: load(1, false);
0868: }
0869: fCurrentEntity.position++;
0870: if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
0871: fCurrentEntity.position++;
0872: }
0873: fCurrentEntity.lineNumber++;
0874: fCurrentEntity.columnNumber = 1;
0875: return true;
0876: }
0877:
0878: // character was not skipped
0879: return false;
0880:
0881: }
0882:
0883: // Adapted from:
0884: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces
0885: /**
0886: * Skips space characters appearing immediately on the input, updating
0887: * the line and column counters along the way.
0888: * <p>
0889: * <strong>Note:</strong> The characters are consumed only if they are
0890: * space characters.
0891: * @return Returns true if at least one space character was skipped.
0892: *
0893: * @throws IOException Thrown if i/o error occurs.
0894: * @throws EOFException Thrown on end of file.
0895: *
0896: * @see XMLChar#isSpace
0897: */
0898: public boolean skipSpaces() throws IOException {
0899:
0900: // load more characters, if needed
0901: if (fCurrentEntity.position == fCurrentEntity.count) {
0902: load(0, true);
0903: }
0904:
0905: // skip spaces
0906: int c = fCurrentEntity.ch[fCurrentEntity.position];
0907: if (XMLChar.isSpace(c)) {
0908: boolean external = fCurrentEntity.isExternal();
0909: do {
0910: boolean entityChanged = false; // result of the load() issued for a newline at the buffer end
0911: // handle newlines
0912: if (c == '\n' || (external && c == '\r')) {
0913: fCurrentEntity.lineNumber++;
0914: fCurrentEntity.columnNumber = 1;
0915: if (fCurrentEntity.position == fCurrentEntity.count - 1) { // the newline is the last buffered character
0916: fCurrentEntity.ch[0] = (char) c; // keep it at the front of the buffer
0917: entityChanged = load(1, true);
0918: if (!entityChanged)
0919: // the load change the position to be 1,
0920: // need to restore it when entity not changed
0921: fCurrentEntity.position = 0;
0922: }
0923: if (c == '\r' && external) {
0924: // REVISIT: Does this need to be updated to fix the
0925: // #x0D ^#x0A newline normalization problem? -Ac
0926: if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') { // step onto a following '\n', otherwise stay on the '\r'
0927: fCurrentEntity.position--;
0928: }
0929: }
0930: /*** NEWLINE NORMALIZATION ***
0931: else {
0932: if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
0933: && external) {
0934: fCurrentEntity.position++;
0935: }
0936: }
0937: /***/
0938: } else { // ordinary space character
0939: fCurrentEntity.columnNumber++;
0940: }
0941: // step past the space just handled, then load more characters, if needed
0942: if (!entityChanged)
0943: fCurrentEntity.position++;
0944: if (fCurrentEntity.position == fCurrentEntity.count) {
0945: load(0, true);
0946: }
0947: } while (XMLChar
0948: .isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
0949: return true;
0950: }
0951:
0952: // no spaces were found
0953: return false;
0954:
0955: }
0956:
0957: /**
0958: * Skips the specified string appearing immediately on the input.
0959: * <p>
0960: * <strong>Note:</strong> The characters are consumed only if they are
0961: * space characters.
0962: *
0963: * @param s The string to skip.
0964: *
0965: * @return Returns true if the string was skipped.
0966: *
0967: * @throws IOException Thrown if i/o error occurs.
0968: * @throws EOFException Thrown on end of file.
0969: */
0970: public boolean skipString(String s) throws IOException {
0971:
0972: // load more characters, if needed
0973: if (fCurrentEntity.position == fCurrentEntity.count) {
0974: load(0, true);
0975: }
0976:
0977: // skip string
0978: final int length = s.length();
0979: for (int i = 0; i < length; i++) {
0980: char c = fCurrentEntity.ch[fCurrentEntity.position++];
0981: if (c != s.charAt(i)) {
0982: fCurrentEntity.position -= i + 1;
0983: return false;
0984: }
0985: if (i < length - 1
0986: && fCurrentEntity.position == fCurrentEntity.count) {
0987: System.arraycopy(fCurrentEntity.ch,
0988: fCurrentEntity.count - i - 1,
0989: fCurrentEntity.ch, 0, i + 1);
0990: // REVISIT: Can a string to be skipped cross an
0991: // entity boundary? -Ac
0992: if (load(i + 1, false)) {
0993: fCurrentEntity.position -= i + 1;
0994: return false;
0995: }
0996: }
0997: }
0998: fCurrentEntity.columnNumber += length;
0999: return true;
1000:
1001: }
1002:
1003: // Adapted from:
1004: // org.apache.xerces.impl.XMLEntityManager.EntityScanner.load
1005: /**
1006: * Loads a chunk of text.
1007: *
1008: * @param offset The offset into the character buffer to
1009: * read the next batch of characters.
1010: * @param changeEntity True if the load should change entities
1011: * at the end of the entity, otherwise leave
1012: * the current entity in place and the entity
1013: * boundary will be signaled by the return
1014: * value.
1015: *
1016: * @returns Returns true if the entity changed as a result of this
1017: * load operation.
1018: */
1019: final boolean load(int offset, boolean changeEntity)
1020: throws IOException {
1021:
1022: // read characters
1023: int length = fCurrentEntity.mayReadChunks ? (fCurrentEntity.ch.length - offset)
1024: : (DEFAULT_XMLDECL_BUFFER_SIZE);
1025: int count = fCurrentEntity.reader.read(fCurrentEntity.ch,
1026: offset, length);
1027:
1028: // reset count and position
1029: boolean entityChanged = false;
1030: if (count != -1) {
1031: if (count != 0) {
1032: fCurrentEntity.count = count + offset;
1033: fCurrentEntity.position = offset;
1034: }
1035: }
1036:
1037: // end of this entity
1038: else {
1039: fCurrentEntity.count = offset;
1040: fCurrentEntity.position = offset;
1041: entityChanged = true;
1042: if (changeEntity) {
1043: endEntity();
1044: if (fCurrentEntity == null) {
1045: throw new EOFException();
1046: }
1047: // handle the trailing edges
1048: if (fCurrentEntity.position == fCurrentEntity.count) {
1049: load(0, false);
1050: }
1051: }
1052: }
1053:
1054: return entityChanged;
1055:
1056: }
1057:
1058: // Adapted from:
1059: // org.apache.xerces.impl.XMLEntityManager.RewindableInputStream
1060: /**
1061: * This class wraps the byte inputstreams we're presented with.
1062: * We need it because java.io.InputStreams don't provide
1063: * functionality to reread processed bytes, and they have a habit
1064: * of reading more than one character when you call their read()
1065: * methods. This means that, once we discover the true (declared)
1066: * encoding of a document, we can neither backtrack to read the
1067: * whole doc again nor start reading where we are with a new
1068: * reader.
1069: *
1070: * This class allows rewinding an inputStream by allowing a mark
1071: * to be set, and the stream reset to that position. <strong>The
1072: * class assumes that it needs to read one character per
1073: * invocation when it's read() method is inovked, but uses the
1074: * underlying InputStream's read(char[], offset length) method--it
1075: * won't buffer data read this way!</strong>
1076: *
1077: * @author Neil Graham, IBM
1078: * @author Glenn Marcy, IBM
1079: */
1080: private final class RewindableInputStream extends InputStream {
1081:
1082: private InputStream fInputStream;
1083: private byte[] fData;
1084: private int fStartOffset;
1085: private int fEndOffset;
1086: private int fOffset;
1087: private int fLength;
1088: private int fMark;
1089:
1090: public RewindableInputStream(InputStream is) {
1091: fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
1092: fInputStream = is;
1093: fStartOffset = 0;
1094: fEndOffset = -1;
1095: fOffset = 0;
1096: fLength = 0;
1097: fMark = 0;
1098: }
1099:
1100: public void setStartOffset(int offset) {
1101: fStartOffset = offset;
1102: }
1103:
1104: public void rewind() {
1105: fOffset = fStartOffset;
1106: }
1107:
1108: public int read() throws IOException {
1109: int b = 0;
1110: if (fOffset < fLength) {
1111: return fData[fOffset++] & 0xff;
1112: }
1113: if (fOffset == fEndOffset) {
1114: return -1;
1115: }
1116: if (fOffset == fData.length) {
1117: byte[] newData = new byte[fOffset << 1];
1118: System.arraycopy(fData, 0, newData, 0, fOffset);
1119: fData = newData;
1120: }
1121: b = fInputStream.read();
1122: if (b == -1) {
1123: fEndOffset = fOffset;
1124: return -1;
1125: }
1126: fData[fLength++] = (byte) b;
1127: fOffset++;
1128: return b & 0xff;
1129: }
1130:
1131: public int read(byte[] b, int off, int len) throws IOException {
1132: int bytesLeft = fLength - fOffset;
1133: if (bytesLeft == 0) {
1134: if (fOffset == fEndOffset) {
1135: return -1;
1136: }
1137: // better get some more for the voracious reader...
1138: if (fCurrentEntity.mayReadChunks) {
1139: return fInputStream.read(b, off, len);
1140: }
1141: int returnedVal = read();
1142: if (returnedVal == -1) {
1143: fEndOffset = fOffset;
1144: return -1;
1145: }
1146: b[off] = (byte) returnedVal;
1147: return 1;
1148: }
1149: if (len < bytesLeft) {
1150: if (len <= 0) {
1151: return 0;
1152: }
1153: } else {
1154: len = bytesLeft;
1155: }
1156: if (b != null) {
1157: System.arraycopy(fData, fOffset, b, off, len);
1158: }
1159: fOffset += len;
1160: return len;
1161: }
1162:
1163: public long skip(long n) throws IOException {
1164: int bytesLeft;
1165: if (n <= 0) {
1166: return 0;
1167: }
1168: bytesLeft = fLength - fOffset;
1169: if (bytesLeft == 0) {
1170: if (fOffset == fEndOffset) {
1171: return 0;
1172: }
1173: return fInputStream.skip(n);
1174: }
1175: if (n <= bytesLeft) {
1176: fOffset += n;
1177: return n;
1178: }
1179: fOffset += bytesLeft;
1180: if (fOffset == fEndOffset) {
1181: return bytesLeft;
1182: }
1183: n -= bytesLeft;
1184: /*
1185: * In a manner of speaking, when this class isn't permitting more
1186: * than one byte at a time to be read, it is "blocking". The
1187: * available() method should indicate how much can be read without
1188: * blocking, so while we're in this mode, it should only indicate
1189: * that bytes in its buffer are available; otherwise, the result of
1190: * available() on the underlying InputStream is appropriate.
1191: */
1192: return fInputStream.skip(n) + bytesLeft;
1193: }
1194:
1195: public int available() throws IOException {
1196: int bytesLeft = fLength - fOffset;
1197: if (bytesLeft == 0) {
1198: if (fOffset == fEndOffset) {
1199: return -1;
1200: }
1201: return fCurrentEntity.mayReadChunks ? fInputStream
1202: .available() : 0;
1203: }
1204: return bytesLeft;
1205: }
1206:
1207: public void mark(int howMuch) {
1208: fMark = fOffset;
1209: }
1210:
1211: public void reset() {
1212: fOffset = fMark;
1213: }
1214:
1215: public boolean markSupported() {
1216: return true;
1217: }
1218:
1219: public void close() throws IOException {
1220: if (fInputStream != null) {
1221: fInputStream.close();
1222: fInputStream = null;
1223: }
1224: }
1225: } // end of RewindableInputStream class
1226:
1227: // Adapted from:
1228: // org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch
1229: private void scanXMLDecl() throws IOException, JasperException {
1230:
1231: if (skipString("<?xml")) {
1232: fMarkupDepth++;
1233: // NOTE: special case where document starts with a PI
1234: // whose name starts with "xml" (e.g. "xmlfoo")
1235: if (XMLChar.isName(peekChar())) {
1236: fStringBuffer.clear();
1237: fStringBuffer.append("xml");
1238: while (XMLChar.isName(peekChar())) {
1239: fStringBuffer.append((char) scanChar());
1240: }
1241: String target = fSymbolTable.addSymbol(
1242: fStringBuffer.ch, fStringBuffer.offset,
1243: fStringBuffer.length);
1244: scanPIData(target, fString);
1245: }
1246:
1247: // standard XML declaration
1248: else {
1249: scanXMLDeclOrTextDecl(false);
1250: }
1251: }
1252: }
1253:
1254: // Adapted from:
1255: // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl
1256: /**
1257: * Scans an XML or text declaration.
1258: * <p>
1259: * <pre>
1260: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1261: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1262: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
1263: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1264: * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1265: * | ('"' ('yes' | 'no') '"'))
1266: *
1267: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1268: * </pre>
1269: *
1270: * @param scanningTextDecl True if a text declaration is to
1271: * be scanned instead of an XML
1272: * declaration.
1273: */
1274: private void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
1275: throws IOException, JasperException {
1276:
1277: // scan decl
1278: scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
1279: fMarkupDepth--;
1280:
1281: // pseudo-attribute values
1282: String encodingPseudoAttr = fStrings[1];
1283:
1284: // set encoding on reader
1285: if (encodingPseudoAttr != null) {
1286: isEncodingSetInProlog = true;
1287: encoding = encodingPseudoAttr;
1288: }
1289: }
1290:
1291: // Adapted from:
1292: // org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl
1293: /**
1294: * Scans an XML or text declaration.
1295: * <p>
1296: * <pre>
1297: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1298: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1299: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
1300: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1301: * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1302: * | ('"' ('yes' | 'no') '"'))
1303: *
1304: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1305: * </pre>
1306: *
1307: * @param scanningTextDecl True if a text declaration is to
1308: * be scanned instead of an XML
1309: * declaration.
1310: * @param pseudoAttributeValues An array of size 3 to return the version,
1311: * encoding and standalone pseudo attribute values
1312: * (in that order).
1313: *
1314: * <strong>Note:</strong> This method uses fString, anything in it
1315: * at the time of calling is lost.
1316: */
1317: private void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
1318: String[] pseudoAttributeValues) throws IOException,
1319: JasperException {
1320:
1321: // pseudo-attribute values
1322: String version = null;
1323: String encoding = null;
1324: String standalone = null;
1325:
1326: // scan pseudo-attributes
1327: final int STATE_VERSION = 0;
1328: final int STATE_ENCODING = 1;
1329: final int STATE_STANDALONE = 2;
1330: final int STATE_DONE = 3;
1331: int state = STATE_VERSION;
1332:
1333: boolean dataFoundForTarget = false;
1334: boolean sawSpace = skipSpaces();
1335: while (peekChar() != '?') {
1336: dataFoundForTarget = true;
1337: String name = scanPseudoAttribute(scanningTextDecl, fString);
1338: switch (state) {
1339: case STATE_VERSION: {
1340: if (name == fVersionSymbol) {
1341: if (!sawSpace) {
1342: reportFatalError(
1343: scanningTextDecl ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
1344: : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
1345: null);
1346: }
1347: version = fString.toString();
1348: state = STATE_ENCODING;
1349: if (!version.equals("1.0")) {
1350: // REVISIT: XML REC says we should throw an error
1351: // in such cases.
1352: // some may object the throwing of fatalError.
1353: err.jspError(
1354: "jsp.error.xml.versionNotSupported",
1355: version);
1356: }
1357: } else if (name == fEncodingSymbol) {
1358: if (!scanningTextDecl) {
1359: err
1360: .jspError("jsp.error.xml.versionInfoRequired");
1361: }
1362: if (!sawSpace) {
1363: reportFatalError(
1364: scanningTextDecl ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1365: : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1366: null);
1367: }
1368: encoding = fString.toString();
1369: state = scanningTextDecl ? STATE_DONE
1370: : STATE_STANDALONE;
1371: } else {
1372: if (scanningTextDecl) {
1373: err
1374: .jspError("jsp.error.xml.encodingDeclRequired");
1375: } else {
1376: err
1377: .jspError("jsp.error.xml.versionInfoRequired");
1378: }
1379: }
1380: break;
1381: }
1382: case STATE_ENCODING: {
1383: if (name == fEncodingSymbol) {
1384: if (!sawSpace) {
1385: reportFatalError(
1386: scanningTextDecl ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1387: : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1388: null);
1389: }
1390: encoding = fString.toString();
1391: state = scanningTextDecl ? STATE_DONE
1392: : STATE_STANDALONE;
1393: // TODO: check encoding name; set encoding on
1394: // entity scanner
1395: } else if (!scanningTextDecl
1396: && name == fStandaloneSymbol) {
1397: if (!sawSpace) {
1398: err
1399: .jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1400: }
1401: standalone = fString.toString();
1402: state = STATE_DONE;
1403: if (!standalone.equals("yes")
1404: && !standalone.equals("no")) {
1405: err.jspError("jsp.error.xml.sdDeclInvalid");
1406: }
1407: } else {
1408: err.jspError("jsp.error.xml.encodingDeclRequired");
1409: }
1410: break;
1411: }
1412: case STATE_STANDALONE: {
1413: if (name == fStandaloneSymbol) {
1414: if (!sawSpace) {
1415: err
1416: .jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1417: }
1418: standalone = fString.toString();
1419: state = STATE_DONE;
1420: if (!standalone.equals("yes")
1421: && !standalone.equals("no")) {
1422: err.jspError("jsp.error.xml.sdDeclInvalid");
1423: }
1424: } else {
1425: err.jspError("jsp.error.xml.encodingDeclRequired");
1426: }
1427: break;
1428: }
1429: default: {
1430: err.jspError("jsp.error.xml.noMorePseudoAttributes");
1431: }
1432: }
1433: sawSpace = skipSpaces();
1434: }
1435: // REVISIT: should we remove this error reporting?
1436: if (scanningTextDecl && state != STATE_DONE) {
1437: err.jspError("jsp.error.xml.morePseudoAttributes");
1438: }
1439:
1440: // If there is no data in the xml or text decl then we fail to report
1441: // error for version or encoding info above.
1442: if (scanningTextDecl) {
1443: if (!dataFoundForTarget && encoding == null) {
1444: err.jspError("jsp.error.xml.encodingDeclRequired");
1445: }
1446: } else {
1447: if (!dataFoundForTarget && version == null) {
1448: err.jspError("jsp.error.xml.versionInfoRequired");
1449: }
1450: }
1451:
1452: // end
1453: if (!skipChar('?')) {
1454: err.jspError("jsp.error.xml.xmlDeclUnterminated");
1455: }
1456: if (!skipChar('>')) {
1457: err.jspError("jsp.error.xml.xmlDeclUnterminated");
1458:
1459: }
1460:
1461: // fill in return array
1462: pseudoAttributeValues[0] = version;
1463: pseudoAttributeValues[1] = encoding;
1464: pseudoAttributeValues[2] = standalone;
1465: }
1466:
1467: // Adapted from:
1468: // org.apache.xerces.impl.XMLScanner.scanPseudoAttribute
1469: /**
1470: * Scans a pseudo attribute.
1471: *
1472: * @param scanningTextDecl True if scanning this pseudo-attribute for a
1473: * TextDecl; false if scanning XMLDecl. This
1474: * flag is needed to report the correct type of
1475: * error.
1476: * @param value The string to fill in with the attribute
1477: * value.
1478: *
1479: * @return The name of the attribute
1480: *
1481: * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
1482: * at the time of calling is lost.
1483: */
1484: public String scanPseudoAttribute(boolean scanningTextDecl,
1485: XMLString value) throws IOException, JasperException {
1486:
1487: String name = scanName();
1488: if (name == null) {
1489: err.jspError("jsp.error.xml.pseudoAttrNameExpected");
1490: }
1491: skipSpaces();
1492: if (!skipChar('=')) {
1493: reportFatalError(
1494: scanningTextDecl ? "jsp.error.xml.eqRequiredInTextDecl"
1495: : "jsp.error.xml.eqRequiredInXMLDecl", name);
1496: }
1497: skipSpaces();
1498: int quote = peekChar();
1499: if (quote != '\'' && quote != '"') {
1500: reportFatalError(
1501: scanningTextDecl ? "jsp.error.xml.quoteRequiredInTextDecl"
1502: : "jsp.error.xml.quoteRequiredInXMLDecl",
1503: name);
1504: }
1505: scanChar();
1506: int c = scanLiteral(quote, value);
1507: if (c != quote) {
1508: fStringBuffer2.clear();
1509: do {
1510: fStringBuffer2.append(value);
1511: if (c != -1) {
1512: if (c == '&' || c == '%' || c == '<' || c == ']') {
1513: fStringBuffer2.append((char) scanChar());
1514: } else if (XMLChar.isHighSurrogate(c)) {
1515: scanSurrogates(fStringBuffer2);
1516: } else if (XMLChar.isInvalid(c)) {
1517: String key = scanningTextDecl ? "jsp.error.xml.invalidCharInTextDecl"
1518: : "jsp.error.xml.invalidCharInXMLDecl";
1519: reportFatalError(key, Integer.toString(c, 16));
1520: scanChar();
1521: }
1522: }
1523: c = scanLiteral(quote, value);
1524: } while (c != quote);
1525: fStringBuffer2.append(value);
1526: value.setValues(fStringBuffer2);
1527: }
1528: if (!skipChar(quote)) {
1529: reportFatalError(
1530: scanningTextDecl ? "jsp.error.xml.closeQuoteMissingInTextDecl"
1531: : "jsp.error.xml.closeQuoteMissingInXMLDecl",
1532: name);
1533: }
1534:
1535: // return
1536: return name;
1537:
1538: }
1539:
1540: // Adapted from:
1541: // org.apache.xerces.impl.XMLScanner.scanPIData
1542: /**
1543: * Scans a processing data. This is needed to handle the situation
1544: * where a document starts with a processing instruction whose
1545: * target name <em>starts with</em> "xml". (e.g. xmlfoo)
1546: *
1547: * <strong>Note:</strong> This method uses fStringBuffer, anything in it
1548: * at the time of calling is lost.
1549: *
1550: * @param target The PI target
1551: * @param data The string to fill in with the data
1552: */
1553: private void scanPIData(String target, XMLString data)
1554: throws IOException, JasperException {
1555:
1556: // check target
1557: if (target.length() == 3) {
1558: char c0 = Character.toLowerCase(target.charAt(0));
1559: char c1 = Character.toLowerCase(target.charAt(1));
1560: char c2 = Character.toLowerCase(target.charAt(2));
1561: if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
1562: err.jspError("jsp.error.xml.reservedPITarget");
1563: }
1564: }
1565:
1566: // spaces
1567: if (!skipSpaces()) {
1568: if (skipString("?>")) {
1569: // we found the end, there is no data
1570: data.clear();
1571: return;
1572: } else {
1573: // if there is data there should be some space
1574: err.jspError("jsp.error.xml.spaceRequiredInPI");
1575: }
1576: }
1577:
1578: fStringBuffer.clear();
1579: // data
1580: if (scanData("?>", fStringBuffer)) {
1581: do {
1582: int c = peekChar();
1583: if (c != -1) {
1584: if (XMLChar.isHighSurrogate(c)) {
1585: scanSurrogates(fStringBuffer);
1586: } else if (XMLChar.isInvalid(c)) {
1587: err.jspError("jsp.error.xml.invalidCharInPI",
1588: Integer.toHexString(c));
1589: scanChar();
1590: }
1591: }
1592: } while (scanData("?>", fStringBuffer));
1593: }
1594: data.setValues(fStringBuffer);
1595:
1596: }
1597:
1598: // Adapted from:
1599: // org.apache.xerces.impl.XMLScanner.scanSurrogates
1600: /**
1601: * Scans surrogates and append them to the specified buffer.
1602: * <p>
1603: * <strong>Note:</strong> This assumes the current char has already been
1604: * identified as a high surrogate.
1605: *
1606: * @param buf The StringBuffer to append the read surrogates to.
1607: * @returns True if it succeeded.
1608: */
1609: private boolean scanSurrogates(XMLStringBuffer buf)
1610: throws IOException, JasperException {
1611:
1612: int high = scanChar();
1613: int low = peekChar();
1614: if (!XMLChar.isLowSurrogate(low)) {
1615: err.jspError("jsp.error.xml.invalidCharInContent", Integer
1616: .toString(high, 16));
1617: return false;
1618: }
1619: scanChar();
1620:
1621: // convert surrogates to supplemental character
1622: int c = XMLChar.supplemental((char) high, (char) low);
1623:
1624: // supplemental character must be a valid XML character
1625: if (!XMLChar.isValid(c)) {
1626: err.jspError("jsp.error.xml.invalidCharInContent", Integer
1627: .toString(c, 16));
1628: return false;
1629: }
1630:
1631: // fill in the buffer
1632: buf.append((char) high);
1633: buf.append((char) low);
1634:
1635: return true;
1636:
1637: }
1638:
1639: // Adapted from:
1640: // org.apache.xerces.impl.XMLScanner.reportFatalError
1641: /**
1642: * Convenience function used in all XML scanners.
1643: */
1644: private void reportFatalError(String msgId, String arg)
1645: throws JasperException {
1646: err.jspError(msgId, arg);
1647: }
1648:
1649: }
|