Source Code Cross Referenced for XMLEncodingDetector.java in  » Servlet-Container » apache-tomcat-6.0.14 » org » apache » jasper » xmlparser » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Servlet Container » apache tomcat 6.0.14 » org.apache.jasper.xmlparser 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         * Licensed to the Apache Software Foundation (ASF) under one or more
0003:         * contributor license agreements.  See the NOTICE file distributed with
0004:         * this work for additional information regarding copyright ownership.
0005:         * The ASF licenses this file to You under the Apache License, Version 2.0
0006:         * (the "License"); you may not use this file except in compliance with
0007:         * the License.  You may obtain a copy of the License at
0008:         * 
0009:         *      http://www.apache.org/licenses/LICENSE-2.0
0010:         * 
0011:         * Unless required by applicable law or agreed to in writing, software
0012:         * distributed under the License is distributed on an "AS IS" BASIS,
0013:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014:         * See the License for the specific language governing permissions and
0015:         * limitations under the License.
0016:         * ====================================================================
0017:         *
0018:         * This software consists of voluntary contributions made by many
0019:         * individuals on behalf of the Apache Software Foundation and was
0020:         * originally based on software copyright (c) 1999, International
0021:         * Business Machines, Inc., http://www.apache.org.  For more
0022:         * information on the Apache Software Foundation, please see
0023:         * <http://www.apache.org/>.
0024:         */
0025:
0026:        package org.apache.jasper.xmlparser;
0027:
0028:        import java.io.EOFException;
0029:        import java.io.InputStream;
0030:        import java.io.InputStreamReader;
0031:        import java.io.IOException;
0032:        import java.io.Reader;
0033:        import java.util.Locale;
0034:        import java.util.jar.JarFile;
0035:
0036:        import org.apache.jasper.JasperException;
0037:        import org.apache.jasper.JspCompilationContext;
0038:        import org.apache.jasper.compiler.ErrorDispatcher;
0039:        import org.apache.jasper.compiler.JspUtil;
0040:
0041:        public class XMLEncodingDetector {
0042:
0043:            private InputStream stream;
0044:            private String encoding;
0045:            private boolean isEncodingSetInProlog;
0046:            private boolean isBomPresent;
0047:            private int skip;
0048:            private Boolean isBigEndian;
0049:            private Reader reader;
0050:
0051:            // org.apache.xerces.impl.XMLEntityManager fields
0052:            public static final int DEFAULT_BUFFER_SIZE = 2048;
0053:            public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
0054:            private boolean fAllowJavaEncodings;
0055:            private SymbolTable fSymbolTable;
0056:            private XMLEncodingDetector fCurrentEntity;
0057:            private int fBufferSize = DEFAULT_BUFFER_SIZE;
0058:
0059:            // org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields
0060:            private int lineNumber = 1;
0061:            private int columnNumber = 1;
0062:            private boolean literal;
0063:            private char[] ch = new char[DEFAULT_BUFFER_SIZE];
0064:            private int position;
0065:            private int count;
0066:            private boolean mayReadChunks = false;
0067:
0068:            // org.apache.xerces.impl.XMLScanner fields
0069:            private XMLString fString = new XMLString();
0070:            private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
0071:            private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
0072:            private final static String fVersionSymbol = "version";
0073:            private final static String fEncodingSymbol = "encoding";
0074:            private final static String fStandaloneSymbol = "standalone";
0075:
0076:            // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields
0077:            private int fMarkupDepth = 0;
0078:            private String[] fStrings = new String[3];
0079:
0080:            private ErrorDispatcher err;
0081:
/**
 * Creates a detector that owns a new symbol table and registers itself
 * as the entity currently being scanned.
 */
public XMLEncodingDetector() {
    this.fCurrentEntity = this;
    this.fSymbolTable = new SymbolTable();
}
0089:
0090:            /**
0091:             * Autodetects the encoding of the XML document supplied by the given
0092:             * input stream.
0093:             *
0094:             * Encoding autodetection is done according to the XML 1.0 specification,
0095:             * Appendix F.1: Detection Without External Encoding Information.
0096:             *
0097:             * @return Two-element array, where the first element (of type
0098:             * java.lang.String) contains the name of the (auto)detected encoding, and
0099:             * the second element (of type java.lang.Boolean) specifies whether the 
0100:             * encoding was specified using the 'encoding' attribute of an XML prolog
0101:             * (TRUE) or autodetected (FALSE).
0102:             */
0103:            public static Object[] getEncoding(String fname, JarFile jarFile,
0104:                    JspCompilationContext ctxt, ErrorDispatcher err)
0105:                    throws IOException, JasperException {
0106:                InputStream inStream = JspUtil.getInputStream(fname, jarFile,
0107:                        ctxt, err);
0108:                XMLEncodingDetector detector = new XMLEncodingDetector();
0109:                Object[] ret = detector.getEncoding(inStream, err);
0110:                inStream.close();
0111:
0112:                return ret;
0113:            }
0114:
0115:            private Object[] getEncoding(InputStream in, ErrorDispatcher err)
0116:                    throws IOException, JasperException {
0117:                this .stream = in;
0118:                this .err = err;
0119:                createInitialReader();
0120:                scanXMLDecl();
0121:
0122:                return new Object[] { this .encoding,
0123:                        Boolean.valueOf(this .isEncodingSetInProlog),
0124:                        Boolean.valueOf(this .isBomPresent),
0125:                        Integer.valueOf(this .skip) };
0126:            }
0127:
0128:            // stub method
0129:            void endEntity() {
0130:            }
0131:
0132:            // Adapted from:
0133:            // org.apache.xerces.impl.XMLEntityManager.startEntity()
0134:            private void createInitialReader() throws IOException,
0135:                    JasperException {
0136:
0137:                // wrap this stream in RewindableInputStream
0138:                stream = new RewindableInputStream(stream);
0139:
0140:                // perform auto-detect of encoding if necessary
0141:                if (encoding == null) {
0142:                    // read first four bytes and determine encoding
0143:                    final byte[] b4 = new byte[4];
0144:                    int count = 0;
0145:                    for (; count < 4; count++) {
0146:                        b4[count] = (byte) stream.read();
0147:                    }
0148:                    if (count == 4) {
0149:                        Object[] encodingDesc = getEncodingName(b4, count);
0150:                        encoding = (String) (encodingDesc[0]);
0151:                        isBigEndian = (Boolean) (encodingDesc[1]);
0152:
0153:                        if (encodingDesc.length > 3) {
0154:                            isBomPresent = (Boolean) (encodingDesc[2]);
0155:                            skip = (Integer) (encodingDesc[3]);
0156:                        } else {
0157:                            isBomPresent = true;
0158:                            skip = (Integer) (encodingDesc[2]);
0159:                        }
0160:
0161:                        stream.reset();
0162:                        // Special case UTF-8 files with BOM created by Microsoft
0163:                        // tools. It's more efficient to consume the BOM than make
0164:                        // the reader perform extra checks. -Ac
0165:                        if (count > 2 && encoding.equals("UTF-8")) {
0166:                            int b0 = b4[0] & 0xFF;
0167:                            int b1 = b4[1] & 0xFF;
0168:                            int b2 = b4[2] & 0xFF;
0169:                            if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
0170:                                // ignore first three bytes...
0171:                                stream.skip(3);
0172:                            }
0173:                        }
0174:                        reader = createReader(stream, encoding, isBigEndian);
0175:                    } else {
0176:                        reader = createReader(stream, encoding, isBigEndian);
0177:                    }
0178:                }
0179:            }
0180:
0181:            // Adapted from:
0182:            // org.apache.xerces.impl.XMLEntityManager.createReader
0183:            /**
0184:             * Creates a reader capable of reading the given input stream in
0185:             * the specified encoding.
0186:             *
0187:             * @param inputStream  The input stream.
0188:             * @param encoding     The encoding name that the input stream is
0189:             *                     encoded using. If the user has specified that
0190:             *                     Java encoding names are allowed, then the
0191:             *                     encoding name may be a Java encoding name;
0192:             *                     otherwise, it is an ianaEncoding name.
0193:             * @param isBigEndian   For encodings (like uCS-4), whose names cannot
0194:             *                      specify a byte order, this tells whether the order
0195:             *                      is bigEndian. null means unknown or not relevant.
0196:             *
0197:             * @return Returns a reader.
0198:             */
0199:            private Reader createReader(InputStream inputStream,
0200:                    String encoding, Boolean isBigEndian) throws IOException,
0201:                    JasperException {
0202:
0203:                // normalize encoding name
0204:                if (encoding == null) {
0205:                    encoding = "UTF-8";
0206:                }
0207:
0208:                // try to use an optimized reader
0209:                String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
0210:                if (ENCODING.equals("UTF-8")) {
0211:                    return new UTF8Reader(inputStream, fBufferSize);
0212:                }
0213:                if (ENCODING.equals("US-ASCII")) {
0214:                    return new ASCIIReader(inputStream, fBufferSize);
0215:                }
0216:                if (ENCODING.equals("ISO-10646-UCS-4")) {
0217:                    if (isBigEndian != null) {
0218:                        boolean isBE = isBigEndian.booleanValue();
0219:                        if (isBE) {
0220:                            return new UCSReader(inputStream, UCSReader.UCS4BE);
0221:                        } else {
0222:                            return new UCSReader(inputStream, UCSReader.UCS4LE);
0223:                        }
0224:                    } else {
0225:                        err.jspError(
0226:                                "jsp.error.xml.encodingByteOrderUnsupported",
0227:                                encoding);
0228:                    }
0229:                }
0230:                if (ENCODING.equals("ISO-10646-UCS-2")) {
0231:                    if (isBigEndian != null) { // sould never happen with this encoding...
0232:                        boolean isBE = isBigEndian.booleanValue();
0233:                        if (isBE) {
0234:                            return new UCSReader(inputStream, UCSReader.UCS2BE);
0235:                        } else {
0236:                            return new UCSReader(inputStream, UCSReader.UCS2LE);
0237:                        }
0238:                    } else {
0239:                        err.jspError(
0240:                                "jsp.error.xml.encodingByteOrderUnsupported",
0241:                                encoding);
0242:                    }
0243:                }
0244:
0245:                // check for valid name
0246:                boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
0247:                boolean validJava = XMLChar.isValidJavaEncoding(encoding);
0248:                if (!validIANA || (fAllowJavaEncodings && !validJava)) {
0249:                    err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
0250:                    // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
0251:                    //       because every byte is a valid ISO Latin 1 character.
0252:                    //       It may not translate correctly but if we failed on
0253:                    //       the encoding anyway, then we're expecting the content
0254:                    //       of the document to be bad. This will just prevent an
0255:                    //       invalid UTF-8 sequence to be detected. This is only
0256:                    //       important when continue-after-fatal-error is turned
0257:                    //       on. -Ac
0258:                    encoding = "ISO-8859-1";
0259:                }
0260:
0261:                // try to use a Java reader
0262:                String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
0263:                if (javaEncoding == null) {
0264:                    if (fAllowJavaEncodings) {
0265:                        javaEncoding = encoding;
0266:                    } else {
0267:                        err.jspError("jsp.error.xml.encodingDeclInvalid",
0268:                                encoding);
0269:                        // see comment above.
0270:                        javaEncoding = "ISO8859_1";
0271:                    }
0272:                }
0273:                return new InputStreamReader(inputStream, javaEncoding);
0274:
0275:            } // createReader(InputStream,String, Boolean): Reader
0276:
0277:            // Adapted from:
0278:            // org.apache.xerces.impl.XMLEntityManager.getEncodingName
0279:            /**
0280:             * Returns the IANA encoding name that is auto-detected from
0281:             * the bytes specified, with the endian-ness of that encoding where
0282:             * appropriate.
0283:             *
0284:             * @param b4    The first four bytes of the input.
0285:             * @param count The number of bytes actually read.
0286:             * @return a 2-element array:  the first element, an IANA-encoding string,
0287:             *  the second element a Boolean which is true iff the document is big
0288:             *  endian, false if it's little-endian, and null if the distinction isn't
0289:             *  relevant.
0290:             */
0291:            private Object[] getEncodingName(byte[] b4, int count) {
0292:
0293:                if (count < 2) {
0294:                    return new Object[] { "UTF-8", null, Boolean.FALSE,
0295:                            Integer.valueOf(0) };
0296:                }
0297:
0298:                // UTF-16, with BOM
0299:                int b0 = b4[0] & 0xFF;
0300:                int b1 = b4[1] & 0xFF;
0301:                if (b0 == 0xFE && b1 == 0xFF) {
0302:                    // UTF-16, big-endian
0303:                    return new Object[] { "UTF-16BE", Boolean.TRUE,
0304:                            Integer.valueOf(2) };
0305:                }
0306:                if (b0 == 0xFF && b1 == 0xFE) {
0307:                    // UTF-16, little-endian
0308:                    return new Object[] { "UTF-16LE", Boolean.FALSE,
0309:                            Integer.valueOf(2) };
0310:                }
0311:
0312:                // default to UTF-8 if we don't have enough bytes to make a
0313:                // good determination of the encoding
0314:                if (count < 3) {
0315:                    return new Object[] { "UTF-8", null, Boolean.FALSE,
0316:                            Integer.valueOf(0) };
0317:                }
0318:
0319:                // UTF-8 with a BOM
0320:                int b2 = b4[2] & 0xFF;
0321:                if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
0322:                    return new Object[] { "UTF-8", null, Integer.valueOf(3) };
0323:                }
0324:
0325:                // default to UTF-8 if we don't have enough bytes to make a
0326:                // good determination of the encoding
0327:                if (count < 4) {
0328:                    return new Object[] { "UTF-8", null, Integer.valueOf(0) };
0329:                }
0330:
0331:                // other encodings
0332:                int b3 = b4[3] & 0xFF;
0333:                if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
0334:                    // UCS-4, big endian (1234)
0335:                    return new Object[] { "ISO-10646-UCS-4", new Boolean(true),
0336:                            Integer.valueOf(4) };
0337:                }
0338:                if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
0339:                    // UCS-4, little endian (4321)
0340:                    return new Object[] { "ISO-10646-UCS-4",
0341:                            new Boolean(false), Integer.valueOf(4) };
0342:                }
0343:                if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
0344:                    // UCS-4, unusual octet order (2143)
0345:                    // REVISIT: What should this be?
0346:                    return new Object[] { "ISO-10646-UCS-4", null,
0347:                            Integer.valueOf(4) };
0348:                }
0349:                if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
0350:                    // UCS-4, unusual octect order (3412)
0351:                    // REVISIT: What should this be?
0352:                    return new Object[] { "ISO-10646-UCS-4", null,
0353:                            Integer.valueOf(4) };
0354:                }
0355:                if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
0356:                    // UTF-16, big-endian, no BOM
0357:                    // (or could turn out to be UCS-2...
0358:                    // REVISIT: What should this be?
0359:                    return new Object[] { "UTF-16BE", new Boolean(true),
0360:                            Integer.valueOf(4) };
0361:                }
0362:                if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
0363:                    // UTF-16, little-endian, no BOM
0364:                    // (or could turn out to be UCS-2...
0365:                    return new Object[] { "UTF-16LE", new Boolean(false),
0366:                            Integer.valueOf(4) };
0367:                }
0368:                if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
0369:                    // EBCDIC
0370:                    // a la xerces1, return CP037 instead of EBCDIC here
0371:                    return new Object[] { "CP037", null, Integer.valueOf(4) };
0372:                }
0373:
0374:                // default encoding
0375:                return new Object[] { "UTF-8", null, Boolean.FALSE,
0376:                        Integer.valueOf(0) };
0377:
0378:            }
0379:
0380:            // Adapted from:
0381:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal
0382:            /** Returns true if the current entity being scanned is external. */
0383:            public boolean isExternal() {
0384:                return true;
0385:            }
0386:
0387:            // Adapted from:
0388:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar
0389:            /**
0390:             * Returns the next character on the input.
0391:             * <p>
0392:             * <strong>Note:</strong> The character is <em>not</em> consumed.
0393:             *
0394:             * @throws IOException  Thrown if i/o error occurs.
0395:             * @throws EOFException Thrown on end of file.
0396:             */
0397:            public int peekChar() throws IOException {
0398:
0399:                // load more characters, if needed
0400:                if (fCurrentEntity.position == fCurrentEntity.count) {
0401:                    load(0, true);
0402:                }
0403:
0404:                // peek at character
0405:                int c = fCurrentEntity.ch[fCurrentEntity.position];
0406:
0407:                // return peeked character
0408:                if (fCurrentEntity.isExternal()) {
0409:                    return c != '\r' ? c : '\n';
0410:                } else {
0411:                    return c;
0412:                }
0413:
0414:            } // peekChar():int
0415:
0416:            // Adapted from:
0417:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar
0418:            /**
0419:             * Returns the next character on the input.
0420:             * <p>
0421:             * <strong>Note:</strong> The character is consumed.
0422:             *
0423:             * @throws IOException  Thrown if i/o error occurs.
0424:             * @throws EOFException Thrown on end of file.
0425:             */
0426:            public int scanChar() throws IOException {
0427:
0428:                // load more characters, if needed
0429:                if (fCurrentEntity.position == fCurrentEntity.count) {
0430:                    load(0, true);
0431:                }
0432:
0433:                // scan character
0434:                int c = fCurrentEntity.ch[fCurrentEntity.position++];
0435:                boolean external = false;
0436:                if (c == '\n'
0437:                        || (c == '\r' && (external = fCurrentEntity
0438:                                .isExternal()))) {
0439:                    fCurrentEntity.lineNumber++;
0440:                    fCurrentEntity.columnNumber = 1;
0441:                    if (fCurrentEntity.position == fCurrentEntity.count) {
0442:                        fCurrentEntity.ch[0] = (char) c;
0443:                        load(1, false);
0444:                    }
0445:                    if (c == '\r' && external) {
0446:                        if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
0447:                            fCurrentEntity.position--;
0448:                        }
0449:                        c = '\n';
0450:                    }
0451:                }
0452:
0453:                // return character that was scanned
0454:                fCurrentEntity.columnNumber++;
0455:                return c;
0456:
0457:            }
0458:
0459:            // Adapted from:
0460:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName
0461:            /**
0462:             * Returns a string matching the Name production appearing immediately
0463:             * on the input as a symbol, or null if no Name string is present.
0464:             * <p>
0465:             * <strong>Note:</strong> The Name characters are consumed.
0466:             * <p>
0467:             * <strong>Note:</strong> The string returned must be a symbol. The
0468:             * SymbolTable can be used for this purpose.
0469:             *
0470:             * @throws IOException  Thrown if i/o error occurs.
0471:             * @throws EOFException Thrown on end of file.
0472:             *
0473:             * @see SymbolTable
0474:             * @see XMLChar#isName
0475:             * @see XMLChar#isNameStart
0476:             */
0477:            public String scanName() throws IOException {
0478:
0479:                // load more characters, if needed
0480:                if (fCurrentEntity.position == fCurrentEntity.count) {
0481:                    load(0, true);
0482:                }
0483:
0484:                // scan name
0485:                int offset = fCurrentEntity.position;
0486:                if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
0487:                    if (++fCurrentEntity.position == fCurrentEntity.count) {
0488:                        fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
0489:                        offset = 0;
0490:                        if (load(1, false)) {
0491:                            fCurrentEntity.columnNumber++;
0492:                            String symbol = fSymbolTable.addSymbol(
0493:                                    fCurrentEntity.ch, 0, 1);
0494:                            return symbol;
0495:                        }
0496:                    }
0497:                    while (XMLChar
0498:                            .isName(fCurrentEntity.ch[fCurrentEntity.position])) {
0499:                        if (++fCurrentEntity.position == fCurrentEntity.count) {
0500:                            int length = fCurrentEntity.position - offset;
0501:                            if (length == fBufferSize) {
0502:                                // bad luck we have to resize our buffer
0503:                                char[] tmp = new char[fBufferSize * 2];
0504:                                System.arraycopy(fCurrentEntity.ch, offset,
0505:                                        tmp, 0, length);
0506:                                fCurrentEntity.ch = tmp;
0507:                                fBufferSize *= 2;
0508:                            } else {
0509:                                System.arraycopy(fCurrentEntity.ch, offset,
0510:                                        fCurrentEntity.ch, 0, length);
0511:                            }
0512:                            offset = 0;
0513:                            if (load(length, false)) {
0514:                                break;
0515:                            }
0516:                        }
0517:                    }
0518:                }
0519:                int length = fCurrentEntity.position - offset;
0520:                fCurrentEntity.columnNumber += length;
0521:
0522:                // return name
0523:                String symbol = null;
0524:                if (length > 0) {
0525:                    symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset,
0526:                            length);
0527:                }
0528:                return symbol;
0529:
0530:            }
0531:
0532:            // Adapted from:
0533:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral
0534:            /**
0535:             * Scans a range of attribute value data, setting the fields of the
0536:             * XMLString structure, appropriately.
0537:             * <p>
0538:             * <strong>Note:</strong> The characters are consumed.
0539:             * <p>
0540:             * <strong>Note:</strong> This method does not guarantee to return
0541:             * the longest run of attribute value data. This method may return
0542:             * before the quote character due to reaching the end of the input
0543:             * buffer or any other reason.
0544:             * <p>
0545:             * <strong>Note:</strong> The fields contained in the XMLString
0546:             * structure are not guaranteed to remain valid upon subsequent calls
0547:             * to the entity scanner. Therefore, the caller is responsible for
0548:             * immediately using the returned character data or making a copy of
0549:             * the character data.
0550:             *
0551:             * @param quote   The quote character that signifies the end of the
0552:             *                attribute value data.
0553:             * @param content The content structure to fill.
0554:             *
0555:             * @return Returns the next character on the input, if known. This
0556:             *         value may be -1 but this does <em>not</em> designate
0557:             *         end of file.
0558:             *
0559:             * @throws IOException  Thrown if i/o error occurs.
0560:             * @throws EOFException Thrown on end of file.
0561:             */
0562:            public int scanLiteral(int quote, XMLString content)
0563:                    throws IOException {
0564:
0565:                // load more characters, if needed
                     // Buffer fully consumed: refill from the start. Exactly one character
                     // left: move it to slot 0 and load behind it, then point back at it.
0566:                if (fCurrentEntity.position == fCurrentEntity.count) {
0567:                    load(0, true);
0568:                } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
0569:                    fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
0570:                    load(1, false);
0571:                    fCurrentEntity.position = 0;
0572:                }
0573:
0574:                // normalize newlines
                     // 'offset' marks the start of the span handed back through 'content';
                     // 'newlines' counts line breaks consumed in the leading newline run.
0575:                int offset = fCurrentEntity.position;
0576:                int c = fCurrentEntity.ch[offset];
0577:                int newlines = 0;
0578:                boolean external = fCurrentEntity.isExternal();
                     // A carriage return is only treated as a line break in external entities.
0579:                if (c == '\n' || (c == '\r' && external)) {
0580:                    do {
0581:                        c = fCurrentEntity.ch[fCurrentEntity.position++];
0582:                        if (c == '\r' && external) {
0583:                            newlines++;
0584:                            fCurrentEntity.lineNumber++;
0585:                            fCurrentEntity.columnNumber = 1;
                                 // Buffer ran out mid-run: restart the span at the buffer front,
                                 // reserve 'newlines' slots for the breaks seen so far and load
                                 // behind them; stop if load() reports the end of the entity.
0586:                            if (fCurrentEntity.position == fCurrentEntity.count) {
0587:                                offset = 0;
0588:                                fCurrentEntity.position = newlines;
0589:                                if (load(newlines, false)) {
0590:                                    break;
0591:                                }
0592:                            }
                                 // CR LF pair: consume the LF too and advance 'offset' so the
                                 // pair contributes a single character to the returned span.
0593:                            if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
0594:                                fCurrentEntity.position++;
0595:                                offset++;
0596:                            }
0597:                            /*** NEWLINE NORMALIZATION ***/
0598:                            else {
0599:                                newlines++;
0600:                            }
0601:                            /***/
0602:                        } else if (c == '\n') {
0603:                            newlines++;
0604:                            fCurrentEntity.lineNumber++;
0605:                            fCurrentEntity.columnNumber = 1;
                                 // Same mid-run refill as in the CR branch above.
0606:                            if (fCurrentEntity.position == fCurrentEntity.count) {
0607:                                offset = 0;
0608:                                fCurrentEntity.position = newlines;
0609:                                if (load(newlines, false)) {
0610:                                    break;
0611:                                }
0612:                            }
0613:                            /*** NEWLINE NORMALIZATION ***
0614:                             if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
0615:                             && external) {
0616:                             fCurrentEntity.position++;
0617:                             offset++;
0618:                             }
0619:                             /***/
0620:                        } else {
                                 // First non-newline character: un-read it and end the run.
0621:                            fCurrentEntity.position--;
0622:                            break;
0623:                        }
0624:                    } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                         // Overwrite the consumed run in place so every break reads as '\n'.
0625:                    for (int i = offset; i < fCurrentEntity.position; i++) {
0626:                        fCurrentEntity.ch[i] = '\n';
0627:                    }
0628:                    int length = fCurrentEntity.position - offset;
                         // Only one character is left in the buffer: hand back the newline
                         // run on its own and report the next character as unknown (-1).
0629:                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
0630:                        content.setValues(fCurrentEntity.ch, offset, length);
0631:                        return -1;
0632:                    }
0633:                }
0634:
0635:                // scan literal value
                     // Stop (and un-read the character) at the quote, at '%', or at anything
                     // XMLChar.isContent rejects. The quote only terminates the scan when the
                     // entity is not flagged 'literal' or is external.
0636:                while (fCurrentEntity.position < fCurrentEntity.count) {
0637:                    c = fCurrentEntity.ch[fCurrentEntity.position++];
0638:                    if ((c == quote && (!fCurrentEntity.literal || external))
0639:                            || c == '%' || !XMLChar.isContent(c)) {
0640:                        fCurrentEntity.position--;
0641:                        break;
0642:                    }
0643:                }
                     // The span covers the normalized newline run plus the scanned characters;
                     // the newlines are excluded from the column advance because the column
                     // was already reset to 1 for each of them above.
0644:                int length = fCurrentEntity.position - offset;
0645:                fCurrentEntity.columnNumber += length - newlines;
0646:                content.setValues(fCurrentEntity.ch, offset, length);
0647:
0648:                // return next character
                     // -1 means "not known": either the buffer is exhausted or the next
                     // character is the quote inside an entity flagged 'literal'.
0649:                if (fCurrentEntity.position != fCurrentEntity.count) {
0650:                    c = fCurrentEntity.ch[fCurrentEntity.position];
0651:                    // NOTE: We don't want to accidentally signal the
0652:                    //       end of the literal if we're expanding an
0653:                    //       entity appearing in the literal. -Ac
0654:                    if (c == quote && fCurrentEntity.literal) {
0655:                        c = -1;
0656:                    }
0657:                } else {
0658:                    c = -1;
0659:                }
0660:                return c;
0661:
0662:            }
0663:
0664:            /**
0665:             * Scans a range of character data up to the specified delimiter,
0666:             * setting the fields of the XMLString structure, appropriately.
0667:             * <p>
0668:             * <strong>Note:</strong> The characters are consumed.
0669:             * <p>
0670:             * <strong>Note:</strong> This assumes that the internal buffer is
0671:             * at least the same size, or bigger, than the length of the delimiter
0672:             * and that the delimiter contains at least one character.
0673:             * <p>
0674:             * <strong>Note:</strong> This method does not guarantee to return
0675:             * the longest run of character data. This method may return before
0676:             * the delimiter due to reaching the end of the input buffer or any
0677:             * other reason.
0678:             * <p>
0679:             * <strong>Note:</strong> The fields contained in the XMLString
0680:             * structure are not guaranteed to remain valid upon subsequent calls
0681:             * to the entity scanner. Therefore, the caller is responsible for
0682:             * immediately using the returned character data or making a copy of
0683:             * the character data.
0684:             *
0685:             * @param delimiter The string that signifies the end of the character
0686:             *                  data to be scanned.
0687:             * @param buffer    The data structure to fill.
0688:             *
0689:             * @return Returns true if there is more data to scan, false otherwise.
0690:             *
0691:             * @throws IOException  Thrown if i/o error occurs.
0692:             * @throws EOFException Thrown on end of file.
0693:             */
0694:            public boolean scanData(String delimiter, XMLStringBuffer buffer)
0695:                    throws IOException {
0696:
                     // 'done' becomes true once the complete delimiter has been matched.
0697:                boolean done = false;
0698:                int delimLen = delimiter.length();
0699:                char charAt0 = delimiter.charAt(0);
0700:                boolean external = fCurrentEntity.isExternal();
0701:                do {
0702:
0703:                    // load more characters, if needed
0704:
                         // When no more than delimLen characters remain, shift them to the
                         // front of the buffer and load behind them, so that a delimiter is
                         // less likely to straddle a buffer boundary.
0705:                    if (fCurrentEntity.position == fCurrentEntity.count) {
0706:                        load(0, true);
0707:                    } else if (fCurrentEntity.position >= fCurrentEntity.count
0708:                            - delimLen) {
0709:                        System.arraycopy(fCurrentEntity.ch,
0710:                                fCurrentEntity.position, fCurrentEntity.ch, 0,
0711:                                fCurrentEntity.count - fCurrentEntity.position);
0712:                        load(fCurrentEntity.count - fCurrentEntity.position,
0713:                                false);
0714:                        fCurrentEntity.position = 0;
0715:                    }
                         // Still not enough data after refilling: append what is left, mark the
                         // buffer consumed, attempt one more load and report "no more data".
0716:                    if (fCurrentEntity.position >= fCurrentEntity.count
0717:                            - delimLen) {
0718:                        // something must be wrong with the input: e.g., file ends an
0719:                        // unterminated comment
0720:                        int length = fCurrentEntity.count
0721:                                - fCurrentEntity.position;
0722:                        buffer.append(fCurrentEntity.ch,
0723:                                fCurrentEntity.position, length);
                             // NOTE(review): the column is advanced by the whole buffer count
                             // rather than by 'length' (the characters actually appended) --
                             // confirm whether this is intentional.
0724:                        fCurrentEntity.columnNumber += fCurrentEntity.count;
0725:                        fCurrentEntity.position = fCurrentEntity.count;
0726:                        load(0, true);
0727:                        return false;
0728:                    }
0729:
0730:                    // normalize newlines
                         // 'offset' marks the start of the span appended to 'buffer' in this
                         // iteration; 'newlines' counts line breaks consumed in the leading run.
0731:                    int offset = fCurrentEntity.position;
0732:                    int c = fCurrentEntity.ch[offset];
0733:                    int newlines = 0;
0734:                    if (c == '\n' || (c == '\r' && external)) {
0735:                        do {
0736:                            c = fCurrentEntity.ch[fCurrentEntity.position++];
0737:                            if (c == '\r' && external) {
0738:                                newlines++;
0739:                                fCurrentEntity.lineNumber++;
0740:                                fCurrentEntity.columnNumber = 1;
                                     // Buffer ran out mid-run: restart the span at the buffer
                                     // front, reserve 'newlines' slots and load behind them;
                                     // stop if load() reports the end of the entity.
0741:                                if (fCurrentEntity.position == fCurrentEntity.count) {
0742:                                    offset = 0;
0743:                                    fCurrentEntity.position = newlines;
0744:                                    if (load(newlines, false)) {
0745:                                        break;
0746:                                    }
0747:                                }
                                     // CR LF pair: consume the LF and advance 'offset' so the
                                     // pair contributes a single character to the span.
0748:                                if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
0749:                                    fCurrentEntity.position++;
0750:                                    offset++;
0751:                                }
0752:                                /*** NEWLINE NORMALIZATION ***/
0753:                                else {
0754:                                    newlines++;
0755:                                }
0756:                            } else if (c == '\n') {
0757:                                newlines++;
0758:                                fCurrentEntity.lineNumber++;
0759:                                fCurrentEntity.columnNumber = 1;
                                     // Same mid-run refill as above; this branch additionally
                                     // sets count to 'newlines' before calling load().
0760:                                if (fCurrentEntity.position == fCurrentEntity.count) {
0761:                                    offset = 0;
0762:                                    fCurrentEntity.position = newlines;
0763:                                    fCurrentEntity.count = newlines;
0764:                                    if (load(newlines, false)) {
0765:                                        break;
0766:                                    }
0767:                                }
0768:                            } else {
                                     // First non-newline character: un-read it and end the run.
0769:                                fCurrentEntity.position--;
0770:                                break;
0771:                            }
0772:                        } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                             // Overwrite the consumed run in place so every break reads as '\n'.
0773:                        for (int i = offset; i < fCurrentEntity.position; i++) {
0774:                            fCurrentEntity.ch[i] = '\n';
0775:                        }
0776:                        int length = fCurrentEntity.position - offset;
                             // Only one character is left in the buffer: append the newline run
                             // and let the caller call again for the rest.
0777:                        if (fCurrentEntity.position == fCurrentEntity.count - 1) {
0778:                            buffer.append(fCurrentEntity.ch, offset, length);
0779:                            return true;
0780:                        }
0781:                    }
0782:
0783:                    // iterate over buffer looking for delimiter
0784:                    OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
0785:                        c = fCurrentEntity.ch[fCurrentEntity.position++];
0786:                        if (c == charAt0) {
0787:                            // looks like we just hit the delimiter
0788:                            int delimOffset = fCurrentEntity.position - 1;
0789:                            for (int i = 1; i < delimLen; i++) {
                                     // Buffer ended inside a candidate match: un-read the
                                     // matched prefix and leave the scan loop.
0790:                                if (fCurrentEntity.position == fCurrentEntity.count) {
0791:                                    fCurrentEntity.position -= i;
0792:                                    break OUTER;
0793:                                }
0794:                                c = fCurrentEntity.ch[fCurrentEntity.position++];
                                     // Mismatch: un-read only the mismatching character; the
                                     // characters matched so far stay consumed as data.
0795:                                if (delimiter.charAt(i) != c) {
0796:                                    fCurrentEntity.position--;
0797:                                    break;
0798:                                }
0799:                            }
                                 // The whole delimiter was consumed starting at delimOffset.
0800:                            if (fCurrentEntity.position == delimOffset
0801:                                    + delimLen) {
0802:                                done = true;
0803:                                break;
0804:                            }
0805:                        } else if (c == '\n' || (external && c == '\r')) {
                                 // Leave the line break for the normalization pass of the next
                                 // outer iteration.
0806:                            fCurrentEntity.position--;
0807:                            break;
0808:                        } else if (XMLChar.isInvalid(c)) {
                                 // Character rejected by XMLChar.isInvalid: append what was
                                 // scanned so far, leave the character unread and return.
0809:                            fCurrentEntity.position--;
0810:                            int length = fCurrentEntity.position - offset;
0811:                            fCurrentEntity.columnNumber += length - newlines;
0812:                            buffer.append(fCurrentEntity.ch, offset, length);
0813:                            return true;
0814:                        }
0815:                    }
                         // Append this iteration's span; when the delimiter was matched it is
                         // part of the consumed range but is excluded from the appended data.
0816:                    int length = fCurrentEntity.position - offset;
0817:                    fCurrentEntity.columnNumber += length - newlines;
0818:                    if (done) {
0819:                        length -= delimLen;
0820:                    }
0821:                    buffer.append(fCurrentEntity.ch, offset, length);
0822:
0823:                    // return true if string was skipped
0824:                } while (!done);
                     // The loop above only exits once 'done' is true, so this returns false.
0825:                return !done;
0826:
0827:            }
0828:
0829:            // Adapted from:
0830:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar
0831:            /**
0832:             * Skips a character appearing immediately on the input.
0833:             * <p>
0834:             * <strong>Note:</strong> The character is consumed only if it matches
0835:             * the specified character.
0836:             *
0837:             * @param c The character to skip.
0838:             *
0839:             * @return Returns true if the character was skipped.
0840:             *
0841:             * @throws IOException  Thrown if i/o error occurs.
0842:             * @throws EOFException Thrown on end of file.
0843:             */
0844:            public boolean skipChar(int c) throws IOException {
0845:
0846:                // load more characters, if needed
0847:                if (fCurrentEntity.position == fCurrentEntity.count) {
0848:                    load(0, true);
0849:                }
0850:
0851:                // skip character
                     // 'cc' is the character currently at the read position (peeked only).
0852:                int cc = fCurrentEntity.ch[fCurrentEntity.position];
0853:                if (cc == c) {
                         // Exact match: consume it and update the line/column bookkeeping.
0854:                    fCurrentEntity.position++;
0855:                    if (c == '\n') {
0856:                        fCurrentEntity.lineNumber++;
0857:                        fCurrentEntity.columnNumber = 1;
0858:                    } else {
0859:                        fCurrentEntity.columnNumber++;
0860:                    }
0861:                    return true;
0862:                } else if (c == '\n' && cc == '\r'
0863:                        && fCurrentEntity.isExternal()) {
                         // The caller asked to skip '\n' and the input holds '\r' in an external
                         // entity: treat the CR, plus an LF directly after it, as the newline.
0864:                    // handle newlines
                         // NOTE(review): position is unchanged since the refill check at the top
                         // of the method, so this branch looks reachable only if that load()
                         // left position == count -- confirm whether 'count - 1' was intended.
0865:                    if (fCurrentEntity.position == fCurrentEntity.count) {
0866:                        fCurrentEntity.ch[0] = (char) cc;
0867:                        load(1, false);
0868:                    }
0869:                    fCurrentEntity.position++;
                         // NOTE(review): no bounds check before this read; it relies on the
                         // buffer holding at least one character after the CR -- confirm.
0870:                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
0871:                        fCurrentEntity.position++;
0872:                    }
0873:                    fCurrentEntity.lineNumber++;
0874:                    fCurrentEntity.columnNumber = 1;
0875:                    return true;
0876:                }
0877:
0878:                // character was not skipped
0879:                return false;
0880:
0881:            }
0882:
0883:            // Adapted from:
0884:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces
0885:            /**
0886:             * Skips space characters appearing immediately on the input.
0887:             * <p>
0888:             * <strong>Note:</strong> The characters are consumed only if they are
0889:             * space characters.
0890:             *
0891:             * @return Returns true if at least one space character was skipped.
0892:             *
0893:             * @throws IOException  Thrown if i/o error occurs.
0894:             * @throws EOFException Thrown on end of file.
0895:             *
0896:             * @see XMLChar#isSpace
0897:             */
0898:            public boolean skipSpaces() throws IOException {
0899:
0900:                // load more characters, if needed
0901:                if (fCurrentEntity.position == fCurrentEntity.count) {
0902:                    load(0, true);
0903:                }
0904:
0905:                // skip spaces
                     // 'c' always holds the character at the current read position; the loop
                     // below consumes it and re-reads 'c' in its while condition.
0906:                int c = fCurrentEntity.ch[fCurrentEntity.position];
0907:                if (XMLChar.isSpace(c)) {
0908:                    boolean external = fCurrentEntity.isExternal();
0909:                    do {
                             // Set when the refill below reports the end of the entity; in that
                             // case the position is not advanced at the end of this iteration.
0910:                        boolean entityChanged = false;
0911:                        // handle newlines
0912:                        if (c == '\n' || (external && c == '\r')) {
0913:                            fCurrentEntity.lineNumber++;
0914:                            fCurrentEntity.columnNumber = 1;
                                 // The newline is the last buffered character: keep it in slot 0
                                 // and load behind it so the following character can be inspected.
0915:                            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
0916:                                fCurrentEntity.ch[0] = (char) c;
0917:                                entityChanged = load(1, true);
0918:                                if (!entityChanged)
0919:                                    // the load change the position to be 1,
0920:                                    // need to restore it when entity not changed
0921:                                    fCurrentEntity.position = 0;
0922:                            }
                                 // CR followed by LF: step onto the LF so that the single
                                 // position increment below consumes the pair as one line break;
                                 // otherwise step back onto the CR.
0923:                            if (c == '\r' && external) {
0924:                                // REVISIT: Does this need to be updated to fix the
0925:                                //          #x0D ^#x0A newline normalization problem? -Ac
0926:                                if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
0927:                                    fCurrentEntity.position--;
0928:                                }
0929:                            }
0930:                            /*** NEWLINE NORMALIZATION ***
0931:                             else {
0932:                             if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
0933:                             && external) {
0934:                             fCurrentEntity.position++;
0935:                             }
0936:                             }
0937:                             /***/
0938:                        } else {
                                 // Any other space character only advances the column.
0939:                            fCurrentEntity.columnNumber++;
0940:                        }
0941:                        // load more characters, if needed
                             // Consume the current space character, then refill if that
                             // exhausted the buffer so the loop condition can read the next one.
0942:                        if (!entityChanged)
0943:                            fCurrentEntity.position++;
0944:                        if (fCurrentEntity.position == fCurrentEntity.count) {
0945:                            load(0, true);
0946:                        }
0947:                    } while (XMLChar
0948:                            .isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
0949:                    return true;
0950:                }
0951:
0952:                // no spaces were found
0953:                return false;
0954:
0955:            }
0956:
0957:            /**
0958:             * Skips the specified string appearing immediately on the input.
0959:             * <p>
0960:             * <strong>Note:</strong> The characters are consumed only if they
0961:             * match the specified string.
0962:             *
0963:             * @param s The string to skip.
0964:             *
0965:             * @return Returns true if the string was skipped.
0966:             *
0967:             * @throws IOException  Thrown if i/o error occurs.
0968:             * @throws EOFException Thrown on end of file.
0969:             */
0970:            public boolean skipString(String s) throws IOException {
0971:
0972:                // load more characters, if needed
0973:                if (fCurrentEntity.position == fCurrentEntity.count) {
0974:                    load(0, true);
0975:                }
0976:
0977:                // skip string
                     // Compare the input against 's' one character at a time, consuming as we
                     // go; any failure rewinds the position so that nothing is consumed.
0978:                final int length = s.length();
0979:                for (int i = 0; i < length; i++) {
0980:                    char c = fCurrentEntity.ch[fCurrentEntity.position++];
0981:                    if (c != s.charAt(i)) {
                             // Mismatch: undo the i matched characters plus this one.
0982:                        fCurrentEntity.position -= i + 1;
0983:                        return false;
0984:                    }
                         // Buffer exhausted with more of 's' still to match: move the i + 1
                         // characters matched so far to the buffer front and load behind them.
0985:                    if (i < length - 1
0986:                            && fCurrentEntity.position == fCurrentEntity.count) {
0987:                        System.arraycopy(fCurrentEntity.ch,
0988:                                fCurrentEntity.count - i - 1,
0989:                                fCurrentEntity.ch, 0, i + 1);
0990:                        // REVISIT: Can a string to be skipped cross an
0991:                        //          entity boundary? -Ac
                             // load() reported the end of the entity: rewind to the start of the
                             // preserved prefix and report that the string was not skipped.
0992:                        if (load(i + 1, false)) {
0993:                            fCurrentEntity.position -= i + 1;
0994:                            return false;
0995:                        }
0996:                    }
0997:                }
                     // Full match: the string is consumed, so only the column moves. No line
                     // number update is made here.
0998:                fCurrentEntity.columnNumber += length;
0999:                return true;
1000:
1001:            }
1002:
1003:            // Adapted from:
1004:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.load
1005:            /**
1006:             * Loads a chunk of text.
1007:             *
1008:             * @param offset       The offset into the character buffer to
1009:             *                     read the next batch of characters.
1010:             * @param changeEntity True if the load should change entities
1011:             *                     at the end of the entity, otherwise leave
1012:             *                     the current entity in place and the entity
1013:             *                     boundary will be signaled by the return
1014:             *                     value.
1015:             *
1016:             * @return Returns true if the entity changed as a result of this
1017:             *          load operation.
1018:             */
1019:            final boolean load(int offset, boolean changeEntity)
1020:                    throws IOException {
1021:
1022:                // read characters
                     // When chunked reads are allowed, ask for the rest of the buffer after
                     // 'offset'; otherwise request DEFAULT_XMLDECL_BUFFER_SIZE characters.
1023:                int length = fCurrentEntity.mayReadChunks ? (fCurrentEntity.ch.length - offset)
1024:                        : (DEFAULT_XMLDECL_BUFFER_SIZE);
1025:                int count = fCurrentEntity.reader.read(fCurrentEntity.ch,
1026:                        offset, length);
1027:
1028:                // reset count and position
                     // Characters before 'offset' are kept by the caller, so the valid range
                     // becomes [0, offset + count) and reading resumes at 'offset'. A read of
                     // zero characters leaves count and position untouched.
1029:                boolean entityChanged = false;
1030:                if (count != -1) {
1031:                    if (count != 0) {
1032:                        fCurrentEntity.count = count + offset;
1033:                        fCurrentEntity.position = offset;
1034:                    }
1035:                }
1036:
1037:                // end of this entity
                     // The reader returned -1: only the preserved prefix remains valid. The
                     // result is true in this case whether or not 'changeEntity' is set.
1038:                else {
1039:                    fCurrentEntity.count = offset;
1040:                    fCurrentEntity.position = offset;
1041:                    entityChanged = true;
1042:                    if (changeEntity) {
                             // endEntity() may replace fCurrentEntity; if none is left the
                             // input is exhausted and EOFException is thrown.
1043:                        endEntity();
1044:                        if (fCurrentEntity == null) {
1045:                            throw new EOFException();
1046:                        }
1047:                        // handle the trailing edges
                             // The entity that is now current has an empty buffer: fill it
                             // without switching entities again.
1048:                        if (fCurrentEntity.position == fCurrentEntity.count) {
1049:                            load(0, false);
1050:                        }
1051:                    }
1052:                }
1053:
1054:                return entityChanged;
1055:
1056:            }
1057:
1058:            // Adapted from:
1059:            // org.apache.xerces.impl.XMLEntityManager.RewindableInputStream
1060:            /**
1061:             * This class wraps the byte inputstreams we're presented with.
1062:             * We need it because java.io.InputStreams don't provide
1063:             * functionality to reread processed bytes, and they have a habit
1064:             * of reading more than one character when you call their read()
1065:             * methods.  This means that, once we discover the true (declared)
1066:             * encoding of a document, we can neither backtrack to read the
1067:             * whole doc again nor start reading where we are with a new
1068:             * reader.
1069:             *
1070:             * This class allows rewinding an inputStream by allowing a mark
1071:             * to be set, and the stream reset to that position.  <strong>The
1072:             * class assumes that it needs to read one byte per
1073:             * invocation when its read() method is invoked, but uses the
1074:             * underlying InputStream's read(byte[], offset, length) method--it
1075:             * won't buffer data read this way!</strong>
1076:             *
1077:             * @author Neil Graham, IBM
1078:             * @author Glenn Marcy, IBM
1079:             */
1080:            private final class RewindableInputStream extends InputStream {
1081:
                     // Wrapped stream; set to null by close().
1082:                private InputStream fInputStream;
                     // Bytes read one at a time through read(), kept so they can be replayed.
1083:                private byte[] fData;
                     // Offset that rewind() returns to.
1084:                private int fStartOffset;
                     // Offset at which end of stream was observed; -1 until then.
1085:                private int fEndOffset;
                     // Current read offset into fData.
1086:                private int fOffset;
                     // Number of valid bytes stored in fData.
1087:                private int fLength;
                     // Offset remembered by mark() and restored by reset().
1088:                private int fMark;
1089:
                     /**
                      * Wraps the given stream with an initially empty replay buffer of
                      * DEFAULT_XMLDECL_BUFFER_SIZE bytes.
                      *
                      * @param is The stream to wrap.
                      */
1090:                public RewindableInputStream(InputStream is) {
1091:                    fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
1092:                    fInputStream = is;
1093:                    fStartOffset = 0;
1094:                    fEndOffset = -1;
1095:                    fOffset = 0;
1096:                    fLength = 0;
1097:                    fMark = 0;
1098:                }
1099:
                     /** Sets the offset that a later rewind() will return to. */
1100:                public void setStartOffset(int offset) {
1101:                    fStartOffset = offset;
1102:                }
1103:
                     /** Moves the read offset back to the configured start offset. */
1104:                public void rewind() {
1105:                    fOffset = fStartOffset;
1106:                }
1107:
                     /**
                      * Reads one byte, replaying it from the buffer when available and
                      * otherwise reading it from the wrapped stream and recording it.
                      *
                      * @return The byte as a value in 0..255, or -1 at end of stream.
                      */
1108:                public int read() throws IOException {
1109:                    int b = 0;
                         // Replay a previously buffered byte.
1110:                    if (fOffset < fLength) {
1111:                        return fData[fOffset++] & 0xff;
1112:                    }
                         // End of stream was already seen at this offset.
1113:                    if (fOffset == fEndOffset) {
1114:                        return -1;
1115:                    }
                         // Replay buffer is full: double its size before storing another byte.
1116:                    if (fOffset == fData.length) {
1117:                        byte[] newData = new byte[fOffset << 1];
1118:                        System.arraycopy(fData, 0, newData, 0, fOffset);
1119:                        fData = newData;
1120:                    }
1121:                    b = fInputStream.read();
1122:                    if (b == -1) {
                             // Remember where the stream ended so later calls return at once.
1123:                        fEndOffset = fOffset;
1124:                        return -1;
1125:                    }
1126:                    fData[fLength++] = (byte) b;
1127:                    fOffset++;
1128:                    return b & 0xff;
1129:                }
1130:
                     /**
                      * Reads up to len bytes into b starting at off. Buffered bytes are
                      * served first; once they are exhausted the call either reads straight
                      * from the wrapped stream (when the current entity allows chunked
                      * reads) or falls back to a single buffered read().
                      *
                      * @return The number of bytes read, or -1 at end of stream.
                      */
1131:                public int read(byte[] b, int off, int len) throws IOException {
1132:                    int bytesLeft = fLength - fOffset;
1133:                    if (bytesLeft == 0) {
1134:                        if (fOffset == fEndOffset) {
1135:                            return -1;
1136:                        }
1137:                        // better get some more for the voracious reader...
                             // Bytes read on this path are not stored in fData, so they
                             // cannot be replayed after rewind() or reset().
1138:                        if (fCurrentEntity.mayReadChunks) {
1139:                            return fInputStream.read(b, off, len);
1140:                        }
                             // Chunked reads not allowed: hand out exactly one byte, which
                             // read() records in the replay buffer.
1141:                        int returnedVal = read();
1142:                        if (returnedVal == -1) {
1143:                            fEndOffset = fOffset;
1144:                            return -1;
1145:                        }
1146:                        b[off] = (byte) returnedVal;
1147:                        return 1;
1148:                    }
                         // Serve from the replay buffer, capped at the bytes it still holds.
1149:                    if (len < bytesLeft) {
1150:                        if (len <= 0) {
1151:                            return 0;
1152:                        }
1153:                    } else {
1154:                        len = bytesLeft;
1155:                    }
                         // With a null destination the bytes are skipped rather than copied.
1156:                    if (b != null) {
1157:                        System.arraycopy(fData, fOffset, b, off, len);
1158:                    }
1159:                    fOffset += len;
1160:                    return len;
1161:                }
1162:
                     /**
                      * Skips up to n bytes, consuming buffered bytes first and then
                      * delegating the remainder to the wrapped stream.
                      *
                      * @return The number of bytes skipped.
                      */
1163:                public long skip(long n) throws IOException {
1164:                    int bytesLeft;
1165:                    if (n <= 0) {
1166:                        return 0;
1167:                    }
1168:                    bytesLeft = fLength - fOffset;
1169:                    if (bytesLeft == 0) {
1170:                        if (fOffset == fEndOffset) {
1171:                            return 0;
1172:                        }
                             // Nothing buffered: skip directly on the wrapped stream; fOffset
                             // is not advanced for bytes skipped this way.
1173:                        return fInputStream.skip(n);
1174:                    }
                         // The request is fully covered by buffered bytes.
1175:                    if (n <= bytesLeft) {
1176:                        fOffset += n;
1177:                        return n;
1178:                    }
                         // Consume everything buffered, then skip the rest on the wrapped
                         // stream unless end of stream was already recorded at this offset.
1179:                    fOffset += bytesLeft;
1180:                    if (fOffset == fEndOffset) {
1181:                        return bytesLeft;
1182:                    }
1183:                    n -= bytesLeft;
                         // NOTE(review): the block comment below describes available(), not
                         // skip(); it appears to be misplaced.
1184:                    /*
1185:                     * In a manner of speaking, when this class isn't permitting more
1186:                     * than one byte at a time to be read, it is "blocking".  The
1187:                     * available() method should indicate how much can be read without
1188:                     * blocking, so while we're in this mode, it should only indicate
1189:                     * that bytes in its buffer are available; otherwise, the result of
1190:                     * available() on the underlying InputStream is appropriate.
1191:                     */
1192:                    return fInputStream.skip(n) + bytesLeft;
1193:                }
1194:
                     /**
                      * Reports how many bytes can be read without blocking: the buffered
                      * bytes when there are any; otherwise the wrapped stream's estimate
                      * when chunked reads are allowed, or 0 when they are not.
                      *
                      * NOTE(review): this returns -1 once end of stream has been recorded,
                      * whereas java.io.InputStream.available() is specified to return 0 at
                      * end of stream -- confirm callers before changing.
                      */
1195:                public int available() throws IOException {
1196:                    int bytesLeft = fLength - fOffset;
1197:                    if (bytesLeft == 0) {
1198:                        if (fOffset == fEndOffset) {
1199:                            return -1;
1200:                        }
1201:                        return fCurrentEntity.mayReadChunks ? fInputStream
1202:                                .available() : 0;
1203:                    }
1204:                    return bytesLeft;
1205:                }
1206:
                     /** Remembers the current offset; the howMuch argument is not used. */
1207:                public void mark(int howMuch) {
1208:                    fMark = fOffset;
1209:                }
1210:
                     /** Returns to the offset remembered by the last mark() (initially 0). */
1211:                public void reset() {
1212:                    fOffset = fMark;
1213:                }
1214:
                     /** Always reports mark/reset support. */
1215:                public boolean markSupported() {
1216:                    return true;
1217:                }
1218:
                     /** Closes the wrapped stream once and drops the reference to it. */
1219:                public void close() throws IOException {
1220:                    if (fInputStream != null) {
1221:                        fInputStream.close();
1222:                        fInputStream = null;
1223:                    }
1224:                }
1225:            } // end of RewindableInputStream class
1226:
1227:            // Adapted from:
1228:            // org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch
1229:            private void scanXMLDecl() throws IOException, JasperException {
1230:
1231:                if (skipString("<?xml")) {
1232:                    fMarkupDepth++;
1233:                    // NOTE: special case where document starts with a PI
1234:                    //       whose name starts with "xml" (e.g. "xmlfoo")
1235:                    if (XMLChar.isName(peekChar())) {
1236:                        fStringBuffer.clear();
1237:                        fStringBuffer.append("xml");
1238:                        while (XMLChar.isName(peekChar())) {
1239:                            fStringBuffer.append((char) scanChar());
1240:                        }
1241:                        String target = fSymbolTable.addSymbol(
1242:                                fStringBuffer.ch, fStringBuffer.offset,
1243:                                fStringBuffer.length);
1244:                        scanPIData(target, fString);
1245:                    }
1246:
1247:                    // standard XML declaration
1248:                    else {
1249:                        scanXMLDeclOrTextDecl(false);
1250:                    }
1251:                }
1252:            }
1253:
1254:            // Adapted from:
1255:            // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl
1256:            /**
1257:             * Scans an XML or text declaration.
1258:             * <p>
1259:             * <pre>
1260:             * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1261:             * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1262:             * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
1263:             * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1264:             * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1265:             *                 | ('"' ('yes' | 'no') '"'))
1266:             *
1267:             * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
1268:             * </pre>
1269:             *
1270:             * @param scanningTextDecl True if a text declaration is to
1271:             *                         be scanned instead of an XML
1272:             *                         declaration.
1273:             */
1274:            private void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
1275:                    throws IOException, JasperException {
1276:
1277:                // scan decl
1278:                scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
1279:                fMarkupDepth--;
1280:
1281:                // pseudo-attribute values
1282:                String encodingPseudoAttr = fStrings[1];
1283:
1284:                // set encoding on reader
1285:                if (encodingPseudoAttr != null) {
1286:                    isEncodingSetInProlog = true;
1287:                    encoding = encodingPseudoAttr;
1288:                }
1289:            }
1290:
1291:            // Adapted from:
1292:            // org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl
1293:            /**
1294:             * Scans an XML or text declaration.
1295:             * <p>
1296:             * <pre>
1297:             * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1298:             * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1299:             * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
1300:             * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1301:             * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1302:             *                 | ('"' ('yes' | 'no') '"'))
1303:             *
1304:             * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1305:             * </pre>
1306:             *
1307:             * @param scanningTextDecl True if a text declaration is to
1308:             *                         be scanned instead of an XML
1309:             *                         declaration.
1310:             * @param pseudoAttributeValues An array of size 3 to return the version,
1311:             *                         encoding and standalone pseudo attribute values
1312:             *                         (in that order).
1313:             *
1314:             * <strong>Note:</strong> This method uses fString, anything in it
1315:             * at the time of calling is lost.
1316:             */
1317:            private void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
1318:                    String[] pseudoAttributeValues) throws IOException,
1319:                    JasperException {
1320:
1321:                // pseudo-attribute values
1322:                String version = null;
1323:                String encoding = null;
1324:                String standalone = null;
1325:
1326:                // scan pseudo-attributes
1327:                final int STATE_VERSION = 0;
1328:                final int STATE_ENCODING = 1;
1329:                final int STATE_STANDALONE = 2;
1330:                final int STATE_DONE = 3;
1331:                int state = STATE_VERSION;
1332:
1333:                boolean dataFoundForTarget = false;
1334:                boolean sawSpace = skipSpaces();
1335:                while (peekChar() != '?') {
1336:                    dataFoundForTarget = true;
1337:                    String name = scanPseudoAttribute(scanningTextDecl, fString);
1338:                    switch (state) {
1339:                    case STATE_VERSION: {
1340:                        if (name == fVersionSymbol) {
1341:                            if (!sawSpace) {
1342:                                reportFatalError(
1343:                                        scanningTextDecl ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
1344:                                                : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
1345:                                        null);
1346:                            }
1347:                            version = fString.toString();
1348:                            state = STATE_ENCODING;
1349:                            if (!version.equals("1.0")) {
1350:                                // REVISIT: XML REC says we should throw an error
1351:                                // in such cases.
1352:                                // some may object the throwing of fatalError.
1353:                                err.jspError(
1354:                                        "jsp.error.xml.versionNotSupported",
1355:                                        version);
1356:                            }
1357:                        } else if (name == fEncodingSymbol) {
1358:                            if (!scanningTextDecl) {
1359:                                err
1360:                                        .jspError("jsp.error.xml.versionInfoRequired");
1361:                            }
1362:                            if (!sawSpace) {
1363:                                reportFatalError(
1364:                                        scanningTextDecl ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1365:                                                : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1366:                                        null);
1367:                            }
1368:                            encoding = fString.toString();
1369:                            state = scanningTextDecl ? STATE_DONE
1370:                                    : STATE_STANDALONE;
1371:                        } else {
1372:                            if (scanningTextDecl) {
1373:                                err
1374:                                        .jspError("jsp.error.xml.encodingDeclRequired");
1375:                            } else {
1376:                                err
1377:                                        .jspError("jsp.error.xml.versionInfoRequired");
1378:                            }
1379:                        }
1380:                        break;
1381:                    }
1382:                    case STATE_ENCODING: {
1383:                        if (name == fEncodingSymbol) {
1384:                            if (!sawSpace) {
1385:                                reportFatalError(
1386:                                        scanningTextDecl ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1387:                                                : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1388:                                        null);
1389:                            }
1390:                            encoding = fString.toString();
1391:                            state = scanningTextDecl ? STATE_DONE
1392:                                    : STATE_STANDALONE;
1393:                            // TODO: check encoding name; set encoding on
1394:                            //       entity scanner
1395:                        } else if (!scanningTextDecl
1396:                                && name == fStandaloneSymbol) {
1397:                            if (!sawSpace) {
1398:                                err
1399:                                        .jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1400:                            }
1401:                            standalone = fString.toString();
1402:                            state = STATE_DONE;
1403:                            if (!standalone.equals("yes")
1404:                                    && !standalone.equals("no")) {
1405:                                err.jspError("jsp.error.xml.sdDeclInvalid");
1406:                            }
1407:                        } else {
1408:                            err.jspError("jsp.error.xml.encodingDeclRequired");
1409:                        }
1410:                        break;
1411:                    }
1412:                    case STATE_STANDALONE: {
1413:                        if (name == fStandaloneSymbol) {
1414:                            if (!sawSpace) {
1415:                                err
1416:                                        .jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1417:                            }
1418:                            standalone = fString.toString();
1419:                            state = STATE_DONE;
1420:                            if (!standalone.equals("yes")
1421:                                    && !standalone.equals("no")) {
1422:                                err.jspError("jsp.error.xml.sdDeclInvalid");
1423:                            }
1424:                        } else {
1425:                            err.jspError("jsp.error.xml.encodingDeclRequired");
1426:                        }
1427:                        break;
1428:                    }
1429:                    default: {
1430:                        err.jspError("jsp.error.xml.noMorePseudoAttributes");
1431:                    }
1432:                    }
1433:                    sawSpace = skipSpaces();
1434:                }
1435:                // REVISIT: should we remove this error reporting?
1436:                if (scanningTextDecl && state != STATE_DONE) {
1437:                    err.jspError("jsp.error.xml.morePseudoAttributes");
1438:                }
1439:
1440:                // If there is no data in the xml or text decl then we fail to report
1441:                // error for version or encoding info above.
1442:                if (scanningTextDecl) {
1443:                    if (!dataFoundForTarget && encoding == null) {
1444:                        err.jspError("jsp.error.xml.encodingDeclRequired");
1445:                    }
1446:                } else {
1447:                    if (!dataFoundForTarget && version == null) {
1448:                        err.jspError("jsp.error.xml.versionInfoRequired");
1449:                    }
1450:                }
1451:
1452:                // end
1453:                if (!skipChar('?')) {
1454:                    err.jspError("jsp.error.xml.xmlDeclUnterminated");
1455:                }
1456:                if (!skipChar('>')) {
1457:                    err.jspError("jsp.error.xml.xmlDeclUnterminated");
1458:
1459:                }
1460:
1461:                // fill in return array
1462:                pseudoAttributeValues[0] = version;
1463:                pseudoAttributeValues[1] = encoding;
1464:                pseudoAttributeValues[2] = standalone;
1465:            }
1466:
1467:            // Adapted from:
1468:            // org.apache.xerces.impl.XMLScanner.scanPseudoAttribute
1469:            /**
1470:             * Scans a pseudo attribute.
1471:             *
1472:             * @param scanningTextDecl True if scanning this pseudo-attribute for a
1473:             *                         TextDecl; false if scanning XMLDecl. This 
1474:             *                         flag is needed to report the correct type of
1475:             *                         error.
1476:             * @param value            The string to fill in with the attribute 
1477:             *                         value.
1478:             *
1479:             * @return The name of the attribute
1480:             *
1481:             * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
1482:             * at the time of calling is lost.
1483:             */
1484:            public String scanPseudoAttribute(boolean scanningTextDecl,
1485:                    XMLString value) throws IOException, JasperException {
1486:
1487:                String name = scanName();
1488:                if (name == null) {
1489:                    err.jspError("jsp.error.xml.pseudoAttrNameExpected");
1490:                }
1491:                skipSpaces();
1492:                if (!skipChar('=')) {
1493:                    reportFatalError(
1494:                            scanningTextDecl ? "jsp.error.xml.eqRequiredInTextDecl"
1495:                                    : "jsp.error.xml.eqRequiredInXMLDecl", name);
1496:                }
1497:                skipSpaces();
1498:                int quote = peekChar();
1499:                if (quote != '\'' && quote != '"') {
1500:                    reportFatalError(
1501:                            scanningTextDecl ? "jsp.error.xml.quoteRequiredInTextDecl"
1502:                                    : "jsp.error.xml.quoteRequiredInXMLDecl",
1503:                            name);
1504:                }
1505:                scanChar();
1506:                int c = scanLiteral(quote, value);
1507:                if (c != quote) {
1508:                    fStringBuffer2.clear();
1509:                    do {
1510:                        fStringBuffer2.append(value);
1511:                        if (c != -1) {
1512:                            if (c == '&' || c == '%' || c == '<' || c == ']') {
1513:                                fStringBuffer2.append((char) scanChar());
1514:                            } else if (XMLChar.isHighSurrogate(c)) {
1515:                                scanSurrogates(fStringBuffer2);
1516:                            } else if (XMLChar.isInvalid(c)) {
1517:                                String key = scanningTextDecl ? "jsp.error.xml.invalidCharInTextDecl"
1518:                                        : "jsp.error.xml.invalidCharInXMLDecl";
1519:                                reportFatalError(key, Integer.toString(c, 16));
1520:                                scanChar();
1521:                            }
1522:                        }
1523:                        c = scanLiteral(quote, value);
1524:                    } while (c != quote);
1525:                    fStringBuffer2.append(value);
1526:                    value.setValues(fStringBuffer2);
1527:                }
1528:                if (!skipChar(quote)) {
1529:                    reportFatalError(
1530:                            scanningTextDecl ? "jsp.error.xml.closeQuoteMissingInTextDecl"
1531:                                    : "jsp.error.xml.closeQuoteMissingInXMLDecl",
1532:                            name);
1533:                }
1534:
1535:                // return
1536:                return name;
1537:
1538:            }
1539:
1540:            // Adapted from:
1541:            // org.apache.xerces.impl.XMLScanner.scanPIData
1542:            /**
1543:             * Scans a processing data. This is needed to handle the situation
1544:             * where a document starts with a processing instruction whose 
1545:             * target name <em>starts with</em> "xml". (e.g. xmlfoo)
1546:             *
1547:             * <strong>Note:</strong> This method uses fStringBuffer, anything in it
1548:             * at the time of calling is lost.
1549:             *
1550:             * @param target The PI target
1551:             * @param data The string to fill in with the data
1552:             */
1553:            private void scanPIData(String target, XMLString data)
1554:                    throws IOException, JasperException {
1555:
1556:                // check target
1557:                if (target.length() == 3) {
1558:                    char c0 = Character.toLowerCase(target.charAt(0));
1559:                    char c1 = Character.toLowerCase(target.charAt(1));
1560:                    char c2 = Character.toLowerCase(target.charAt(2));
1561:                    if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
1562:                        err.jspError("jsp.error.xml.reservedPITarget");
1563:                    }
1564:                }
1565:
1566:                // spaces
1567:                if (!skipSpaces()) {
1568:                    if (skipString("?>")) {
1569:                        // we found the end, there is no data
1570:                        data.clear();
1571:                        return;
1572:                    } else {
1573:                        // if there is data there should be some space
1574:                        err.jspError("jsp.error.xml.spaceRequiredInPI");
1575:                    }
1576:                }
1577:
1578:                fStringBuffer.clear();
1579:                // data
1580:                if (scanData("?>", fStringBuffer)) {
1581:                    do {
1582:                        int c = peekChar();
1583:                        if (c != -1) {
1584:                            if (XMLChar.isHighSurrogate(c)) {
1585:                                scanSurrogates(fStringBuffer);
1586:                            } else if (XMLChar.isInvalid(c)) {
1587:                                err.jspError("jsp.error.xml.invalidCharInPI",
1588:                                        Integer.toHexString(c));
1589:                                scanChar();
1590:                            }
1591:                        }
1592:                    } while (scanData("?>", fStringBuffer));
1593:                }
1594:                data.setValues(fStringBuffer);
1595:
1596:            }
1597:
1598:            // Adapted from:
1599:            // org.apache.xerces.impl.XMLScanner.scanSurrogates
1600:            /**
1601:             * Scans surrogates and append them to the specified buffer.
1602:             * <p>
1603:             * <strong>Note:</strong> This assumes the current char has already been
1604:             * identified as a high surrogate.
1605:             *
1606:             * @param buf The StringBuffer to append the read surrogates to.
1607:             * @returns True if it succeeded.
1608:             */
1609:            private boolean scanSurrogates(XMLStringBuffer buf)
1610:                    throws IOException, JasperException {
1611:
1612:                int high = scanChar();
1613:                int low = peekChar();
1614:                if (!XMLChar.isLowSurrogate(low)) {
1615:                    err.jspError("jsp.error.xml.invalidCharInContent", Integer
1616:                            .toString(high, 16));
1617:                    return false;
1618:                }
1619:                scanChar();
1620:
1621:                // convert surrogates to supplemental character
1622:                int c = XMLChar.supplemental((char) high, (char) low);
1623:
1624:                // supplemental character must be a valid XML character
1625:                if (!XMLChar.isValid(c)) {
1626:                    err.jspError("jsp.error.xml.invalidCharInContent", Integer
1627:                            .toString(c, 16));
1628:                    return false;
1629:                }
1630:
1631:                // fill in the buffer
1632:                buf.append((char) high);
1633:                buf.append((char) low);
1634:
1635:                return true;
1636:
1637:            }
1638:
1639:            // Adapted from:
1640:            // org.apache.xerces.impl.XMLScanner.reportFatalError
1641:            /**
1642:             * Convenience function used in all XML scanners.
1643:             */
1644:            private void reportFatalError(String msgId, String arg)
1645:                    throws JasperException {
1646:                err.jspError(msgId, arg);
1647:            }
1648:
1649:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.