Source Code Cross Referenced for XercesEncodingDetector.java in » Servlet-Container » tomcat-jasper2 » org » apache » jasper » xmlparser » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Servlet Container » tomcat jasper2 » org.apache.jasper.xmlparser
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         * Copyright 1999,2004 The Apache Software Foundation.
0003:         * 
0004:         * Licensed under the Apache License, Version 2.0 (the "License");
0005:         * you may not use this file except in compliance with the License.
0006:         * You may obtain a copy of the License at
0007:         * 
0008:         *      http://www.apache.org/licenses/LICENSE-2.0
0009:         * 
0010:         * Unless required by applicable law or agreed to in writing, software
0011:         * distributed under the License is distributed on an "AS IS" BASIS,
0012:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013:         * See the License for the specific language governing permissions and
0014:         * limitations under the License.
0015:         */
0016:
0017:        package org.apache.jasper.xmlparser;
0018:
0019:        import java.io.EOFException;
0020:        import java.io.InputStream;
0021:        import java.io.InputStreamReader;
0022:        import java.io.IOException;
0023:        import java.io.Reader;
0024:        import java.util.Locale;
0025:        import java.util.jar.JarFile;
0026:
0027:        import org.apache.jasper.JasperException;
0028:        import org.apache.jasper.JspCompilationContext;
0029:        import org.apache.jasper.compiler.ErrorDispatcher;
0030:        import org.apache.jasper.compiler.JspUtil;
0031:
0032:        import org.apache.xerces.util.EncodingMap;
0033:        import org.apache.xerces.util.SymbolTable;
0034:        import org.apache.xerces.util.XMLChar;
0035:        import org.apache.xerces.util.XMLStringBuffer;
0036:        import org.apache.xerces.xni.XMLString;
0037:
0038:        public class XercesEncodingDetector extends XMLEncodingDetector {
0039:
            // Byte stream under inspection; assigned by getEncoding() and wrapped
            // in a RewindableInputStream by createInitialReader().
            private InputStream stream;
            // Name of the detected encoding; returned as the first element of the
            // getEncoding() result. createInitialReader() only sniffs while this
            // is still null.
            private String encoding;
            // Returned as the second element of the getEncoding() result.
            // NOTE(review): presumably set while scanning the XML declaration;
            // the assignment is not in this part of the file.
            private boolean isEncodingSetInProlog;
            // Byte order reported by getEncodingName(); null when the order is
            // unknown or not relevant for the encoding.
            private Boolean isBigEndian;
            // Character reader built by createReader() on top of 'stream'.
            private Reader reader;

            // org.apache.xerces.impl.XMLEntityManager fields
            public static final int DEFAULT_BUFFER_SIZE = 2048;
            public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64;
            // When true, createReader() accepts an encoding name that has no
            // IANA-to-Java mapping and hands it to InputStreamReader as-is.
            private boolean fAllowJavaEncodings;
            // Symbol table used by scanName() to produce the returned names.
            private SymbolTable fSymbolTable;
            // The constructor points this at the detector itself, so the
            // Xerces-derived code can keep its "fCurrentEntity." references.
            private XercesEncodingDetector fCurrentEntity;
            // Buffer size passed to the UTF-8/ASCII readers; doubled by
            // scanName() when a single name fills the whole buffer.
            private int fBufferSize = DEFAULT_BUFFER_SIZE;

            // org.apache.xerces.impl.XMLEntityManager.ScannedEntity fields
            // Line/column bookkeeping maintained by the scan*() methods.
            private int lineNumber = 1;
            private int columnNumber = 1;
            // Consulted by scanLiteral() when deciding whether a quote character
            // terminates the literal.
            private boolean literal;
            // Character buffer the scan*() methods read from.
            private char[] ch = new char[DEFAULT_BUFFER_SIZE];
            // Index in 'ch' of the next character to scan.
            private int position;
            // The scan*() methods call load(...) whenever 'position' reaches this
            // value. NOTE(review): presumably the number of valid chars in 'ch'.
            private int count;
            // Not referenced in the visible part of this file.
            private boolean mayReadChunks = false;

            // org.apache.xerces.impl.XMLScanner fields
            // NOTE(review): scratch objects and attribute names presumably used
            // by the XML declaration scanner, which is not visible here.
            private XMLString fString = new XMLString();
            private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
            private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
            private final static String fVersionSymbol = "version";
            private final static String fEncodingSymbol = "encoding";
            private final static String fStandaloneSymbol = "standalone";

            // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl fields
            private int fMarkupDepth = 0;
            private String[] fStrings = new String[3];

            // Error dispatcher supplied to getEncoding(); createReader() reports
            // unsupported or invalid encodings through it.
            private ErrorDispatcher err;
0076:
0077:            /**
0078:             * Constructor
0079:             */
0080:            public XercesEncodingDetector() {
0081:                fSymbolTable = new SymbolTable();
0082:                fCurrentEntity = this ;
0083:            }
0084:
0085:            /**
0086:             * Autodetects the encoding of the XML document supplied by the given
0087:             * input stream.
0088:             *
0089:             * Encoding autodetection is done according to the XML 1.0 specification,
0090:             * Appendix F.1: Detection Without External Encoding Information.
0091:             *
0092:             * @param in The input stream to read
0093:             * @param err The error dispatcher
0094:             *
0095:             * @return Two-element array, where the first element (of type
0096:             * java.lang.String) contains the name of the (auto)detected encoding, and
0097:             * the second element (of type java.lang.Boolean) specifies whether the 
0098:             * encoding was specified using the 'encoding' attribute of an XML prolog
0099:             * (TRUE) or autodetected (FALSE).
0100:             */
0101:            public Object[] getEncoding(InputStream in, ErrorDispatcher err)
0102:                    throws IOException, JasperException {
0103:                XercesEncodingDetector detector = this ;
0104:                this .stream = in;
0105:                this .err = err;
0106:                detector.createInitialReader();
0107:                detector.scanXMLDecl();
0108:
0109:                return new Object[] { detector.encoding,
0110:                        new Boolean(detector.isEncodingSetInProlog) };
0111:            }
0112:
0113:            public Object[] getEncodingMethod(String fname, JarFile jarFile,
0114:                    JspCompilationContext ctxt, ErrorDispatcher err)
0115:                    throws IOException, JasperException {
0116:                InputStream inStream = JspUtil.getInputStream(fname, jarFile,
0117:                        ctxt, err);
0118:                Object[] ret = getEncoding(inStream, err);
0119:                inStream.close();
0120:
0121:                return ret;
0122:            }
0123:
0124:            // stub method
0125:            void endEntity() {
0126:            }
0127:
0128:            // Adapted from:
0129:            // org.apache.xerces.impl.XMLEntityManager.startEntity()
0130:            private void createInitialReader() throws IOException,
0131:                    JasperException {
0132:
0133:                // wrap this stream in RewindableInputStream
0134:                stream = new RewindableInputStream(stream);
0135:
0136:                // perform auto-detect of encoding if necessary
0137:                if (encoding == null) {
0138:                    // read first four bytes and determine encoding
0139:                    final byte[] b4 = new byte[4];
0140:                    int count = 0;
0141:                    for (; count < 4; count++) {
0142:                        b4[count] = (byte) stream.read();
0143:                    }
0144:                    if (count == 4) {
0145:                        Object[] encodingDesc = getEncodingName(b4, count);
0146:                        encoding = (String) (encodingDesc[0]);
0147:                        isBigEndian = (Boolean) (encodingDesc[1]);
0148:
0149:                        stream.reset();
0150:                        // Special case UTF-8 files with BOM created by Microsoft
0151:                        // tools. It's more efficient to consume the BOM than make
0152:                        // the reader perform extra checks. -Ac
0153:                        if (count > 2 && encoding.equals("UTF-8")) {
0154:                            int b0 = b4[0] & 0xFF;
0155:                            int b1 = b4[1] & 0xFF;
0156:                            int b2 = b4[2] & 0xFF;
0157:                            if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
0158:                                // ignore first three bytes...
0159:                                stream.skip(3);
0160:                            }
0161:                        }
0162:                        reader = createReader(stream, encoding, isBigEndian);
0163:                    } else {
0164:                        reader = createReader(stream, encoding, isBigEndian);
0165:                    }
0166:                }
0167:            }
0168:
0169:            // Adapted from:
0170:            // org.apache.xerces.impl.XMLEntityManager.createReader
0171:            /**
0172:             * Creates a reader capable of reading the given input stream in
0173:             * the specified encoding.
0174:             *
0175:             * @param inputStream  The input stream.
0176:             * @param encoding     The encoding name that the input stream is
0177:             *                     encoded using. If the user has specified that
0178:             *                     Java encoding names are allowed, then the
0179:             *                     encoding name may be a Java encoding name;
0180:             *                     otherwise, it is an ianaEncoding name.
0181:             * @param isBigEndian   For encodings (like uCS-4), whose names cannot
0182:             *                      specify a byte order, this tells whether the order
0183:             *                      is bigEndian. null means unknown or not relevant.
0184:             *
0185:             * @return Returns a reader.
0186:             */
0187:            private Reader createReader(InputStream inputStream,
0188:                    String encoding, Boolean isBigEndian) throws IOException,
0189:                    JasperException {
0190:
0191:                // normalize encoding name
0192:                if (encoding == null) {
0193:                    encoding = "UTF-8";
0194:                }
0195:
0196:                // try to use an optimized reader
0197:                String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
0198:                if (ENCODING.equals("UTF-8")) {
0199:                    return new UTF8Reader(inputStream, fBufferSize);
0200:                }
0201:                if (ENCODING.equals("US-ASCII")) {
0202:                    return new ASCIIReader(inputStream, fBufferSize);
0203:                }
0204:                if (ENCODING.equals("ISO-10646-UCS-4")) {
0205:                    if (isBigEndian != null) {
0206:                        boolean isBE = isBigEndian.booleanValue();
0207:                        if (isBE) {
0208:                            return new UCSReader(inputStream, UCSReader.UCS4BE);
0209:                        } else {
0210:                            return new UCSReader(inputStream, UCSReader.UCS4LE);
0211:                        }
0212:                    } else {
0213:                        err.jspError(
0214:                                "jsp.error.xml.encodingByteOrderUnsupported",
0215:                                encoding);
0216:                    }
0217:                }
0218:                if (ENCODING.equals("ISO-10646-UCS-2")) {
0219:                    if (isBigEndian != null) { // sould never happen with this encoding...
0220:                        boolean isBE = isBigEndian.booleanValue();
0221:                        if (isBE) {
0222:                            return new UCSReader(inputStream, UCSReader.UCS2BE);
0223:                        } else {
0224:                            return new UCSReader(inputStream, UCSReader.UCS2LE);
0225:                        }
0226:                    } else {
0227:                        err.jspError(
0228:                                "jsp.error.xml.encodingByteOrderUnsupported",
0229:                                encoding);
0230:                    }
0231:                }
0232:
0233:                // check for valid name
0234:                boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
0235:                boolean validJava = XMLChar.isValidJavaEncoding(encoding);
0236:                if (!validIANA || (fAllowJavaEncodings && !validJava)) {
0237:                    err.jspError("jsp.error.xml.encodingDeclInvalid", encoding);
0238:                    // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
0239:                    //       because every byte is a valid ISO Latin 1 character.
0240:                    //       It may not translate correctly but if we failed on
0241:                    //       the encoding anyway, then we're expecting the content
0242:                    //       of the document to be bad. This will just prevent an
0243:                    //       invalid UTF-8 sequence to be detected. This is only
0244:                    //       important when continue-after-fatal-error is turned
0245:                    //       on. -Ac
0246:                    encoding = "ISO-8859-1";
0247:                }
0248:
0249:                // try to use a Java reader
0250:                String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
0251:                if (javaEncoding == null) {
0252:                    if (fAllowJavaEncodings) {
0253:                        javaEncoding = encoding;
0254:                    } else {
0255:                        err.jspError("jsp.error.xml.encodingDeclInvalid",
0256:                                encoding);
0257:                        // see comment above.
0258:                        javaEncoding = "ISO8859_1";
0259:                    }
0260:                }
0261:                return new InputStreamReader(inputStream, javaEncoding);
0262:
0263:            } // createReader(InputStream,String, Boolean): Reader
0264:
0265:            // Adapted from:
0266:            // org.apache.xerces.impl.XMLEntityManager.getEncodingName
0267:            /**
0268:             * Returns the IANA encoding name that is auto-detected from
0269:             * the bytes specified, with the endian-ness of that encoding where
0270:             * appropriate.
0271:             *
0272:             * @param b4    The first four bytes of the input.
0273:             * @param count The number of bytes actually read.
0274:             * @return a 2-element array:  the first element, an IANA-encoding string,
0275:             *  the second element a Boolean which is true iff the document is big
0276:             *  endian, false if it's little-endian, and null if the distinction isn't
0277:             *  relevant.
0278:             */
0279:            private Object[] getEncodingName(byte[] b4, int count) {
0280:
0281:                if (count < 2) {
0282:                    return new Object[] { "UTF-8", null };
0283:                }
0284:
0285:                // UTF-16, with BOM
0286:                int b0 = b4[0] & 0xFF;
0287:                int b1 = b4[1] & 0xFF;
0288:                if (b0 == 0xFE && b1 == 0xFF) {
0289:                    // UTF-16, big-endian
0290:                    return new Object[] { "UTF-16BE", new Boolean(true) };
0291:                }
0292:                if (b0 == 0xFF && b1 == 0xFE) {
0293:                    // UTF-16, little-endian
0294:                    return new Object[] { "UTF-16LE", new Boolean(false) };
0295:                }
0296:
0297:                // default to UTF-8 if we don't have enough bytes to make a
0298:                // good determination of the encoding
0299:                if (count < 3) {
0300:                    return new Object[] { "UTF-8", null };
0301:                }
0302:
0303:                // UTF-8 with a BOM
0304:                int b2 = b4[2] & 0xFF;
0305:                if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
0306:                    return new Object[] { "UTF-8", null };
0307:                }
0308:
0309:                // default to UTF-8 if we don't have enough bytes to make a
0310:                // good determination of the encoding
0311:                if (count < 4) {
0312:                    return new Object[] { "UTF-8", null };
0313:                }
0314:
0315:                // other encodings
0316:                int b3 = b4[3] & 0xFF;
0317:                if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
0318:                    // UCS-4, big endian (1234)
0319:                    return new Object[] { "ISO-10646-UCS-4", new Boolean(true) };
0320:                }
0321:                if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
0322:                    // UCS-4, little endian (4321)
0323:                    return new Object[] { "ISO-10646-UCS-4", new Boolean(false) };
0324:                }
0325:                if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
0326:                    // UCS-4, unusual octet order (2143)
0327:                    // REVISIT: What should this be?
0328:                    return new Object[] { "ISO-10646-UCS-4", null };
0329:                }
0330:                if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
0331:                    // UCS-4, unusual octect order (3412)
0332:                    // REVISIT: What should this be?
0333:                    return new Object[] { "ISO-10646-UCS-4", null };
0334:                }
0335:                if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
0336:                    // UTF-16, big-endian, no BOM
0337:                    // (or could turn out to be UCS-2...
0338:                    // REVISIT: What should this be?
0339:                    return new Object[] { "UTF-16BE", new Boolean(true) };
0340:                }
0341:                if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
0342:                    // UTF-16, little-endian, no BOM
0343:                    // (or could turn out to be UCS-2...
0344:                    return new Object[] { "UTF-16LE", new Boolean(false) };
0345:                }
0346:                if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
0347:                    // EBCDIC
0348:                    // a la xerces1, return CP037 instead of EBCDIC here
0349:                    return new Object[] { "CP037", null };
0350:                }
0351:
0352:                // default encoding
0353:                return new Object[] { "UTF-8", null };
0354:
0355:            }
0356:
0357:            // Adapted from:
0358:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.isExternal
0359:            /** Returns true if the current entity being scanned is external. */
0360:            public boolean isExternal() {
0361:                return true;
0362:            }
0363:
0364:            // Adapted from:
0365:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.peekChar
0366:            /**
0367:             * Returns the next character on the input.
0368:             * <p>
0369:             * <strong>Note:</strong> The character is <em>not</em> consumed.
0370:             *
0371:             * @throws IOException  Thrown if i/o error occurs.
0372:             * @throws EOFException Thrown on end of file.
0373:             */
0374:            public int peekChar() throws IOException {
0375:
0376:                // load more characters, if needed
0377:                if (fCurrentEntity.position == fCurrentEntity.count) {
0378:                    load(0, true);
0379:                }
0380:
0381:                // peek at character
0382:                int c = fCurrentEntity.ch[fCurrentEntity.position];
0383:
0384:                // return peeked character
0385:                if (fCurrentEntity.isExternal()) {
0386:                    return c != '\r' ? c : '\n';
0387:                } else {
0388:                    return c;
0389:                }
0390:
0391:            } // peekChar():int
0392:
0393:            // Adapted from:
0394:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanChar
0395:            /**
0396:             * Returns the next character on the input.
0397:             * <p>
0398:             * <strong>Note:</strong> The character is consumed.
0399:             *
0400:             * @throws IOException  Thrown if i/o error occurs.
0401:             * @throws EOFException Thrown on end of file.
0402:             */
0403:            public int scanChar() throws IOException {
0404:
0405:                // load more characters, if needed
0406:                if (fCurrentEntity.position == fCurrentEntity.count) {
0407:                    load(0, true);
0408:                }
0409:
0410:                // scan character
0411:                int c = fCurrentEntity.ch[fCurrentEntity.position++];
0412:                boolean external = false;
0413:                if (c == '\n'
0414:                        || (c == '\r' && (external = fCurrentEntity
0415:                                .isExternal()))) {
0416:                    fCurrentEntity.lineNumber++;
0417:                    fCurrentEntity.columnNumber = 1;
0418:                    if (fCurrentEntity.position == fCurrentEntity.count) {
0419:                        fCurrentEntity.ch[0] = (char) c;
0420:                        load(1, false);
0421:                    }
0422:                    if (c == '\r' && external) {
0423:                        if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
0424:                            fCurrentEntity.position--;
0425:                        }
0426:                        c = '\n';
0427:                    }
0428:                }
0429:
0430:                // return character that was scanned
0431:                fCurrentEntity.columnNumber++;
0432:                return c;
0433:
0434:            }
0435:
            // Adapted from:
            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanName
            /**
             * Returns a string matching the Name production appearing immediately
             * on the input as a symbol, or null if no Name string is present.
             * <p>
             * <strong>Note:</strong> The Name characters are consumed.
             * <p>
             * <strong>Note:</strong> The string returned must be a symbol. The
             * SymbolTable can be used for this purpose.
             * <p>
             * The name is kept contiguous in the character buffer: whenever the
             * buffer runs out in the middle of a name, the characters scanned so
             * far are moved to the front (growing the buffer if the name already
             * fills it) before more input is loaded behind them.
             *
             * @return the scanned name as added to the symbol table, or null if
             *         the next character does not start a name
             *
             * @throws IOException  Thrown if i/o error occurs.
             * @throws EOFException Thrown on end of file.
             *
             * @see SymbolTable
             * @see XMLChar#isName
             * @see XMLChar#isNameStart
             */
            public String scanName() throws IOException {

                // load more characters, if needed
                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true);
                }

                // scan name
                // 'offset' marks where the name starts inside the buffer
                int offset = fCurrentEntity.position;
                if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
                    if (++fCurrentEntity.position == fCurrentEntity.count) {
                        // the start character was the last one buffered: keep it
                        // in slot 0 and load the rest of the input after it
                        fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
                        offset = 0;
                        // NOTE(review): load() is not visible here; a true result
                        // is treated as "nothing more belongs to this name", so
                        // the single start character is returned as the name.
                        if (load(1, false)) {
                            fCurrentEntity.columnNumber++;
                            String symbol = fSymbolTable.addSymbol(
                                    fCurrentEntity.ch, 0, 1);
                            return symbol;
                        }
                    }
                    while (XMLChar
                            .isName(fCurrentEntity.ch[fCurrentEntity.position])) {
                        if (++fCurrentEntity.position == fCurrentEntity.count) {
                            // buffer exhausted mid-name: move the partial name to
                            // the front before loading more input
                            int length = fCurrentEntity.position - offset;
                            if (length == fBufferSize) {
                                // bad luck we have to resize our buffer
                                // (the name alone fills it, so double the size)
                                char[] tmp = new char[fBufferSize * 2];
                                System.arraycopy(fCurrentEntity.ch, offset,
                                        tmp, 0, length);
                                fCurrentEntity.ch = tmp;
                                fBufferSize *= 2;
                            } else {
                                System.arraycopy(fCurrentEntity.ch, offset,
                                        fCurrentEntity.ch, 0, length);
                            }
                            offset = 0;
                            // load behind the 'length' characters kept above; a
                            // true result ends the name (see note on load above)
                            if (load(length, false)) {
                                break;
                            }
                        }
                    }
                }
                // zero when the first character was not a name start character
                int length = fCurrentEntity.position - offset;
                fCurrentEntity.columnNumber += length;

                // return name
                String symbol = null;
                if (length > 0) {
                    symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset,
                            length);
                }
                return symbol;

            }
0508:
0509:            // Adapted from:
0510:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.scanLiteral
0511:            /**
0512:             * Scans a range of attribute value data, setting the fields of the
0513:             * XMLString structure, appropriately.
0514:             * <p>
0515:             * <strong>Note:</strong> The characters are consumed.
0516:             * <p>
0517:             * <strong>Note:</strong> This method does not guarantee to return
0518:             * the longest run of attribute value data. This method may return
0519:             * before the quote character due to reaching the end of the input
0520:             * buffer or any other reason.
0521:             * <p>
0522:             * <strong>Note:</strong> The fields contained in the XMLString
0523:             * structure are not guaranteed to remain valid upon subsequent calls
0524:             * to the entity scanner. Therefore, the caller is responsible for
0525:             * immediately using the returned character data or making a copy of
0526:             * the character data.
0527:             *
0528:             * @param quote   The quote character that signifies the end of the
0529:             *                attribute value data.
0530:             * @param content The content structure to fill.
0531:             *
0532:             * @return Returns the next character on the input, if known. This
0533:             *         value may be -1 but this does <em>note</em> designate
0534:             *         end of file.
0535:             *
0536:             * @throws IOException  Thrown if i/o error occurs.
0537:             * @throws EOFException Thrown on end of file.
0538:             */
0539:            public int scanLiteral(int quote, XMLString content)
0540:                    throws IOException {
0541:
0542:                // load more characters, if needed
0543:                if (fCurrentEntity.position == fCurrentEntity.count) {
0544:                    load(0, true);
0545:                } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
0546:                    fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
0547:                    load(1, false);
0548:                    fCurrentEntity.position = 0;
0549:                }
0550:
0551:                // normalize newlines
0552:                int offset = fCurrentEntity.position;
0553:                int c = fCurrentEntity.ch[offset];
0554:                int newlines = 0;
0555:                boolean external = fCurrentEntity.isExternal();
0556:                if (c == '\n' || (c == '\r' && external)) {
0557:                    do {
0558:                        c = fCurrentEntity.ch[fCurrentEntity.position++];
0559:                        if (c == '\r' && external) {
0560:                            newlines++;
0561:                            fCurrentEntity.lineNumber++;
0562:                            fCurrentEntity.columnNumber = 1;
0563:                            if (fCurrentEntity.position == fCurrentEntity.count) {
0564:                                offset = 0;
0565:                                fCurrentEntity.position = newlines;
0566:                                if (load(newlines, false)) {
0567:                                    break;
0568:                                }
0569:                            }
0570:                            if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
0571:                                fCurrentEntity.position++;
0572:                                offset++;
0573:                            }
0574:                            /*** NEWLINE NORMALIZATION ***/
0575:                            else {
0576:                                newlines++;
0577:                            }
0578:                            /***/
0579:                        } else if (c == '\n') {
0580:                            newlines++;
0581:                            fCurrentEntity.lineNumber++;
0582:                            fCurrentEntity.columnNumber = 1;
0583:                            if (fCurrentEntity.position == fCurrentEntity.count) {
0584:                                offset = 0;
0585:                                fCurrentEntity.position = newlines;
0586:                                if (load(newlines, false)) {
0587:                                    break;
0588:                                }
0589:                            }
0590:                            /*** NEWLINE NORMALIZATION ***
0591:                             if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
0592:                             && external) {
0593:                             fCurrentEntity.position++;
0594:                             offset++;
0595:                             }
0596:                             /***/
0597:                        } else {
0598:                            fCurrentEntity.position--;
0599:                            break;
0600:                        }
0601:                    } while (fCurrentEntity.position < fCurrentEntity.count - 1);
0602:                    for (int i = offset; i < fCurrentEntity.position; i++) {
0603:                        fCurrentEntity.ch[i] = '\n';
0604:                    }
0605:                    int length = fCurrentEntity.position - offset;
0606:                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
0607:                        content.setValues(fCurrentEntity.ch, offset, length);
0608:                        return -1;
0609:                    }
0610:                }
0611:
0612:                // scan literal value
0613:                while (fCurrentEntity.position < fCurrentEntity.count) {
0614:                    c = fCurrentEntity.ch[fCurrentEntity.position++];
0615:                    if ((c == quote && (!fCurrentEntity.literal || external))
0616:                            || c == '%' || !XMLChar.isContent(c)) {
0617:                        fCurrentEntity.position--;
0618:                        break;
0619:                    }
0620:                }
0621:                int length = fCurrentEntity.position - offset;
0622:                fCurrentEntity.columnNumber += length - newlines;
0623:                content.setValues(fCurrentEntity.ch, offset, length);
0624:
0625:                // return next character
0626:                if (fCurrentEntity.position != fCurrentEntity.count) {
0627:                    c = fCurrentEntity.ch[fCurrentEntity.position];
0628:                    // NOTE: We don't want to accidentally signal the
0629:                    //       end of the literal if we're expanding an
0630:                    //       entity appearing in the literal. -Ac
0631:                    if (c == quote && fCurrentEntity.literal) {
0632:                        c = -1;
0633:                    }
0634:                } else {
0635:                    c = -1;
0636:                }
0637:                return c;
0638:
0639:            }
0640:
0641:            /**
0642:             * Scans a range of character data up to the specified delimiter,
0643:             * setting the fields of the XMLString structure, appropriately.
0644:             * <p>
0645:             * <strong>Note:</strong> The characters are consumed.
0646:             * <p>
0647:             * <strong>Note:</strong> This assumes that the internal buffer is
0648:             * at least the same size, or bigger, than the length of the delimiter
0649:             * and that the delimiter contains at least one character.
0650:             * <p>
0651:             * <strong>Note:</strong> This method does not guarantee to return
0652:             * the longest run of character data. This method may return before
0653:             * the delimiter due to reaching the end of the input buffer or any
0654:             * other reason.
0655:             * <p>
0656:             * <strong>Note:</strong> The fields contained in the XMLString
0657:             * structure are not guaranteed to remain valid upon subsequent calls
0658:             * to the entity scanner. Therefore, the caller is responsible for
0659:             * immediately using the returned character data or making a copy of
0660:             * the character data.
0661:             *
0662:             * @param delimiter The string that signifies the end of the character
0663:             *                  data to be scanned.
0664:             * @param buffer    The data structure to fill.
0665:             *
0666:             * @return Returns true if there is more data to scan, false otherwise.
                  *         As implemented here: false once the delimiter has been
                  *         matched and consumed (the delimiter itself is not appended
                  *         to the buffer), and also false when, even after trying to
                  *         load more input, no more than delimiter.length() characters
                  *         remain buffered. True when the method returns early: after
                  *         a run of newlines that stops one character short of the end
                  *         of the buffer, or on a character rejected by
                  *         XMLChar.isInvalid.
0667:             *
0668:             * @throws IOException  Thrown if i/o error occurs.
0669:             * @throws EOFException Thrown on end of file.
0670:             */
0671:            public boolean scanData(String delimiter, XMLStringBuffer buffer)
0672:                    throws IOException {
0673:
0674:                boolean done = false;
0675:                int delimLen = delimiter.length();
0676:                char charAt0 = delimiter.charAt(0);
0677:                boolean external = fCurrentEntity.isExternal();
0678:                do {
0679:
0680:                    // load more characters, if needed
0681:
0682:                    if (fCurrentEntity.position == fCurrentEntity.count) {
0683:                        load(0, true);
0684:                    } else if (fCurrentEntity.position >= fCurrentEntity.count
0685:                            - delimLen) {
                             // too few unread characters to hold a full delimiter:
                             // slide the unread tail to the front of the buffer,
                             // load new input right behind it, and resume at index 0
0686:                        System.arraycopy(fCurrentEntity.ch,
0687:                                fCurrentEntity.position, fCurrentEntity.ch, 0,
0688:                                fCurrentEntity.count - fCurrentEntity.position);
0689:                        load(fCurrentEntity.count - fCurrentEntity.position,
0690:                                false);
0691:                        fCurrentEntity.position = 0;
0692:                    }
0693:                    if (fCurrentEntity.position >= fCurrentEntity.count
0694:                            - delimLen) {
0695:                        // something must be wrong with the input: e.g., file ends an
0696:                        // unterminated comment
                             // hand whatever is left to the caller and give up
0697:                        int length = fCurrentEntity.count
0698:                                - fCurrentEntity.position;
0699:                        buffer.append(fCurrentEntity.ch,
0700:                                fCurrentEntity.position, length);
                             // NOTE(review): the column advances by count, not by
                             // length; the two are equal only when position is 0
                             // here -- confirm this is intended
0701:                        fCurrentEntity.columnNumber += fCurrentEntity.count;
0702:                        fCurrentEntity.position = fCurrentEntity.count;
0703:                        load(0, true);
0704:                        return false;
0705:                    }
0706:
0707:                    // normalize newlines
                         // offset marks the start of the text appended in this pass;
                         // newlines counts the line breaks seen so they can be
                         // excluded from the column update further down
0708:                    int offset = fCurrentEntity.position;
0709:                    int c = fCurrentEntity.ch[offset];
0710:                    int newlines = 0;
0711:                    if (c == '\n' || (c == '\r' && external)) {
0712:                        do {
0713:                            c = fCurrentEntity.ch[fCurrentEntity.position++];
0714:                            if (c == '\r' && external) {
0715:                                newlines++;
0716:                                fCurrentEntity.lineNumber++;
0717:                                fCurrentEntity.columnNumber = 1;
0718:                                if (fCurrentEntity.position == fCurrentEntity.count) {
                                         // buffer exhausted in the middle of the run:
                                         // restart at index 0, keeping one slot per
                                         // newline seen so far, and load after them
0719:                                    offset = 0;
0720:                                    fCurrentEntity.position = newlines;
0721:                                    if (load(newlines, false)) {
0722:                                        break;
0723:                                    }
0724:                                }
                                     // a '\n' directly after '\r' belongs to the same
                                     // line break: consume it and move offset past
                                     // one slot so only one '\n' is emitted for it
0725:                                if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
0726:                                    fCurrentEntity.position++;
0727:                                    offset++;
0728:                                }
0729:                                /*** NEWLINE NORMALIZATION ***/
0730:                                else {
0731:                                    newlines++;
0732:                                }
0733:                            } else if (c == '\n') {
0734:                                newlines++;
0735:                                fCurrentEntity.lineNumber++;
0736:                                fCurrentEntity.columnNumber = 1;
0737:                                if (fCurrentEntity.position == fCurrentEntity.count) {
0738:                                    offset = 0;
0739:                                    fCurrentEntity.position = newlines;
                                         // NOTE(review): count is reset only in this
                                         // '\n' branch; the '\r' branch above does
                                         // not do it -- confirm the asymmetry
0740:                                    fCurrentEntity.count = newlines;
0741:                                    if (load(newlines, false)) {
0742:                                        break;
0743:                                    }
0744:                                }
0745:                            } else {
                                     // first non-newline character: un-read it and
                                     // leave the normalization loop
0746:                                fCurrentEntity.position--;
0747:                                break;
0748:                            }
0749:                        } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                             // overwrite the scanned run with '\n' characters so the
                             // text handed to the caller contains only '\n' breaks
0750:                        for (int i = offset; i < fCurrentEntity.position; i++) {
0751:                            fCurrentEntity.ch[i] = '\n';
0752:                        }
0753:                        int length = fCurrentEntity.position - offset;
0754:                        if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                                 // only one character is left in the buffer: return
                                 // the newlines now and let the next call continue
0755:                            buffer.append(fCurrentEntity.ch, offset, length);
0756:                            return true;
0757:                        }
0758:                    }
0759:
0760:                    // iterate over buffer looking for delimiter
0761:                    OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
0762:                        c = fCurrentEntity.ch[fCurrentEntity.position++];
0763:                        if (c == charAt0) {
0764:                            // looks like we just hit the delimiter
0765:                            int delimOffset = fCurrentEntity.position - 1;
0766:                            for (int i = 1; i < delimLen; i++) {
0767:                                if (fCurrentEntity.position == fCurrentEntity.count) {
                                         // buffer ended inside a possible delimiter:
                                         // rewind to its first character and stop
                                         // this pass so the outer loop can reload
0768:                                    fCurrentEntity.position -= i;
0769:                                    break OUTER;
0770:                                }
0771:                                c = fCurrentEntity.ch[fCurrentEntity.position++];
0772:                                if (delimiter.charAt(i) != c) {
0773:                                    fCurrentEntity.position--;
0774:                                    break;
0775:                                }
0776:                            }
                                 // position advanced by exactly delimLen only when
                                 // every delimiter character matched
0777:                            if (fCurrentEntity.position == delimOffset
0778:                                    + delimLen) {
0779:                                done = true;
0780:                                break;
0781:                            }
0782:                        } else if (c == '\n' || (external && c == '\r')) {
                                 // leave the line break for the normalization step
                                 // of the next pass
0783:                            fCurrentEntity.position--;
0784:                            break;
0785:                        } else if (XMLChar.isInvalid(c)) {
                                 // stop in front of the invalid character and return
                                 // what has been scanned so far
0786:                            fCurrentEntity.position--;
0787:                            int length = fCurrentEntity.position - offset;
0788:                            fCurrentEntity.columnNumber += length - newlines;
0789:                            buffer.append(fCurrentEntity.ch, offset, length);
0790:                            return true;
0791:                        }
0792:                    }
0793:                    int length = fCurrentEntity.position - offset;
0794:                    fCurrentEntity.columnNumber += length - newlines;
0795:                    if (done) {
                             // the delimiter was consumed but is not part of the data
0796:                        length -= delimLen;
0797:                    }
0798:                    buffer.append(fCurrentEntity.ch, offset, length);
0799:
0800:                    // return true if string was skipped
0801:                } while (!done);
                     // the loop above only exits with done == true, so this is false
0802:                return !done;
0803:
0804:            }
0805:
0806:            // Adapted from:
0807:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipChar
0808:            /**
0809:             * Skips a character appearing immediately on the input.
0810:             * <p>
0811:             * <strong>Note:</strong> The character is consumed only if it matches
0812:             * the specified character.
0813:             *
0814:             * @param c The character to skip.
0815:             *
0816:             * @return Returns true if the character was skipped.
0817:             *
0818:             * @throws IOException  Thrown if i/o error occurs.
0819:             * @throws EOFException Thrown on end of file.
0820:             */
0821:            public boolean skipChar(int c) throws IOException {
0822:
0823:                // load more characters, if needed
0824:                if (fCurrentEntity.position == fCurrentEntity.count) {
0825:                    load(0, true);
0826:                }
0827:
0828:                // skip character
0829:                int cc = fCurrentEntity.ch[fCurrentEntity.position];
0830:                if (cc == c) {
0831:                    fCurrentEntity.position++;
0832:                    if (c == '\n') {
0833:                        fCurrentEntity.lineNumber++;
0834:                        fCurrentEntity.columnNumber = 1;
0835:                    } else {
0836:                        fCurrentEntity.columnNumber++;
0837:                    }
0838:                    return true;
0839:                } else if (c == '\n' && cc == '\r'
0840:                        && fCurrentEntity.isExternal()) {
0841:                    // handle newlines
0842:                    if (fCurrentEntity.position == fCurrentEntity.count) {
0843:                        fCurrentEntity.ch[0] = (char) cc;
0844:                        load(1, false);
0845:                    }
0846:                    fCurrentEntity.position++;
0847:                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
0848:                        fCurrentEntity.position++;
0849:                    }
0850:                    fCurrentEntity.lineNumber++;
0851:                    fCurrentEntity.columnNumber = 1;
0852:                    return true;
0853:                }
0854:
0855:                // character was not skipped
0856:                return false;
0857:
0858:            }
0859:
0860:            // Adapted from:
0861:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.skipSpaces
0862:            /**
0863:             * Skips space characters appearing immediately on the input.
0864:             * <p>
0865:             * <strong>Note:</strong> The characters are consumed only if they are
0866:             * space characters.
                  * <p>
                  * Line and column numbers of the current entity are updated while
                  * skipping: a line feed, or a carriage return in an external entity,
                  * starts a new line, and a carriage return directly followed by a
                  * line feed is counted as a single line break.
0867:             *
0868:             * @return Returns true if at least one space character was skipped.
0869:             *
0870:             * @throws IOException  Thrown if i/o error occurs.
0871:             * @throws EOFException Thrown on end of file.
0872:             *
0873:             * @see XMLChar#isSpace
0874:             */
0875:            public boolean skipSpaces() throws IOException {
0876:
0877:                // load more characters, if needed
0878:                if (fCurrentEntity.position == fCurrentEntity.count) {
0879:                    load(0, true);
0880:                }
0881:
0882:                // skip spaces
0883:                int c = fCurrentEntity.ch[fCurrentEntity.position];
0884:                if (XMLChar.isSpace(c)) {
                         // NOTE(review): external is sampled once, although
                         // load(..., true) below may switch fCurrentEntity -- confirm
0885:                    boolean external = fCurrentEntity.isExternal();
0886:                    do {
0887:                        boolean entityChanged = false;
0888:                        // handle newlines
0889:                        if (c == '\n' || (external && c == '\r')) {
0890:                            fCurrentEntity.lineNumber++;
0891:                            fCurrentEntity.columnNumber = 1;
0892:                            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                                     // the newline is the last buffered character:
                                     // keep it in slot 0 and load new input behind it
                                     // so the '\n' look-ahead below has data to read
0893:                                fCurrentEntity.ch[0] = (char) c;
0894:                                entityChanged = load(1, true);
0895:                                if (!entityChanged)
0896:                                    // the load change the position to be 1,
0897:                                    // need to restore it when entity not changed
0898:                                    fCurrentEntity.position = 0;
0899:                            }
0900:                            if (c == '\r' && external) {
0901:                                // REVISIT: Does this need to be updated to fix the
0902:                                //          #x0D ^#x0A newline normalization problem? -Ac
                                     // step onto the character after '\r'; if it is not
                                     // '\n', step back so that only the '\r' is consumed
                                     // by the increment further down
0903:                                if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
0904:                                    fCurrentEntity.position--;
0905:                                }
0906:                            }
0907:                            /*** NEWLINE NORMALIZATION ***
0908:                             else {
0909:                             if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
0910:                             && external) {
0911:                             fCurrentEntity.position++;
0912:                             }
0913:                             }
0914:                             /***/
0915:                        } else {
0916:                            fCurrentEntity.columnNumber++;
0917:                        }
0918:                        // load more characters, if needed
                             // the increment consumes the current space character; it
                             // is skipped when the load above reported an entity change
0919:                        if (!entityChanged)
0920:                            fCurrentEntity.position++;
0921:                        if (fCurrentEntity.position == fCurrentEntity.count) {
0922:                            load(0, true);
0923:                        }
0924:                    } while (XMLChar
0925:                            .isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
0926:                    return true;
0927:                }
0928:
0929:                // no spaces were found
0930:                return false;
0931:
0932:            }
0933:
0934:            /**
0935:             * Skips the specified string appearing immediately on the input.
0936:             * <p>
0937:             * <strong>Note:</strong> The characters are consumed only if they are
0938:             * space characters.
0939:             *
0940:             * @param s The string to skip.
0941:             *
0942:             * @return Returns true if the string was skipped.
0943:             *
0944:             * @throws IOException  Thrown if i/o error occurs.
0945:             * @throws EOFException Thrown on end of file.
0946:             */
0947:            public boolean skipString(String s) throws IOException {
0948:
0949:                // load more characters, if needed
0950:                if (fCurrentEntity.position == fCurrentEntity.count) {
0951:                    load(0, true);
0952:                }
0953:
0954:                // skip string
0955:                final int length = s.length();
0956:                for (int i = 0; i < length; i++) {
0957:                    char c = fCurrentEntity.ch[fCurrentEntity.position++];
0958:                    if (c != s.charAt(i)) {
0959:                        fCurrentEntity.position -= i + 1;
0960:                        return false;
0961:                    }
0962:                    if (i < length - 1
0963:                            && fCurrentEntity.position == fCurrentEntity.count) {
0964:                        System.arraycopy(fCurrentEntity.ch,
0965:                                fCurrentEntity.count - i - 1,
0966:                                fCurrentEntity.ch, 0, i + 1);
0967:                        // REVISIT: Can a string to be skipped cross an
0968:                        //          entity boundary? -Ac
0969:                        if (load(i + 1, false)) {
0970:                            fCurrentEntity.position -= i + 1;
0971:                            return false;
0972:                        }
0973:                    }
0974:                }
0975:                fCurrentEntity.columnNumber += length;
0976:                return true;
0977:
0978:            }
0979:
0980:            // Adapted from:
0981:            // org.apache.xerces.impl.XMLEntityManager.EntityScanner.load
0982:            /**
0983:             * Loads a chunk of text.
0984:             *
0985:             * @param offset       The offset into the character buffer to
0986:             *                     read the next batch of characters.
0987:             * @param changeEntity True if the load should change entities
0988:             *                     at the end of the entity, otherwise leave
0989:             *                     the current entity in place and the entity
0990:             *                     boundary will be signaled by the return
0991:             *                     value.
0992:             *
0993:             * @returns Returns true if the entity changed as a result of this
0994:             *          load operation.
0995:             */
0996:            final boolean load(int offset, boolean changeEntity)
0997:                    throws IOException {
0998:
0999:                // read characters
1000:                int length = fCurrentEntity.mayReadChunks ? (fCurrentEntity.ch.length - offset)
1001:                        : (DEFAULT_XMLDECL_BUFFER_SIZE);
1002:                int count = fCurrentEntity.reader.read(fCurrentEntity.ch,
1003:                        offset, length);
1004:
1005:                // reset count and position
1006:                boolean entityChanged = false;
1007:                if (count != -1) {
1008:                    if (count != 0) {
1009:                        fCurrentEntity.count = count + offset;
1010:                        fCurrentEntity.position = offset;
1011:                    }
1012:                }
1013:
1014:                // end of this entity
1015:                else {
1016:                    fCurrentEntity.count = offset;
1017:                    fCurrentEntity.position = offset;
1018:                    entityChanged = true;
1019:                    if (changeEntity) {
1020:                        endEntity();
1021:                        if (fCurrentEntity == null) {
1022:                            throw new EOFException();
1023:                        }
1024:                        // handle the trailing edges
1025:                        if (fCurrentEntity.position == fCurrentEntity.count) {
1026:                            load(0, false);
1027:                        }
1028:                    }
1029:                }
1030:
1031:                return entityChanged;
1032:
1033:            }
1034:
1035:            // Adapted from:
1036:            // org.apache.xerces.impl.XMLEntityManager.RewindableInputStream
1037:            /**
1038:             * This class wraps the byte inputstreams we're presented with.
1039:             * We need it because java.io.InputStreams don't provide
1040:             * functionality to reread processed bytes, and they have a habit
1041:             * of reading more than one character when you call their read()
1042:             * methods.  This means that, once we discover the true (declared)
1043:             * encoding of a document, we can neither backtrack to read the
1044:             * whole doc again nor start reading where we are with a new
1045:             * reader.
1046:             *
1047:             * This class allows rewinding an inputStream by allowing a mark
1048:             * to be set, and the stream reset to that position.  <strong>The
1049:             * class assumes that it needs to read one character per
1050:             * invocation when it's read() method is inovked, but uses the
1051:             * underlying InputStream's read(char[], offset length) method--it
1052:             * won't buffer data read this way!</strong>
1053:             *
1054:             * @author Neil Graham, IBM
1055:             * @author Glenn Marcy, IBM
1056:             */
1057:            private final class RewindableInputStream extends InputStream {
1058:
1059:                private InputStream fInputStream;
1060:                private byte[] fData;
1061:                private int fStartOffset;
1062:                private int fEndOffset;
1063:                private int fOffset;
1064:                private int fLength;
1065:                private int fMark;
1066:
1067:                public RewindableInputStream(InputStream is) {
1068:                    fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE];
1069:                    fInputStream = is;
1070:                    fStartOffset = 0;
1071:                    fEndOffset = -1;
1072:                    fOffset = 0;
1073:                    fLength = 0;
1074:                    fMark = 0;
1075:                }
1076:
1077:                public void setStartOffset(int offset) {
1078:                    fStartOffset = offset;
1079:                }
1080:
1081:                public void rewind() {
1082:                    fOffset = fStartOffset;
1083:                }
1084:
1085:                public int read() throws IOException {
1086:                    int b = 0;
1087:                    if (fOffset < fLength) {
1088:                        return fData[fOffset++] & 0xff;
1089:                    }
1090:                    if (fOffset == fEndOffset) {
1091:                        return -1;
1092:                    }
1093:                    if (fOffset == fData.length) {
1094:                        byte[] newData = new byte[fOffset << 1];
1095:                        System.arraycopy(fData, 0, newData, 0, fOffset);
1096:                        fData = newData;
1097:                    }
1098:                    b = fInputStream.read();
1099:                    if (b == -1) {
1100:                        fEndOffset = fOffset;
1101:                        return -1;
1102:                    }
1103:                    fData[fLength++] = (byte) b;
1104:                    fOffset++;
1105:                    return b & 0xff;
1106:                }
1107:
1108:                public int read(byte[] b, int off, int len) throws IOException {
1109:                    int bytesLeft = fLength - fOffset;
1110:                    if (bytesLeft == 0) {
1111:                        if (fOffset == fEndOffset) {
1112:                            return -1;
1113:                        }
1114:                        // better get some more for the voracious reader...
1115:                        if (fCurrentEntity.mayReadChunks) {
1116:                            return fInputStream.read(b, off, len);
1117:                        }
1118:                        int returnedVal = read();
1119:                        if (returnedVal == -1) {
1120:                            fEndOffset = fOffset;
1121:                            return -1;
1122:                        }
1123:                        b[off] = (byte) returnedVal;
1124:                        return 1;
1125:                    }
1126:                    if (len < bytesLeft) {
1127:                        if (len <= 0) {
1128:                            return 0;
1129:                        }
1130:                    } else {
1131:                        len = bytesLeft;
1132:                    }
1133:                    if (b != null) {
1134:                        System.arraycopy(fData, fOffset, b, off, len);
1135:                    }
1136:                    fOffset += len;
1137:                    return len;
1138:                }
1139:
1140:                public long skip(long n) throws IOException {
1141:                    int bytesLeft;
1142:                    if (n <= 0) {
1143:                        return 0;
1144:                    }
1145:                    bytesLeft = fLength - fOffset;
1146:                    if (bytesLeft == 0) {
1147:                        if (fOffset == fEndOffset) {
1148:                            return 0;
1149:                        }
1150:                        return fInputStream.skip(n);
1151:                    }
1152:                    if (n <= bytesLeft) {
1153:                        fOffset += n;
1154:                        return n;
1155:                    }
1156:                    fOffset += bytesLeft;
1157:                    if (fOffset == fEndOffset) {
1158:                        return bytesLeft;
1159:                    }
1160:                    n -= bytesLeft;
1161:                    /*
1162:                     * In a manner of speaking, when this class isn't permitting more
1163:                     * than one byte at a time to be read, it is "blocking".  The
1164:                     * available() method should indicate how much can be read without
1165:                     * blocking, so while we're in this mode, it should only indicate
1166:                     * that bytes in its buffer are available; otherwise, the result of
1167:                     * available() on the underlying InputStream is appropriate.
1168:                     */
1169:                    return fInputStream.skip(n) + bytesLeft;
1170:                }
1171:
1172:                public int available() throws IOException {
1173:                    int bytesLeft = fLength - fOffset;
1174:                    if (bytesLeft == 0) {
1175:                        if (fOffset == fEndOffset) {
1176:                            return -1;
1177:                        }
1178:                        return fCurrentEntity.mayReadChunks ? fInputStream
1179:                                .available() : 0;
1180:                    }
1181:                    return bytesLeft;
1182:                }
1183:
1184:                public void mark(int howMuch) {
1185:                    fMark = fOffset;
1186:                }
1187:
1188:                public void reset() {
1189:                    fOffset = fMark;
1190:                }
1191:
1192:                public boolean markSupported() {
1193:                    return true;
1194:                }
1195:
1196:                public void close() throws IOException {
1197:                    if (fInputStream != null) {
1198:                        fInputStream.close();
1199:                        fInputStream = null;
1200:                    }
1201:                }
1202:            } // end of RewindableInputStream class
1203:
1204:            // Adapted from:
1205:            // org.apache.xerces.impl.XMLDocumentScannerImpl.dispatch
1206:            private void scanXMLDecl() throws IOException, JasperException {
1207:
1208:                if (skipString("<?xml")) {
1209:                    fMarkupDepth++;
1210:                    // NOTE: special case where document starts with a PI
1211:                    //       whose name starts with "xml" (e.g. "xmlfoo")
1212:                    if (XMLChar.isName(peekChar())) {
1213:                        fStringBuffer.clear();
1214:                        fStringBuffer.append("xml");
1215:                        while (XMLChar.isName(peekChar())) {
1216:                            fStringBuffer.append((char) scanChar());
1217:                        }
1218:                        String target = fSymbolTable.addSymbol(
1219:                                fStringBuffer.ch, fStringBuffer.offset,
1220:                                fStringBuffer.length);
1221:                        scanPIData(target, fString);
1222:                    }
1223:
1224:                    // standard XML declaration
1225:                    else {
1226:                        scanXMLDeclOrTextDecl(false);
1227:                    }
1228:                }
1229:            }
1230:
1231:            // Adapted from:
1232:            // org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanXMLDeclOrTextDecl
1233:            /**
1234:             * Scans an XML or text declaration.
1235:             * <p>
1236:             * <pre>
1237:             * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1238:             * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1239:             * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
1240:             * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1241:             * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1242:             *                 | ('"' ('yes' | 'no') '"'))
1243:             *
1244:             * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
1245:             * </pre>
1246:             *
1247:             * @param scanningTextDecl True if a text declaration is to
1248:             *                         be scanned instead of an XML
1249:             *                         declaration.
1250:             */
1251:            private void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
1252:                    throws IOException, JasperException {
1253:
1254:                // scan decl
1255:                scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
1256:                fMarkupDepth--;
1257:
1258:                // pseudo-attribute values
1259:                String encodingPseudoAttr = fStrings[1];
1260:
1261:                // set encoding on reader
1262:                if (encodingPseudoAttr != null) {
1263:                    isEncodingSetInProlog = true;
1264:                    encoding = encodingPseudoAttr;
1265:                }
1266:            }
1267:
1268:            // Adapted from:
1269:            // org.apache.xerces.impl.XMLScanner.scanXMLDeclOrTextDecl
1270:            /**
1271:             * Scans an XML or text declaration.
1272:             * <p>
1273:             * <pre>
1274:             * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1275:             * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
1276:             * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
1277:             * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1278:             * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
1279:             *                 | ('"' ('yes' | 'no') '"'))
1280:             *
1281:             * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1282:             * </pre>
1283:             *
1284:             * @param scanningTextDecl True if a text declaration is to
1285:             *                         be scanned instead of an XML
1286:             *                         declaration.
1287:             * @param pseudoAttributeValues An array of size 3 to return the version,
1288:             *                         encoding and standalone pseudo attribute values
1289:             *                         (in that order).
1290:             *
1291:             * <strong>Note:</strong> This method uses fString, anything in it
1292:             * at the time of calling is lost.
1293:             */
1294:            private void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
1295:                    String[] pseudoAttributeValues) throws IOException,
1296:                    JasperException {
1297:
1298:                // pseudo-attribute values
1299:                String version = null;
1300:                String encoding = null;
1301:                String standalone = null;
1302:
1303:                // scan pseudo-attributes
1304:                final int STATE_VERSION = 0;
1305:                final int STATE_ENCODING = 1;
1306:                final int STATE_STANDALONE = 2;
1307:                final int STATE_DONE = 3;
1308:                int state = STATE_VERSION;
1309:
1310:                boolean dataFoundForTarget = false;
1311:                boolean sawSpace = skipSpaces();
1312:                while (peekChar() != '?') {
1313:                    dataFoundForTarget = true;
1314:                    String name = scanPseudoAttribute(scanningTextDecl, fString);
1315:                    switch (state) {
1316:                    case STATE_VERSION: {
1317:                        if (name == fVersionSymbol) {
1318:                            if (!sawSpace) {
1319:                                reportFatalError(
1320:                                        scanningTextDecl ? "jsp.error.xml.spaceRequiredBeforeVersionInTextDecl"
1321:                                                : "jsp.error.xml.spaceRequiredBeforeVersionInXMLDecl",
1322:                                        null);
1323:                            }
1324:                            version = fString.toString();
1325:                            state = STATE_ENCODING;
1326:                            if (!version.equals("1.0")) {
1327:                                // REVISIT: XML REC says we should throw an error
1328:                                // in such cases.
1329:                                // some may object the throwing of fatalError.
1330:                                err.jspError(
1331:                                        "jsp.error.xml.versionNotSupported",
1332:                                        version);
1333:                            }
1334:                        } else if (name == fEncodingSymbol) {
1335:                            if (!scanningTextDecl) {
1336:                                err
1337:                                        .jspError("jsp.error.xml.versionInfoRequired");
1338:                            }
1339:                            if (!sawSpace) {
1340:                                reportFatalError(
1341:                                        scanningTextDecl ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1342:                                                : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1343:                                        null);
1344:                            }
1345:                            encoding = fString.toString();
1346:                            state = scanningTextDecl ? STATE_DONE
1347:                                    : STATE_STANDALONE;
1348:                        } else {
1349:                            if (scanningTextDecl) {
1350:                                err
1351:                                        .jspError("jsp.error.xml.encodingDeclRequired");
1352:                            } else {
1353:                                err
1354:                                        .jspError("jsp.error.xml.versionInfoRequired");
1355:                            }
1356:                        }
1357:                        break;
1358:                    }
1359:                    case STATE_ENCODING: {
1360:                        if (name == fEncodingSymbol) {
1361:                            if (!sawSpace) {
1362:                                reportFatalError(
1363:                                        scanningTextDecl ? "jsp.error.xml.spaceRequiredBeforeEncodingInTextDecl"
1364:                                                : "jsp.error.xml.spaceRequiredBeforeEncodingInXMLDecl",
1365:                                        null);
1366:                            }
1367:                            encoding = fString.toString();
1368:                            state = scanningTextDecl ? STATE_DONE
1369:                                    : STATE_STANDALONE;
1370:                            // TODO: check encoding name; set encoding on
1371:                            //       entity scanner
1372:                        } else if (!scanningTextDecl
1373:                                && name == fStandaloneSymbol) {
1374:                            if (!sawSpace) {
1375:                                err
1376:                                        .jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1377:                            }
1378:                            standalone = fString.toString();
1379:                            state = STATE_DONE;
1380:                            if (!standalone.equals("yes")
1381:                                    && !standalone.equals("no")) {
1382:                                err.jspError("jsp.error.xml.sdDeclInvalid");
1383:                            }
1384:                        } else {
1385:                            err.jspError("jsp.error.xml.encodingDeclRequired");
1386:                        }
1387:                        break;
1388:                    }
1389:                    case STATE_STANDALONE: {
1390:                        if (name == fStandaloneSymbol) {
1391:                            if (!sawSpace) {
1392:                                err
1393:                                        .jspError("jsp.error.xml.spaceRequiredBeforeStandalone");
1394:                            }
1395:                            standalone = fString.toString();
1396:                            state = STATE_DONE;
1397:                            if (!standalone.equals("yes")
1398:                                    && !standalone.equals("no")) {
1399:                                err.jspError("jsp.error.xml.sdDeclInvalid");
1400:                            }
1401:                        } else {
1402:                            err.jspError("jsp.error.xml.encodingDeclRequired");
1403:                        }
1404:                        break;
1405:                    }
1406:                    default: {
1407:                        err.jspError("jsp.error.xml.noMorePseudoAttributes");
1408:                    }
1409:                    }
1410:                    sawSpace = skipSpaces();
1411:                }
1412:                // REVISIT: should we remove this error reporting?
1413:                if (scanningTextDecl && state != STATE_DONE) {
1414:                    err.jspError("jsp.error.xml.morePseudoAttributes");
1415:                }
1416:
1417:                // If there is no data in the xml or text decl then we fail to report
1418:                // error for version or encoding info above.
1419:                if (scanningTextDecl) {
1420:                    if (!dataFoundForTarget && encoding == null) {
1421:                        err.jspError("jsp.error.xml.encodingDeclRequired");
1422:                    }
1423:                } else {
1424:                    if (!dataFoundForTarget && version == null) {
1425:                        err.jspError("jsp.error.xml.versionInfoRequired");
1426:                    }
1427:                }
1428:
1429:                // end
1430:                if (!skipChar('?')) {
1431:                    err.jspError("jsp.error.xml.xmlDeclUnterminated");
1432:                }
1433:                if (!skipChar('>')) {
1434:                    err.jspError("jsp.error.xml.xmlDeclUnterminated");
1435:
1436:                }
1437:
1438:                // fill in return array
1439:                pseudoAttributeValues[0] = version;
1440:                pseudoAttributeValues[1] = encoding;
1441:                pseudoAttributeValues[2] = standalone;
1442:            }
1443:
1444:            // Adapted from:
1445:            // org.apache.xerces.impl.XMLScanner.scanPseudoAttribute
1446:            /**
1447:             * Scans a pseudo attribute.
1448:             *
1449:             * @param scanningTextDecl True if scanning this pseudo-attribute for a
1450:             *                         TextDecl; false if scanning XMLDecl. This 
1451:             *                         flag is needed to report the correct type of
1452:             *                         error.
1453:             * @param value            The string to fill in with the attribute 
1454:             *                         value.
1455:             *
1456:             * @return The name of the attribute
1457:             *
1458:             * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
1459:             * at the time of calling is lost.
1460:             */
1461:            public String scanPseudoAttribute(boolean scanningTextDecl,
1462:                    XMLString value) throws IOException, JasperException {
1463:
1464:                String name = scanName();
1465:                if (name == null) {
1466:                    err.jspError("jsp.error.xml.pseudoAttrNameExpected");
1467:                }
1468:                skipSpaces();
1469:                if (!skipChar('=')) {
1470:                    reportFatalError(
1471:                            scanningTextDecl ? "jsp.error.xml.eqRequiredInTextDecl"
1472:                                    : "jsp.error.xml.eqRequiredInXMLDecl", name);
1473:                }
1474:                skipSpaces();
1475:                int quote = peekChar();
1476:                if (quote != '\'' && quote != '"') {
1477:                    reportFatalError(
1478:                            scanningTextDecl ? "jsp.error.xml.quoteRequiredInTextDecl"
1479:                                    : "jsp.error.xml.quoteRequiredInXMLDecl",
1480:                            name);
1481:                }
1482:                scanChar();
1483:                int c = scanLiteral(quote, value);
1484:                if (c != quote) {
1485:                    fStringBuffer2.clear();
1486:                    do {
1487:                        fStringBuffer2.append(value);
1488:                        if (c != -1) {
1489:                            if (c == '&' || c == '%' || c == '<' || c == ']') {
1490:                                fStringBuffer2.append((char) scanChar());
1491:                            } else if (XMLChar.isHighSurrogate(c)) {
1492:                                scanSurrogates(fStringBuffer2);
1493:                            } else if (XMLChar.isInvalid(c)) {
1494:                                String key = scanningTextDecl ? "jsp.error.xml.invalidCharInTextDecl"
1495:                                        : "jsp.error.xml.invalidCharInXMLDecl";
1496:                                reportFatalError(key, Integer.toString(c, 16));
1497:                                scanChar();
1498:                            }
1499:                        }
1500:                        c = scanLiteral(quote, value);
1501:                    } while (c != quote);
1502:                    fStringBuffer2.append(value);
1503:                    value.setValues(fStringBuffer2);
1504:                }
1505:                if (!skipChar(quote)) {
1506:                    reportFatalError(
1507:                            scanningTextDecl ? "jsp.error.xml.closeQuoteMissingInTextDecl"
1508:                                    : "jsp.error.xml.closeQuoteMissingInXMLDecl",
1509:                            name);
1510:                }
1511:
1512:                // return
1513:                return name;
1514:
1515:            }
1516:
1517:            // Adapted from:
1518:            // org.apache.xerces.impl.XMLScanner.scanPIData
1519:            /**
1520:             * Scans a processing data. This is needed to handle the situation
1521:             * where a document starts with a processing instruction whose 
1522:             * target name <em>starts with</em> "xml". (e.g. xmlfoo)
1523:             *
1524:             * <strong>Note:</strong> This method uses fStringBuffer, anything in it
1525:             * at the time of calling is lost.
1526:             *
1527:             * @param target The PI target
1528:             * @param data The string to fill in with the data
1529:             */
1530:            private void scanPIData(String target, XMLString data)
1531:                    throws IOException, JasperException {
1532:
1533:                // check target
1534:                if (target.length() == 3) {
1535:                    char c0 = Character.toLowerCase(target.charAt(0));
1536:                    char c1 = Character.toLowerCase(target.charAt(1));
1537:                    char c2 = Character.toLowerCase(target.charAt(2));
1538:                    if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
1539:                        err.jspError("jsp.error.xml.reservedPITarget");
1540:                    }
1541:                }
1542:
1543:                // spaces
1544:                if (!skipSpaces()) {
1545:                    if (skipString("?>")) {
1546:                        // we found the end, there is no data
1547:                        data.clear();
1548:                        return;
1549:                    } else {
1550:                        // if there is data there should be some space
1551:                        err.jspError("jsp.error.xml.spaceRequiredInPI");
1552:                    }
1553:                }
1554:
1555:                fStringBuffer.clear();
1556:                // data
1557:                if (scanData("?>", fStringBuffer)) {
1558:                    do {
1559:                        int c = peekChar();
1560:                        if (c != -1) {
1561:                            if (XMLChar.isHighSurrogate(c)) {
1562:                                scanSurrogates(fStringBuffer);
1563:                            } else if (XMLChar.isInvalid(c)) {
1564:                                err.jspError("jsp.error.xml.invalidCharInPI",
1565:                                        Integer.toHexString(c));
1566:                                scanChar();
1567:                            }
1568:                        }
1569:                    } while (scanData("?>", fStringBuffer));
1570:                }
1571:                data.setValues(fStringBuffer);
1572:
1573:            }
1574:
1575:            // Adapted from:
1576:            // org.apache.xerces.impl.XMLScanner.scanSurrogates
1577:            /**
1578:             * Scans surrogates and append them to the specified buffer.
1579:             * <p>
1580:             * <strong>Note:</strong> This assumes the current char has already been
1581:             * identified as a high surrogate.
1582:             *
1583:             * @param buf The StringBuffer to append the read surrogates to.
1584:             * @returns True if it succeeded.
1585:             */
1586:            private boolean scanSurrogates(XMLStringBuffer buf)
1587:                    throws IOException, JasperException {
1588:
1589:                int high = scanChar();
1590:                int low = peekChar();
1591:                if (!XMLChar.isLowSurrogate(low)) {
1592:                    err.jspError("jsp.error.xml.invalidCharInContent", Integer
1593:                            .toString(high, 16));
1594:                    return false;
1595:                }
1596:                scanChar();
1597:
1598:                // convert surrogates to supplemental character
1599:                int c = XMLChar.supplemental((char) high, (char) low);
1600:
1601:                // supplemental character must be a valid XML character
1602:                if (!XMLChar.isValid(c)) {
1603:                    err.jspError("jsp.error.xml.invalidCharInContent", Integer
1604:                            .toString(c, 16));
1605:                    return false;
1606:                }
1607:
1608:                // fill in the buffer
1609:                buf.append((char) high);
1610:                buf.append((char) low);
1611:
1612:                return true;
1613:
1614:            }
1615:
1616:            // Adapted from:
1617:            // org.apache.xerces.impl.XMLScanner.reportFatalError
1618:            /**
1619:             * Convenience function used in all XML scanners.
1620:             */
1621:            private void reportFatalError(String msgId, String arg)
1622:                    throws JasperException {
1623:                err.jspError(msgId, arg);
1624:            }
1625:
1626:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.