Source Code Cross Referenced for XMLScanner.java in  » XML » xerces-2_9_1 » org » apache » xerces » impl » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » XML » xerces 2_9_1 » org.apache.xerces.impl 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         * Licensed to the Apache Software Foundation (ASF) under one or more
0003:         * contributor license agreements.  See the NOTICE file distributed with
0004:         * this work for additional information regarding copyright ownership.
0005:         * The ASF licenses this file to You under the Apache License, Version 2.0
0006:         * (the "License"); you may not use this file except in compliance with
0007:         * the License.  You may obtain a copy of the License at
0008:         * 
0009:         *      http://www.apache.org/licenses/LICENSE-2.0
0010:         * 
0011:         * Unless required by applicable law or agreed to in writing, software
0012:         * distributed under the License is distributed on an "AS IS" BASIS,
0013:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014:         * See the License for the specific language governing permissions and
0015:         * limitations under the License.
0016:         */
0017:
0018:        package org.apache.xerces.impl;
0019:
0020:        import java.io.IOException;
0021:
0022:        import org.apache.xerces.impl.msg.XMLMessageFormatter;
0023:        import org.apache.xerces.util.SymbolTable;
0024:        import org.apache.xerces.util.XMLChar;
0025:        import org.apache.xerces.util.XMLResourceIdentifierImpl;
0026:        import org.apache.xerces.util.XMLStringBuffer;
0027:        import org.apache.xerces.xni.Augmentations;
0028:        import org.apache.xerces.xni.XMLResourceIdentifier;
0029:        import org.apache.xerces.xni.XMLString;
0030:        import org.apache.xerces.xni.XNIException;
0031:        import org.apache.xerces.xni.parser.XMLComponent;
0032:        import org.apache.xerces.xni.parser.XMLComponentManager;
0033:        import org.apache.xerces.xni.parser.XMLConfigurationException;
0034:
0035:        /**
0036:         * This class is responsible for holding scanning methods common to
0037:         * scanning the XML document structure and content as well as the DTD
0038:         * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
0039:         * from this base class.
0040:         *
0041:         * <p>
0042:         * This component requires the following features and properties from the
0043:         * component manager that uses it:
0044:         * <ul>
0045:         *  <li>http://xml.org/sax/features/validation</li> 
0046:         *  <li>http://xml.org/sax/features/namespaces</li>
0047:         *  <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
0048:         *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
0049:         *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
0050:         *  <li>http://apache.org/xml/properties/internal/entity-manager</li>
0051:         * </ul>
0052:         * 
0053:         * @xerces.internal
0054:         *
0055:         * @author Andy Clark, IBM
0056:         * @author Arnaud  Le Hors, IBM
0057:         * @author Eric Ye, IBM
0058:         *
0059:         * @version $Id: XMLScanner.java 572055 2007-09-02 17:55:43Z mrglavas $
0060:         */
0061:        public abstract class XMLScanner implements  XMLComponent {
0062:
0063:            //
0064:            // Constants
0065:            //
0066:
0067:            // feature identifiers
0068:
0069:            /** Feature identifier: validation (SAX feature prefix + feature name). */
0070:            protected static final String VALIDATION = Constants.SAX_FEATURE_PREFIX
0071:                    + Constants.VALIDATION_FEATURE;
0072:
0073:            /** Feature identifier: namespaces (SAX feature prefix + feature name). */
0074:            protected static final String NAMESPACES = Constants.SAX_FEATURE_PREFIX
0075:                    + Constants.NAMESPACES_FEATURE;
0076:
0077:            /** Feature identifier: notify character references (Xerces feature prefix + feature name). */
0078:            protected static final String NOTIFY_CHAR_REFS = Constants.XERCES_FEATURE_PREFIX
0079:                    + Constants.NOTIFY_CHAR_REFS_FEATURE;
0080:
0081:            protected static final String PARSER_SETTINGS = Constants.XERCES_FEATURE_PREFIX
0082:                    + Constants.PARSER_SETTINGS; // feature identifier: internal parser settings
0083:
0084:            // property identifiers
0085:
0086:            /** Property identifier: symbol table (Xerces property prefix + property name). */
0087:            protected static final String SYMBOL_TABLE = Constants.XERCES_PROPERTY_PREFIX
0088:                    + Constants.SYMBOL_TABLE_PROPERTY;
0089:
0090:            /** Property identifier: error reporter (Xerces property prefix + property name). */
0091:            protected static final String ERROR_REPORTER = Constants.XERCES_PROPERTY_PREFIX
0092:                    + Constants.ERROR_REPORTER_PROPERTY;
0093:
0094:            /** Property identifier: entity manager (Xerces property prefix + property name). */
0095:            protected static final String ENTITY_MANAGER = Constants.XERCES_PROPERTY_PREFIX
0096:                    + Constants.ENTITY_MANAGER_PROPERTY;
0097:
0098:            // debugging
0099:
0100:            /** Debug attribute normalization (compile-time switch, currently off). */
0101:            protected static final boolean DEBUG_ATTR_NORMALIZATION = false;
0102:
0103:            //
0104:            // Data
0105:            //
0106:
0107:            // features
0108:
0109:            /**
0110:             * Validation state. The corresponding feature identifier is:
0111:             * http://xml.org/sax/features/validation
0112:             */
0113:            protected boolean fValidation = false;
0114:
0115:            /** Namespaces state; when set, scanPI() scans the PI target as an NCName. */
0116:            protected boolean fNamespaces;
0117:
0118:            /** Character references notification (notify-char-refs feature state). */
0119:            protected boolean fNotifyCharRefs = false;
0120:
0121:            /** Internal parser-settings feature; reset(XMLComponentManager) re-reads the configuration only when true. */
0122:            protected boolean fParserSettings = true;
0123:
0124:            // properties
0125:
0126:            /** Symbol table. */
0127:            protected SymbolTable fSymbolTable;
0128:
0129:            /** Error reporter. */
0130:            protected XMLErrorReporter fErrorReporter;
0131:
0132:            /** Entity manager. */
0133:            protected XMLEntityManager fEntityManager;
0134:
0135:            // protected data
0136:
0137:            /** Entity scanner. */
0138:            protected XMLEntityScanner fEntityScanner;
0139:
0140:            /** Entity depth. */
0141:            protected int fEntityDepth;
0142:
0143:            /** Literal value of the last character reference scanned. */
0144:            protected String fCharRefLiteral = null;
0145:
0146:            /** Scanning attribute. */
0147:            protected boolean fScanningAttribute;
0148:
0149:            /** Report entity boundary; scanPI() clears this while a PI is scanned and sets it afterwards. */
0150:            protected boolean fReportEntity;
0151:
0152:            // symbols (interned; scanXMLDeclOrTextDecl compares scanned names against them with ==)
0153:
0154:            /** Symbol: "version". */
0155:            protected final static String fVersionSymbol = "version".intern();
0156:
0157:            /** Symbol: "encoding". */
0158:            protected final static String fEncodingSymbol = "encoding".intern();
0159:
0160:            /** Symbol: "standalone". */
0161:            protected final static String fStandaloneSymbol = "standalone"
0162:                    .intern();
0163:
0164:            /** Symbol: "amp". */
0165:            protected final static String fAmpSymbol = "amp".intern();
0166:
0167:            /** Symbol: "lt". */
0168:            protected final static String fLtSymbol = "lt".intern();
0169:
0170:            /** Symbol: "gt". */
0171:            protected final static String fGtSymbol = "gt".intern();
0172:
0173:            /** Symbol: "quot". */
0174:            protected final static String fQuotSymbol = "quot".intern();
0175:
0176:            /** Symbol: "apos". */
0177:            protected final static String fAposSymbol = "apos".intern();
0178:
0179:            // temporary variables
0180:
0181:            // NOTE: These objects are private to help prevent accidental modification
0182:            //       of values by a subclass. If they were protected *and* a subclass
0183:            //       modified the values, it would be difficult to track down the real
0184:            //       cause of the bug. By making these private, we avoid this
0185:            //       possibility.
0186:
0187:            /** Scratch string; overwritten by scanXMLDeclOrTextDecl and scanPI. */
0188:            private final XMLString fString = new XMLString();
0189:
0190:            /** Scratch string buffer (the scanPIData documentation states that it uses this one). */
0191:            private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
0192:
0193:            /** Scratch string buffer; scanPseudoAttribute assembles multi-part values in it. */
0194:            private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
0195:
0196:            /** Additional scratch string buffer. */
0197:            private final XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
0198:
0199:            // temporary location for Resource identification information.
0200:            protected final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
0201:
0202:            //
0203:            // XMLComponent methods
0204:            //
0205:
0206:            /**
0207:             * 
0208:             * 
0209:             * @param componentManager The component manager.
0210:             *
0211:             * @throws SAXException Throws exception if required features and
0212:             *                      properties cannot be found.
0213:             */
0214:            public void reset(XMLComponentManager componentManager)
0215:                    throws XMLConfigurationException {
0216:
0217:                try {
0218:                    fParserSettings = componentManager
0219:                            .getFeature(PARSER_SETTINGS);
0220:                } catch (XMLConfigurationException e) {
0221:                    fParserSettings = true;
0222:                }
0223:
0224:                if (!fParserSettings) {
0225:                    // parser settings have not been changed
0226:                    init();
0227:                    return;
0228:                }
0229:
0230:                // Xerces properties
0231:                fSymbolTable = (SymbolTable) componentManager
0232:                        .getProperty(SYMBOL_TABLE);
0233:                fErrorReporter = (XMLErrorReporter) componentManager
0234:                        .getProperty(ERROR_REPORTER);
0235:                fEntityManager = (XMLEntityManager) componentManager
0236:                        .getProperty(ENTITY_MANAGER);
0237:
0238:                // sax features
0239:                try {
0240:                    fValidation = componentManager.getFeature(VALIDATION);
0241:                } catch (XMLConfigurationException e) {
0242:                    fValidation = false;
0243:                }
0244:                try {
0245:                    fNamespaces = componentManager.getFeature(NAMESPACES);
0246:                } catch (XMLConfigurationException e) {
0247:                    fNamespaces = true;
0248:                }
0249:                try {
0250:                    fNotifyCharRefs = componentManager
0251:                            .getFeature(NOTIFY_CHAR_REFS);
0252:                } catch (XMLConfigurationException e) {
0253:                    fNotifyCharRefs = false;
0254:                }
0255:
0256:                init();
0257:
0258:            } // reset(XMLComponentManager)
0259:
0260:            /**
0261:             * Sets the value of a property during parsing.
0262:             * 
0263:             * @param propertyId 
0264:             * @param value 
0265:             */
0266:            public void setProperty(String propertyId, Object value)
0267:                    throws XMLConfigurationException {
0268:
0269:                // Xerces properties
0270:                if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
0271:                    final int suffixLength = propertyId.length()
0272:                            - Constants.XERCES_PROPERTY_PREFIX.length();
0273:
0274:                    if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY
0275:                            .length()
0276:                            && propertyId
0277:                                    .endsWith(Constants.SYMBOL_TABLE_PROPERTY)) {
0278:                        fSymbolTable = (SymbolTable) value;
0279:                    } else if (suffixLength == Constants.ERROR_REPORTER_PROPERTY
0280:                            .length()
0281:                            && propertyId
0282:                                    .endsWith(Constants.ERROR_REPORTER_PROPERTY)) {
0283:                        fErrorReporter = (XMLErrorReporter) value;
0284:                    } else if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY
0285:                            .length()
0286:                            && propertyId
0287:                                    .endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
0288:                        fEntityManager = (XMLEntityManager) value;
0289:                    }
0290:                }
0291:
0292:            } // setProperty(String,Object)
0293:
0294:            /** Sets the validation or notify-char-refs feature; any other
0295:             *  feature identifier is ignored without error. */
0296:            public void setFeature(String featureId, boolean value)
0297:                    throws XMLConfigurationException {
0298:                if (VALIDATION.equals(featureId)) {
0299:                    fValidation = value;
0300:                    return;
0301:                }
0302:                if (NOTIFY_CHAR_REFS.equals(featureId)) {
0303:                    fNotifyCharRefs = value;
0304:                }
0305:            }
0306:
0307:            /**
0308:             * Reports the state of the validation or notify-char-refs feature.
0309:             * Any other identifier is rejected as NOT_RECOGNIZED.
0310:             */
0311:            public boolean getFeature(String featureId)
0312:                    throws XMLConfigurationException {
0313:
0314:                final boolean wantsValidation = VALIDATION.equals(featureId);
0315:                if (!wantsValidation && !NOTIFY_CHAR_REFS.equals(featureId)) {
0316:                    throw new XMLConfigurationException(
0317:                            XMLConfigurationException.NOT_RECOGNIZED, featureId);
0318:                }
0319:                return wantsValidation ? fValidation : fNotifyCharRefs;
0320:            }
0321:
0322:            //
0323:            // Protected methods
0324:            //
0325:
0326:            // Callers must have installed the symbol table before invoking this.
0327:            protected void reset() {
0328:
0329:                init();
0330:
0331:                // defaults used for DTD preparsing
0332:                fNotifyCharRefs = false;
0333:                fValidation = true;
0334:            }
0335:
0336:            // common scanning methods
0337:
0338:            /**
0339:             * Scans an XML or text declaration, checking pseudo-attribute order.
0340:             * <p>
0341:             * <pre>
0342:             * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
0343:             * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
0344:             * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
0345:             * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
0346:             * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
0347:             *                 | ('"' ('yes' | 'no') '"'))
0348:             *
0349:             * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
0350:             * </pre>
0351:             * Pseudo-attributes that do not occur are returned as null.
0352:             * @param scanningTextDecl True if a text declaration is to
0353:             *                         be scanned instead of an XML
0354:             *                         declaration.
0355:             * @param pseudoAttributeValues An array of size 3 to return the version,
0356:             *                         encoding and standalone pseudo attribute values
0357:             *                         (in that order).
0358:             *
0359:             * <strong>Note:</strong> This method uses fString, anything in it
0360:             * at the time of calling is lost.
0361:             */
0362:            protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
0363:                    String[] pseudoAttributeValues) throws IOException,
0364:                    XNIException {
0365:
0366:                // pseudo-attribute values
0367:                String version = null;
0368:                String encoding = null;
0369:                String standalone = null;
0370:
0371:                // scan pseudo-attributes; the state names what may legally come next
0372:                final int STATE_VERSION = 0;
0373:                final int STATE_ENCODING = 1;
0374:                final int STATE_STANDALONE = 2;
0375:                final int STATE_DONE = 3;
0376:                int state = STATE_VERSION;
0377:
0378:                boolean dataFoundForTarget = false; // set once anything precedes the closing '?'
0379:                boolean sawSpace = fEntityScanner.skipDeclSpaces(); // space before the next pseudo-attribute
0380:                // since pseudoattributes are *not* attributes,
0381:                // their quotes don't need to be preserved in external parameter entities.
0382:                // the XMLEntityScanner#scanLiteral method will continue to
0383:                // emit -1 in such cases when it finds a quote; this is
0384:                // fine for other methods that parse scanned entities,
0385:                // but not for the scanning of pseudoattributes.  So,
0386:                // temporarily, we must mark the current entity as not being "literal"
0387:                XMLEntityManager.ScannedEntity currEnt = fEntityManager
0388:                        .getCurrentEntity();
0389:                boolean currLiteral = currEnt.literal;
0390:                currEnt.literal = false;
0391:                while (fEntityScanner.peekChar() != '?') {
0392:                    dataFoundForTarget = true;
0393:                    String name = scanPseudoAttribute(scanningTextDecl, fString);
0394:                    switch (state) {
0395:                    case STATE_VERSION: {
0396:                        if (name == fVersionSymbol) { // identity test; NOTE(review): presumably scanName returns interned strings - confirm
0397:                            if (!sawSpace) {
0398:                                reportFatalError(
0399:                                        scanningTextDecl ? "SpaceRequiredBeforeVersionInTextDecl"
0400:                                                : "SpaceRequiredBeforeVersionInXMLDecl",
0401:                                        null);
0402:                            }
0403:                            version = fString.toString();
0404:                            state = STATE_ENCODING;
0405:                            if (!versionSupported(version)) {
0406:                                reportFatalError(getVersionNotSupportedKey(),
0407:                                        new Object[] { version });
0408:                            }
0409:                        } else if (name == fEncodingSymbol) { // encoding first: reported as an error unless scanning a TextDecl
0410:                            if (!scanningTextDecl) {
0411:                                reportFatalError("VersionInfoRequired", null);
0412:                            }
0413:                            if (!sawSpace) {
0414:                                reportFatalError(
0415:                                        scanningTextDecl ? "SpaceRequiredBeforeEncodingInTextDecl"
0416:                                                : "SpaceRequiredBeforeEncodingInXMLDecl",
0417:                                        null);
0418:                            }
0419:                            encoding = fString.toString();
0420:                            state = scanningTextDecl ? STATE_DONE
0421:                                    : STATE_STANDALONE;
0422:                        } else {
0423:                            if (scanningTextDecl) {
0424:                                reportFatalError("EncodingDeclRequired", null);
0425:                            } else {
0426:                                reportFatalError("VersionInfoRequired", null);
0427:                            }
0428:                        }
0429:                        break;
0430:                    }
0431:                    case STATE_ENCODING: {
0432:                        if (name == fEncodingSymbol) {
0433:                            if (!sawSpace) {
0434:                                reportFatalError(
0435:                                        scanningTextDecl ? "SpaceRequiredBeforeEncodingInTextDecl"
0436:                                                : "SpaceRequiredBeforeEncodingInXMLDecl",
0437:                                        null);
0438:                            }
0439:                            encoding = fString.toString();
0440:                            state = scanningTextDecl ? STATE_DONE
0441:                                    : STATE_STANDALONE;
0442:                            // TODO: check encoding name; set encoding on
0443:                            //       entity scanner
0444:                        } else if (!scanningTextDecl
0445:                                && name == fStandaloneSymbol) {
0446:                            if (!sawSpace) {
0447:                                reportFatalError(
0448:                                        "SpaceRequiredBeforeStandalone", null);
0449:                            }
0450:                            standalone = fString.toString();
0451:                            state = STATE_DONE;
0452:                            if (!standalone.equals("yes")
0453:                                    && !standalone.equals("no")) {
0454:                                reportFatalError("SDDeclInvalid",
0455:                                        new Object[] { standalone });
0456:                            }
0457:                        } else {
0458:                            reportFatalError("EncodingDeclRequired", null);
0459:                        }
0460:                        break;
0461:                    }
0462:                    case STATE_STANDALONE: {
0463:                        if (name == fStandaloneSymbol) {
0464:                            if (!sawSpace) {
0465:                                reportFatalError(
0466:                                        "SpaceRequiredBeforeStandalone", null);
0467:                            }
0468:                            standalone = fString.toString();
0469:                            state = STATE_DONE;
0470:                            if (!standalone.equals("yes")
0471:                                    && !standalone.equals("no")) {
0472:                                reportFatalError("SDDeclInvalid",
0473:                                        new Object[] { standalone });
0474:                            }
0475:                        } else {
0476:                            reportFatalError("EncodingDeclRequired", null); // NOTE(review): this key is reported although standalone was expected here - confirm intended
0477:                        }
0478:                        break;
0479:                    }
0480:                    default: { // STATE_DONE: any further pseudo-attribute is an error
0481:                        reportFatalError("NoMorePseudoAttributes", null);
0482:                    }
0483:                    }
0484:                    sawSpace = fEntityScanner.skipDeclSpaces();
0485:                }
0486:                // restore original literal value (not reached if an error above propagates as an exception)
0487:                if (currLiteral)
0488:                    currEnt.literal = true;
0489:                // REVISIT: should we remove this error reporting?
0490:                if (scanningTextDecl && state != STATE_DONE) {
0491:                    reportFatalError("MorePseudoAttributes", null);
0492:                }
0493:
0494:                // If there is no data in the xml or text decl then we fail to report error
0495:                // for version or encoding info above.
0496:                if (scanningTextDecl) {
0497:                    if (!dataFoundForTarget && encoding == null) {
0498:                        reportFatalError("EncodingDeclRequired", null);
0499:                    }
0500:                } else {
0501:                    if (!dataFoundForTarget && version == null) {
0502:                        reportFatalError("VersionInfoRequired", null);
0503:                    }
0504:                }
0505:
0506:                // end: the declaration must close with "?>"
0507:                if (!fEntityScanner.skipChar('?')) {
0508:                    reportFatalError("XMLDeclUnterminated", null);
0509:                }
0510:                if (!fEntityScanner.skipChar('>')) {
0511:                    reportFatalError("XMLDeclUnterminated", null);
0512:
0513:                }
0514:
0515:                // fill in return array
0516:                pseudoAttributeValues[0] = version;
0517:                pseudoAttributeValues[1] = encoding;
0518:                pseudoAttributeValues[2] = standalone;
0519:
0520:            } // scanXMLDeclOrTextDecl(boolean)
0521:
0522:            /**
0523:             * Scans a pseudo attribute: a name, '=', and a quoted value.
0524:             *
0525:             * @param scanningTextDecl True if scanning this pseudo-attribute for a
0526:             *                         TextDecl; false if scanning XMLDecl. This
0527:             *                         flag is needed to report the correct type of
0528:             *                         error.
0529:             * @param value            The string to fill in with the attribute
0530:             *                         value.
0531:             *
0532:             * @return The name of the attribute, as returned by the entity scanner
0533:             *
0534:             * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
0535:             * at the time of calling is lost.
0536:             */
0537:            public String scanPseudoAttribute(boolean scanningTextDecl,
0538:                    XMLString value) throws IOException, XNIException {
0539:
0540:                // REVISIT: This method is used for generic scanning of
0541:                // pseudo attributes, but since there are only three such
0542:                // attributes: version, encoding, and standalone there are
0543:                // more performant ways of scanning them. Every decl must
0544:                // have a version, and in TextDecls this version must
0545:                // be followed by an encoding declaration. Also the
0546:                // methods we invoke on the scanners allow non-ASCII
0547:                // characters to be parsed in the decls, but since
0548:                // we don't even know what the actual encoding of the
0549:                // document is until we scan the encoding declaration
0550:                // you cannot reliably read any characters outside
0551:                // of the ASCII range here. -- mrglavas
0552:                String name = fEntityScanner.scanName();
0553:                XMLEntityManager.print(fEntityManager.getCurrentEntity()); // NOTE(review): looks like a debugging aid - confirm
0554:                if (name == null) {
0555:                    reportFatalError("PseudoAttrNameExpected", null);
0556:                }
0557:                fEntityScanner.skipDeclSpaces();
0558:                if (!fEntityScanner.skipChar('=')) {
0559:                    reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
0560:                            : "EqRequiredInXMLDecl", new Object[] { name });
0561:                }
0562:                fEntityScanner.skipDeclSpaces();
0563:                int quote = fEntityScanner.peekChar();
0564:                if (quote != '\'' && quote != '"') {
0565:                    reportFatalError(
0566:                            scanningTextDecl ? "QuoteRequiredInTextDecl"
0567:                                    : "QuoteRequiredInXMLDecl",
0568:                            new Object[] { name });
0569:                }
0570:                fEntityScanner.scanChar(); // consume the opening quote peeked above
0571:                int c = fEntityScanner.scanLiteral(quote, value);
0572:                if (c != quote) { // one scan did not reach the closing quote: accumulate the pieces
0573:                    fStringBuffer2.clear();
0574:                    do {
0575:                        fStringBuffer2.append(value);
0576:                        if (c != -1) {
0577:                            if (c == '&' || c == '%' || c == '<' || c == ']') {
0578:                                fStringBuffer2.append((char) fEntityScanner
0579:                                        .scanChar());
0580:                            }
0581:                            // REVISIT: Even if you could reliably read non-ASCII chars
0582:                            // why bother scanning for surrogates here? Only ASCII chars
0583:                            // match the productions in XMLDecls and TextDecls. -- mrglavas
0584:                            else if (XMLChar.isHighSurrogate(c)) {
0585:                                scanSurrogates(fStringBuffer2);
0586:                            } else if (isInvalidLiteral(c)) {
0587:                                String key = scanningTextDecl ? "InvalidCharInTextDecl"
0588:                                        : "InvalidCharInXMLDecl";
0589:                                reportFatalError(key, new Object[] { Integer
0590:                                        .toString(c, 16) });
0591:                                fEntityScanner.scanChar(); // skip the offending character after reporting it
0592:                            }
0593:                        }
0594:                        c = fEntityScanner.scanLiteral(quote, value);
0595:                    } while (c != quote);
0596:                    fStringBuffer2.append(value);
0597:                    value.setValues(fStringBuffer2); // hand the assembled value back through the out-parameter
0598:                }
0599:                if (!fEntityScanner.skipChar(quote)) {
0600:                    reportFatalError(
0601:                            scanningTextDecl ? "CloseQuoteMissingInTextDecl"
0602:                                    : "CloseQuoteMissingInXMLDecl",
0603:                            new Object[] { name });
0604:                }
0605:
0606:                // return
0607:                return name;
0608:
0609:            } // scanPseudoAttribute(XMLString):String
0610:
0611:            /**
0612:             * Scans a processing instruction.
0613:             * <p>
0614:             * <pre>
0615:             * [16] PI ::= '&lt;?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
0616:             * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
0617:             * </pre>
0618:             * <strong>Note:</strong> This method uses fString, anything in it
0619:             * at the time of calling is lost.
0620:             */
0621:            protected void scanPI() throws IOException, XNIException {
0622:
0623:                // target
0624:                fReportEntity = false;
0625:                String target = null;
0626:                if (fNamespaces) {
0627:                    target = fEntityScanner.scanNCName();
0628:                } else {
0629:                    target = fEntityScanner.scanName();
0630:                }
0631:                if (target == null) {
0632:                    reportFatalError("PITargetRequired", null);
0633:                }
0634:
0635:                // scan data
0636:                scanPIData(target, fString);
0637:                fReportEntity = true;
0638:
0639:            } // scanPI()
0640:
0641:            /**
0642:             * Scans a processing data. This is needed to handle the situation
0643:             * where a document starts with a processing instruction whose 
0644:             * target name <em>starts with</em> "xml". (e.g. xmlfoo)
0645:             *
0646:             * <strong>Note:</strong> This method uses fStringBuffer, anything in it
0647:             * at the time of calling is lost.
0648:             *
0649:             * @param target The PI target
0650:             * @param data The string to fill in with the data
0651:             */
0652:            protected void scanPIData(String target, XMLString data)
0653:                    throws IOException, XNIException {
0654:
0655:                // check target
0656:                if (target.length() == 3) {
0657:                    char c0 = Character.toLowerCase(target.charAt(0));
0658:                    char c1 = Character.toLowerCase(target.charAt(1));
0659:                    char c2 = Character.toLowerCase(target.charAt(2));
0660:                    if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
0661:                        reportFatalError("ReservedPITarget", null);
0662:                    }
0663:                }
0664:
0665:                // spaces
0666:                if (!fEntityScanner.skipSpaces()) {
0667:                    if (fEntityScanner.skipString("?>")) {
0668:                        // we found the end, there is no data
0669:                        data.clear();
0670:                        return;
0671:                    } else {
0672:                        if (fNamespaces && fEntityScanner.peekChar() == ':') {
0673:                            fEntityScanner.scanChar();
0674:                            XMLStringBuffer colonName = new XMLStringBuffer(
0675:                                    target);
0676:                            colonName.append(":");
0677:                            String str = fEntityScanner.scanName();
0678:                            if (str != null)
0679:                                colonName.append(str);
0680:                            reportFatalError("ColonNotLegalWithNS",
0681:                                    new Object[] { colonName.toString() });
0682:                            fEntityScanner.skipSpaces();
0683:                        } else {
0684:                            // if there is data there should be some space
0685:                            reportFatalError("SpaceRequiredInPI", null);
0686:                        }
0687:                    }
0688:                }
0689:
0690:                fStringBuffer.clear();
0691:                // data
0692:                if (fEntityScanner.scanData("?>", fStringBuffer)) {
0693:                    do {
0694:                        int c = fEntityScanner.peekChar();
0695:                        if (c != -1) {
0696:                            if (XMLChar.isHighSurrogate(c)) {
0697:                                scanSurrogates(fStringBuffer);
0698:                            } else if (isInvalidLiteral(c)) {
0699:                                reportFatalError("InvalidCharInPI",
0700:                                        new Object[] { Integer.toHexString(c) });
0701:                                fEntityScanner.scanChar();
0702:                            }
0703:                        }
0704:                    } while (fEntityScanner.scanData("?>", fStringBuffer));
0705:                }
0706:                data.setValues(fStringBuffer);
0707:
0708:            } // scanPIData(String,XMLString)
0709:
0710:            /**
0711:             * Scans a comment.
0712:             * <p>
0713:             * <pre>
0714:             * [15] Comment ::= '&lt!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
0715:             * </pre>
0716:             * <p>
0717:             * <strong>Note:</strong> Called after scanning past '&lt;!--'
0718:             * <strong>Note:</strong> This method uses fString, anything in it
0719:             * at the time of calling is lost.
0720:             *
0721:             * @param text The buffer to fill in with the text.
0722:             */
0723:            protected void scanComment(XMLStringBuffer text)
0724:                    throws IOException, XNIException {
0725:
0726:                // text
0727:                // REVISIT: handle invalid character, eof
0728:                text.clear();
0729:                while (fEntityScanner.scanData("--", text)) {
0730:                    int c = fEntityScanner.peekChar();
0731:                    if (c != -1) {
0732:                        if (XMLChar.isHighSurrogate(c)) {
0733:                            scanSurrogates(text);
0734:                        } else if (isInvalidLiteral(c)) {
0735:                            reportFatalError("InvalidCharInComment",
0736:                                    new Object[] { Integer.toHexString(c) });
0737:                            fEntityScanner.scanChar();
0738:                        }
0739:                    }
0740:                }
0741:                if (!fEntityScanner.skipChar('>')) {
0742:                    reportFatalError("DashDashInComment", null);
0743:                }
0744:
0745:            } // scanComment()
0746:
            /**
             * Scans an attribute value and normalizes whitespace converting all
             * whitespace characters to space characters.
             * 
             * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
             *
             * <p>
             * Two values are built in parallel: the normalized value (accumulated in
             * fStringBuffer and returned through <code>value</code>) and the
             * non-normalized value (accumulated in fStringBuffer2 and returned
             * through <code>nonNormalizedValue</code>). Text is only added to the
             * non-normalized value while <code>fEntityDepth</code> equals the depth
             * recorded when scanning of this attribute began.
             *
             * @param value The XMLString to fill in with the value.
             * @param nonNormalizedValue The XMLString to fill in with the 
             *                           non-normalized value.
             * @param atName The name of the attribute being parsed (for error msgs).
             * @param checkEntities true if undeclared entities should be reported as VC violation,  
             *                      false if undeclared entities should be reported as WFC violation.
             * @param eleName The name of element to which this attribute belongs.
             *
             * @return true if the non-normalized and normalized value are the same
             * 
             * <strong>Note:</strong> This method uses fStringBuffer, fStringBuffer2
             * and fStringBuffer3, anything in them at the time of calling is lost.
             **/
            protected boolean scanAttributeValue(XMLString value,
                    XMLString nonNormalizedValue, String atName,
                    boolean checkEntities, String eleName) throws IOException,
                    XNIException {
                // quote
                int quote = fEntityScanner.peekChar();
                if (quote != '\'' && quote != '"') {
                    reportFatalError("OpenQuoteExpected", new Object[] {
                            eleName, atName });
                }

                // consume the opening quote and remember the entity depth at
                // which this attribute value started
                fEntityScanner.scanChar();
                int entityDepth = fEntityDepth;

                int c = fEntityScanner.scanLiteral(quote, value);
                if (DEBUG_ATTR_NORMALIZATION) {
                    System.out.println("** scanLiteral -> \""
                            + value.toString() + "\"");
                }

                // fast path: the literal ran straight to the closing quote and
                // contains no character that normalization would change
                int fromIndex = 0;
                if (c == quote
                        && (fromIndex = isUnchangedByNormalization(value)) == -1) {
                    /** Both the non-normalized and normalized attribute values are equal. **/
                    nonNormalizedValue.setValues(value);
                    int cquote = fEntityScanner.scanChar();
                    if (cquote != quote) {
                        reportFatalError("CloseQuoteExpected", new Object[] {
                                eleName, atName });
                    }
                    return true;
                }
                // keep a copy of the raw text in fStringBuffer2 before the
                // characters of value are normalized in place
                fStringBuffer2.clear();
                fStringBuffer2.append(value);
                normalizeWhitespace(value, fromIndex);
                if (DEBUG_ATTR_NORMALIZATION) {
                    System.out.println("** normalizeWhitespace -> \""
                            + value.toString() + "\"");
                }
                if (c != quote) {
                    // scanLiteral stopped on something other than the closing
                    // quote: handle that character, then continue scanning
                    fScanningAttribute = true;
                    fStringBuffer.clear();
                    do {
                        fStringBuffer.append(value);
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** value2: \""
                                    + fStringBuffer.toString() + "\"");
                        }
                        if (c == '&') {
                            // reference: either a character reference ("&#...")
                            // or a general entity reference
                            fEntityScanner.skipChar('&');
                            if (entityDepth == fEntityDepth) {
                                fStringBuffer2.append('&');
                            }
                            if (fEntityScanner.skipChar('#')) {
                                if (entityDepth == fEntityDepth) {
                                    fStringBuffer2.append('#');
                                }
                                int ch = scanCharReferenceValue(fStringBuffer,
                                        fStringBuffer2);
                                if (ch != -1) {
                                    if (DEBUG_ATTR_NORMALIZATION) {
                                        System.out.println("** value3: \""
                                                + fStringBuffer.toString()
                                                + "\"");
                                    }
                                }
                            } else {
                                String entityName = fEntityScanner.scanName();
                                if (entityName == null) {
                                    reportFatalError("NameRequiredInReference",
                                            null);
                                } else if (entityDepth == fEntityDepth) {
                                    fStringBuffer2.append(entityName);
                                }
                                if (!fEntityScanner.skipChar(';')) {
                                    reportFatalError(
                                            "SemicolonRequiredInReference",
                                            new Object[] { entityName });
                                } else if (entityDepth == fEntityDepth) {
                                    fStringBuffer2.append(';');
                                }
                                // The predefined entity names are compared by
                                // reference, not with equals().
                                // NOTE(review): this presumably relies on scanName()
                                // returning the same canonical String instances as
                                // the f*Symbol fields -- confirm.
                                if (entityName == fAmpSymbol) {
                                    fStringBuffer.append('&');
                                    if (DEBUG_ATTR_NORMALIZATION) {
                                        System.out.println("** value5: \""
                                                + fStringBuffer.toString()
                                                + "\"");
                                    }
                                } else if (entityName == fAposSymbol) {
                                    fStringBuffer.append('\'');
                                    if (DEBUG_ATTR_NORMALIZATION) {
                                        System.out.println("** value7: \""
                                                + fStringBuffer.toString()
                                                + "\"");
                                    }
                                } else if (entityName == fLtSymbol) {
                                    fStringBuffer.append('<');
                                    if (DEBUG_ATTR_NORMALIZATION) {
                                        System.out.println("** value9: \""
                                                + fStringBuffer.toString()
                                                + "\"");
                                    }
                                } else if (entityName == fGtSymbol) {
                                    fStringBuffer.append('>');
                                    if (DEBUG_ATTR_NORMALIZATION) {
                                        System.out.println("** valueB: \""
                                                + fStringBuffer.toString()
                                                + "\"");
                                    }
                                } else if (entityName == fQuotSymbol) {
                                    fStringBuffer.append('"');
                                    if (DEBUG_ATTR_NORMALIZATION) {
                                        System.out.println("** valueD: \""
                                                + fStringBuffer.toString()
                                                + "\"");
                                    }
                                } else {
                                    // any other general entity: external entities
                                    // are rejected, everything else is started
                                    if (fEntityManager
                                            .isExternalEntity(entityName)) {
                                        reportFatalError(
                                                "ReferenceToExternalEntity",
                                                new Object[] { entityName });
                                    } else {
                                        if (!fEntityManager
                                                .isDeclaredEntity(entityName)) {
                                            //WFC & VC: Entity Declared
                                            // checkEntities: reported as a plain
                                            // error and only when validating;
                                            // otherwise reported as a fatal error
                                            if (checkEntities) {
                                                if (fValidation) {
                                                    fErrorReporter
                                                            .reportError(
                                                                    XMLMessageFormatter.XML_DOMAIN,
                                                                    "EntityNotDeclared",
                                                                    new Object[] { entityName },
                                                                    XMLErrorReporter.SEVERITY_ERROR);
                                                }
                                            } else {
                                                reportFatalError(
                                                        "EntityNotDeclared",
                                                        new Object[] { entityName });
                                            }
                                        }
                                        // startEntity is called whether or not the
                                        // entity was found to be declared
                                        fEntityManager.startEntity(entityName,
                                                true);
                                    }
                                }
                            }
                        } else if (c == '<') {
                            // '<' is not allowed in an attribute value; it is
                            // consumed and kept in the non-normalized text only
                            reportFatalError("LessthanInAttValue",
                                    new Object[] { eleName, atName });
                            fEntityScanner.scanChar();
                            if (entityDepth == fEntityDepth) {
                                fStringBuffer2.append((char) c);
                            }
                        } else if (c == '%' || c == ']') {
                            // ordinary characters here; copied to both values
                            fEntityScanner.scanChar();
                            fStringBuffer.append((char) c);
                            if (entityDepth == fEntityDepth) {
                                fStringBuffer2.append((char) c);
                            }
                            if (DEBUG_ATTR_NORMALIZATION) {
                                System.out.println("** valueF: \""
                                        + fStringBuffer.toString() + "\"");
                            }
                        } else if (c == '\n' || c == '\r') {
                            // line break: a space in the normalized value, and
                            // always '\n' (even for '\r') in the non-normalized one
                            fEntityScanner.scanChar();
                            fStringBuffer.append(' ');
                            if (entityDepth == fEntityDepth) {
                                fStringBuffer2.append('\n');
                            }
                        } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                            // surrogate pair: scanned into the scratch buffer
                            // fStringBuffer3 and copied only if the scan succeeded
                            fStringBuffer3.clear();
                            if (scanSurrogates(fStringBuffer3)) {
                                fStringBuffer.append(fStringBuffer3);
                                if (entityDepth == fEntityDepth) {
                                    fStringBuffer2.append(fStringBuffer3);
                                }
                                if (DEBUG_ATTR_NORMALIZATION) {
                                    System.out.println("** valueI: \""
                                            + fStringBuffer.toString() + "\"");
                                }
                            }
                        } else if (c != -1 && isInvalidLiteral(c)) {
                            // invalid character: reported, consumed, and kept in
                            // the non-normalized text only
                            reportFatalError("InvalidCharInAttValue",
                                    new Object[] { eleName, atName,
                                            Integer.toString(c, 16) });
                            fEntityScanner.scanChar();
                            if (entityDepth == fEntityDepth) {
                                fStringBuffer2.append((char) c);
                            }
                        }
                        // scan the next run of literal text; the raw text is
                        // recorded before value is normalized in place
                        c = fEntityScanner.scanLiteral(quote, value);
                        if (entityDepth == fEntityDepth) {
                            fStringBuffer2.append(value);
                        }
                        normalizeWhitespace(value);
                        // only a quote seen at the starting entity depth ends
                        // the attribute value
                    } while (c != quote || entityDepth != fEntityDepth);
                    fStringBuffer.append(value);
                    if (DEBUG_ATTR_NORMALIZATION) {
                        System.out.println("** valueN: \""
                                + fStringBuffer.toString() + "\"");
                    }
                    value.setValues(fStringBuffer);
                    fScanningAttribute = false;
                }
                nonNormalizedValue.setValues(fStringBuffer2);

                // quote
                int cquote = fEntityScanner.scanChar();
                if (cquote != quote) {
                    reportFatalError("CloseQuoteExpected", new Object[] {
                            eleName, atName });
                }
                // true only when normalization left the text unchanged
                return nonNormalizedValue.equals(value.ch, value.offset,
                        value.length);

            } // scanAttributeValue()
0982:
0983:            /**
0984:             * Scans External ID and return the public and system IDs.
0985:             *
0986:             * @param identifiers An array of size 2 to return the system id,
0987:             *                    and public id (in that order).
0988:             * @param optionalSystemId Specifies whether the system id is optional.
0989:             *
0990:             * <strong>Note:</strong> This method uses fString and fStringBuffer,
0991:             * anything in them at the time of calling is lost.
0992:             */
0993:            protected void scanExternalID(String[] identifiers,
0994:                    boolean optionalSystemId) throws IOException, XNIException {
0995:
0996:                String systemId = null;
0997:                String publicId = null;
0998:                if (fEntityScanner.skipString("PUBLIC")) {
0999:                    if (!fEntityScanner.skipSpaces()) {
1000:                        reportFatalError("SpaceRequiredAfterPUBLIC", null);
1001:                    }
1002:                    scanPubidLiteral(fString);
1003:                    publicId = fString.toString();
1004:
1005:                    if (!fEntityScanner.skipSpaces() && !optionalSystemId) {
1006:                        reportFatalError("SpaceRequiredBetweenPublicAndSystem",
1007:                                null);
1008:                    }
1009:                }
1010:
1011:                if (publicId != null || fEntityScanner.skipString("SYSTEM")) {
1012:                    if (publicId == null && !fEntityScanner.skipSpaces()) {
1013:                        reportFatalError("SpaceRequiredAfterSYSTEM", null);
1014:                    }
1015:                    int quote = fEntityScanner.peekChar();
1016:                    if (quote != '\'' && quote != '"') {
1017:                        if (publicId != null && optionalSystemId) {
1018:                            // looks like we don't have any system id
1019:                            // simply return the public id
1020:                            identifiers[0] = null;
1021:                            identifiers[1] = publicId;
1022:                            return;
1023:                        }
1024:                        reportFatalError("QuoteRequiredInSystemID", null);
1025:                    }
1026:                    fEntityScanner.scanChar();
1027:                    XMLString ident = fString;
1028:                    if (fEntityScanner.scanLiteral(quote, ident) != quote) {
1029:                        fStringBuffer.clear();
1030:                        do {
1031:                            fStringBuffer.append(ident);
1032:                            int c = fEntityScanner.peekChar();
1033:                            if (XMLChar.isMarkup(c) || c == ']') {
1034:                                fStringBuffer.append((char) fEntityScanner
1035:                                        .scanChar());
1036:                            }
1037:                        } while (fEntityScanner.scanLiteral(quote, ident) != quote);
1038:                        fStringBuffer.append(ident);
1039:                        ident = fStringBuffer;
1040:                    }
1041:                    systemId = ident.toString();
1042:                    if (!fEntityScanner.skipChar(quote)) {
1043:                        reportFatalError("SystemIDUnterminated", null);
1044:                    }
1045:                }
1046:
1047:                // store result in array
1048:                identifiers[0] = systemId;
1049:                identifiers[1] = publicId;
1050:            }
1051:
1052:            /**
1053:             * Scans public ID literal.
1054:             *
1055:             * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 
1056:             * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
1057:             *
1058:             * The returned string is normalized according to the following rule,
1059:             * from http://www.w3.org/TR/REC-xml#dt-pubid:
1060:             *
1061:             * Before a match is attempted, all strings of white space in the public
1062:             * identifier must be normalized to single space characters (#x20), and
1063:             * leading and trailing white space must be removed.
1064:             *
1065:             * @param literal The string to fill in with the public ID literal.
1066:             * @return True on success.
1067:             *
1068:             * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
1069:             * the time of calling is lost.
1070:             */
1071:            protected boolean scanPubidLiteral(XMLString literal)
1072:                    throws IOException, XNIException {
1073:                int quote = fEntityScanner.scanChar();
1074:                if (quote != '\'' && quote != '"') {
1075:                    reportFatalError("QuoteRequiredInPublicID", null);
1076:                    return false;
1077:                }
1078:
1079:                fStringBuffer.clear();
1080:                // skip leading whitespace
1081:                boolean skipSpace = true;
1082:                boolean dataok = true;
1083:                while (true) {
1084:                    int c = fEntityScanner.scanChar();
1085:                    if (c == ' ' || c == '\n' || c == '\r') {
1086:                        if (!skipSpace) {
1087:                            // take the first whitespace as a space and skip the others
1088:                            fStringBuffer.append(' ');
1089:                            skipSpace = true;
1090:                        }
1091:                    } else if (c == quote) {
1092:                        if (skipSpace) {
1093:                            // if we finished on a space let's trim it
1094:                            fStringBuffer.length--;
1095:                        }
1096:                        literal.setValues(fStringBuffer);
1097:                        break;
1098:                    } else if (XMLChar.isPubid(c)) {
1099:                        fStringBuffer.append((char) c);
1100:                        skipSpace = false;
1101:                    } else if (c == -1) {
1102:                        reportFatalError("PublicIDUnterminated", null);
1103:                        return false;
1104:                    } else {
1105:                        dataok = false;
1106:                        reportFatalError("InvalidCharInPublicID",
1107:                                new Object[] { Integer.toHexString(c) });
1108:                    }
1109:                }
1110:                return dataok;
1111:            }
1112:
1113:            /**
1114:             * Normalize whitespace in an XMLString converting all whitespace
1115:             * characters to space characters.
1116:             */
1117:            protected void normalizeWhitespace(XMLString value) {
1118:                int end = value.offset + value.length;
1119:                for (int i = value.offset; i < end; ++i) {
1120:                    int c = value.ch[i];
1121:                    // Performance: For XML 1.0 documents take advantage of 
1122:                    // the fact that the only legal characters below 0x20 
1123:                    // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've 
1124:                    // already determined the well-formedness of these
1125:                    // characters it is sufficient (and safe) to check
1126:                    // against 0x20. -- mrglavas
1127:                    if (c < 0x20) {
1128:                        value.ch[i] = ' ';
1129:                    }
1130:                }
1131:            }
1132:
1133:            /**
1134:             * Normalize whitespace in an XMLString converting all whitespace
1135:             * characters to space characters.
1136:             */
1137:            protected void normalizeWhitespace(XMLString value, int fromIndex) {
1138:                int end = value.offset + value.length;
1139:                for (int i = value.offset + fromIndex; i < end; ++i) {
1140:                    int c = value.ch[i];
1141:                    // Performance: For XML 1.0 documents take advantage of 
1142:                    // the fact that the only legal characters below 0x20 
1143:                    // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've 
1144:                    // already determined the well-formedness of these
1145:                    // characters it is sufficient (and safe) to check
1146:                    // against 0x20. -- mrglavas
1147:                    if (c < 0x20) {
1148:                        value.ch[i] = ' ';
1149:                    }
1150:                }
1151:            }
1152:
1153:            /**
1154:             * Checks whether this string would be unchanged by normalization.
1155:             * 
1156:             * @return -1 if the value would be unchanged by normalization,
1157:             * otherwise the index of the first whitespace character which
1158:             * would be transformed.
1159:             */
1160:            protected int isUnchangedByNormalization(XMLString value) {
1161:                int end = value.offset + value.length;
1162:                for (int i = value.offset; i < end; ++i) {
1163:                    int c = value.ch[i];
1164:                    // Performance: For XML 1.0 documents take advantage of 
1165:                    // the fact that the only legal characters below 0x20 
1166:                    // are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've 
1167:                    // already determined the well-formedness of these
1168:                    // characters it is sufficient (and safe) to check
1169:                    // against 0x20. -- mrglavas
1170:                    if (c < 0x20) {
1171:                        return i - value.offset;
1172:                    }
1173:                }
1174:                return -1;
1175:            }
1176:
1177:            //
1178:            // XMLEntityHandler methods
1179:            //
1180:
1181:            /**
1182:             * This method notifies of the start of an entity. The document entity
1183:             * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 
1184:             * parameter entity names start with '%'; and general entities are just
1185:             * specified by their name.
1186:             * 
1187:             * @param name     The name of the entity.
1188:             * @param identifier The resource identifier.
1189:             * @param encoding The auto-detected IANA encoding name of the entity
1190:             *                 stream. This value will be null in those situations
1191:             *                 where the entity encoding is not auto-detected (e.g.
1192:             *                 internal entities or a document entity that is
1193:             *                 parsed from a java.io.Reader).
1194:             * @param augs     Additional information that may include infoset augmentations
1195:             *
1196:             * @throws XNIException Thrown by handler to signal an error.
1197:             */
1198:            public void startEntity(String name,
1199:                    XMLResourceIdentifier identifier, String encoding,
1200:                    Augmentations augs) throws XNIException {
1201:
1202:                // keep track of the entity depth
1203:                fEntityDepth++;
1204:                // must reset entity scanner
1205:                fEntityScanner = fEntityManager.getEntityScanner();
1206:
1207:            } // startEntity(String,XMLResourceIdentifier,String)
1208:
1209:            /**
1210:             * This method notifies the end of an entity. The document entity has
1211:             * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 
1212:             * parameter entity names start with '%'; and general entities are just
1213:             * specified by their name.
1214:             * 
1215:             * @param name The name of the entity.
1216:             * @param augs Additional information that may include infoset augmentations
1217:             *
1218:             * @throws XNIException Thrown by handler to signal an error.
1219:             */
1220:            public void endEntity(String name, Augmentations augs)
1221:                    throws XNIException {
1222:
1223:                // keep track of the entity depth
1224:                fEntityDepth--;
1225:
1226:            } // endEntity(String)
1227:
1228:            /**
1229:             * Scans a character reference and append the corresponding chars to the
1230:             * specified buffer.
1231:             *
1232:             * <p>
1233:             * <pre>
1234:             * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1235:             * </pre>
1236:             *
1237:             * <strong>Note:</strong> This method uses fStringBuffer, anything in it
1238:             * at the time of calling is lost.
1239:             *
1240:             * @param buf the character buffer to append chars to
1241:             * @param buf2 the character buffer to append non-normalized chars to
1242:             *
1243:             * @return the character value or (-1) on conversion failure
1244:             */
1245:            protected int scanCharReferenceValue(XMLStringBuffer buf,
1246:                    XMLStringBuffer buf2) throws IOException, XNIException {
1247:
1248:                // scan hexadecimal value
1249:                boolean hex = false;
1250:                if (fEntityScanner.skipChar('x')) {
1251:                    if (buf2 != null) {
1252:                        buf2.append('x');
1253:                    }
1254:                    hex = true;
1255:                    fStringBuffer3.clear();
1256:                    boolean digit = true;
1257:
1258:                    int c = fEntityScanner.peekChar();
1259:                    digit = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')
1260:                            || (c >= 'A' && c <= 'F');
1261:                    if (digit) {
1262:                        if (buf2 != null) {
1263:                            buf2.append((char) c);
1264:                        }
1265:                        fEntityScanner.scanChar();
1266:                        fStringBuffer3.append((char) c);
1267:
1268:                        do {
1269:                            c = fEntityScanner.peekChar();
1270:                            digit = (c >= '0' && c <= '9')
1271:                                    || (c >= 'a' && c <= 'f')
1272:                                    || (c >= 'A' && c <= 'F');
1273:                            if (digit) {
1274:                                if (buf2 != null) {
1275:                                    buf2.append((char) c);
1276:                                }
1277:                                fEntityScanner.scanChar();
1278:                                fStringBuffer3.append((char) c);
1279:                            }
1280:                        } while (digit);
1281:                    } else {
1282:                        reportFatalError("HexdigitRequiredInCharRef", null);
1283:                    }
1284:                }
1285:
1286:                // scan decimal value
1287:                else {
1288:                    fStringBuffer3.clear();
1289:                    boolean digit = true;
1290:
1291:                    int c = fEntityScanner.peekChar();
1292:                    digit = c >= '0' && c <= '9';
1293:                    if (digit) {
1294:                        if (buf2 != null) {
1295:                            buf2.append((char) c);
1296:                        }
1297:                        fEntityScanner.scanChar();
1298:                        fStringBuffer3.append((char) c);
1299:
1300:                        do {
1301:                            c = fEntityScanner.peekChar();
1302:                            digit = c >= '0' && c <= '9';
1303:                            if (digit) {
1304:                                if (buf2 != null) {
1305:                                    buf2.append((char) c);
1306:                                }
1307:                                fEntityScanner.scanChar();
1308:                                fStringBuffer3.append((char) c);
1309:                            }
1310:                        } while (digit);
1311:                    } else {
1312:                        reportFatalError("DigitRequiredInCharRef", null);
1313:                    }
1314:                }
1315:
1316:                // end
1317:                if (!fEntityScanner.skipChar(';')) {
1318:                    reportFatalError("SemicolonRequiredInCharRef", null);
1319:                }
1320:                if (buf2 != null) {
1321:                    buf2.append(';');
1322:                }
1323:
1324:                // convert string to number
1325:                int value = -1;
1326:                try {
1327:                    value = Integer.parseInt(fStringBuffer3.toString(),
1328:                            hex ? 16 : 10);
1329:
1330:                    // character reference must be a valid XML character
1331:                    if (isInvalid(value)) {
1332:                        StringBuffer errorBuf = new StringBuffer(
1333:                                fStringBuffer3.length + 1);
1334:                        if (hex)
1335:                            errorBuf.append('x');
1336:                        errorBuf.append(fStringBuffer3.ch,
1337:                                fStringBuffer3.offset, fStringBuffer3.length);
1338:                        reportFatalError("InvalidCharRef",
1339:                                new Object[] { errorBuf.toString() });
1340:                    }
1341:                } catch (NumberFormatException e) {
1342:                    // Conversion failed, let -1 value drop through.
1343:                    // If we end up here, the character reference was invalid.
1344:                    StringBuffer errorBuf = new StringBuffer(
1345:                            fStringBuffer3.length + 1);
1346:                    if (hex)
1347:                        errorBuf.append('x');
1348:                    errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset,
1349:                            fStringBuffer3.length);
1350:                    reportFatalError("InvalidCharRef", new Object[] { errorBuf
1351:                            .toString() });
1352:                }
1353:
1354:                // append corresponding chars to the given buffer
1355:                if (!XMLChar.isSupplemental(value)) {
1356:                    buf.append((char) value);
1357:                } else {
1358:                    // character is supplemental, split it into surrogate chars
1359:                    buf.append(XMLChar.highSurrogate(value));
1360:                    buf.append(XMLChar.lowSurrogate(value));
1361:                }
1362:
1363:                // char refs notification code
1364:                if (fNotifyCharRefs && value != -1) {
1365:                    String literal = "#" + (hex ? "x" : "")
1366:                            + fStringBuffer3.toString();
1367:                    if (!fScanningAttribute) {
1368:                        fCharRefLiteral = literal;
1369:                    }
1370:                }
1371:
1372:                return value;
1373:            }
1374:
1375:            // returns true if the given character is not
1376:            // valid with respect to the version of
1377:            // XML understood by this scanner.
1378:            protected boolean isInvalid(int value) {
1379:                return (XMLChar.isInvalid(value));
1380:            } // isInvalid(int):  boolean
1381:
1382:            // returns true if the given character is not
1383:            // valid or may not be used outside a character reference 
1384:            // with respect to the version of XML understood by this scanner.
1385:            protected boolean isInvalidLiteral(int value) {
1386:                return (XMLChar.isInvalid(value));
1387:            } // isInvalidLiteral(int):  boolean
1388:
1389:            // returns true if the given character is 
1390:            // a valid nameChar with respect to the version of
1391:            // XML understood by this scanner.
1392:            protected boolean isValidNameChar(int value) {
1393:                return (XMLChar.isName(value));
1394:            } // isValidNameChar(int):  boolean
1395:
1396:            // returns true if the given character is 
1397:            // a valid nameStartChar with respect to the version of
1398:            // XML understood by this scanner.
1399:            protected boolean isValidNameStartChar(int value) {
1400:                return (XMLChar.isNameStart(value));
1401:            } // isValidNameStartChar(int):  boolean
1402:
1403:            // returns true if the given character is
1404:            // a valid NCName character with respect to the version of
1405:            // XML understood by this scanner.
1406:            protected boolean isValidNCName(int value) {
1407:                return (XMLChar.isNCName(value));
1408:            } // isValidNCName(int):  boolean
1409:
1410:            // returns true if the given character is 
1411:            // a valid high surrogate for a nameStartChar 
1412:            // with respect to the version of XML understood 
1413:            // by this scanner.
1414:            protected boolean isValidNameStartHighSurrogate(int value) {
1415:                return false;
1416:            } // isValidNameStartHighSurrogate(int):  boolean
1417:
1418:            protected boolean versionSupported(String version) {
1419:                return version.equals("1.0");
1420:            } // version Supported
1421:
1422:            // returns the error message key for unsupported
1423:            // versions of XML with respect to the version of
1424:            // XML understood by this scanner.
1425:            protected String getVersionNotSupportedKey() {
1426:                return "VersionNotSupported";
1427:            } // getVersionNotSupportedKey: String
1428:
1429:            /**
1430:             * Scans surrogates and append them to the specified buffer.
1431:             * <p>
1432:             * <strong>Note:</strong> This assumes the current char has already been
1433:             * identified as a high surrogate.
1434:             *
1435:             * @param buf The StringBuffer to append the read surrogates to.
1436:             * @return True if it succeeded.
1437:             */
1438:            protected boolean scanSurrogates(XMLStringBuffer buf)
1439:                    throws IOException, XNIException {
1440:
1441:                int high = fEntityScanner.scanChar();
1442:                int low = fEntityScanner.peekChar();
1443:                if (!XMLChar.isLowSurrogate(low)) {
1444:                    reportFatalError("InvalidCharInContent",
1445:                            new Object[] { Integer.toString(high, 16) });
1446:                    return false;
1447:                }
1448:                fEntityScanner.scanChar();
1449:
1450:                // convert surrogates to supplemental character
1451:                int c = XMLChar.supplemental((char) high, (char) low);
1452:
1453:                // supplemental character must be a valid XML character
1454:                if (isInvalid(c)) {
1455:                    reportFatalError("InvalidCharInContent",
1456:                            new Object[] { Integer.toString(c, 16) });
1457:                    return false;
1458:                }
1459:
1460:                // fill in the buffer
1461:                buf.append((char) high);
1462:                buf.append((char) low);
1463:
1464:                return true;
1465:
1466:            } // scanSurrogates():boolean
1467:
1468:            /**
1469:             * Convenience function used in all XML scanners.
1470:             */
1471:            protected void reportFatalError(String msgId, Object[] args)
1472:                    throws XNIException {
1473:                fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1474:                        msgId, args, XMLErrorReporter.SEVERITY_FATAL_ERROR);
1475:            }
1476:
1477:            // private methods
1478:            private void init() {
1479:                fEntityScanner = null;
1480:                // initialize vars
1481:                fEntityDepth = 0;
1482:                fReportEntity = true;
1483:                fResourceIdentifier.clear();
1484:            }
1485:
1486:        } // class XMLScanner
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.