Source Code Cross Referenced for XmlParser.java in » Swing-Library » jEdit » com » microstar » xml » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Swing Library » jEdit » com.microstar.xml
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        // XmlParser.java: the main parser class.
0002:        // NO WARRANTY! See README, and copyright below.
0003:        // $Id: XmlParser.java 4972 2004-02-22 20:00:54Z spestov $
0004:
0005:        package com.microstar.xml;
0006:
0007:        import java.io.BufferedInputStream;
0008:        import java.io.EOFException;
0009:        import java.io.InputStream;
0010:        import java.io.Reader;
0011:        import java.net.URL;
0012:        import java.net.URLConnection;
0013:        import java.util.Enumeration;
0014:        import java.util.Hashtable;
0015:        import java.util.Stack;
0016:
0017:        /**
0018:         * Parse XML documents and return parse events through call-backs.
0019:         * <p>You need to define a class implementing the <code>XmlHandler</code>
0020:         * interface: an object belonging to this class will receive the
0021:         * callbacks for the events.  (As an alternative to implementing
0022:         * the full XmlHandler interface, you can simply extend the 
0023:         * <code>HandlerBase</code> convenience class.)
0024:         * <p>Usage (assuming that <code>MyHandler</code> is your implementation
0025:         * of the <code>XmlHandler</code> interface):
0026:         * <pre>
0027:         * XmlHandler handler = new MyHandler();
0028:         * XmlParser parser = new XmlParser();
0029:         * parser.setHandler(handler);
0030:         * try {
0031:         *   parser.parse("http://www.host.com/doc.xml", null, (String) null);
0032:         * } catch (Exception e) {
0033:         *   [do something interesting]
0034:         * }
0035:         * </pre>
0036:         * <p>Alternatively, you can use the standard SAX interfaces
0037:         * with the <code>SAXDriver</code> class as your entry point.
0038:         * @author Copyright (c) 1997, 1998 by Microstar Software Ltd.
0039:         * @author Written by David Megginson &lt;dmeggins@microstar.com&gt;
0040:         * @version 1.1
0041:         * @see XmlHandler
0042:         * @see HandlerBase
0043:         * @see SAXDriver
0044:         */
0045:        public class XmlParser {
0046:
0047:            //
0048:            // Use special cheats that speed up the code (currently about 50%),
0049:            // but may cause problems with future maintenance and add to the
0050:            // class file size (about 500 bytes).
0051:            //
0052:            private final static boolean USE_CHEATS = true;
0053:
0054:            //////////////////////////////////////////////////////////////////////
0055:            // Constructors.
0056:            ////////////////////////////////////////////////////////////////////////
0057:
/**
 * Create a parser that does not yet have a handler attached.
 * @see #setHandler
 * @see #parse
 */
public XmlParser() {
    super();
}
0065:
0066:            /**
0067:             * Set the handler that will receive parsing events.
0068:             * @param handler The handler to receive callback events.
0069:             * @see #parse
0070:             * @see XmlHandler
0071:             */
0072:            public void setHandler(XmlHandler handler) {
0073:                this .handler = handler;
0074:            }
0075:
0076:            /**
0077:             * Parse an XML document from a URI.
0078:             * <p>You may parse a document more than once, but only one thread
0079:             * may call this method for an object at one time.
0080:             * @param systemId The URI of the document.
0081:             * @param publicId The public identifier of the document, or null.
0082:             * @param encoding The suggested encoding, or null if unknown.
0083:             * @exception java.lang.Exception Any exception thrown by your
0084:             *            own handlers, or any derivation of java.io.IOException
0085:             *            thrown by the parser itself.
0086:             */
0087:            public void parse(String systemId, String publicId, String encoding)
0088:                    throws java.lang.Exception {
0089:                doParse(systemId, publicId, null, null, encoding);
0090:            }
0091:
0092:            /**
0093:             * Parse an XML document from a byte stream.
0094:             * <p>The URI that you supply will become the base URI for
0095:             * resolving relative links, but &AElig;lfred will actually read
0096:             * the document from the supplied input stream.
0097:             * <p>You may parse a document more than once, but only one thread
0098:             * may call this method for an object at one time.
0099:             * @param systemId The base URI of the document, or null if not
0100:             *                 known.
0101:             * @param publicId The public identifier of the document, or null
0102:             *                 if not known.
0103:             * @param stream A byte input stream.
0104:             * @param encoding The suggested encoding, or null if unknown.
0105:             * @exception java.lang.Exception Any exception thrown by your
0106:             *            own handlers, or any derivation of java.io.IOException
0107:             *            thrown by the parser itself.
0108:             */
0109:            public void parse(String systemId, String publicId,
0110:                    InputStream stream, String encoding)
0111:                    throws java.lang.Exception {
0112:                doParse(systemId, publicId, null, stream, encoding);
0113:            }
0114:
0115:            /**
0116:             * Parse an XML document from a character stream.
0117:             * <p>The URI that you supply will become the base URI for
0118:             * resolving relative links, but &AElig;lfred will actually read
0119:             * the document from the supplied input stream.
0120:             * <p>You may parse a document more than once, but only one thread
0121:             * may call this method for an object at one time.
0122:             * @param systemId The base URI of the document, or null if not
0123:             *                 known.
0124:             * @param publicId The public identifier of the document, or null
0125:             *                 if not known.
0126:             * @param reader A character stream.
0127:             * @exception java.lang.Exception Any exception thrown by your
0128:             *            own handlers, or any derivation of java.io.IOException
0129:             *            thrown by the parser itself.
0130:             */
0131:            public void parse(String systemId, String publicId, Reader reader)
0132:                    throws java.lang.Exception {
0133:                doParse(systemId, publicId, reader, null, null);
0134:            }
0135:
0136:            private synchronized void doParse(String systemId, String publicId,
0137:                    Reader reader, InputStream stream, String encoding)
0138:                    throws java.lang.Exception {
0139:                basePublicId = publicId;
0140:                baseURI = systemId;
0141:                baseReader = reader;
0142:                baseInputStream = stream;
0143:
0144:                initializeVariables();
0145:
0146:                // Set the default entities here.
0147:                setInternalEntity(intern("amp"), "&#38;");
0148:                setInternalEntity(intern("lt"), "&#60;");
0149:                setInternalEntity(intern("gt"), "&#62;");
0150:                setInternalEntity(intern("apos"), "&#39;");
0151:                setInternalEntity(intern("quot"), "&#34;");
0152:
0153:                if (handler != null) {
0154:                    handler.startDocument();
0155:                }
0156:
0157:                pushURL("[document]", basePublicId, baseURI, baseReader,
0158:                        baseInputStream, encoding);
0159:
0160:                parseDocument();
0161:
0162:                if (handler != null) {
0163:                    handler.endDocument();
0164:                }
0165:                cleanupVariables();
0166:            }
0167:
0168:            ////////////////////////////////////////////////////////////////////////
0169:            // Constants.
0170:            ////////////////////////////////////////////////////////////////////////
0171:
0172:            //
0173:            // Constants for element content type.
0174:            //
0175:
0176:            /**
0177:             * Constant: an element has not been declared.
0178:             * @see #getElementContentType
0179:             */
0180:            public final static int CONTENT_UNDECLARED = 0;
0181:
0182:            /**
0183:             * Constant: the element has a content model of ANY.
0184:             * @see #getElementContentType
0185:             */
0186:            public final static int CONTENT_ANY = 1;
0187:
0188:            /**
0189:             * Constant: the element has declared content of EMPTY.
0190:             * @see #getElementContentType
0191:             */
0192:            public final static int CONTENT_EMPTY = 2;
0193:
0194:            /**
0195:             * Constant: the element has mixed content.
0196:             * @see #getElementContentType
0197:             */
0198:            public final static int CONTENT_MIXED = 3;
0199:
0200:            /**
0201:             * Constant: the element has element content.
0202:             * @see #getElementContentType
0203:             */
0204:            public final static int CONTENT_ELEMENTS = 4;
0205:
0206:            //
0207:            // Constants for the entity type.
0208:            //
0209:
0210:            /**
0211:             * Constant: the entity has not been declared.
0212:             * @see #getEntityType
0213:             */
0214:            public final static int ENTITY_UNDECLARED = 0;
0215:
0216:            /**
0217:             * Constant: the entity is internal.
0218:             * @see #getEntityType
0219:             */
0220:            public final static int ENTITY_INTERNAL = 1;
0221:
0222:            /**
0223:             * Constant: the entity is external, non-XML data.
0224:             * @see #getEntityType
0225:             */
0226:            public final static int ENTITY_NDATA = 2;
0227:
0228:            /**
0229:             * Constant: the entity is external XML data.
0230:             * @see #getEntityType
0231:             */
0232:            public final static int ENTITY_TEXT = 3;
0233:
0234:            //
0235:            // Constants for attribute type.
0236:            //
0237:
0238:            /**
0239:             * Constant: the attribute has not been declared for this element type.
0240:             * @see #getAttributeType
0241:             */
0242:            public final static int ATTRIBUTE_UNDECLARED = 0;
0243:
0244:            /**
0245:             * Constant: the attribute value is a string value.
0246:             * @see #getAttributeType
0247:             */
0248:            public final static int ATTRIBUTE_CDATA = 1;
0249:
0250:            /**
0251:             * Constant: the attribute value is a unique identifier.
0252:             * @see #getAttributeType
0253:             */
0254:            public final static int ATTRIBUTE_ID = 2;
0255:
0256:            /**
0257:             * Constant: the attribute value is a reference to a unique identifier.
0258:             * @see #getAttributeType
0259:             */
0260:            public final static int ATTRIBUTE_IDREF = 3;
0261:
0262:            /**
0263:             * Constant: the attribute value is a list of ID references.
0264:             * @see #getAttributeType
0265:             */
0266:            public final static int ATTRIBUTE_IDREFS = 4;
0267:
0268:            /**
0269:             * Constant: the attribute value is the name of an entity.
0270:             * @see #getAttributeType
0271:             */
0272:            public final static int ATTRIBUTE_ENTITY = 5;
0273:
0274:            /**
0275:             * Constant: the attribute value is a list of entity names.
0276:             * @see #getAttributeType
0277:             */
0278:            public final static int ATTRIBUTE_ENTITIES = 6;
0279:
0280:            /**
0281:             * Constant: the attribute value is a name token.
0282:             * @see #getAttributeType
0283:             */
0284:            public final static int ATTRIBUTE_NMTOKEN = 7;
0285:
0286:            /**
0287:             * Constant: the attribute value is a list of name tokens.
0288:             * @see #getAttributeType
0289:             */
0290:            public final static int ATTRIBUTE_NMTOKENS = 8;
0291:
0292:            /**
0293:             * Constant: the attribute value is a token from an enumeration.
0294:             * @see #getAttributeType
0295:             */
0296:            public final static int ATTRIBUTE_ENUMERATED = 9;
0297:
0298:            /**
0299:             * Constant: the attribute is the name of a notation.
0300:             * @see #getAttributeType
0301:             */
0302:            public final static int ATTRIBUTE_NOTATION = 10;
0303:
0304:            //
0305:            // When the class is loaded, populate the hash table of
0306:            // attribute types.
0307:            //
0308:
0309:            /**
0310:             * Hash table of attribute types.
0311:             */
0312:            private static Hashtable attributeTypeHash;
0313:            static {
0314:                attributeTypeHash = new Hashtable();
0315:                attributeTypeHash.put("CDATA", new Integer(ATTRIBUTE_CDATA));
0316:                attributeTypeHash.put("ID", new Integer(ATTRIBUTE_ID));
0317:                attributeTypeHash.put("IDREF", new Integer(ATTRIBUTE_IDREF));
0318:                attributeTypeHash.put("IDREFS", new Integer(ATTRIBUTE_IDREFS));
0319:                attributeTypeHash.put("ENTITY", new Integer(ATTRIBUTE_ENTITY));
0320:                attributeTypeHash.put("ENTITIES", new Integer(
0321:                        ATTRIBUTE_ENTITIES));
0322:                attributeTypeHash
0323:                        .put("NMTOKEN", new Integer(ATTRIBUTE_NMTOKEN));
0324:                attributeTypeHash.put("NMTOKENS", new Integer(
0325:                        ATTRIBUTE_NMTOKENS));
0326:                attributeTypeHash.put("NOTATION", new Integer(
0327:                        ATTRIBUTE_NOTATION));
0328:            }
0329:
0330:            //
0331:            // Constants for supported encodings.
0332:            //
0333:            private final static int ENCODING_UTF_8 = 1;
0334:            private final static int ENCODING_ISO_8859_1 = 2;
0335:            private final static int ENCODING_UCS_2_12 = 3;
0336:            private final static int ENCODING_UCS_2_21 = 4;
0337:            private final static int ENCODING_UCS_4_1234 = 5;
0338:            private final static int ENCODING_UCS_4_4321 = 6;
0339:            private final static int ENCODING_UCS_4_2143 = 7;
0340:            private final static int ENCODING_UCS_4_3412 = 8;
0341:
0342:            //
0343:            // Constants for attribute default value.
0344:            //
0345:
0346:            /**
0347:             * Constant: the attribute is not declared.
0348:             * @see #getAttributeDefaultValueType
0349:             */
0350:            public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 0;
0351:
0352:            /**
0353:             * Constant: the attribute has a literal default value specified.
0354:             * @see #getAttributeDefaultValueType
0355:             * @see #getAttributeDefaultValue
0356:             */
0357:            public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 1;
0358:
0359:            /**
0360:             * Constant: the attribute was declared #IMPLIED.
0361:             * @see #getAttributeDefaultValueType
0362:             */
0363:            public final static int ATTRIBUTE_DEFAULT_IMPLIED = 2;
0364:
0365:            /**
0366:             * Constant: the attribute was declared #REQUIRED.
0367:             * @see #getAttributeDefaultValueType
0368:             */
0369:            public final static int ATTRIBUTE_DEFAULT_REQUIRED = 3;
0370:
0371:            /**
0372:             * Constant: the attribute was declared #FIXED.
0373:             * @see #getAttributeDefaultValueType
0374:             * @see #getAttributeDefaultValue
0375:             */
0376:            public final static int ATTRIBUTE_DEFAULT_FIXED = 4;
0377:
0378:            //
0379:            // Constants for input.
0380:            //
0381:            private final static int INPUT_NONE = 0;
0382:            private final static int INPUT_INTERNAL = 1;
0383:            private final static int INPUT_EXTERNAL = 2;
0384:            private final static int INPUT_STREAM = 3;
0385:            private final static int INPUT_BUFFER = 4;
0386:            private final static int INPUT_READER = 5;
0387:
0388:            //
0389:            // Flags for reading literals.
0390:            //
0391:            private final static int LIT_CHAR_REF = 1;
0392:            private final static int LIT_ENTITY_REF = 2;
0393:            private final static int LIT_PE_REF = 4;
0394:            private final static int LIT_NORMALIZE = 8;
0395:
0396:            //
0397:            // Flags for parsing context.
0398:            //
0399:            private final static int CONTEXT_NONE = 0;
0400:            private final static int CONTEXT_DTD = 1;
0401:            private final static int CONTEXT_ENTITYVALUE = 2;
0402:            private final static int CONTEXT_ATTRIBUTEVALUE = 3;
0403:
0404:            //////////////////////////////////////////////////////////////////////
0405:            // Error reporting.
0406:            //////////////////////////////////////////////////////////////////////
0407:
0408:            /**
0409:             * Report an error.
0410:             * @param message The error message.
0411:             * @param textFound The text that caused the error (or null).
0412:             * @see XmlHandler#error
0413:             * @see #line
0414:             */
0415:            void error(String message, String textFound, String textExpected)
0416:                    throws java.lang.Exception {
0417:                errorCount++;
0418:                if (textFound != null) {
0419:                    message = message + " (found \"" + textFound + "\")";
0420:                }
0421:                if (textExpected != null) {
0422:                    message = message + " (expected \"" + textExpected + "\")";
0423:                }
0424:                if (handler != null) {
0425:                    String uri = null;
0426:
0427:                    if (externalEntity != null) {
0428:                        uri = externalEntity.getURL().toString();
0429:                    }
0430:                    handler.error(message, uri, line, column);
0431:                }
0432:            }
0433:
0434:            /**
0435:             * Report a serious error.
0436:             * @param message The error message.
0437:             * @param textFound The text that caused the error (or null).
0438:             */
0439:            void error(String message, char textFound, String textExpected)
0440:                    throws java.lang.Exception {
0441:                error(message, new Character(textFound).toString(),
0442:                        textExpected);
0443:            }
0444:
0445:            //////////////////////////////////////////////////////////////////////
0446:            // Major syntactic productions.
0447:            //////////////////////////////////////////////////////////////////////
0448:
0449:            /**
0450:             * Parse an XML document.
0451:             * <pre>
0452:             * [1] document ::= prolog element Misc*
0453:             * </pre>
0454:             * <p>This is the top-level parsing function for a single XML
0455:             * document.  As a minimum, a well-formed document must have
0456:             * a document element, and a valid document must have a prolog
0457:             * as well.
0458:             */
0459:            void parseDocument() throws java.lang.Exception {
0460:                char c;
0461:
0462:                parseProlog();
0463:                require('<');
0464:                parseElement();
0465:                try {
0466:                    parseMisc(); //skip all white, PIs, and comments
0467:                    c = readCh(); //if this doesn't throw an exception...
0468:                    error("unexpected characters after document end", c, null);
0469:                } catch (EOFException e) {
0470:                    return;
0471:                }
0472:            }
0473:
0474:            /**
0475:             * Skip a comment.
0476:             * <pre>
0477:             * [18] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* "-->"
0478:             * </pre>
0479:             * <p>(The <code>&lt;!--</code> has already been read.)
0480:             */
0481:            void parseComment() throws java.lang.Exception {
0482:                skipUntil("-->");
0483:            }
0484:
0485:            /**
0486:             * Parse a processing instruction and do a call-back.
0487:             * <pre>
0488:             * [19] PI ::= '&lt;?' Name (S (Char* - (Char* '?&gt;' Char*)))? '?&gt;'
0489:             * </pre>
0490:             * <p>(The <code>&lt;?</code> has already been read.)
0491:             * <p>An XML processing instruction <em>must</em> begin with
0492:             * a Name, which is the instruction's target.
0493:             */
0494:            void parsePI() throws java.lang.Exception {
0495:                String name;
0496:
0497:                name = readNmtoken(true);
0498:                if (!tryRead("?>")) {
0499:                    requireWhitespace();
0500:                    parseUntil("?>");
0501:                }
0502:                if (handler != null) {
0503:                    handler.processingInstruction(name, dataBufferToString());
0504:                }
0505:            }
0506:
0507:            /**
0508:             * Parse a CDATA marked section.
0509:             * <pre>
0510:             * [20] CDSect ::= CDStart CData CDEnd
0511:             * [21] CDStart ::= '&lt;![CDATA['
0512:             * [22] CData ::= (Char* - (Char* ']]&gt;' Char*))
0513:             * [23] CDEnd ::= ']]&gt;'
0514:             * </pre>
0515:             * <p>(The '&lt;![CDATA[' has already been read.)
0516:             * <p>Note that this just appends characters to the dataBuffer,
0517:             * without actually generating an event.
0518:             */
0519:            void parseCDSect() throws java.lang.Exception {
0520:                parseUntil("]]>");
0521:            }
0522:
0523:            /**
0524:             * Parse the prolog of an XML document.
0525:             * <pre>
0526:             * [24] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)?
0527:             * </pre>
0528:             * <p>There are a couple of tricks here.  First, it is necessary to
0529:             * declare the XML default attributes after the DTD (if present)
0530:             * has been read.  Second, it is not possible to expand general
0531:             * references in attribute value literals until after the entire
0532:             * DTD (if present) has been parsed.
0533:             * <p>We do not look for the XML declaration here, because it is
0534:             * handled by pushURL().
0535:             * @see pushURL
0536:             */
0537:            void parseProlog() throws java.lang.Exception {
0538:                parseMisc();
0539:
0540:                if (tryRead("<!DOCTYPE")) {
0541:                    parseDoctypedecl();
0542:                    parseMisc();
0543:                }
0544:            }
0545:
0546:            /**
0547:             * Parse the XML declaration.
0548:             * <pre>
0549:             * [25] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?&gt;'
0550:             * [26] VersionInfo ::= S 'version' Eq ('"1.0"' | "'1.0'")
0551:             * [33] SDDecl ::= S 'standalone' Eq "'" ('yes' | 'no') "'"
0552:             *               | S 'standalone' Eq '"' ("yes" | "no") '"'
0553:             * [78] EncodingDecl ::= S 'encoding' Eq QEncoding
0554:             * </pre>
0555:             * <p>([80] to [82] are also significant.)
0556:             * <p>(The <code>&lt;?xml</code> and whitespace have already been read.)
0557:             * <p>TODO: validate value of standalone.
0558:             * @see #parseTextDecl
0559:             * @see #checkEncoding
0560:             */
0561:            void parseXMLDecl(boolean ignoreEncoding)
0562:                    throws java.lang.Exception {
0563:                String version;
0564:                String encodingName = null;
0565:                String standalone = null;
0566:
0567:                // Read the version.
0568:                require("version");
0569:                parseEq();
0570:                version = readLiteral(0);
0571:                if (!version.equals("1.0")) {
0572:                    error("unsupported XML version", version, "1.0");
0573:                }
0574:
0575:                // Try reading an encoding declaration.
0576:                skipWhitespace();
0577:                if (tryRead("encoding")) {
0578:                    parseEq();
0579:                    encodingName = readLiteral(0);
0580:                    checkEncoding(encodingName, ignoreEncoding);
0581:                }
0582:
0583:                // Try reading a standalone declaration
0584:                skipWhitespace();
0585:                if (tryRead("standalone")) {
0586:                    parseEq();
0587:                    standalone = readLiteral(0);
0588:                }
0589:
0590:                skipWhitespace();
0591:                require("?>");
0592:            }
0593:
0594:            /**
0595:             * Parse the Encoding PI.
0596:             * <pre>
0597:             * [78] EncodingDecl ::= S 'encoding' Eq QEncoding
0598:             * [79] EncodingPI ::= '&lt;?xml' S 'encoding' Eq QEncoding S? '?&gt;'
0599:             * [80] QEncoding ::= '"' Encoding '"' | "'" Encoding "'"
0600:             * [81] Encoding ::= LatinName
0601:             * [82] LatinName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
0602:             * </pre>
0603:             * <p>(The <code>&lt;?xml</code>' and whitespace have already been read.)
0604:             * @see #parseXMLDecl
0605:             * @see #checkEncoding
0606:             */
0607:            void parseTextDecl(boolean ignoreEncoding)
0608:                    throws java.lang.Exception {
0609:                String encodingName = null;
0610:
0611:                // Read an optional version.
0612:                if (tryRead("version")) {
0613:                    String version;
0614:                    parseEq();
0615:                    version = readLiteral(0);
0616:                    if (!version.equals("1.0")) {
0617:                        error("unsupported XML version", version, "1.0");
0618:                    }
0619:                    requireWhitespace();
0620:                }
0621:
0622:                // Read the encoding.
0623:                require("encoding");
0624:                parseEq();
0625:                encodingName = readLiteral(0);
0626:                checkEncoding(encodingName, ignoreEncoding);
0627:
0628:                skipWhitespace();
0629:                require("?>");
0630:            }
0631:
0632:            /**
0633:             * Check that the encoding specified makes sense.
0634:             * <p>Compare what the author has specified in the XML declaration
0635:             * or encoding PI with what we have detected.
0636:             * <p>This is also important for distinguishing among the various
0637:             * 7- and 8-bit encodings, such as ISO-LATIN-1 (I cannot autodetect
0638:             * those).
0639:             * @param encodingName The name of the encoding specified by the user.
0640:             * @see #parseXMLDecl
0641:             * @see #parseTextDecl
0642:             */
0643:            void checkEncoding(String encodingName, boolean ignoreEncoding)
0644:                    throws java.lang.Exception {
0645:                encodingName = encodingName.toUpperCase();
0646:
0647:                if (ignoreEncoding) {
0648:                    return;
0649:                }
0650:
0651:                switch (encoding) {
0652:                // 8-bit encodings
0653:                case ENCODING_UTF_8:
0654:                    if (encodingName.equals("ISO-8859-1")) {
0655:                        encoding = ENCODING_ISO_8859_1;
0656:                    } else if (!encodingName.equals("UTF-8")) {
0657:                        error("unsupported 8-bit encoding", encodingName,
0658:                                "UTF-8 or ISO-8859-1");
0659:                    }
0660:                    break;
0661:                // 16-bit encodings
0662:                case ENCODING_UCS_2_12:
0663:                case ENCODING_UCS_2_21:
0664:                    if (!encodingName.equals("ISO-10646-UCS-2")
0665:                            && !encodingName.equals("UTF-16")) {
0666:                        error("unsupported 16-bit encoding", encodingName,
0667:                                "ISO-10646-UCS-2");
0668:                    }
0669:                    break;
0670:                // 32-bit encodings
0671:                case ENCODING_UCS_4_1234:
0672:                case ENCODING_UCS_4_4321:
0673:                case ENCODING_UCS_4_2143:
0674:                case ENCODING_UCS_4_3412:
0675:                    if (!encodingName.equals("ISO-10646-UCS-4")) {
0676:                        error("unsupported 32-bit encoding", encodingName,
0677:                                "ISO-10646-UCS-4");
0678:                    }
0679:                }
0680:            }
0681:
0682:            /**
0683:             * Parse miscellaneous markup outside the document element and DOCTYPE
0684:             * declaration.
0685:             * <pre>
0686:             * [27] Misc ::= Comment | PI | S
0687:             * </pre>
0688:             */
0689:            void parseMisc() throws java.lang.Exception {
0690:                while (true) {
0691:                    skipWhitespace();
0692:                    if (tryRead("<?")) {
0693:                        parsePI();
0694:                    } else if (tryRead("<!--")) {
0695:                        parseComment();
0696:                    } else {
0697:                        return;
0698:                    }
0699:                }
0700:            }
0701:
0702:            /**
0703:             * Parse a document type declaration.
0704:             * <pre>
0705:             * [28] doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S?
0706:             *                      ('[' %markupdecl* ']' S?)? '&gt;'
0707:             * </pre>
0708:             * <p>(The <code>&lt;!DOCTYPE</code> has already been read.)
0709:             */
0710:            void parseDoctypedecl() throws java.lang.Exception {
0711:                char c;
0712:                String doctypeName, ids[];
0713:
0714:                // Read the document type name.
0715:                requireWhitespace();
0716:                doctypeName = readNmtoken(true);
0717:
0718:                // Read the ExternalIDs.
0719:                skipWhitespace();
0720:                ids = readExternalIds(false);
0721:
0722:                // Look for a declaration subset.
0723:                skipWhitespace();
0724:                if (tryRead('[')) {
0725:
0726:                    // loop until the subset ends
0727:                    while (true) {
0728:                        context = CONTEXT_DTD;
0729:                        skipWhitespace();
0730:                        context = CONTEXT_NONE;
0731:                        if (tryRead(']')) {
0732:                            break; // end of subset
0733:                        } else {
0734:                            context = CONTEXT_DTD;
0735:                            parseMarkupdecl();
0736:                            context = CONTEXT_NONE;
0737:                        }
0738:                    }
0739:                }
0740:
0741:                // Read the external subset, if any
0742:                if (ids[1] != null) {
0743:                    pushURL("[external subset]", ids[0], ids[1], null, null,
0744:                            null);
0745:
0746:                    // Loop until we end up back at '>'
0747:                    while (true) {
0748:                        context = CONTEXT_DTD;
0749:                        skipWhitespace();
0750:                        context = CONTEXT_NONE;
0751:                        if (tryRead('>')) {
0752:                            break;
0753:                        } else {
0754:                            context = CONTEXT_DTD;
0755:                            parseMarkupdecl();
0756:                            context = CONTEXT_NONE;
0757:                        }
0758:                    }
0759:                } else {
0760:                    // No external subset.
0761:                    skipWhitespace();
0762:                    require('>');
0763:                }
0764:
0765:                if (handler != null) {
0766:                    handler.doctypeDecl(doctypeName, ids[0], ids[1]);
0767:                }
0768:
0769:                // Expand general entities in
0770:                // default values of attributes.
0771:                // (Do this after the doctypeDecl
0772:                // event!).
0773:                // expandAttributeDefaultValues();
0774:            }
0775:
0776:            /**
0777:             * Parse a markup declaration in the internal or external DTD subset.
0778:             * <pre>
0779:             * [29] markupdecl ::= ( %elementdecl | %AttlistDecl | %EntityDecl |
0780:             *                       %NotationDecl | %PI | %S | %Comment |
0781:             *                       InternalPERef )
0782:             * [30] InternalPERef ::= PEReference
0783:             * [31] extSubset ::= (%markupdecl | %conditionalSect)*
0784:             * </pre>
0785:             */
0786:            void parseMarkupdecl() throws java.lang.Exception {
0787:                if (tryRead("<!ELEMENT")) {
0788:                    parseElementdecl();
0789:                } else if (tryRead("<!ATTLIST")) {
0790:                    parseAttlistDecl();
0791:                } else if (tryRead("<!ENTITY")) {
0792:                    parseEntityDecl();
0793:                } else if (tryRead("<!NOTATION")) {
0794:                    parseNotationDecl();
0795:                } else if (tryRead("<?")) {
0796:                    parsePI();
0797:                } else if (tryRead("<!--")) {
0798:                    parseComment();
0799:                } else if (tryRead("<![")) {
0800:                    parseConditionalSect();
0801:                } else {
0802:                    error("expected markup declaration", null, null);
0803:                }
0804:            }
0805:
0806:            /**
0807:             * Parse an element, with its tags.
0808:             * <pre>
0809:             * [33] STag ::= '&lt;' Name (S Attribute)* S? '&gt;' [WFC: unique Att spec]
0810:             * [38] element ::= EmptyElement | STag content ETag
0811:             * [39] EmptyElement ::= '&lt;' Name (S Attribute)* S? '/&gt;'
0812:             *                       [WFC: unique Att spec]
0813:             * </pre>
0814:             * <p>(The '&lt;' has already been read.)
0815:             * <p>NOTE: this method actually chains onto parseContent(), if necessary,
0816:             * and parseContent() will take care of calling parseETag().
0817:             */
0818:            void parseElement() throws java.lang.Exception {
0819:                String gi;
0820:                char c;
0821:                int oldElementContent = currentElementContent;
0822:                String oldElement = currentElement;
0823:
0824:                // This is the (global) counter for the
0825:                // array of specified attributes.
0826:                tagAttributePos = 0;
0827:
0828:                // Read the element type name.
0829:                gi = readNmtoken(true);
0830:
0831:                // Determine the current content type.
0832:                currentElement = gi;
0833:                currentElementContent = getElementContentType(gi);
0834:                if (currentElementContent == CONTENT_UNDECLARED) {
0835:                    currentElementContent = CONTENT_ANY;
0836:                }
0837:
0838:                // Read the attributes, if any.
0839:                // After this loop, we should be just
0840:                // in front of the closing delimiter.
0841:                skipWhitespace();
0842:                c = readCh();
0843:                while (c != '/' && c != '>') {
0844:                    unread(c);
0845:                    parseAttribute(gi);
0846:                    skipWhitespace();
0847:                    c = readCh();
0848:                }
0849:                unread(c);
0850:
0851:                // Supply any defaulted attributes.
0852:                Enumeration atts = declaredAttributes(gi);
0853:                if (atts != null) {
0854:                    String aname;
0855:                    loop: while (atts.hasMoreElements()) {
0856:                        aname = (String) atts.nextElement();
0857:                        // See if it was specified.
0858:                        for (int i = 0; i < tagAttributePos; i++) {
0859:                            if (tagAttributes[i] == aname) {
0860:                                continue loop;
0861:                            }
0862:                        }
0863:                        // I guess not...
0864:                        if (handler != null) {
0865:                            handler.attribute(aname, getAttributeExpandedValue(
0866:                                    gi, aname), false);
0867:                        }
0868:                    }
0869:                }
0870:
0871:                // Figure out if this is a start tag
0872:                // or an empty element, and dispatch an
0873:                // event accordingly.
0874:                c = readCh();
0875:                switch (c) {
0876:                case '>':
0877:                    if (handler != null) {
0878:                        handler.startElement(gi);
0879:                    }
0880:                    parseContent();
0881:                    break;
0882:                case '/':
0883:                    require('>');
0884:                    if (handler != null) {
0885:                        handler.startElement(gi);
0886:                        handler.endElement(gi);
0887:                    }
0888:                    break;
0889:                }
0890:
0891:                // Restore the previous state.
0892:                currentElement = oldElement;
0893:                currentElementContent = oldElementContent;
0894:            }
0895:
0896:            /**
0897:             * Parse an attribute assignment.
0898:             * <pre>
0899:             * [34] Attribute ::= Name Eq AttValue
0900:             * </pre>
0901:             * @param name The name of the attribute's element.
0902:             * @see XmlHandler#attribute
0903:             */
0904:            void parseAttribute(String name) throws java.lang.Exception {
0905:                String aname;
0906:                int type;
0907:                String value;
0908:
0909:                // Read the attribute name.
0910:                aname = readNmtoken(true).intern();
0911:                type = getAttributeDefaultValueType(name, aname);
0912:
0913:                // Parse '='
0914:                parseEq();
0915:
0916:                // Read the value, normalizing whitespace
0917:                // if it is not CDATA.
0918:                if (type == ATTRIBUTE_CDATA || type == ATTRIBUTE_UNDECLARED) {
0919:                    value = readLiteral(LIT_CHAR_REF | LIT_ENTITY_REF);
0920:                } else {
0921:                    value = readLiteral(LIT_CHAR_REF | LIT_ENTITY_REF
0922:                            | LIT_NORMALIZE);
0923:                }
0924:
0925:                // Inform the handler about the
0926:                // attribute.
0927:                if (handler != null) {
0928:                    handler.attribute(aname, value, true);
0929:                }
0930:                dataBufferPos = 0;
0931:
0932:                // Note that the attribute has been
0933:                // specified.
0934:                if (tagAttributePos == tagAttributes.length) {
0935:                    String newAttrib[] = new String[tagAttributes.length * 2];
0936:                    System.arraycopy(tagAttributes, 0, newAttrib, 0,
0937:                            tagAttributePos);
0938:                    tagAttributes = newAttrib;
0939:                }
0940:                tagAttributes[tagAttributePos++] = aname;
0941:            }
0942:
0943:            /**
0944:             * Parse an equals sign surrounded by optional whitespace.
0945:             * [35] Eq ::= S? '=' S?
0946:             */
0947:            void parseEq() throws java.lang.Exception {
0948:                skipWhitespace();
0949:                require('=');
0950:                skipWhitespace();
0951:            }
0952:
0953:            /**
0954:             * Parse an end tag.
0955:             * [36] ETag ::= '</' Name S? '>'
0956:             * *NOTE: parseContent() chains to here.
0957:             */
0958:            void parseETag() throws java.lang.Exception {
0959:                String name;
0960:                name = readNmtoken(true);
0961:                if (name != currentElement) {
0962:                    error("mismatched end tag", name, currentElement);
0963:                }
0964:                skipWhitespace();
0965:                require('>');
0966:                if (handler != null) {
0967:                    handler.endElement(name);
0968:                }
0969:            }
0970:
0971:            /**
0972:             * Parse the content of an element.
0973:             * [37] content ::= (element | PCData | Reference | CDSect | PI | Comment)*
0974:             * [68] Reference ::= EntityRef | CharRef
0975:             */
0976:            void parseContent() throws java.lang.Exception {
0977:                String data;
0978:                char c;
0979:
0980:                while (true) {
0981:
0982:                    switch (currentElementContent) {
0983:                    case CONTENT_ANY:
0984:                    case CONTENT_MIXED:
0985:                        parsePCData();
0986:                        break;
0987:                    case CONTENT_ELEMENTS:
0988:                        parseWhitespace();
0989:                        break;
0990:                    }
0991:
0992:                    // Handle delimiters
0993:                    c = readCh();
0994:                    switch (c) {
0995:
0996:                    case '&': // Found "&"
0997:                        c = readCh();
0998:                        if (c == '#') {
0999:                            parseCharRef();
1000:                        } else {
1001:                            unread(c);
1002:                            parseEntityRef(true);
1003:                        }
1004:                        break;
1005:
1006:                    case '<': // Found "<"
1007:
1008:                        c = readCh();
1009:                        switch (c) {
1010:
1011:                        case '!': // Found "<!"
1012:                            c = readCh();
1013:                            switch (c) {
1014:                            case '-': // Found "<!-"
1015:                                require('-');
1016:                                parseComment();
1017:                                break;
1018:                            case '[': // Found "<!["
1019:                                require("CDATA[");
1020:                                parseCDSect();
1021:                                break;
1022:                            default:
1023:                                error("expected comment or CDATA section", c,
1024:                                        null);
1025:                                break;
1026:                            }
1027:                            break;
1028:
1029:                        case '?': // Found "<?"
1030:                            dataBufferFlush();
1031:                            parsePI();
1032:                            break;
1033:
1034:                        case '/': // Found "</"
1035:                            dataBufferFlush();
1036:                            parseETag();
1037:                            return;
1038:
1039:                        default: // Found "<" followed by something else
1040:                            dataBufferFlush();
1041:                            unread(c);
1042:                            parseElement();
1043:                            break;
1044:                        }
1045:                    }
1046:                }
1047:            }
1048:
1049:            /**
1050:             * Parse an element type declaration.
1051:             * [40] elementdecl ::= '<!ELEMENT' S %Name S (%S S)? %contentspec S? '>'
1052:             *                      [VC: Unique Element Declaration]
1053:             * *NOTE: the '<!ELEMENT' has already been read.
1054:             */
1055:            void parseElementdecl() throws java.lang.Exception {
1056:                String name;
1057:
1058:                requireWhitespace();
1059:                // Read the element type name.
1060:                name = readNmtoken(true);
1061:
1062:                requireWhitespace();
1063:                // Read the content model.
1064:                parseContentspec(name);
1065:
1066:                skipWhitespace();
1067:                require('>');
1068:            }
1069:
1070:            /**
1071:             * Content specification.
1072:             * [41] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements
1073:             */
1074:            void parseContentspec(String name) throws java.lang.Exception {
1075:                if (tryRead("EMPTY")) {
1076:                    setElement(name, CONTENT_EMPTY, null, null);
1077:                    return;
1078:                } else if (tryRead("ANY")) {
1079:                    setElement(name, CONTENT_ANY, null, null);
1080:                    return;
1081:                } else {
1082:                    require('(');
1083:                    dataBufferAppend('(');
1084:                    skipWhitespace();
1085:                    if (tryRead("#PCDATA")) {
1086:                        dataBufferAppend("#PCDATA");
1087:                        parseMixed();
1088:                        setElement(name, CONTENT_MIXED, dataBufferToString(),
1089:                                null);
1090:                    } else {
1091:                        parseElements();
1092:                        setElement(name, CONTENT_ELEMENTS,
1093:                                dataBufferToString(), null);
1094:                    }
1095:                }
1096:            }
1097:
1098:            /**
1099:             * Parse an element-content model.
1100:             * [42] elements ::= (choice | seq) ('?' | '*' | '+')?
1101:             * [44] cps ::= S? %cp S?
1102:             * [45] choice ::= '(' S? %ctokplus (S? '|' S? %ctoks)* S? ')'
1103:             * [46] ctokplus ::= cps ('|' cps)+
1104:             * [47] ctoks ::= cps ('|' cps)*
1105:             * [48] seq ::= '(' S? %stoks (S? ',' S? %stoks)* S? ')'
1106:             * [49] stoks ::= cps (',' cps)*
1107:             * *NOTE: the opening '(' and S have already been read.
1108:             * *TODO: go over parameter entity boundaries more carefully.
1109:             */
1110:            void parseElements() throws java.lang.Exception {
1111:                char c;
1112:                char sep;
1113:
1114:                // Parse the first content particle
1115:                skipWhitespace();
1116:                parseCp();
1117:
1118:                // Check for end or for a separator.
1119:                skipWhitespace();
1120:                c = readCh();
1121:                switch (c) {
1122:                case ')':
1123:                    dataBufferAppend(')');
1124:                    c = readCh();
1125:                    switch (c) {
1126:                    case '*':
1127:                    case '+':
1128:                    case '?':
1129:                        dataBufferAppend(c);
1130:                        break;
1131:                    default:
1132:                        unread(c);
1133:                    }
1134:                    return;
1135:                case ',': // Register the separator.
1136:                case '|':
1137:                    sep = c;
1138:                    dataBufferAppend(c);
1139:                    break;
1140:                default:
1141:                    error("bad separator in content model", c, null);
1142:                    return;
1143:                }
1144:
1145:                // Parse the rest of the content model.
1146:                while (true) {
1147:                    skipWhitespace();
1148:                    parseCp();
1149:                    skipWhitespace();
1150:                    c = readCh();
1151:                    if (c == ')') {
1152:                        dataBufferAppend(')');
1153:                        break;
1154:                    } else if (c != sep) {
1155:                        error("bad separator in content model", c, null);
1156:                        return;
1157:                    } else {
1158:                        dataBufferAppend(c);
1159:                    }
1160:                }
1161:
1162:                // Check for the occurrence indicator.
1163:                c = readCh();
1164:                switch (c) {
1165:                case '?':
1166:                case '*':
1167:                case '+':
1168:                    dataBufferAppend(c);
1169:                    return;
1170:                default:
1171:                    unread(c);
1172:                    return;
1173:                }
1174:            }
1175:
1176:            /**
1177:             * Parse a content particle.
1178:             * [43] cp ::= (Name | choice | seq) ('?' | '*' | '+')
1179:             * *NOTE: I actually use a slightly different production here:
1180:             *        cp ::= (elements | (Name ('?' | '*' | '+')?))
1181:             */
1182:            void parseCp() throws java.lang.Exception {
1183:                char c;
1184:
1185:                if (tryRead('(')) {
1186:                    dataBufferAppend('(');
1187:                    parseElements();
1188:                } else {
1189:                    dataBufferAppend(readNmtoken(true));
1190:                    c = readCh();
1191:                    switch (c) {
1192:                    case '?':
1193:                    case '*':
1194:                    case '+':
1195:                        dataBufferAppend(c);
1196:                        break;
1197:                    default:
1198:                        unread(c);
1199:                        break;
1200:                    }
1201:                }
1202:            }
1203:
1204:            /**
1205:             * Parse mixed content.
1206:             * [50] Mixed ::= '(' S? %( %'#PCDATA' (S? '|' S? %Mtoks)* ) S? ')*'
1207:             *              | '(' S? %('#PCDATA') S? ')'
1208:             * [51] Mtoks ::= %Name (S? '|' S? %Name)*
1209:             * *NOTE: the S and '#PCDATA' have already been read.
1210:             */
1211:            void parseMixed() throws java.lang.Exception {
1212:                char c;
1213:
1214:                // Check for PCDATA alone.
1215:                skipWhitespace();
1216:                if (tryRead(')')) {
1217:                    dataBufferAppend(")*");
1218:                    tryRead('*');
1219:                    return;
1220:                }
1221:
1222:                // Parse mixed content.
1223:                skipWhitespace();
1224:                while (!tryRead(")*")) {
1225:                    require('|');
1226:                    dataBufferAppend('|');
1227:                    skipWhitespace();
1228:                    dataBufferAppend(readNmtoken(true));
1229:                    skipWhitespace();
1230:                }
1231:                dataBufferAppend(")*");
1232:            }
1233:
1234:            /**
1235:             * Parse an attribute list declaration.
1236:             * [52] AttlistDecl ::= '<!ATTLIST' S %Name S? %AttDef+ S? '>'
1237:             * *NOTE: the '<!ATTLIST' has already been read.
1238:             */
1239:            void parseAttlistDecl() throws java.lang.Exception {
1240:                String elementName;
1241:
1242:                requireWhitespace();
1243:                elementName = readNmtoken(true);
1244:                requireWhitespace();
1245:                while (!tryRead('>')) {
1246:                    parseAttDef(elementName);
1247:                    skipWhitespace();
1248:                }
1249:            }
1250:
1251:            /**
1252:             * Parse a single attribute definition.
1253:             * [53] AttDef ::= S %Name S %AttType S %Default
1254:             */
1255:            void parseAttDef(String elementName) throws java.lang.Exception {
1256:                String name;
1257:                int type;
1258:                String enumeration = null;
1259:
1260:                // Read the attribute name.
1261:                name = readNmtoken(true);
1262:
1263:                // Read the attribute type.
1264:                requireWhitespace();
1265:                type = readAttType();
1266:
1267:                // Get the string of enumerated values
1268:                // if necessary.
1269:                if (type == ATTRIBUTE_ENUMERATED || type == ATTRIBUTE_NOTATION) {
1270:                    enumeration = dataBufferToString();
1271:                }
1272:
1273:                // Read the default value.
1274:                requireWhitespace();
1275:                parseDefault(elementName, name, type, enumeration);
1276:            }
1277:
1278:            /**
1279:             * Parse the attribute type.
1280:             * [54] AttType ::= StringType | TokenizedType | EnumeratedType
1281:             * [55] StringType ::= 'CDATA'
1282:             * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' |
1283:             *                        'NMTOKEN' | 'NMTOKENS'
1284:             * [57] EnumeratedType ::= NotationType | Enumeration
1285:             * *TODO: validate the type!!
1286:             */
1287:            int readAttType() throws java.lang.Exception {
1288:                String typeString;
1289:                Integer type;
1290:
1291:                if (tryRead('(')) {
1292:                    parseEnumeration();
1293:                    return ATTRIBUTE_ENUMERATED;
1294:                } else {
1295:                    typeString = readNmtoken(true);
1296:                    if (typeString.equals("NOTATION")) {
1297:                        parseNotationType();
1298:                    }
1299:                    type = (Integer) attributeTypeHash.get(typeString);
1300:                    if (type == null) {
1301:                        error("illegal attribute type", typeString, null);
1302:                        return ATTRIBUTE_UNDECLARED;
1303:                    } else {
1304:                        return type.intValue();
1305:                    }
1306:                }
1307:            }
1308:
1309:            /**
1310:             * Parse an enumeration.
1311:             * [60] Enumeration ::= '(' S? %Etoks (S? '|' S? %Etoks)* S? ')'
1312:             * [61] Etoks ::= %Nmtoken (S? '|' S? %Nmtoken)*
1313:             * *NOTE: the '(' has already been read.
1314:             */
1315:            void parseEnumeration() throws java.lang.Exception {
1316:                char c;
1317:
1318:                dataBufferAppend('(');
1319:
1320:                // Read the first token.
1321:                skipWhitespace();
1322:                dataBufferAppend(readNmtoken(true));
1323:                // Read the remaining tokens.
1324:                skipWhitespace();
1325:                while (!tryRead(')')) {
1326:                    require('|');
1327:                    dataBufferAppend('|');
1328:                    skipWhitespace();
1329:                    dataBufferAppend(readNmtoken(true));
1330:                    skipWhitespace();
1331:                }
1332:                dataBufferAppend(')');
1333:            }
1334:
1335:            /**
1336:             * Parse a notation type for an attribute.
1337:             * [58] NotationType ::= %'NOTATION' S '(' S? %Ntoks (S? '|' S? %Ntoks)*
1338:             *                       S? ')'
1339:             * [59] Ntoks ::= %Name (S? '|' S? %Name)
1340:             * *NOTE: the 'NOTATION' has already been read
1341:             */
1342:            void parseNotationType() throws java.lang.Exception {
1343:                requireWhitespace();
1344:                require('(');
1345:
1346:                parseEnumeration();
1347:            }
1348:
1349:            /**
1350:             * Parse the default value for an attribute.
1351:             * [62] Default ::= '#REQUIRED' | '#IMPLIED' | ((%'#FIXED' S)? %AttValue
1352:             */
1353:            void parseDefault(String elementName, String name, int type,
1354:                    String enumeration) throws java.lang.Exception {
1355:                int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
1356:                String value = null;
1357:                boolean normalizeWSFlag;
1358:
1359:                if (tryRead('#')) {
1360:                    if (tryRead("FIXED")) {
1361:                        valueType = ATTRIBUTE_DEFAULT_FIXED;
1362:                        requireWhitespace();
1363:                        context = CONTEXT_ATTRIBUTEVALUE;
1364:                        value = readLiteral(LIT_CHAR_REF);
1365:                        context = CONTEXT_DTD;
1366:                    } else if (tryRead("REQUIRED")) {
1367:                        valueType = ATTRIBUTE_DEFAULT_REQUIRED;
1368:                    } else if (tryRead("IMPLIED")) {
1369:                        valueType = ATTRIBUTE_DEFAULT_IMPLIED;
1370:                    } else {
1371:                        error("illegal keyword for attribute default value",
1372:                                null, null);
1373:                    }
1374:                } else {
1375:                    context = CONTEXT_ATTRIBUTEVALUE;
1376:                    value = readLiteral(LIT_CHAR_REF);
1377:                    context = CONTEXT_DTD;
1378:                }
1379:                setAttribute(elementName, name, type, enumeration, value,
1380:                        valueType);
1381:            }
1382:
1383:            /**
1384:             * Parse a conditional section.
1385:             * [63] conditionalSect ::= includeSect || ignoreSect
1386:             * [64] includeSect ::= '<![' %'INCLUDE' '[' (%markupdecl*)* ']]>'
1387:             * [65] ignoreSect ::= '<![' %'IGNORE' '[' ignoreSectContents* ']]>'
1388:             * [66] ignoreSectContents ::= ((SkipLit | Comment | PI) -(Char* ']]>'))
1389:             *                           | ('<![' ignoreSectContents* ']]>')
1390:             *                           | (Char - (']' | [<'"]))
1391:             *                           | ('<!' (Char - ('-' | '[')))
1392:             * *NOTE: the '<![' has already been read.
1393:             * *TODO: verify that I am handling ignoreSectContents right.
1394:             */
1395:            void parseConditionalSect() throws java.lang.Exception {
1396:                skipWhitespace();
1397:                if (tryRead("INCLUDE")) {
1398:                    skipWhitespace();
1399:                    require('[');
1400:                    skipWhitespace();
1401:                    while (!tryRead("]]>")) {
1402:                        parseMarkupdecl();
1403:                        skipWhitespace();
1404:                    }
1405:                } else if (tryRead("IGNORE")) {
1406:                    skipWhitespace();
1407:                    require('[');
1408:                    int nesting = 1;
1409:                    char c;
1410:                    for (int nest = 1; nest > 0;) {
1411:                        c = readCh();
1412:                        switch (c) {
1413:                        case '<':
1414:                            if (tryRead("![")) {
1415:                                nest++;
1416:                            }
1417:                        case ']':
1418:                            if (tryRead("]>")) {
1419:                                nest--;
1420:                            }
1421:                        }
1422:                    }
1423:                } else {
1424:                    error(
1425:                            "conditional section must begin with INCLUDE or IGNORE",
1426:                            null, null);
1427:                }
1428:            }
1429:
1430:            /**
1431:             * Read a character reference.
1432:             * [67] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1433:             * *NOTE: the '&#' has already been read.
1434:             */
1435:            void parseCharRef() throws java.lang.Exception {
1436:                int value = 0;
1437:                char c;
1438:
1439:                if (tryRead('x')) {
1440:                    loop1: while (true) {
1441:                        c = readCh();
1442:                        switch (c) {
1443:                        case '0':
1444:                        case '1':
1445:                        case '2':
1446:                        case '3':
1447:                        case '4':
1448:                        case '5':
1449:                        case '6':
1450:                        case '7':
1451:                        case '8':
1452:                        case '9':
1453:                        case 'a':
1454:                        case 'A':
1455:                        case 'b':
1456:                        case 'B':
1457:                        case 'c':
1458:                        case 'C':
1459:                        case 'd':
1460:                        case 'D':
1461:                        case 'e':
1462:                        case 'E':
1463:                        case 'f':
1464:                        case 'F':
1465:                            value *= 16;
1466:                            value += Integer.parseInt(new Character(c)
1467:                                    .toString(), 16);
1468:                            break;
1469:                        case ';':
1470:                            break loop1;
1471:                        default:
1472:                            error("illegal character in character reference",
1473:                                    c, null);
1474:                            break loop1;
1475:                        }
1476:                    }
1477:                } else {
1478:                    loop2: while (true) {
1479:                        c = readCh();
1480:                        switch (c) {
1481:                        case '0':
1482:                        case '1':
1483:                        case '2':
1484:                        case '3':
1485:                        case '4':
1486:                        case '5':
1487:                        case '6':
1488:                        case '7':
1489:                        case '8':
1490:                        case '9':
1491:                            value *= 10;
1492:                            value += Integer.parseInt(new Character(c)
1493:                                    .toString(), 10);
1494:                            break;
1495:                        case ';':
1496:                            break loop2;
1497:                        default:
1498:                            error("illegal character in character reference",
1499:                                    c, null);
1500:                            break loop2;
1501:                        }
1502:                    }
1503:                }
1504:
1505:                // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz
1506:                //  (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz: 
1507:                if (value <= 0x0000ffff) {
1508:                    // no surrogates needed
1509:                    dataBufferAppend((char) value);
1510:                } else if (value <= 0x000fffff) {
1511:                    // > 16 bits, surrogate needed
1512:                    dataBufferAppend((char) (0xd8 | ((value & 0x000ffc00) >> 10)));
1513:                    dataBufferAppend((char) (0xdc | (value & 0x0003ff)));
1514:                } else {
1515:                    // too big for surrogate
1516:                    error("character reference " + value
1517:                            + " is too large for UTF-16", new Integer(value)
1518:                            .toString(), null);
1519:                }
1520:            }
1521:
1522:            /**
1523:             * Parse a reference.
1524:             * [69] EntityRef ::= '&' Name ';'
1525:             * *NOTE: the '&' has already been read.
1526:             * @param externalAllowed External entities are allowed here.
1527:             */
1528:            void parseEntityRef(boolean externalAllowed)
1529:                    throws java.lang.Exception {
1530:                String name;
1531:
1532:                name = readNmtoken(true);
1533:                require(';');
1534:                switch (getEntityType(name)) {
1535:                case ENTITY_UNDECLARED:
1536:                    error("reference to undeclared entity", name, null);
1537:                    break;
1538:                case ENTITY_INTERNAL:
1539:                    pushString(name, getEntityValue(name));
1540:                    break;
1541:                case ENTITY_TEXT:
1542:                    if (externalAllowed) {
1543:                        pushURL(name, getEntityPublicId(name),
1544:                                getEntitySystemId(name), null, null, null);
1545:                    } else {
1546:                        error(
1547:                                "reference to external entity in attribute value.",
1548:                                name, null);
1549:                    }
1550:                    break;
1551:                case ENTITY_NDATA:
1552:                    if (externalAllowed) {
1553:                        error("data entity reference in content", name, null);
1554:                    } else {
1555:                        error(
1556:                                "reference to external entity in attribute value.",
1557:                                name, null);
1558:                    }
1559:                    break;
1560:                }
1561:            }
1562:
1563:            /**
1564:             * Parse a parameter entity reference.
1565:             * [70] PEReference ::= '%' Name ';'
1566:             * *NOTE: the '%' has already been read.
1567:             */
1568:            void parsePEReference(boolean isEntityValue)
1569:                    throws java.lang.Exception {
1570:                String name;
1571:
1572:                name = "%" + readNmtoken(true);
1573:                require(';');
1574:                switch (getEntityType(name)) {
1575:                case ENTITY_UNDECLARED:
1576:                    error("reference to undeclared parameter entity", name,
1577:                            null);
1578:                    break;
1579:                case ENTITY_INTERNAL:
1580:                    if (isEntityValue) {
1581:                        pushString(name, getEntityValue(name));
1582:                    } else {
1583:                        pushString(name, " " + getEntityValue(name) + ' ');
1584:                    }
1585:                    break;
1586:                case ENTITY_TEXT:
1587:                    if (isEntityValue) {
1588:                        pushString(null, " ");
1589:                    }
1590:                    pushURL(name, getEntityPublicId(name),
1591:                            getEntitySystemId(name), null, null, null);
1592:                    if (isEntityValue) {
1593:                        pushString(null, " ");
1594:                    }
1595:                    break;
1596:                }
1597:            }
1598:
1599:            /**
1600:             * Parse an entity declaration.
1601:             * [71] EntityDecl ::= '<!ENTITY' S %Name S %EntityDef S? '>'
1602:             *                   | '<!ENTITY' S '%' S %Name S %EntityDef S? '>'
1603:             * [72] EntityDef ::= EntityValue | ExternalDef
1604:             * [73] ExternalDef ::= ExternalID %NDataDecl?
1605:             * [74] ExternalID ::= 'SYSTEM' S SystemLiteral
1606:             *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
1607:             * [75] NDataDecl ::= S %'NDATA' S %Name
1608:             * *NOTE: the '<!ENTITY' has already been read.
1609:             */
1610:            void parseEntityDecl() throws java.lang.Exception {
1611:                char c;
1612:                boolean peFlag = false;
1613:                String name, value, notationName, ids[];
1614:
1615:                // Check for a parameter entity.
1616:                requireWhitespace();
1617:                if (tryRead('%')) {
1618:                    peFlag = true;
1619:                    requireWhitespace();
1620:                }
1621:
1622:                // Read the entity name, and prepend
1623:                // '%' if necessary.
1624:                name = readNmtoken(true);
1625:                if (peFlag) {
1626:                    name = "%" + name;
1627:                }
1628:
1629:                // Read the entity value.
1630:                requireWhitespace();
1631:                c = readCh();
1632:                unread(c);
1633:                if (c == '"' || c == '\'') {
1634:                    // Internal entity.
1635:                    context = CONTEXT_ENTITYVALUE;
1636:                    value = readLiteral(LIT_CHAR_REF | LIT_PE_REF);
1637:                    context = CONTEXT_DTD;
1638:                    setInternalEntity(name, value);
1639:                } else {
1640:                    // Read the external IDs
1641:                    ids = readExternalIds(false);
1642:                    if (ids[1] == null) {
1643:                        error("system identifer missing", name, null);
1644:                    }
1645:
1646:                    // Check for NDATA declaration.
1647:                    skipWhitespace();
1648:                    if (tryRead("NDATA")) {
1649:                        requireWhitespace();
1650:                        notationName = readNmtoken(true);
1651:                        setExternalDataEntity(name, ids[0], ids[1],
1652:                                notationName);
1653:                    } else {
1654:                        setExternalTextEntity(name, ids[0], ids[1]);
1655:                    }
1656:                }
1657:
1658:                // Finish the declaration.
1659:                skipWhitespace();
1660:                require('>');
1661:            }
1662:
1663:            /**
1664:             * Parse a notation declaration.
1665:             * [81] NotationDecl ::= '<!NOTATION' S %Name S %ExternalID S? '>'
1666:             * *NOTE: the '<!NOTATION' has already been read.
1667:             */
1668:            void parseNotationDecl() throws java.lang.Exception {
1669:                String nname, ids[];
1670:
1671:                requireWhitespace();
1672:                nname = readNmtoken(true);
1673:
1674:                requireWhitespace();
1675:
1676:                // Read the external identifiers.
1677:                ids = readExternalIds(true);
1678:                if (ids[0] == null && ids[1] == null) {
1679:                    error("external identifer missing", nname, null);
1680:                }
1681:
1682:                // Register the notation.
1683:                setNotation(nname, ids[0], ids[1]);
1684:
1685:                skipWhitespace();
1686:                require('>');
1687:            }
1688:
1689:            /**
1690:             * Parse PCDATA.
1691:             * <pre>
1692:             * [16] PCData ::= [^&lt;&amp;]*
1693:             * </pre>
1694:             * <p>The trick here is that the data stays in the dataBuffer without
1695:             * necessarily being converted to a string right away.
1696:             */
1697:            void parsePCData() throws java.lang.Exception {
1698:                char c;
1699:
1700:                // Start with a little cheat -- in most
1701:                // cases, the entire sequence of
1702:                // character data will already be in
1703:                // the readBuffer; if not, fall through to
1704:                // the normal approach.
1705:                if (USE_CHEATS) {
1706:                    int lineAugment = 0;
1707:                    int columnAugment = 0;
1708:
1709:                    loop: for (int i = readBufferPos; i < readBufferLength; i++) {
1710:                        switch (readBuffer[i]) {
1711:                        case '\n':
1712:                            lineAugment++;
1713:                            columnAugment = 0;
1714:                            break;
1715:                        case '&':
1716:                        case '<':
1717:                            int start = readBufferPos;
1718:                            columnAugment++;
1719:                            readBufferPos = i;
1720:                            if (lineAugment > 0) {
1721:                                line += lineAugment;
1722:                                column = columnAugment;
1723:                            } else {
1724:                                column += columnAugment;
1725:                            }
1726:                            dataBufferAppend(readBuffer, start, i - start);
1727:                            return;
1728:                        default:
1729:                            columnAugment++;
1730:                        }
1731:                    }
1732:                }
1733:
1734:                // OK, the cheat didn't work; start over
1735:                // and do it by the book.
1736:                while (true) {
1737:                    c = readCh();
1738:                    switch (c) {
1739:                    case '<':
1740:                    case '&':
1741:                        unread(c);
1742:                        return;
1743:                    default:
1744:                        dataBufferAppend(c);
1745:                        break;
1746:                    }
1747:                }
1748:            }
1749:
1750:            //////////////////////////////////////////////////////////////////////
1751:            // High-level reading and scanning methods.
1752:            //////////////////////////////////////////////////////////////////////
1753:
1754:            /**
1755:             * Require whitespace characters.
1756:             * [1] S ::= (#x20 | #x9 | #xd | #xa)+
1757:             */
1758:            void requireWhitespace() throws java.lang.Exception {
1759:                char c = readCh();
1760:                if (isWhitespace(c)) {
1761:                    skipWhitespace();
1762:                } else {
1763:                    error("whitespace expected", c, null);
1764:                }
1765:            }
1766:
1767:            /**
1768:             * Parse whitespace characters, and leave them in the data buffer.
1769:             */
1770:            void parseWhitespace() throws java.lang.Exception {
1771:                char c = readCh();
1772:                while (isWhitespace(c)) {
1773:                    dataBufferAppend(c);
1774:                    c = readCh();
1775:                }
1776:                unread(c);
1777:            }
1778:
1779:            /**
1780:             * Skip whitespace characters.
1781:             * [1] S ::= (#x20 | #x9 | #xd | #xa)+
1782:             */
1783:            void skipWhitespace() throws java.lang.Exception {
1784:                // Start with a little cheat.  Most of
1785:                // the time, the white space will fall
1786:                // within the current read buffer; if
1787:                // not, then fall through.
1788:                if (USE_CHEATS) {
1789:                    int lineAugment = 0;
1790:                    int columnAugment = 0;
1791:
1792:                    loop: for (int i = readBufferPos; i < readBufferLength; i++) {
1793:                        switch (readBuffer[i]) {
1794:                        case ' ':
1795:                        case '\t':
1796:                        case '\r':
1797:                            columnAugment++;
1798:                            break;
1799:                        case '\n':
1800:                            lineAugment++;
1801:                            columnAugment = 0;
1802:                            break;
1803:                        case '%':
1804:                            if (context == CONTEXT_DTD
1805:                                    || context == CONTEXT_ENTITYVALUE) {
1806:                                break loop;
1807:                            } // else fall through...
1808:                        default:
1809:                            readBufferPos = i;
1810:                            if (lineAugment > 0) {
1811:                                line += lineAugment;
1812:                                column = columnAugment;
1813:                            } else {
1814:                                column += columnAugment;
1815:                            }
1816:                            return;
1817:                        }
1818:                    }
1819:                }
1820:
1821:                // OK, do it by the book.
1822:                char c = readCh();
1823:                while (isWhitespace(c)) {
1824:                    c = readCh();
1825:                }
1826:                unread(c);
1827:            }
1828:
1829:            /**
1830:             * Read a name or name token.
1831:             * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1832:             * [7] Nmtoken ::= (NameChar)+
1833:             * *NOTE: [6] is implemented implicitly where required.
1834:             */
1835:            String readNmtoken(boolean isName) throws java.lang.Exception {
1836:                char c;
1837:
1838:                if (USE_CHEATS) {
1839:                    loop: for (int i = readBufferPos; i < readBufferLength; i++) {
1840:                        switch (readBuffer[i]) {
1841:                        case '%':
1842:                            if (context == CONTEXT_DTD
1843:                                    || context == CONTEXT_ENTITYVALUE) {
1844:                                break loop;
1845:                            } // else fall through...
1846:                        case '<':
1847:                        case '>':
1848:                        case '&':
1849:                        case ',':
1850:                        case '|':
1851:                        case '*':
1852:                        case '+':
1853:                        case '?':
1854:                        case ')':
1855:                        case '=':
1856:                        case '\'':
1857:                        case '"':
1858:                        case '[':
1859:                        case ' ':
1860:                        case '\t':
1861:                        case '\r':
1862:                        case '\n':
1863:                        case ';':
1864:                        case '/':
1865:                        case '#':
1866:                            int start = readBufferPos;
1867:                            if (i == start) {
1868:                                error("name expected", readBuffer[i], null);
1869:                            }
1870:                            readBufferPos = i;
1871:                            return intern(readBuffer, start, i - start);
1872:                        }
1873:                    }
1874:                }
1875:
1876:                nameBufferPos = 0;
1877:
1878:                // Read the first character.
1879:                loop: while (true) {
1880:                    c = readCh();
1881:                    switch (c) {
1882:                    case '%':
1883:                    case '<':
1884:                    case '>':
1885:                    case '&':
1886:                    case ',':
1887:                    case '|':
1888:                    case '*':
1889:                    case '+':
1890:                    case '?':
1891:                    case ')':
1892:                    case '=':
1893:                    case '\'':
1894:                    case '"':
1895:                    case '[':
1896:                    case ' ':
1897:                    case '\t':
1898:                    case '\n':
1899:                    case '\r':
1900:                    case ';':
1901:                    case '/':
1902:                        unread(c);
1903:                        if (nameBufferPos == 0) {
1904:                            error("name expected", null, null);
1905:                        }
1906:                        String s = intern(nameBuffer, 0, nameBufferPos);
1907:                        nameBufferPos = 0;
1908:                        return s;
1909:                    default:
1910:                        nameBuffer = (char[]) extendArray(nameBuffer,
1911:                                nameBuffer.length, nameBufferPos);
1912:                        nameBuffer[nameBufferPos++] = c;
1913:                    }
1914:                }
1915:            }
1916:
1917:            /**
1918:             * Read a literal.
1919:             * [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
1920:             *                 | "'" ([^<&'] | Reference)* "'"
1921:             * [11] SystemLiteral ::= '"' URLchar* '"' | "'" (URLchar - "'")* "'"
1922:             * [13] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1923:             * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
1924:             *                   | "'" ([^%&'] | PEReference | Reference)* "'"
1925:             */
1926:            String readLiteral(int flags) throws java.lang.Exception {
1927:                char delim, c;
1928:                int startLine = line;
1929:
1930:                // Find the delimiter.
1931:                delim = readCh();
1932:                if (delim != '"' && delim != '\'' && delim != (char) 0) {
1933:                    error("expected '\"' or \"'\"", delim, null);
1934:                    return null;
1935:                }
1936:
1937:                // Read the literal.
1938:                try {
1939:                    c = readCh();
1940:
1941:                    loop: while (c != delim) {
1942:                        switch (c) {
1943:                        // Literals never have line ends
1944:                        case '\n':
1945:                        case '\r':
1946:                            c = ' ';
1947:                            break;
1948:                        // References may be allowed
1949:                        case '&':
1950:                            if ((flags & LIT_CHAR_REF) > 0) {
1951:                                c = readCh();
1952:                                if (c == '#') {
1953:                                    parseCharRef();
1954:                                    c = readCh();
1955:                                    continue loop; // check the next character
1956:                                } else if ((flags & LIT_ENTITY_REF) > 0) {
1957:                                    unread(c);
1958:                                    parseEntityRef(false);
1959:                                    c = readCh();
1960:                                    continue loop;
1961:                                } else {
1962:                                    dataBufferAppend('&');
1963:                                }
1964:                            }
1965:                            break;
1966:
1967:                        default:
1968:                            break;
1969:                        }
1970:                        dataBufferAppend(c);
1971:                        c = readCh();
1972:                    }
1973:                } catch (EOFException e) {
1974:                    error(
1975:                            "end of input while looking for delimiter (started on line "
1976:                                    + startLine + ')', null, new Character(
1977:                                    delim).toString());
1978:                }
1979:
1980:                // Normalise whitespace if necessary.
1981:                if ((flags & LIT_NORMALIZE) > 0) {
1982:                    dataBufferNormalize();
1983:                }
1984:
1985:                // Return the value.
1986:                return dataBufferToString();
1987:            }
1988:
1989:            /**
1990:             * Try reading external identifiers.
1991:             * <p>The system identifier is not required for notations.
1992:             * @param inNotation Are we in a notation?
1993:             * @return A two-member String array containing the identifiers.
1994:             */
1995:            String[] readExternalIds(boolean inNotation)
1996:                    throws java.lang.Exception {
1997:                char c;
1998:                String ids[] = new String[2];
1999:
2000:                if (tryRead("PUBLIC")) {
2001:                    requireWhitespace();
2002:                    ids[0] = readLiteral(LIT_NORMALIZE); // public id
2003:                    if (inNotation) {
2004:                        skipWhitespace();
2005:                        if (tryRead('"') || tryRead('\'')) {
2006:                            ids[1] = readLiteral(0);
2007:                        }
2008:                    } else {
2009:                        requireWhitespace();
2010:                        ids[1] = readLiteral(0); // system id
2011:                    }
2012:                } else if (tryRead("SYSTEM")) {
2013:                    requireWhitespace();
2014:                    ids[1] = readLiteral(0); // system id
2015:                }
2016:
2017:                return ids;
2018:            }
2019:
2020:            /**
2021:             * Test if a character is whitespace.
2022:             * <pre>
2023:             * [1] S ::= (#x20 | #x9 | #xd | #xa)+
2024:             * </pre>
2025:             * @param c The character to test.
2026:             * @return true if the character is whitespace.
2027:             */
2028:            final boolean isWhitespace(char c) {
2029:                switch ((int) c) {
2030:                case 0x20:
2031:                case 0x09:
2032:                case 0x0d:
2033:                case 0x0a:
2034:                    return true;
2035:                default:
2036:                    return false;
2037:                }
2038:            }
2039:
2040:            //////////////////////////////////////////////////////////////////////
2041:            // Utility routines.
2042:            //////////////////////////////////////////////////////////////////////
2043:
2044:            /**
2045:             * Add a character to the data buffer.
2046:             */
2047:            void dataBufferAppend(char c) {
2048:                // Expand buffer if necessary.
2049:                dataBuffer = (char[]) extendArray(dataBuffer,
2050:                        dataBuffer.length, dataBufferPos);
2051:                dataBuffer[dataBufferPos++] = c;
2052:            }
2053:
2054:            /** 
2055:             * Add a string to the data buffer.
2056:             */
2057:            void dataBufferAppend(String s) {
2058:                dataBufferAppend(s.toCharArray(), 0, s.length());
2059:            }
2060:
2061:            /**
2062:             * Append (part of) a character array to the data buffer.
2063:             */
2064:            void dataBufferAppend(char ch[], int start, int length) {
2065:                dataBuffer = (char[]) extendArray(dataBuffer,
2066:                        dataBuffer.length, dataBufferPos + length);
2067:                System.arraycopy((Object) ch, start, (Object) dataBuffer,
2068:                        dataBufferPos, length);
2069:                dataBufferPos += length;
2070:            }
2071:
2072:            /**
2073:             * Normalise whitespace in the data buffer.
2074:             */
2075:            void dataBufferNormalize() {
2076:                int i = 0;
2077:                int j = 0;
2078:                int end = dataBufferPos;
2079:
2080:                // Skip whitespace at the start.
2081:                while (j < end && isWhitespace(dataBuffer[j])) {
2082:                    j++;
2083:                }
2084:
2085:                // Skip whitespace at the end.
2086:                while (end > j && isWhitespace(dataBuffer[end - 1])) {
2087:                    end--;
2088:                }
2089:
2090:                // Start copying to the left.
2091:                while (j < end) {
2092:
2093:                    char c = dataBuffer[j++];
2094:
2095:                    // Normalise all other whitespace to
2096:                    // a single space.
2097:                    if (isWhitespace(c)) {
2098:                        while (j < end && isWhitespace(dataBuffer[j++])) {
2099:                        }
2100:                        dataBuffer[i++] = ' ';
2101:                        dataBuffer[i++] = dataBuffer[j - 1];
2102:                    } else {
2103:                        dataBuffer[i++] = c;
2104:                    }
2105:                }
2106:
2107:                // The new length is <= the old one.
2108:                dataBufferPos = i;
2109:            }
2110:
2111:            /**
2112:             * Convert the data buffer to a string.
2113:             * @param internFlag true if the contents should be interned.
2114:             * @see #intern(char[],int,int)
2115:             */
2116:            String dataBufferToString() {
2117:                String s = new String(dataBuffer, 0, dataBufferPos);
2118:                dataBufferPos = 0;
2119:                return s;
2120:            }
2121:
2122:            /**
2123:             * Flush the contents of the data buffer to the handler, if
2124:             * appropriate, and reset the buffer for new input.
2125:             */
2126:            void dataBufferFlush() throws java.lang.Exception {
2127:                if (dataBufferPos > 0) {
2128:                    switch (currentElementContent) {
2129:                    case CONTENT_UNDECLARED:
2130:                    case CONTENT_EMPTY:
2131:                        // do nothing
2132:                        break;
2133:                    case CONTENT_MIXED:
2134:                    case CONTENT_ANY:
2135:                        if (handler != null) {
2136:                            handler.charData(dataBuffer, 0, dataBufferPos);
2137:                        }
2138:                        break;
2139:                    case CONTENT_ELEMENTS:
2140:                        if (handler != null) {
2141:                            handler.ignorableWhitespace(dataBuffer, 0,
2142:                                    dataBufferPos);
2143:                        }
2144:                        break;
2145:                    }
2146:                    dataBufferPos = 0;
2147:                }
2148:            }
2149:
2150:            /**
2151:             * Require a string to appear, or throw an exception.
2152:             */
2153:            void require(String delim) throws java.lang.Exception {
2154:                char ch[] = delim.toCharArray();
2155:                for (int i = 0; i < ch.length; i++) {
2156:                    require(ch[i]);
2157:                }
2158:            }
2159:
2160:            /**
2161:             * Require a character to appear, or throw an exception.
2162:             */
2163:            void require(char delim) throws java.lang.Exception {
2164:                char c = readCh();
2165:
2166:                if (c != delim) {
2167:                    error("expected character", c, new Character(delim)
2168:                            .toString());
2169:                }
2170:            }
2171:
2172:            /**
2173:             * Return an internalised version of a string.
2174:             * <p>&AElig;lfred uses this method to create an internalised version
2175:             * of all names and attribute values, so that it can test equality
2176:             * with <code>==</code> instead of <code>String.equals()</code>.
2177:             * <p>If you want to be able to test for equality in the same way,
2178:             * you can use this method to internalise your own strings first:
2179:             * <pre>
2180:             * String PARA = handler.intern("PARA");
2181:             * </pre>
2182:             * <p>Note that this will not return the same results as String.intern().
2183:             * @param s The string to internalise.
2184:             * @return An internalised version of the string.
2185:             * @see #intern(char[],int,int)
2186:             * @see java.lang.String#intern
2187:             */
2188:            public String intern(String s) {
2189:                char ch[] = s.toCharArray();
2190:                return intern(ch, 0, ch.length);
2191:            }
2192:
2193:            /**
2194:             * Create an internalised string from a character array.
2195:             * <p>This is much more efficient than constructing a non-internalised
2196:             * string first, and then internalising it.
2197:             * <p>Note that this will not return the same results as String.intern().
2198:             * @param ch an array of characters for building the string.
2199:             * @param start the starting position in the array.
2200:             * @param length the number of characters to place in the string.
2201:             * @return an internalised string.
2202:             * @see #intern(String)
2203:             * @see java.lang.String#intern
2204:             */
2205:            public String intern(char ch[], int start, int length) {
2206:                int index;
2207:                int hash = 0;
2208:
2209:                // Generate a hash code.
2210:                for (int i = start; i < start + length; i++) {
2211:                    hash = ((hash << 1) & 0xffffff) + (int) ch[i];
2212:                }
2213:
2214:                hash = hash % SYMBOL_TABLE_LENGTH;
2215:
2216:                // Get the bucket.
2217:                Object bucket[] = (Object[]) symbolTable[hash];
2218:                if (bucket == null) {
2219:                    symbolTable[hash] = bucket = new Object[8];
2220:                }
2221:
2222:                // Search for a matching tuple, and
2223:                // return the string if we find one.
2224:                for (index = 0; index < bucket.length; index += 2) {
2225:                    char chFound[] = (char[]) bucket[index];
2226:
2227:                    // Stop when we hit a null index.
2228:                    if (chFound == null) {
2229:                        break;
2230:                    }
2231:
2232:                    // If they're the same length,
2233:                    // check for a match.
2234:                    // If the loop finishes, 'index' will
2235:                    // contain the current bucket
2236:                    // position.
2237:                    if (chFound.length == length) {
2238:                        for (int i = 0; i < chFound.length; i++) {
2239:                            // Stop if there are no more tuples.
2240:                            if (ch[start + i] != chFound[i]) {
2241:                                break;
2242:                            } else if (i == length - 1) {
2243:                                // That's it, we have a match!
2244:                                return (String) bucket[index + 1];
2245:                            }
2246:                        }
2247:                    }
2248:                }
2249:
2250:                // Not found -- we'll have to add it.
2251:
2252:                // Do we have to grow the bucket?
2253:                bucket = (Object[]) extendArray(bucket, bucket.length, index);
2254:
2255:                // OK, add it to the end of the
2256:                // bucket.
2257:                String s = new String(ch, start, length);
2258:                bucket[index] = s.toCharArray();
2259:                bucket[index + 1] = s;
2260:                symbolTable[hash] = bucket;
2261:                return s;
2262:            }
2263:
2264:            /**
2265:             * Ensure the capacity of an array, allocating a new one if
2266:             * necessary.
2267:             */
2268:            Object extendArray(Object array, int currentSize, int requiredSize) {
2269:                if (requiredSize < currentSize) {
2270:                    return array;
2271:                } else {
2272:                    Object newArray = null;
2273:                    int newSize = currentSize * 2;
2274:
2275:                    if (newSize <= requiredSize) {
2276:                        newSize = requiredSize + 1;
2277:                    }
2278:
2279:                    if (array instanceof  char[]) {
2280:                        newArray = new char[newSize];
2281:                    } else if (array instanceof  Object[]) {
2282:                        newArray = new Object[newSize];
2283:                    }
2284:
2285:                    System.arraycopy(array, 0, newArray, 0, currentSize);
2286:                    return newArray;
2287:                }
2288:            }
2289:
2290:            //////////////////////////////////////////////////////////////////////
2291:            // XML query routines.
2292:            //////////////////////////////////////////////////////////////////////
2293:
2294:            //
2295:            // Elements
2296:            //
2297:
2298:            /**
2299:             * Get the declared elements for an XML document.
2300:             * <p>The results will be valid only after the DTD (if any) has been
2301:             * parsed.
2302:             * @return An enumeration of all element types declared for this
2303:             *         document (as Strings).
2304:             * @see #getElementContentType
2305:             * @see #getElementContentModel
2306:             */
2307:            public Enumeration declaredElements() {
2308:                return elementInfo.keys();
2309:            }
2310:
2311:            /**
2312:             * Look up the content type of an element.
2313:             * @param name The element type name.
2314:             * @return An integer constant representing the content type.
2315:             * @see #getElementContentModel
2316:             * @see #CONTENT_UNDECLARED
2317:             * @see #CONTENT_ANY
2318:             * @see #CONTENT_EMPTY
2319:             * @see #CONTENT_MIXED
2320:             * @see #CONTENT_ELEMENTS
2321:             */
2322:            public int getElementContentType(String name) {
2323:                Object element[] = (Object[]) elementInfo.get(name);
2324:                if (element == null) {
2325:                    return CONTENT_UNDECLARED;
2326:                } else {
2327:                    return ((Integer) element[0]).intValue();
2328:                }
2329:            }
2330:
2331:            /**
2332:             * Look up the content model of an element.
2333:             * <p>The result will always be null unless the content type is
2334:             * CONTENT_ELEMENTS or CONTENT_MIXED.
2335:             * @param name The element type name.
2336:             * @return The normalised content model, as a string.
2337:             * @see #getElementContentType
2338:             */
2339:            public String getElementContentModel(String name) {
2340:                Object element[] = (Object[]) elementInfo.get(name);
2341:                if (element == null) {
2342:                    return null;
2343:                } else {
2344:                    return (String) element[1];
2345:                }
2346:            }
2347:
2348:            /**
2349:             * Register an element.
2350:             * Array format:
2351:             *  element type
2352:             *  attribute hash table
2353:             */
2354:            void setElement(String name, int contentType, String contentModel,
2355:                    Hashtable attributes) throws java.lang.Exception {
2356:                Object element[];
2357:
2358:                // Try looking up the element
2359:                element = (Object[]) elementInfo.get(name);
2360:
2361:                // Make a new one if necessary.
2362:                if (element == null) {
2363:                    element = new Object[3];
2364:                    element[0] = new Integer(CONTENT_UNDECLARED);
2365:                    element[1] = null;
2366:                    element[2] = null;
2367:                } else if (contentType != CONTENT_UNDECLARED
2368:                        && ((Integer) element[0]).intValue() != CONTENT_UNDECLARED) {
2369:                    error("multiple declarations for element type", name, null);
2370:                    return;
2371:                }
2372:
2373:                // Insert the content type, if any.
2374:                if (contentType != CONTENT_UNDECLARED) {
2375:                    element[0] = new Integer(contentType);
2376:                }
2377:
2378:                // Insert the content model, if any.
2379:                if (contentModel != null) {
2380:                    element[1] = contentModel;
2381:                }
2382:
2383:                // Insert the attributes, if any.
2384:                if (attributes != null) {
2385:                    element[2] = attributes;
2386:                }
2387:
2388:                // Save the element info.
2389:                elementInfo.put(name, element);
2390:            }
2391:
2392:            /**
2393:             * Look up the attribute hash table for an element.
2394:             * The hash table is the second item in the element array.
2395:             */
2396:            Hashtable getElementAttributes(String name) {
2397:                Object element[] = (Object[]) elementInfo.get(name);
2398:                if (element == null) {
2399:                    return null;
2400:                } else {
2401:                    return (Hashtable) element[2];
2402:                }
2403:            }
2404:
2405:            //
2406:            // Attributes
2407:            //
2408:
2409:            /**
2410:             * Get the declared attributes for an element type.
2411:             * @param elname The name of the element type.
2412:             * @return An Enumeration of all the attributes declared for
2413:             *         a specific element type.  The results will be valid only
2414:             *         after the DTD (if any) has been parsed.
2415:             * @see #getAttributeType
2416:             * @see #getAttributeEnumeration
2417:             * @see #getAttributeDefaultValueType
2418:             * @see #getAttributeDefaultValue
2419:             * @see #getAttributeExpandedValue
2420:             */
2421:            public Enumeration declaredAttributes(String elname) {
2422:                Hashtable attlist = getElementAttributes(elname);
2423:
2424:                if (attlist == null) {
2425:                    return null;
2426:                } else {
2427:                    return attlist.keys();
2428:                }
2429:            }
2430:
2431:            /**
2432:             * Retrieve the declared type of an attribute.
2433:             * @param name The name of the associated element.
2434:             * @param aname The name of the attribute.
2435:             * @return An integer constant representing the attribute type.
2436:             * @see #ATTRIBUTE_UNDECLARED
2437:             * @see #ATTRIBUTE_CDATA
2438:             * @see #ATTRIBUTE_ID
2439:             * @see #ATTRIBUTE_IDREF
2440:             * @see #ATTRIBUTE_IDREFS
2441:             * @see #ATTRIBUTE_ENTITY
2442:             * @see #ATTRIBUTE_ENTITIES
2443:             * @see #ATTRIBUTE_NMTOKEN
2444:             * @see #ATTRIBUTE_NMTOKENS
2445:             * @see #ATTRIBUTE_ENUMERATED
2446:             * @see #ATTRIBUTE_NOTATION
2447:             */
2448:            public int getAttributeType(String name, String aname) {
2449:                Object attribute[] = getAttribute(name, aname);
2450:                if (attribute == null) {
2451:                    return ATTRIBUTE_UNDECLARED;
2452:                } else {
2453:                    return ((Integer) attribute[0]).intValue();
2454:                }
2455:            }
2456:
2457:            /**
2458:             * Retrieve the allowed values for an enumerated attribute type.
2459:             * @param name The name of the associated element.
2460:             * @param aname The name of the attribute.
2461:             * @return A string containing the token list.
2462:             * @see #ATTRIBUTE_ENUMERATED
2463:             * @see #ATTRIBUTE_NOTATION
2464:             */
2465:            public String getAttributeEnumeration(String name, String aname) {
2466:                Object attribute[] = getAttribute(name, aname);
2467:                if (attribute == null) {
2468:                    return null;
2469:                } else {
2470:                    return (String) attribute[3];
2471:                }
2472:            }
2473:
2474:            /**
2475:             * Retrieve the default value of a declared attribute.
2476:             * @param name The name of the associated element.
2477:             * @param aname The name of the attribute.
2478:             * @return The default value, or null if the attribute was
2479:             *         #IMPLIED or simply undeclared and unspecified.
2480:             * @see #getAttributeExpandedValue
2481:             */
2482:            public String getAttributeDefaultValue(String name, String aname) {
2483:                Object attribute[] = getAttribute(name, aname);
2484:                if (attribute == null) {
2485:                    return null;
2486:                } else {
2487:                    return (String) attribute[1];
2488:                }
2489:            }
2490:
2491:            /**
2492:             * Retrieve the expanded value of a declared attribute.
2493:             * <p>All general entities will be expanded.
2494:             * @param name The name of the associated element.
2495:             * @param aname The name of the attribute.
2496:             * @return The expanded default value, or null if the attribute was
2497:             *         #IMPLIED or simply undeclared
2498:             * @see #getAttributeDefaultValue
2499:             */
2500:            public String getAttributeExpandedValue(String name, String aname) {
2501:                Object attribute[] = getAttribute(name, aname);
2502:                if (attribute == null) {
2503:                    return null;
2504:                } else if (attribute[4] == null && attribute[1] != null) {
2505:                    try {
2506:                        pushString(null, (char) 0 + (String) attribute[1]
2507:                                + (char) 0);
2508:                        attribute[4] = readLiteral(LIT_NORMALIZE | LIT_CHAR_REF
2509:                                | LIT_ENTITY_REF);
2510:                    } catch (Exception e) {
2511:                    }
2512:                }
2513:                return (String) attribute[4];
2514:            }
2515:
2516:            /**
2517:             * Retrieve the default value type of a declared attribute.
2518:             * @see #ATTRIBUTE_DEFAULT_SPECIFIED
2519:             * @see #ATTRIBUTE_DEFAULT_IMPLIED
2520:             * @see #ATTRIBUTE_DEFAULT_REQUIRED
2521:             * @see #ATTRIBUTE_DEFAULT_FIXED
2522:             */
2523:            public int getAttributeDefaultValueType(String name, String aname) {
2524:                Object attribute[] = getAttribute(name, aname);
2525:                if (attribute == null) {
2526:                    return ATTRIBUTE_DEFAULT_UNDECLARED;
2527:                } else {
2528:                    return ((Integer) attribute[2]).intValue();
2529:                }
2530:            }
2531:
2532:            /**
2533:             * Register an attribute declaration for later retrieval.
2534:             * Format:
2535:             * - String type
2536:             * - String default value
2537:             * - int value type
2538:             * *TODO: do something with attribute types.
2539:             */
2540:            void setAttribute(String elName, String name, int type,
2541:                    String enumeration, String value, int valueType)
2542:                    throws java.lang.Exception {
2543:                Hashtable attlist;
2544:                Object attribute[];
2545:
2546:                // Create a new hashtable if necessary.
2547:                attlist = getElementAttributes(elName);
2548:                if (attlist == null) {
2549:                    attlist = new Hashtable();
2550:                }
2551:
2552:                // Check that the attribute doesn't
2553:                // already exist!
2554:                if (attlist.get(name) != null) {
2555:                    return;
2556:                } else {
2557:                    attribute = new Object[5];
2558:                    attribute[0] = new Integer(type);
2559:                    attribute[1] = value;
2560:                    attribute[2] = new Integer(valueType);
2561:                    attribute[3] = enumeration;
2562:                    attribute[4] = null;
2563:                    attlist.put(name.intern(), attribute);
2564:
2565:                    // Use CONTENT_UNDECLARED to avoid overwriting
2566:                    // existing element declaration.
2567:                    setElement(elName, CONTENT_UNDECLARED, null, attlist);
2568:                }
2569:            }
2570:
2571:            /**
2572:             * Retrieve the three-member array representing an
2573:             * attribute declaration.
2574:             */
2575:            Object[] getAttribute(String elName, String name) {
2576:                Hashtable attlist;
2577:                Object attribute[];
2578:
2579:                attlist = getElementAttributes(elName);
2580:                if (attlist == null) {
2581:                    return null;
2582:                }
2583:
2584:                attribute = (Object[]) attlist.get(name);
2585:                return attribute;
2586:            }
2587:
2588:            //
2589:            // Entities
2590:            //
2591:
2592:            /**
2593:             * Get declared entities.
2594:             * @return An Enumeration of all the entities declared for
2595:             *         this XML document.  The results will be valid only
2596:             *         after the DTD (if any) has been parsed.
2597:             * @see #getEntityType
2598:             * @see #getEntityPublicId
2599:             * @see #getEntitySystemId
2600:             * @see #getEntityValue
2601:             * @see #getEntityNotationName
2602:             */
2603:            public Enumeration declaredEntities() {
2604:                return entityInfo.keys();
2605:            }
2606:
2607:            /**
2608:             * Find the type of an entity.
2609:             * @returns An integer constant representing the entity type.
2610:             * @see #ENTITY_UNDECLARED
2611:             * @see #ENTITY_INTERNAL
2612:             * @see #ENTITY_NDATA
2613:             * @see #ENTITY_TEXT
2614:             */
2615:            public int getEntityType(String ename) {
2616:                Object entity[] = (Object[]) entityInfo.get(ename);
2617:                if (entity == null) {
2618:                    return ENTITY_UNDECLARED;
2619:                } else {
2620:                    return ((Integer) entity[0]).intValue();
2621:                }
2622:            }
2623:
2624:            /**
2625:             * Return an external entity's public identifier, if any.
2626:             * @param ename The name of the external entity.
2627:             * @return The entity's system identifier, or null if the
2628:             *         entity was not declared, if it is not an
2629:             *         external entity, or if no public identifier was
2630:             *         provided.
2631:             * @see #getEntityType
2632:             */
2633:            public String getEntityPublicId(String ename) {
2634:                Object entity[] = (Object[]) entityInfo.get(ename);
2635:                if (entity == null) {
2636:                    return null;
2637:                } else {
2638:                    return (String) entity[1];
2639:                }
2640:            }
2641:
2642:            /**
2643:             * Return an external entity's system identifier.
2644:             * @param ename The name of the external entity.
2645:             * @return The entity's system identifier, or null if the
2646:             *         entity was not declared, or if it is not an
2647:             *         external entity.
2648:             * @see #getEntityType
2649:             */
2650:            public String getEntitySystemId(String ename) {
2651:                Object entity[] = (Object[]) entityInfo.get(ename);
2652:                if (entity == null) {
2653:                    return null;
2654:                } else {
2655:                    return (String) entity[2];
2656:                }
2657:            }
2658:
2659:            /**
2660:             * Return the value of an internal entity.
2661:             * @param ename The name of the internal entity.
2662:             * @return The entity's value, or null if the entity was
2663:             *         not declared, or if it is not an internal entity.
2664:             * @see #getEntityType
2665:             */
2666:            public String getEntityValue(String ename) {
2667:                Object entity[] = (Object[]) entityInfo.get(ename);
2668:                if (entity == null) {
2669:                    return null;
2670:                } else {
2671:                    return (String) entity[3];
2672:                }
2673:            }
2674:
2675:            /**
2676:             * Get the notation name associated with an NDATA entity.
2677:             * @param ename The NDATA entity name.
2678:             * @return The associated notation name, or null if the
2679:             *         entity was not declared, or if it is not an
2680:             *         NDATA entity.
2681:             * @see #getEntityType
2682:             */
2683:            public String getEntityNotationName(String eName) {
2684:                Object entity[] = (Object[]) entityInfo.get(eName);
2685:                if (entity == null) {
2686:                    return null;
2687:                } else {
2688:                    return (String) entity[4];
2689:                }
2690:            }
2691:
2692:            /**
2693:             * Register an entity declaration for later retrieval.
2694:             */
2695:            void setInternalEntity(String eName, String value) {
2696:                setEntity(eName, ENTITY_INTERNAL, null, null, value, null);
2697:            }
2698:
2699:            /**
2700:             * Register an external data entity.
2701:             */
2702:            void setExternalDataEntity(String eName, String pubid,
2703:                    String sysid, String nName) {
2704:                setEntity(eName, ENTITY_NDATA, pubid, sysid, null, nName);
2705:            }
2706:
2707:            /**
2708:             * Register an external text entity.
2709:             */
2710:            void setExternalTextEntity(String eName, String pubid, String sysid) {
2711:                setEntity(eName, ENTITY_TEXT, pubid, sysid, null, null);
2712:            }
2713:
2714:            /**
2715:             * Register an entity declaration for later retrieval.
2716:             */
2717:            void setEntity(String eName, int eClass, String pubid,
2718:                    String sysid, String value, String nName) {
2719:                Object entity[];
2720:
2721:                if (entityInfo.get(eName) == null) {
2722:                    entity = new Object[5];
2723:                    entity[0] = new Integer(eClass);
2724:                    entity[1] = pubid;
2725:                    entity[2] = sysid;
2726:                    entity[3] = value;
2727:                    entity[4] = nName;
2728:
2729:                    entityInfo.put(eName, entity);
2730:                }
2731:            }
2732:
2733:            //
2734:            // Notations.
2735:            //
2736:
2737:            /**
2738:             * Get declared notations.
2739:             * @return An Enumeration of all the notations declared for
2740:             *         this XML document.  The results will be valid only
2741:             *         after the DTD (if any) has been parsed.
2742:             * @see #getNotationPublicId
2743:             * @see #getNotationSystemId
2744:             */
2745:            public Enumeration declaredNotations() {
2746:                return notationInfo.keys();
2747:            }
2748:
2749:            /**
2750:             * Look up the public identifier for a notation.
2751:             * You will normally use this method to look up a notation
2752:             * that was provided as an attribute value or for an NDATA entity.
2753:             * @param nname The name of the notation.
2754:             * @return A string containing the public identifier, or null
2755:             *         if none was provided or if no such notation was
2756:             *         declared.
2757:             * @see #getNotationSystemId
2758:             */
2759:            public String getNotationPublicId(String nname) {
2760:                Object notation[] = (Object[]) notationInfo.get(nname);
2761:                if (notation == null) {
2762:                    return null;
2763:                } else {
2764:                    return (String) notation[0];
2765:                }
2766:            }
2767:
2768:            /**
2769:             * Look up the system identifier for a notation.
2770:             * You will normally use this method to look up a notation
2771:             * that was provided as an attribute value or for an NDATA entity.
2772:             * @param nname The name of the notation.
2773:             * @return A string containing the system identifier, or null
2774:             *         if no such notation was declared.
2775:             * @see #getNotationPublicId
2776:             */
2777:            public String getNotationSystemId(String nname) {
2778:                Object notation[] = (Object[]) notationInfo.get(nname);
2779:                if (notation == null) {
2780:                    return null;
2781:                } else {
2782:                    return (String) notation[1];
2783:                }
2784:            }
2785:
2786:            /**
2787:             * Register a notation declaration for later retrieval.
2788:             * Format:
2789:             * - public id
2790:             * - system id
2791:             */
2792:            void setNotation(String nname, String pubid, String sysid)
2793:                    throws java.lang.Exception {
2794:                Object notation[];
2795:
2796:                if (notationInfo.get(nname) == null) {
2797:                    notation = new Object[2];
2798:                    notation[0] = pubid;
2799:                    notation[1] = sysid;
2800:                    notationInfo.put(nname, notation);
2801:                } else {
2802:                    error("multiple declarations of notation", nname, null);
2803:                }
2804:            }
2805:
2806:            //
2807:            // Location.
2808:            //
2809:
2810:            /**
2811:             * Return the current line number.
2812:             */
2813:            public int getLineNumber() {
2814:                return line;
2815:            }
2816:
2817:            /**
2818:             * Return the current column number.
2819:             */
2820:            public int getColumnNumber() {
2821:                return column;
2822:            }
2823:
2824:            //////////////////////////////////////////////////////////////////////
2825:            // High-level I/O.
2826:            //////////////////////////////////////////////////////////////////////
2827:
2828:            /**
2829:             * Read a single character from the readBuffer.
2830:             * <p>The readDataChunk() method maintains the buffer.
2831:             * <p>If we hit the end of an entity, try to pop the stack and
2832:             * keep going.
2833:             * <p>(This approach doesn't really enforce XML's rules about
2834:             * entity boundaries, but this is not currently a validating
2835:             * parser).
2836:             * <p>This routine also attempts to keep track of the current
2837:             * position in external entities, but it's not entirely accurate.
2838:             * @return The next available input character.
2839:             * @see #unread(char)
2840:             * @see #unread(String)
2841:             * @see #readDataChunk
2842:             * @see #readBuffer
2843:             * @see #line
2844:             * @return The next character from the current input source.
2845:             */
2846:            char readCh() throws java.lang.Exception {
2847:                char c;
2848:
2849:                // As long as there's nothing in the
2850:                // read buffer, try reading more data
2851:                // (for an external entity) or popping
2852:                // the entity stack (for either).
2853:                while (readBufferPos >= readBufferLength) {
2854:                    switch (sourceType) {
2855:                    case INPUT_READER:
2856:                    case INPUT_EXTERNAL:
2857:                    case INPUT_STREAM:
2858:                        readDataChunk();
2859:                        while (readBufferLength < 1) {
2860:                            popInput();
2861:                            if (readBufferLength < 1) {
2862:                                readDataChunk();
2863:                            }
2864:                        }
2865:                        break;
2866:
2867:                    default:
2868:                        popInput();
2869:                        break;
2870:                    }
2871:                }
2872:
2873:                c = readBuffer[readBufferPos++];
2874:
2875:                // This is a particularly nasty bit
2876:                // of code, that checks for a parameter
2877:                // entity reference but peeks ahead to
2878:                // catch the '%' in parameter entity
2879:                // declarations.
2880:                if (c == '%'
2881:                        && (context == CONTEXT_DTD || context == CONTEXT_ENTITYVALUE)) {
2882:                    char c2 = readCh();
2883:                    unread(c2);
2884:                    if (!isWhitespace(c2)) {
2885:                        parsePEReference(context == CONTEXT_ENTITYVALUE);
2886:                        return readCh();
2887:                    }
2888:                }
2889:
2890:                if (c == '\n') {
2891:                    line++;
2892:                    column = 0;
2893:                } else {
2894:                    column++;
2895:                }
2896:
2897:                return c;
2898:            }
2899:
2900:            /**
2901:             * Push a single character back onto the current input stream.
2902:             * <p>This method usually pushes the character back onto
2903:             * the readBuffer, while the unread(String) method treats the
2904:             * string as a new internal entity.
2905:             * <p>I don't think that this would ever be called with 
2906:             * readBufferPos = 0, because the methods always reads a character
2907:             * before unreading it, but just in case, I've added a boundary
2908:             * condition.
2909:             * @param c The character to push back.
2910:             * @see #readCh
2911:             * @see #unread(String)
2912:             * @see #unread(char[])
2913:             * @see #readBuffer
2914:             */
2915:            void unread(char c) throws java.lang.Exception {
2916:                // Normal condition.
2917:                if (c == '\n') {
2918:                    line--;
2919:                    column = -1;
2920:                }
2921:                if (readBufferPos > 0) {
2922:                    readBuffer[--readBufferPos] = c;
2923:                } else {
2924:                    pushString(null, new Character(c).toString());
2925:                }
2926:            }
2927:
2928:            /**
2929:             * Push a char array back onto the current input stream.
2930:             * <p>NOTE: you must <em>never</em> push back characters that you
2931:             * haven't actually read: use pushString() instead.
2932:             * @see #readCh
2933:             * @see #unread(char)
2934:             * @see #unread(String)
2935:             * @see #readBuffer
2936:             * @see #pushString
2937:             */
2938:            void unread(char ch[], int length) throws java.lang.Exception {
2939:                for (int i = 0; i < length; i++) {
2940:                    if (ch[i] == '\n') {
2941:                        line--;
2942:                        column = -1;
2943:                    }
2944:                }
2945:                if (length < readBufferPos) {
2946:                    readBufferPos -= length;
2947:                } else {
2948:                    pushCharArray(null, ch, 0, length);
2949:                    sourceType = INPUT_BUFFER;
2950:                }
2951:            }
2952:
2953:            /**
2954:             * Push a new external input source.
2955:             * <p>The source will be either an external text entity, or the DTD
2956:             * external subset.
2957:             * <p>TO DO: Right now, this method always attempts to autodetect
2958:             * the encoding; in the future, it should allow the caller to 
2959:             * request an encoding explicitly, and it should also look at the
2960:             * headers with an HTTP connection.
2961:             * @param url The java.net.URL object for the entity.
2962:             * @see XmlHandler#resolveEntity
2963:             * @see #pushString
2964:             * @see #sourceType
2965:             * @see #pushInput
2966:             * @see #detectEncoding
2967:             * @see #sourceType
2968:             * @see #readBuffer
2969:             */
2970:            void pushURL(String ename, String publicId, String systemId,
2971:                    Reader reader, InputStream stream, String encoding)
2972:                    throws java.lang.Exception {
2973:                URL url;
2974:                boolean ignoreEncoding = false;
2975:
2976:                // Push the existing status.
2977:                pushInput(ename);
2978:
2979:                // Create a new read buffer.
2980:                // (Note the four-character margin)
2981:                readBuffer = new char[READ_BUFFER_MAX + 4];
2982:                readBufferPos = 0;
2983:                readBufferLength = 0;
2984:                readBufferOverflow = -1;
2985:                is = null;
2986:                line = 1;
2987:
2988:                currentByteCount = 0;
2989:
2990:                // Flush any remaining data.
2991:                dataBufferFlush();
2992:
2993:                // Make the URL absolute.
2994:                if (systemId != null && externalEntity != null) {
2995:                    systemId = new URL(externalEntity.getURL(), systemId)
2996:                            .toString();
2997:                } else if (baseURI != null) {
2998:                    try {
2999:                        systemId = new URL(new URL(baseURI), systemId)
3000:                                .toString();
3001:                    } catch (Exception e) {
3002:                    }
3003:                }
3004:
3005:                // See if the application wants to
3006:                // redirect the system ID and/or
3007:                // supply its own character stream.
3008:                if (systemId != null && handler != null) {
3009:                    Object input = handler.resolveEntity(publicId, systemId);
3010:                    if (input != null) {
3011:                        if (input instanceof  String) {
3012:                            systemId = (String) input;
3013:                        } else if (input instanceof  InputStream) {
3014:                            stream = (InputStream) input;
3015:                        } else if (input instanceof  Reader) {
3016:                            reader = (Reader) input;
3017:                        }
3018:                    }
3019:                }
3020:
3021:                // Start the entity.
3022:                if (handler != null) {
3023:                    if (systemId != null) {
3024:                        handler.startExternalEntity(systemId);
3025:                    } else {
3026:                        handler.startExternalEntity("[external stream]");
3027:                    }
3028:                }
3029:
3030:                // Figure out what we're reading from.
3031:                if (reader != null) {
3032:                    // There's an explicit character stream.
3033:                    sourceType = INPUT_READER;
3034:                    this .reader = reader;
3035:                    tryEncodingDecl(true);
3036:                    return;
3037:                } else if (stream != null) {
3038:                    sourceType = INPUT_STREAM;
3039:                    is = stream;
3040:                } else {
3041:                    // We have to open our own stream
3042:                    // to the URL.
3043:
3044:                    // Set the new status
3045:                    sourceType = INPUT_EXTERNAL;
3046:                    url = new URL(systemId);
3047:
3048:                    externalEntity = url.openConnection();
3049:                    externalEntity.connect();
3050:                    is = externalEntity.getInputStream();
3051:                }
3052:
3053:                // If we get to here, there must be
3054:                // an InputStream available.
3055:                if (!is.markSupported()) {
3056:                    is = new BufferedInputStream(is);
3057:                }
3058:
3059:                // Attempt to detect the encoding.
3060:                if (encoding == null && externalEntity != null) {
3061:                    encoding = externalEntity.getContentEncoding();
3062:                }
3063:
3064:                if (encoding != null) {
3065:                    checkEncoding(encoding, false);
3066:                    ignoreEncoding = true;
3067:                } else {
3068:                    detectEncoding();
3069:                    ignoreEncoding = false;
3070:                }
3071:
3072:                // Read an XML or text declaration.
3073:                tryEncodingDecl(ignoreEncoding);
3074:            }
3075:
3076:            /**
3077:             * Check for an encoding declaration.
3078:             */
3079:            void tryEncodingDecl(boolean ignoreEncoding)
3080:                    throws java.lang.Exception {
3081:                // Read the XML/Encoding declaration.
3082:                if (tryRead("<?xml")) {
3083:                    if (tryWhitespace()) {
3084:                        if (inputStack.size() > 0) {
3085:                            parseTextDecl(ignoreEncoding);
3086:                        } else {
3087:                            parseXMLDecl(ignoreEncoding);
3088:                        }
3089:                    } else {
3090:                        unread("xml".toCharArray(), 3);
3091:                        parsePI();
3092:                    }
3093:                }
3094:            }
3095:
3096:            /**
3097:             * Attempt to detect the encoding of an entity.
3098:             * <p>The trick here (as suggested in the XML standard) is that
3099:             * any entity not in UTF-8, or in UCS-2 with a byte-order mark, 
3100:             * <b>must</b> begin with an XML declaration or an encoding
3101:             * declaration; we simply have to look for "&lt;?XML" in various
3102:             * encodings.
3103:             * <p>This method has no way to distinguish among 8-bit encodings.
3104:             * Instead, it assumes UTF-8, then (possibly) revises its assumption
3105:             * later in checkEncoding().  Any ASCII-derived 8-bit encoding
3106:             * should work, but most will be rejected later by checkEncoding().
3107:             * <p>I don't currently detect EBCDIC, since I'm concerned that it
3108:             * could also be a valid UTF-8 sequence; I'll have to do more checking
3109:             * later.
3110:             * @see #tryEncoding(byte[], byte, byte, byte, byte)
3111:             * @see #tryEncoding(byte[], byte, byte)
3112:             * @see #checkEncoding
3113:             * @see #read8bitEncodingDeclaration
3114:             */
3115:            void detectEncoding() throws java.lang.Exception {
3116:                byte signature[] = new byte[4];
3117:
3118:                // Read the first four bytes for
3119:                // autodetection.
3120:                is.mark(4);
3121:                is.read(signature);
3122:                is.reset();
3123:
3124:                // Look for a known signature.
3125:                if (tryEncoding(signature, (byte) 0x00, (byte) 0x00,
3126:                        (byte) 0x00, (byte) 0x3c)) {
3127:                    // UCS-4 must begin with "<!XML"
3128:                    // 0x00 0x00 0x00 0x3c: UCS-4, big-endian (1234)
3129:                    encoding = ENCODING_UCS_4_1234;
3130:                } else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x00,
3131:                        (byte) 0x00, (byte) 0x00)) {
3132:                    // UCS-4 must begin with "<!XML"
3133:                    // 0x3c 0x00 0x00 0x00: UCS-4, little-endian (4321)
3134:                    encoding = ENCODING_UCS_4_4321;
3135:                } else if (tryEncoding(signature, (byte) 0x00, (byte) 0x00,
3136:                        (byte) 0x3c, (byte) 0x00)) {
3137:                    // UCS-4 must begin with "<!XML"
3138:                    // 0x00 0x00 0x3c 0x00: UCS-4, unusual (2143)
3139:                    encoding = ENCODING_UCS_4_2143;
3140:                } else if (tryEncoding(signature, (byte) 0x00, (byte) 0x3c,
3141:                        (byte) 0x00, (byte) 0x00)) {
3142:                    // UCS-4 must begin with "<!XML"
3143:                    // 0x00 0x3c 0x00 0x00: UCS-4, unusual (3421)
3144:                    encoding = ENCODING_UCS_4_3412;
3145:                } else if (tryEncoding(signature, (byte) 0xfe, (byte) 0xff)) {
3146:                    // UCS-2 with a byte-order marker.
3147:                    // 0xfe 0xff: UCS-2, big-endian (12)
3148:                    encoding = ENCODING_UCS_2_12;
3149:                    is.read();
3150:                    is.read();
3151:                } else if (tryEncoding(signature, (byte) 0xff, (byte) 0xfe)) {
3152:                    // UCS-2 with a byte-order marker.
3153:                    // 0xff 0xfe: UCS-2, little-endian (21)
3154:                    encoding = ENCODING_UCS_2_21;
3155:                    is.read();
3156:                    is.read();
3157:                } else if (tryEncoding(signature, (byte) 0x00, (byte) 0x3c,
3158:                        (byte) 0x00, (byte) 0x3f)) {
3159:                    // UCS-2 without a BOM must begin with "<?XML"
3160:                    // 0x00 0x3c 0x00 0x3f: UCS-2, big-endian, no byte-order mark
3161:                    encoding = ENCODING_UCS_2_12;
3162:                    error("no byte-order mark for UCS-2 entity", null, null);
3163:                } else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x00,
3164:                        (byte) 0x3f, (byte) 0x00)) {
3165:                    // UCS-2 without a BOM must begin with "<?XML"
3166:                    // 0x3c 0x00 0x3f 0x00: UCS-2, little-endian, no byte-order mark
3167:                    encoding = ENCODING_UCS_2_21;
3168:                    error("no byte-order mark for UCS-2 entity", null, null);
3169:                } else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x3f,
3170:                        (byte) 0x78, (byte) 0x6d)) {
3171:                    // Some kind of 8-bit encoding with "<?XML"
3172:                    // 0x3c 0x3f 0x78 0x6d: UTF-8 or other 8-bit markup (read ENCODING)
3173:                    encoding = ENCODING_UTF_8;
3174:                    read8bitEncodingDeclaration();
3175:                } else {
3176:                    // Some kind of 8-bit encoding without "<?XML"
3177:                    // (otherwise) UTF-8 without encoding/XML declaration
3178:                    encoding = ENCODING_UTF_8;
3179:                }
3180:            }
3181:
3182:            /**
3183:             * Check for a four-byte signature.
3184:             * <p>Utility routine for detectEncoding().
3185:             * <p>Always looks for some part of "<?XML" in a specific encoding.
3186:             * @param sig The first four bytes read.
3187:             * @param b1 The first byte of the signature
3188:             * @param b2 The second byte of the signature
3189:             * @param b3 The third byte of the signature
3190:             * @param b4 The fourth byte of the signature
3191:             * @see #detectEncoding
3192:             */
3193:            boolean tryEncoding(byte sig[], byte b1, byte b2, byte b3, byte b4) {
3194:                return (sig[0] == b1 && sig[1] == b2 && sig[2] == b3 && sig[3] == b4);
3195:            }
3196:
3197:            /**
3198:             * Check for a two-byte signature.
3199:             * <p>Looks for a UCS-2 byte-order mark.
3200:             * <p>Utility routine for detectEncoding().
3201:             * @param sig The first four bytes read.
3202:             * @param b1 The first byte of the signature
3203:             * @param b2 The second byte of the signature
3204:             * @see #detectEncoding
3205:             */
3206:            boolean tryEncoding(byte sig[], byte b1, byte b2) {
3207:                return ((sig[0] == b1) && (sig[1] == b2));
3208:            }
3209:
3210:            /**
3211:             * This method pushes a string back onto input.
3212:             * <p>It is useful either as the expansion of an internal entity, 
3213:             * or for backtracking during the parse.
3214:             * <p>Call pushCharArray() to do the actual work.
3215:             * @param s The string to push back onto input.
3216:             * @see #pushCharArray
3217:             */
3218:            void pushString(String ename, String s) throws java.lang.Exception {
3219:                char ch[] = s.toCharArray();
3220:                pushCharArray(ename, ch, 0, ch.length);
3221:            }
3222:
3223:            /**
3224:             * Push a new internal input source.
3225:             * <p>This method is useful for expanding an internal entity,
3226:             * or for unreading a string of characters.  It creates a new
3227:             * readBuffer containing the characters in the array, instead
3228:             * of characters converted from an input byte stream.
3229:             * <p>I've added a couple of optimisations: don't push zero-
3230:             * length strings, and just push back a single character
3231:             * for 1-character strings; this should save some time and memory.
3232:             * @param ch The char array to push.
3233:             * @see #pushString
3234:             * @see #pushURL
3235:             * @see #readBuffer
3236:             * @see #sourceType
3237:             * @see #pushInput
3238:             */
3239:            void pushCharArray(String ename, char ch[], int start, int length)
3240:                    throws java.lang.Exception {
3241:                // Push the existing status
3242:                pushInput(ename);
3243:                sourceType = INPUT_INTERNAL;
3244:                readBuffer = ch;
3245:                readBufferPos = start;
3246:                readBufferLength = length;
3247:                readBufferOverflow = -1;
3248:            }
3249:
3250:            /**
3251:             * Save the current input source onto the stack.
3252:             * <p>This method saves all of the global variables associated with
3253:             * the current input source, so that they can be restored when a new
3254:             * input source has finished.  It also tests for entity recursion.
3255:             * <p>The method saves the following global variables onto a stack
3256:             * using a fixed-length array:
3257:             * <ol>
3258:             * <li>sourceType
3259:             * <li>externalEntity
3260:             * <li>readBuffer
3261:             * <li>readBufferPos
3262:             * <li>readBufferLength
3263:             * <li>line
3264:             * <li>encoding
3265:             * </ol>
3266:             * @param ename The name of the entity (if any) causing the new input.
3267:             * @see #popInput
3268:             * @see #sourceType
3269:             * @see #externalEntity
3270:             * @see #readBuffer
3271:             * @see #readBufferPos
3272:             * @see #readBufferLength
3273:             * @see #line
3274:             * @see #encoding
3275:             */
3276:            void pushInput(String ename) throws java.lang.Exception {
3277:                Object input[] = new Object[12];
3278:
3279:                // Check for entity recursion.
3280:                if (ename != null) {
3281:                    Enumeration entities = entityStack.elements();
3282:                    while (entities.hasMoreElements()) {
3283:                        String e = (String) entities.nextElement();
3284:                        if (e == ename) {
3285:                            error("recursive reference to entity", ename, null);
3286:                        }
3287:                    }
3288:                }
3289:                entityStack.push(ename);
3290:
3291:                // Don't bother if there is no input.
3292:                if (sourceType == INPUT_NONE) {
3293:                    return;
3294:                }
3295:
3296:                // Set up a snapshot of the current
3297:                // input source.
3298:                input[0] = new Integer(sourceType);
3299:                input[1] = externalEntity;
3300:                input[2] = readBuffer;
3301:                input[3] = new Integer(readBufferPos);
3302:                input[4] = new Integer(readBufferLength);
3303:                input[5] = new Integer(line);
3304:                input[6] = new Integer(encoding);
3305:                input[7] = new Integer(readBufferOverflow);
3306:                input[8] = is;
3307:                input[9] = new Integer(currentByteCount);
3308:                input[10] = new Integer(column);
3309:                input[11] = reader;
3310:
3311:                // Push it onto the stack.
3312:                inputStack.push(input);
3313:            }
3314:
3315:            /**
3316:             * Restore a previous input source.
3317:             * <p>This method restores all of the global variables associated with
3318:             * the current input source.
3319:             * @exception java.io.EOFException
3320:             *    If there are no more entries on the input stack.
3321:             * @see #pushInput
3322:             * @see #sourceType
3323:             * @see #externalEntity
3324:             * @see #readBuffer
3325:             * @see #readBufferPos
3326:             * @see #readBufferLength
3327:             * @see #line
3328:             * @see #encoding
3329:             */
3330:            void popInput() throws java.lang.Exception {
3331:                Object input[];
3332:
3333:                switch (sourceType) {
3334:
3335:                case INPUT_EXTERNAL:
3336:                    dataBufferFlush();
3337:                    if (handler != null && externalEntity != null) {
3338:                        handler.endExternalEntity(externalEntity.getURL()
3339:                                .toString());
3340:                    }
3341:                    break;
3342:                case INPUT_STREAM:
3343:                    dataBufferFlush();
3344:                    if (baseURI != null) {
3345:                        if (handler != null) {
3346:                            handler.endExternalEntity(baseURI);
3347:                        }
3348:                    }
3349:                    break;
3350:                case INPUT_READER:
3351:                    dataBufferFlush();
3352:                    if (baseURI != null) {
3353:                        if (handler != null) {
3354:                            handler.endExternalEntity(baseURI);
3355:                        }
3356:                    }
3357:                    break;
3358:                }
3359:
3360:                // Throw an EOFException if there
3361:                // is nothing else to pop.
3362:                if (inputStack.isEmpty()) {
3363:                    throw new EOFException();
3364:                } else {
3365:                    String s;
3366:                    input = (Object[]) inputStack.pop();
3367:                    s = (String) entityStack.pop();
3368:                }
3369:
3370:                sourceType = ((Integer) input[0]).intValue();
3371:                externalEntity = (URLConnection) input[1];
3372:                readBuffer = (char[]) input[2];
3373:                readBufferPos = ((Integer) input[3]).intValue();
3374:                readBufferLength = ((Integer) input[4]).intValue();
3375:                line = ((Integer) input[5]).intValue();
3376:                encoding = ((Integer) input[6]).intValue();
3377:                readBufferOverflow = ((Integer) input[7]).intValue();
3378:                is = (InputStream) input[8];
3379:                currentByteCount = ((Integer) input[9]).intValue();
3380:                column = ((Integer) input[10]).intValue();
3381:                reader = (Reader) input[11];
3382:            }
3383:
3384:            /**
3385:             * Return true if we can read the expected character.
3386:             * <p>Note that the character will be removed from the input stream
3387:             * on success, but will be put back on failure.  Do not attempt to
3388:             * read the character again if the method succeeds.
3389:             * @param delim The character that should appear next.  For a
3390:             *              insensitive match, you must supply this in upper-case.
3391:             * @return true if the character was successfully read, or false if
3392:             *         it was not.
3393:             * @see #tryRead(String)
3394:             */
3395:            boolean tryRead(char delim) throws java.lang.Exception {
3396:                char c;
3397:
3398:                // Read the character
3399:                c = readCh();
3400:
3401:                // Test for a match, and push the character
3402:                // back if the match fails.
3403:                if (c == delim) {
3404:                    return true;
3405:                } else {
3406:                    unread(c);
3407:                    return false;
3408:                }
3409:            }
3410:
3411:            /**
3412:             * Return true if we can read the expected string.
3413:             * <p>This is simply a convenience method.
3414:             * <p>Note that the string will be removed from the input stream
3415:             * on success, but will be put back on failure.  Do not attempt to
3416:             * read the string again if the method succeeds.
3417:             * <p>This method will push back a character rather than an
3418:             * array whenever possible (probably the majority of cases).
3419:             * <p><b>NOTE:</b> This method currently has a hard-coded limit
3420:             * of 100 characters for the delimiter.
3421:             * @param delim The string that should appear next.
3422:             * @return true if the string was successfully read, or false if
3423:             *         it was not.
3424:             * @see #tryRead(char)
3425:             */
3426:            boolean tryRead(String delim) throws java.lang.Exception {
3427:                char ch[] = delim.toCharArray();
3428:                char c;
3429:
3430:                // Compare the input, character-
3431:                // by character.
3432:
3433:                for (int i = 0; i < ch.length; i++) {
3434:                    c = readCh();
3435:                    if (c != ch[i]) {
3436:                        unread(c);
3437:                        if (i != 0) {
3438:                            unread(ch, i);
3439:                        }
3440:                        return false;
3441:                    }
3442:                }
3443:                return true;
3444:            }
3445:
3446:            /**
3447:             * Return true if we can read some whitespace.
3448:             * <p>This is simply a convenience method.
3449:             * <p>This method will push back a character rather than an
3450:             * array whenever possible (probably the majority of cases).
3451:             * @return true if whitespace was found.
3452:             */
3453:            boolean tryWhitespace() throws java.lang.Exception {
3454:                char c;
3455:                c = readCh();
3456:                if (isWhitespace(c)) {
3457:                    skipWhitespace();
3458:                    return true;
3459:                } else {
3460:                    unread(c);
3461:                    return false;
3462:                }
3463:            }
3464:
3465:            /**
3466:             * Read all data until we find the specified string.
3467:             * <p>This is especially useful for scanning marked sections.
3468:             * <p>This is a a little inefficient right now, since it calls tryRead()
3469:             * for every character.
3470:             * @param delim The string delimiter
3471:             * @see #tryRead(String, boolean)
3472:             * @see #readCh
3473:             */
3474:            void parseUntil(String delim) throws java.lang.Exception {
3475:                char c;
3476:                int startLine = line;
3477:
3478:                try {
3479:                    while (!tryRead(delim)) {
3480:                        c = readCh();
3481:                        dataBufferAppend(c);
3482:                    }
3483:                } catch (EOFException e) {
3484:                    error(
3485:                            "end of input while looking for delimiter (started on line "
3486:                                    + startLine + ')', null, delim);
3487:                }
3488:            }
3489:
3490:            /**
3491:             * Skip all data until we find the specified string.
3492:             * <p>This is especially useful for scanning comments.
3493:             * <p>This is a a little inefficient right now, since it calls tryRead()
3494:             * for every character.
3495:             * @param delim The string delimiter
3496:             * @see #tryRead(String, boolean)
3497:             * @see #readCh
3498:             */
3499:            void skipUntil(String delim) throws java.lang.Exception {
3500:                while (!tryRead(delim)) {
3501:                    readCh();
3502:                }
3503:            }
3504:
3505:            /**
3506:             * Read just the encoding declaration (or XML declaration) at the 
3507:             * start of an external entity.
3508:             * When this method is called, we know that the declaration is
3509:             * present (or appears to be).  We also know that the entity is
3510:             * in some sort of ASCII-derived 8-bit encoding.
3511:             * The idea of this is to let us read what the 8-bit encoding is
3512:             * before we've committed to converting any more of the file; the
3513:             * XML or encoding declaration must be in 7-bit ASCII, so we're
3514:             * safe as long as we don't go past it.
3515:             */
3516:            void read8bitEncodingDeclaration() throws java.lang.Exception {
3517:                int ch;
3518:                readBufferPos = readBufferLength = 0;
3519:
3520:                while (true) {
3521:                    ch = is.read();
3522:                    readBuffer[readBufferLength++] = (char) ch;
3523:                    switch (ch) {
3524:                    case (int) '>':
3525:                        return;
3526:                    case -1:
3527:                        error(
3528:                                "end of file before end of XML or encoding declaration.",
3529:                                null, "?>");
3530:                        return;
3531:                    }
3532:                    if (readBuffer.length == readBufferLength) {
3533:                        error("unfinished XML or encoding declaration", null,
3534:                                null);
3535:                    }
3536:                }
3537:            }
3538:
3539:            //////////////////////////////////////////////////////////////////////
3540:            // Low-level I/O.
3541:            //////////////////////////////////////////////////////////////////////
3542:
3543:            /**
3544:             * Read a chunk of data from an external input source.
3545:             * <p>This is simply a front-end that fills the rawReadBuffer
3546:             * with bytes, then calls the appropriate encoding handler.
3547:             * @see #encoding
3548:             * @see #rawReadBuffer
3549:             * @see #readBuffer
3550:             * @see #filterCR
3551:             * @see #copyUtf8ReadBuffer
3552:             * @see #copyIso8859_1ReadBuffer
3553:             * @see #copyUcs_2ReadBuffer
3554:             * @see #copyUcs_4ReadBuffer
3555:             */
3556:            void readDataChunk() throws java.lang.Exception {
3557:                int count, i, j;
3558:
3559:                // See if we have any overflow.
3560:                if (readBufferOverflow > -1) {
3561:                    readBuffer[0] = (char) readBufferOverflow;
3562:                    readBufferOverflow = -1;
3563:                    readBufferPos = 1;
3564:                    sawCR = true;
3565:                } else {
3566:                    readBufferPos = 0;
3567:                    sawCR = false;
3568:                }
3569:
3570:                // Special situation -- we're taking
3571:                // input from a character stream.
3572:                if (sourceType == INPUT_READER) {
3573:                    count = reader.read(readBuffer, readBufferPos,
3574:                            READ_BUFFER_MAX - 1);
3575:                    if (count < 0) {
3576:                        readBufferLength = -1;
3577:                    } else {
3578:                        readBufferLength = readBufferPos + count;
3579:                        filterCR();
3580:                        sawCR = false;
3581:                    }
3582:                    return;
3583:                }
3584:
3585:                // Read as many bytes as possible
3586:                // into the read buffer.
3587:                count = is.read(rawReadBuffer, 0, READ_BUFFER_MAX);
3588:
3589:                // Dispatch to an encoding-specific
3590:                // reader method to populate the
3591:                // readBuffer.
3592:                switch (encoding) {
3593:                case ENCODING_UTF_8:
3594:                    copyUtf8ReadBuffer(count);
3595:                    break;
3596:
3597:                case ENCODING_ISO_8859_1:
3598:                    copyIso8859_1ReadBuffer(count);
3599:                    break;
3600:
3601:                case ENCODING_UCS_2_12:
3602:                    copyUcs2ReadBuffer(count, 8, 0);
3603:                    break;
3604:
3605:                case ENCODING_UCS_2_21:
3606:                    copyUcs2ReadBuffer(count, 0, 8);
3607:                    break;
3608:
3609:                case ENCODING_UCS_4_1234:
3610:                    copyUcs4ReadBuffer(count, 24, 16, 8, 0);
3611:                    break;
3612:
3613:                case ENCODING_UCS_4_4321:
3614:                    copyUcs4ReadBuffer(count, 0, 8, 16, 24);
3615:                    break;
3616:
3617:                case ENCODING_UCS_4_2143:
3618:                    copyUcs4ReadBuffer(count, 16, 24, 0, 8);
3619:                    break;
3620:
3621:                case ENCODING_UCS_4_3412:
3622:                    copyUcs4ReadBuffer(count, 8, 0, 24, 16);
3623:                    break;
3624:                }
3625:
3626:                // Filter out all carriage returns
3627:                // if we've seen any.
3628:                if (sawCR) {
3629:                    filterCR();
3630:                    sawCR = false;
3631:                }
3632:
3633:                // Reset the position.
3634:                readBufferPos = 0;
3635:                currentByteCount += count;
3636:            }
3637:
3638:            /**
3639:             * Filter carriage returns in the read buffer.
3640:             * <p>CRLF becomes LF; CR becomes LF.
3641:             * @see #readDataChunk
3642:             * @see #readBuffer
3643:             * @see #readBufferOverflow
3644:             */
3645:            void filterCR() {
3646:                int i, j;
3647:
3648:                readBufferOverflow = -1;
3649:
3650:                loop: for (i = 0, j = 0; j < readBufferLength; i++, j++) {
3651:                    switch (readBuffer[j]) {
3652:                    case '\r':
3653:                        if (j == readBufferLength - 1) {
3654:                            readBufferOverflow = '\r';
3655:                            readBufferLength--;
3656:                            break loop;
3657:                        } else if (readBuffer[j + 1] == '\n') {
3658:                            j++;
3659:                        }
3660:                        readBuffer[i] = '\n';
3661:                        break;
3662:
3663:                    case '\n':
3664:                    default:
3665:                        readBuffer[i] = readBuffer[j];
3666:                        break;
3667:                    }
3668:                }
3669:                readBufferLength = i;
3670:            }
3671:
3672:            /**
3673:             * Convert a buffer of UTF-8-encoded bytes into UTF-16 characters.
3674:             * <p>When readDataChunk() calls this method, the raw bytes are in 
3675:             * rawReadBuffer, and the final characters will appear in 
3676:             * readBuffer.
3677:             * <p>The tricky part of this is dealing with UTF-8 multi-byte 
3678:             * sequences, but it doesn't seem to slow things down too much.
3679:             * @param count The number of bytes to convert.
3680:             * @see #readDataChunk
3681:             * @see #rawReadBuffer
3682:             * @see #readBuffer
3683:             * @see #getNextUtf8Byte
3684:             */
3685:            void copyUtf8ReadBuffer(int count) throws java.lang.Exception {
3686:                int i = 0;
3687:                int j = readBufferPos;
3688:                int b1;
3689:                boolean isSurrogate = false;
3690:                while (i < count) {
3691:                    b1 = rawReadBuffer[i++];
3692:                    isSurrogate = false;
3693:
3694:                    // Determine whether we are dealing
3695:                    // with a one-, two-, three-, or four-
3696:                    // byte sequence.
3697:                    if ((b1 & 0x80) == 0) {
3698:                        // 1-byte sequence: 000000000xxxxxxx = 0xxxxxxx
3699:                        readBuffer[j++] = (char) b1;
3700:                    } else if ((b1 & 0xe0) == 0xc0) {
3701:                        // 2-byte sequence: 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
3702:                        readBuffer[j++] = (char) (((b1 & 0x1f) << 6) | getNextUtf8Byte(
3703:                                i++, count));
3704:                    } else if ((b1 & 0xf0) == 0xe0) {
3705:                        // 3-byte sequence: zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
3706:                        readBuffer[j++] = (char) (((b1 & 0x0f) << 12)
3707:                                | (getNextUtf8Byte(i++, count) << 6) | getNextUtf8Byte(
3708:                                i++, count));
3709:                    } else if ((b1 & 0xf8) == 0xf0) {
3710:                        // 4-byte sequence: 11101110wwwwzzzzyy + 110111yyyyxxxxxx
3711:                        //     = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
3712:                        // (uuuuu = wwww + 1)
3713:                        isSurrogate = true;
3714:                        int b2 = getNextUtf8Byte(i++, count);
3715:                        int b3 = getNextUtf8Byte(i++, count);
3716:                        int b4 = getNextUtf8Byte(i++, count);
3717:                        readBuffer[j++] = (char) (0xd800
3718:                                | ((((b1 & 0x07) << 2) | ((b2 & 0x30) >> 4) - 1) << 6)
3719:                                | ((b2 & 0x0f) << 2) | ((b3 & 0x30) >> 4));
3720:                        readBuffer[j++] = (char) (0xdc | ((b3 & 0x0f) << 6) | b4);
3721:                        // TODO: test that surrogate value is legal.
3722:                    } else {
3723:                        // Otherwise, the 8th bit may not be set in UTF-8
3724:                        encodingError(
3725:                                "bad start for UTF-8 multi-byte sequence", b1,
3726:                                i);
3727:                    }
3728:                    if (readBuffer[j - 1] == '\r') {
3729:                        sawCR = true;
3730:                    }
3731:                }
3732:                // How many characters have we read?
3733:                readBufferLength = j;
3734:            }
3735:
3736:            /**
3737:             * Return the next byte value in a UTF-8 sequence.
3738:             * If it is not possible to get a byte from the current
3739:             * entity, throw an exception.
3740:             * @param pos The current position in the rawReadBuffer.
3741:             * @param count The number of bytes in the rawReadBuffer
3742:             * @return The significant six bits of a non-initial byte in
3743:             *         a UTF-8 sequence.
3744:             * @exception EOFException If the sequence is incomplete.
3745:             */
3746:            int getNextUtf8Byte(int pos, int count) throws java.lang.Exception {
3747:                int val;
3748:
3749:                // Take a character from the buffer
3750:                // or from the actual input stream.
3751:                if (pos < count) {
3752:                    val = rawReadBuffer[pos];
3753:                } else {
3754:                    val = is.read();
3755:                    if (val == -1) {
3756:                        encodingError(
3757:                                "unfinished multi-byte UTF-8 sequence at EOF",
3758:                                -1, pos);
3759:                    }
3760:                }
3761:
3762:                // Check for the correct bits at the
3763:                // start.
3764:                if ((val & 0xc0) != 0x80) {
3765:                    encodingError(
3766:                            "bad continuation of multi-byte UTF-8 sequence",
3767:                            val, pos + 1);
3768:                }
3769:
3770:                // Return the significant bits.
3771:                return (val & 0x3f);
3772:            }
3773:
3774:            /**
3775:             * Convert a buffer of ISO-8859-1-encoded bytes into UTF-16 characters.
3776:             * <p>When readDataChunk() calls this method, the raw bytes are in 
3777:             * rawReadBuffer, and the final characters will appear in 
3778:             * readBuffer.
3779:             * <p>This is a direct conversion, with no tricks.
3780:             * @param count The number of bytes to convert.
3781:             * @see #readDataChunk
3782:             * @see #rawReadBuffer
3783:             * @see #readBuffer
3784:             */
3785:            void copyIso8859_1ReadBuffer(int count) {
3786:                int i, j;
3787:                for (i = 0, j = readBufferPos; i < count; i++, j++) {
3788:                    readBuffer[j] = (char) (rawReadBuffer[i] & 0xff);
3789:                    if (readBuffer[j] == '\r') {
3790:                        sawCR = true;
3791:                    }
3792:                }
3793:                readBufferLength = j;
3794:            }
3795:
3796:            /**
3797:             * Convert a buffer of UCS-2-encoded bytes into UTF-16 characters.
3798:             * <p>When readDataChunk() calls this method, the raw bytes are in 
3799:             * rawReadBuffer, and the final characters will appear in 
3800:             * readBuffer.
3801:             * @param count The number of bytes to convert.
3802:             * @param shift1 The number of bits to shift byte 1.
3803:             * @param shift2 The number of bits to shift byte 2
3804:             * @see #readDataChunk
3805:             * @see #rawReadBuffer
3806:             * @see #readBuffer
3807:             */
3808:            void copyUcs2ReadBuffer(int count, int shift1, int shift2)
3809:                    throws java.lang.Exception {
3810:                int j = readBufferPos;
3811:
3812:                if (count > 0 && (count % 2) != 0) {
3813:                    encodingError("odd number of bytes in UCS-2 encoding", -1,
3814:                            count);
3815:                }
3816:                for (int i = 0; i < count; i += 2) {
3817:                    readBuffer[j++] = (char) (((rawReadBuffer[i] & 0xff) << shift1) | ((rawReadBuffer[i + 1] & 0xff) << shift2));
3818:                    if (readBuffer[j - 1] == '\r') {
3819:                        sawCR = true;
3820:                    }
3821:                }
3822:                readBufferLength = j;
3823:            }
3824:
3825:            /**
3826:             * Convert a buffer of UCS-4-encoded bytes into UTF-16 characters.
3827:             * <p>When readDataChunk() calls this method, the raw bytes are in 
3828:             * rawReadBuffer, and the final characters will appear in 
3829:             * readBuffer.
3830:             * <p>Java has 16-bit chars, but this routine will attempt to use
3831:             * surrogates to encoding values between 0x00010000 and 0x000fffff.
3832:             * @param count The number of bytes to convert.
3833:             * @param shift1 The number of bits to shift byte 1.
3834:             * @param shift2 The number of bits to shift byte 2
3835:             * @param shift3 The number of bits to shift byte 2
3836:             * @param shift4 The number of bits to shift byte 2
3837:             * @see #readDataChunk
3838:             * @see #rawReadBuffer
3839:             * @see #readBuffer
3840:             */
3841:            void copyUcs4ReadBuffer(int count, int shift1, int shift2,
3842:                    int shift3, int shift4) throws java.lang.Exception {
3843:                int j = readBufferPos;
3844:                int value;
3845:
3846:                if (count > 0 && (count % 4) != 0) {
3847:                    encodingError(
3848:                            "number of bytes in UCS-4 encoding not divisible by 4",
3849:                            -1, count);
3850:                }
3851:                for (int i = 0; i < count; i += 4) {
3852:                    value = (((rawReadBuffer[i] & 0xff) << shift1)
3853:                            | ((rawReadBuffer[i + 1] & 0xff) << shift2)
3854:                            | ((rawReadBuffer[i + 2] & 0xff) << shift3) | ((rawReadBuffer[i + 3] & 0xff) << shift4));
3855:                    if (value < 0x0000ffff) {
3856:                        readBuffer[j++] = (char) value;
3857:                        if (value == (int) '\r') {
3858:                            sawCR = true;
3859:                        }
3860:                    } else if (value < 0x000fffff) {
3861:                        readBuffer[j++] = (char) (0xd8 | ((value & 0x000ffc00) >> 10));
3862:                        readBuffer[j++] = (char) (0xdc | (value & 0x0003ff));
3863:                    } else {
3864:                        encodingError("value cannot be represented in UTF-16",
3865:                                value, i);
3866:                    }
3867:                }
3868:                readBufferLength = j;
3869:            }
3870:
3871:            /**
3872:             * Report a character encoding error.
3873:             */
3874:            void encodingError(String message, int value, int offset)
3875:                    throws java.lang.Exception {
3876:                String uri;
3877:
3878:                if (value >= 0) {
3879:                    message = message + " (byte value: 0x"
3880:                            + Integer.toHexString(value) + ')';
3881:                }
3882:                if (externalEntity != null) {
3883:                    uri = externalEntity.getURL().toString();
3884:                } else {
3885:                    uri = baseURI;
3886:                }
3887:                handler.error(message, uri, -1, offset + currentByteCount);
3888:            }
3889:
3890:            //////////////////////////////////////////////////////////////////////
3891:            // Local Variables.
3892:            //////////////////////////////////////////////////////////////////////
3893:
3894:            /**
3895:             * Re-initialize the variables for each parse.
3896:             */
3897:            void initializeVariables() {
3898:                // No errors; first line
3899:                errorCount = 0;
3900:                line = 1;
3901:                column = 0;
3902:
3903:                // Set up the buffers for data and names
3904:                dataBufferPos = 0;
3905:                dataBuffer = new char[DATA_BUFFER_INITIAL];
3906:                nameBufferPos = 0;
3907:                nameBuffer = new char[NAME_BUFFER_INITIAL];
3908:
3909:                // Set up the DTD hash tables
3910:                elementInfo = new Hashtable();
3911:                entityInfo = new Hashtable();
3912:                notationInfo = new Hashtable();
3913:
3914:                // Set up the variables for the current
3915:                // element context.
3916:                currentElement = null;
3917:                currentElementContent = CONTENT_UNDECLARED;
3918:
3919:                // Set up the input variables
3920:                sourceType = INPUT_NONE;
3921:                inputStack = new Stack();
3922:                entityStack = new Stack();
3923:                externalEntity = null;
3924:                tagAttributePos = 0;
3925:                tagAttributes = new String[100];
3926:                rawReadBuffer = new byte[READ_BUFFER_MAX];
3927:                readBufferOverflow = -1;
3928:
3929:                context = CONTEXT_NONE;
3930:
3931:                symbolTable = new Object[SYMBOL_TABLE_LENGTH];
3932:            }
3933:
3934:            /**
3935:             * Clean up after the parse to allow some garbage collection.
3936:             * Leave around anything that might be useful for queries.
3937:             */
3938:            void cleanupVariables() {
3939:                errorCount = -1;
3940:                line = -1;
3941:                column = -1;
3942:                dataBuffer = null;
3943:                nameBuffer = null;
3944:                currentElement = null;
3945:                currentElementContent = CONTENT_UNDECLARED;
3946:                sourceType = INPUT_NONE;
3947:                inputStack = null;
3948:                externalEntity = null;
3949:                entityStack = null;
3950:            }
3951:
3952:            //
3953:            // The current XML handler interface.
3954:            //
3955:            XmlHandler handler;
3956:
3957:            //
3958:            // I/O information.
3959:            //
3960:            private Reader reader; // current reader
3961:            private InputStream is; // current input stream
3962:            private int line; // current line number
3963:            private int column; // current column number
3964:            private int sourceType; // type of input source
3965:            private Stack inputStack; // stack of input soruces
3966:            private URLConnection externalEntity; // current external entity
3967:            private int encoding; // current character encoding.
3968:            private int currentByteCount; // how many bytes read from current source.
3969:
3970:            //
3971:            // Maintain a count of errors.
3972:            //
3973:            private int errorCount;
3974:
3975:            //
3976:            // Buffers for decoded but unparsed character input.
3977:            //
3978:            private final static int READ_BUFFER_MAX = 16384;
3979:            private char readBuffer[];
3980:            private int readBufferPos;
3981:            private int readBufferLength;
3982:            private int readBufferOverflow; // overflow character from last data chunk.
3983:
3984:            //
3985:            // Buffer for undecoded raw byte input.
3986:            //
3987:            private byte rawReadBuffer[];
3988:
3989:            //
3990:            // Buffer for parsed character data.
3991:            //
3992:            private static int DATA_BUFFER_INITIAL = 4096;
3993:            private char dataBuffer[];
3994:            private int dataBufferPos;
3995:
3996:            //
3997:            // Buffer for parsed names.
3998:            //
3999:            private static int NAME_BUFFER_INITIAL = 1024;
4000:            private char nameBuffer[];
4001:            private int nameBufferPos;
4002:
4003:            //
4004:            // Hashtables for DTD information on elements, entities, and notations.
4005:            //
4006:            private Hashtable elementInfo;
4007:            private Hashtable entityInfo;
4008:            private Hashtable notationInfo;
4009:
4010:            //
4011:            // Element type currently in force.
4012:            //
4013:            private String currentElement;
4014:            private int currentElementContent;
4015:
4016:            //
4017:            // Base external identifiers for resolution.
4018:            //
4019:            private String basePublicId;
4020:            private String baseURI;
4021:            private int baseEncoding;
4022:            private Reader baseReader;
4023:            private InputStream baseInputStream;
4024:            private char baseInputBuffer[];
4025:            private int baseInputBufferStart;
4026:            private int baseInputBufferLength;
4027:
4028:            //
4029:            // Stack of entity names, to help detect recursion.
4030:            //
4031:            private Stack entityStack;
4032:
4033:            //
4034:            // Are we in a context where PEs are allowed?
4035:            //
4036:            private int context;
4037:
4038:            //
4039:            // Symbol table, for internalising names.
4040:            //
4041:            private Object symbolTable[];
4042:            private final static int SYMBOL_TABLE_LENGTH = 1087;
4043:
4044:            //
4045:            // Hash table of attributes found in current start tag.
4046:            //
4047:            private String tagAttributes[];
4048:            private int tagAttributePos;
4049:
4050:            //
4051:            // Utility flag: have we noticed a CR while reading the last
4052:            // data chunk?  If so, we will have to go back and normalise
4053:            // CR/LF.
4054:            //
4055:            private boolean sawCR;
4056:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.