Source Code Cross Referenced for DTDParser.java in » 6.0-JDK-Modules » jaxb-xjc » com » sun » xml » dtdparser » Java Source Code / Java Documentation - Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » 6.0 JDK Modules » jaxb xjc » com.sun.xml.dtdparser
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /*
0002:         * @(#)XmlChars.java    1.1 00/08/05
0003:         *
0004:         * Copyright (c) 1998 Sun Microsystems, Inc. All Rights Reserved.
0005:         */
0006:
0007:        package com.sun.xml.dtdparser;
0008:
0009:        import org.xml.sax.EntityResolver;
0010:        import org.xml.sax.InputSource;
0011:        import org.xml.sax.Locator;
0012:        import org.xml.sax.SAXException;
0013:        import org.xml.sax.SAXParseException;
0014:
0015:        import java.io.IOException;
0016:        import java.util.ArrayList;
0017:        import java.util.Enumeration;
0018:        import java.util.Hashtable;
0019:        import java.util.Locale;
0020:        import java.util.Set;
0021:        import java.util.Vector;
0022:
0023:        /**
0024:         * This implements parsing of XML 1.0 DTDs.
0025:         * <p/>
0026:         * This conforms to the portion of the XML 1.0 specification related
0027:         * to the external DTD subset.
0028:         * <p/>
0029:         * For multi-language applications (such as web servers using XML
0030:         * processing to create dynamic content), a method supports choosing
0031:         * a locale for parser diagnostics which is both understood by the
0032:         * message recipient and supported by the parser.
0033:         * <p/>
0034:         * This parser produces a stream of parse events.  It supports some
0035:         * features (exposing comments, CDATA sections, and entity references)
0036:         * which are not required to be reported by conformant XML processors.
0037:         *
0038:         * @author David Brownell
0039:         * @author Janet Koenig
0040:         * @author Kohsuke KAWAGUCHI
0041:         * @version $Id: DTDParser.java,v 1.1 2005/05/31 22:28:54 kohsuke Exp $
0042:         */
0043:        public class DTDParser {
0044:            public final static String TYPE_CDATA = "CDATA";
0045:            public final static String TYPE_ID = "ID";
0046:            public final static String TYPE_IDREF = "IDREF";
0047:            public final static String TYPE_IDREFS = "IDREFS";
0048:            public final static String TYPE_ENTITY = "ENTITY";
0049:            public final static String TYPE_ENTITIES = "ENTITIES";
0050:            public final static String TYPE_NMTOKEN = "NMTOKEN";
0051:            public final static String TYPE_NMTOKENS = "NMTOKENS";
0052:            public final static String TYPE_NOTATION = "NOTATION";
0053:            public final static String TYPE_ENUMERATION = "ENUMERATION";
                 // NOTE(review): the ten TYPE_ constants above look like the attribute
                 // type names handed to the DTDEventListener; the code that uses them
                 // is outside this chunk -- confirm.
0054:
0055:            // stack of input entities being merged
                 // (the top of the stack; finished entities are removed with in.pop())
0056:            private InputEntity in;
0057:
0058:            // temporaries reused during parsing
                 // strTmp, nameTmp and nameCache are allocated by init() and set back
                 // to null once a parse has finished.
                 // strTmp receives the text of the literal read most recently.
0059:            private StringBuffer strTmp;
                 // scratch array for the characters of a name; grown on demand
0060:            private char nameTmp[];
0061:            private NameCache nameCache;
                 // filled by surrogatesToCharTmp() for character references above 0xffff
0062:            private char charTmp[] = new char[2];
0063:
0064:            // temporary DTD parsing state
                 // While this flag is false, maybeWhitespace() reads straight from the
                 // current input entity; while true it reads through getc().
0065:            private boolean doLexicalPE;
0066:
0067:            // DTD state, used during parsing
0068:            //    private SimpleHashtable    elements = new SimpleHashtable (47);
0069:            protected final Set declaredElements = new java.util.HashSet();
                 // table consulted for %name; references inside entity value literals
0070:            private SimpleHashtable params = new SimpleHashtable(7);
0071:
0072:            // exposed to package-private subclass
0073:            Hashtable notations = new Hashtable(7);
                 // table consulted for &name; references; init() seeds it with the five
                 // predefined entities
0074:            SimpleHashtable entities = new SimpleHashtable(17);
0075:
                 // Maps an ID (String) to a Boolean; afterRoot() reports every entry
                 // whose value is FALSE.
0076:            private SimpleHashtable ids = new SimpleHashtable();
0077:
0078:            // listeners for DTD parsing events
                 // (init() installs a DTDHandlerBase when none was set)
0079:            private DTDEventListener dtdHandler;
0080:
                 // init() installs a default Resolver / the default Locale when these
                 // two are still null at the start of a parse
0081:            private EntityResolver resolver;
0082:            private Locale locale;
0083:
0084:            // string constants -- use these copies so "==" works
0085:            // package private
0086:            static final String strANY = "ANY";
0087:            static final String strEMPTY = "EMPTY";
0088:
0089:            /**
0090:             * Selects the locale in which parser diagnostics are produced.
0091:             *
0092:             * @param l The locale to use, or null to use system defaults
0093:             *          (which may include only message IDs).
0094:             * @throws SAXException if the locale is non-null and not supported;
                  *          the locale already in effect is then kept
0095:             */
0096:            public void setLocale(Locale l) throws SAXException {
0097:                boolean rejected = (l != null) && !messages.isLocaleSupported(l.toString());
0098:                if (rejected) {
                         // the complaint is rendered in the locale still in effect
0099:                    Object[] details = { l };
0100:                    throw new SAXException(messages.getMessage(locale, "P-078", details));
0101:                }
                     locale = l;
0102:            }
0103:
0104:            /**
0105:             * Returns the diagnostic locale.
                  * <p/>
                  * This is the value last stored by setLocale().  It can be null until
                  * a parse has been started; at that point a missing locale is replaced
                  * by Locale.getDefault().
                  *
                  * @return the locale currently used for diagnostics, possibly null
0106:             */
0107:            public Locale getLocale() {
0108:                return locale;
0109:            }
0110:
0111:            /**
0112:             * Picks, from a list of preferred languages, the first one that is
0113:             * supported by this parser and, when one is found, installs it using
0114:             * <a href="#setLocale(java.util.Locale)">setLocale()</a>.  Such a list
0115:             * could be provided by a variety of user preference mechanisms,
0116:             * including the HTTP <em>Accept-Language</em> header field.
0117:             *
0118:             * @param languages Array of language specifiers, ordered with the most
0119:             *                  preferable one at the front.  For example, "en-ca" then
0120:             *                  "fr-ca", followed by "zh_CN".  Both RFC 1766 and Java
0121:             *                  styles are supported.
0122:             * @return The chosen locale, or null.
0123:             * @throws SAXException if setLocale() rejects the chosen locale
0124:             * @see MessageCatalog
0125:             */
0126:            public Locale chooseLocale(String languages[]) throws SAXException {
0127:                Locale chosen = messages.chooseLocale(languages);
0128:                if (chosen == null) {
0129:                    return null; // nothing selected; the current locale is untouched
0130:                }
0131:                setLocale(chosen);
0132:                return chosen;
0133:            }
0134:
0135:            /**
0136:             * Lets applications control entity resolution.
                  * <p/>
                  * If no resolver (or null) has been set by the time a parse starts, a
                  * default Resolver is created and stored in its place.
                  *
                  * @param r the resolver to consult, for instance for the URI passed to
                  *          parse(String)
0137:             */
0138:            public void setEntityResolver(EntityResolver r) {
0139:
0140:                resolver = r;
0141:            }
0142:
0143:            /**
0144:             * Returns the object used to resolve entities.
                  *
                  * @return the resolver set by the application, or the default one
                  *         installed when a parse was started; may be null before that
0145:             */
0146:            public EntityResolver getEntityResolver() {
0147:
0148:                return resolver;
0149:            }
0150:
0151:            /**
0152:             * Used by applications to set handling of DTD parsing events.
0153:             * A non-null handler is immediately given a Locator through which it
                  * can query this parser's public ID, system ID, line and column.
                  *
                  * @param handler the listener to notify, or null
0154:             */
0155:            public void setDtdHandler(DTDEventListener handler) {
0156:                dtdHandler = handler;
0157:                if (handler == null) {
0158:                    return;
0159:                }
0160:
                     // Each Locator method forwards to the parser method of the same name.
                     // The "DTDParser.this" qualifier is required: an unqualified call
                     // would resolve to the Locator itself and recurse.
0161:                Locator position = new Locator() {
0162:                    public int getLineNumber() {
0163:                        return DTDParser.this.getLineNumber();
0164:                    }
0165:
0166:                    public int getColumnNumber() {
0167:                        return DTDParser.this.getColumnNumber();
0168:                    }
0169:
0170:                    public String getSystemId() {
0171:                        return DTDParser.this.getSystemId();
0172:                    }
0173:
                         public String getPublicId() {
                             return DTDParser.this.getPublicId();
                         }
                     };
                     handler.setDocumentLocator(position);
0174:            }
0175:
0176:            /**
0177:             * Returns the handler used for DTD parsing events.
                  *
                  * @return the handler set by the application, or the DTDHandlerBase
                  *         installed when a parse was started without one; may be null
                  *         before that
0178:             */
0179:            public DTDEventListener getDtdHandler() {
0180:                return dtdHandler;
0181:            }
0182:
0183:            /**
0184:             * Parse a DTD.
                  * <p/>
                  * The parser state is reset with init() and the given source is then
                  * handed to parseInternal().
                  *
                  * @param in the DTD to read
                  * @throws IOException  NOTE(review): presumably raised when reading the
                  *                      source fails -- the reading code is not in view
                  * @throws SAXException when parseInternal() reports a parsing failure
0185:             */
0186:            public void parse(InputSource in) throws IOException, SAXException {
                     // the parameter named in hides the field of the same name here
0187:                init();
0188:                parseInternal(in);
0189:            }
0190:
0191:            /**
0192:             * Parse a DTD.
                  * <p/>
                  * The URI is first offered to the entity resolver; if the resolver
                  * returns nothing, the URI is opened directly.
                  *
                  * @param uri location of the DTD to read
0193:             */
0194:            public void parse(String uri) throws IOException, SAXException {
0195:                init();
0196:
0197:                InputSource source = resolver.resolveEntity(null, uri);
0198:                if (source == null) {
0199:                    // the custom resolver punted resolution to the parser
0200:                    source = Resolver.createInputSource(new java.net.URL(uri), false);
0201:                } else if (source.getSystemId() == null) {
0202:                    // the custom resolver did not construct the input entity
0203:                    // correctly: patch it up enough so relative URIs work, and
0204:                    // issue a warning to minimize later confusion
0205:                    warning("P-065", null);
0206:                    source.setSystemId(uri);
0207:                }
0208:
0209:                parseInternal(source);
0210:            }
0216:
0217:            // makes sure the parser is reset to "before a document": fresh scratch
                 // buffers are allocated, the per-DTD tables are emptied, the five
                 // predefined entities are registered again, and a locale, resolver and
                 // handler are defaulted when the application supplied none
0218:            private void init() {
0219:                in = null;
0220:                doLexicalPE = false;
0221:
                     // temporary data used in parsing
0222:                strTmp = new StringBuffer();
0223:                nameTmp = new char[20];
0224:                nameCache = new NameCache();
0225:
                     // drop declarations left in the tables (must precede the
                     // registration of the predefined entities below)
0226:                entities.clear();
0227:                notations.clear();
0228:                params.clear();
0229:                declaredElements.clear();
0230:
                     // predefined references ... re-interpreted later
0231:                String[][] predefined = {
0232:                    { "amp", "&#38;" },
0233:                    { "lt", "&#60;" },
0234:                    { "gt", ">" },
0235:                    { "quot", "\"" },
0236:                    { "apos", "'" },
0237:                };
0238:                for (int i = 0; i < predefined.length; i++) {
0239:                    builtin(predefined[i][0], predefined[i][1]);
0240:                }
0241:
0242:                if (locale == null) {
0243:                    locale = Locale.getDefault();
0244:                }
0245:                if (resolver == null) {
0246:                    resolver = new Resolver();
0247:                }
0248:                if (dtdHandler == null) {
0249:                    dtdHandler = new DTDHandlerBase();
                     }
0250:            }
0251:
0252:            private void builtin(String entityName, String entityValue) {
                     // registers an internal entity under the given name, with the
                     // characters of entityValue as its replacement text
0253:                char[] replacement = entityValue.toCharArray();
0254:                InternalEntity predefined = new InternalEntity(entityName, replacement);
0255:                entities.put(entityName, predefined);
0257:            }
0258:
0259:            ////////////////////////////////////////////////////////////////
0260:            //
0261:            // parsing is by recursive descent, code roughly
0262:            // following the BNF rules except tweaked for simple
0263:            // lookahead.  rules are more or less in numeric order,
0264:            // except where code sharing suggests other structures.
0265:            //
0266:            // a classic benefit of recursive descent parsers:  it's
0267:            // relatively easy to get diagnostics that make sense.
0268:            //
0269:            ////////////////////////////////////////////////////////////////
0270:
0271:            private void parseInternal(InputSource input) throws IOException,
0272:                    SAXException {
                     // Runs one complete parse of the given DTD source: wraps it in an
                     // InputEntity, reports startDTD, parses the external subset, checks
                     // the recorded IDs with afterRoot() and reports endDTD.  All
                     // per-parse state is released in the finally block, whether or not
                     // the parse succeeded.
0273:
0274:                if (input == null)
0275:                    fatal("P-000");
0276:
0277:                try {
0278:                    in = InputEntity.getInputEntity(dtdHandler, locale);
0279:                    in.init(input, null, null, false);
0280:
0281:                    dtdHandler.startDTD(in);
0282:
0283:                    // [30] extSubset ::= TextDecl? extSubsetDecl
0284:                    // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
0285:                    //        | PEReference | S )*
0286:                    //    ... same as [79] extPE, which is where the code is
0287:
0288:                    ExternalEntity externalSubset = new ExternalEntity(in);
0289:                    externalParameterEntity(externalSubset);
0290:
                         // anything left over after the subset is unexpected content
0291:                    if (!in.isEOF()) {
0292:                        fatal("P-001", new Object[] { Integer
0293:                                .toHexString(((int) getc())) });
0294:                    }
0295:                    afterRoot();
0296:                    dtdHandler.endDTD();
0297:
0298:                } catch (EndOfInputException e) {
0299:                    if (!in.isDocument()) {
0300:                        String name = in.getName();
0301:                        do { // force a relevant URI and line number
0302:                            in = in.pop();
0303:                        } while (in.isInternal());
0304:                        fatal("P-002", new Object[] { name });
0305:                    } else {
0306:                        fatal("P-003", null);
0307:                    }
0308:                } catch (RuntimeException e) {
0309:                    // Don't discard location that triggered the exception, and
0310:                    // chain the original exception so that its stack trace reaches
0311:                    // the caller instead of being dumped on System.err.
0312:                    String message = (e.getMessage() != null) ? e.getMessage()
0313:                            : e.getClass().getName();
0314:                    throw new SAXParseException(message, getPublicId(),
0315:                            getSystemId(), getLineNumber(), getColumnNumber(), e);
0316:
0317:                } finally {
0318:                    // recycle temporary data used during parsing
0319:                    strTmp = null;
0320:                    nameTmp = null;
0321:                    nameCache = null;
0322:
0323:                    // ditto input sources etc
0324:                    if (in != null) {
0325:                        in.close();
0326:                        in = null;
0327:                    }
0328:
0329:                    // get rid of all DTD info ... some of it would be
0330:                    // useful for editors etc, investigate later.
0331:
0332:                    params.clear();
0333:                    entities.clear();
0334:                    notations.clear();
0335:                    declaredElements.clear();
0336:                    //        elements.clear();
0337:                    ids.clear();
0338:                }
0340:            }
0341:
0342:            void afterRoot() throws SAXException {
0343:                // Make sure all IDREFs match declared ID attributes.  We scan
0344:                // after the document element is parsed, since XML allows forward
0345:                // references, and only now can we know if they're all resolved.
                     //
                     // Every key of the ids table whose value is FALSE is reported
                     // through error() with message V-024.
0346:
0347:                for (Enumeration e = ids.keys(); e.hasMoreElements();) {
0348:                    String id = (String) e.nextElement();
0349:                    Boolean value = (Boolean) ids.get(id);
                         // Compare by value: "==" only matches the canonical
                         // Boolean.FALSE instance and would silently skip an equal
                         // Boolean that was created elsewhere.
0350:                    if (Boolean.FALSE.equals(value))
0351:                        error("V-024", new Object[] { id });
0352:                }
0353:            }
0354:
0355:            // Requires whitespace at the current position.  roleId is for
0356:            // diagnostics: it is the ID of the message inserted into the error.
0357:            private void whitespace(String roleId) throws IOException, SAXException {
0358:                // [3] S ::= (#x20 | #x9 | #xd | #xa)+
0359:                if (maybeWhitespace()) {
0360:                    return;
0361:                }
0362:                String role = messages.getMessage(locale, roleId);
0363:                fatal("P-004", new Object[] { role });
0364:            }
0365:
0366:            // S?
                 // Skips optional whitespace and tells whether any was consumed.
0367:            private boolean maybeWhitespace() throws IOException, SAXException {
0368:                if (!doLexicalPE) {
0369:                    return in.maybeWhitespace();
0370:                }
0371:
0372:                // see getc() for the PE logic -- this lets us splice
0373:                // expansions of PEs in "anywhere".  getc() has smarts,
0374:                // so for external PEs we don't bypass it.
0375:
0376:                // XXX we can marginally speed PE handling, and certainly
0377:                // be cleaner (hence potentially more correct), by using
0378:                // the observations that expanded PEs only start and stop
0379:                // where whitespace is allowed.  getc wouldn't need any
0380:                // "lexical" PE expansion logic, and no other method needs
0381:                // to handle termination of PEs.  (parsing of literals would
0382:                // still need to pop entities, but not parsing of references
0383:                // in content.)
0384:
0385:                boolean sawAny = false;
0386:                for (char ch = getc();
0387:                        ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
0388:                        ch = getc()) {
0389:                    sawAny = true;
0390:
0391:                    // this gracefully ends things when we stop playing
0392:                    // with internal parameters.  caller should have a
0393:                    // grammar rule allowing whitespace at end of entity.
0394:                    if (in.isEOF() && !in.isInternal()) {
0395:                        return true;
0396:                    }
0397:                }
0398:                ungetc(); // the character that ended the run is not whitespace
0399:                return sawAny;
0400:            }
0401:
0402:            private String maybeGetName() throws IOException, SAXException {
                     // the name starting at the current position, or null if there is none
0403:                NameCacheEntry cached = maybeGetNameCacheEntry();
0404:                if (cached != null) {
0405:                    return cached.name;
                     }
                     return null;
0406:            }
0407:
0408:            private NameCacheEntry maybeGetNameCacheEntry() throws IOException,
0409:                    SAXException {
0410:                // [5] Name ::= (Letter|'_'|':') (Namechar)*
0411:                char first = getc();
0412:                boolean startsName = XmlChars.isLetter(first) || first == ':'
0413:                        || first == '_';
0414:                if (startsName) {
0415:                    return nameCharString(first);
0416:                }
0417:                ungetc(); // not a name: give the character back
0418:                return null;
0419:            }
0420:
0421:            // Used when parsing enumerations.  Reads one name token; a first
                 // character that is not a name character is reported as P-006.
0422:            private String getNmtoken() throws IOException, SAXException {
0423:
0424:                // [7] Nmtoken ::= (Namechar)+
0425:                char c = getc();
0426:                if (!XmlChars.isNameChar(c)) {
                         // Character.valueOf replaces the deprecated Character constructor
0427:                    fatal("P-006", new Object[] { Character.valueOf(c) });
                     }
0428:                return nameCharString(c).name;
0429:            }
0430:
0431:            // n.b. this gets used when parsing attribute values (for
0432:            // internal references) so we can't use strTmp; it's also
0433:            // a hotspot for CPU and memory in the parser (called at least
0434:            // once for each element) so this has been optimized a bit.
0435:            //
0436:            // Collects the rest of a name whose first character is 'c' into the
0437:            // nameTmp scratch array and looks the result up in the name cache.
0438:            private NameCacheEntry nameCharString(char c) throws IOException,
0439:                    SAXException {
0440:                nameTmp[0] = c;
0441:                int length = 1;
0442:
0443:                char next;
0444:                while ((next = in.getNameChar()) != 0) {
0445:                    if (length >= nameTmp.length) {
0446:                        // scratch array is full: switch to one ten slots larger
0447:                        char[] grown = new char[nameTmp.length + 10];
0448:                        System.arraycopy(nameTmp, 0, grown, 0, nameTmp.length);
0449:                        nameTmp = grown;
0450:                    }
0451:                    nameTmp[length++] = next;
0452:                }
                     return nameCache.lookupEntry(nameTmp, length);
0453:            }
0454:
0455:            //
0456:            // much similarity between parsing entity values in DTD
0457:            // and attribute values (in DTD or content) ... both follow
0458:            // literal parsing rules, newline canonicalization, etc
0459:            //
0460:            // leaves value in 'strTmp' ... either a "replacement text" (4.5),
0461:            // or else partially normalized attribute value (the first bit
0462:            // of 3.3.3's spec, without the "if not CDATA" bits).
0463:            //
0464:            private void parseLiteral(boolean isEntityValue)
0465:                    throws IOException, SAXException {
                     // Reads one quoted literal and leaves its text in strTmp.
                     //
                     // isEntityValue true:  general entity references are copied through
                     //     as text and parameter entity references are expanded.
                     // isEntityValue false: general entity references are expanded, each
                     //     whitespace character becomes a space, and the less-than
                     //     character is rejected (P-012).
                     // Character references are resolved immediately in both modes.
0466:
0467:                // [9] EntityValue ::=
0468:                //    '"' ([^"&%] | Reference | PEReference)* '"'
0469:                //    |    "'" ([^'&%] | Reference | PEReference)* "'"
0470:                // [10] AttValue ::=
0471:                //    '"' ([^"&]  | Reference             )* '"'
0472:                //    |    "'" ([^'&]  | Reference             )* "'"
0473:                char quote = getc();
0474:                char c;
                     // entity in which the literal starts; only a quote read while this
                     // entity is current again can close the literal
0475:                InputEntity source = in;
0476:
0477:                if (quote != '\'' && quote != '"') {
0478:                    fatal("P-007");
0479:                }
0480:
0481:                // don't report entity expansions within attributes,
0482:                // they're reported "fully expanded" via SAX
0483:                //    isInAttribute = !isEntityValue;
0484:
0485:                // get value into strTmp
0486:                strTmp = new StringBuffer();
0487:
0488:                // scan, allowing entity push/pop wherever ...
0489:                // expanded entities can't terminate the literal!
0490:                for (;;) {
                         // an entity pushed by an expansion below has been read
                         // completely: drop it and carry on in the entity underneath
0491:                    if (in != source && in.isEOF()) {
0492:                        // we don't report end of parsed entities
0493:                        // within attributes (no SAX hooks)
0494:                        in = in.pop();
0495:                        continue;
0496:                    }
0497:                    if ((c = getc()) == quote && in == source) {
0498:                        break;
0499:                    }
0500:
0501:                    //
0502:                    // Basically the "reference in attribute value"
0503:                    // row of the chart in section 4.4 of the spec
0504:                    //
0505:                    if (c == '&') {
0506:                        String entityName = maybeGetName();
0507:
0508:                        if (entityName != null) {
0509:                            nextChar(';', "F-020", entityName);
0510:
0511:                            // 4.4 says:  bypass these here ... we'll catch
0512:                            // forbidden refs to unparsed entities on use
0513:                            if (isEntityValue) {
0514:                                strTmp.append('&');
0515:                                strTmp.append(entityName);
0516:                                strTmp.append(';');
0517:                                continue;
0518:                            }
                                 // only reached for attribute values
0519:                            expandEntityInLiteral(entityName, entities,
0520:                                    isEntityValue);
0521:
0522:                            // character references are always included immediately
0523:                        } else if ((c = getc()) == '#') {
0524:                            int tmp = parseCharNumber();
0525:
0526:                            if (tmp > 0xffff) {
                                     // tmp is reused for the return value of the helper;
                                     // the second char is appended only when it is 2
0527:                                tmp = surrogatesToCharTmp(tmp);
0528:                                strTmp.append(charTmp[0]);
0529:                                if (tmp == 2)
0530:                                    strTmp.append(charTmp[1]);
0531:                            } else
0532:                                strTmp.append((char) tmp);
0533:                        } else
0534:                            fatal("P-009");
0535:                        continue;
0536:
0537:                    }
0538:
0539:                    // expand parameter entities only within entity value literals
0540:                    if (c == '%' && isEntityValue) {
0541:                        String entityName = maybeGetName();
0542:
0543:                        if (entityName != null) {
0544:                            nextChar(';', "F-021", entityName);
0545:                            expandEntityInLiteral(entityName, params,
0546:                                    isEntityValue);
0547:                            continue;
0548:                        } else
0549:                            fatal("P-011");
0550:                    }
0551:
0552:                    // For attribute values ...
0553:                    if (!isEntityValue) {
0554:                        // 3.3.3 says whitespace normalizes to space...
0555:                        if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
0556:                            strTmp.append(' ');
0557:                            continue;
0558:                        }
0559:
0560:                        // "<" not legal in parsed literals ...
0561:                        if (c == '<')
0562:                            fatal("P-012");
0563:                    }
0564:
                         // any other character is copied through unchanged
0565:                    strTmp.append(c);
0566:                }
0567:                //    isInAttribute = false;
0568:            }
0569:
0570:            // does a SINGLE expansion of the entity (often reparsed later):
                 // the entity is looked up by name in the given table and pushed as
                 // the new input; an undeclared name is reported as a fatal error
0571:            private void expandEntityInLiteral(String name,
0572:                    SimpleHashtable table, boolean isEntityValue)
0573:                    throws IOException, SAXException {
0574:
0575:                Object found = table.get(name);
0576:
0577:                if (found == null) {
0578:                    //
0579:                    // Note:  much confusion about whether spec requires such
0580:                    // errors to be fatal in many cases, but none about whether
0581:                    // it allows "normal" errors to be unrecoverable!
0582:                    //
0583:                    String messageId = (table == params) ? "V-022" : "P-014";
0584:                    fatal(messageId, new Object[] { name });
0585:                    return;
0586:                }
0587:                if (found instanceof InternalEntity) {
0588:                    InternalEntity internal = (InternalEntity) found;
0589:                    pushReader(internal.buf, name, !internal.isPE);
0590:                    return;
0591:                }
0592:                if (found instanceof ExternalEntity) {
0593:                    if (!isEntityValue) { // must be a PE ...
0594:                        fatal("P-013", new Object[] { name });
                         }
                         // XXX if this returns false ...
0595:                    pushReader((ExternalEntity) found);
                     }
0596:            }
0597:
0598:            // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
0599:            // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
0600:
0601:            // NOTE:  XML spec should explicitly say that PE ref syntax is
0602:            // ignored in PIs, comments, SystemLiterals, and Pubid Literal
0603:            // values ... can't process the XML spec's own DTD without doing
0604:            // that for comments.
0605:
0606:            private String getQuotedString(String type, String extra)
0607:                    throws IOException, SAXException {
                     // Reads a literal delimited by single or double quotes and returns
                     // the text between them.  type and extra are the message ID and
                     // message argument used to describe the literal in the diagnostic.
0608:
0609:                // use in.getc to bypass PE processing
0610:                char quote = in.getc();
0611:                boolean quoted = (quote == '\'') || (quote == '"');
0612:                if (!quoted) {
0613:                    String what = messages.getMessage(locale, type,
0614:                            new Object[] { extra });
0615:                    fatal("P-015", new Object[] { what });
0616:                }
0617:
0618:                StringBuffer text = new StringBuffer();
0619:                strTmp = text; // the literal is also left in the shared buffer
0620:                for (char c = in.getc(); c != quote; c = in.getc()) {
0621:                    text.append(c);
                     }
                     return text.toString();
0622:            }
0623:
0624:            private String parsePublicId() throws IOException, SAXException {
                     // Reads a public identifier literal, reports any character outside
                     // the PubidChar set as P-016, and returns the result of
                     // normalize(false) applied to the literal (via strTmp).
0625:
0626:                // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
0627:                // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
0628:                String retval = getQuotedString("F-033", null);
0629:                for (int i = 0; i < retval.length(); i++) {
0630:                    char c = retval.charAt(i);
0631:                    if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
0632:                            && !(c >= 'A' && c <= 'Z')
0633:                            && !(c >= 'a' && c <= 'z'))
                             // Character.valueOf replaces the deprecated constructor
0634:                        fatal("P-016", new Object[] { Character.valueOf(c) });
0635:                }
0636:                strTmp = new StringBuffer();
0637:                strTmp.append(retval);
0638:                return normalize(false);
0639:            }
0640:
0641:            // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
0642:            // handled by:  InputEntity.parsedContent()
0643:
0644:            private boolean maybeComment(boolean skipStart) throws IOException,
0645:                    SAXException {
                     // Reads one comment if one starts at the current position.
                     // Returns false when the comment opening is not found, true once a
                     // complete comment has been read.
                     //
                     // skipStart selects the opening text to look for: the form without
                     // the leading less-than sign when true, the full form otherwise.
                     // NOTE(review): presumably the caller has already consumed the
                     // less-than sign in the first case -- confirm against the callers.
0646:
0647:                // [15] Comment ::= '<!--'
0648:                //        ( (Char - '-') | ('-' (Char - '-'))*
0649:                //        '-->'
0650:                if (!in.peek(skipStart ? "!--" : "<!--", null))
0651:                    return false;
0652:
                     // lexical PE handling is switched off for the duration of the
                     // comment and put back to its previous value on normal completion
0653:                boolean savedLexicalPE = doLexicalPE;
0654:                boolean saveCommentText;
0655:
0656:                doLexicalPE = false;
                     // always false here, so the comment text is never collected and
                     // dtdHandler.comment() is never called from this method
0657:                saveCommentText = false;
0658:                if (saveCommentText)
0659:                    strTmp = new StringBuffer();
0660:
0661:                oneComment: for (;;) {
0662:                    try {
0663:                        // bypass PE expansion, but permit PEs
0664:                        // to complete ... valid docs won't care.
0665:                        for (;;) {
0666:                            int c = getc();
0667:                            if (c == '-') {
0668:                                c = getc();
0669:                                if (c != '-') {
                                         // a lone hyphen is ordinary comment text; the
                                         // character after it is pushed back and
                                         // examined again by the next iteration
0670:                                    if (saveCommentText)
0671:                                        strTmp.append('-');
0672:                                    ungetc();
0673:                                    continue;
0674:                                }
                                     // two hyphens in a row: the next character has to
                                     // close the comment, otherwise F-022 is reported
0675:                                nextChar('>', "F-022", null);
0676:                                break oneComment;
0677:                            }
0678:                            if (saveCommentText)
0679:                                strTmp.append((char) c);
0680:                        }
0681:                    } catch (EndOfInputException e) {
0682:                        //
0683:                        // This is fatal EXCEPT when we're processing a PE...
0684:                        // in which case a validating processor reports an error.
0685:                        // External PEs are easy to detect; internal ones we
0686:                        // infer by being an internal entity outside an element.
0687:                        //
0688:                        if (in.isInternal()) {
0689:                            error("V-021", null);
0690:                        }
0691:                        fatal("P-017");
0692:                    }
0693:                }
0694:                doLexicalPE = savedLexicalPE;
0695:                if (saveCommentText)
0696:                    dtdHandler.comment(strTmp.toString());
0697:                return true;
0698:            }
0699:
0700:            private boolean maybePI(boolean skipStart) throws IOException,
0701:                    SAXException {
0702:
0703:                // [16] PI ::= '<?' PITarget
0704:                //        (S (Char* - (Char* '?>' Char*)))?
0705:                //        '?>'
0706:                // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
0707:                boolean savedLexicalPE = doLexicalPE;
0708:
0709:                if (!in.peek(skipStart ? "?" : "<?", null))
0710:                    return false;
0711:                doLexicalPE = false;
0712:
0713:                String target = maybeGetName();
0714:
0715:                if (target == null) {
0716:                    fatal("P-018");
0717:                }
0718:                if ("xml".equals(target)) {
0719:                    fatal("P-019");
0720:                }
0721:                if ("xml".equalsIgnoreCase(target)) {
0722:                    fatal("P-020", new Object[] { target });
0723:                }
0724:
0725:                if (maybeWhitespace()) {
0726:                    strTmp = new StringBuffer();
0727:                    try {
0728:                        for (;;) {
0729:                            // use in.getc to bypass PE processing
0730:                            char c = in.getc();
0731:                            //Reached the end of PI.
0732:                            if (c == '?' && in.peekc('>'))
0733:                                break;
0734:                            strTmp.append(c);
0735:                        }
0736:                    } catch (EndOfInputException e) {
0737:                        fatal("P-021");
0738:                    }
0739:                    dtdHandler.processingInstruction(target, strTmp.toString());
0740:                } else {
0741:                    if (!in.peek("?>", null)) {
0742:                        fatal("P-022");
0743:                    }
0744:                    dtdHandler.processingInstruction(target, "");
0745:                }
0746:
0747:                doLexicalPE = savedLexicalPE;
0748:                return true;
0749:            }
0750:
0751:            // [18] CDSect ::= CDStart CData CDEnd
0752:            // [19] CDStart ::= '<![CDATA['
0753:            // [20] CData ::= (Char* - (Char* ']]>' Char*))
0754:            // [21] CDEnd ::= ']]>'
0755:            //
0756:            //    ... handled by InputEntity.unparsedContent()
0757:
0758:            // collapsing several rules together ... 
0759:            // simpler than attribute literals -- no reference parsing!
0760:            private String maybeReadAttribute(String name, boolean must)
0761:                    throws IOException, SAXException {
0762:
0763:                // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
0764:                // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
0765:                // [32] SDDecl ::=  S 'standalone' Eq \'|\" ... \'|\"
0766:                if (!maybeWhitespace()) {
0767:                    if (!must) {
0768:                        return null;
0769:                    }
0770:                    fatal("P-024", new Object[] { name });
0771:                    // NOTREACHED
0772:                }
0773:
0774:                if (!peek(name)) {
0775:                    if (must) {
0776:                        fatal("P-024", new Object[] { name });
0777:                    } else {
0778:                        // To ensure that the whitespace is there so that when we
0779:                        // check for the next attribute we assure that the
0780:                        // whitespace still exists.
0781:                        ungetc();
0782:                        return null;
0783:                    }
0784:                }
0785:
0786:                // [25] Eq ::= S? '=' S?
0787:                maybeWhitespace();
0788:                nextChar('=', "F-023", null);
0789:                maybeWhitespace();
0790:
0791:                return getQuotedString("F-035", name);
0792:            }
0793:
0794:            private void readVersion(boolean must, String versionNum)
0795:                    throws IOException, SAXException {
0796:
0797:                String value = maybeReadAttribute("version", must);
0798:
0799:                // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
0800:
0801:                if (must && value == null)
0802:                    fatal("P-025", new Object[] { versionNum });
0803:                if (value != null) {
0804:                    int length = value.length();
0805:                    for (int i = 0; i < length; i++) {
0806:                        char c = value.charAt(i);
0807:                        if (!((c >= '0' && c <= '9') || c == '_' || c == '.'
0808:                                || (c >= 'a' && c <= 'z')
0809:                                || (c >= 'A' && c <= 'Z') || c == ':' || c == '-'))
0810:                            fatal("P-026", new Object[] { value });
0811:                    }
0812:                }
0813:                if (value != null && !value.equals(versionNum))
0814:                    error("P-027", new Object[] { versionNum, value });
0815:            }
0816:
0817:            // common code used by most markup declarations
0818:            // ... S (Q)Name ...
0819:            private String getMarkupDeclname(String roleId, boolean qname)
0820:                    throws IOException, SAXException {
0821:
0822:                String name;
0823:
0824:                whitespace(roleId);
0825:                name = maybeGetName();
0826:                if (name == null)
0827:                    fatal("P-005", new Object[] { messages.getMessage(locale,
0828:                            roleId) });
0829:                return name;
0830:            }
0831:
0832:            private boolean maybeMarkupDecl() throws IOException, SAXException {
0833:
0834:                // [29] markupdecl ::= elementdecl | Attlistdecl
0835:                //           | EntityDecl | NotationDecl | PI | Comment
0836:                return maybeElementDecl() || maybeAttlistDecl()
0837:                        || maybeEntityDecl() || maybeNotationDecl()
0838:                        || maybePI(false) || maybeComment(false);
0839:            }
0840:
0841:            private static final String XmlLang = "xml:lang";
0842:
0843:            private boolean isXmlLang(String value) {
0844:
0845:                // [33] LanguageId ::= Langcode ('-' Subcode)*
0846:                // [34] Langcode ::= ISO639Code | IanaCode | UserCode
0847:                // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
0848:                // [36] IanaCode ::= [iI] '-' SubCode
0849:                // [37] UserCode ::= [xX] '-' SubCode
0850:                // [38] SubCode ::= [a-zA-Z]+
0851:
0852:                // the ISO and IANA codes (and subcodes) are registered,
0853:                // but that's neither a WF nor a validity constraint.
0854:
0855:                int nextSuffix;
0856:                char c;
0857:
0858:                if (value.length() < 2)
0859:                    return false;
0860:                c = value.charAt(1);
0861:                if (c == '-') { // IANA, or user, code
0862:                    c = value.charAt(0);
0863:                    if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
0864:                        return false;
0865:                    nextSuffix = 1;
0866:                } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
0867:                    // 2 letter ISO code, or error
0868:                    c = value.charAt(0);
0869:                    if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
0870:                        return false;
0871:                    nextSuffix = 2;
0872:                } else
0873:                    return false;
0874:
0875:                // here "suffix" ::= '-' [a-zA-Z]+ suffix*
0876:                while (nextSuffix < value.length()) {
0877:                    c = value.charAt(nextSuffix);
0878:                    if (c != '-')
0879:                        break;
0880:                    while (++nextSuffix < value.length()) {
0881:                        c = value.charAt(nextSuffix);
0882:                        if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
0883:                            break;
0884:                    }
0885:                }
0886:                return value.length() == nextSuffix && c != '-';
0887:            }
0888:
0889:            //
0890:            // CHAPTER 3:  Logical Structures
0891:            //
0892:
0893:            /**
0894:             * To validate, subclassers should at this time make sure that
0895:             * values are of the declared types:<UL>
0896:             * <LI> ID and IDREF(S) values are Names
0897:             * <LI> NMTOKEN(S) are Nmtokens
0898:             * <LI> ENUMERATION values match one of the tokens
0899:             * <LI> NOTATION values match a notation name
0900:             * <LI> ENTITY(IES) values match an unparsed external entity
0901:             * </UL>
0902:             * <p/>
0903:             * <P> Separately, make sure IDREF values match some ID
0904:             * provided in the document (in the afterRoot method).
0905:             */
0906:            /*    void validateAttributeSyntax (Attribute attr, String value)
0907:             throws DTDParseException {
0908:             // ID, IDREF(S) ... values are Names
0909:             if (Attribute.ID == attr.type()) {
0910:             if (!XmlNames.isName (value))
0911:             error ("V-025", new Object [] { value });
0912:
0913:             Boolean             b = (Boolean) ids.getNonInterned (value);
0914:             if (b == null || b.equals (Boolean.FALSE))
0915:             ids.put (value.intern (), Boolean.TRUE);
0916:             else
0917:             error ("V-026", new Object [] { value });
0918:
0919:             } else if (Attribute.IDREF == attr.type()) {
0920:             if (!XmlNames.isName (value))
0921:             error ("V-027", new Object [] { value });
0922:
0923:             Boolean             b = (Boolean) ids.getNonInterned (value);
0924:             if (b == null)
0925:             ids.put (value.intern (), Boolean.FALSE);
0926:
0927:             } else if (Attribute.IDREFS == attr.type()) {
0928:             StringTokenizer     tokenizer = new StringTokenizer (value);
0929:             Boolean             b;
0930:             boolean             sawValue = false;
0931:
0932:             while (tokenizer.hasMoreTokens ()) {
0933:             value = tokenizer.nextToken ();
0934:             if (!XmlNames.isName (value))
0935:             error ("V-027", new Object [] { value });
0936:             b = (Boolean) ids.getNonInterned (value);
0937:             if (b == null)
0938:             ids.put (value.intern (), Boolean.FALSE);
0939:             sawValue = true;
0940:             }
0941:             if (!sawValue)
0942:             error ("V-039", null);
0943:
0944:
0945:             // NMTOKEN(S) ... values are Nmtoken(s)
0946:             } else if (Attribute.NMTOKEN == attr.type()) {
0947:             if (!XmlNames.isNmtoken (value))
0948:             error ("V-028", new Object [] { value });
0949:
0950:             } else if (Attribute.NMTOKENS == attr.type()) {
0951:             StringTokenizer     tokenizer = new StringTokenizer (value);
0952:             boolean             sawValue = false;
0953:
0954:             while (tokenizer.hasMoreTokens ()) {
0955:             value = tokenizer.nextToken ();
0956:             if (!XmlNames.isNmtoken (value))
0957:             error ("V-028", new Object [] { value });
0958:             sawValue = true;
0959:             }
0960:             if (!sawValue)
0961:             error ("V-032", null);
0962:
0963:             // ENUMERATION ... values match one of the tokens
0964:             } else if (Attribute.ENUMERATION == attr.type()) {
0965:             for (int i = 0; i < attr.values().length; i++)
0966:             if (value.equals (attr.values()[i]))
0967:             return;
0968:             error ("V-029", new Object [] { value });
0969:
0970:             // NOTATION values match a notation name
0971:             } else if (Attribute.NOTATION == attr.type()) {
0972:             //
0973:             // XXX XML 1.0 spec should probably list references to
0974:             // externally defined notations in standalone docs as
0975:             // validity errors.  Ditto externally defined unparsed
0976:             // entities; neither should show up in attributes, else
0977:             // one needs to read the external declarations in order
0978:             // to make sense of the document (exactly what tagging
0979:             // a doc as "standalone" intends you won't need to do).
0980:             //
0981:             for (int i = 0; i < attr.values().length; i++)
0982:             if (value.equals (attr.values()[i]))
0983:             return;
0984:             error ("V-030", new Object [] { value });
0985:
0986:             // ENTITY(IES) values match an unparsed entity(ies)
0987:             } else if (Attribute.ENTITY == attr.type()) {
0988:             // see note above re standalone
0989:             if (!isUnparsedEntity (value))
0990:             error ("V-031", new Object [] { value });
0991:
0992:             } else if (Attribute.ENTITIES == attr.type()) {
0993:             StringTokenizer     tokenizer = new StringTokenizer (value);
0994:             boolean             sawValue = false;
0995:
0996:             while (tokenizer.hasMoreTokens ()) {
0997:             value = tokenizer.nextToken ();
0998:             // see note above re standalone
0999:             if (!isUnparsedEntity (value))
1000:             error ("V-031", new Object [] { value });
1001:             sawValue = true;
1002:             }
1003:             if (!sawValue)
1004:             error ("V-040", null);
1005:
1006:             } else if (Attribute.CDATA != attr.type())
1007:             throw new InternalError (attr.type());
1008:             }
1009:             */
1010:            /*
1011:             private boolean isUnparsedEntity (String name)
1012:             {
1013:             Object e = entities.getNonInterned (name);
1014:             if (e == null || !(e instanceof ExternalEntity))
1015:             return false;
1016:             return ((ExternalEntity)e).notation != null;
1017:             }
1018:             */
1019:            private boolean maybeElementDecl() throws IOException, SAXException {
1020:
1021:                // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1022:                // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1023:                InputEntity start = peekDeclaration("!ELEMENT");
1024:
1025:                if (start == null)
1026:                    return false;
1027:
1028:                // n.b. for content models where inter-element whitespace is
1029:                // ignorable, we mark that fact here.
1030:                String name = getMarkupDeclname("F-015", true);
1031:                //    Element        element = (Element) elements.get (name);
1032:                //    boolean        declEffective = false;
1033:
1034:                /*
1035:                 if (element != null) {
1036:                 if (element.contentModel() != null) {
1037:                 error ("V-012", new Object [] { name });
1038:                 } // else <!ATTLIST name ...> came first
1039:                 } else {
1040:                 element = new Element(name);
1041:                 elements.put (element.name(), element);
1042:                 declEffective = true;
1043:                 }
1044:                 */
1045:                if (declaredElements.contains(name))
1046:                    error("V-012", new Object[] { name });
1047:                else {
1048:                    declaredElements.add(name);
1049:                    //        declEffective = true;
1050:                }
1051:
1052:                short modelType;
1053:                whitespace("F-000");
1054:                if (peek(strEMPTY)) {
1055:                    ///        // leave element.contentModel as null for this case.
1056:                    dtdHandler.startContentModel(name,
1057:                            modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
1058:                } else if (peek(strANY)) {
1059:                    ///        element.setContentModel(new StringModel(StringModelType.ANY));
1060:                    dtdHandler.startContentModel(name,
1061:                            modelType = DTDEventListener.CONTENT_MODEL_ANY);
1062:                } else {
1063:                    modelType = getMixedOrChildren(name);
1064:                }
1065:
1066:                dtdHandler.endContentModel(name, modelType);
1067:
1068:                maybeWhitespace();
1069:                char c = getc();
1070:                if (c != '>')
1071:                    fatal("P-036", new Object[] { name, new Character(c) });
1072:                if (start != in)
1073:                    error("V-013", null);
1074:
1075:                ///        dtdHandler.elementDecl(element);
1076:
1077:                return true;
1078:            }
1079:
1080:            // We're leaving the content model as a regular expression;
1081:            // it's an efficient natural way to express such things, and
1082:            // libraries often interpret them.  No whitespace in the
1083:            // model we store, though!
1084:
1085:            /**
1086:             * returns content model type.
1087:             */
1088:            private short getMixedOrChildren(String elementName/*Element element*/)
1089:                    throws IOException, SAXException {
1090:
1091:                InputEntity start;
1092:
1093:                // [47] children ::= (choice|seq) ('?'|'*'|'+')?
1094:                strTmp = new StringBuffer();
1095:
1096:                nextChar('(', "F-028", elementName);
1097:                start = in;
1098:                maybeWhitespace();
1099:                strTmp.append('(');
1100:
1101:                short modelType;
1102:                if (peek("#PCDATA")) {
1103:                    strTmp.append("#PCDATA");
1104:                    dtdHandler.startContentModel(elementName,
1105:                            modelType = DTDEventListener.CONTENT_MODEL_MIXED);
1106:                    getMixed(elementName, start);
1107:                } else {
1108:                    dtdHandler
1109:                            .startContentModel(
1110:                                    elementName,
1111:                                    modelType = DTDEventListener.CONTENT_MODEL_CHILDREN);
1112:                    getcps(elementName, start);
1113:                }
1114:
1115:                return modelType;
1116:            }
1117:
1118:            // '(' S? already consumed
1119:            // matching ')' must be in "start" entity if validating
1120:            private void getcps(/*Element element,*/String elementName,
1121:                    InputEntity start) throws IOException, SAXException {
1122:
1123:                // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
1124:                // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
1125:                // [50] seq    ::= '(' S? cp (S? ',' S? cp)* S? ')'
1126:                boolean decided = false;
1127:                char type = 0;
1128:                //        ContentModel       retval, temp, current;
1129:
1130:                //        retval = temp = current = null;
1131:
1132:                dtdHandler.startModelGroup();
1133:
1134:                do {
1135:                    String tag;
1136:
1137:                    tag = maybeGetName();
1138:                    if (tag != null) {
1139:                        strTmp.append(tag);
1140:                        //                temp = new ElementModel(tag);
1141:                        //                getFrequency((RepeatableContent)temp);
1142:                        ///->
1143:                        dtdHandler.childElement(tag, getFrequency());
1144:                        ///<-
1145:                    } else if (peek("(")) {
1146:                        InputEntity next = in;
1147:                        strTmp.append('(');
1148:                        maybeWhitespace();
1149:                        //                temp = getcps(element, next);
1150:                        //                getFrequency(temp);
1151:                        ///->
1152:                        getcps(elementName, next);
1153:                        ///                getFrequency();        <- this looks like a bug
1154:                        ///<-
1155:                    } else
1156:                        fatal((type == 0) ? "P-039" : ((type == ',') ? "P-037"
1157:                                : "P-038"),
1158:                                new Object[] { new Character(getc()) });
1159:
1160:                    maybeWhitespace();
1161:                    if (decided) {
1162:                        char c = getc();
1163:
1164:                        //                if (current != null) {
1165:                        //                    current.addChild(temp);
1166:                        //                }
1167:                        if (c == type) {
1168:                            strTmp.append(type);
1169:                            maybeWhitespace();
1170:                            reportConnector(type);
1171:                            continue;
1172:                        } else if (c == '\u0029') { // rparen
1173:                            ungetc();
1174:                            continue;
1175:                        } else {
1176:                            fatal((type == 0) ? "P-041" : "P-040",
1177:                                    new Object[] { new Character(c),
1178:                                            new Character(type) });
1179:                        }
1180:                    } else {
1181:                        type = getc();
1182:                        switch (type) {
1183:                        case '|':
1184:                        case ',':
1185:                            reportConnector(type);
1186:                            break;
1187:                        default:
1188:                            //                        retval = temp;
1189:                            ungetc();
1190:                            continue;
1191:                        }
1192:                        //                retval = (ContentModel)current;
1193:                        decided = true;
1194:                        //                current.addChild(temp);
1195:                        strTmp.append(type);
1196:                    }
1197:                    maybeWhitespace();
1198:                } while (!peek(")"));
1199:
1200:                if (in != start)
1201:                    error("V-014", new Object[] { elementName });
1202:                strTmp.append(')');
1203:
1204:                dtdHandler.endModelGroup(getFrequency());
1205:                //        return retval;
1206:            }
1207:
1208:            private void reportConnector(char type) throws SAXException {
1209:                switch (type) {
1210:                case '|':
1211:                    dtdHandler.connector(DTDEventListener.CHOICE); ///<-
1212:                    return;
1213:                case ',':
1214:                    dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
1215:                    return;
1216:                default:
1217:                    throw new Error(); //assertion failed.
1218:                }
1219:            }
1220:
1221:            private short getFrequency() throws IOException, SAXException {
1222:
1223:                final char c = getc();
1224:
1225:                if (c == '?') {
1226:                    strTmp.append(c);
1227:                    return DTDEventListener.OCCURENCE_ZERO_OR_ONE;
1228:                    //        original.setRepeat(Repeat.ZERO_OR_ONE);
1229:                } else if (c == '+') {
1230:                    strTmp.append(c);
1231:                    return DTDEventListener.OCCURENCE_ONE_OR_MORE;
1232:                    //        original.setRepeat(Repeat.ONE_OR_MORE);
1233:                } else if (c == '*') {
1234:                    strTmp.append(c);
1235:                    return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
1236:                    //        original.setRepeat(Repeat.ZERO_OR_MORE);
1237:                } else {
1238:                    ungetc();
1239:                    return DTDEventListener.OCCURENCE_ONCE;
1240:                }
1241:            }
1242:
1243:            // '(' S? '#PCDATA' already consumed 
1244:            // matching ')' must be in "start" entity if validating
1245:            private void getMixed(String elementName, /*Element element,*/
1246:                    InputEntity start) throws IOException, SAXException {
1247:
1248:                // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
1249:                //        | '(' S? '#PCDATA'                   S? ')'
1250:                maybeWhitespace();
1251:                if (peek("\u0029*") || peek("\u0029")) {
1252:                    if (in != start)
1253:                        error("V-014", new Object[] { elementName });
1254:                    strTmp.append(')');
1255:                    //            element.setContentModel(new StringModel(StringModelType.PCDATA));
1256:                    return;
1257:                }
1258:
1259:                ArrayList l = new ArrayList();
1260:                //    l.add(new StringModel(StringModelType.PCDATA));
1261:
1262:                while (peek("|")) {
1263:                    String name;
1264:
1265:                    strTmp.append('|');
1266:                    maybeWhitespace();
1267:
1268:                    doLexicalPE = true;
1269:                    name = maybeGetName();
1270:                    if (name == null)
1271:                        fatal("P-042", new Object[] { elementName,
1272:                                Integer.toHexString(getc()) });
1273:                    if (l.contains(name)) {
1274:                        error("V-015", new Object[] { name });
1275:                    } else {
1276:                        l.add(name);
1277:                        dtdHandler.mixedElement(name);
1278:                    }
1279:                    strTmp.append(name);
1280:                    maybeWhitespace();
1281:                }
1282:
1283:                if (!peek("\u0029*")) // right paren
1284:                    fatal("P-043", new Object[] { elementName,
1285:                            new Character(getc()) });
1286:                if (in != start)
1287:                    error("V-014", new Object[] { elementName });
1288:                strTmp.append(')');
1289:                //        ChoiceModel cm = new ChoiceModel((Collection)l);
1290:                //    cm.setRepeat(Repeat.ZERO_OR_MORE);
1291:                //       element.setContentModel(cm);
1292:            }
1293:
1294:            private boolean maybeAttlistDecl() throws IOException, SAXException {
1295:
1296:                // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1297:                InputEntity start = peekDeclaration("!ATTLIST");
1298:
1299:                if (start == null)
1300:                    return false;
1301:
1302:                String elementName = getMarkupDeclname("F-016", true);
1303:                //    Element    element = (Element) elements.get (name);
1304:
1305:                //    if (element == null) {
1306:                //        // not yet declared -- no problem.
1307:                //        element = new Element(name);
1308:                //        elements.put(name, element);
1309:                //    }
1310:
1311:                while (!peek(">")) {
1312:
1313:                    // [53] AttDef ::= S Name S AttType S DefaultDecl
1314:                    // [54] AttType ::= StringType | TokenizedType | EnumeratedType
1315:
1316:                    // look for global attribute definitions, don't expand for now...
1317:                    maybeWhitespace();
1318:                    char c = getc();
1319:                    if (c == '%') {
1320:                        String entityName = maybeGetName();
1321:                        if (entityName != null) {
1322:                            nextChar(';', "F-021", entityName);
1323:                            whitespace("F-021");
1324:                            continue;
1325:                        } else
1326:                            fatal("P-011");
1327:                    }
1328:
1329:                    ungetc();
1330:                    // look for attribute name otherwise
1331:                    String attName = maybeGetName();
1332:                    if (attName == null) {
1333:                        fatal("P-044", new Object[] { new Character(getc()) });
1334:                    }
1335:                    whitespace("F-001");
1336:
1337:                    ///        Attribute    a = new Attribute (name);
1338:
1339:                    String typeName;
1340:                    Vector values = null; // notation/enumeration values
1341:
1342:                    // Note:  use the type constants from Attribute
1343:                    // so that "==" may be used (faster)
1344:
1345:                    // [55] StringType ::= 'CDATA'
1346:                    if (peek(TYPE_CDATA))
1347:                        ///            a.setType(Attribute.CDATA);
1348:                        typeName = TYPE_CDATA;
1349:
1350:                    // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
1351:                    //        | 'ENTITY' | 'ENTITIES'
1352:                    //        | 'NMTOKEN' | 'NMTOKENS'
1353:                    // n.b. if "IDREFS" is there, both "ID" and "IDREF"
1354:                    // match peekahead ... so this order matters!
1355:                    else if (peek(TYPE_IDREFS))
1356:                        typeName = TYPE_IDREFS;
1357:                    else if (peek(TYPE_IDREF))
1358:                        typeName = TYPE_IDREF;
1359:                    else if (peek(TYPE_ID)) {
1360:                        typeName = TYPE_ID;
1361:                        // TODO: should implement this error check?
1362:                        ///        if (element.id() != null) {
1363:                        ///                    error ("V-016", new Object [] { element.id() });
1364:                        ///        } else
1365:                        ///            element.setId(name);
1366:                    } else if (peek(TYPE_ENTITY))
1367:                        typeName = TYPE_ENTITY;
1368:                    else if (peek(TYPE_ENTITIES))
1369:                        typeName = TYPE_ENTITIES;
1370:                    else if (peek(TYPE_NMTOKENS))
1371:                        typeName = TYPE_NMTOKENS;
1372:                    else if (peek(TYPE_NMTOKEN))
1373:                        typeName = TYPE_NMTOKEN;
1374:
1375:                    // [57] EnumeratedType ::= NotationType | Enumeration
1376:                    // [58] NotationType ::= 'NOTATION' S '(' S? Name
1377:                    //        (S? '|' S? Name)* S? ')'
1378:                    else if (peek(TYPE_NOTATION)) {
1379:                        typeName = TYPE_NOTATION;
1380:                        whitespace("F-002");
1381:                        nextChar('(', "F-029", null);
1382:                        maybeWhitespace();
1383:
1384:                        values = new Vector();
1385:                        do {
1386:                            String name;
1387:                            if ((name = maybeGetName()) == null)
1388:                                fatal("P-068");
1389:                            // permit deferred declarations
1390:                            if (notations.get(name) == null)
1391:                                notations.put(name, name);
1392:                            values.addElement(name);
1393:                            maybeWhitespace();
1394:                            if (peek("|"))
1395:                                maybeWhitespace();
1396:                        } while (!peek(")"));
1397:                        ///            a.setValues(new String [v.size ()]);
1398:                        ///            for (int i = 0; i < v.size (); i++)
1399:                        ///                a.setValue(i, (String)v.elementAt(i));
1400:
1401:                        // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
1402:                    } else if (peek("(")) {
1403:                        ///            a.setType(Attribute.ENUMERATION);
1404:                        typeName = TYPE_ENUMERATION;
1405:
1406:                        maybeWhitespace();
1407:
1408:                        ///            Vector v = new Vector ();
1409:                        values = new Vector();
1410:                        do {
1411:                            String name = getNmtoken();
1412:                            ///                v.addElement (name);
1413:                            values.addElement(name);
1414:                            maybeWhitespace();
1415:                            if (peek("|"))
1416:                                maybeWhitespace();
1417:                        } while (!peek(")"));
1418:                        ///            a.setValues(new String [v.size ()]);
1419:                        ///            for (int i = 0; i < v.size (); i++)
1420:                        ///                a.setValue(i, (String)v.elementAt(i));
1421:                    } else {
1422:                        fatal("P-045", new Object[] { attName,
1423:                                new Character(getc()) });
1424:                        typeName = null;
1425:                    }
1426:
1427:                    short attributeUse;
1428:                    String defaultValue = null;
1429:
1430:                    // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
1431:                    //        | (('#FIXED' S)? AttValue)
1432:                    whitespace("F-003");
1433:                    if (peek("#REQUIRED"))
1434:                        attributeUse = DTDEventListener.USE_REQUIRED;
1435:                    ///            a.setIsRequired(true);
1436:                    else if (peek("#FIXED")) {
1437:                        ///            if (a.type() == Attribute.ID)
1438:                        if (typeName == TYPE_ID)
1439:                            error("V-017", new Object[] { attName });
1440:                        ///            a.setIsFixed(true);
1441:                        attributeUse = DTDEventListener.USE_FIXED;
1442:                        whitespace("F-004");
1443:                        parseLiteral(false);
1444:                        ///            if (a.type() != Attribute.CDATA)
1445:                        ///                a.setDefaultValue(normalize(false));
1446:                        ///            else
1447:                        ///                a.setDefaultValue(strTmp.toString());
1448:
1449:                        if (typeName == TYPE_CDATA)
1450:                            defaultValue = normalize(false);
1451:                        else
1452:                            defaultValue = strTmp.toString();
1453:
1454:                        // TODO: implement this check        
1455:                        ///            if (a.type() != Attribute.CDATA)
1456:                        ///                validateAttributeSyntax (a, a.defaultValue());
1457:                    } else if (!peek("#IMPLIED")) {
1458:                        attributeUse = DTDEventListener.USE_IMPLIED;
1459:
1460:                        ///            if (a.type() == Attribute.ID)
1461:                        if (typeName == TYPE_ID)
1462:                            error("V-018", new Object[] { attName });
1463:                        parseLiteral(false);
1464:                        ///            if (a.type() != Attribute.CDATA)
1465:                        ///                a.setDefaultValue(normalize(false));
1466:                        ///            else
1467:                        ///                a.setDefaultValue(strTmp.toString());
1468:                        if (typeName == TYPE_CDATA)
1469:                            defaultValue = normalize(false);
1470:                        else
1471:                            defaultValue = strTmp.toString();
1472:
1473:                        // TODO: implement this check        
1474:                        ///            if (a.type() != Attribute.CDATA)
1475:                        ///                validateAttributeSyntax (a, a.defaultValue());
1476:                    } else {
1477:                        // TODO: this looks like an fatal error.
1478:                        attributeUse = DTDEventListener.USE_NORMAL;
1479:                    }
1480:
1481:                    if (XmlLang.equals(attName)
1482:                            && defaultValue/* a.defaultValue()*/!= null
1483:                            && !isXmlLang(defaultValue/*a.defaultValue()*/))
1484:                        error(
1485:                                "P-033",
1486:                                new Object[] { defaultValue /*a.defaultValue()*/});
1487:
1488:                    // TODO: isn't it an error to specify the same attribute twice?
1489:                    ///        if (!element.attributes().contains(a)) {
1490:                    ///            element.addAttribute(a);
1491:                    ///            dtdHandler.attributeDecl(a);
1492:                    ///        }
1493:
1494:                    String[] v = (values != null) ? (String[]) values
1495:                            .toArray(new String[0]) : null;
1496:                    dtdHandler.attributeDecl(elementName, attName, typeName, v,
1497:                            attributeUse, defaultValue);
1498:                    maybeWhitespace();
1499:                }
1500:                if (start != in)
1501:                    error("V-013", null);
1502:                return true;
1503:            }
1504:
1505:            // used when parsing literal attribute values,
1506:            // or public identifiers.
1507:            //
1508:            // input in strTmp
1509:            private String normalize(boolean invalidIfNeeded) {
1510:
1511:                // this can allocate an extra string...
1512:
1513:                String s = strTmp.toString();
1514:                String s2 = s.trim();
1515:                boolean didStrip = false;
1516:
1517:                if (s != s2) {
1518:                    s = s2;
1519:                    s2 = null;
1520:                    didStrip = true;
1521:                }
1522:                strTmp = new StringBuffer();
1523:                for (int i = 0; i < s.length(); i++) {
1524:                    char c = s.charAt(i);
1525:                    if (!XmlChars.isSpace(c)) {
1526:                        strTmp.append(c);
1527:                        continue;
1528:                    }
1529:                    strTmp.append(' ');
1530:                    while (++i < s.length() && XmlChars.isSpace(s.charAt(i)))
1531:                        didStrip = true;
1532:                    i--;
1533:                }
1534:                if (didStrip)
1535:                    return strTmp.toString();
1536:                else
1537:                    return s;
1538:            }
1539:
1540:            private boolean maybeConditionalSect() throws IOException,
1541:                    SAXException {
1542:
1543:                // [61] conditionalSect ::= includeSect | ignoreSect
1544:
1545:                if (!peek("<!["))
1546:                    return false;
1547:
1548:                String keyword;
1549:                InputEntity start = in;
1550:
1551:                maybeWhitespace();
1552:
1553:                if ((keyword = maybeGetName()) == null)
1554:                    fatal("P-046");
1555:                maybeWhitespace();
1556:                nextChar('[', "F-030", null);
1557:
1558:                // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
1559:                //                extSubsetDecl ']]>'
1560:                if ("INCLUDE".equals(keyword)) {
1561:                    for (;;) {
1562:                        while (in.isEOF() && in != start)
1563:                            in = in.pop();
1564:                        if (in.isEOF()) {
1565:                            error("V-020", null);
1566:                        }
1567:                        if (peek("]]>"))
1568:                            break;
1569:
1570:                        doLexicalPE = false;
1571:                        if (maybeWhitespace())
1572:                            continue;
1573:                        if (maybePEReference())
1574:                            continue;
1575:                        doLexicalPE = true;
1576:                        if (maybeMarkupDecl() || maybeConditionalSect())
1577:                            continue;
1578:
1579:                        fatal("P-047");
1580:                    }
1581:
1582:                    // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
1583:                    //            ignoreSectcontents ']]>'
1584:                    // [64] ignoreSectcontents ::= Ignore ('<!['
1585:                    //            ignoreSectcontents ']]>' Ignore)*
1586:                    // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
1587:                } else if ("IGNORE".equals(keyword)) {
1588:                    int nestlevel = 1;
1589:                    // ignoreSectcontents
1590:                    doLexicalPE = false;
1591:                    while (nestlevel > 0) {
1592:                        char c = getc(); // will pop input entities
1593:                        if (c == '<') {
1594:                            if (peek("!["))
1595:                                nestlevel++;
1596:                        } else if (c == ']') {
1597:                            if (peek("]>"))
1598:                                nestlevel--;
1599:                        } else
1600:                            continue;
1601:                    }
1602:                } else
1603:                    fatal("P-048", new Object[] { keyword });
1604:                return true;
1605:            }
1606:
1607:            //
1608:            // CHAPTER 4:  Physical Structures
1609:            //
1610:
1611:            // parse decimal or hex numeric character reference
1612:            private int parseCharNumber() throws IOException, SAXException {
1613:
1614:                char c;
1615:                int retval = 0;
1616:
1617:                // n.b. we ignore overflow ...
1618:                if (getc() != 'x') {
1619:                    ungetc();
1620:                    for (;;) {
1621:                        c = getc();
1622:                        if (c >= '0' && c <= '9') {
1623:                            retval *= 10;
1624:                            retval += (c - '0');
1625:                            continue;
1626:                        }
1627:                        if (c == ';')
1628:                            return retval;
1629:                        fatal("P-049");
1630:                    }
1631:                } else
1632:                    for (;;) {
1633:                        c = getc();
1634:                        if (c >= '0' && c <= '9') {
1635:                            retval <<= 4;
1636:                            retval += (c - '0');
1637:                            continue;
1638:                        }
1639:                        if (c >= 'a' && c <= 'f') {
1640:                            retval <<= 4;
1641:                            retval += 10 + (c - 'a');
1642:                            continue;
1643:                        }
1644:                        if (c >= 'A' && c <= 'F') {
1645:                            retval <<= 4;
1646:                            retval += 10 + (c - 'A');
1647:                            continue;
1648:                        }
1649:                        if (c == ';')
1650:                            return retval;
1651:                        fatal("P-050");
1652:                    }
1653:            }
1654:
1655:            // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
1656:            // though still subject to the 'Char' construct in XML
1657:            private int surrogatesToCharTmp(int ucs4) throws SAXException {
1658:
1659:                if (ucs4 <= 0xffff) {
1660:                    if (XmlChars.isChar(ucs4)) {
1661:                        charTmp[0] = (char) ucs4;
1662:                        return 1;
1663:                    }
1664:                } else if (ucs4 <= 0x0010ffff) {
1665:                    // we represent these as UNICODE surrogate pairs
1666:                    ucs4 -= 0x10000;
1667:                    charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
1668:                    charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
1669:                    return 2;
1670:                }
1671:                fatal("P-051", new Object[] { Integer.toHexString(ucs4) });
1672:                // NOTREACHED
1673:                return -1;
1674:            }
1675:
1676:            private boolean maybePEReference() throws IOException, SAXException {
1677:
1678:                // This is the SYNTACTIC version of this construct.
1679:                // When processing external entities, there is also
1680:                // a LEXICAL version; see getc() and doLexicalPE.
1681:
1682:                // [69] PEReference ::= '%' Name ';'
1683:                if (!in.peekc('%'))
1684:                    return false;
1685:
1686:                String name = maybeGetName();
1687:                Object entity;
1688:
1689:                if (name == null)
1690:                    fatal("P-011");
1691:                nextChar(';', "F-021", name);
1692:                entity = params.get(name);
1693:
1694:                if (entity instanceof  InternalEntity) {
1695:                    InternalEntity value = (InternalEntity) entity;
1696:                    pushReader(value.buf, name, false);
1697:
1698:                } else if (entity instanceof  ExternalEntity) {
1699:                    pushReader((ExternalEntity) entity);
1700:                    externalParameterEntity((ExternalEntity) entity);
1701:
1702:                } else if (entity == null) {
1703:                    error("V-022", new Object[] { name });
1704:                }
1705:                return true;
1706:            }
1707:
1708:            private boolean maybeEntityDecl() throws IOException, SAXException {
1709:
1710:                // [70] EntityDecl ::= GEDecl | PEDecl
1711:                // [71] GEDecl ::= '<!ENTITY' S       Name S EntityDef S? '>'
1712:                // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF     S? '>'
1713:                // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
1714:                // [74] PEDef     ::= EntityValue |  ExternalID
1715:                //
1716:                InputEntity start = peekDeclaration("!ENTITY");
1717:
1718:                if (start == null)
1719:                    return false;
1720:
1721:                String entityName;
1722:                SimpleHashtable defns;
1723:                ExternalEntity externalId;
1724:                boolean doStore;
1725:
1726:                // PE expansion gets selectively turned off several places:
1727:                // in ENTITY declarations (here), in comments, in PIs.
1728:
1729:                // Here, we allow PE entities to be declared, and allows
1730:                // literals to include PE refs without the added spaces
1731:                // required with their expansion in markup decls.
1732:
1733:                doLexicalPE = false;
1734:                whitespace("F-005");
1735:                if (in.peekc('%')) {
1736:                    whitespace("F-006");
1737:                    defns = params;
1738:                } else
1739:                    defns = entities;
1740:
1741:                ungetc(); // leave some whitespace
1742:                doLexicalPE = true;
1743:                entityName = getMarkupDeclname("F-017", false);
1744:                whitespace("F-007");
1745:                externalId = maybeExternalID();
1746:
1747:                //
1748:                // first definition sticks ... e.g. internal subset PEs are used
1749:                // to override DTD defaults.  It's also an "error" to incorrectly
1750:                // redefine builtin internal entities, but since reporting such
1751:                // errors is optional we only give warnings ("just in case") for
1752:                // non-parameter entities.
1753:                //
1754:                doStore = (defns.get(entityName) == null);
1755:                if (!doStore && defns == entities)
1756:                    warning("P-054", new Object[] { entityName });
1757:
1758:                // internal entities
1759:                if (externalId == null) {
1760:                    char value[];
1761:                    InternalEntity entity;
1762:
1763:                    doLexicalPE = false; // "ab%bar;cd" -maybe-> "abcd"
1764:                    parseLiteral(true);
1765:                    doLexicalPE = true;
1766:                    if (doStore) {
1767:                        value = new char[strTmp.length()];
1768:                        if (value.length != 0)
1769:                            strTmp.getChars(0, value.length, value, 0);
1770:                        entity = new InternalEntity(entityName, value);
1771:                        entity.isPE = (defns == params);
1772:                        entity.isFromInternalSubset = false;
1773:                        defns.put(entityName, entity);
1774:                        if (defns == entities)
1775:                            dtdHandler.internalGeneralEntityDecl(entityName,
1776:                                    new String(value));
1777:                    }
1778:
1779:                    // external entities (including unparsed)
1780:                } else {
1781:                    // [76] NDataDecl ::= S 'NDATA' S Name
1782:                    if (defns == entities && maybeWhitespace() && peek("NDATA")) {
1783:                        externalId.notation = getMarkupDeclname("F-018", false);
1784:
1785:                        // flag undeclared notation for checking after
1786:                        // the DTD is fully processed
1787:                        if (notations.get(externalId.notation) == null)
1788:                            notations.put(externalId.notation, Boolean.TRUE);
1789:                    }
1790:                    externalId.name = entityName;
1791:                    externalId.isPE = (defns == params);
1792:                    externalId.isFromInternalSubset = false;
1793:                    if (doStore) {
1794:                        defns.put(entityName, externalId);
1795:                        if (externalId.notation != null)
1796:                            dtdHandler.unparsedEntityDecl(entityName,
1797:                                    externalId.publicId, externalId.systemId,
1798:                                    externalId.notation);
1799:                        else if (defns == entities)
1800:                            dtdHandler.externalGeneralEntityDecl(entityName,
1801:                                    externalId.publicId, externalId.systemId);
1802:                    }
1803:                }
1804:                maybeWhitespace();
1805:                nextChar('>', "F-031", entityName);
1806:                if (start != in)
1807:                    error("V-013", null);
1808:                return true;
1809:            }
1810:
1811:            private ExternalEntity maybeExternalID() throws IOException,
1812:                    SAXException {
1813:
1814:                // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1815:                //        | 'PUBLIC' S' PubidLiteral S Systemliteral
1816:                String temp = null;
1817:                ExternalEntity retval;
1818:
1819:                if (peek("PUBLIC")) {
1820:                    whitespace("F-009");
1821:                    temp = parsePublicId();
1822:                } else if (!peek("SYSTEM"))
1823:                    return null;
1824:
1825:                retval = new ExternalEntity(in);
1826:                retval.publicId = temp;
1827:                whitespace("F-008");
1828:                retval.systemId = parseSystemId();
1829:                return retval;
1830:            }
1831:
1832:            private String parseSystemId() throws IOException, SAXException {
1833:
1834:                String uri = getQuotedString("F-034", null);
1835:                int temp = uri.indexOf(':');
1836:
1837:                // resolve relative URIs ... must do it here since
1838:                // it's relative to the source file holding the URI!
1839:
1840:                // "new java.net.URL (URL, string)" conforms to RFC 1630,
1841:                // but we can't use that except when the URI is a URL.
1842:                // The entity resolver is allowed to handle URIs that are
1843:                // not URLs, so we pass URIs through with scheme intact
1844:                if (temp == -1 || uri.indexOf('/') < temp) {
1845:                    String baseURI;
1846:
1847:                    baseURI = in.getSystemId();
1848:                    if (baseURI == null)
1849:                        fatal("P-055", new Object[] { uri });
1850:                    if (uri.length() == 0)
1851:                        uri = ".";
1852:                    baseURI = baseURI
1853:                            .substring(0, baseURI.lastIndexOf('/') + 1);
1854:                    if (uri.charAt(0) != '/')
1855:                        uri = baseURI + uri;
1856:                    else {
1857:                        // XXX slashes at the beginning of a relative URI are
1858:                        // a special case we don't handle.
1859:                        throw new InternalError();
1860:                    }
1861:
1862:                    // letting other code map any "/xxx/../" or "/./" to "/",
1863:                    // since all URIs must handle it the same.
1864:                }
1865:                // check for fragment ID in URI
1866:                if (uri.indexOf('#') != -1)
1867:                    error("P-056", new Object[] { uri });
1868:                return uri;
1869:            }
1870:
1871:            private void maybeTextDecl() throws IOException, SAXException {
1872:
1873:                // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1874:                if (peek("<?xml")) {
1875:                    readVersion(false, "1.0");
1876:                    readEncoding(true);
1877:                    maybeWhitespace();
1878:                    if (!peek("?>"))
1879:                        fatal("P-057");
1880:                }
1881:            }
1882:
1883:            private void externalParameterEntity(ExternalEntity next)
1884:                    throws IOException, SAXException {
1885:
1886:                //
1887:                // Reap the intended benefits of standalone declarations:
1888:                // don't deal with external parameter entities, except to
1889:                // validate the standalone declaration.
1890:                //
1891:
1892:                // n.b. "in external parameter entities" (and external
1893:                // DTD subset, same grammar) parameter references can
1894:                // occur "within" markup declarations ... expansions can
1895:                // cross syntax rules.  Flagged here; affects getc().
1896:
1897:                // [79] ExtPE ::= TextDecl? extSubsetDecl
1898:                // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
1899:                //        | PEReference | S )*
1900:                InputEntity pe;
1901:
1902:                // XXX if this returns false ...
1903:
1904:                pe = in;
1905:                maybeTextDecl();
1906:                while (!pe.isEOF()) {
1907:                    // pop internal PEs (and whitespace before/after)
1908:                    if (in.isEOF()) {
1909:                        in = in.pop();
1910:                        continue;
1911:                    }
1912:                    doLexicalPE = false;
1913:                    if (maybeWhitespace())
1914:                        continue;
1915:                    if (maybePEReference())
1916:                        continue;
1917:                    doLexicalPE = true;
1918:                    if (maybeMarkupDecl() || maybeConditionalSect())
1919:                        continue;
1920:                    break;
1921:                }
1922:                // if (in != pe) throw new InternalError("who popped my PE?");
1923:                if (!pe.isEOF())
1924:                    fatal("P-059", new Object[] { in.getName() });
1925:            }
1926:
1927:            private void readEncoding(boolean must) throws IOException,
1928:                    SAXException {
1929:
1930:                // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1931:                String name = maybeReadAttribute("encoding", must);
1932:
1933:                if (name == null)
1934:                    return;
1935:                for (int i = 0; i < name.length(); i++) {
1936:                    char c = name.charAt(i);
1937:                    if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
1938:                        continue;
1939:                    if (i != 0
1940:                            && ((c >= '0' && c <= '9') || c == '-' || c == '_' || c == '.'))
1941:                        continue;
1942:                    fatal("P-060", new Object[] { new Character(c) });
1943:                }
1944:
1945:                //
1946:                // This should be the encoding in use, and it's even an error for
1947:                // it to be anything else (in certain cases that are impractical to
1948:                // to test, and may even be insufficient).  So, we do the best we
1949:                // can, and warn if things look suspicious.  Note that Java doesn't
1950:                // uniformly expose the encodings, and that the names it uses
1951:                // internally are nonstandard.  Also, that the XML spec allows
1952:                // such "errors" not to be reported at all.
1953:                //
1954:                String currentEncoding = in.getEncoding();
1955:
1956:                if (currentEncoding != null
1957:                        && !name.equalsIgnoreCase(currentEncoding))
1958:                    warning("P-061", new Object[] { name, currentEncoding });
1959:            }
1960:
1961:            private boolean maybeNotationDecl() throws IOException,
1962:                    SAXException {
1963:
1964:                // [82] NotationDecl ::= '<!NOTATION' S Name S
1965:                //        (ExternalID | PublicID) S? '>'
1966:                // [83] PublicID ::= 'PUBLIC' S PubidLiteral
1967:                InputEntity start = peekDeclaration("!NOTATION");
1968:
1969:                if (start == null)
1970:                    return false;
1971:
1972:                String name = getMarkupDeclname("F-019", false);
1973:                ExternalEntity entity = new ExternalEntity(in);
1974:
1975:                whitespace("F-011");
1976:                if (peek("PUBLIC")) {
1977:                    whitespace("F-009");
1978:                    entity.publicId = parsePublicId();
1979:                    if (maybeWhitespace()) {
1980:                        if (!peek(">"))
1981:                            entity.systemId = parseSystemId();
1982:                        else
1983:                            ungetc();
1984:                    }
1985:                } else if (peek("SYSTEM")) {
1986:                    whitespace("F-008");
1987:                    entity.systemId = parseSystemId();
1988:                } else
1989:                    fatal("P-062");
1990:                maybeWhitespace();
1991:                nextChar('>', "F-032", name);
1992:                if (start != in)
1993:                    error("V-013", null);
1994:                if (entity.systemId != null
1995:                        && entity.systemId.indexOf('#') != -1)
1996:                    error("P-056", new Object[] { entity.systemId });
1997:
1998:                Object value = notations.get(name);
1999:                if (value != null && value instanceof  ExternalEntity)
2000:                    warning("P-063", new Object[] { name });
2001:
2002:                else {
2003:                    notations.put(name, entity);
2004:                    dtdHandler.notationDecl(name, entity.publicId,
2005:                            entity.systemId);
2006:                }
2007:                return true;
2008:            }
2009:
2010:            ////////////////////////////////////////////////////////////////
2011:            //
2012:            //    UTILITIES
2013:            //
2014:            ////////////////////////////////////////////////////////////////
2015:
2016:            private char getc() throws IOException, SAXException {
2017:
2018:                if (!doLexicalPE) {
2019:                    char c = in.getc();
2020:                    return c;
2021:                }
2022:
2023:                //
2024:                // External parameter entities get funky processing of '%param;'
2025:                // references.  It's not clearly defined in the XML spec; but it
2026:                // boils down to having those refs be _lexical_ in most cases to
2027:                // include partial syntax productions.  It also needs selective
2028:                // enabling; "<!ENTITY % foo ...>" must work, for example, and
2029:                // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
2030:                // if it's expanded in a literal, else "ab  cd".  PEs also do
2031:                // not expand within comments or PIs, and external PEs are only
2032:                // allowed to have markup decls (and so aren't handled lexically).
2033:                //
2034:                // This PE handling should be merged into maybeWhitespace, where
2035:                // it can be dealt with more consistently.
2036:                //
2037:                // Also, there are some validity constraints in this area.
2038:                //
2039:                char c;
2040:
2041:                while (in.isEOF()) {
2042:                    if (in.isInternal() || (doLexicalPE && !in.isDocument()))
2043:                        in = in.pop();
2044:                    else {
2045:                        fatal("P-064", new Object[] { in.getName() });
2046:                    }
2047:                }
2048:                if ((c = in.getc()) == '%' && doLexicalPE) {
2049:                    // PE ref ::= '%' name ';'
2050:                    String name = maybeGetName();
2051:                    Object entity;
2052:
2053:                    if (name == null)
2054:                        fatal("P-011");
2055:                    nextChar(';', "F-021", name);
2056:                    entity = params.get(name);
2057:
2058:                    // push a magic "entity" before and after the
2059:                    // real one, so ungetc() behaves uniformly
2060:                    pushReader(" ".toCharArray(), null, false);
2061:                    if (entity instanceof  InternalEntity)
2062:                        pushReader(((InternalEntity) entity).buf, name, false);
2063:                    else if (entity instanceof  ExternalEntity)
2064:                        // PEs can't be unparsed!
2065:                        // XXX if this returns false ...
2066:                        pushReader((ExternalEntity) entity);
2067:                    else if (entity == null)
2068:                        // see note in maybePEReference re making this be nonfatal.
2069:                        fatal("V-022");
2070:                    else
2071:                        throw new InternalError();
2072:                    pushReader(" ".toCharArray(), null, false);
2073:                    return in.getc();
2074:                }
2075:                return c;
2076:            }
2077:
2078:            private void ungetc() {
2079:
2080:                in.ungetc();
2081:            }
2082:
2083:            private boolean peek(String s) throws IOException, SAXException {
2084:
2085:                return in.peek(s, null);
2086:            }
2087:
2088:            // Return the entity starting the specified declaration
2089:            // (for validating declaration nesting) else null.
2090:
2091:            private InputEntity peekDeclaration(String s) throws IOException,
2092:                    SAXException {
2093:
2094:                InputEntity start;
2095:
2096:                if (!in.peekc('<'))
2097:                    return null;
2098:                start = in;
2099:                if (in.peek(s, null))
2100:                    return start;
2101:                in.ungetc();
2102:                return null;
2103:            }
2104:
2105:            private void nextChar(char c, String location, String near)
2106:                    throws IOException, SAXException {
2107:
2108:                while (in.isEOF() && !in.isDocument())
2109:                    in = in.pop();
2110:                if (!in.peekc(c))
2111:                    fatal("P-008", new Object[] { new Character(c),
2112:                            messages.getMessage(locale, location),
2113:                            (near == null ? "" : ('"' + near + '"')) });
2114:            }
2115:
2116:            private void pushReader(char buf[], String name, boolean isGeneral)
2117:                    throws SAXException {
2118:
2119:                InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2120:                r.init(buf, name, in, !isGeneral);
2121:                in = r;
2122:            }
2123:
2124:            private boolean pushReader(ExternalEntity next) throws IOException,
2125:                    SAXException {
2126:
2127:                InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2128:                InputSource s;
2129:                try {
2130:                    s = next.getInputSource(resolver);
2131:                } catch (IOException e) {
2132:                    String msg = "unable to open the external entity from :"
2133:                            + next.systemId;
2134:                    if (next.publicId != null)
2135:                        msg += " (public id:" + next.publicId + ")";
2136:
2137:                    SAXParseException spe = new SAXParseException(msg,
2138:                            getPublicId(), getSystemId(), getLineNumber(),
2139:                            getColumnNumber(), e);
2140:                    dtdHandler.fatalError(spe);
2141:                    throw e;
2142:                }
2143:
2144:                r.init(s, next.name, in, next.isPE);
2145:                in = r;
2146:                return true;
2147:            }
2148:
2149:            public String getPublicId() {
2150:
2151:                return (in == null) ? null : in.getPublicId();
2152:            }
2153:
2154:            public String getSystemId() {
2155:
2156:                return (in == null) ? null : in.getSystemId();
2157:            }
2158:
2159:            public int getLineNumber() {
2160:
2161:                return (in == null) ? -1 : in.getLineNumber();
2162:            }
2163:
2164:            public int getColumnNumber() {
2165:
2166:                return (in == null) ? -1 : in.getColumnNumber();
2167:            }
2168:
2169:            // error handling convenience routines
2170:
2171:            private void warning(String messageId, Object parameters[])
2172:                    throws SAXException {
2173:
2174:                SAXParseException e = new SAXParseException(messages
2175:                        .getMessage(locale, messageId, parameters),
2176:                        getPublicId(), getSystemId(), getLineNumber(),
2177:                        getColumnNumber());
2178:
2179:                dtdHandler.warning(e);
2180:            }
2181:
2182:            void error(String messageId, Object parameters[])
2183:                    throws SAXException {
2184:
2185:                SAXParseException e = new SAXParseException(messages
2186:                        .getMessage(locale, messageId, parameters),
2187:                        getPublicId(), getSystemId(), getLineNumber(),
2188:                        getColumnNumber());
2189:
2190:                dtdHandler.error(e);
2191:            }
2192:
2193:            private void fatal(String messageId) throws SAXException {
2194:
2195:                fatal(messageId, null);
2196:            }
2197:
2198:            private void fatal(String messageId, Object parameters[])
2199:                    throws SAXException {
2200:
2201:                SAXParseException e = new SAXParseException(messages
2202:                        .getMessage(locale, messageId, parameters),
2203:                        getPublicId(), getSystemId(), getLineNumber(),
2204:                        getColumnNumber());
2205:
2206:                dtdHandler.fatalError(e);
2207:
2208:                throw e;
2209:            }
2210:
2211:            //
2212:            // Map char arrays to strings ... cuts down both on memory and
2213:            // CPU usage for element/attribute/other names that are reused.
2214:            //
2215:            // Documents typically repeat names a lot, so we more or less
2216:            // intern all the strings within the document; since some strings
2217:            // are repeated in multiple documents (e.g. stylesheets) we go
2218:            // a bit further, and intern globally.
2219:            //
2220:            static class NameCache {
2221:                //
2222:                // Unless we auto-grow this, the default size should be a
2223:                // reasonable bit larger than needed for most XML files
2224:                // we've yet seen (and be prime).  If it's too small, the
2225:                // penalty is just excess cache collisions.
2226:                //
2227:                NameCacheEntry hashtable[] = new NameCacheEntry[541];
2228:
2229:                //
2230:                // Usually we just want to get the 'symbol' for these chars
2231:                //
2232:                String lookup(char value[], int len) {
2233:
2234:                    return lookupEntry(value, len).name;
2235:                }
2236:
2237:                //
2238:                // Sometimes we need to scan the chars in the resulting
2239:                // string, so there's an accessor which exposes them.
2240:                // (Mostly for element end tags.)
2241:                //
2242:                NameCacheEntry lookupEntry(char value[], int len) {
2243:
2244:                    int index = 0;
2245:                    NameCacheEntry entry;
2246:
2247:                    // hashing to get index
2248:                    for (int i = 0; i < len; i++)
2249:                        index = index * 31 + value[i];
2250:                    index &= 0x7fffffff;
2251:                    index %= hashtable.length;
2252:
2253:                    // return entry if one's there ...
2254:                    for (entry = hashtable[index]; entry != null; entry = entry.next) {
2255:                        if (entry.matches(value, len))
2256:                            return entry;
2257:                    }
2258:
2259:                    // else create new one
2260:                    entry = new NameCacheEntry();
2261:                    entry.chars = new char[len];
2262:                    System.arraycopy(value, 0, entry.chars, 0, len);
2263:                    entry.name = new String(entry.chars);
2264:                    //
2265:                    // NOTE:  JDK 1.1 has a fixed size string intern table,
2266:                    // with non-GC'd entries.  It can panic here; that's a
2267:                    // JDK problem, use 1.2 or later with many identifiers.
2268:                    //
2269:                    entry.name = entry.name.intern(); // "global" intern
2270:                    entry.next = hashtable[index];
2271:                    hashtable[index] = entry;
2272:                    return entry;
2273:                }
2274:            }
2275:
2276:            static class NameCacheEntry {
2277:
2278:                String name;
2279:                char chars[];
2280:                NameCacheEntry next;
2281:
2282:                boolean matches(char value[], int len) {
2283:
2284:                    if (chars.length != len)
2285:                        return false;
2286:                    for (int i = 0; i < len; i++)
2287:                        if (value[i] != chars[i])
2288:                            return false;
2289:                    return true;
2290:                }
2291:            }
2292:
2293:            //
2294:            // Message catalog for diagnostics.
2295:            //
2296:            static final Catalog messages = new Catalog();
2297:
2298:            static final class Catalog extends MessageCatalog {
2299:
2300:                Catalog() {
2301:                    super (DTDParser.class);
2302:                }
2303:            }
2304:
2305:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.