0001: /*
0002: * @(#)XmlChars.java 1.1 00/08/05
0003: *
0004: * Copyright (c) 1998 Sun Microsystems, Inc. All Rights Reserved.
0005: */
0006:
0007: package com.sun.xml.dtdparser;
0008:
0009: import org.xml.sax.EntityResolver;
0010: import org.xml.sax.InputSource;
0011: import org.xml.sax.Locator;
0012: import org.xml.sax.SAXException;
0013: import org.xml.sax.SAXParseException;
0014:
0015: import java.io.IOException;
0016: import java.util.ArrayList;
0017: import java.util.Enumeration;
0018: import java.util.Hashtable;
0019: import java.util.Locale;
0020: import java.util.Set;
0021: import java.util.Vector;
0022:
0023: /**
0024: * This implements parsing of XML 1.0 DTDs.
0025: * <p/>
0026: * This conforms to the portion of the XML 1.0 specification related
0027: * to the external DTD subset.
0028: * <p/>
0029: * For multi-language applications (such as web servers using XML
0030: * processing to create dynamic content), a method supports choosing
0031: * a locale for parser diagnostics which is both understood by the
0032: * message recipient and supported by the parser.
0033: * <p/>
0034: * This parser produces a stream of parse events. It supports some
0035: * features (exposing comments, CDATA sections, and entity references)
0036: * which are not required to be reported by conformant XML processors.
0037: *
0038: * @author David Brownell
0039: * @author Janet Koenig
0040: * @author Kohsuke KAWAGUCHI
0041: * @version $Id: DTDParser.java,v 1.1 2005/05/31 22:28:54 kohsuke Exp $
0042: */
0043: public class DTDParser {
// Names of the XML attribute types. Presumably these are the type strings
// handed to the DTD event listener for attribute declarations -- confirm
// against the ATTLIST parsing code, which is not visible here.
public final static String TYPE_CDATA = "CDATA";
public final static String TYPE_ID = "ID";
public final static String TYPE_IDREF = "IDREF";
public final static String TYPE_IDREFS = "IDREFS";
public final static String TYPE_ENTITY = "ENTITY";
public final static String TYPE_ENTITIES = "ENTITIES";
public final static String TYPE_NMTOKEN = "NMTOKEN";
public final static String TYPE_NMTOKENS = "NMTOKENS";
public final static String TYPE_NOTATION = "NOTATION";
public final static String TYPE_ENUMERATION = "ENUMERATION";

// stack of input entities being merged
// (the current top; finished expansions are removed with in.pop())
private InputEntity in;

// temporaries reused during parsing
// strTmp:    receives literal, quoted-string and PI text
// nameTmp:   scratch buffer for name characters, grown on demand
// nameCache: turns the characters in nameTmp into cached name entries
// charTmp:   holds the one or two chars produced by surrogatesToCharTmp()
private StringBuffer strTmp;
private char nameTmp[];
private NameCache nameCache;
private char charTmp[] = new char[2];

// temporary DTD parsing state
// (when false, maybeWhitespace() delegates straight to the input entity;
// switched off while comments and processing instructions are scanned)
private boolean doLexicalPE;

// DTD state, used during parsing
// private SimpleHashtable elements = new SimpleHashtable (47);
// declaredElements: names already seen in <!ELEMENT ...>, used to report
//                   duplicate declarations
// params:           parameter entities, looked up for "%name;" references
protected final Set declaredElements = new java.util.HashSet();
private SimpleHashtable params = new SimpleHashtable(7);

// exposed to package-private subclass
// (entities holds general entities, including the five predefined ones)
Hashtable notations = new Hashtable(7);
SimpleHashtable entities = new SimpleHashtable(17);

// maps an ID string to a Boolean; entries still FALSE are reported by
// afterRoot() as unresolved references
private SimpleHashtable ids = new SimpleHashtable();

// listeners for DTD parsing events
private DTDEventListener dtdHandler;

// resolver and diagnostic locale; both receive defaults in init() when unset
private EntityResolver resolver;
private Locale locale;

// string constants -- use these copies so "==" works
// package private
static final String strANY = "ANY";
static final String strEMPTY = "EMPTY";
0088:
0089: /**
0090: * Used by applications to request locale for diagnostics.
0091: *
0092: * @param l The locale to use, or null to use system defaults
0093: * (which may include only message IDs).
0094: */
0095: public void setLocale(Locale l) throws SAXException {
0096:
0097: if (l != null && !messages.isLocaleSupported(l.toString())) {
0098: throw new SAXException(messages.getMessage(locale, "P-078",
0099: new Object[] { l }));
0100: }
0101: locale = l;
0102: }
0103:
0104: /**
0105: * Returns the diagnostic locale.
0106: */
0107: public Locale getLocale() {
0108: return locale;
0109: }
0110:
0111: /**
0112: * Chooses a client locale to use for diagnostics, using the first
0113: * language specified in the list that is supported by this parser.
0114: * That locale is then set using <a href="#setLocale(java.util.Locale)">
0115: * setLocale()</a>. Such a list could be provided by a variety of user
0116: * preference mechanisms, including the HTTP <em>Accept-Language</em>
0117: * header field.
0118: *
0119: * @param languages Array of language specifiers, ordered with the most
0120: * preferable one at the front. For example, "en-ca" then "fr-ca",
0121: * followed by "zh_CN". Both RFC 1766 and Java styles are supported.
0122: * @return The chosen locale, or null.
0123: * @see MessageCatalog
0124: */
0125: public Locale chooseLocale(String languages[]) throws SAXException {
0126:
0127: Locale l = messages.chooseLocale(languages);
0128:
0129: if (l != null) {
0130: setLocale(l);
0131: }
0132: return l;
0133: }
0134:
0135: /**
0136: * Lets applications control entity resolution.
0137: */
0138: public void setEntityResolver(EntityResolver r) {
0139:
0140: resolver = r;
0141: }
0142:
0143: /**
0144: * Returns the object used to resolve entities
0145: */
0146: public EntityResolver getEntityResolver() {
0147:
0148: return resolver;
0149: }
0150:
0151: /**
0152: * Used by applications to set handling of DTD parsing events.
0153: */
0154: public void setDtdHandler(DTDEventListener handler) {
0155: dtdHandler = handler;
0156: if (handler != null)
0157: handler.setDocumentLocator(new Locator() {
0158: public String getPublicId() {
0159: return DTDParser.this .getPublicId();
0160: }
0161:
0162: public String getSystemId() {
0163: return DTDParser.this .getSystemId();
0164: }
0165:
0166: public int getLineNumber() {
0167: return DTDParser.this .getLineNumber();
0168: }
0169:
0170: public int getColumnNumber() {
0171: return DTDParser.this .getColumnNumber();
0172: }
0173: });
0174: }
0175:
0176: /**
0177: * Returns the handler used to for DTD parsing events.
0178: */
0179: public DTDEventListener getDtdHandler() {
0180: return dtdHandler;
0181: }
0182:
0183: /**
0184: * Parse a DTD.
0185: */
0186: public void parse(InputSource in) throws IOException, SAXException {
0187: init();
0188: parseInternal(in);
0189: }
0190:
0191: /**
0192: * Parse a DTD.
0193: */
0194: public void parse(String uri) throws IOException, SAXException {
0195: InputSource in;
0196:
0197: init();
0198: // System.out.println ("parse (\"" + uri + "\")");
0199: in = resolver.resolveEntity(null, uri);
0200:
0201: // If custom resolver punts resolution to parser, handle it ...
0202: if (in == null) {
0203: in = Resolver.createInputSource(new java.net.URL(uri),
0204: false);
0205:
0206: // ... or if custom resolver doesn't correctly construct the
0207: // input entity, patch it up enough so relative URIs work, and
0208: // issue a warning to minimize later confusion.
0209: } else if (in.getSystemId() == null) {
0210: warning("P-065", null);
0211: in.setSystemId(uri);
0212: }
0213:
0214: parseInternal(in);
0215: }
0216:
0217: // makes sure the parser is reset to "before a document"
0218: private void init() {
0219: in = null;
0220:
0221: // alloc temporary data used in parsing
0222: strTmp = new StringBuffer();
0223: nameTmp = new char[20];
0224: nameCache = new NameCache();
0225:
0226: // reset doc info
0227: // isInAttribute = false;
0228:
0229: doLexicalPE = false;
0230:
0231: entities.clear();
0232: notations.clear();
0233: params.clear();
0234: // elements.clear ();
0235: declaredElements.clear();
0236:
0237: // initialize predefined references ... re-interpreted later
0238: builtin("amp", "&");
0239: builtin("lt", "<");
0240: builtin("gt", ">");
0241: builtin("quot", "\"");
0242: builtin("apos", "'");
0243:
0244: if (locale == null)
0245: locale = Locale.getDefault();
0246: if (resolver == null)
0247: resolver = new Resolver();
0248: if (dtdHandler == null)
0249: dtdHandler = new DTDHandlerBase();
0250: }
0251:
0252: private void builtin(String entityName, String entityValue) {
0253: InternalEntity entity;
0254: entity = new InternalEntity(entityName, entityValue
0255: .toCharArray());
0256: entities.put(entityName, entity);
0257: }
0258:
0259: ////////////////////////////////////////////////////////////////
0260: //
0261: // parsing is by recursive descent, code roughly
0262: // following the BNF rules except tweaked for simple
0263: // lookahead. rules are more or less in numeric order,
0264: // except where code sharing suggests other structures.
0265: //
0266: // a classic benefit of recursive descent parsers: it's
0267: // relatively easy to get diagnostics that make sense.
0268: //
0269: ////////////////////////////////////////////////////////////////
0270:
0271: private void parseInternal(InputSource input) throws IOException,
0272: SAXException {
0273:
0274: if (input == null)
0275: fatal("P-000");
0276:
0277: try {
0278: in = InputEntity.getInputEntity(dtdHandler, locale);
0279: in.init(input, null, null, false);
0280:
0281: dtdHandler.startDTD(in);
0282:
0283: // [30] extSubset ::= TextDecl? extSubsetDecl
0284: // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
0285: // | PEReference | S )*
0286: // ... same as [79] extPE, which is where the code is
0287:
0288: ExternalEntity externalSubset = new ExternalEntity(in);
0289: externalParameterEntity(externalSubset);
0290:
0291: if (!in.isEOF()) {
0292: fatal("P-001", new Object[] { Integer
0293: .toHexString(((int) getc())) });
0294: }
0295: afterRoot();
0296: dtdHandler.endDTD();
0297:
0298: } catch (EndOfInputException e) {
0299: if (!in.isDocument()) {
0300: String name = in.getName();
0301: do { // force a relevant URI and line number
0302: in = in.pop();
0303: } while (in.isInternal());
0304: fatal("P-002", new Object[] { name });
0305: } else {
0306: fatal("P-003", null);
0307: }
0308: } catch (RuntimeException e) {
0309: // Don't discard location that triggered the exception
0310: // ## Should properly wrap exception
0311: System.err.print("Internal DTD parser error: "); // ##
0312: e.printStackTrace();
0313: throw new SAXParseException(e.getMessage() != null ? e
0314: .getMessage() : e.getClass().getName(),
0315: getPublicId(), getSystemId(), getLineNumber(),
0316: getColumnNumber());
0317:
0318: } finally {
0319: // recycle temporary data used during parsing
0320: strTmp = null;
0321: nameTmp = null;
0322: nameCache = null;
0323:
0324: // ditto input sources etc
0325: if (in != null) {
0326: in.close();
0327: in = null;
0328: }
0329:
0330: // get rid of all DTD info ... some of it would be
0331: // useful for editors etc, investigate later.
0332:
0333: params.clear();
0334: entities.clear();
0335: notations.clear();
0336: declaredElements.clear();
0337: // elements.clear();
0338: ids.clear();
0339: }
0340: }
0341:
0342: void afterRoot() throws SAXException {
0343: // Make sure all IDREFs match declared ID attributes. We scan
0344: // after the document element is parsed, since XML allows forward
0345: // references, and only now can we know if they're all resolved.
0346:
0347: for (Enumeration e = ids.keys(); e.hasMoreElements();) {
0348: String id = (String) e.nextElement();
0349: Boolean value = (Boolean) ids.get(id);
0350: if (Boolean.FALSE == value)
0351: error("V-024", new Object[] { id });
0352: }
0353: }
0354:
0355: // role is for diagnostics
0356: private void whitespace(String roleId) throws IOException,
0357: SAXException {
0358:
0359: // [3] S ::= (#x20 | #x9 | #xd | #xa)+
0360: if (!maybeWhitespace()) {
0361: fatal("P-004", new Object[] { messages.getMessage(locale,
0362: roleId) });
0363: }
0364: }
0365:
0366: // S?
0367: private boolean maybeWhitespace() throws IOException, SAXException {
0368:
0369: if (!doLexicalPE)
0370: return in.maybeWhitespace();
0371:
0372: // see getc() for the PE logic -- this lets us splice
0373: // expansions of PEs in "anywhere". getc() has smarts,
0374: // so for external PEs we don't bypass it.
0375:
0376: // XXX we can marginally speed PE handling, and certainly
0377: // be cleaner (hence potentially more correct), by using
0378: // the observations that expanded PEs only start and stop
0379: // where whitespace is allowed. getc wouldn't need any
0380: // "lexical" PE expansion logic, and no other method needs
0381: // to handle termination of PEs. (parsing of literals would
0382: // still need to pop entities, but not parsing of references
0383: // in content.)
0384:
0385: char c = getc();
0386: boolean saw = false;
0387:
0388: while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
0389: saw = true;
0390:
0391: // this gracefully ends things when we stop playing
0392: // with internal parameters. caller should have a
0393: // grammar rule allowing whitespace at end of entity.
0394: if (in.isEOF() && !in.isInternal())
0395: return saw;
0396: c = getc();
0397: }
0398: ungetc();
0399: return saw;
0400: }
0401:
0402: private String maybeGetName() throws IOException, SAXException {
0403:
0404: NameCacheEntry entry = maybeGetNameCacheEntry();
0405: return (entry == null) ? null : entry.name;
0406: }
0407:
0408: private NameCacheEntry maybeGetNameCacheEntry() throws IOException,
0409: SAXException {
0410:
0411: // [5] Name ::= (Letter|'_'|':') (Namechar)*
0412: char c = getc();
0413:
0414: if (!XmlChars.isLetter(c) && c != ':' && c != '_') {
0415: ungetc();
0416: return null;
0417: }
0418: return nameCharString(c);
0419: }
0420:
0421: // Used when parsing enumerations
0422: private String getNmtoken() throws IOException, SAXException {
0423:
0424: // [7] Nmtoken ::= (Namechar)+
0425: char c = getc();
0426: if (!XmlChars.isNameChar(c))
0427: fatal("P-006", new Object[] { new Character(c) });
0428: return nameCharString(c).name;
0429: }
0430:
0431: // n.b. this gets used when parsing attribute values (for
0432: // internal references) so we can't use strTmp; it's also
0433: // a hotspot for CPU and memory in the parser (called at least
0434: // once for each element) so this has been optimized a bit.
0435:
0436: private NameCacheEntry nameCharString(char c) throws IOException,
0437: SAXException {
0438:
0439: int i = 1;
0440:
0441: nameTmp[0] = c;
0442: for (;;) {
0443: if ((c = in.getNameChar()) == 0)
0444: break;
0445: if (i >= nameTmp.length) {
0446: char tmp[] = new char[nameTmp.length + 10];
0447: System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length);
0448: nameTmp = tmp;
0449: }
0450: nameTmp[i++] = c;
0451: }
0452: return nameCache.lookupEntry(nameTmp, i);
0453: }
0454:
0455: //
0456: // much similarity between parsing entity values in DTD
0457: // and attribute values (in DTD or content) ... both follow
0458: // literal parsing rules, newline canonicalization, etc
0459: //
0460: // leaves value in 'strTmp' ... either a "replacement text" (4.5),
0461: // or else partially normalized attribute value (the first bit
0462: // of 3.3.3's spec, without the "if not CDATA" bits).
0463: //
0464: private void parseLiteral(boolean isEntityValue)
0465: throws IOException, SAXException {
0466:
0467: // [9] EntityValue ::=
0468: // '"' ([^"&%] | Reference | PEReference)* '"'
0469: // | "'" ([^'&%] | Reference | PEReference)* "'"
0470: // [10] AttValue ::=
0471: // '"' ([^"&] | Reference )* '"'
0472: // | "'" ([^'&] | Reference )* "'"
0473: char quote = getc();
0474: char c;
0475: InputEntity source = in;
0476:
0477: if (quote != '\'' && quote != '"') {
0478: fatal("P-007");
0479: }
0480:
0481: // don't report entity expansions within attributes,
0482: // they're reported "fully expanded" via SAX
0483: // isInAttribute = !isEntityValue;
0484:
0485: // get value into strTmp
0486: strTmp = new StringBuffer();
0487:
0488: // scan, allowing entity push/pop wherever ...
0489: // expanded entities can't terminate the literal!
0490: for (;;) {
0491: if (in != source && in.isEOF()) {
0492: // we don't report end of parsed entities
0493: // within attributes (no SAX hooks)
0494: in = in.pop();
0495: continue;
0496: }
0497: if ((c = getc()) == quote && in == source) {
0498: break;
0499: }
0500:
0501: //
0502: // Basically the "reference in attribute value"
0503: // row of the chart in section 4.4 of the spec
0504: //
0505: if (c == '&') {
0506: String entityName = maybeGetName();
0507:
0508: if (entityName != null) {
0509: nextChar(';', "F-020", entityName);
0510:
0511: // 4.4 says: bypass these here ... we'll catch
0512: // forbidden refs to unparsed entities on use
0513: if (isEntityValue) {
0514: strTmp.append('&');
0515: strTmp.append(entityName);
0516: strTmp.append(';');
0517: continue;
0518: }
0519: expandEntityInLiteral(entityName, entities,
0520: isEntityValue);
0521:
0522: // character references are always included immediately
0523: } else if ((c = getc()) == '#') {
0524: int tmp = parseCharNumber();
0525:
0526: if (tmp > 0xffff) {
0527: tmp = surrogatesToCharTmp(tmp);
0528: strTmp.append(charTmp[0]);
0529: if (tmp == 2)
0530: strTmp.append(charTmp[1]);
0531: } else
0532: strTmp.append((char) tmp);
0533: } else
0534: fatal("P-009");
0535: continue;
0536:
0537: }
0538:
0539: // expand parameter entities only within entity value literals
0540: if (c == '%' && isEntityValue) {
0541: String entityName = maybeGetName();
0542:
0543: if (entityName != null) {
0544: nextChar(';', "F-021", entityName);
0545: expandEntityInLiteral(entityName, params,
0546: isEntityValue);
0547: continue;
0548: } else
0549: fatal("P-011");
0550: }
0551:
0552: // For attribute values ...
0553: if (!isEntityValue) {
0554: // 3.3.3 says whitespace normalizes to space...
0555: if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
0556: strTmp.append(' ');
0557: continue;
0558: }
0559:
0560: // "<" not legal in parsed literals ...
0561: if (c == '<')
0562: fatal("P-012");
0563: }
0564:
0565: strTmp.append(c);
0566: }
0567: // isInAttribute = false;
0568: }
0569:
0570: // does a SINGLE expansion of the entity (often reparsed later)
0571: private void expandEntityInLiteral(String name,
0572: SimpleHashtable table, boolean isEntityValue)
0573: throws IOException, SAXException {
0574:
0575: Object entity = table.get(name);
0576:
0577: if (entity instanceof InternalEntity) {
0578: InternalEntity value = (InternalEntity) entity;
0579: pushReader(value.buf, name, !value.isPE);
0580:
0581: } else if (entity instanceof ExternalEntity) {
0582: if (!isEntityValue) // must be a PE ...
0583: fatal("P-013", new Object[] { name });
0584: // XXX if this returns false ...
0585: pushReader((ExternalEntity) entity);
0586:
0587: } else if (entity == null) {
0588: //
0589: // Note: much confusion about whether spec requires such
0590: // errors to be fatal in many cases, but none about whether
0591: // it allows "normal" errors to be unrecoverable!
0592: //
0593: fatal((table == params) ? "V-022" : "P-014",
0594: new Object[] { name });
0595: }
0596: }
0597:
0598: // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
0599: // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
0600:
0601: // NOTE: XML spec should explicitly say that PE ref syntax is
0602: // ignored in PIs, comments, SystemLiterals, and Pubid Literal
0603: // values ... can't process the XML spec's own DTD without doing
0604: // that for comments.
0605:
0606: private String getQuotedString(String type, String extra)
0607: throws IOException, SAXException {
0608:
0609: // use in.getc to bypass PE processing
0610: char quote = in.getc();
0611:
0612: if (quote != '\'' && quote != '"')
0613: fatal("P-015", new Object[] { messages.getMessage(locale,
0614: type, new Object[] { extra }) });
0615:
0616: char c;
0617:
0618: strTmp = new StringBuffer();
0619: while ((c = in.getc()) != quote)
0620: strTmp.append((char) c);
0621: return strTmp.toString();
0622: }
0623:
0624: private String parsePublicId() throws IOException, SAXException {
0625:
0626: // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
0627: // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
0628: String retval = getQuotedString("F-033", null);
0629: for (int i = 0; i < retval.length(); i++) {
0630: char c = retval.charAt(i);
0631: if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
0632: && !(c >= 'A' && c <= 'Z')
0633: && !(c >= 'a' && c <= 'z'))
0634: fatal("P-016", new Object[] { new Character(c) });
0635: }
0636: strTmp = new StringBuffer();
0637: strTmp.append(retval);
0638: return normalize(false);
0639: }
0640:
0641: // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
0642: // handled by: InputEntity.parsedContent()
0643:
0644: private boolean maybeComment(boolean skipStart) throws IOException,
0645: SAXException {
0646:
0647: // [15] Comment ::= '<!--'
0648: // ( (Char - '-') | ('-' (Char - '-'))*
0649: // '-->'
0650: if (!in.peek(skipStart ? "!--" : "<!--", null))
0651: return false;
0652:
0653: boolean savedLexicalPE = doLexicalPE;
0654: boolean saveCommentText;
0655:
0656: doLexicalPE = false;
0657: saveCommentText = false;
0658: if (saveCommentText)
0659: strTmp = new StringBuffer();
0660:
0661: oneComment: for (;;) {
0662: try {
0663: // bypass PE expansion, but permit PEs
0664: // to complete ... valid docs won't care.
0665: for (;;) {
0666: int c = getc();
0667: if (c == '-') {
0668: c = getc();
0669: if (c != '-') {
0670: if (saveCommentText)
0671: strTmp.append('-');
0672: ungetc();
0673: continue;
0674: }
0675: nextChar('>', "F-022", null);
0676: break oneComment;
0677: }
0678: if (saveCommentText)
0679: strTmp.append((char) c);
0680: }
0681: } catch (EndOfInputException e) {
0682: //
0683: // This is fatal EXCEPT when we're processing a PE...
0684: // in which case a validating processor reports an error.
0685: // External PEs are easy to detect; internal ones we
0686: // infer by being an internal entity outside an element.
0687: //
0688: if (in.isInternal()) {
0689: error("V-021", null);
0690: }
0691: fatal("P-017");
0692: }
0693: }
0694: doLexicalPE = savedLexicalPE;
0695: if (saveCommentText)
0696: dtdHandler.comment(strTmp.toString());
0697: return true;
0698: }
0699:
0700: private boolean maybePI(boolean skipStart) throws IOException,
0701: SAXException {
0702:
0703: // [16] PI ::= '<?' PITarget
0704: // (S (Char* - (Char* '?>' Char*)))?
0705: // '?>'
0706: // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
0707: boolean savedLexicalPE = doLexicalPE;
0708:
0709: if (!in.peek(skipStart ? "?" : "<?", null))
0710: return false;
0711: doLexicalPE = false;
0712:
0713: String target = maybeGetName();
0714:
0715: if (target == null) {
0716: fatal("P-018");
0717: }
0718: if ("xml".equals(target)) {
0719: fatal("P-019");
0720: }
0721: if ("xml".equalsIgnoreCase(target)) {
0722: fatal("P-020", new Object[] { target });
0723: }
0724:
0725: if (maybeWhitespace()) {
0726: strTmp = new StringBuffer();
0727: try {
0728: for (;;) {
0729: // use in.getc to bypass PE processing
0730: char c = in.getc();
0731: //Reached the end of PI.
0732: if (c == '?' && in.peekc('>'))
0733: break;
0734: strTmp.append(c);
0735: }
0736: } catch (EndOfInputException e) {
0737: fatal("P-021");
0738: }
0739: dtdHandler.processingInstruction(target, strTmp.toString());
0740: } else {
0741: if (!in.peek("?>", null)) {
0742: fatal("P-022");
0743: }
0744: dtdHandler.processingInstruction(target, "");
0745: }
0746:
0747: doLexicalPE = savedLexicalPE;
0748: return true;
0749: }
0750:
0751: // [18] CDSect ::= CDStart CData CDEnd
0752: // [19] CDStart ::= '<![CDATA['
0753: // [20] CData ::= (Char* - (Char* ']]>' Char*))
0754: // [21] CDEnd ::= ']]>'
0755: //
0756: // ... handled by InputEntity.unparsedContent()
0757:
0758: // collapsing several rules together ...
0759: // simpler than attribute literals -- no reference parsing!
0760: private String maybeReadAttribute(String name, boolean must)
0761: throws IOException, SAXException {
0762:
0763: // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
0764: // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
0765: // [32] SDDecl ::= S 'standalone' Eq \'|\" ... \'|\"
0766: if (!maybeWhitespace()) {
0767: if (!must) {
0768: return null;
0769: }
0770: fatal("P-024", new Object[] { name });
0771: // NOTREACHED
0772: }
0773:
0774: if (!peek(name)) {
0775: if (must) {
0776: fatal("P-024", new Object[] { name });
0777: } else {
0778: // To ensure that the whitespace is there so that when we
0779: // check for the next attribute we assure that the
0780: // whitespace still exists.
0781: ungetc();
0782: return null;
0783: }
0784: }
0785:
0786: // [25] Eq ::= S? '=' S?
0787: maybeWhitespace();
0788: nextChar('=', "F-023", null);
0789: maybeWhitespace();
0790:
0791: return getQuotedString("F-035", name);
0792: }
0793:
0794: private void readVersion(boolean must, String versionNum)
0795: throws IOException, SAXException {
0796:
0797: String value = maybeReadAttribute("version", must);
0798:
0799: // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
0800:
0801: if (must && value == null)
0802: fatal("P-025", new Object[] { versionNum });
0803: if (value != null) {
0804: int length = value.length();
0805: for (int i = 0; i < length; i++) {
0806: char c = value.charAt(i);
0807: if (!((c >= '0' && c <= '9') || c == '_' || c == '.'
0808: || (c >= 'a' && c <= 'z')
0809: || (c >= 'A' && c <= 'Z') || c == ':' || c == '-'))
0810: fatal("P-026", new Object[] { value });
0811: }
0812: }
0813: if (value != null && !value.equals(versionNum))
0814: error("P-027", new Object[] { versionNum, value });
0815: }
0816:
0817: // common code used by most markup declarations
0818: // ... S (Q)Name ...
0819: private String getMarkupDeclname(String roleId, boolean qname)
0820: throws IOException, SAXException {
0821:
0822: String name;
0823:
0824: whitespace(roleId);
0825: name = maybeGetName();
0826: if (name == null)
0827: fatal("P-005", new Object[] { messages.getMessage(locale,
0828: roleId) });
0829: return name;
0830: }
0831:
0832: private boolean maybeMarkupDecl() throws IOException, SAXException {
0833:
0834: // [29] markupdecl ::= elementdecl | Attlistdecl
0835: // | EntityDecl | NotationDecl | PI | Comment
0836: return maybeElementDecl() || maybeAttlistDecl()
0837: || maybeEntityDecl() || maybeNotationDecl()
0838: || maybePI(false) || maybeComment(false);
0839: }
0840:
0841: private static final String XmlLang = "xml:lang";
0842:
0843: private boolean isXmlLang(String value) {
0844:
0845: // [33] LanguageId ::= Langcode ('-' Subcode)*
0846: // [34] Langcode ::= ISO639Code | IanaCode | UserCode
0847: // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
0848: // [36] IanaCode ::= [iI] '-' SubCode
0849: // [37] UserCode ::= [xX] '-' SubCode
0850: // [38] SubCode ::= [a-zA-Z]+
0851:
0852: // the ISO and IANA codes (and subcodes) are registered,
0853: // but that's neither a WF nor a validity constraint.
0854:
0855: int nextSuffix;
0856: char c;
0857:
0858: if (value.length() < 2)
0859: return false;
0860: c = value.charAt(1);
0861: if (c == '-') { // IANA, or user, code
0862: c = value.charAt(0);
0863: if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
0864: return false;
0865: nextSuffix = 1;
0866: } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
0867: // 2 letter ISO code, or error
0868: c = value.charAt(0);
0869: if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
0870: return false;
0871: nextSuffix = 2;
0872: } else
0873: return false;
0874:
0875: // here "suffix" ::= '-' [a-zA-Z]+ suffix*
0876: while (nextSuffix < value.length()) {
0877: c = value.charAt(nextSuffix);
0878: if (c != '-')
0879: break;
0880: while (++nextSuffix < value.length()) {
0881: c = value.charAt(nextSuffix);
0882: if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
0883: break;
0884: }
0885: }
0886: return value.length() == nextSuffix && c != '-';
0887: }
0888:
0889: //
0890: // CHAPTER 3: Logical Structures
0891: //
0892:
0893: /**
0894: * To validate, subclassers should at this time make sure that
0895: * values are of the declared types:<UL>
0896: * <LI> ID and IDREF(S) values are Names
0897: * <LI> NMTOKEN(S) are Nmtokens
0898: * <LI> ENUMERATION values match one of the tokens
0899: * <LI> NOTATION values match a notation name
0900: * <LI> ENTITY(IES) values match an unparsed external entity
0901: * </UL>
0902: * <p/>
0903: * <P> Separately, make sure IDREF values match some ID
0904: * provided in the document (in the afterRoot method).
0905: */
0906: /* void validateAttributeSyntax (Attribute attr, String value)
0907: throws DTDParseException {
0908: // ID, IDREF(S) ... values are Names
0909: if (Attribute.ID == attr.type()) {
0910: if (!XmlNames.isName (value))
0911: error ("V-025", new Object [] { value });
0912:
0913: Boolean b = (Boolean) ids.getNonInterned (value);
0914: if (b == null || b.equals (Boolean.FALSE))
0915: ids.put (value.intern (), Boolean.TRUE);
0916: else
0917: error ("V-026", new Object [] { value });
0918:
0919: } else if (Attribute.IDREF == attr.type()) {
0920: if (!XmlNames.isName (value))
0921: error ("V-027", new Object [] { value });
0922:
0923: Boolean b = (Boolean) ids.getNonInterned (value);
0924: if (b == null)
0925: ids.put (value.intern (), Boolean.FALSE);
0926:
0927: } else if (Attribute.IDREFS == attr.type()) {
0928: StringTokenizer tokenizer = new StringTokenizer (value);
0929: Boolean b;
0930: boolean sawValue = false;
0931:
0932: while (tokenizer.hasMoreTokens ()) {
0933: value = tokenizer.nextToken ();
0934: if (!XmlNames.isName (value))
0935: error ("V-027", new Object [] { value });
0936: b = (Boolean) ids.getNonInterned (value);
0937: if (b == null)
0938: ids.put (value.intern (), Boolean.FALSE);
0939: sawValue = true;
0940: }
0941: if (!sawValue)
0942: error ("V-039", null);
0943:
0944:
0945: // NMTOKEN(S) ... values are Nmtoken(s)
0946: } else if (Attribute.NMTOKEN == attr.type()) {
0947: if (!XmlNames.isNmtoken (value))
0948: error ("V-028", new Object [] { value });
0949:
0950: } else if (Attribute.NMTOKENS == attr.type()) {
0951: StringTokenizer tokenizer = new StringTokenizer (value);
0952: boolean sawValue = false;
0953:
0954: while (tokenizer.hasMoreTokens ()) {
0955: value = tokenizer.nextToken ();
0956: if (!XmlNames.isNmtoken (value))
0957: error ("V-028", new Object [] { value });
0958: sawValue = true;
0959: }
0960: if (!sawValue)
0961: error ("V-032", null);
0962:
0963: // ENUMERATION ... values match one of the tokens
0964: } else if (Attribute.ENUMERATION == attr.type()) {
0965: for (int i = 0; i < attr.values().length; i++)
0966: if (value.equals (attr.values()[i]))
0967: return;
0968: error ("V-029", new Object [] { value });
0969:
0970: // NOTATION values match a notation name
0971: } else if (Attribute.NOTATION == attr.type()) {
0972: //
0973: // XXX XML 1.0 spec should probably list references to
0974: // externally defined notations in standalone docs as
0975: // validity errors. Ditto externally defined unparsed
0976: // entities; neither should show up in attributes, else
0977: // one needs to read the external declarations in order
0978: // to make sense of the document (exactly what tagging
0979: // a doc as "standalone" intends you won't need to do).
0980: //
0981: for (int i = 0; i < attr.values().length; i++)
0982: if (value.equals (attr.values()[i]))
0983: return;
0984: error ("V-030", new Object [] { value });
0985:
0986: // ENTITY(IES) values match an unparsed entity(ies)
0987: } else if (Attribute.ENTITY == attr.type()) {
0988: // see note above re standalone
0989: if (!isUnparsedEntity (value))
0990: error ("V-031", new Object [] { value });
0991:
0992: } else if (Attribute.ENTITIES == attr.type()) {
0993: StringTokenizer tokenizer = new StringTokenizer (value);
0994: boolean sawValue = false;
0995:
0996: while (tokenizer.hasMoreTokens ()) {
0997: value = tokenizer.nextToken ();
0998: // see note above re standalone
0999: if (!isUnparsedEntity (value))
1000: error ("V-031", new Object [] { value });
1001: sawValue = true;
1002: }
1003: if (!sawValue)
1004: error ("V-040", null);
1005:
1006: } else if (Attribute.CDATA != attr.type())
1007: throw new InternalError (attr.type());
1008: }
1009: */
1010: /*
1011: private boolean isUnparsedEntity (String name)
1012: {
1013: Object e = entities.getNonInterned (name);
1014: if (e == null || !(e instanceof ExternalEntity))
1015: return false;
1016: return ((ExternalEntity)e).notation != null;
1017: }
1018: */
1019: private boolean maybeElementDecl() throws IOException, SAXException {
1020:
1021: // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
1022: // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1023: InputEntity start = peekDeclaration("!ELEMENT");
1024:
1025: if (start == null)
1026: return false;
1027:
1028: // n.b. for content models where inter-element whitespace is
1029: // ignorable, we mark that fact here.
1030: String name = getMarkupDeclname("F-015", true);
1031: // Element element = (Element) elements.get (name);
1032: // boolean declEffective = false;
1033:
1034: /*
1035: if (element != null) {
1036: if (element.contentModel() != null) {
1037: error ("V-012", new Object [] { name });
1038: } // else <!ATTLIST name ...> came first
1039: } else {
1040: element = new Element(name);
1041: elements.put (element.name(), element);
1042: declEffective = true;
1043: }
1044: */
1045: if (declaredElements.contains(name))
1046: error("V-012", new Object[] { name });
1047: else {
1048: declaredElements.add(name);
1049: // declEffective = true;
1050: }
1051:
1052: short modelType;
1053: whitespace("F-000");
1054: if (peek(strEMPTY)) {
1055: /// // leave element.contentModel as null for this case.
1056: dtdHandler.startContentModel(name,
1057: modelType = DTDEventListener.CONTENT_MODEL_EMPTY);
1058: } else if (peek(strANY)) {
1059: /// element.setContentModel(new StringModel(StringModelType.ANY));
1060: dtdHandler.startContentModel(name,
1061: modelType = DTDEventListener.CONTENT_MODEL_ANY);
1062: } else {
1063: modelType = getMixedOrChildren(name);
1064: }
1065:
1066: dtdHandler.endContentModel(name, modelType);
1067:
1068: maybeWhitespace();
1069: char c = getc();
1070: if (c != '>')
1071: fatal("P-036", new Object[] { name, new Character(c) });
1072: if (start != in)
1073: error("V-013", null);
1074:
1075: /// dtdHandler.elementDecl(element);
1076:
1077: return true;
1078: }
1079:
1080: // We're leaving the content model as a regular expression;
1081: // it's an efficient natural way to express such things, and
1082: // libraries often interpret them. No whitespace in the
1083: // model we store, though!
1084:
1085: /**
1086: * returns content model type.
1087: */
1088: private short getMixedOrChildren(String elementName/*Element element*/)
1089: throws IOException, SAXException {
1090:
1091: InputEntity start;
1092:
1093: // [47] children ::= (choice|seq) ('?'|'*'|'+')?
1094: strTmp = new StringBuffer();
1095:
1096: nextChar('(', "F-028", elementName);
1097: start = in;
1098: maybeWhitespace();
1099: strTmp.append('(');
1100:
1101: short modelType;
1102: if (peek("#PCDATA")) {
1103: strTmp.append("#PCDATA");
1104: dtdHandler.startContentModel(elementName,
1105: modelType = DTDEventListener.CONTENT_MODEL_MIXED);
1106: getMixed(elementName, start);
1107: } else {
1108: dtdHandler
1109: .startContentModel(
1110: elementName,
1111: modelType = DTDEventListener.CONTENT_MODEL_CHILDREN);
1112: getcps(elementName, start);
1113: }
1114:
1115: return modelType;
1116: }
1117:
/**
 * Parses the remainder of one choice or sequence group of a "children"
 * content model and reports it to {@code dtdHandler}.
 * <p/>
 * On entry the opening '(' and any whitespace after it have already
 * been consumed. The method emits {@code startModelGroup()}, then one
 * {@code childElement(...)} per named child (nested groups recurse into
 * this method), a {@code connector(...)} event per separator via
 * {@link #reportConnector}, and finally {@code endModelGroup(...)} with
 * the occurrence indicator that follows the closing ')'.
 * The textual form of the model is appended to {@code strTmp}.
 *
 * @param elementName name of the element being declared; used in
 *                    diagnostics and passed on to nested groups
 * @param start       input entity in which this group's '(' was read;
 *                    error "V-014" is reported if the matching ')' is
 *                    found while a different entity is current
 */
// '(' S? already consumed
// matching ')' must be in "start" entity if validating
private void getcps(/*Element element,*/String elementName,
        InputEntity start) throws IOException, SAXException {

    // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
    // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
    // [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')'

    // "decided" becomes true once the first separator has fixed whether
    // this group is a choice ('|') or a sequence (','); "type" holds
    // that separator (or the last non-separator character looked at).
    boolean decided = false;
    char type = 0;
    // ContentModel retval, temp, current;

    // retval = temp = current = null;

    dtdHandler.startModelGroup();

    // Note: every "continue" below jumps to the do-while condition, so
    // the peek(")") test at the bottom still runs (and consumes the ')').
    do {
        String tag;

        tag = maybeGetName();
        if (tag != null) {
            // a named child, optionally followed by ?, * or +
            strTmp.append(tag);
            // temp = new ElementModel(tag);
            // getFrequency((RepeatableContent)temp);
            ///->
            dtdHandler.childElement(tag, getFrequency());
            ///<-
        } else if (peek("(")) {
            // nested group: remember the entity holding its '(' and recurse
            InputEntity next = in;
            strTmp.append('(');
            maybeWhitespace();
            // temp = getcps(element, next);
            // getFrequency(temp);
            ///->
            getcps(elementName, next);
            // No getFrequency() here: the recursive call already reads
            // the nested group's occurrence indicator when it calls
            // endModelGroup(getFrequency()).
            /// getFrequency(); <- this looks like a bug
            ///<-
        } else
            // neither a name nor a group; the message depends on
            // which separator (if any) preceded this position
            fatal((type == 0) ? "P-039" : ((type == ',') ? "P-037"
                    : "P-038"),
                    new Object[] { new Character(getc()) });

        maybeWhitespace();
        if (decided) {
            char c = getc();

            // if (current != null) {
            // current.addChild(temp);
            // }
            if (c == type) {
                // same separator as before: another child follows
                strTmp.append(type);
                maybeWhitespace();
                reportConnector(type);
                continue;
            } else if (c == '\u0029') { // rparen
                // push the ')' back so the loop condition can consume it
                ungetc();
                continue;
            } else {
                // a different separator or stray character inside the group
                fatal((type == 0) ? "P-041" : "P-040",
                        new Object[] { new Character(c),
                        new Character(type) });
            }
        } else {
            // first child seen: the next character decides the group kind
            type = getc();
            switch (type) {
            case '|':
            case ',':
                reportConnector(type);
                break;
            default:
                // not a separator; push it back and let the loop
                // condition test for ')'
                // retval = temp;
                ungetc();
                continue;
            }
            // retval = (ContentModel)current;
            decided = true;
            // current.addChild(temp);
            strTmp.append(type);
        }
        maybeWhitespace();
    } while (!peek(")"));

    // validity check: '(' and ')' must come from the same input entity
    if (in != start)
        error("V-014", new Object[] { elementName });
    strTmp.append(')');

    // occurrence indicator following the ')' applies to the whole group
    dtdHandler.endModelGroup(getFrequency());
    // return retval;
}
1207:
1208: private void reportConnector(char type) throws SAXException {
1209: switch (type) {
1210: case '|':
1211: dtdHandler.connector(DTDEventListener.CHOICE); ///<-
1212: return;
1213: case ',':
1214: dtdHandler.connector(DTDEventListener.SEQUENCE); ///<-
1215: return;
1216: default:
1217: throw new Error(); //assertion failed.
1218: }
1219: }
1220:
1221: private short getFrequency() throws IOException, SAXException {
1222:
1223: final char c = getc();
1224:
1225: if (c == '?') {
1226: strTmp.append(c);
1227: return DTDEventListener.OCCURENCE_ZERO_OR_ONE;
1228: // original.setRepeat(Repeat.ZERO_OR_ONE);
1229: } else if (c == '+') {
1230: strTmp.append(c);
1231: return DTDEventListener.OCCURENCE_ONE_OR_MORE;
1232: // original.setRepeat(Repeat.ONE_OR_MORE);
1233: } else if (c == '*') {
1234: strTmp.append(c);
1235: return DTDEventListener.OCCURENCE_ZERO_OR_MORE;
1236: // original.setRepeat(Repeat.ZERO_OR_MORE);
1237: } else {
1238: ungetc();
1239: return DTDEventListener.OCCURENCE_ONCE;
1240: }
1241: }
1242:
1243: // '(' S? '#PCDATA' already consumed
1244: // matching ')' must be in "start" entity if validating
1245: private void getMixed(String elementName, /*Element element,*/
1246: InputEntity start) throws IOException, SAXException {
1247:
1248: // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
1249: // | '(' S? '#PCDATA' S? ')'
1250: maybeWhitespace();
1251: if (peek("\u0029*") || peek("\u0029")) {
1252: if (in != start)
1253: error("V-014", new Object[] { elementName });
1254: strTmp.append(')');
1255: // element.setContentModel(new StringModel(StringModelType.PCDATA));
1256: return;
1257: }
1258:
1259: ArrayList l = new ArrayList();
1260: // l.add(new StringModel(StringModelType.PCDATA));
1261:
1262: while (peek("|")) {
1263: String name;
1264:
1265: strTmp.append('|');
1266: maybeWhitespace();
1267:
1268: doLexicalPE = true;
1269: name = maybeGetName();
1270: if (name == null)
1271: fatal("P-042", new Object[] { elementName,
1272: Integer.toHexString(getc()) });
1273: if (l.contains(name)) {
1274: error("V-015", new Object[] { name });
1275: } else {
1276: l.add(name);
1277: dtdHandler.mixedElement(name);
1278: }
1279: strTmp.append(name);
1280: maybeWhitespace();
1281: }
1282:
1283: if (!peek("\u0029*")) // right paren
1284: fatal("P-043", new Object[] { elementName,
1285: new Character(getc()) });
1286: if (in != start)
1287: error("V-014", new Object[] { elementName });
1288: strTmp.append(')');
1289: // ChoiceModel cm = new ChoiceModel((Collection)l);
1290: // cm.setRepeat(Repeat.ZERO_OR_MORE);
1291: // element.setContentModel(cm);
1292: }
1293:
1294: private boolean maybeAttlistDecl() throws IOException, SAXException {
1295:
1296: // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
1297: InputEntity start = peekDeclaration("!ATTLIST");
1298:
1299: if (start == null)
1300: return false;
1301:
1302: String elementName = getMarkupDeclname("F-016", true);
1303: // Element element = (Element) elements.get (name);
1304:
1305: // if (element == null) {
1306: // // not yet declared -- no problem.
1307: // element = new Element(name);
1308: // elements.put(name, element);
1309: // }
1310:
1311: while (!peek(">")) {
1312:
1313: // [53] AttDef ::= S Name S AttType S DefaultDecl
1314: // [54] AttType ::= StringType | TokenizedType | EnumeratedType
1315:
1316: // look for global attribute definitions, don't expand for now...
1317: maybeWhitespace();
1318: char c = getc();
1319: if (c == '%') {
1320: String entityName = maybeGetName();
1321: if (entityName != null) {
1322: nextChar(';', "F-021", entityName);
1323: whitespace("F-021");
1324: continue;
1325: } else
1326: fatal("P-011");
1327: }
1328:
1329: ungetc();
1330: // look for attribute name otherwise
1331: String attName = maybeGetName();
1332: if (attName == null) {
1333: fatal("P-044", new Object[] { new Character(getc()) });
1334: }
1335: whitespace("F-001");
1336:
1337: /// Attribute a = new Attribute (name);
1338:
1339: String typeName;
1340: Vector values = null; // notation/enumeration values
1341:
1342: // Note: use the type constants from Attribute
1343: // so that "==" may be used (faster)
1344:
1345: // [55] StringType ::= 'CDATA'
1346: if (peek(TYPE_CDATA))
1347: /// a.setType(Attribute.CDATA);
1348: typeName = TYPE_CDATA;
1349:
1350: // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
1351: // | 'ENTITY' | 'ENTITIES'
1352: // | 'NMTOKEN' | 'NMTOKENS'
1353: // n.b. if "IDREFS" is there, both "ID" and "IDREF"
1354: // match peekahead ... so this order matters!
1355: else if (peek(TYPE_IDREFS))
1356: typeName = TYPE_IDREFS;
1357: else if (peek(TYPE_IDREF))
1358: typeName = TYPE_IDREF;
1359: else if (peek(TYPE_ID)) {
1360: typeName = TYPE_ID;
1361: // TODO: should implement this error check?
1362: /// if (element.id() != null) {
1363: /// error ("V-016", new Object [] { element.id() });
1364: /// } else
1365: /// element.setId(name);
1366: } else if (peek(TYPE_ENTITY))
1367: typeName = TYPE_ENTITY;
1368: else if (peek(TYPE_ENTITIES))
1369: typeName = TYPE_ENTITIES;
1370: else if (peek(TYPE_NMTOKENS))
1371: typeName = TYPE_NMTOKENS;
1372: else if (peek(TYPE_NMTOKEN))
1373: typeName = TYPE_NMTOKEN;
1374:
1375: // [57] EnumeratedType ::= NotationType | Enumeration
1376: // [58] NotationType ::= 'NOTATION' S '(' S? Name
1377: // (S? '|' S? Name)* S? ')'
1378: else if (peek(TYPE_NOTATION)) {
1379: typeName = TYPE_NOTATION;
1380: whitespace("F-002");
1381: nextChar('(', "F-029", null);
1382: maybeWhitespace();
1383:
1384: values = new Vector();
1385: do {
1386: String name;
1387: if ((name = maybeGetName()) == null)
1388: fatal("P-068");
1389: // permit deferred declarations
1390: if (notations.get(name) == null)
1391: notations.put(name, name);
1392: values.addElement(name);
1393: maybeWhitespace();
1394: if (peek("|"))
1395: maybeWhitespace();
1396: } while (!peek(")"));
1397: /// a.setValues(new String [v.size ()]);
1398: /// for (int i = 0; i < v.size (); i++)
1399: /// a.setValue(i, (String)v.elementAt(i));
1400:
1401: // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
1402: } else if (peek("(")) {
1403: /// a.setType(Attribute.ENUMERATION);
1404: typeName = TYPE_ENUMERATION;
1405:
1406: maybeWhitespace();
1407:
1408: /// Vector v = new Vector ();
1409: values = new Vector();
1410: do {
1411: String name = getNmtoken();
1412: /// v.addElement (name);
1413: values.addElement(name);
1414: maybeWhitespace();
1415: if (peek("|"))
1416: maybeWhitespace();
1417: } while (!peek(")"));
1418: /// a.setValues(new String [v.size ()]);
1419: /// for (int i = 0; i < v.size (); i++)
1420: /// a.setValue(i, (String)v.elementAt(i));
1421: } else {
1422: fatal("P-045", new Object[] { attName,
1423: new Character(getc()) });
1424: typeName = null;
1425: }
1426:
1427: short attributeUse;
1428: String defaultValue = null;
1429:
1430: // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
1431: // | (('#FIXED' S)? AttValue)
1432: whitespace("F-003");
1433: if (peek("#REQUIRED"))
1434: attributeUse = DTDEventListener.USE_REQUIRED;
1435: /// a.setIsRequired(true);
1436: else if (peek("#FIXED")) {
1437: /// if (a.type() == Attribute.ID)
1438: if (typeName == TYPE_ID)
1439: error("V-017", new Object[] { attName });
1440: /// a.setIsFixed(true);
1441: attributeUse = DTDEventListener.USE_FIXED;
1442: whitespace("F-004");
1443: parseLiteral(false);
1444: /// if (a.type() != Attribute.CDATA)
1445: /// a.setDefaultValue(normalize(false));
1446: /// else
1447: /// a.setDefaultValue(strTmp.toString());
1448:
1449: if (typeName == TYPE_CDATA)
1450: defaultValue = normalize(false);
1451: else
1452: defaultValue = strTmp.toString();
1453:
1454: // TODO: implement this check
1455: /// if (a.type() != Attribute.CDATA)
1456: /// validateAttributeSyntax (a, a.defaultValue());
1457: } else if (!peek("#IMPLIED")) {
1458: attributeUse = DTDEventListener.USE_IMPLIED;
1459:
1460: /// if (a.type() == Attribute.ID)
1461: if (typeName == TYPE_ID)
1462: error("V-018", new Object[] { attName });
1463: parseLiteral(false);
1464: /// if (a.type() != Attribute.CDATA)
1465: /// a.setDefaultValue(normalize(false));
1466: /// else
1467: /// a.setDefaultValue(strTmp.toString());
1468: if (typeName == TYPE_CDATA)
1469: defaultValue = normalize(false);
1470: else
1471: defaultValue = strTmp.toString();
1472:
1473: // TODO: implement this check
1474: /// if (a.type() != Attribute.CDATA)
1475: /// validateAttributeSyntax (a, a.defaultValue());
1476: } else {
1477: // TODO: this looks like an fatal error.
1478: attributeUse = DTDEventListener.USE_NORMAL;
1479: }
1480:
1481: if (XmlLang.equals(attName)
1482: && defaultValue/* a.defaultValue()*/!= null
1483: && !isXmlLang(defaultValue/*a.defaultValue()*/))
1484: error(
1485: "P-033",
1486: new Object[] { defaultValue /*a.defaultValue()*/});
1487:
1488: // TODO: isn't it an error to specify the same attribute twice?
1489: /// if (!element.attributes().contains(a)) {
1490: /// element.addAttribute(a);
1491: /// dtdHandler.attributeDecl(a);
1492: /// }
1493:
1494: String[] v = (values != null) ? (String[]) values
1495: .toArray(new String[0]) : null;
1496: dtdHandler.attributeDecl(elementName, attName, typeName, v,
1497: attributeUse, defaultValue);
1498: maybeWhitespace();
1499: }
1500: if (start != in)
1501: error("V-013", null);
1502: return true;
1503: }
1504:
1505: // used when parsing literal attribute values,
1506: // or public identifiers.
1507: //
1508: // input in strTmp
1509: private String normalize(boolean invalidIfNeeded) {
1510:
1511: // this can allocate an extra string...
1512:
1513: String s = strTmp.toString();
1514: String s2 = s.trim();
1515: boolean didStrip = false;
1516:
1517: if (s != s2) {
1518: s = s2;
1519: s2 = null;
1520: didStrip = true;
1521: }
1522: strTmp = new StringBuffer();
1523: for (int i = 0; i < s.length(); i++) {
1524: char c = s.charAt(i);
1525: if (!XmlChars.isSpace(c)) {
1526: strTmp.append(c);
1527: continue;
1528: }
1529: strTmp.append(' ');
1530: while (++i < s.length() && XmlChars.isSpace(s.charAt(i)))
1531: didStrip = true;
1532: i--;
1533: }
1534: if (didStrip)
1535: return strTmp.toString();
1536: else
1537: return s;
1538: }
1539:
/**
 * Parses a conditional section ({@code <![INCLUDE[ ... ]]>} or
 * {@code <![IGNORE[ ... ]]>}) if the input is positioned at one.
 * <p/>
 * For INCLUDE the contents are processed like the external subset
 * (whitespace, parameter-entity references, markup declarations and
 * nested conditional sections). For IGNORE the contents are skipped,
 * tracking nested {@code <![} ... {@code ]]>} pairs.
 *
 * @return {@code false} when the input does not start with
 *         {@code <![}; {@code true} once a section was consumed
 */
private boolean maybeConditionalSect() throws IOException,
        SAXException {

    // [61] conditionalSect ::= includeSect | ignoreSect

    if (!peek("<!["))
        return false;

    String keyword;
    // entity in which the section was opened
    InputEntity start = in;

    maybeWhitespace();

    if ((keyword = maybeGetName()) == null)
        fatal("P-046");
    maybeWhitespace();
    nextChar('[', "F-030", null);

    // [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
    //                      extSubsetDecl ']]>'
    if ("INCLUDE".equals(keyword)) {
        for (;;) {
            // drop exhausted entities that were pushed inside the
            // section, but never pop past the one that opened it
            while (in.isEOF() && in != start)
                in = in.pop();
            // the opening entity ran out before "]]>" was seen
            if (in.isEOF()) {
                error("V-020", null);
            }
            if (peek("]]>"))
                break;

            // whitespace and syntactic PE references are handled with
            // lexical PE expansion switched off ...
            doLexicalPE = false;
            if (maybeWhitespace())
                continue;
            if (maybePEReference())
                continue;
            // ... and declarations with it switched on
            doLexicalPE = true;
            if (maybeMarkupDecl() || maybeConditionalSect())
                continue;

            fatal("P-047");
        }

    // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
    //                     ignoreSectcontents ']]>'
    // [64] ignoreSectcontents ::= Ignore ('<!['
    //                     ignoreSectcontents ']]>' Ignore)*
    // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
    } else if ("IGNORE".equals(keyword)) {
        // depth of open "<![" sections; the loop ends when the
        // section opened above is closed
        int nestlevel = 1;
        // ignoreSectcontents
        doLexicalPE = false;
        while (nestlevel > 0) {
            char c = getc(); // will pop input entities
            if (c == '<') {
                if (peek("!["))
                    nestlevel++;
            } else if (c == ']') {
                if (peek("]>"))
                    nestlevel--;
            } else
                continue;
        }
    } else
        fatal("P-048", new Object[] { keyword });
    return true;
}
1606:
1607: //
1608: // CHAPTER 4: Physical Structures
1609: //
1610:
1611: // parse decimal or hex numeric character reference
1612: private int parseCharNumber() throws IOException, SAXException {
1613:
1614: char c;
1615: int retval = 0;
1616:
1617: // n.b. we ignore overflow ...
1618: if (getc() != 'x') {
1619: ungetc();
1620: for (;;) {
1621: c = getc();
1622: if (c >= '0' && c <= '9') {
1623: retval *= 10;
1624: retval += (c - '0');
1625: continue;
1626: }
1627: if (c == ';')
1628: return retval;
1629: fatal("P-049");
1630: }
1631: } else
1632: for (;;) {
1633: c = getc();
1634: if (c >= '0' && c <= '9') {
1635: retval <<= 4;
1636: retval += (c - '0');
1637: continue;
1638: }
1639: if (c >= 'a' && c <= 'f') {
1640: retval <<= 4;
1641: retval += 10 + (c - 'a');
1642: continue;
1643: }
1644: if (c >= 'A' && c <= 'F') {
1645: retval <<= 4;
1646: retval += 10 + (c - 'A');
1647: continue;
1648: }
1649: if (c == ';')
1650: return retval;
1651: fatal("P-050");
1652: }
1653: }
1654:
1655: // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
1656: // though still subject to the 'Char' construct in XML
1657: private int surrogatesToCharTmp(int ucs4) throws SAXException {
1658:
1659: if (ucs4 <= 0xffff) {
1660: if (XmlChars.isChar(ucs4)) {
1661: charTmp[0] = (char) ucs4;
1662: return 1;
1663: }
1664: } else if (ucs4 <= 0x0010ffff) {
1665: // we represent these as UNICODE surrogate pairs
1666: ucs4 -= 0x10000;
1667: charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff));
1668: charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff));
1669: return 2;
1670: }
1671: fatal("P-051", new Object[] { Integer.toHexString(ucs4) });
1672: // NOTREACHED
1673: return -1;
1674: }
1675:
1676: private boolean maybePEReference() throws IOException, SAXException {
1677:
1678: // This is the SYNTACTIC version of this construct.
1679: // When processing external entities, there is also
1680: // a LEXICAL version; see getc() and doLexicalPE.
1681:
1682: // [69] PEReference ::= '%' Name ';'
1683: if (!in.peekc('%'))
1684: return false;
1685:
1686: String name = maybeGetName();
1687: Object entity;
1688:
1689: if (name == null)
1690: fatal("P-011");
1691: nextChar(';', "F-021", name);
1692: entity = params.get(name);
1693:
1694: if (entity instanceof InternalEntity) {
1695: InternalEntity value = (InternalEntity) entity;
1696: pushReader(value.buf, name, false);
1697:
1698: } else if (entity instanceof ExternalEntity) {
1699: pushReader((ExternalEntity) entity);
1700: externalParameterEntity((ExternalEntity) entity);
1701:
1702: } else if (entity == null) {
1703: error("V-022", new Object[] { name });
1704: }
1705: return true;
1706: }
1707:
/**
 * Parses a general or parameter entity declaration if the input is
 * positioned at one.
 * <p/>
 * The first declaration of a name wins: later declarations are parsed
 * but not stored or reported (for general entities a "P-054" warning is
 * issued). Stored general entities are reported to {@code dtdHandler}
 * as internal, external or unparsed entity declarations; parameter
 * entities are only recorded in {@code params}.
 *
 * @return {@code false} when the input does not start with
 *         {@code <!ENTITY}; {@code true} once a declaration was parsed
 */
private boolean maybeEntityDecl() throws IOException, SAXException {

    // [70] EntityDecl ::= GEDecl | PEDecl
    // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
    // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF S? '>'
    // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
    // [74] PEDef ::= EntityValue | ExternalID
    //
    InputEntity start = peekDeclaration("!ENTITY");

    if (start == null)
        return false;

    String entityName;
    // table the entity belongs to: "params" or "entities"
    SimpleHashtable defns;
    ExternalEntity externalId;
    // true only for the first declaration of this name
    boolean doStore;

    // PE expansion gets selectively turned off several places:
    // in ENTITY declarations (here), in comments, in PIs.

    // Here, we allow PE entities to be declared, and allow
    // literals to include PE refs without the added spaces
    // required with their expansion in markup decls.

    doLexicalPE = false;
    whitespace("F-005");
    // a '%' after the keyword marks a parameter entity declaration
    if (in.peekc('%')) {
        whitespace("F-006");
        defns = params;
    } else
        defns = entities;

    ungetc(); // leave some whitespace
    doLexicalPE = true;
    entityName = getMarkupDeclname("F-017", false);
    whitespace("F-007");
    // null when the definition is a literal value rather than an
    // external identifier
    externalId = maybeExternalID();

    //
    // first definition sticks ... e.g. internal subset PEs are used
    // to override DTD defaults. It's also an "error" to incorrectly
    // redefine builtin internal entities, but since reporting such
    // errors is optional we only give warnings ("just in case") for
    // non-parameter entities.
    //
    doStore = (defns.get(entityName) == null);
    if (!doStore && defns == entities)
        warning("P-054", new Object[] { entityName });

    // internal entities
    if (externalId == null) {
        char value[];
        InternalEntity entity;

        doLexicalPE = false; // "ab%bar;cd" -maybe-> "abcd"
        parseLiteral(true);
        doLexicalPE = true;
        if (doStore) {
            // copy the literal out of strTmp into the entity's buffer
            value = new char[strTmp.length()];
            if (value.length != 0)
                strTmp.getChars(0, value.length, value, 0);
            entity = new InternalEntity(entityName, value);
            entity.isPE = (defns == params);
            entity.isFromInternalSubset = false;
            defns.put(entityName, entity);
            // only general entities are reported to the handler
            if (defns == entities)
                dtdHandler.internalGeneralEntityDecl(entityName,
                        new String(value));
        }

    // external entities (including unparsed)
    } else {
        // [76] NDataDecl ::= S 'NDATA' S Name
        // (only general entities may carry an NDATA declaration)
        if (defns == entities && maybeWhitespace() && peek("NDATA")) {
            externalId.notation = getMarkupDeclname("F-018", false);

            // flag undeclared notation for checking after
            // the DTD is fully processed
            if (notations.get(externalId.notation) == null)
                notations.put(externalId.notation, Boolean.TRUE);
        }
        externalId.name = entityName;
        externalId.isPE = (defns == params);
        externalId.isFromInternalSubset = false;
        if (doStore) {
            defns.put(entityName, externalId);
            // a notation makes this an unparsed entity; external
            // parameter entities are stored but not reported
            if (externalId.notation != null)
                dtdHandler.unparsedEntityDecl(entityName,
                        externalId.publicId, externalId.systemId,
                        externalId.notation);
            else if (defns == entities)
                dtdHandler.externalGeneralEntityDecl(entityName,
                        externalId.publicId, externalId.systemId);
        }
    }
    maybeWhitespace();
    nextChar('>', "F-031", entityName);
    // the declaration must end in the same input entity it started in
    if (start != in)
        error("V-013", null);
    return true;
}
1810:
1811: private ExternalEntity maybeExternalID() throws IOException,
1812: SAXException {
1813:
1814: // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1815: // | 'PUBLIC' S' PubidLiteral S Systemliteral
1816: String temp = null;
1817: ExternalEntity retval;
1818:
1819: if (peek("PUBLIC")) {
1820: whitespace("F-009");
1821: temp = parsePublicId();
1822: } else if (!peek("SYSTEM"))
1823: return null;
1824:
1825: retval = new ExternalEntity(in);
1826: retval.publicId = temp;
1827: whitespace("F-008");
1828: retval.systemId = parseSystemId();
1829: return retval;
1830: }
1831:
1832: private String parseSystemId() throws IOException, SAXException {
1833:
1834: String uri = getQuotedString("F-034", null);
1835: int temp = uri.indexOf(':');
1836:
1837: // resolve relative URIs ... must do it here since
1838: // it's relative to the source file holding the URI!
1839:
1840: // "new java.net.URL (URL, string)" conforms to RFC 1630,
1841: // but we can't use that except when the URI is a URL.
1842: // The entity resolver is allowed to handle URIs that are
1843: // not URLs, so we pass URIs through with scheme intact
1844: if (temp == -1 || uri.indexOf('/') < temp) {
1845: String baseURI;
1846:
1847: baseURI = in.getSystemId();
1848: if (baseURI == null)
1849: fatal("P-055", new Object[] { uri });
1850: if (uri.length() == 0)
1851: uri = ".";
1852: baseURI = baseURI
1853: .substring(0, baseURI.lastIndexOf('/') + 1);
1854: if (uri.charAt(0) != '/')
1855: uri = baseURI + uri;
1856: else {
1857: // XXX slashes at the beginning of a relative URI are
1858: // a special case we don't handle.
1859: throw new InternalError();
1860: }
1861:
1862: // letting other code map any "/xxx/../" or "/./" to "/",
1863: // since all URIs must handle it the same.
1864: }
1865: // check for fragment ID in URI
1866: if (uri.indexOf('#') != -1)
1867: error("P-056", new Object[] { uri });
1868: return uri;
1869: }
1870:
1871: private void maybeTextDecl() throws IOException, SAXException {
1872:
1873: // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
1874: if (peek("<?xml")) {
1875: readVersion(false, "1.0");
1876: readEncoding(true);
1877: maybeWhitespace();
1878: if (!peek("?>"))
1879: fatal("P-057");
1880: }
1881: }
1882:
/**
 * Processes the contents of an external parameter entity that has just
 * been made the current input ({@code in}).
 * <p/>
 * After an optional text declaration, the entity is read as a sequence
 * of whitespace, parameter-entity references, markup declarations and
 * conditional sections until it is exhausted. Anything else is fatal
 * error "P-059".
 *
 * @param next the entity being processed; not read by this method,
 *             which works on the current input instead
 */
private void externalParameterEntity(ExternalEntity next)
        throws IOException, SAXException {

    //
    // Reap the intended benefits of standalone declarations:
    // don't deal with external parameter entities, except to
    // validate the standalone declaration.
    //

    // n.b. "in external parameter entities" (and external
    // DTD subset, same grammar) parameter references can
    // occur "within" markup declarations ... expansions can
    // cross syntax rules. Flagged here; affects getc().

    // [79] ExtPE ::= TextDecl? extSubsetDecl
    // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
    //                        | PEReference | S )*
    InputEntity pe;

    // XXX if this returns false ...

    // remember the entity being processed; "in" may change while
    // nested entities are pushed and popped below
    pe = in;
    maybeTextDecl();
    while (!pe.isEOF()) {
        // pop internal PEs (and whitespace before/after)
        if (in.isEOF()) {
            in = in.pop();
            continue;
        }
        // whitespace and syntactic PE references are handled with
        // lexical PE expansion switched off ...
        doLexicalPE = false;
        if (maybeWhitespace())
            continue;
        if (maybePEReference())
            continue;
        // ... and declarations with it switched on
        doLexicalPE = true;
        if (maybeMarkupDecl() || maybeConditionalSect())
            continue;
        // unrecognized content: stop and report below
        break;
    }
    // if (in != pe) throw new InternalError("who popped my PE?");
    if (!pe.isEOF())
        fatal("P-059", new Object[] { in.getName() });
}
1926:
1927: private void readEncoding(boolean must) throws IOException,
1928: SAXException {
1929:
1930: // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1931: String name = maybeReadAttribute("encoding", must);
1932:
1933: if (name == null)
1934: return;
1935: for (int i = 0; i < name.length(); i++) {
1936: char c = name.charAt(i);
1937: if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
1938: continue;
1939: if (i != 0
1940: && ((c >= '0' && c <= '9') || c == '-' || c == '_' || c == '.'))
1941: continue;
1942: fatal("P-060", new Object[] { new Character(c) });
1943: }
1944:
1945: //
1946: // This should be the encoding in use, and it's even an error for
1947: // it to be anything else (in certain cases that are impractical to
1948: // to test, and may even be insufficient). So, we do the best we
1949: // can, and warn if things look suspicious. Note that Java doesn't
1950: // uniformly expose the encodings, and that the names it uses
1951: // internally are nonstandard. Also, that the XML spec allows
1952: // such "errors" not to be reported at all.
1953: //
1954: String currentEncoding = in.getEncoding();
1955:
1956: if (currentEncoding != null
1957: && !name.equalsIgnoreCase(currentEncoding))
1958: warning("P-061", new Object[] { name, currentEncoding });
1959: }
1960:
1961: private boolean maybeNotationDecl() throws IOException,
1962: SAXException {
1963:
1964: // [82] NotationDecl ::= '<!NOTATION' S Name S
1965: // (ExternalID | PublicID) S? '>'
1966: // [83] PublicID ::= 'PUBLIC' S PubidLiteral
1967: InputEntity start = peekDeclaration("!NOTATION");
1968:
1969: if (start == null)
1970: return false;
1971:
1972: String name = getMarkupDeclname("F-019", false);
1973: ExternalEntity entity = new ExternalEntity(in);
1974:
1975: whitespace("F-011");
1976: if (peek("PUBLIC")) {
1977: whitespace("F-009");
1978: entity.publicId = parsePublicId();
1979: if (maybeWhitespace()) {
1980: if (!peek(">"))
1981: entity.systemId = parseSystemId();
1982: else
1983: ungetc();
1984: }
1985: } else if (peek("SYSTEM")) {
1986: whitespace("F-008");
1987: entity.systemId = parseSystemId();
1988: } else
1989: fatal("P-062");
1990: maybeWhitespace();
1991: nextChar('>', "F-032", name);
1992: if (start != in)
1993: error("V-013", null);
1994: if (entity.systemId != null
1995: && entity.systemId.indexOf('#') != -1)
1996: error("P-056", new Object[] { entity.systemId });
1997:
1998: Object value = notations.get(name);
1999: if (value != null && value instanceof ExternalEntity)
2000: warning("P-063", new Object[] { name });
2001:
2002: else {
2003: notations.put(name, entity);
2004: dtdHandler.notationDecl(name, entity.publicId,
2005: entity.systemId);
2006: }
2007: return true;
2008: }
2009:
2010: ////////////////////////////////////////////////////////////////
2011: //
2012: // UTILITIES
2013: //
2014: ////////////////////////////////////////////////////////////////
2015:
/**
 * Returns the next input character.
 * <p/>
 * With {@code doLexicalPE} off this simply delegates to the current
 * input entity. With it on, exhausted entities are popped first and a
 * '%' is treated as the start of a parameter-entity reference: the
 * referenced entity is pushed as new input, wrapped in a single space
 * on either side, and the first character of that pushed input is
 * returned.
 *
 * @return the next character, possibly taken from a newly expanded
 *         parameter entity
 */
private char getc() throws IOException, SAXException {

    if (!doLexicalPE) {
        char c = in.getc();
        return c;
    }

    //
    // External parameter entities get funky processing of '%param;'
    // references. It's not clearly defined in the XML spec; but it
    // boils down to having those refs be _lexical_ in most cases to
    // include partial syntax productions. It also needs selective
    // enabling; "<!ENTITY % foo ...>" must work, for example, and
    // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
    // if it's expanded in a literal, else "ab cd". PEs also do
    // not expand within comments or PIs, and external PEs are only
    // allowed to have markup decls (and so aren't handled lexically).
    //
    // This PE handling should be merged into maybeWhitespace, where
    // it can be dealt with more consistently.
    //
    // Also, there are some validity constraints in this area.
    //
    char c;

    // drop exhausted entities; running out of the document entity
    // itself is a fatal error ("P-064")
    while (in.isEOF()) {
        if (in.isInternal() || (doLexicalPE && !in.isDocument()))
            in = in.pop();
        else {
            fatal("P-064", new Object[] { in.getName() });
        }
    }
    if ((c = in.getc()) == '%' && doLexicalPE) {
        // PE ref ::= '%' name ';'
        String name = maybeGetName();
        Object entity;

        if (name == null)
            fatal("P-011");
        nextChar(';', "F-021", name);
        entity = params.get(name);

        // push a magic "entity" before and after the
        // real one, so ungetc() behaves uniformly
        // (entities are pushed in reverse reading order: the space
        // pushed here is read last, the one pushed below is read first)
        pushReader(" ".toCharArray(), null, false);
        if (entity instanceof InternalEntity)
            pushReader(((InternalEntity) entity).buf, name, false);
        else if (entity instanceof ExternalEntity)
            // PEs can't be unparsed!
            // XXX if this returns false ...
            pushReader((ExternalEntity) entity);
        else if (entity == null)
            // see note in maybePEReference re making this be nonfatal.
            fatal("V-022");
        else
            throw new InternalError();
        pushReader(" ".toCharArray(), null, false);
        return in.getc();
    }
    return c;
}
2077:
2078: private void ungetc() {
2079:
2080: in.ungetc();
2081: }
2082:
2083: private boolean peek(String s) throws IOException, SAXException {
2084:
2085: return in.peek(s, null);
2086: }
2087:
2088: // Return the entity starting the specified declaration
2089: // (for validating declaration nesting) else null.
2090:
2091: private InputEntity peekDeclaration(String s) throws IOException,
2092: SAXException {
2093:
2094: InputEntity start;
2095:
2096: if (!in.peekc('<'))
2097: return null;
2098: start = in;
2099: if (in.peek(s, null))
2100: return start;
2101: in.ungetc();
2102: return null;
2103: }
2104:
2105: private void nextChar(char c, String location, String near)
2106: throws IOException, SAXException {
2107:
2108: while (in.isEOF() && !in.isDocument())
2109: in = in.pop();
2110: if (!in.peekc(c))
2111: fatal("P-008", new Object[] { new Character(c),
2112: messages.getMessage(locale, location),
2113: (near == null ? "" : ('"' + near + '"')) });
2114: }
2115:
2116: private void pushReader(char buf[], String name, boolean isGeneral)
2117: throws SAXException {
2118:
2119: InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2120: r.init(buf, name, in, !isGeneral);
2121: in = r;
2122: }
2123:
2124: private boolean pushReader(ExternalEntity next) throws IOException,
2125: SAXException {
2126:
2127: InputEntity r = InputEntity.getInputEntity(dtdHandler, locale);
2128: InputSource s;
2129: try {
2130: s = next.getInputSource(resolver);
2131: } catch (IOException e) {
2132: String msg = "unable to open the external entity from :"
2133: + next.systemId;
2134: if (next.publicId != null)
2135: msg += " (public id:" + next.publicId + ")";
2136:
2137: SAXParseException spe = new SAXParseException(msg,
2138: getPublicId(), getSystemId(), getLineNumber(),
2139: getColumnNumber(), e);
2140: dtdHandler.fatalError(spe);
2141: throw e;
2142: }
2143:
2144: r.init(s, next.name, in, next.isPE);
2145: in = r;
2146: return true;
2147: }
2148:
2149: public String getPublicId() {
2150:
2151: return (in == null) ? null : in.getPublicId();
2152: }
2153:
2154: public String getSystemId() {
2155:
2156: return (in == null) ? null : in.getSystemId();
2157: }
2158:
2159: public int getLineNumber() {
2160:
2161: return (in == null) ? -1 : in.getLineNumber();
2162: }
2163:
2164: public int getColumnNumber() {
2165:
2166: return (in == null) ? -1 : in.getColumnNumber();
2167: }
2168:
2169: // error handling convenience routines
2170:
2171: private void warning(String messageId, Object parameters[])
2172: throws SAXException {
2173:
2174: SAXParseException e = new SAXParseException(messages
2175: .getMessage(locale, messageId, parameters),
2176: getPublicId(), getSystemId(), getLineNumber(),
2177: getColumnNumber());
2178:
2179: dtdHandler.warning(e);
2180: }
2181:
2182: void error(String messageId, Object parameters[])
2183: throws SAXException {
2184:
2185: SAXParseException e = new SAXParseException(messages
2186: .getMessage(locale, messageId, parameters),
2187: getPublicId(), getSystemId(), getLineNumber(),
2188: getColumnNumber());
2189:
2190: dtdHandler.error(e);
2191: }
2192:
2193: private void fatal(String messageId) throws SAXException {
2194:
2195: fatal(messageId, null);
2196: }
2197:
2198: private void fatal(String messageId, Object parameters[])
2199: throws SAXException {
2200:
2201: SAXParseException e = new SAXParseException(messages
2202: .getMessage(locale, messageId, parameters),
2203: getPublicId(), getSystemId(), getLineNumber(),
2204: getColumnNumber());
2205:
2206: dtdHandler.fatalError(e);
2207:
2208: throw e;
2209: }
2210:
2211: //
2212: // Map char arrays to strings ... cuts down both on memory and
2213: // CPU usage for element/attribute/other names that are reused.
2214: //
2215: // Documents typically repeat names a lot, so we more or less
2216: // intern all the strings within the document; since some strings
2217: // are repeated in multiple documents (e.g. stylesheets) we go
2218: // a bit further, and intern globally.
2219: //
2220: static class NameCache {
2221: //
2222: // Unless we auto-grow this, the default size should be a
2223: // reasonable bit larger than needed for most XML files
2224: // we've yet seen (and be prime). If it's too small, the
2225: // penalty is just excess cache collisions.
2226: //
2227: NameCacheEntry hashtable[] = new NameCacheEntry[541];
2228:
2229: //
2230: // Usually we just want to get the 'symbol' for these chars
2231: //
2232: String lookup(char value[], int len) {
2233:
2234: return lookupEntry(value, len).name;
2235: }
2236:
2237: //
2238: // Sometimes we need to scan the chars in the resulting
2239: // string, so there's an accessor which exposes them.
2240: // (Mostly for element end tags.)
2241: //
2242: NameCacheEntry lookupEntry(char value[], int len) {
2243:
2244: int index = 0;
2245: NameCacheEntry entry;
2246:
2247: // hashing to get index
2248: for (int i = 0; i < len; i++)
2249: index = index * 31 + value[i];
2250: index &= 0x7fffffff;
2251: index %= hashtable.length;
2252:
2253: // return entry if one's there ...
2254: for (entry = hashtable[index]; entry != null; entry = entry.next) {
2255: if (entry.matches(value, len))
2256: return entry;
2257: }
2258:
2259: // else create new one
2260: entry = new NameCacheEntry();
2261: entry.chars = new char[len];
2262: System.arraycopy(value, 0, entry.chars, 0, len);
2263: entry.name = new String(entry.chars);
2264: //
2265: // NOTE: JDK 1.1 has a fixed size string intern table,
2266: // with non-GC'd entries. It can panic here; that's a
2267: // JDK problem, use 1.2 or later with many identifiers.
2268: //
2269: entry.name = entry.name.intern(); // "global" intern
2270: entry.next = hashtable[index];
2271: hashtable[index] = entry;
2272: return entry;
2273: }
2274: }
2275:
2276: static class NameCacheEntry {
2277:
2278: String name;
2279: char chars[];
2280: NameCacheEntry next;
2281:
2282: boolean matches(char value[], int len) {
2283:
2284: if (chars.length != len)
2285: return false;
2286: for (int i = 0; i < len; i++)
2287: if (value[i] != chars[i])
2288: return false;
2289: return true;
2290: }
2291: }
2292:
2293: //
2294: // Message catalog for diagnostics.
2295: //
2296: static final Catalog messages = new Catalog();
2297:
2298: static final class Catalog extends MessageCatalog {
2299:
2300: Catalog() {
2301: super (DTDParser.class);
2302: }
2303: }
2304:
2305: }
|