0001: /*
0002: * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
0003: *
0004: * This file is part of Resin(R) Open Source
0005: *
0006: * Each copy or derived work must preserve the copyright notice and this
0007: * notice unmodified.
0008: *
0009: * Resin Open Source is free software; you can redistribute it and/or modify
0010: * it under the terms of the GNU General Public License as published by
0011: * the Free Software Foundation; either version 2 of the License, or
0012: * (at your option) any later version.
0013: *
0014: * Resin Open Source is distributed in the hope that it will be useful,
0015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
0017: * of NON-INFRINGEMENT. See the GNU General Public License for more
0018: * details.
0019: *
0020: * You should have received a copy of the GNU General Public License
0021: * along with Resin Open Source; if not, write to the
0022: *
0023: * Free Software Foundation, Inc.
0024: * 59 Temple Place, Suite 330
0025: * Boston, MA 02111-1307 USA
0026: *
0027: * @author Scott Ferguson
0028: */
0029:
0030: package com.caucho.xml2;
0031:
0032: import com.caucho.util.*;
0033: import com.caucho.vfs.Path;
0034: import com.caucho.vfs.ReadStream;
0035: import com.caucho.vfs.ReaderWriterStream;
0036: import com.caucho.vfs.Vfs;
0037: import com.caucho.vfs.WriteStream;
0038: import com.caucho.xml2.readers.MacroReader;
0039: import com.caucho.xml2.readers.Utf16Reader;
0040: import com.caucho.xml2.readers.Utf8Reader;
0041: import com.caucho.xml2.readers.XmlReader;
0042:
0043: import org.w3c.dom.Document;
0044: import org.w3c.dom.Node;
0045: import org.xml.sax.InputSource;
0046: import org.xml.sax.Locator;
0047: import org.xml.sax.SAXException;
0048: import org.xml.sax.SAXParseException;
0049:
0050: import javax.xml.namespace.QName;
0051: import java.io.FileNotFoundException;
0052: import java.io.IOException;
0053: import java.io.InputStream;
0054: import java.util.*;
0055: import java.util.logging.Level;
0056:
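/**
 * Parses the declarations of a document type definition (DTD) on behalf of
 * an XmlParser.
 *
 * <p>Characters, source positions and error objects are obtained from the
 * owning XmlParser. The ELEMENT, ATTLIST, NOTATION and ENTITY declarations,
 * comments and processing instructions that are read are recorded in a
 * QDocumentType.
 */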
0063: public class DtdParser {
0064: private static final L10N L = new L10N(DtdParser.class);
0065:
0066: static HashMap<String, String> _attrTypes = new HashMap<String, String>();
0067: static Entities _entities = new XmlEntities();
0068:
0069: private XmlParser _xmlParser;
0070:
0071: QAttributes _attributes;
0072: QAttributes _nullAttributes;
0073:
0074: boolean _inDtd;
0075: boolean _strictComments = true;
0076:
0077: CharBuffer _text;
0078: CharBuffer _eltName;
0079: CharBuffer _cb;
0080: CharBuffer _buf = new CharBuffer();
0081: String _textFilename;
0082: int _textLine;
0083:
0084: char[] _textBuffer = new char[1024];
0085: int _textLength;
0086: int _textCapacity = _textBuffer.length;
0087: boolean _isIgnorableWhitespace;
0088: boolean _isJspText;
0089:
0090: CharBuffer _name = new CharBuffer();
0091: CharBuffer _nameBuffer = new CharBuffer();
0092:
0093: MacroReader _macro = new MacroReader();
0094: int _macroIndex = 0;
0095: int _macroLength = 0;
0096: char[] _macroBuffer;
0097:
0098: QName[] _elementNames = new QName[64];
0099: NamespaceMap[] _namespaces = new NamespaceMap[64];
0100: int[] _elementLines = new int[64];
0101: int _elementTop;
0102:
0103: NamespaceMap _namespaceMap;
0104:
0105: ArrayList<String> _attrNames = new ArrayList<String>();
0106: ArrayList<String> _attrValues = new ArrayList<String>();
0107:
0108: ReadStream _is;
0109: XmlReader _reader;
0110:
0111: String _extPublicId;
0112: String _extSystemId;
0113:
0114: QName _activeNode;
0115: QName _topNamespaceNode;
0116: boolean _isTagStart;
0117: boolean _stopOnIncludeEnd;
0118: boolean _hasTopElement;
0119: boolean _hasDoctype;
0120: QDocumentType _dtd;
0121:
/**
 * Creates a DTD parser bound to its owning parser and its target DTD.
 *
 * @param xmlParser the parser that supplies characters, positions and errors
 * @param dtd the document type that receives the parsed declarations
 */
public DtdParser(XmlParser xmlParser, QDocumentType dtd) {
  this._dtd = dtd;
  this._xmlParser = xmlParser;
}
0126:
0127: /**
0128: * Parses the DTD.
0129: *
0130: * <pre>
0131: * dtd-item ::= <!ELEMENT ... |
0132: * <!ATTLIST ... |
0133: * <!NOTATION ... |
0134: * <!ENTITY ... |
0135: * <!-- comment |
0136: * <? pi |
0137: * %pe-ref;
0138: * </pre>
0139: *
0140: * @return the next character.
0141: */
0142: int parseDoctypeDecl(QDocumentType doctype) throws IOException,
0143: SAXException {
0144: _hasDoctype = true;
0145: int ch = 0;
0146:
0147: for (ch = _xmlParser.skipWhitespace(read()); ch >= 0
0148: && ch != ']'; ch = _xmlParser.skipWhitespace(read())) {
0149: if (ch == '<') {
0150: if ((ch = read()) == '!') {
0151: if (XmlChar.isNameStart(ch = read())) {
0152: ch = _xmlParser.parseName(_text, ch);
0153: String name = _text.toString();
0154:
0155: if (name.equals("ELEMENT"))
0156: parseElementDecl(doctype);
0157: else if (name.equals("ATTLIST"))
0158: parseAttlistDecl(doctype);
0159: else if (name.equals("NOTATION"))
0160: parseNotationDecl(doctype);
0161: else if (name.equals("ENTITY"))
0162: parseEntityDecl(doctype);
0163: else
0164: throw error("unknown declaration '" + name
0165: + "'");
0166: } else if (ch == '-')
0167: parseComment();
0168: else if (ch == '[') {
0169: ch = _xmlParser.parseName(_text, read());
0170: String name = _text.toString();
0171:
0172: if (name.equals("IGNORE")) {
0173: parseIgnore();
0174: } else if (name.equals("INCLUDE")) {
0175: parseIgnore();
0176: } else
0177: throw error(L.l(
0178: "unknown declaration '{0}'", name));
0179: }
0180: } else if (ch == '?') {
0181: parsePI();
0182: } else
0183: throw error(L.l("expected markup at {0}",
0184: badChar(ch)));
0185: } else if (ch == '%') {
0186: ch = _xmlParser.parseName(_buf, read());
0187:
0188: if (ch != ';')
0189: throw error(L
0190: .l(
0191: "'%{0};' expects ';' at {1}. Parameter entities have a '%name;' syntax.",
0192: _buf, badChar(ch)));
0193:
0194: addPEReference(_text, _buf.toString());
0195: } else {
0196: throw error(L.l("expected '<' at {0}", badChar(ch)));
0197: }
0198:
0199: _text.clear();
0200: }
0201: _text.clear();
0202:
0203: return read();
0204: }
0205:
0206: private int parseNameToken(CharBuffer name, int ch)
0207: throws IOException, SAXException {
0208: name.clear();
0209:
0210: if (!XmlChar.isNameChar(ch))
0211: throw error(L.l("expected name at {0}", badChar(ch)));
0212:
0213: for (; XmlChar.isNameChar(ch); ch = read())
0214: name.append((char) ch);
0215:
0216: return ch;
0217: }
0218:
0219: private void appendText(String s) {
0220: if (_text.length() == 0) {
0221: _textFilename = getFilename();
0222: _textLine = getLine();
0223: }
0224:
0225: _text.append(s);
0226: }
0227:
0228: private int parseCharacterReference() throws IOException,
0229: SAXException {
0230: int ch = read();
0231:
0232: int radix = 10;
0233: if (ch == 'x') {
0234: radix = 16;
0235: ch = read();
0236: }
0237:
0238: int value = 0;
0239: for (; ch != ';'; ch = read()) {
0240: if (ch >= '0' && ch <= '9')
0241: value = radix * value + ch - '0';
0242: else if (radix == 16 && ch >= 'a' && ch <= 'f')
0243: value = radix * value + ch - 'a' + 10;
0244: else if (radix == 16 && ch >= 'A' && ch <= 'F')
0245: value = radix * value + ch - 'A' + 10;
0246: else
0247: throw error(L.l("malformed entity ref at {0}",
0248: badChar(ch)));
0249: }
0250:
0251: if (value > 0xffff)
0252: throw error(L.l("malformed entity ref at {0}", "" + value));
0253:
0254: // xml/0072
0255: if (!isChar(value))
0256: throw error(L.l("illegal character ref at {0}",
0257: badChar(value)));
0258:
0259: return value;
0260: }
0261:
0262: /**
0263: * Parses an attribute value.
0264: *
0265: * <pre>
0266: * value ::= '[^']*'
0267: * ::= "[^"]*"
0268: * ::= [^ />]*
0269: * </pre>
0270: *
0271: * @param value the CharBuffer which will contain the value.
0272: * @param ch the next character from the input stream.
0273: * @param isGeneral true if general entities are allowed.
0274: *
0275: * @return the following character from the input stream
0276: */
0277: private int parseValue(CharBuffer value, int ch, boolean isGeneral)
0278: throws IOException, SAXException {
0279: int end = ch;
0280:
0281: value.clear();
0282:
0283: if (end == '\'' || end == '"')
0284: ch = read();
0285: else {
0286: value.append((char) end);
0287: for (ch = read(); ch >= 0 && XmlChar.isNameChar(ch); ch = read())
0288: value.append((char) ch);
0289:
0290: throw error(L
0291: .l(
0292: "XML attribute value must be quoted at '{0}'. XML attribute syntax is either attr=\"value\" or attr='value'.",
0293: value));
0294: }
0295:
0296: while (ch != -1
0297: && (end != 0 && ch != end || end == 0
0298: && isAttributeChar(ch))) {
0299: if (end == 0 && ch == '/') {
0300: ch = read();
0301: if (!isWhitespace(ch) && ch != '>') {
0302: value.append('/');
0303: value.append((char) ch);
0304: } else {
0305: unread(ch);
0306: return '/';
0307: }
0308: } else if (ch == '&') {
0309: if ((ch = read()) == '#')
0310: value.append((char) parseCharacterReference());
0311: else if (!isGeneral) {
0312: value.append('&');
0313: value.append((char) ch);
0314: } else if (XmlChar.isNameStart(ch)) {
0315: ch = _xmlParser.parseName(_buf, ch);
0316: String name = _buf.toString();
0317:
0318: if (ch != ';')
0319: throw error(L.l("expected '{0}' at {1}", ";",
0320: badChar(ch)));
0321: else {
0322: int lookup = _entities.getEntity(name);
0323:
0324: if (lookup >= 0 && lookup <= 0xffff) {
0325: ch = read();
0326: value.append((char) lookup);
0327: continue;
0328: }
0329:
0330: QEntity entity = _dtd == null ? null : _dtd
0331: .getEntity(name);
0332: if (entity != null && entity._value != null)
0333: _xmlParser.setMacroAttr(entity._value);
0334: else
0335: throw error(L
0336: .l(
0337: "expected local reference at '&{0};'",
0338: name));
0339: }
0340: }
0341: } else if (ch == '%' && !isGeneral) {
0342: ch = read();
0343:
0344: if (!XmlChar.isNameStart(ch)) {
0345: value.append('%');
0346: continue;
0347: } else {
0348: ch = _xmlParser.parseName(_buf, ch);
0349:
0350: if (ch != ';')
0351: throw error(L.l("expected '{0}' at {1}", ";",
0352: badChar(ch)));
0353: else
0354: addPEReference(value, _buf.toString());
0355: }
0356: } else if (isGeneral) {
0357: if (ch == '\r') {
0358: ch = read();
0359: if (ch != '\n') {
0360: value.append('\n');
0361: continue;
0362: }
0363: }
0364: value.append((char) ch);
0365: } else if (ch == '\r') {
0366: value.append(' ');
0367:
0368: if ((ch = read()) != '\n')
0369: continue;
0370: } else if (ch == '\n')
0371: value.append(' ');
0372: else
0373: value.append((char) ch);
0374:
0375: ch = read();
0376: }
0377:
0378: if (end != 0)
0379: ch = read();
0380:
0381: return ch;
0382: }
0383:
0384: private boolean isAttributeChar(int ch) {
0385: switch (ch) {
0386: case ' ':
0387: case '\t':
0388: case '\n':
0389: case '\r':
0390: return false;
0391: case '<':
0392: case '>':
0393: case '\'':
0394: case '"':
0395: case '=':
0396: return false;
0397: default:
0398: return true;
0399: }
0400: }
0401:
0402: private int parsePI() throws IOException, SAXException {
0403: int ch;
0404:
0405: ch = read();
0406: if (!XmlChar.isNameStart(ch))
0407: throw error(L
0408: .l(
0409: "expected name after '<?' at {0}. Processing instructions expect a name like <?foo ... ?>",
0410: badChar(ch)));
0411: ch = _xmlParser.parseName(_text, ch);
0412:
0413: String piName = _text.toString();
0414: if (!piName.equals("xml"))
0415: return parsePITail(piName, ch);
0416: else {
0417: throw error(L
0418: .l("<?xml ... ?> occurs after content. The <?xml ... ?> prolog must be at the document start."));
0419:
0420: }
0421: }
0422:
0423: private int parsePITail(String piName, int ch) throws IOException,
0424: SAXException {
0425: ch = _xmlParser.skipWhitespace(ch);
0426:
0427: _text.clear();
0428: while (ch != -1) {
0429: if (ch == '?') {
0430: if ((ch = read()) == '>')
0431: break;
0432: else
0433: _text.append('?');
0434: } else {
0435: _text.append((char) ch);
0436: ch = read();
0437: }
0438: }
0439:
0440: QProcessingInstruction pi;
0441: pi = new QProcessingInstruction(piName, _text.toString());
0442: pi._owner = _dtd._owner;
0443: _dtd.appendChild(pi);
0444:
0445: return read();
0446: }
0447:
0448: /**
0449: * Parses a comment. The "<!--" has already been read.
0450: */
0451: private void parseComment() throws IOException, SAXException {
0452: int ch = read();
0453:
0454: if (ch != '-')
0455: throw error(L.l("expected comment at {0}", badChar(ch)));
0456:
0457: ch = read();
0458:
0459: comment: while (ch != -1) {
0460: if (ch == '-') {
0461: ch = read();
0462:
0463: while (ch == '-') {
0464: if ((ch = read()) == '>')
0465: break comment;
0466: else if (_strictComments)
0467: throw error(L.l("XML forbids '--' in comments"));
0468: else if (ch == '-') {
0469: } else {
0470: break;
0471: }
0472: }
0473: } else if (!XmlChar.isChar(ch)) {
0474: throw error(L.l("bad character {0}", hex(ch)));
0475: } else {
0476: ch = read();
0477: }
0478: }
0479:
0480: QComment comment = new QComment(_buf.toString());
0481: comment._owner = _dtd._owner;
0482: _dtd.appendChild(comment);
0483: }
0484:
0485: /**
0486: * Ignores content to the ']]>'
0487: */
0488: private void parseIgnore() throws IOException, SAXException {
0489: int ch = read();
0490:
0491: while (ch >= 0) {
0492: if (ch != ']') {
0493: ch = read();
0494: } else if ((ch = read()) != ']') {
0495: } else if ((ch = read()) == '>')
0496: return;
0497: }
0498: }
0499:
0500: private int parseContentSpec(QElementDef def, int ch)
0501: throws IOException, SAXException {
0502: ch = expandPE(ch);
0503:
0504: if (XmlChar.isNameStart(ch)) {
0505: ch = _xmlParser.parseName(_text, ch);
0506: String name = _text.toString();
0507:
0508: if (name.equals("EMPTY")) {
0509: def._content = "EMPTY";
0510: return ch;
0511: } else if (name.equals("ANY")) {
0512: def._content = "ANY";
0513: return ch;
0514: } else
0515: throw error(L.l("expected EMPTY or ANY at '{0}'", name));
0516: } else if (ch != '(') {
0517: throw error(L
0518: .l(
0519: "expected grammar definition starting with '(' at {0}. <!ELEMENT> definitions have the syntax <!ELEMENT name - - (grammar)>",
0520: badChar(ch)));
0521: } else {
0522: QContentParticle cp = new QContentParticle();
0523: def._content = cp;
0524:
0525: return parseContentParticle(cp, true);
0526: }
0527: }
0528:
0529: /**
0530: * Parses a content-particle, i.e. a grammer particle in the DTD
0531: * regexp.
0532: */
0533: private int parseContentParticle(QContentParticle cp, boolean isTop)
0534: throws IOException, SAXException {
0535: boolean hasCdata = false;
0536: cp._separator = 0;
0537: cp._repeat = 0;
0538: int ch;
0539:
0540: ch = expandPE(read());
0541:
0542: for (; ch != -1; ch = expandPE(ch)) {
0543: if (ch == '(') {
0544: QContentParticle child = new QContentParticle();
0545: cp.addChild(child);
0546:
0547: ch = parseContentParticle(child, false);
0548: } else if (XmlChar.isNameStart(ch)) {
0549: ch = _xmlParser.parseName(_text, ch);
0550: cp.addChild(_text.toString());
0551: } else if (ch == '#') {
0552: ch = _xmlParser.parseName(_text, read());
0553: String name = _text.toString();
0554:
0555: if (cp._children.size() != 0)
0556: throw error(L.l("'#{0}' must occur first", name));
0557: if (!isTop)
0558: throw error(L.l(
0559: "'#{0}' may only occur at top level", name));
0560:
0561: if (name.equals("PCDATA"))
0562: cp.addChild("#PCDATA");
0563: else
0564: throw error(L.l(
0565: "illegal content particle at '#{0}'", name));
0566:
0567: hasCdata = true;
0568: } else
0569: throw error(L.l("expected content particle at {0}",
0570: badChar(ch)));
0571:
0572: ch = expandPE(ch);
0573:
0574: if (ch == '?' || ch == '*' || ch == '+') {
0575: Object child = cp.getChild(cp.getChildSize() - 1);
0576: if (child instanceof QContentParticle) {
0577: QContentParticle cpChild = (QContentParticle) child;
0578: cpChild._repeat = ch;
0579: } else {
0580: QContentParticle cpChild = new QContentParticle();
0581: cpChild.addChild(child);
0582: cpChild._repeat = ch;
0583: cp.setChild(cp.getChildSize() - 1, cpChild);
0584: }
0585:
0586: ch = expandPE(read());
0587: }
0588:
0589: if (ch == ')')
0590: break;
0591: else if (cp._separator == 0) {
0592: if (ch == '|')
0593: cp._separator = ch;
0594: else if (hasCdata)
0595: throw error(L.l(
0596: "#PCDATA must be separated by '|' at {0}",
0597: badChar(ch)));
0598: else if (ch == ',')
0599: cp._separator = ch;
0600: else
0601: throw error(L.l("expected separator at {0}",
0602: badChar(ch)));
0603:
0604: ch = read();
0605: } else if (ch != cp._separator)
0606: throw error(L.l("expected '{0}' at {1}", ""
0607: + (char) cp._separator, badChar(ch)));
0608: else
0609: ch = read();
0610: }
0611:
0612: ch = expandPE(read());
0613:
0614: if (hasCdata && (ch == '+' || ch == '?'))
0615: throw error(L.l("pcdata clause can not have {0}",
0616: badChar(ch)));
0617: else if (ch == '*' || ch == '+' || ch == '?') {
0618: cp._repeat = ch;
0619: return read();
0620: } else
0621: return ch;
0622: }
0623:
0624: private int expandPE(int ch) throws IOException, SAXException {
0625: ch = _xmlParser.skipWhitespace(ch);
0626:
0627: while (ch == '%') {
0628: parsePEReference();
0629: ch = _xmlParser.skipWhitespace(read());
0630: }
0631:
0632: return ch;
0633: }
0634:
0635: /**
0636: * Parses a PE reference %foo; and inserts the macro text to the input
0637: * stream.
0638: */
0639: private void parsePEReference() throws IOException, SAXException {
0640: int ch = _xmlParser.parseName(_buf, read());
0641:
0642: if (ch != ';')
0643: throw error(L
0644: .l(
0645: "'%{0};' expects ';' at {1}. Parameter entities have a '%name;' syntax.",
0646: _buf, badChar(ch)));
0647:
0648: addPEReference(_text, _buf.toString());
0649: }
0650:
0651: /**
0652: * Expands the macro value of a PE reference.
0653: */
0654: private void addPEReference(CharBuffer value, String name)
0655: throws IOException, SAXException {
0656: QEntity entity = _dtd.getParameterEntity(name);
0657:
0658: if (entity == null && !_dtd.isExternal())
0659: throw error(L
0660: .l(
0661: "'%{0};' is an unknown parameter entity. Parameter entities must be defined in an <!ENTITY> declaration before use.",
0662: name));
0663: else if (entity != null && entity._value != null) {
0664: _xmlParser.setMacro(entity._value);
0665: } else if (entity != null && entity.getSystemId() != null) {
0666: _xmlParser.pushInclude(entity.getPublicId(), entity
0667: .getSystemId());
0668: } else {
0669: value.append("%");
0670: value.append(name);
0671: value.append(";");
0672: }
0673: }
0674:
0675: /**
0676: * <!ELEMENT name contentspec>
0677: */
0678: private void parseElementDecl(QDocumentType doctype)
0679: throws IOException, SAXException {
0680: int ch = _xmlParser.skipWhitespace(read());
0681:
0682: ch = _xmlParser.parseName(_text, ch);
0683: String name = _text.toString();
0684:
0685: ch = _xmlParser.skipWhitespace(ch);
0686:
0687: QElementDef def = _dtd.addElement(name);
0688: def.setLocation(getSystemId(), getFilename(), getLine(),
0689: getColumn());
0690:
0691: boolean needsStartTag = true;
0692: boolean needsEndTag = true;
0693:
0694: ch = parseContentSpec(def, ch);
0695:
0696: ch = _xmlParser.skipWhitespace(ch);
0697:
0698: if (ch != '>')
0699: throw error(L.l("'<!ELEMENT' must close with '>' at {0}",
0700: badChar(ch)));
0701: }
0702:
0703: private static String toAttrDefault(CharBuffer text) {
0704: for (int i = 0; i < text.length(); i++) {
0705: int ch = text.charAt(i);
0706:
0707: if (ch == '"') {
0708: text.delete(i, i + 1);
0709: text.insert(i, """);
0710: i--;
0711: } else if (ch == '\'') {
0712: text.delete(i, i + 1);
0713: text.insert(i, "'");
0714: i--;
0715: }
0716: }
0717:
0718: return text.toString();
0719: }
0720:
0721: /**
0722: * <!ATTLIST name (attr type def)*>
0723: */
0724: private void parseAttlistDecl(QDocumentType doctype)
0725: throws IOException, SAXException {
0726: int ch = _xmlParser.skipWhitespace(read());
0727:
0728: ch = _xmlParser.parseName(_text, ch);
0729: String name = _text.toString();
0730:
0731: ch = _xmlParser.skipWhitespace(ch);
0732:
0733: QElementDef def = _dtd.addElement(name);
0734:
0735: while (XmlChar.isNameStart((ch = expandPE(ch)))) {
0736: ch = _xmlParser.parseName(_text, ch);
0737: String attrName = _text.toString();
0738:
0739: String attrType = null;
0740: ArrayList<String> enumeration = null;
0741: ch = expandPE(ch);
0742: if (ch == '(') {
0743: attrType = "#ENUM";
0744: enumeration = new ArrayList<String>();
0745: do {
0746: ch = expandPE(read());
0747:
0748: ch = parseNameToken(_text, ch);
0749: enumeration.add(_text.toString());
0750:
0751: ch = expandPE(ch);
0752: } while (ch == '|');
0753:
0754: if (ch != ')')
0755: throw error(L
0756: .l(
0757: "expected '{0}' at {1}. <!ATTRLIST> enumerations definitions are enclosed in '(' ... ')'.",
0758: ")", badChar(ch)));
0759: ch = read();
0760: } else {
0761: ch = _xmlParser.parseName(_text, ch);
0762: attrType = _text.toString();
0763:
0764: if (attrType.equals("NOTATION")) {
0765: enumeration = new ArrayList<String>();
0766: ch = expandPE(ch);
0767: if (ch != '(')
0768: throw error(L.l("expected '{0}' at {1}", "(",
0769: badChar(ch)));
0770:
0771: do {
0772: ch = expandPE(read());
0773:
0774: ch = _xmlParser.parseName(_text, ch);
0775: enumeration.add(_text.toString());
0776:
0777: ch = expandPE(ch);
0778: } while (ch == '|');
0779:
0780: if (ch != ')')
0781: throw error(L.l("expected '{0}' at {1}", ")",
0782: badChar(ch)));
0783: ch = read();
0784: } else if (_attrTypes.get(attrType) != null) {
0785: } else
0786: throw error(L.l("expected attribute type at '{0}'",
0787: attrType));
0788: }
0789:
0790: ch = _xmlParser.skipWhitespace(ch);
0791: String qualifier = null;
0792: String attrDefault = null;
0793: if (ch == '#') {
0794: ch = _xmlParser.parseName(_text, read());
0795: qualifier = "#" + _text.toString();
0796:
0797: if (qualifier.equals("#IMPLIED")) {
0798: } else if (qualifier.equals("#REQUIRED")) {
0799: } else if (qualifier.equals("#FIXED")) {
0800: ch = _xmlParser.skipWhitespace(ch);
0801: ch = parseValue(_text, ch, false);
0802: attrDefault = _text.toString();
0803: } else
0804: throw error(L.l(
0805: "expected attribute default at '{0}'",
0806: qualifier));
0807: } else if (ch != '>') {
0808: ch = parseValue(_text, ch, false);
0809: attrDefault = _text.toString();
0810: }
0811:
0812: def.addAttribute(attrName, attrType, enumeration,
0813: qualifier, attrDefault);
0814: if (attrType != null && attrType.equals("ID"))
0815: doctype.setElementId(name, attrName);
0816:
0817: ch = _xmlParser.skipWhitespace(ch);
0818: }
0819:
0820: if (ch != '>')
0821: throw error(L.l("expected '{0}' at {1}", ">", badChar(ch)));
0822: }
0823:
0824: /**
0825: * <!NOTATION name systemId publicId>
0826: */
0827: private void parseNotationDecl(QDocumentType doctype)
0828: throws IOException, SAXException {
0829: int ch = _xmlParser.skipWhitespace(read());
0830:
0831: ch = _xmlParser.parseName(_text, ch);
0832: String name = _text.toString();
0833:
0834: ch = _xmlParser.skipWhitespace(ch);
0835: ch = _xmlParser.parseName(_text, ch);
0836: String key = _text.toString();
0837:
0838: ch = _xmlParser.skipWhitespace(ch);
0839: ch = parseValue(_text, ch, false);
0840: String id = _text.toString();
0841:
0842: ch = _xmlParser.skipWhitespace(ch);
0843:
0844: QNotation notation;
0845:
0846: if (key.equals("PUBLIC")) {
0847: String systemId = null;
0848:
0849: if (ch == '"' || ch == '\'') {
0850: ch = parseValue(_text, ch, false);
0851: ch = _xmlParser.skipWhitespace(ch);
0852: systemId = _text.toString();
0853: }
0854:
0855: notation = new QNotation(name, id, systemId);
0856: notation._owner = doctype._owner;
0857: notation.setLocation(getSystemId(), getFilename(),
0858: getLine(), getColumn());
0859: } else if (key.equals("SYSTEM")) {
0860: notation = new QNotation(name, null, id);
0861: notation._owner = doctype._owner;
0862: notation.setLocation(getSystemId(), getFilename(),
0863: getLine(), getColumn());
0864: } else
0865: throw error(L.l("expected PUBLIC or SYSTEM at '{0}'", key));
0866:
0867: doctype.addNotation(notation);
0868: doctype.appendChild(notation);
0869:
0870: if (ch != '>')
0871: throw error(L.l("expected '{0}' at {1}", ">", badChar(ch)));
0872: }
0873:
0874: /**
0875: * externalID ::= PUBLIC publicId systemId
0876: * ::= SYSTEM systemId
0877: */
0878: private int parseExternalID(int ch) throws IOException,
0879: SAXException {
0880: ch = _xmlParser.parseName(_text, ch);
0881: String key = _text.toString();
0882: ch = _xmlParser.skipWhitespace(ch);
0883:
0884: _extSystemId = null;
0885: _extPublicId = null;
0886: if (key.equals("PUBLIC")) {
0887: ch = parseValue(_text, ch, false);
0888: _extPublicId = _text.toString();
0889: ch = _xmlParser.skipWhitespace(ch);
0890:
0891: if (_extPublicId.indexOf('&') > 0)
0892: throw error(L
0893: .l(
0894: "Illegal character '&' in PUBLIC identifier '{0}'",
0895: _extPublicId));
0896:
0897: ch = parseValue(_text, ch, false);
0898: ch = _xmlParser.skipWhitespace(ch);
0899: _extSystemId = _text.toString();
0900: } else if (key.equals("SYSTEM")) {
0901: ch = parseValue(_text, ch, false);
0902: _extSystemId = _text.toString();
0903: } else
0904: throw error(L.l("expected PUBLIC or SYSTEM at '{0}'", key));
0905:
0906: return ch;
0907: }
0908:
0909: /**
0910: * <!ENTITY name systemId publicId>
0911: */
0912: private void parseEntityDecl(QDocumentType doctype)
0913: throws IOException, SAXException {
0914: int ch = _xmlParser.skipWhitespace(read());
0915:
0916: boolean isPe = ch == '%';
0917:
0918: if (isPe)
0919: ch = _xmlParser.skipWhitespace(read());
0920:
0921: ch = _xmlParser.parseName(_text, ch);
0922: String name = _text.toString();
0923:
0924: ch = _xmlParser.skipWhitespace(ch);
0925:
0926: QEntity entity;
0927: if (ch == '"' || ch == '\'') {
0928: ch = parseValue(_text, ch, false);
0929:
0930: entity = new QEntity(name, _text.toString(), null, null);
0931: entity._owner = doctype._owner;
0932: entity.setLocation(getSystemId(), getFilename(), getLine(),
0933: getColumn());
0934: } else {
0935: ch = parseExternalID(ch);
0936:
0937: entity = new QEntity(name, null, _extPublicId, _extSystemId);
0938: entity._owner = doctype._owner;
0939: entity.setLocation(getSystemId(), getFilename(), getLine(),
0940: getColumn());
0941:
0942: ch = _xmlParser.skipWhitespace(ch);
0943: if (!isPe && XmlChar.isNameStart(ch)) {
0944: ch = _xmlParser.parseName(_text, ch);
0945: String key = _text.toString();
0946: if (key.equals("NDATA")) {
0947: ch = _xmlParser.skipWhitespace(ch);
0948: ch = _xmlParser.parseName(_text, ch);
0949:
0950: String ndata = _text.toString();
0951:
0952: entity._ndata = ndata;
0953: } else
0954: throw error(L.l("expected 'NDATA' at '{0}'", key));
0955: }
0956: }
0957:
0958: entity._isPe = isPe;
0959:
0960: if (isPe)
0961: doctype.addParameterEntity(entity);
0962: else
0963: doctype.addEntity(entity);
0964:
0965: doctype.appendChild(entity);
0966:
0967: ch = _xmlParser.skipWhitespace(ch);
0968:
0969: if (ch != '>')
0970: throw error(L.l("expected '>' at {0}", badChar(ch)));
0971: }
0972:
0973: private boolean isWhitespace(int ch) {
0974: return ch <= 0x20
0975: && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd);
0976: }
0977:
0978: private boolean isChar(int ch) {
0979: return (ch >= 0x20 && ch <= 0xd7ff || ch == 0x9 || ch == 0xa
0980: || ch == 0xd || ch >= 0xe000 && ch <= 0xfffd);
0981: }
0982:
0983: /**
0984: * Returns the hex representation of a byte.
0985: */
0986: private static String hex(int value) {
0987: CharBuffer cb = CharBuffer.allocate();
0988:
0989: for (int b = 3; b >= 0; b--) {
0990: int v = (value >> (4 * b)) & 0xf;
0991: if (v < 10)
0992: cb.append((char) (v + '0'));
0993: else
0994: cb.append((char) (v - 10 + 'a'));
0995: }
0996:
0997: return cb.close();
0998: }
0999:
1000: private int read() throws IOException, SAXException {
1001: return _xmlParser.read();
1002: }
1003:
1004: public void unread(int ch) {
1005: _xmlParser.unread(ch);
1006: }
1007:
1008: private String getSystemId() {
1009: return _xmlParser.getSystemId();
1010: }
1011:
1012: private String getFilename() {
1013: return _xmlParser.getFilename();
1014: }
1015:
1016: private XmlParseException error(String msg) {
1017: return _xmlParser.error(msg);
1018: }
1019:
1020: private int getLine() {
1021: return _xmlParser.getLine();
1022: }
1023:
1024: private int getColumn() {
1025: return _xmlParser.getColumn();
1026: }
1027:
1028: private String badChar(int ch) {
1029: return _xmlParser.badChar(ch);
1030: }
1031:
// Registers the attribute type keywords accepted by parseAttlistDecl();
// every keyword maps to itself.
static {
  String[] keywords = {
    "CDATA", "ID", "IDREF", "IDREFS",
    "ENTITY", "ENTITIES", "NMTOKEN", "NMTOKENS",
  };

  for (String keyword : keywords) {
    _attrTypes.put(keyword, keyword);
  }
}
1042: }
|