0001: /*
0002: * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
0003: *
0004: * This file is part of Resin(R) Open Source
0005: *
0006: * Each copy or derived work must preserve the copyright notice and this
0007: * notice unmodified.
0008: *
0009: * Resin Open Source is free software; you can redistribute it and/or modify
0010: * it under the terms of the GNU General Public License as published by
0011: * the Free Software Foundation; either version 2 of the License, or
0012: * (at your option) any later version.
0013: *
0014: * Resin Open Source is distributed in the hope that it will be useful,
0015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
0017: * of NON-INFRINGEMENT. See the GNU General Public License for more
0018: * details.
0019: *
0020: * You should have received a copy of the GNU General Public License
0021: * along with Resin Open Source; if not, write to the
0022: * Free SoftwareFoundation, Inc.
0023: * 59 Temple Place, Suite 330
0024: * Boston, MA 02111-1307 USA
0025: *
0026: * @author Scott Ferguson
0027: */
0028:
0029: package com.caucho.xml;
0030:
0031: import com.caucho.util.CharBuffer;
0032: import com.caucho.vfs.Path;
0033: import com.caucho.vfs.ReadStream;
0034: import com.caucho.vfs.ReaderWriterStream;
0035: import com.caucho.vfs.Vfs;
0036: import com.caucho.vfs.WriteStream;
0037: import com.caucho.xml.readers.MacroReader;
0038: import com.caucho.xml.readers.Utf16Reader;
0039: import com.caucho.xml.readers.Utf8Reader;
0040: import com.caucho.xml.readers.XmlReader;
0041:
0042: import org.w3c.dom.Document;
0043: import org.w3c.dom.Node;
0044: import org.xml.sax.InputSource;
0045: import org.xml.sax.Locator;
0046: import org.xml.sax.SAXException;
0047: import org.xml.sax.SAXParseException;
0048:
0049: import java.io.FileNotFoundException;
0050: import java.io.IOException;
0051: import java.io.InputStream;
0052: import java.util.ArrayList;
0053: import java.util.Arrays;
0054: import java.util.logging.Level;
0055:
0056: /**
0057: * A configurable XML parser. Loose versions of XML and HTML are supported
0058: * by changing the Policy object.
0059: *
0060: * <p>Normally, applications will use Xml, LooseXml, Html, or LooseHtml.
0061: */
0062: public class XmlParser extends AbstractParser {
0063: // Xerces uses the following
0064: public static final String XMLNS = "http://www.w3.org/2000/xmlns/";
0065: public static final String XML = "http://www.w3.org/XML/1998/namespace";
0066:
// QNames for the pseudo-nodes "#document", "#text", "#jsp" and "#whitespace".
// DOC_NAME is the value of _activeNode while no element is open.
0067: static final QName DOC_NAME = new QName(null, "#document", null);
0068: static final QName TEXT_NAME = new QName(null, "#text", null);
0069: static final QName JSP_NAME = new QName(null, "#jsp", null);
0070: static final QName WHITESPACE_NAME = new QName(null, "#whitespace",
0071: null);
// Attribute name under which a <%= ... %> found inside a tag is stored.
0072: static final QName JSP_ATTRIBUTE_NAME = new QName("xtp",
0073: "jsp-attribute", null);
0074:
// Attribute holders; both are re-created by init().
0075: QAttributes _attributes;
0076: QAttributes _nullAttributes;
0077:
// True while parseDoctype() is running.
0078: boolean _inDtd;
0079:
// Scratch character buffers shared by the parsing methods.
0080: CharBuffer _text;
0081: CharBuffer _eltName;
0082: CharBuffer _cb;
0083: CharBuffer _buf = new CharBuffer();
// Filename and line recorded when a text run starts.
0084: String _textFilename;
0085: int _textLine;
0086:
// Pending character data; _textLength is the number of chars in use and
// _textCapacity starts out as the array length.
0087: char[] _textBuffer = new char[1024];
0088: int _textLength;
0089: int _textCapacity = _textBuffer.length;
0090: boolean _isIgnorableWhitespace;
// Set while the body of a <% ... %> block is being collected.
0091: boolean _isJspText;
0092:
0093: CharBuffer _name = new CharBuffer();
0094: CharBuffer _nameBuffer = new CharBuffer();
0095:
0096: MacroReader _macro = new MacroReader();
0097: int _macroIndex = 0;
0098: int _macroLength = 0;
0099: char[] _macroBuffer;
0100:
// Open-element stack: three parallel arrays indexed by _elementTop.
0101: QName[] _elementNames = new QName[64];
0102: NamespaceMap[] _namespaces = new NamespaceMap[64];
0103: int[] _elementLines = new int[64];
0104: int _elementTop;
0105:
// Head of the linked list of namespace bindings currently in scope.
0106: NamespaceMap _namespaceMap;
0107:
// Parallel lists of the non-xmlns attribute names and values of the tag being parsed.
0108: ArrayList<String> _attrNames = new ArrayList<String>();
0109: ArrayList<String> _attrValues = new ArrayList<String>();
0110:
0111: ReadStream _is;
0112: XmlReader _reader;
0113:
// Public/system ids of the most recently parsed external id.
0114: String _extPublicId;
0115: String _extSystemId;
0116:
0117: QName _activeNode;
0118: QName _topNamespaceNode;
0119: boolean _isTagStart;
// When set, reaching the end of the current reader does not pop back to the
// enclosing include.
0120: boolean _stopOnIncludeEnd;
0121: boolean _hasTopElement;
// Set once a DTD subset has been parsed.
0122: boolean _hasDoctype;
// True when the policy is an HtmlPolicy.
0123: boolean _isHtml;
0124: Locator _locator = new LocatorImpl(this );
0125:
/**
 * Creates a parser with the superclass defaults and calls clear().
 */
public XmlParser() {
  super();

  clear();
}
0129:
/**
 * Creates a parser bound to a parsing policy and a DTD, then calls clear().
 *
 * @param policy the parsing policy, handling optional tags.
 * @param dtd the DTD the parser starts with.
 */
XmlParser(Policy policy, QDocumentType dtd) {
  super(policy, dtd);
  clear();
}
0141:
0142: /**
0143: * Initialize the parser.
0144: */
0145: void init() {
0146: super .init();
0147:
0148: _attributes = new QAttributes();
0149: _nullAttributes = new QAttributes();
0150: _eltName = new CharBuffer();
0151: _text = new CharBuffer();
0152:
0153: _isHtml = _policy instanceof HtmlPolicy;
0154:
0155: // jsp/193b
0156: // _namespaceMap = null;
0157:
0158: _textLength = 0;
0159: _isIgnorableWhitespace = true;
0160: _elementTop = 0;
0161: _elementLines[0] = 1;
0162:
0163: _line = 1;
0164:
0165: _dtd = null;
0166: _inDtd = false;
0167: _isTagStart = false;
0168: _stopOnIncludeEnd = false;
0169:
0170: _extPublicId = null;
0171: _extSystemId = null;
0172:
0173: // _filename = null;
0174: _publicId = null;
0175: _systemId = null;
0176:
0177: _hasTopElement = false;
0178: _hasDoctype = false;
0179:
0180: _macroIndex = 0;
0181: _macroLength = 0;
0182:
0183: _reader = null;
0184:
0185: // _owner = null;
0186:
0187: _policy.init();
0188: }
0189:
0190: /**
0191: * Parse the document from a read stream.
0192: *
0193: * @param is read stream to parse from.
0194: *
0195: * @return The parsed document.
0196: */
0197: Document parseInt(ReadStream is) throws IOException, SAXException {
0198: _is = is;
0199:
0200: if (_filename == null && _systemId != null)
0201: _filename = _systemId;
0202: else if (_filename == null)
0203: _filename = _is.getUserPath();
0204:
0205: if (_systemId == null) {
0206: _systemId = _is.getPath().getURL();
0207: if ("null:".equals(_systemId)
0208: || "string:".equals(_systemId))
0209: _systemId = "stream";
0210: }
0211:
0212: /* xsl/0401
0213: if (_isNamespaceAware)
0214: _namespaceMap = new NamespaceMap(null, "", "");
0215: */
0216: _policy.setNamespaceAware(_isNamespaceAware);
0217:
0218: if (_filename == null)
0219: _filename = _systemId;
0220:
0221: if (_filename == null)
0222: _filename = "stream";
0223:
0224: if (_dtd != null)
0225: _dtd.setSystemId(_systemId);
0226:
0227: if (_builder != null) {
0228: if (!"string:".equals(_systemId)
0229: && !"stream".equals(_systemId))
0230: _builder.setSystemId(_systemId);
0231: _builder.setFilename(_is.getPath().getURL());
0232: }
0233:
0234: if (_contentHandler == null)
0235: _contentHandler = new org.xml.sax.helpers.DefaultHandler();
0236:
0237: _contentHandler.setDocumentLocator(_locator);
0238:
0239: if (_owner == null)
0240: _owner = new QDocument();
0241: if (_defaultEncoding != null)
0242: _owner.setAttribute("encoding", _defaultEncoding);
0243: _owner.addDepend(is.getPath());
0244:
0245: _activeNode = DOC_NAME;
0246:
0247: _policy.setStream(is);
0248: _policy.setNamespace(_namespaceMap);
0249:
0250: _contentHandler.startDocument();
0251:
0252: int ch = parseXMLDeclaration(null);
0253:
0254: ch = skipWhitespace(ch);
0255: parseNode(ch, false);
0256:
0257: /*
0258: if (dbg.canWrite()) {
0259: printDebugNode(dbg, doc, 0);
0260: dbg.flush();
0261: }
0262: */
0263:
0264: if (_strictXml && !_hasTopElement)
0265: throw error(L
0266: .l("XML file has no top-element. All well-formed XML files have a single top-level element."));
0267:
0268: if (_contentHandler != null)
0269: _contentHandler.endDocument();
0270:
0271: QDocument owner = _owner;
0272: _owner = null;
0273:
0274: return owner;
0275: }
0276:
0277: /**
0278: * The main dispatch loop.
0279: *
0280: * <p>The current node is not a parameter; it is tracked in _activeNode.
0281: * @param ch the next character
0282: * @param special true for the short form, <foo/bar/>
0283: */
0284: private void parseNode(int ch, boolean special) throws IOException,
0285: SAXException {
0286: //boolean isTop = node instanceof QDocument;
0287:
0288: _text.clear();
0289:
0290: loop: while (true) {
// no text is pending: remember where the next text run begins
0291: if (_textLength == 0) {
0292: _textFilename = getFilename();
0293: _textLine = getLine();
0294: }
0295:
0296: switch (ch) {
// end of the current reader: flush pending text, then either pop back to
// the enclosing include and keep parsing, or close the open tags
0297: case -1:
0298: if (_textLength != 0)
0299: appendText();
0300: if (!_stopOnIncludeEnd && _reader.getNext() != null) {
0301: popInclude();
0302: if (_reader != null)
0303: parseNode(_reader.read(), special);
0304: return;
0305: }
0306: closeTag("");
0307: return;
0308:
// whitespace: copied as-is unless _normalizeWhitespace is set, in which
// case a run collapses to one ' ' and whitespace at the start of a text
// run is dropped when _isTagStart is set
0309: case ' ':
0310: case '\t':
0311: case '\n':
0312: case '\r':
0313: if (!_normalizeWhitespace)
0314: addText((char) ch);
0315: else if (_textLength == 0) {
0316: if (!_isTagStart)
0317: addText(' ');
0318: } else if (_textBuffer[_textLength - 1] != ' ') {
0319: addText(' ');
0320: }
0321: ch = _reader.read();
0322: break;
0323:
0324: case 0xffff:
0325: // marker for end of text for serialization
0326: return;
0327:
// ordinary character data
0328: default:
0329: addText((char) ch);
0330: ch = _reader.read();
0331: break;
0332:
// '/' is plain text except in the short form, where "/>" (or '/' followed
// by end of input) ends the element
0333: case '/':
0334: if (!special) {
0335: addText((char) ch);
0336: ch = _reader.read();
0337: continue;
0338: }
0339: ch = _reader.read();
0340: if (ch == '>' || ch == -1) {
0341: appendText();
0342: popNode();
0343: return;
0344: }
0345: addText('/');
0346: break;
0347:
0348: case '&':
0349: ch = parseEntityReference();
0350: break;
0351:
0352: case '<':
// NOTE(review): endTag is never read in this method
0353: boolean endTag = false;
0354: ch = _reader.read();
0355:
// close tag: </name>
0356: if (ch == '/' && !special) {
// drop a single trailing space left by whitespace normalization
0357: if (_normalizeWhitespace && _textLength > 0
0358: && _textBuffer[_textLength - 1] == ' ') {
0359: _textLength--;
0360: }
0361: appendText();
0362:
0363: ch = _reader.parseName(_name, _reader.read());
0364:
0365: if (ch != '>') {
0366: // XXX: Hack for Java PetStore
0367: while (XmlChar.isWhitespace(ch))
0368: ch = _reader.read();
0369:
0370: if (ch != '>')
0371: throw error(L
0372: .l(
0373: "`</{0}>' expected `>' at {1}. Closing tags must close immediately after the tag name.",
0374: _name, badChar(ch)));
0375: }
0376:
0377: closeTag(_policy.getName(_name).getName());
0378: ch = _reader.read();
0379: }
0380: // element: <tag attr=value ... attr=value> ...
0381: else if (XmlChar.isNameStart(ch)) {
0382: appendText();
0383:
0384: parseElement(ch);
0385: ch = _reader.read();
0386: }
0387: // <! ...
0388: else if (ch == '!') {
0389: // <![CDATA[ ... ]]>
0390: if ((ch = _reader.read()) == '[') {
0391: parseCdata();
0392: ch = _reader.read();
0393: }
0394: // <!-- ... -->
0395: else if (ch == '-') {
0396: parseComment();
0397:
0398: ch = _reader.read();
0399: } else if (XmlChar.isNameStart(ch)) {
// <!NAME: only DOCTYPE is accepted here (any letter case when _forgiving)
0400: appendText();
0401: ch = _reader.parseName(_name, ch);
0402: String declName = _name.toString();
0403: if (declName.equals("DOCTYPE")) {
0404: parseDoctype(ch);
0405: if (_contentHandler instanceof DOMBuilder)
0406: ((DOMBuilder) _contentHandler)
0407: .dtd(_dtd);
0408:
0409: ch = _reader.read();
0410: } else if (_forgiving
0411: && declName.equalsIgnoreCase("doctype")) {
0412: parseDoctype(ch);
0413: if (_contentHandler instanceof DOMBuilder)
0414: ((DOMBuilder) _contentHandler)
0415: .dtd(_dtd);
0416:
0417: ch = _reader.read();
0418: } else
0419: throw error(L
0420: .l(
0421: "expected `<!DOCTYPE' declaration at {0}",
0422: declName));
0423: } else if (_forgiving) {
// keep "<!" as literal text; ch (already read) is handled by the next pass
0424: addText("<!");
0425: } else
0426: throw error(L
0427: .l(
0428: "expected `<!DOCTYPE' declaration at {0}",
0429: badChar(ch)));
0430: }
0431: // PI: <?tag attr=value ... attr=value?>
0432: else if (ch == '?') {
0433: ch = parsePI();
0434: } else if (_strictXml) {
0435: throw error(L
0436: .l(
0437: "expected tag name after `<' at {0}. Open tag names must immediately follow the open brace like `<foo ...>'",
0438: badChar(ch)));
0439: }
0440: // implicit <![CDATA[ for <% ... %>
0441: else if (_isJsp && ch == '%') {
0442: ch = _reader.read();
0443:
0444: appendText();
// "<%=" clears _isJspText; any other "<%..." sets it
0445: _isJspText = ch != '=';
0446:
0447: addText("<%");
0448:
// copy everything through the closing "%>" into the text buffer
0449: while (ch >= 0) {
0450: if (ch == '%') {
0451: ch = _reader.read();
0452: if (ch == '>') {
0453: addText("%>");
0454: ch = _reader.read();
0455: break;
0456: } else
0457: addText('%');
0458: } else {
0459: addText((char) ch);
0460: ch = _reader.read();
0461: }
0462: }
0463:
0464: appendText();
0465: _isJspText = false;
0466: } else {
// not markup: keep '<' as literal text
0467: addText('<');
0468: }
0469: }
0470: }
0471: }
0472:
0473: /**
0474: * Parses the <!DOCTYPE> declaration.
0475: */
0476: private void parseDoctype(int ch) throws IOException, SAXException {
0477: if (_activeNode != DOC_NAME)
0478: throw error(L
0479: .l("<!DOCTYPE immediately follow the <?xml ...?> declaration."));
0480:
0481: _inDtd = true;
0482:
0483: ch = skipWhitespace(ch);
0484: ch = _reader.parseName(_nameBuffer, ch);
0485: String name = _nameBuffer.toString();
0486: ch = skipWhitespace(ch);
0487:
0488: if (_dtd == null)
0489: _dtd = new QDocumentType(name);
0490:
0491: _dtd.setName(name);
0492:
0493: if (XmlChar.isNameStart(ch)) {
0494: ch = parseExternalID(ch);
0495: ch = skipWhitespace(ch);
0496:
0497: _dtd._publicId = _extPublicId;
0498: _dtd._systemId = _extSystemId;
0499: }
0500:
0501: if (_dtd._systemId != null && !_dtd._systemId.equals("")) {
0502: InputStream is = null;
0503:
0504: unread(ch);
0505:
0506: XmlReader oldReader = _reader;
0507: boolean hasInclude = false;
0508:
0509: try {
0510: pushInclude(_extPublicId, _extSystemId);
0511: hasInclude = true;
0512: } catch (Exception e) {
0513: if (log.isLoggable(Level.FINEST))
0514: log.log(Level.FINER, e.toString(), e);
0515: else
0516: log.finer(e.toString());
0517: }
0518:
0519: if (hasInclude) {
0520: _stopOnIncludeEnd = true;
0521: try {
0522: ch = parseDoctypeDecl(_dtd);
0523: } catch (XmlParseException e) {
0524: if (_extSystemId != null
0525: && _extSystemId.startsWith("http")) {
0526: log.log(Level.FINE, e.toString(), e);
0527: } else
0528: throw e;
0529: }
0530: _stopOnIncludeEnd = false;
0531:
0532: while (_reader != null && _reader != oldReader)
0533: popInclude();
0534: }
0535:
0536: if (_reader != null)
0537: ch = skipWhitespace(read());
0538: }
0539:
0540: if (ch == '[')
0541: ch = parseDoctypeDecl(_dtd);
0542:
0543: ch = skipWhitespace(ch);
0544:
0545: _inDtd = false;
0546:
0547: if (ch != '>')
0548: throw error(L.l("expected `>' in <!DOCTYPE at {0}",
0549: badChar(ch)));
0550: }
0551:
0552: /**
0553: * Parses the DTD.
0554: *
0555: * <pre>
0556: * dtd-item ::= <!ELEMENT ... |
0557: * <!ATTLIST ... |
0558: * <!NOTATION ... |
0559: * <!ENTITY ... |
0560: * <!-- comment |
0561: * <? pi |
0562: * %pe-ref;
0563: * </pre>
0564: *
0565: * @return the next character.
0566: */
0567: private int parseDoctypeDecl(QDocumentType doctype)
0568: throws IOException, SAXException {
0569: _hasDoctype = true;
0570: int ch = 0;
0571:
0572: for (ch = skipWhitespace(read()); ch >= 0 && ch != ']'; ch = skipWhitespace(read())) {
0573: if (ch == '<') {
0574: if ((ch = read()) == '!') {
0575: if (XmlChar.isNameStart(ch = read())) {
0576: ch = _reader.parseName(_text, ch);
0577: String name = _text.toString();
0578:
0579: if (name.equals("ELEMENT"))
0580: parseElementDecl(doctype);
0581: else if (name.equals("ATTLIST"))
0582: parseAttlistDecl(doctype);
0583: else if (name.equals("NOTATION"))
0584: parseNotationDecl(doctype);
0585: else if (name.equals("ENTITY"))
0586: parseEntityDecl(doctype);
0587: else
0588: throw error("unknown declaration `" + name
0589: + "'");
0590: } else if (ch == '-')
0591: parseComment();
0592: else if (ch == '[') {
0593: ch = _reader.parseName(_text, read());
0594: String name = _text.toString();
0595:
0596: if (name.equals("IGNORE")) {
0597: parseIgnore();
0598: } else if (name.equals("INCLUDE")) {
0599: parseIgnore();
0600: } else
0601: throw error("unknown declaration `" + name
0602: + "'");
0603: }
0604: } else if (ch == '?') {
0605: parsePI();
0606: } else
0607: throw error(L.l("expected markup at {0}",
0608: badChar(ch)));
0609: } else if (ch == '%') {
0610: ch = _reader.parseName(_buf, read());
0611:
0612: if (ch != ';')
0613: throw error(L
0614: .l(
0615: "`%{0};' expects `;' at {1}. Parameter entities have a `%name;' syntax.",
0616: _buf, badChar(ch)));
0617:
0618: addPEReference(_text, _buf.toString());
0619: } else {
0620: throw error(L.l("expected '<' at {0}", badChar(ch)));
0621: }
0622:
0623: _text.clear();
0624: }
0625: _text.clear();
0626:
0627: return read();
0628: }
0629:
0630: /**
0631: * Parses an element.
0632: *
0633: * @param ch the current character
0634: */
0635: private void parseElement(int ch) throws IOException, SAXException {
0636: ch = _reader.parseName(_eltName, ch);
0637:
0638: NamespaceMap oldNamespace = _namespaceMap;
0639:
0640: if (ch != '>' && ch != '/')
0641: ch = parseAttributes(ch, true);
0642: else
0643: _attributes.clear();
0644:
0645: QName qname = _policy.getName(_eltName);
0646:
0647: if (_isValidating && _dtd != null) {
0648: QElementDef elementDef = _dtd.getElement(qname.getName());
0649:
0650: if (elementDef != null)
0651: elementDef.fillDefaults(_attributes);
0652: }
0653:
0654: if (ch == '/') {
0655: // empty tag: <foo/>
0656: if ((ch = _reader.read()) == '>') {
0657: addElement(qname, true, _attributes, oldNamespace);
0658: }
0659: // short tag: </foo/some text here/>
0660: else {
0661: addElement(qname, false, _attributes, oldNamespace);
0662: parseNode(ch, true);
0663: }
0664: } else if (ch == '>') {
0665: addElement(qname, false, _attributes, oldNamespace);
0666: } else
0667: throw error(L
0668: .l(
0669: "unexpected character {0} while parsing `{1}' attributes. Expected an attribute name or `>' or `/>'. XML element syntax is:\n <name attr-1=\"value-1\" ... attr-n=\"value-n\">",
0670: badChar(ch), qname.getName()));
0671: }
0672:
0673: /**
0674: * Parses the attributes in an element.
0675: *
0676: * @param ch the current character, already read from the reader.
* @param isElement when true, xmlns declarations (if _isXmlnsAttribute is
*   set) and JSP expressions are also recorded in _attributes.
0677: *
0678: * @return the next character to read.
0679: */
0680: private int parseAttributes(int ch, boolean isElement)
0681: throws IOException, SAXException {
0682: ch = skipWhitespace(ch);
0683: _attributes.clear();
0684:
0685: _attrNames.clear();
0686: _attrValues.clear();
0687:
// true when whitespace (or the tag name) separates us from the previous attribute
0688: boolean hasWhitespace = true;
0689:
0690: while (ch != -1) {
// a non-name character ends the attribute list, except for '<' in JSP
// mode, which is handed to parseJspAttribute
0691: if (!XmlChar.isNameStart(ch)) {
0692: if (!_isJsp || ch != '<')
0693: break;
0694:
0695: ch = parseJspAttribute(isElement);
0696: continue;
0697: }
0698:
0699: if (!hasWhitespace)
0700: throw error(L
0701: .l("attributes must be separated by whitespace"));
0702:
0703: hasWhitespace = false;
0704:
0705: ch = _reader.parseName(_text, ch);
0706:
// namespace declaration: xmlns="..." or xmlns:prefix="..."
// NOTE(review): startsWith("xmlns") also matches names such as `xmlnsfoo',
// and substring(6) skips index 5 without checking that it is ':' - confirm.
0707: if (!_text.startsWith("xmlns")) {
0708: } else {
0709: QName name;
0710:
0711: if (_isNamespaceAware
0712: && _contentHandler instanceof DOMBuilder)
0713: name = _policy.getNamespaceName(_text);
0714: else
0715: name = new QName(_text.toString(), null);
0716:
0717: String prefix;
0718:
0719: if (_text.length() > 5) {
0720: prefix = _text.substring(6);
0721:
0722: if (prefix.equals(""))
0723: throw error(L
0724: .l(
0725: "'{0}' is an illegal namespace declaration.",
0726: _text));
0727: } else {
0728: prefix = "";
0729: }
0730:
0731: _text.clear();
0732: ch = skipWhitespace(ch);
0733: if (ch != '=')
0734: throw error(L.l("xmlns: needs value at {0}",
0735: badChar(ch)));
0736: ch = skipWhitespace(_reader.read());
0737: ch = parseValue(_text, ch, true);
0738:
0739: hasWhitespace = isWhitespace(ch);
0740:
0741: ch = skipWhitespace(ch);
0742:
0743: // topNamespaceNode = element;
0744: String uri = _text.toString();
0745:
// bind the prefix right away and report it to the content handler
0746: if (_isXmlnsPrefix) {
0747: _namespaceMap = new NamespaceMap(_namespaceMap,
0748: prefix, uri);
0749: _policy.setNamespace(_namespaceMap);
0750:
0751: _contentHandler.startPrefixMapping(prefix, uri);
0752: }
0753:
0754: // needed for xml/032e
0755: if (isElement && _isXmlnsAttribute) {
0756: _attributes.add(name, uri);
0757: }
0758:
0759: continue;
0760: }
0761:
// ordinary attribute: only the raw name and value are recorded here
0762: String attrName = _text.toString();
0763: _attrNames.add(attrName);
0764:
0765: _text.clear();
0766: ch = skipWhitespace(ch);
0767:
0768: String value = null;
0769:
0770: if (ch == '=') {
0771: ch = skipWhitespace(_reader.read());
0772: ch = parseValue(_text, ch, true);
0773:
0774: hasWhitespace = isWhitespace(ch);
0775:
0776: ch = skipWhitespace(ch);
0777:
0778: value = _text.toString();
0779: } else if (_strictAttributes) {
0780: throw error(L
0781: .l(
0782: "attribute `{0}' expects value at {1}. XML requires attributes to have explicit values.",
0783: attrName, badChar(ch)));
0784: } else {
// valueless attribute: the name doubles as the value
0785: value = attrName; // xxx: conflict xsl/0432
0786: hasWhitespace = true;
0787: }
0788:
0789: _attrValues.add(value);
0790: }
0791:
// attribute names are resolved through the policy only after the whole tag
// has been read (presumably so xmlns declarations anywhere in the tag apply)
0792: int len = _attrNames.size();
0793: for (int i = 0; i < len; i++) {
0794: String attrName = _attrNames.get(i);
0795: String value = _attrValues.get(i);
0796:
0797: _text.clear();
0798: _text.append(attrName);
0799: QName name;
0800:
0801: if (_contentHandler instanceof DOMBuilder)
0802: name = _policy.getAttributeName(_eltName, _text, true);
0803: else
0804: name = _policy.getAttributeName(_eltName, _text);
0805:
0806: _attributes.add(name, value);
0807: }
0808:
0809: return ch;
0810: }
0811:
0812: /**
0813: * Special parser to handle the use of <%= as an attribute in JSP
0814: * files. Covers cases like the following:
0815: *
0816: * <pre>
0817: * <options>
0818: * <option name="foo" <%= test.isSelected("foo") %>/>
0819: * </options>
0820: * </pre>
0821: *
0822: * @param element the parent element
0823: *
0824: * @return the next character to read.
0825: */
0826: private int parseJspAttribute(boolean isElement)
0827: throws IOException, XmlParseException {
0828: int ch = _reader.read();
0829:
0830: if (ch != '%')
0831: throw error(L.l("unexpected char `{0}' in element", "%"));
0832:
0833: ch = _reader.read();
0834: if (ch != '=')
0835: throw error(L.l("unexpected char `{0}' in element", "="));
0836:
0837: _text.clear();
0838: ch = _reader.read();
0839: while (ch >= 0) {
0840: if (ch == '%') {
0841: ch = _reader.read();
0842: if (ch == '>') {
0843: ch = _reader.read();
0844: break;
0845: }
0846: _text.append((char) ch);
0847: } else {
0848: _text.append((char) ch);
0849: ch = _reader.read();
0850: }
0851: }
0852:
0853: String value = _text.toString();
0854:
0855: if (isElement)
0856: _attributes.add(JSP_ATTRIBUTE_NAME, value);
0857:
0858: return ch;
0859: }
0860:
0861: /**
0862: * Handle processing at a close tag. For strict XML, this will normally
0863: * just change the current node to its parent, but HTML has a more
0864: * complicated policy.
0865: */
0866: private void closeTag(String endTagName) throws IOException,
0867: SAXException {
0868: while (_activeNode != null && _activeNode != DOC_NAME) {
0869: switch (_policy.elementCloseAction(this , _activeNode,
0870: endTagName)) {
0871: case Policy.POP:
0872: //if (dbg.canWrite())
0873: // dbg.println("</" + activeNode.getNodeName() + ">");
0874:
0875: popNode();
0876: return;
0877:
0878: case Policy.POP_AND_LOOP:
0879: //if (dbg.canWrite())
0880: // dbg.println("</" + activeNode.getNodeName() + ">");
0881:
0882: popNode();
0883: break;
0884:
0885: case Policy.IGNORE:
0886: return;
0887:
0888: default:
0889: throw new RuntimeException();
0890: }
0891: }
0892:
0893: if (!_extraForgiving && endTagName != null
0894: && !endTagName.equals(""))
0895: throw error(L
0896: .l(
0897: "Unexpected end tag `</{0}>' at top-level. All open tags have already been closed.",
0898: endTagName));
0899: }
0900:
0901: /**
0902: * Handles processing of the resin:include tag.
0903: */
0904: private void handleResinInclude() throws IOException, SAXException {
0905: String filename = _attributes.getValue("path");
0906:
0907: if (filename == null || filename.equals(""))
0908: filename = _attributes.getValue("href");
0909:
0910: if (filename.equals(""))
0911: throw error(L
0912: .l("<resin:include> expects a `path' attribute."));
0913:
0914: pushInclude(filename);
0915: }
0916:
0917: /**
0918: * Handles processing of the resin:include tag.
0919: */
0920: private void handleResinIncludeDirectory() throws IOException,
0921: SAXException {
0922: String filename = _attributes.getValue("path");
0923:
0924: if (filename == null || filename.equals(""))
0925: filename = _attributes.getValue("href");
0926:
0927: String extension = _attributes.getValue("extension");
0928:
0929: if (filename.equals(""))
0930: throw error(L
0931: .l("<resin:include> expects a `path' attribute."));
0932:
0933: Path pwd;
0934: if (_searchPath != null)
0935: pwd = _searchPath;
0936: else
0937: pwd = Vfs.lookup(_systemId).getParent();
0938:
0939: Path dir = pwd.lookup(filename);
0940: if (!dir.isDirectory())
0941: throw error(L
0942: .l(
0943: "`{0}' is not a directory for resin:include-directory. The href for resin:include-directory must refer to a directory.",
0944: dir.getNativePath()));
0945:
0946: String[] list = dir.list();
0947: Arrays.sort(list);
0948: for (int i = list.length - 1; i >= 0; i--) {
0949: if (list[i].startsWith(".") || extension != null
0950: && !list[i].endsWith(extension))
0951: continue;
0952:
0953: pushInclude(dir.lookup(list[i]).getPath());
0954: }
0955: }
0956:
0957: private int parseNameToken(CharBuffer name, int ch)
0958: throws IOException, SAXException {
0959: name.clear();
0960:
0961: if (!XmlChar.isNameChar(ch))
0962: throw error(L.l("expected name at {0}", badChar(ch)));
0963:
0964: for (; XmlChar.isNameChar(ch); ch = _reader.read())
0965: name.append((char) ch);
0966:
0967: return ch;
0968: }
0969:
0970: /**
0971: * Pop the top-level node
0972: */
0973: private void popNode() throws SAXException {
0974: QName node = _activeNode;
0975:
0976: if (_activeNode != DOC_NAME) {
0977: String uri = _activeNode.getNamespaceURI();
0978: String localName = _activeNode.getLocalName();
0979:
0980: if (uri == null) {
0981: uri = "";
0982:
0983: if (_isNamespaceAware)
0984: localName = _activeNode.getName();
0985: else
0986: localName = "";
0987: }
0988:
0989: _contentHandler.endElement(uri, localName, _activeNode
0990: .getName());
0991: }
0992:
0993: if (_elementTop > 0) {
0994: _elementTop--;
0995: NamespaceMap oldMap = _namespaces[_elementTop];
0996:
0997: popNamespaces(oldMap);
0998:
0999: _activeNode = _elementNames[_elementTop];
1000: }
1001:
1002: if (_elementTop == 0)
1003: _activeNode = DOC_NAME;
1004: }
1005:
1006: public void pushNamespace(String prefix, String uri) {
1007: _namespaceMap = new NamespaceMap(_namespaceMap, prefix, uri);
1008:
1009: _policy.setNamespace(_namespaceMap);
1010: }
1011:
1012: private void popNamespaces(NamespaceMap oldMap) throws SAXException {
1013: for (; _namespaceMap != null && _namespaceMap != oldMap; _namespaceMap = _namespaceMap.next) {
1014: _contentHandler.endPrefixMapping(_namespaceMap.prefix);
1015: }
1016: _namespaceMap = oldMap;
1017: _policy.setNamespace(_namespaceMap);
1018: }
1019:
1020: private void appendText(String s) {
1021: if (_text.length() == 0) {
1022: _textFilename = getFilename();
1023: _textLine = getLine();
1024: }
1025:
1026: _text.append(s);
1027: }
1028:
1029: /**
1030: * Parses an entity reference:
1031: *
1032: * <pre>
1033: * er ::= &#d+;
1034: * ::= &name;
1035: * </pre>
1036: */
1037: private int parseEntityReference() throws IOException, SAXException {
1038: int ch;
1039:
1040: ch = _reader.read();
1041:
1042: // character reference
1043: if (ch == '#') {
1044: addText((char) parseCharacterReference());
1045:
1046: return _reader.read();
1047: }
1048: // entity reference
1049: else if (XmlChar.isNameStart(ch)) {
1050: ch = _reader.parseName(_buf, ch);
1051:
1052: if (ch != ';' && _strictXml)
1053: throw error(L
1054: .l(
1055: "`&{0};' expected `;' at {0}. Entity references have a `&name;' syntax.",
1056: _buf, badChar(ch)));
1057: else if (ch != ';') {
1058: addText('&');
1059: addText(_buf.toString());
1060: return ch;
1061: }
1062:
1063: addEntityReference(_buf.toString());
1064:
1065: ch = _reader.read();
1066:
1067: return ch;
1068: } else if (_strictXml) {
1069: throw error(L.l("expected name at {0}", badChar(ch)));
1070: } else {
1071: addText('&');
1072: return ch;
1073: }
1074: }
1075:
1076: private int parseCharacterReference() throws IOException,
1077: SAXException {
1078: int ch = _reader.read();
1079:
1080: int radix = 10;
1081: if (ch == 'x') {
1082: radix = 16;
1083: ch = _reader.read();
1084: }
1085:
1086: int value = 0;
1087: for (; ch != ';'; ch = _reader.read()) {
1088: if (ch >= '0' && ch <= '9')
1089: value = radix * value + ch - '0';
1090: else if (radix == 16 && ch >= 'a' && ch <= 'f')
1091: value = radix * value + ch - 'a' + 10;
1092: else if (radix == 16 && ch >= 'A' && ch <= 'F')
1093: value = radix * value + ch - 'A' + 10;
1094: else
1095: throw error(L.l("malformed entity ref at {0}",
1096: badChar(ch)));
1097: }
1098:
1099: if (value > 0xffff)
1100: throw error(L.l("malformed entity ref at {0}", "" + value));
1101:
1102: // xml/0072
1103: if (_strictCharacters && !isChar(value))
1104: throw error(L.l("illegal character ref at {0}",
1105: badChar(value)));
1106:
1107: return value;
1108: }
1109:
1110: /**
1111: * Looks up a named entity reference, filling the text.
1112: */
1113: private void addEntityReference(String name) throws IOException,
1114: SAXException {
1115: boolean expand = !_entitiesAsText || _hasDoctype
1116: || !_switchToXml;
1117: // XXX: not quite the right logic. There should be a soft expandEntities
1118:
1119: if (!expand) {
1120: addText("&" + name + ";");
1121: return;
1122: }
1123:
1124: int ch = _entities.getEntity(name);
1125: if (ch >= 0 && ch <= 0xffff) {
1126: addText((char) ch);
1127: return;
1128: }
1129:
1130: QEntity entity = _dtd == null ? null : _dtd.getEntity(name);
1131:
1132: if (!_expandEntities) {
1133: addText("&" + name + ";");
1134: return;
1135: }
1136:
1137: if (entity == null
1138: && (_dtd == null || _dtd.getName() == null || !_dtd
1139: .isExternal())) {
1140: if (_strictXml)
1141: throw error(L
1142: .l(
1143: "`&{0};' is an unknown entity. XML predefines only `<', `&', `>', `'' and `"'. All other entities must be defined in an <!ENTITY> definition in the DTD.",
1144: name));
1145: else {
1146: if (expand && _contentHandler instanceof DOMBuilder) {
1147: appendText();
1148: ((DOMBuilder) _contentHandler)
1149: .entityReference(name);
1150: } else
1151: addText("&" + name + ";");
1152: }
1153: } else if (entity != null) {
1154: if (expand && entity._isSpecial && entity._value != null)
1155: addText(entity._value);
1156: else if (entity.getSystemId() != null) {
1157: if (pushSystemEntity(entity)) {
1158: }
1159: /* XXX:??
1160: else if (strictXml) {
1161: throw error(L.l("can't open external entity at `&{0};'", name));
1162: }
1163: */
1164: else if (_contentHandler instanceof DOMBuilder) {
1165: appendText();
1166: ((DOMBuilder) _contentHandler)
1167: .entityReference(name);
1168: } else
1169: addText("&" + name + ";");
1170: } else if (expand && entity._value != null)
1171: setMacro(entity._value);
1172: else
1173: addText("&" + name + ";");
1174: } else {
1175: if (expand && _contentHandler instanceof DOMBuilder) {
1176: appendText();
1177: ((DOMBuilder) _contentHandler).entityReference(name);
1178: } else
1179: // XXX: error?
1180: addText("&" + name + ";");
1181: }
1182: }
1183:
1184: private boolean pushSystemEntity(QEntity entity)
1185: throws IOException, SAXException {
1186: String publicId = entity.getPublicId();
1187: String systemId = entity.getSystemId();
1188: String value = null;
1189: InputSource source = null;
1190: ReadStream is = null;
1191:
1192: if (_entityResolver != null)
1193: source = _entityResolver.resolveEntity(publicId, systemId);
1194:
1195: if (source != null && source.getByteStream() != null)
1196: is = Vfs.openRead(source.getByteStream());
1197: else if (source != null && source.getCharacterStream() != null)
1198: is = Vfs.openRead(source.getCharacterStream());
1199: else if (source != null && source.getSystemId() != null
1200: && _searchPath.lookup(source.getSystemId()).isFile()) {
1201: _owner.addDepend(_searchPath.lookup(source.getSystemId()));
1202: is = _searchPath.lookup(source.getSystemId()).openRead();
1203: } else if (systemId != null && !systemId.equals("")) {
1204: String path = systemId;
1205: if (path.startsWith("file:"))
1206: path = path.substring(5);
1207: if (_searchPath != null
1208: && _searchPath.lookup(path).isFile()) {
1209: _owner.addDepend(_searchPath.lookup(path));
1210: is = _searchPath.lookup(path).openRead();
1211: }
1212: }
1213:
1214: if (is == null)
1215: return false;
1216:
1217: _filename = systemId;
1218: _systemId = systemId;
1219:
1220: Path oldSearchPath = _searchPath;
1221: Path path = is.getPath();
1222: if (path != null) {
1223: _owner.addDepend(path);
1224:
1225: if (_searchPath != null) {
1226: _searchPath = path.getParent();
1227: _reader.setSearchPath(oldSearchPath);
1228: }
1229: }
1230:
1231: _is = is;
1232: _line = 1;
1233:
1234: XmlReader oldReader = _reader;
1235: _reader = null;
1236:
1237: int ch = parseXMLDeclaration(oldReader);
1238: unread(ch);
1239:
1240: return true;
1241: }
1242:
1243: /**
1244: * Parses an attribute value.
1245: *
1246: * <pre>
1247: * value ::= '[^']*'
1248: * ::= "[^"]*"
1249: * ::= [^ />]*
1250: * </pre>
1251: *
1252: * @param value the CharBuffer which will contain the value.
1253: * @param ch the next character from the input stream.
1254: * @param isGeneral true if general entities are allowed.
1255: *
1256: * @return the following character from the input stream
1257: */
1258: private int parseValue(CharBuffer value, int ch, boolean isGeneral)
1259: throws IOException, SAXException {
1260: int end = ch;
1261:
1262: value.clear();
1263:
1264: if (end == '\'' || end == '"')
1265: ch = _reader.read();
1266: else if (_strictAttributes) {
1267: value.append((char) end);
1268: for (ch = _reader.read(); ch >= 0 && XmlChar.isNameChar(ch); ch = _reader
1269: .read())
1270: value.append((char) ch);
1271:
1272: throw error(L
1273: .l(
1274: "XML attribute value must be quoted at `{0}'. XML attribute syntax is either attr=\"value\" or attr='value'.",
1275: value));
1276: } else
1277: end = 0;
1278:
1279: while (ch != -1
1280: && (end != 0 && ch != end || end == 0
1281: && isAttributeChar(ch))) {
1282: if (end == 0 && ch == '/') {
1283: ch = _reader.read();
1284: if (!isWhitespace(ch) && ch != '>') {
1285: value.append('/');
1286: value.append((char) ch);
1287: } else {
1288: unread(ch);
1289: return '/';
1290: }
1291: } else if (ch == '&' && !_entitiesAsText) {
1292: if ((ch = _reader.read()) == '#')
1293: value.append((char) parseCharacterReference());
1294: else if (!isGeneral) {
1295: value.append('&');
1296: value.append((char) ch);
1297: } else if (XmlChar.isNameStart(ch)) {
1298: ch = _reader.parseName(_buf, ch);
1299: String name = _buf.toString();
1300:
1301: if (ch != ';' && _strictXml)
1302: throw error(L.l("expected `{0}' at {1}", ";",
1303: badChar(ch)));
1304: else if (ch != ';') {
1305: value.append('&');
1306: value.append(name);
1307: continue;
1308: } else {
1309: int lookup = _entities.getEntity(name);
1310:
1311: if (lookup >= 0 && lookup <= 0xffff) {
1312: ch = _reader.read();
1313: value.append((char) lookup);
1314: continue;
1315: }
1316:
1317: QEntity entity = _dtd == null ? null : _dtd
1318: .getEntity(name);
1319: if (entity != null && entity._value != null)
1320: setMacroAttr(entity._value);
1321: else if (_strictXml)
1322: throw error(L
1323: .l(
1324: "expected local reference at `&{0};'",
1325: name));
1326: else {
1327: value.append('&');
1328: value.append(name);
1329: value.append(';');
1330: }
1331: }
1332: }
1333: } else if (ch == '%' && !isGeneral) {
1334: ch = _reader.read();
1335:
1336: if (!XmlChar.isNameStart(ch)) {
1337: value.append('%');
1338: continue;
1339: } else {
1340: ch = _reader.parseName(_buf, ch);
1341:
1342: if (ch != ';')
1343: throw error(L.l("expected `{0}' at {1}", ";",
1344: badChar(ch)));
1345: else
1346: addPEReference(value, _buf.toString());
1347: }
1348: } else if (ch == '<' && _isJsp) {
1349: value.append('<');
1350:
1351: ch = _reader.read();
1352:
1353: if (ch != '%')
1354: continue;
1355:
1356: value.append('%');
1357:
1358: ch = _reader.read();
1359: while (ch >= 0) {
1360: if (ch == '%') {
1361: ch = _reader.read();
1362: if (ch == '>') {
1363: value.append("%>");
1364: break;
1365: } else
1366: value.append('%');
1367: } else {
1368: value.append((char) ch);
1369: ch = _reader.read();
1370: }
1371: }
1372: } else if (isGeneral) {
1373: if (ch == '\r') {
1374: ch = _reader.read();
1375: if (ch != '\n') {
1376: value.append('\n');
1377: continue;
1378: }
1379: }
1380: value.append((char) ch);
1381: } else if (ch == '\r') {
1382: value.append(' ');
1383:
1384: if ((ch = _reader.read()) != '\n')
1385: continue;
1386: } else if (ch == '\n')
1387: value.append(' ');
1388: else
1389: value.append((char) ch);
1390:
1391: ch = _reader.read();
1392: }
1393:
1394: if (end != 0)
1395: ch = _reader.read();
1396:
1397: return ch;
1398: }
1399:
1400: private boolean isAttributeChar(int ch) {
1401: switch (ch) {
1402: case ' ':
1403: case '\t':
1404: case '\n':
1405: case '\r':
1406: return false;
1407: case '<':
1408: case '>':
1409: case '\'':
1410: case '"':
1411: case '=':
1412: return false;
1413: default:
1414: return true;
1415: }
1416: }
1417:
1418: private void parsePcdata(QNode node) throws IOException,
1419: SAXException {
1420: int ch;
1421: String tail = "</" + node.getNodeName() + ">";
1422:
1423: _text.clear();
1424: ch = _reader.read();
1425: if (ch == '\n')
1426: ch = _reader.read();
1427:
1428: for (; ch != -1; ch = _reader.read()) {
1429: addText((char) ch);
1430:
1431: if (_text.endsWith(tail)) {
1432: _text.setLength(_text.length() - tail.length());
1433: if (_text.length() > 1
1434: && _text.charAt(_text.length() - 1) == '\n')
1435: _text.setLength(_text.length() - 1);
1436: appendText();
1437: return;
1438: }
1439: }
1440:
1441: throw error("bad pcdata");
1442: }
1443:
1444: private int parseXMLDeclaration(XmlReader oldReader)
1445: throws IOException, SAXException {
1446: int startOffset = _is.getOffset();
1447: boolean isEBCDIC = false;
1448: int ch = _is.read();
1449:
1450: XmlReader reader = null;
1451:
1452: // utf-16 starts with \xfe \xff
1453: if (ch == 0xfe) {
1454: ch = _is.read();
1455: if (ch == 0xff) {
1456: _owner.setAttribute("encoding", "UTF-16");
1457: _is.setEncoding("utf-16");
1458:
1459: reader = new Utf16Reader(this , _is);
1460:
1461: ch = reader.read();
1462: }
1463: }
1464: // utf-16 rev starts with \xff \xfe
1465: else if (ch == 0xff) {
1466: ch = _is.read();
1467: if (ch == 0xfe) {
1468: _owner.setAttribute("encoding", "UTF-16");
1469: _is.setEncoding("utf-16");
1470:
1471: reader = new Utf16Reader(this , _is);
1472: ((Utf16Reader) reader).setReverse(true);
1473:
1474: ch = reader.read();
1475: }
1476: }
1477: // utf-16 can also start with \x00 <
1478: else if (ch == 0x00) {
1479: ch = _is.read();
1480: _owner.setAttribute("encoding", "UTF-16");
1481: _is.setEncoding("utf-16");
1482:
1483: reader = new Utf16Reader(this , _is);
1484: }
1485: // utf-8 BOM is \xef \xbb \xbf
1486: else if (ch == 0xef) {
1487: ch = _is.read();
1488: if (ch == 0xbb) {
1489: ch = _is.read();
1490:
1491: if (ch == 0xbf) {
1492: ch = _is.read();
1493:
1494: _owner.setAttribute("encoding", "UTF-8");
1495: _is.setEncoding("utf-8");
1496:
1497: reader = new Utf8Reader(this , _is);
1498: }
1499: }
1500: } else if (ch == 0x4c) {
1501: // ebcdic
1502: // xml/00l1
1503: _is.unread();
1504: // _is.setEncoding("cp037");
1505: _is.setEncoding("cp500");
1506:
1507: isEBCDIC = true;
1508:
1509: reader = new XmlReader(this , _is);
1510:
1511: ch = reader.read();
1512: } else {
1513: int ch2 = _is.read();
1514:
1515: if (ch2 == 0x00) {
1516: _owner.setAttribute("encoding", "UTF-16LE");
1517: _is.setEncoding("utf-16le");
1518:
1519: reader = new Utf16Reader(this , _is);
1520: ((Utf16Reader) reader).setReverse(true);
1521: } else if (ch2 > 0)
1522: _is.unread();
1523: }
1524:
1525: if (reader != null && reader != oldReader) {
1526: } else if (_policy instanceof HtmlPolicy
1527: || _is.getSource() instanceof ReaderWriterStream) {
1528: reader = new XmlReader(this , _is);
1529: } else {
1530: reader = new Utf8Reader(this , _is);
1531: }
1532:
1533: if (ch == '\n')
1534: reader.setLine(2);
1535:
1536: reader.setSystemId(_systemId);
1537: if (_systemId == null)
1538: reader.setSystemId(_filename);
1539: reader.setFilename(_filename);
1540: reader.setPublicId(_publicId);
1541:
1542: reader.setNext(oldReader);
1543:
1544: _reader = reader;
1545:
1546: /* XXX: this might be too strict. */
1547: /*
1548: if (! strictXml) {
1549: for (; XmlChar.isWhitespace(ch); ch = reader.read()) {
1550: }
1551: }
1552: */
1553:
1554: if (ch != '<')
1555: return ch;
1556:
1557: if (parseXMLDecl(_reader) && isEBCDIC) {
1558: // EBCDIC requires a re-read
1559: _is.setOffset(startOffset);
1560:
1561: ch = _reader.read();
1562: if (ch != '<')
1563: throw new IllegalStateException();
1564:
1565: parseXMLDecl(_reader);
1566: }
1567:
1568: return _reader.read();
1569: }
1570:
1571: private boolean parseXMLDecl(XmlReader reader) throws IOException,
1572: SAXException {
1573: int ch = reader.read();
1574: if (ch != '?') {
1575: unread((char) ch);
1576: unread('<');
1577: return false;
1578: }
1579:
1580: ch = _reader.read();
1581: if (!XmlChar.isNameStart(ch))
1582: throw error(L
1583: .l(
1584: "expected name after '<?' at {0}. Processing instructions expect a name like <?foo ... ?>",
1585: badChar(ch)));
1586: ch = _reader.parseName(_text, ch);
1587:
1588: String piName = _text.toString();
1589: if (!piName.equals("xml")) {
1590: ch = parsePITail(piName, ch);
1591: unread(ch);
1592: return false;
1593: }
1594:
1595: if (_switchToXml && _activeNode == DOC_NAME && !_inDtd) {
1596: _policy = new XmlPolicy();
1597: }
1598:
1599: ch = parseAttributes(ch, false);
1600:
1601: if (ch != '?')
1602: throw error(L
1603: .l(
1604: "expected `?' at {0}. Processing instructions end with `?>' like <?foo ... ?>",
1605: badChar(ch)));
1606: if ((ch = _reader.read()) != '>')
1607: throw error(L
1608: .l(
1609: "expected `>' at {0}. Processing instructions end with `?>' like <?foo ... ?>",
1610: ">", badChar(ch)));
1611:
1612: for (int i = 0; i < _attributes.getLength(); i++) {
1613: QName name = _attributes.getName(i);
1614: String value = _attributes.getValue(i);
1615:
1616: if (_owner != null)
1617: _owner.setAttribute(name.getName(), value);
1618:
1619: if (name.getName().equals("encoding")) { // xml/00hb // && ! _inDtd) {
1620: String encoding = value;
1621:
1622: if (!_isStaticEncoding
1623: && !encoding.equalsIgnoreCase("UTF-8")
1624: && !encoding.equalsIgnoreCase("UTF-16")
1625: && !(_is.getSource() instanceof ReaderWriterStream)) {
1626: _is.setEncoding(encoding);
1627:
1628: XmlReader oldReader = _reader;
1629:
1630: _reader = new XmlReader(this , _is);
1631: // _reader.setNext(oldReader);
1632:
1633: _reader.setLine(oldReader.getLine());
1634:
1635: _reader.setSystemId(_filename);
1636: _reader.setPublicId(null);
1637: }
1638: }
1639: }
1640:
1641: return true;
1642: }
1643:
1644: private int parsePI() throws IOException, SAXException {
1645: int ch;
1646:
1647: appendText();
1648: ch = _reader.read();
1649: if (!XmlChar.isNameStart(ch))
1650: throw error(L
1651: .l(
1652: "expected name after '<?' at {0}. Processing instructions expect a name like <?foo ... ?>",
1653: badChar(ch)));
1654: ch = _reader.parseName(_text, ch);
1655:
1656: String piName = _text.toString();
1657: if (!piName.equals("xml"))
1658: return parsePITail(piName, ch);
1659: else if (_switchToXml && _activeNode == DOC_NAME && !_inDtd) {
1660: _policy = new XmlPolicy();
1661: return parsePITail(piName, ch);
1662: } else {
1663: throw error(L
1664: .l("<?xml ... ?> occurs after content. The <?xml ... ?> prolog must be at the document start."));
1665:
1666: }
1667: }
1668:
1669: private int parsePITail(String piName, int ch) throws IOException,
1670: SAXException {
1671: ch = skipWhitespace(ch);
1672:
1673: _text.clear();
1674: while (ch != -1) {
1675: if (ch == '?') {
1676: if ((ch = _reader.read()) == '>')
1677: break;
1678: else
1679: _text.append('?');
1680: } else {
1681: _text.append((char) ch);
1682: ch = _reader.read();
1683: }
1684: }
1685:
1686: if (_inDtd) {
1687: QProcessingInstruction pi;
1688: pi = new QProcessingInstruction(piName, _text.toString());
1689: pi._owner = _dtd._owner;
1690: _dtd.appendChild(pi);
1691: } else
1692: _contentHandler.processingInstruction(piName, _text
1693: .toString());
1694:
1695: return _reader.read();
1696: }
1697:
1698: /**
1699: * Parses a comment. The "<!--" has already been read.
1700: */
1701: private void parseComment() throws IOException, SAXException {
1702: if (!_skipComments)
1703: appendText();
1704:
1705: int ch = _reader.read();
1706:
1707: if (ch != '-')
1708: throw error(L.l("expected comment at {0}", badChar(ch)));
1709:
1710: ch = _reader.read();
1711:
1712: if (!_skipComments)
1713: _buf.clear();
1714:
1715: comment: while (ch != -1) {
1716: if (ch == '-') {
1717: ch = _reader.read();
1718:
1719: while (ch == '-') {
1720: if ((ch = _reader.read()) == '>')
1721: break comment;
1722: else if (_strictComments)
1723: throw error(L.l("XML forbids `--' in comments"));
1724: else if (ch == '-') {
1725: if (!_skipComments)
1726: _buf.append('-');
1727: } else {
1728: if (!_skipComments)
1729: _buf.append("--");
1730: break;
1731: }
1732: }
1733:
1734: _buf.append('-');
1735: } else if (!XmlChar.isChar(ch)) {
1736: throw error(L.l("bad character {0}", hex(ch)));
1737: } else {
1738: _buf.append((char) ch);
1739: ch = _reader.read();
1740: }
1741: }
1742:
1743: if (_inDtd) {
1744: QComment comment = new QComment(_buf.toString());
1745: comment._owner = _dtd._owner;
1746: _dtd.appendChild(comment);
1747: } else if (_skipComments) {
1748: } else if (_contentHandler instanceof XMLWriter
1749: && !_skipComments) {
1750: ((XMLWriter) _contentHandler).comment(_buf.toString());
1751: _isIgnorableWhitespace = true;
1752: } else if (_lexicalHandler != null) {
1753: _lexicalHandler.comment(_buf.getBuffer(), 0, _buf
1754: .getLength());
1755: _isIgnorableWhitespace = true;
1756: }
1757: }
1758:
1759: /**
1760: * Parses the contents of a cdata section.
1761: *
1762: * <pre>
1763: * cdata ::= <![CDATA[ ... ]]>
1764: * </pre>
1765: */
1766: private void parseCdata() throws IOException, SAXException {
1767: int ch;
1768:
1769: if (_forgiving) {
1770: if ((ch = _reader.read()) != 'C') {
1771: appendText("<![" + (char) ch);
1772: return;
1773: } else if ((ch = _reader.read()) != 'D') {
1774: appendText("<![C" + (char) ch);
1775: return;
1776: } else if ((ch = _reader.read()) != 'A') {
1777: appendText("<![CD" + (char) ch);
1778: return;
1779: } else if ((ch = _reader.read()) != 'T') {
1780: appendText("<![CDA" + (char) ch);
1781: return;
1782: } else if ((ch = _reader.read()) != 'A') {
1783: appendText("<![CDAT" + (char) ch);
1784: return;
1785: } else if ((ch = _reader.read()) != '[') {
1786: appendText("<![CDATA" + (char) ch);
1787: return;
1788: }
1789: } else if ((ch = _reader.read()) != 'C'
1790: || (ch = _reader.read()) != 'D'
1791: || (ch = _reader.read()) != 'A'
1792: || (ch = _reader.read()) != 'T'
1793: || (ch = _reader.read()) != 'A'
1794: || (ch = _reader.read()) != '[') {
1795: throw error(L.l("expected `<![CDATA[' at {0}", badChar(ch)));
1796: }
1797:
1798: ch = _reader.read();
1799:
1800: if (_lexicalHandler != null) {
1801: _lexicalHandler.startCDATA();
1802: appendText();
1803: } else if (!_isCoalescing)
1804: appendText();
1805:
1806: cdata: while (ch != -1) {
1807: if (ch == ']') {
1808: ch = _reader.read();
1809:
1810: while (ch == ']') {
1811: if ((ch = _reader.read()) == '>')
1812: break cdata;
1813: else if (ch == ']')
1814: addText(']');
1815: else {
1816: addText(']');
1817: break;
1818: }
1819: }
1820:
1821: addText(']');
1822: } else if (_strictCharacters && !isChar(ch)) {
1823: throw error(L.l("expected character in cdata at {0}",
1824: badChar(ch)));
1825: } else {
1826: addText((char) ch);
1827: ch = _reader.read();
1828: }
1829: }
1830:
1831: if (_lexicalHandler != null) {
1832: appendText();
1833: _lexicalHandler.endCDATA();
1834: } else if (!_isCoalescing)
1835: appendText();
1836: }
1837:
1838: /**
1839: * Ignores content to the ']]>'
1840: */
1841: private void parseIgnore() throws IOException, SAXException {
1842: int ch = read();
1843:
1844: while (ch >= 0) {
1845: if (ch != ']') {
1846: ch = read();
1847: } else if ((ch = read()) != ']') {
1848: } else if ((ch = read()) == '>')
1849: return;
1850: }
1851: }
1852:
1853: private int parseContentSpec(QElementDef def, int ch)
1854: throws IOException, SAXException {
1855: ch = expandPE(ch);
1856:
1857: if (XmlChar.isNameStart(ch)) {
1858: ch = _reader.parseName(_text, ch);
1859: String name = _text.toString();
1860:
1861: if (name.equals("EMPTY")) {
1862: def._content = "EMPTY";
1863: return ch;
1864: } else if (name.equals("ANY")) {
1865: def._content = "ANY";
1866: return ch;
1867: } else
1868: throw error(L.l("expected EMPTY or ANY at `{0}'", name));
1869: } else if (ch != '(') {
1870: throw error(L
1871: .l(
1872: "expected grammar definition starting with '(' at {0}. <!ELEMENT> definitions have the syntax <!ELEMENT name - - (grammar)>",
1873: badChar(ch)));
1874: } else {
1875: QContentParticle cp = new QContentParticle();
1876: def._content = cp;
1877:
1878: return parseContentParticle(cp, true);
1879: }
1880: }
1881:
1882: /**
1883: * Parses a content-particle, i.e. a grammer particle in the DTD
1884: * regexp.
1885: */
1886: private int parseContentParticle(QContentParticle cp, boolean isTop)
1887: throws IOException, SAXException {
1888: boolean hasCdata = false;
1889: cp._separator = 0;
1890: cp._repeat = 0;
1891: int ch;
1892:
1893: ch = expandPE(_reader.read());
1894:
1895: for (; ch != -1; ch = expandPE(ch)) {
1896: if (ch == '(') {
1897: QContentParticle child = new QContentParticle();
1898: cp.addChild(child);
1899:
1900: ch = parseContentParticle(child, false);
1901: } else if (XmlChar.isNameStart(ch)) {
1902: ch = _reader.parseName(_text, ch);
1903: cp.addChild(_text.toString());
1904: } else if (ch == '#') {
1905: ch = _reader.parseName(_text, _reader.read());
1906: String name = _text.toString();
1907:
1908: if (_strictXml && cp._children.size() != 0)
1909: throw error(L.l("`#{0}' must occur first", name));
1910: if (_strictXml && !isTop)
1911: throw error(L.l(
1912: "`#{0}' may only occur at top level", name));
1913:
1914: if (name.equals("PCDATA"))
1915: cp.addChild("#PCDATA");
1916: else
1917: throw error(L.l(
1918: "illegal content particle at `#{0}'", name));
1919:
1920: hasCdata = true;
1921: } else
1922: throw error(L.l("expected content particle at {0}",
1923: badChar(ch)));
1924:
1925: ch = expandPE(ch);
1926:
1927: if (ch == '?' || ch == '*' || ch == '+') {
1928: Object child = cp.getChild(cp.getChildSize() - 1);
1929: if (child instanceof QContentParticle) {
1930: QContentParticle cpChild = (QContentParticle) child;
1931: cpChild._repeat = ch;
1932: } else {
1933: QContentParticle cpChild = new QContentParticle();
1934: cpChild.addChild(child);
1935: cpChild._repeat = ch;
1936: cp.setChild(cp.getChildSize() - 1, cpChild);
1937: }
1938:
1939: ch = expandPE(_reader.read());
1940: }
1941:
1942: if (ch == ')')
1943: break;
1944: else if (cp._separator == 0) {
1945: if (ch == '|')
1946: cp._separator = ch;
1947: else if (hasCdata && _strictXml)
1948: throw error(L.l(
1949: "#PCDATA must be separated by `|' at {0}",
1950: badChar(ch)));
1951: else if (ch == ',')
1952: cp._separator = ch;
1953: else if (!_strictXml && ch == '&')
1954: cp._separator = ch;
1955: else
1956: throw error(L.l("expected separator at {0}",
1957: badChar(ch)));
1958:
1959: ch = _reader.read();
1960: } else if (ch != cp._separator)
1961: throw error(L.l("expected `{0}' at {1}", ""
1962: + (char) cp._separator, badChar(ch)));
1963: else
1964: ch = _reader.read();
1965: }
1966:
1967: ch = expandPE(_reader.read());
1968:
1969: if (_strictXml && hasCdata && (ch == '+' || ch == '?'))
1970: throw error(L.l("pcdata clause can not have {0}",
1971: badChar(ch)));
1972: else if (ch == '*' || ch == '+' || ch == '?') {
1973: cp._repeat = ch;
1974: return _reader.read();
1975: } else
1976: return ch;
1977: }
1978:
1979: private int expandPE(int ch) throws IOException, SAXException {
1980: ch = skipWhitespace(ch);
1981:
1982: while (ch == '%') {
1983: parsePEReference();
1984: ch = skipWhitespace(_reader.read());
1985: }
1986:
1987: return ch;
1988: }
1989:
1990: /**
1991: * Parses a PE reference %foo; and inserts the macro text to the input
1992: * stream.
1993: */
1994: private void parsePEReference() throws IOException, SAXException {
1995: int ch = _reader.parseName(_buf, _reader.read());
1996:
1997: if (ch != ';')
1998: throw error(L
1999: .l(
2000: "`%{0};' expects `;' at {1}. Parameter entities have a `%name;' syntax.",
2001: _buf, badChar(ch)));
2002:
2003: addPEReference(_text, _buf.toString());
2004: }
2005:
2006: /**
2007: * Expands the macro value of a PE reference.
2008: */
2009: private void addPEReference(CharBuffer value, String name)
2010: throws IOException, SAXException {
2011: QEntity entity = _dtd.getParameterEntity(name);
2012:
2013: if (entity == null && !_dtd.isExternal())
2014: throw error(L
2015: .l(
2016: "`%{0};' is an unknown parameter entity. Parameter entities must be defined in an <!ENTITY> declaration before use.",
2017: name));
2018: else if (entity != null && entity._value != null) {
2019: setMacro(entity._value);
2020: } else if (entity != null && entity.getSystemId() != null) {
2021: pushInclude(entity.getPublicId(), entity.getSystemId());
2022: } else {
2023: value.append("%");
2024: value.append(name);
2025: value.append(";");
2026: }
2027: }
2028:
2029: /**
2030: * <!ELEMENT name contentspec>
2031: */
2032: private void parseElementDecl(QDocumentType doctype)
2033: throws IOException, SAXException {
2034: int ch = skipWhitespace(_reader.read());
2035:
2036: ch = _reader.parseName(_text, ch);
2037: String name = _text.toString();
2038:
2039: ch = skipWhitespace(ch);
2040:
2041: QElementDef def = _dtd.addElement(name);
2042: def.setLocation(getSystemId(), getFilename(), getLine(),
2043: getColumn());
2044:
2045: boolean needsStartTag = true;
2046: boolean needsEndTag = true;
2047:
2048: if (_optionalTags && (ch == 'O' || ch == '-')) {
2049: needsStartTag = ch == '-';
2050:
2051: ch = skipWhitespace(ch);
2052:
2053: if (ch == '0')
2054: needsEndTag = false;
2055: else if (ch == '-')
2056: needsEndTag = true;
2057: else
2058: throw error(L.l("unknown short tag"));
2059: }
2060:
2061: ch = parseContentSpec(def, ch);
2062:
2063: ch = skipWhitespace(ch);
2064:
2065: if (ch != '>')
2066: throw error(L.l("`<!ELEMENT' must close with `>' at {0}",
2067: badChar(ch)));
2068: }
2069:
2070: private static String toAttrDefault(CharBuffer text) {
2071: for (int i = 0; i < text.length(); i++) {
2072: int ch = text.charAt(i);
2073:
2074: if (ch == '"') {
2075: text.delete(i, i + 1);
2076: text.insert(i, """);
2077: i--;
2078: } else if (ch == '\'') {
2079: text.delete(i, i + 1);
2080: text.insert(i, "'");
2081: i--;
2082: }
2083: }
2084:
2085: return text.toString();
2086: }
2087:
2088: /**
2089: * <!ATTLIST name (attr type def)*>
2090: */
2091: private void parseAttlistDecl(QDocumentType doctype)
2092: throws IOException, SAXException {
2093: int ch = skipWhitespace(_reader.read());
2094:
2095: ch = _reader.parseName(_text, ch);
2096: String name = _text.toString();
2097:
2098: ch = skipWhitespace(ch);
2099:
2100: QElementDef def = _dtd.addElement(name);
2101:
2102: while (XmlChar.isNameStart((ch = expandPE(ch)))) {
2103: ch = _reader.parseName(_text, ch);
2104: String attrName = _text.toString();
2105:
2106: String attrType = null;
2107: ArrayList<String> enumeration = null;
2108: ch = expandPE(ch);
2109: if (ch == '(') {
2110: attrType = "#ENUM";
2111: enumeration = new ArrayList<String>();
2112: do {
2113: ch = expandPE(_reader.read());
2114:
2115: ch = parseNameToken(_text, ch);
2116: enumeration.add(_text.toString());
2117:
2118: ch = expandPE(ch);
2119: } while (ch == '|');
2120:
2121: if (ch != ')')
2122: throw error(L
2123: .l(
2124: "expected `{0}' at {1}. <!ATTRLIST> enumerations definitions are enclosed in '(' ... ')'.",
2125: ")", badChar(ch)));
2126: ch = _reader.read();
2127: } else {
2128: ch = _reader.parseName(_text, ch);
2129: attrType = _text.toString();
2130:
2131: if (attrType.equals("NOTATION")) {
2132: enumeration = new ArrayList<String>();
2133: ch = expandPE(ch);
2134: if (ch != '(')
2135: throw error(L.l("expected `{0}' at {1}", "(",
2136: badChar(ch)));
2137:
2138: do {
2139: ch = expandPE(_reader.read());
2140:
2141: ch = _reader.parseName(_text, ch);
2142: enumeration.add(_text.toString());
2143:
2144: ch = expandPE(ch);
2145: } while (ch == '|');
2146:
2147: if (ch != ')')
2148: throw error(L.l("expected `{0}' at {1}", ")",
2149: badChar(ch)));
2150: ch = _reader.read();
2151: } else if (_attrTypes.get(attrType) != null) {
2152: } else
2153: throw error(L.l("expected attribute type at `{0}'",
2154: attrType));
2155: }
2156:
2157: ch = skipWhitespace(ch);
2158: String qualifier = null;
2159: String attrDefault = null;
2160: if (ch == '#') {
2161: ch = _reader.parseName(_text, _reader.read());
2162: qualifier = "#" + _text.toString();
2163:
2164: if (qualifier.equals("#IMPLIED")) {
2165: } else if (qualifier.equals("#REQUIRED")) {
2166: } else if (qualifier.equals("#FIXED")) {
2167: ch = skipWhitespace(ch);
2168: ch = parseValue(_text, ch, false);
2169: attrDefault = _text.toString();
2170: } else
2171: throw error(L.l(
2172: "expected attribute default at `{0}'",
2173: qualifier));
2174: } else if (ch != '>') {
2175: ch = parseValue(_text, ch, false);
2176: attrDefault = _text.toString();
2177: }
2178:
2179: def.addAttribute(attrName, attrType, enumeration,
2180: qualifier, attrDefault);
2181: if (attrType != null && attrType.equals("ID"))
2182: doctype.setElementId(name, attrName);
2183:
2184: ch = skipWhitespace(ch);
2185: }
2186:
2187: if (ch != '>')
2188: throw error(L.l("expected `{0}' at {1}", ">", badChar(ch)));
2189: }
2190:
2191: /**
2192: * <!NOTATION name systemId publicId>
2193: */
2194: private void parseNotationDecl(QDocumentType doctype)
2195: throws IOException, SAXException {
2196: int ch = skipWhitespace(_reader.read());
2197:
2198: ch = _reader.parseName(_text, ch);
2199: String name = _text.toString();
2200:
2201: ch = skipWhitespace(ch);
2202: ch = _reader.parseName(_text, ch);
2203: String key = _text.toString();
2204:
2205: ch = skipWhitespace(ch);
2206: ch = parseValue(_text, ch, false);
2207: String id = _text.toString();
2208:
2209: ch = skipWhitespace(ch);
2210:
2211: QNotation notation;
2212:
2213: if (key.equals("PUBLIC")) {
2214: String systemId = null;
2215:
2216: if (ch == '"' || ch == '\'') {
2217: ch = parseValue(_text, ch, false);
2218: ch = skipWhitespace(ch);
2219: systemId = _text.toString();
2220: }
2221:
2222: notation = new QNotation(name, id, systemId);
2223: notation._owner = doctype._owner;
2224: notation.setLocation(getSystemId(), getFilename(),
2225: getLine(), getColumn());
2226: } else if (key.equals("SYSTEM")) {
2227: notation = new QNotation(name, null, id);
2228: notation._owner = doctype._owner;
2229: notation.setLocation(getSystemId(), getFilename(),
2230: getLine(), getColumn());
2231: } else
2232: throw error(L.l("expected PUBLIC or SYSTEM at `{0}'", key));
2233:
2234: doctype.addNotation(notation);
2235: doctype.appendChild(notation);
2236:
2237: if (ch != '>')
2238: throw error(L.l("expected `{0}' at {1}", ">", badChar(ch)));
2239: }
2240:
2241: /**
2242: * externalID ::= PUBLIC publicId systemId
2243: * ::= SYSTEM systemId
2244: */
2245: private int parseExternalID(int ch) throws IOException,
2246: SAXException {
2247: ch = _reader.parseName(_text, ch);
2248: String key = _text.toString();
2249: ch = skipWhitespace(ch);
2250:
2251: _extSystemId = null;
2252: _extPublicId = null;
2253: if (key.equals("PUBLIC") || _forgiving
2254: && key.equalsIgnoreCase("public")) {
2255: ch = parseValue(_text, ch, false);
2256: _extPublicId = _text.toString();
2257: ch = skipWhitespace(ch);
2258:
2259: if (_extPublicId.indexOf('&') > 0)
2260: throw error(L
2261: .l(
2262: "Illegal character '&' in PUBLIC identifier '{0}'",
2263: _extPublicId));
2264:
2265: ch = parseValue(_text, ch, false);
2266: ch = skipWhitespace(ch);
2267: _extSystemId = _text.toString();
2268: } else if (key.equals("SYSTEM") || _forgiving
2269: && key.equalsIgnoreCase("system")) {
2270: ch = parseValue(_text, ch, false);
2271: _extSystemId = _text.toString();
2272: } else
2273: throw error(L.l("expected PUBLIC or SYSTEM at `{0}'", key));
2274:
2275: return ch;
2276: }
2277:
2278: /**
2279: * <!ENTITY name systemId publicId>
2280: */
2281: private void parseEntityDecl(QDocumentType doctype)
2282: throws IOException, SAXException {
2283: int ch = skipWhitespace(_reader.read());
2284:
2285: boolean isPe = ch == '%';
2286:
2287: if (isPe)
2288: ch = skipWhitespace(_reader.read());
2289:
2290: ch = _reader.parseName(_text, ch);
2291: String name = _text.toString();
2292:
2293: ch = skipWhitespace(ch);
2294:
2295: QEntity entity;
2296: if (ch == '"' || ch == '\'') {
2297: ch = parseValue(_text, ch, false);
2298:
2299: entity = new QEntity(name, _text.toString(), null, null);
2300: entity._owner = doctype._owner;
2301: entity.setLocation(getSystemId(), getFilename(), getLine(),
2302: getColumn());
2303: } else {
2304: ch = parseExternalID(ch);
2305:
2306: entity = new QEntity(name, null, _extPublicId, _extSystemId);
2307: entity._owner = doctype._owner;
2308: entity.setLocation(getSystemId(), getFilename(), getLine(),
2309: getColumn());
2310:
2311: ch = skipWhitespace(ch);
2312: if (!isPe && XmlChar.isNameStart(ch)) {
2313: ch = _reader.parseName(_text, ch);
2314: String key = _text.toString();
2315: if (key.equals("NDATA")) {
2316: ch = skipWhitespace(ch);
2317: ch = _reader.parseName(_text, ch);
2318:
2319: String ndata = _text.toString();
2320:
2321: entity._ndata = ndata;
2322: } else
2323: throw error(L.l("expected `NDATA' at `{0}'", key));
2324: }
2325: }
2326:
2327: entity._isPe = isPe;
2328:
2329: if (isPe)
2330: doctype.addParameterEntity(entity);
2331: else
2332: doctype.addEntity(entity);
2333:
2334: doctype.appendChild(entity);
2335:
2336: ch = skipWhitespace(ch);
2337:
2338: if (ch != '>')
2339: throw error(L.l("expected `>' at {0}", badChar(ch)));
2340: }
2341:
2342: private boolean isWhitespace(int ch) {
2343: return ch <= 0x20
2344: && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd);
2345: }
2346:
2347: private boolean isChar(int ch) {
2348: return (ch >= 0x20 && ch <= 0xd7ff || ch == 0x9 || ch == 0xa
2349: || ch == 0xd || ch >= 0xe000 && ch <= 0xfffd);
2350: }
2351:
2352: /**
2353: * Returns the hex representation of a byte.
2354: */
2355: private static String hex(int value) {
2356: CharBuffer cb = CharBuffer.allocate();
2357:
2358: for (int b = 3; b >= 0; b--) {
2359: int v = (value >> (4 * b)) & 0xf;
2360: if (v < 10)
2361: cb.append((char) (v + '0'));
2362: else
2363: cb.append((char) (v - 10 + 'a'));
2364: }
2365:
2366: return cb.close();
2367: }
2368:
2369: /**
2370: * Returns the current filename.
2371: */
2372: public String getFilename() {
2373: return _filename;
2374: }
2375:
2376: /**
2377: * Returns the current line.
2378: */
2379: public int getLine() {
2380: return _line;
2381: }
2382:
2383: /**
2384: * Returns the current column.
2385: */
2386: private int getColumn() {
2387: return 0;
2388: }
2389:
2390: /**
2391: * Returns the opening line of the current node.
2392: */
2393: int getNodeLine() {
2394: if (_elementTop > 0)
2395: return _elementLines[_elementTop - 1];
2396: else
2397: return 1;
2398: }
2399:
2400: /**
2401: * Returns the current public id being read.
2402: */
2403: public String getPublicId() {
2404: if (_reader != null)
2405: return _reader.getPublicId();
2406: else
2407: return _publicId;
2408: }
2409:
2410: /**
2411: * Returns the current system id being read.
2412: */
2413: public String getSystemId() {
2414: if (_reader != null)
2415: return _reader.getSystemId();
2416: else if (_systemId != null)
2417: return _systemId;
2418: else
2419: return _filename;
2420: }
2421:
2422: public void setLine(int line) {
2423: _line = line;
2424: }
2425:
2426: public int getLineNumber() {
2427: return getLine();
2428: }
2429:
2430: public int getColumnNumber() {
2431: return getColumn();
2432: }
2433:
2434: /**
2435: * Adds a string to the current text buffer.
2436: */
2437: private void addText(String s) throws IOException, SAXException {
2438: int len = s.length();
2439:
2440: for (int i = 0; i < len; i++)
2441: addText(s.charAt(i));
2442: }
2443:
2444: /**
2445: * Adds a character to the current text buffer.
2446: */
2447: private void addText(char ch) throws IOException, SAXException {
2448: if (_textLength >= _textCapacity) {
2449: appendText();
2450: }
2451:
2452: if (_textLength > 0 && _textBuffer[_textLength - 1] == '\r') {
2453: _textBuffer[_textLength - 1] = '\n';
2454: if (ch == '\n')
2455: return;
2456: }
2457:
2458: if (_isIgnorableWhitespace && !XmlChar.isWhitespace(ch))
2459: _isIgnorableWhitespace = false;
2460:
2461: _textBuffer[_textLength++] = ch;
2462: }
2463:
2464: /**
2465: * Flushes the text buffer to the SAX callback.
2466: */
2467: private void appendText() throws IOException, SAXException {
2468: if (_textLength > 0) {
2469: if (_activeNode == DOC_NAME) {
2470: if (_isJspText) {
2471: _contentHandler.characters(_textBuffer, 0,
2472: _textLength);
2473: } else if (_isIgnorableWhitespace) {
2474: } else if (_strictXml)
2475: throw error(L.l("expected top element at `{0}'",
2476: new String(_textBuffer, 0, _textLength)));
2477: else {
2478: addChild(TEXT_NAME);
2479: _contentHandler.characters(_textBuffer, 0,
2480: _textLength);
2481: }
2482: } else if (_isJspText) {
2483: _contentHandler.characters(_textBuffer, 0, _textLength);
2484: } else if (_isIgnorableWhitespace) {
2485: if (_isHtml)
2486: _contentHandler.characters(_textBuffer, 0,
2487: _textLength);
2488: else
2489: _contentHandler.ignorableWhitespace(_textBuffer, 0,
2490: _textLength);
2491: } else if (_strictXml && !_isIgnorableWhitespace
2492: && _activeNode == DOC_NAME) {
2493: } else {
2494: if (_isJspText) {
2495: } else if (_isIgnorableWhitespace)
2496: addChild(WHITESPACE_NAME);
2497: else
2498: addChild(TEXT_NAME);
2499: _contentHandler.characters(_textBuffer, 0, _textLength);
2500: }
2501:
2502: _textLength = 0;
2503: _isIgnorableWhitespace = true;
2504: }
2505: }
2506:
2507: private void addElement(String child, boolean isEmpty,
2508: QAttributes attributes, NamespaceMap oldNamespace)
2509: throws IOException, SAXException {
2510: _text.clear();
2511: _text.append(child);
2512: addElement(_policy.getName(_text), isEmpty, attributes,
2513: oldNamespace);
2514: }
2515:
2516: /**
2517: * Adds an element as a child of the current tree. Some
2518: * DTDs, like HTML, will push additional nodes to make
2519: * the tree work, e.g. the body tag.
2520: *
2521: * @param child the new child to be added.
2522: * @param isEmpty true if the tag is already closed.
2523: */
2524: private void addElement(QName child, boolean isEmpty,
2525: QAttributes attributes, NamespaceMap oldNamespace)
2526: throws IOException, SAXException {
2527: if (!_doResinInclude) {
2528: } else if (child.getName() == "include"
2529: && child.getNamespaceURI() == "http://caucho.com/ns/resin/core"
2530: || child.getName() == "resin:include") {
2531: if (!isEmpty)
2532: throw error(L.l("resin:include must be an empty tag"));
2533:
2534: handleResinInclude();
2535: return;
2536: } else if (child.getName() == "include-directory"
2537: && child.getNamespaceURI() == "http://caucho.com/ns/resin/core"
2538: || child.getName() == "resin:include-directory") {
2539: if (!isEmpty)
2540: throw error(L
2541: .l("resin:include-directory must be an empty tag"));
2542:
2543: handleResinIncludeDirectory();
2544: return;
2545: }
2546:
2547: if (_activeNode == DOC_NAME && _hasTopElement && _strictXml)
2548: throw error(L.l(
2549: "expected a single top-level element at `{0}'",
2550: child.getName()));
2551:
2552: _hasTopElement = true;
2553:
2554: String childURI = child.getNamespaceURI();
2555: String childLocal = child.getLocalName();
2556:
2557: if (childURI == null) {
2558: childURI = "";
2559:
2560: if (_isNamespaceAware)
2561: childLocal = child.getName();
2562: else
2563: childLocal = "";
2564: }
2565:
2566: while (true) {
2567: int action = _policy.openAction(this , _activeNode, child);
2568:
2569: switch (action) {
2570: case Policy.IGNORE:
2571: return;
2572:
2573: case Policy.PUSH:
2574: //if (dbg.canWrite())
2575: // dbg.println("<" + child.getNodeName() + ">");
2576:
2577: if (_contentHandler instanceof DOMBuilder)
2578: ((DOMBuilder) _contentHandler).startElement(child,
2579: attributes);
2580: else {
2581: _contentHandler.startElement(childURI, childLocal,
2582: child.getName(), attributes);
2583: }
2584:
2585: if (isEmpty) {
2586: _contentHandler.endElement(childURI, childLocal,
2587: child.getName());
2588:
2589: popNamespaces(oldNamespace);
2590: } else {
2591: if (_elementTop == _elementNames.length) {
2592: int len = _elementNames.length;
2593: QName[] names = new QName[2 * len];
2594: NamespaceMap[] newNamespaces = new NamespaceMap[2 * len];
2595: int[] lines = new int[2 * len];
2596: System.arraycopy(_elementNames, 0, names, 0,
2597: len);
2598: System.arraycopy(_elementLines, 0, lines, 0,
2599: len);
2600: System.arraycopy(_namespaces, 0, newNamespaces,
2601: 0, len);
2602: _elementNames = names;
2603: _elementLines = lines;
2604: _namespaces = newNamespaces;
2605: }
2606: _namespaces[_elementTop] = oldNamespace;
2607: _elementLines[_elementTop] = getLine();
2608: _elementNames[_elementTop] = _activeNode;
2609: _elementTop++;
2610: _activeNode = child;
2611: _isTagStart = true;
2612: }
2613: return;
2614:
2615: case Policy.PUSH_EMPTY:
2616: //if (dbg.canWrite())
2617: // dbg.println("<" + child.getNodeName() + "/>");
2618:
2619: if (_contentHandler instanceof DOMBuilder)
2620: ((DOMBuilder) _contentHandler).startElement(child,
2621: attributes);
2622: else {
2623: _contentHandler.startElement(childURI, childLocal,
2624: child.getName(), attributes);
2625: }
2626:
2627: _contentHandler.endElement(childURI, childLocal, child
2628: .getName());
2629:
2630: popNamespaces(oldNamespace);
2631: return;
2632:
2633: case Policy.PUSH_OPT:
2634: addElement(_policy.getOpt(), false, _nullAttributes,
2635: oldNamespace);
2636: break;
2637:
2638: case Policy.PUSH_VERBATIM:
2639: if (_contentHandler instanceof DOMBuilder)
2640: ((DOMBuilder) _contentHandler).startElement(child,
2641: attributes);
2642: else
2643: _contentHandler.startElement(childURI, childLocal,
2644: child.getName(), attributes);
2645:
2646: scanVerbatim(child.getName());
2647: appendText();
2648: _contentHandler.endElement(childURI, childLocal, child
2649: .getName());
2650: return;
2651:
2652: case Policy.POP:
2653: //if (dbg.canWrite())
2654: // dbg.println("</" + activeNode.getNodeName() + ">");
2655:
2656: popNode();
2657:
2658: if (_activeNode == null)
2659: return;
2660: break;
2661:
2662: default:
2663: throw error(L.l("can't add `{0}' to `{1}'", child
2664: .getName(), _activeNode.getName()));
2665: }
2666: }
2667: }
2668:
2669: /**
2670: * Adds a child node to the current node.
2671: */
2672: private void addChild(QName child) throws IOException, SAXException {
2673: while (_activeNode != null) {
2674: int action = _policy.openAction(this , _activeNode, child);
2675:
2676: switch (action) {
2677: case Policy.IGNORE:
2678: return;
2679:
2680: case Policy.PUSH:
2681: _isTagStart = true;
2682:
2683: case Policy.PUSH_EMPTY:
2684: //if (dbg.canWrite())
2685: // dbg.println("<" + child.getNodeName() + ">");
2686:
2687: /*
2688: if (child.getNodeType() == child.TEXT_NODE) {
2689: String value = child.getNodeValue();
2690: contentHandler.characters(value.toCharArray(), 0, value.length());
2691: }
2692: */
2693: return;
2694:
2695: case Policy.PUSH_OPT:
2696: addElement(_policy.getOpt(), false, _nullAttributes,
2697: _namespaceMap);
2698: break;
2699:
2700: case Policy.PUSH_VERBATIM:
2701: scanVerbatim(child.getName());
2702: return;
2703:
2704: case Policy.POP:
2705: // if (dbg.canWrite())
2706: // dbg.println("</" + activeNode.getNodeName() + ">");
2707:
2708: popNode();
2709: break;
2710: default:
2711: throw error(L.l("cannot add `{0}' to `{1}'", child
2712: .getName(), _activeNode.getName()));
2713: }
2714: }
2715: }
2716:
2717: private void scanVerbatim(String name) throws IOException,
2718: SAXException {
2719: int ch = _reader.read();
2720:
2721: while (ch >= 0) {
2722: if (ch != '<') {
2723: addText((char) ch);
2724: ch = _reader.read();
2725: } else if ((ch = _reader.read()) != '/')
2726: addText('<');
2727: else {
2728: ch = _reader.parseName(_eltName, _reader.read());
2729:
2730: if (!_eltName.matchesIgnoreCase(name)) {
2731: addText("</");
2732: addText(_eltName.toString());
2733: } else if (ch != '>') {
2734: addText("</");
2735: addText(_eltName.toString());
2736: } else {
2737: return;
2738: }
2739: }
2740: }
2741:
2742: throw error(L.l("expected </{0}> at {1}", name, badChar(ch)));
2743: }
2744:
2745: private int skipWhitespace(int ch) throws IOException, SAXException {
2746: while (ch <= 0x20
2747: && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd)) {
2748: ch = read();
2749: }
2750:
2751: return ch;
2752: }
2753:
2754: public void setReader(XmlReader reader) {
2755: _reader = reader;
2756: }
2757:
2758: /**
2759: * Adds text to the macro, escaping attribute values.
2760: */
2761: private void setMacroAttr(String text) throws IOException,
2762: SAXException {
2763: if (_reader != _macro) {
2764: _macro.init(this , _reader);
2765: _reader = _macro;
2766: }
2767:
2768: int j = _macroIndex;
2769: for (int i = 0; i < text.length(); i++) {
2770: int ch = text.charAt(i);
2771:
2772: if (ch == '\'')
2773: _macro.add("'");
2774: else if (ch == '"')
2775: _macro.add(""");
2776: else
2777: _macro.add((char) ch);
2778: }
2779: }
2780:
2781: private void pushInclude(String systemId) throws IOException,
2782: SAXException {
2783: pushInclude(null, systemId);
2784: }
2785:
2786: /**
2787: * Pushes the named file as a lexical include.
2788: *
2789: * @param systemId the name of the file to include.
2790: */
2791: private void pushInclude(String publicId, String systemId)
2792: throws IOException, SAXException {
2793: InputStream stream = openStream(systemId, publicId);
2794: if (stream == null)
2795: throw new FileNotFoundException(systemId);
2796: _is = Vfs.openRead(stream);
2797: Path oldSearchPath = _searchPath;
2798: Path path = _is.getPath();
2799: if (path != null) {
2800: _owner.addDepend(path);
2801:
2802: if (_searchPath != null) {
2803: _searchPath = path.getParent();
2804: _reader.setSearchPath(oldSearchPath);
2805: }
2806: }
2807:
2808: _filename = systemId;
2809: /*
2810: XmlReader nextReader;
2811: if (_reader instanceof Utf8Reader)
2812: nextReader = new Utf8Reader(this, _is);
2813: else {
2814: _is.setEncoding(_reader.getReadStream().getEncoding());
2815: nextReader = new XmlReader(this, _is);
2816: }
2817: _reader = nextReader;
2818: */
2819:
2820: XmlReader oldReader = _reader;
2821: _reader = null;
2822:
2823: _line = 1;
2824:
2825: int ch = parseXMLDeclaration(oldReader);
2826:
2827: XmlReader reader = _reader;
2828:
2829: if (reader instanceof MacroReader)
2830: reader = reader.getNext();
2831:
2832: reader.setSystemId(systemId);
2833: reader.setFilename(systemId);
2834: reader.setPublicId(publicId);
2835: reader.setNext(oldReader);
2836:
2837: unread(ch);
2838: }
2839:
2840: private void popInclude() throws IOException, SAXException {
2841: XmlReader oldReader = _reader;
2842: _reader = _reader.getNext();
2843: oldReader.setNext(null);
2844: _filename = _reader.getFilename();
2845: _line = _reader.getLine();
2846: _is = _reader.getReadStream();
2847: if (_reader.getSearchPath() != null)
2848: _searchPath = _reader.getSearchPath();
2849: }
2850:
2851: private void setMacro(String text) throws IOException, SAXException {
2852: if (_reader == _macro) {
2853: } else if (_macro.getNext() == null) {
2854: _macro.init(this , _reader);
2855: _reader = _macro;
2856: } else {
2857: _macro = new MacroReader();
2858: _macro.init(this , _reader);
2859: _reader = _macro;
2860: }
2861:
2862: _macro.add(text);
2863: }
2864:
2865: private int read() throws IOException, SAXException {
2866: int ch = _reader.read();
2867: while (ch < 0 && _reader.getNext() != null) {
2868: if (_stopOnIncludeEnd)
2869: return -1;
2870:
2871: popInclude();
2872: ch = _reader.read();
2873: }
2874:
2875: return ch;
2876: }
2877:
2878: public void unread(int ch) {
2879: if (ch < 0) {
2880: return;
2881: } else if (_reader == _macro) {
2882: } else if (_macro.getNext() == null) {
2883: _macro.init(this , _reader);
2884: _reader = _macro;
2885: } else {
2886: _macro = new MacroReader();
2887: _macro.init(this , _reader);
2888: _reader = _macro;
2889: }
2890:
2891: _macro.prepend((char) ch);
2892: }
2893:
2894: /**
2895: * Returns an error including the current line.
2896: *
2897: * @param text the error message text.
2898: */
2899: XmlParseException error(String text) {
2900: if (_errorHandler != null) {
2901: SAXParseException e = new SAXParseException(text, _locator);
2902:
2903: try {
2904: _errorHandler.fatalError(e);
2905: } catch (SAXException e1) {
2906: }
2907: }
2908:
2909: return new XmlParseException(_filename, _line, text);
2910: }
2911:
2912: public void free() {
2913: _filename = null;
2914: }
2915:
2916: /**
2917: * Returns a user-readable string for an error character.
2918: */
2919: static String badChar(int ch) {
2920: if (ch < 0 || ch == 0xffff)
2921: return L.l("end of file");
2922: else if (ch == '\n' || ch == '\r')
2923: return L.l("end of line");
2924: else if (ch >= 0x20 && ch <= 0x7f)
2925: return "`" + (char) ch + "'";
2926: else
2927: return "`" + (char) ch + "' (\\u" + hex(ch) + ")";
2928: }
2929:
2930: private void printDebugNode(WriteStream s, Node node, int depth)
2931: throws IOException {
2932: if (node == null)
2933: return;
2934:
2935: for (int i = 0; i < depth; i++)
2936: s.print(' ');
2937:
2938: if (node.getFirstChild() != null) {
2939: s.println("<" + node.getNodeName() + ">");
2940: for (Node child = node.getFirstChild(); child != null; child = child
2941: .getNextSibling()) {
2942: printDebugNode(s, child, depth + 2);
2943: }
2944: for (int i = 0; i < depth; i++)
2945: s.print(' ');
2946: s.println("</" + node.getNodeName() + ">");
2947: } else
2948: s.println("<" + node.getNodeName() + "/>");
2949: }
2950:
2951: public static class LocatorImpl implements ExtendedLocator {
2952: XmlParser _parser;
2953:
2954: LocatorImpl(XmlParser parser) {
2955: _parser = parser;
2956: }
2957:
2958: public String getSystemId() {
2959: if (_parser._reader != null
2960: && _parser._reader.getSystemId() != null)
2961: return _parser._reader.getSystemId();
2962: else if (_parser.getSystemId() != null)
2963: return _parser.getSystemId();
2964: else if (_parser._reader != null
2965: && _parser._reader.getFilename() != null)
2966: return _parser._reader.getFilename();
2967: else if (_parser.getFilename() != null)
2968: return _parser.getFilename();
2969: else
2970: return null;
2971: }
2972:
2973: public String getFilename() {
2974: if (_parser._reader != null
2975: && _parser._reader.getFilename() != null)
2976: return _parser._reader.getFilename();
2977: else if (_parser.getFilename() != null)
2978: return _parser.getFilename();
2979: else if (_parser._reader != null
2980: && _parser._reader.getSystemId() != null)
2981: return _parser._reader.getSystemId();
2982: else if (_parser.getSystemId() != null)
2983: return _parser.getSystemId();
2984: else
2985: return null;
2986: }
2987:
2988: public String getPublicId() {
2989: if (_parser._reader != null)
2990: return _parser._reader.getPublicId();
2991: else
2992: return _parser.getPublicId();
2993: }
2994:
2995: public int getLineNumber() {
2996: if (_parser._reader != null)
2997: return _parser._reader.getLine();
2998: else
2999: return _parser.getLineNumber();
3000: }
3001:
3002: public int getColumnNumber() {
3003: return _parser.getColumnNumber();
3004: }
3005: }
3006: }
|