0001: /*
0002: * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
0003: *
0004: * This file is part of Resin(R) Open Source
0005: *
0006: * Each copy or derived work must preserve the copyright notice and this
0007: * notice unmodified.
0008: *
0009: * Resin Open Source is free software; you can redistribute it and/or modify
0010: * it under the terms of the GNU General Public License as published by
0011: * the Free Software Foundation; either version 2 of the License, or
0012: * (at your option) any later version.
0013: *
0014: * Resin Open Source is distributed in the hope that it will be useful,
0015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
0017: * of NON-INFRINGEMENT. See the GNU General Public License for more
0018: * details.
0019: *
0020: * You should have received a copy of the GNU General Public License
0021: * along with Resin Open Source; if not, write to the
0022: *
0023: * Free Software Foundation, Inc.
0024: * 59 Temple Place, Suite 330
0025: * Boston, MA 02111-1307 USA
0026: *
0027: * @author Scott Ferguson
0028: */
0029:
0030: package com.caucho.xml2;
0031:
0032: import com.caucho.util.CharBuffer;
0033: import com.caucho.vfs.*;
0034: import com.caucho.xml2.readers.MacroReader;
0035: import com.caucho.xml2.readers.Utf16Reader;
0036: import com.caucho.xml2.readers.Utf8Reader;
0037: import com.caucho.xml2.readers.XmlReader;
0038:
0039: import org.w3c.dom.Document;
0040: import org.w3c.dom.Node;
0041: import org.xml.sax.InputSource;
0042: import org.xml.sax.Locator;
0043: import org.xml.sax.SAXException;
0044: import org.xml.sax.SAXParseException;
0045:
0046: import javax.xml.namespace.QName;
0047: import java.io.FileNotFoundException;
0048: import java.io.IOException;
0049: import java.io.InputStream;
0050: import java.util.ArrayList;
0051: import java.util.Arrays;
0052: import java.util.logging.Level;
0053:
0054: /**
0055: * A configurable XML parser. Loose versions of XML and HTML are supported
0056: * by changing the Policy object.
0057: *
0058: * <p>Normally, applications will use Xml, LooseXml, Html, or LooseHtml.
0059: */
0060: public class XmlParser extends AbstractParser {
// Xerces uses the following
public static final String XMLNS = "http://www.w3.org/2000/xmlns/";
public static final String XML = "http://www.w3.org/XML/1998/namespace";

// Synthetic node names; DOC_NAME is what _activeNode holds while the parser
// is still at document level (see parseInt and parseDoctype).
static final QName DOC_NAME = new QName("#document");
static final QName TEXT_NAME = new QName("#text");
static final QName WHITESPACE_NAME = new QName("#whitespace");

// NOTE(review): blank final, presumably assigned in a static initializer
// outside this part of the file.
private static final boolean[] XML_NAME_CHAR;

// Attribute list handed to ContentHandler.startElement; rebuilt per element.
QAttributes _attributes;
QAttributes _nullAttributes;

// Scratch buffers. _text holds processing-instruction names/data and the
// PUBLIC/SYSTEM keyword; _buf holds entity names and comment text.
CharBuffer _text;
CharBuffer _eltName;
CharBuffer _cb;
CharBuffer _buf = new CharBuffer();
String _textFilename;
int _textLine;

// Input window: the char array comes from _tempInputBuffer (see parseInt);
// _inputOffset/_inputLength delimit the unread portion.
TempCharBuffer _tempInputBuffer;
char[] _inputBuffer;
int _inputOffset;
int _inputLength;

// Character-at-a-time text accumulation used by addText(char).
char[] _textBuffer = new char[1024];
int _textLength;
int _textCapacity = _textBuffer.length;
boolean _isIgnorableWhitespace;

// Initially the very same array as _textBuffer, not a copy.
char[] _valueBuffer = _textBuffer;

CharBuffer _name = new CharBuffer();
CharBuffer _nameBuffer = new CharBuffer();

MacroReader _macro = new MacroReader();
int _macroIndex = 0;
int _macroLength = 0;
char[] _macroBuffer;

// Line numbers of open elements; read by getNodeLine().
int[] _elementLines = new int[64];
int _elementTop;

// Non-xmlns attributes of the element being parsed, in document order
// (parallel lists filled by parseAttributes).
ArrayList<SaxIntern.Entry> _attrNames = new ArrayList<SaxIntern.Entry>();
ArrayList<String> _attrValues = new ArrayList<String>();

ReadStream _is;
XmlReader _reader;

// Result of the most recent parseExternalID() call.
String _extPublicId;
String _extSystemId;

NamespaceContextImpl _namespace = new NamespaceContextImpl();
SaxIntern _intern = new SaxIntern(_namespace);;

QName _activeNode;
QName _topNamespaceNode;
boolean _isTagStart;
// Set while parseDoctype is reading an external DTD subset.
boolean _stopOnIncludeEnd;
// Set by parseElement; parseInt rejects documents where it stays false.
boolean _hasTopElement;
boolean _hasDoctype;
// Locator passed to the content handler in parseInt.
Locator _locator = new LocatorImpl(this );
0123:
/**
 * Creates a parser using the superclass's default construction; no DTD is
 * supplied.
 */
public XmlParser() {
}
0126:
/**
 * Creates a new parser with a given dtd.
 *
 * @param dtd the parser's dtd, forwarded to the superclass constructor.
 */
XmlParser(QDocumentType dtd) {
  super (dtd);
}
0136:
0137: /**
0138: * Initialize the parser.
0139: */
0140: void init() {
0141: super .init();
0142:
0143: _attributes = new QAttributes();
0144: _nullAttributes = new QAttributes();
0145: _eltName = new CharBuffer();
0146: _text = new CharBuffer();
0147:
0148: _textLength = 0;
0149: _isIgnorableWhitespace = true;
0150: _elementTop = 0;
0151: _elementLines[0] = 1;
0152:
0153: _line = 1;
0154:
0155: _dtd = null;
0156: _isTagStart = false;
0157: _stopOnIncludeEnd = false;
0158:
0159: _extPublicId = null;
0160: _extSystemId = null;
0161:
0162: _filename = null;
0163: _publicId = null;
0164: _systemId = null;
0165:
0166: _hasTopElement = false;
0167: _hasDoctype = false;
0168:
0169: _macroIndex = 0;
0170: _macroLength = 0;
0171:
0172: _reader = null;
0173:
0174: // _owner = null;
0175: }
0176:
0177: /**
0178: * Parse the document from a read stream.
0179: *
0180: * @param is read stream to parse from.
0181: *
0182: * @return The parsed document.
0183: */
0184: Document parseInt(ReadStream is) throws IOException, SAXException {
0185: _tempInputBuffer = TempCharBuffer.allocate();
0186: _inputBuffer = _tempInputBuffer.getBuffer();
0187: _inputLength = _inputOffset = 0;
0188:
0189: _is = is;
0190:
0191: if (_filename == null && _systemId != null)
0192: _filename = _systemId;
0193: else if (_filename == null)
0194: _filename = _is.getUserPath();
0195:
0196: if (_systemId == null) {
0197: _systemId = _is.getPath().getURL();
0198: if ("null:".equals(_systemId)
0199: || "string:".equals(_systemId))
0200: _systemId = "stream";
0201: }
0202:
0203: if (_filename == null)
0204: _filename = _systemId;
0205:
0206: if (_filename == null)
0207: _filename = "stream";
0208:
0209: if (_dtd != null)
0210: _dtd.setSystemId(_systemId);
0211:
0212: if (_builder != null) {
0213: if (!"string:".equals(_systemId)
0214: && !"stream".equals(_systemId))
0215: _builder.setSystemId(_systemId);
0216: _builder.setFilename(_is.getPath().getURL());
0217: }
0218:
0219: if (_contentHandler == null)
0220: _contentHandler = new org.xml.sax.helpers.DefaultHandler();
0221:
0222: _contentHandler.setDocumentLocator(_locator);
0223:
0224: if (_owner == null)
0225: _owner = new QDocument();
0226: if (_defaultEncoding != null)
0227: _owner.setAttribute("encoding", _defaultEncoding);
0228: _owner.addDepend(is.getPath());
0229:
0230: _activeNode = DOC_NAME;
0231:
0232: _contentHandler.startDocument();
0233:
0234: parseXMLDeclaration(null);
0235:
0236: parseNode();
0237:
0238: /*
0239: if (dbg.canWrite()) {
0240: printDebugNode(dbg, doc, 0);
0241: dbg.flush();
0242: }
0243: */
0244:
0245: if (!_hasTopElement)
0246: throw error(L
0247: .l("XML file has no top-element. All well-formed XML files have a single top-level element."));
0248:
0249: _contentHandler.endDocument();
0250:
0251: QDocument owner = _owner;
0252: _owner = null;
0253:
0254: return owner;
0255: }
0256:
0257: /**
0258: * The main dispatch loop.
0259: *
0260: * @param node the current node
0261: * @param ch the next character
0262: */
0263: private void parseNode() throws IOException, SAXException {
0264: char[] valueBuffer = _valueBuffer;
0265: int valueLength = valueBuffer.length;
0266: int valueOffset = 0;
0267: boolean isWhitespace = true;
0268:
0269: char[] inputBuffer = _inputBuffer;
0270: int inputLength = _inputLength;
0271: int inputOffset = _inputOffset;
0272:
0273: loop: while (true) {
0274: int ch;
0275:
0276: if (inputOffset < inputLength)
0277: ch = inputBuffer[inputOffset++];
0278: else if (fillBuffer()) {
0279: inputBuffer = _inputBuffer;
0280: inputOffset = _inputOffset;
0281: inputLength = _inputLength;
0282:
0283: ch = inputBuffer[inputOffset++];
0284: } else {
0285: if (valueOffset > 0)
0286: addText(valueBuffer, 0, valueOffset, isWhitespace);
0287:
0288: _inputOffset = inputOffset;
0289: _inputLength = inputLength;
0290:
0291: close();
0292: return;
0293: }
0294:
0295: switch (ch) {
0296: case '\n':
0297: _line++;
0298: valueBuffer[valueOffset++] = (char) ch;
0299: break;
0300:
0301: case ' ':
0302: case '\t':
0303: case '\r':
0304: valueBuffer[valueOffset++] = (char) ch;
0305: break;
0306:
0307: case 0xffff:
0308: // marker for end of text for serialization (?)
0309: if (valueOffset > 0)
0310: addText(valueBuffer, 0, valueOffset, isWhitespace);
0311:
0312: _inputOffset = inputOffset;
0313: _inputLength = inputLength;
0314: return;
0315:
0316: case '&':
0317: if (valueOffset > 0)
0318: addText(valueBuffer, 0, valueOffset, isWhitespace);
0319:
0320: _inputOffset = inputOffset;
0321: _inputLength = inputLength;
0322:
0323: parseEntityReference();
0324:
0325: inputOffset = _inputOffset;
0326: inputLength = _inputOffset;
0327: break;
0328:
0329: case '<':
0330: if (valueOffset > 0)
0331: addText(valueBuffer, 0, valueOffset, isWhitespace);
0332:
0333: _inputOffset = inputOffset;
0334: _inputLength = inputLength;
0335:
0336: ch = read();
0337:
0338: if (ch == '/') {
0339: SaxIntern.Entry entry = parseName(0, false);
0340:
0341: ch = read();
0342:
0343: if (ch != '>') {
0344: throw error(L
0345: .l(
0346: "'</{0}>' expected '>' at {1}. Closing tags must close immediately after the tag name.",
0347: entry.getName(), badChar(ch)));
0348: }
0349:
0350: _namespace.pop(entry);
0351: }
0352: // element: <tag attr=value ... attr=value> ...
0353: else if (XmlChar.isNameStart(ch)) {
0354: parseElement(ch);
0355: ch = read();
0356: }
0357: // <! ...
0358: else if (ch == '!') {
0359: // <![CDATA[ ... ]]>
0360: if ((ch = read()) == '[') {
0361: parseCdata();
0362: ch = read();
0363: }
0364: // <!-- ... -->
0365: else if (ch == '-') {
0366: parseComment();
0367:
0368: ch = read();
0369: } else if (XmlChar.isNameStart(ch)) {
0370: unread(ch);
0371:
0372: SaxIntern.Entry entry = parseName(0, false);
0373:
0374: String declName = entry.getName();
0375: if (declName.equals("DOCTYPE")) {
0376: parseDoctype();
0377: if (_contentHandler instanceof DOMBuilder)
0378: ((DOMBuilder) _contentHandler)
0379: .dtd(_dtd);
0380: } else
0381: throw error(L
0382: .l(
0383: "expected '<!DOCTYPE' declaration at {0}",
0384: declName));
0385: } else
0386: throw error(L
0387: .l(
0388: "expected '<!DOCTYPE' declaration at {0}",
0389: badChar(ch)));
0390: }
0391: // PI: <?tag attr=value ... attr=value?>
0392: else if (ch == '?') {
0393: parsePI();
0394: } else {
0395: throw error(L
0396: .l(
0397: "expected tag name after '<' at {0}. Open tag names must immediately follow the open brace like '<foo ...>'",
0398: badChar(ch)));
0399: }
0400:
0401: inputOffset = _inputOffset;
0402: inputLength = _inputLength;
0403: break;
0404:
0405: default:
0406: isWhitespace = false;
0407: valueBuffer[valueOffset++] = (char) ch;
0408: break;
0409: }
0410:
0411: if (valueOffset == valueLength) {
0412: addText(valueBuffer, 0, valueOffset, isWhitespace);
0413:
0414: valueOffset = 0;
0415: }
0416: }
0417: }
0418:
0419: /**
0420: * Parses the <!DOCTYPE> declaration.
0421: */
0422: private void parseDoctype() throws IOException, SAXException {
0423: if (_activeNode != DOC_NAME)
0424: throw error(L
0425: .l("<!DOCTYPE immediately follow the <?xml ...?> declaration."));
0426:
0427: int ch = skipWhitespace(read());
0428: ch = _reader.parseName(_nameBuffer, ch);
0429: String name = _nameBuffer.toString();
0430: ch = skipWhitespace(ch);
0431:
0432: if (_dtd == null)
0433: _dtd = new QDocumentType(name);
0434:
0435: _dtd.setName(name);
0436:
0437: if (XmlChar.isNameStart(ch)) {
0438: ch = parseExternalID(ch);
0439: ch = skipWhitespace(ch);
0440:
0441: _dtd._publicId = _extPublicId;
0442: _dtd._systemId = _extSystemId;
0443: }
0444:
0445: if (_dtd._systemId != null && !_dtd._systemId.equals("")) {
0446: InputStream is = null;
0447:
0448: unread(ch);
0449:
0450: XmlReader oldReader = _reader;
0451: boolean hasInclude = false;
0452:
0453: try {
0454: pushInclude(_extPublicId, _extSystemId);
0455: hasInclude = true;
0456: } catch (Exception e) {
0457: if (log.isLoggable(Level.FINEST))
0458: log.log(Level.FINER, e.toString(), e);
0459: else
0460: log.finer(e.toString());
0461: }
0462:
0463: if (hasInclude) {
0464: _stopOnIncludeEnd = true;
0465: try {
0466: DtdParser dtdParser = new DtdParser(this , _dtd);
0467: ch = dtdParser.parseDoctypeDecl(_dtd);
0468: } catch (XmlParseException e) {
0469: if (_extSystemId != null
0470: && _extSystemId.startsWith("http")) {
0471: log.log(Level.FINE, e.toString(), e);
0472: } else
0473: throw e;
0474: }
0475: _stopOnIncludeEnd = false;
0476:
0477: while (_reader != null && _reader != oldReader)
0478: popInclude();
0479: }
0480:
0481: if (_reader != null)
0482: ch = skipWhitespace(read());
0483: }
0484:
0485: if (ch == '[') {
0486: DtdParser dtdParser = new DtdParser(this , _dtd);
0487: ch = dtdParser.parseDoctypeDecl(_dtd);
0488: }
0489:
0490: ch = skipWhitespace(ch);
0491:
0492: if (ch != '>')
0493: throw error(L.l("expected '>' in <!DOCTYPE at {0}",
0494: badChar(ch)));
0495: }
0496:
0497: /**
0498: * Parses an element.
0499: *
0500: * @param ch the current character
0501: */
0502: private void parseElement(int ch) throws IOException, SAXException {
0503: unread(ch);
0504:
0505: SaxIntern.Entry entry = parseName(0, false);
0506:
0507: _namespace.push(entry);
0508:
0509: ch = read();
0510:
0511: if (ch != '>' && ch != '/') {
0512: ch = parseAttributes(ch, true);
0513: } else
0514: _attributes.clear();
0515:
0516: QName qName = entry.getQName();
0517:
0518: if (_isValidating && _dtd != null) {
0519: QElementDef elementDef = _dtd.getElement(qName
0520: .getLocalPart());
0521:
0522: if (elementDef != null)
0523: elementDef.fillDefaults(_attributes);
0524: }
0525:
0526: _contentHandler.startElement(entry.getUri(), entry
0527: .getLocalName(), entry.getName(), _attributes);
0528:
0529: _hasTopElement = true;
0530:
0531: if (ch == '/') {
0532: // empty tag: <foo/>
0533: if ((ch = read()) == '>') {
0534: _contentHandler.endElement(entry.getUri(), entry
0535: .getLocalName(), entry.getName());
0536:
0537: _namespace.pop(entry);
0538: }
0539: // short tag: </foo/some text here/>
0540: else {
0541: throw error(L
0542: .l(
0543: "unexpected character {0} after '/', expected '/>'",
0544: badChar(ch), entry.getName()));
0545: }
0546: } else if (ch != '>') {
0547: throw error(L
0548: .l(
0549: "unexpected character {0} while parsing '{1}' attributes. Expected an attribute name or '>' or '/>'. XML element syntax is:\n <name attr-1=\"value-1\" ... attr-n=\"value-n\">",
0550: badChar(ch), entry.getName()));
0551: }
0552: }
0553:
0554: /**
0555: * Parses the attributes in an element.
0556: *
0557: * @param ch the next character to reader.read.
0558: *
0559: * @return the next character to read.
0560: */
0561: private int parseAttributes(int ch, boolean isElement)
0562: throws IOException, SAXException {
0563: _attributes.clear();
0564:
0565: _attrNames.clear();
0566: _attrValues.clear();
0567:
0568: while (ch != -1) {
0569: boolean hasWhitespace = false;
0570:
0571: while (ch <= 0x20
0572: && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) {
0573: hasWhitespace = true;
0574: ch = read();
0575: }
0576:
0577: if (!XmlChar.isNameStart(ch)) {
0578: break;
0579: }
0580:
0581: if (!hasWhitespace)
0582: throw error(L
0583: .l("attributes must be separated by whitespace"));
0584:
0585: hasWhitespace = false;
0586:
0587: unread(ch);
0588:
0589: SaxIntern.Entry entry = parseName(0, true);
0590:
0591: ch = read();
0592:
0593: while (ch <= 0x20
0594: && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) {
0595: ch = read();
0596: }
0597:
0598: String value = null;
0599:
0600: if (ch != '=') {
0601: throw error(L
0602: .l(
0603: "attribute '{0}' expects value at {1}. XML requires attributes to have explicit values.",
0604: entry.getName(), badChar(ch)));
0605: }
0606:
0607: ch = read();
0608:
0609: while (ch <= 0x20
0610: && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) {
0611: ch = read();
0612: }
0613:
0614: value = parseValue(ch);
0615:
0616: ch = read();
0617:
0618: if (entry.isXmlns()) {
0619: String prefix;
0620:
0621: if (entry.getPrefix() != null)
0622: prefix = entry.getLocalName();
0623: else
0624: prefix = "";
0625:
0626: String uri = value;
0627:
0628: if (_isXmlnsPrefix) {
0629: _contentHandler.startPrefixMapping(prefix, uri);
0630: }
0631:
0632: // needed for xml/032e
0633: if (isElement && _isXmlnsAttribute) {
0634: _attributes.add(entry.getQName(), uri);
0635: }
0636: } else {
0637: _attrNames.add(entry);
0638: _attrValues.add(value);
0639: }
0640: }
0641:
0642: int len = _attrNames.size();
0643: for (int i = 0; i < len; i++) {
0644: SaxIntern.Entry attrEntry = _attrNames.get(i);
0645: String value = _attrValues.get(i);
0646:
0647: QName name = attrEntry.getQName();
0648:
0649: _attributes.add(name, value);
0650: }
0651:
0652: return ch;
0653: }
0654:
0655: /**
0656: * Parses an entity reference:
0657: *
0658: * <pre>
0659: * er ::= &#d+;
0660: * ::= &name;
0661: * </pre>
0662: */
0663: private int parseEntityReference() throws IOException, SAXException {
0664: int ch;
0665:
0666: ch = read();
0667:
0668: // character reference
0669: if (ch == '#') {
0670: addText((char) parseCharacterReference());
0671:
0672: return read();
0673: }
0674: // entity reference
0675: else if (XmlChar.isNameStart(ch)) {
0676: ch = _reader.parseName(_buf, ch);
0677:
0678: if (ch != ';' && _strictXml)
0679: throw error(L
0680: .l(
0681: "'&{0};' expected ';' at {0}. Entity references have a '&name;' syntax.",
0682: _buf, badChar(ch)));
0683: else if (ch != ';') {
0684: addText('&');
0685: addText(_buf.toString());
0686: return ch;
0687: }
0688:
0689: addEntityReference(_buf.toString());
0690:
0691: ch = read();
0692:
0693: return ch;
0694: } else if (_strictXml) {
0695: throw error(L.l("expected name at {0}", badChar(ch)));
0696: } else {
0697: addText('&');
0698: return ch;
0699: }
0700: }
0701:
0702: private int parseCharacterReference() throws IOException,
0703: SAXException {
0704: int ch = read();
0705:
0706: int radix = 10;
0707: if (ch == 'x') {
0708: radix = 16;
0709: ch = read();
0710: }
0711:
0712: int value = 0;
0713: for (; ch != ';'; ch = read()) {
0714: if (ch >= '0' && ch <= '9')
0715: value = radix * value + ch - '0';
0716: else if (radix == 16 && ch >= 'a' && ch <= 'f')
0717: value = radix * value + ch - 'a' + 10;
0718: else if (radix == 16 && ch >= 'A' && ch <= 'F')
0719: value = radix * value + ch - 'A' + 10;
0720: else
0721: throw error(L.l("malformed entity ref at {0}",
0722: badChar(ch)));
0723: }
0724:
0725: if (value > 0xffff)
0726: throw error(L.l("malformed entity ref at {0}", "" + value));
0727:
0728: // xml/0072
0729: if (_strictCharacters && !isChar(value))
0730: throw error(L.l("illegal character ref at {0}",
0731: badChar(value)));
0732:
0733: return value;
0734: }
0735:
0736: /**
0737: * Looks up a named entity reference, filling the text.
0738: */
0739: private void addEntityReference(String name) throws IOException,
0740: SAXException {
0741: boolean expand = !_entitiesAsText || _hasDoctype;
0742: // XXX: not quite the right logic. There should be a soft expandEntities
0743:
0744: if (!expand) {
0745: addText("&" + name + ";");
0746: return;
0747: }
0748:
0749: int ch = _entities.getEntity(name);
0750: if (ch >= 0 && ch <= 0xffff) {
0751: addText((char) ch);
0752: return;
0753: }
0754:
0755: QEntity entity = _dtd == null ? null : _dtd.getEntity(name);
0756:
0757: if (!_expandEntities) {
0758: addText("&" + name + ";");
0759: return;
0760: }
0761:
0762: if (entity == null
0763: && (_dtd == null || _dtd.getName() == null || !_dtd
0764: .isExternal())) {
0765: throw error(L
0766: .l(
0767: "'&{0};' is an unknown entity. XML predefines only '<', '&', '>', ''' and '"'. All other entities must be defined in an <!ENTITY> definition in the DTD.",
0768: name));
0769: } else if (entity != null) {
0770: if (entity._isSpecial && entity._value != null)
0771: addText(entity._value);
0772: else if (entity.getSystemId() != null) {
0773: if (pushSystemEntity(entity)) {
0774: }
0775: /* XXX:??
0776: else if (strictXml) {
0777: throw error(L.l("can't open external entity at '&{0};'", name));
0778: }
0779: */
0780: else if (_contentHandler instanceof DOMBuilder) {
0781: ((DOMBuilder) _contentHandler)
0782: .entityReference(name);
0783: } else
0784: addText("&" + name + ";");
0785: } else if (expand && entity._value != null)
0786: setMacro(entity._value);
0787: else
0788: addText("&" + name + ";");
0789: } else {
0790: if (_contentHandler instanceof DOMBuilder) {
0791: ((DOMBuilder) _contentHandler).entityReference(name);
0792: } else
0793: // XXX: error?
0794: addText("&" + name + ";");
0795: }
0796: }
0797:
0798: private boolean pushSystemEntity(QEntity entity)
0799: throws IOException, SAXException {
0800: String publicId = entity.getPublicId();
0801: String systemId = entity.getSystemId();
0802: String value = null;
0803: InputSource source = null;
0804: ReadStream is = null;
0805:
0806: if (_entityResolver != null)
0807: source = _entityResolver.resolveEntity(publicId, systemId);
0808:
0809: if (source != null && source.getByteStream() != null)
0810: is = Vfs.openRead(source.getByteStream());
0811: else if (source != null && source.getCharacterStream() != null)
0812: is = Vfs.openRead(source.getCharacterStream());
0813: else if (source != null && source.getSystemId() != null
0814: && _searchPath.lookup(source.getSystemId()).isFile()) {
0815: _owner.addDepend(_searchPath.lookup(source.getSystemId()));
0816: is = _searchPath.lookup(source.getSystemId()).openRead();
0817: } else if (systemId != null && !systemId.equals("")) {
0818: String path = systemId;
0819: if (path.startsWith("file:"))
0820: path = path.substring(5);
0821: if (_searchPath.lookup(path).isFile()) {
0822: _owner.addDepend(_searchPath.lookup(path));
0823: is = _searchPath.lookup(path).openRead();
0824: }
0825: }
0826:
0827: if (is == null)
0828: return false;
0829:
0830: _filename = systemId;
0831: _systemId = systemId;
0832:
0833: Path oldSearchPath = _searchPath;
0834: Path path = is.getPath();
0835: if (path != null) {
0836: _owner.addDepend(path);
0837:
0838: if (_searchPath != null) {
0839: _searchPath = path.getParent();
0840: _reader.setSearchPath(oldSearchPath);
0841: }
0842: }
0843:
0844: _is = is;
0845: _line = 1;
0846:
0847: XmlReader oldReader = _reader;
0848: _reader = null;
0849:
0850: parseXMLDeclaration(oldReader);
0851:
0852: return true;
0853: }
0854:
0855: private boolean isAttributeChar(int ch) {
0856: switch (ch) {
0857: case ' ':
0858: case '\t':
0859: case '\n':
0860: case '\r':
0861: return false;
0862: case '<':
0863: case '>':
0864: case '\'':
0865: case '"':
0866: case '=':
0867: return false;
0868: default:
0869: return true;
0870: }
0871: }
0872:
0873: private int parsePI() throws IOException, SAXException {
0874: int ch;
0875:
0876: ch = read();
0877: if (!XmlChar.isNameStart(ch))
0878: throw error(L
0879: .l(
0880: "expected name after '<?' at {0}. Processing instructions expect a name like <?foo ... ?>",
0881: badChar(ch)));
0882: ch = _reader.parseName(_text, ch);
0883:
0884: String piName = _text.toString();
0885: if (!piName.equals("xml"))
0886: return parsePITail(piName, ch);
0887: else {
0888: throw error(L
0889: .l("<?xml ... ?> occurs after content. The <?xml ... ?> prolog must be at the document start."));
0890:
0891: }
0892: }
0893:
0894: private int parsePITail(String piName, int ch) throws IOException,
0895: SAXException {
0896: ch = skipWhitespace(ch);
0897:
0898: _text.clear();
0899: while (ch != -1) {
0900: if (ch == '?') {
0901: if ((ch = read()) == '>')
0902: break;
0903: else
0904: _text.append('?');
0905: } else {
0906: _text.append((char) ch);
0907: ch = read();
0908: }
0909: }
0910:
0911: _contentHandler.processingInstruction(piName, _text.toString());
0912:
0913: return read();
0914: }
0915:
0916: /**
0917: * Parses a comment. The "<!--" has already been read.
0918: */
0919: private void parseComment() throws IOException, SAXException {
0920: int ch = read();
0921:
0922: if (ch != '-')
0923: throw error(L.l("expected comment at {0}", badChar(ch)));
0924:
0925: ch = read();
0926:
0927: if (!_skipComments)
0928: _buf.clear();
0929:
0930: comment: while (ch != -1) {
0931: if (ch == '-') {
0932: ch = read();
0933:
0934: while (ch == '-') {
0935: if ((ch = read()) == '>')
0936: break comment;
0937: else if (_strictComments)
0938: throw error(L.l("XML forbids '--' in comments"));
0939: else if (ch == '-') {
0940: if (!_skipComments)
0941: _buf.append('-');
0942: } else {
0943: if (!_skipComments)
0944: _buf.append("--");
0945: break;
0946: }
0947: }
0948:
0949: _buf.append('-');
0950: } else if (!XmlChar.isChar(ch)) {
0951: throw error(L.l("bad character {0}", hex(ch)));
0952: } else {
0953: _buf.append((char) ch);
0954: ch = read();
0955: }
0956: }
0957:
0958: if (_skipComments) {
0959: } else if (_contentHandler instanceof XMLWriter
0960: && !_skipComments) {
0961: ((XMLWriter) _contentHandler).comment(_buf.toString());
0962: _isIgnorableWhitespace = true;
0963: } else if (_lexicalHandler != null) {
0964: _lexicalHandler.comment(_buf.getBuffer(), 0, _buf
0965: .getLength());
0966: _isIgnorableWhitespace = true;
0967: }
0968: }
0969:
0970: /**
0971: * Parses the contents of a cdata section.
0972: *
0973: * <pre>
0974: * cdata ::= <![CDATA[ ... ]]>
0975: * </pre>
0976: */
0977: private void parseCdata() throws IOException, SAXException {
0978: int ch;
0979:
0980: if ((ch = read()) != 'C' || (ch = read()) != 'D'
0981: || (ch = read()) != 'A' || (ch = read()) != 'T'
0982: || (ch = read()) != 'A' || (ch = read()) != '[') {
0983: throw error(L.l("expected '<![CDATA[' at {0}", badChar(ch)));
0984: }
0985:
0986: ch = read();
0987:
0988: if (_lexicalHandler != null) {
0989: _lexicalHandler.startCDATA();
0990: }
0991:
0992: cdata: while (ch != -1) {
0993: if (ch == ']') {
0994: ch = read();
0995:
0996: while (ch == ']') {
0997: if ((ch = read()) == '>')
0998: break cdata;
0999: else if (ch == ']')
1000: addText(']');
1001: else {
1002: addText(']');
1003: break;
1004: }
1005: }
1006:
1007: addText(']');
1008: } else if (_strictCharacters && !isChar(ch)) {
1009: throw error(L.l("expected character in cdata at {0}",
1010: badChar(ch)));
1011: } else {
1012: addText((char) ch);
1013: ch = read();
1014: }
1015: }
1016:
1017: if (_lexicalHandler != null) {
1018: _lexicalHandler.endCDATA();
1019: }
1020: }
1021:
1022: /**
1023: * Expands the macro value of a PE reference.
1024: */
1025: private void addPEReference(CharBuffer value, String name)
1026: throws IOException, SAXException {
1027: QEntity entity = _dtd.getParameterEntity(name);
1028:
1029: if (entity == null && !_dtd.isExternal())
1030: throw error(L
1031: .l(
1032: "'%{0};' is an unknown parameter entity. Parameter entities must be defined in an <!ENTITY> declaration before use.",
1033: name));
1034: else if (entity != null && entity._value != null) {
1035: setMacro(entity._value);
1036: } else if (entity != null && entity.getSystemId() != null) {
1037: pushInclude(entity.getPublicId(), entity.getSystemId());
1038: } else {
1039: value.append("%");
1040: value.append(name);
1041: value.append(";");
1042: }
1043: }
1044:
1045: private static String toAttrDefault(CharBuffer text) {
1046: for (int i = 0; i < text.length(); i++) {
1047: int ch = text.charAt(i);
1048:
1049: if (ch == '"') {
1050: text.delete(i, i + 1);
1051: text.insert(i, """);
1052: i--;
1053: } else if (ch == '\'') {
1054: text.delete(i, i + 1);
1055: text.insert(i, "'");
1056: i--;
1057: }
1058: }
1059:
1060: return text.toString();
1061: }
1062:
1063: /**
1064: * externalID ::= PUBLIC publicId systemId
1065: * ::= SYSTEM systemId
1066: */
1067: private int parseExternalID(int ch) throws IOException,
1068: SAXException {
1069: ch = _reader.parseName(_text, ch);
1070: String key = _text.toString();
1071: ch = skipWhitespace(ch);
1072:
1073: _extSystemId = null;
1074: _extPublicId = null;
1075: if (key.equals("PUBLIC")) {
1076: _extPublicId = parseValue(ch);
1077: ch = skipWhitespace(read());
1078:
1079: if (_extPublicId.indexOf('&') > 0)
1080: throw error(L
1081: .l(
1082: "Illegal character '&' in PUBLIC identifier '{0}'",
1083: _extPublicId));
1084:
1085: _extSystemId = parseValue(ch);
1086: ch = skipWhitespace(read());
1087: } else if (key.equals("SYSTEM")) {
1088: _extSystemId = parseValue(ch);
1089: ch = read();
1090: } else
1091: throw error(L.l("expected PUBLIC or SYSTEM at '{0}'", key));
1092:
1093: return ch;
1094: }
1095:
1096: /**
1097: * Parses an attribute value.
1098: *
1099: * <pre>
1100: * value ::= '[^']*'
1101: * ::= "[^"]*"
1102: * ::= [^ />]*
1103: * </pre>
1104: *
1105: * @param value the CharBuffer which will contain the value.
1106: * @param ch the next character from the input stream.
1107: * @param isGeneral true if general entities are allowed.
1108: *
1109: * @return the following character from the input stream
1110: */
1111: private String parseValue(int ch) throws IOException, SAXException {
1112: int end = ch;
1113:
1114: char[] valueBuffer = _valueBuffer;
1115: int valueLength = 0;
1116:
1117: if (end != '\'' && end != '"') {
1118: valueBuffer[valueLength++] = (char) end;
1119: for (ch = read(); ch >= 0 && XmlChar.isNameChar(ch); ch = read()) {
1120: valueBuffer[valueLength++] = (char) ch;
1121: }
1122:
1123: String value = new String(valueBuffer, 0, valueLength);
1124:
1125: throw error(L
1126: .l(
1127: "XML attribute value must be quoted at '{0}'. XML attribute syntax is either attr=\"value\" or attr='value'.",
1128: value));
1129: }
1130:
1131: ch = read();
1132:
1133: while (ch >= 0 && ch != end) {
1134: if (ch == '&') {
1135: if ((ch = read()) == '#') {
1136: valueBuffer[valueLength++] = (char) parseCharacterReference();
1137: } else if (XmlChar.isNameStart(ch)) {
1138: ch = _reader.parseName(_buf, ch);
1139: String name = _buf.toString();
1140:
1141: if (ch != ';')
1142: throw error(L.l("expected '{0}' at {1}", ";",
1143: badChar(ch)));
1144: else {
1145: int lookup = _entities.getEntity(name);
1146:
1147: if (lookup >= 0 && lookup <= 0xffff) {
1148: ch = read();
1149: valueBuffer[valueLength++] = (char) lookup;
1150: continue;
1151: }
1152:
1153: QEntity entity = _dtd == null ? null : _dtd
1154: .getEntity(name);
1155: if (entity != null && entity._value != null)
1156: setMacroAttr(entity._value);
1157: else
1158: throw error(L
1159: .l(
1160: "expected local reference at '&{0};'",
1161: name));
1162: }
1163: }
1164: } else {
1165: if (ch == '\r') {
1166: ch = read();
1167: if (ch != '\n') {
1168: valueBuffer[valueLength++] = '\n';
1169: continue;
1170: }
1171: }
1172:
1173: valueBuffer[valueLength++] = (char) ch;
1174: }
1175:
1176: ch = read();
1177: }
1178:
1179: return new String(valueBuffer, 0, valueLength);
1180: }
1181:
1182: private boolean isWhitespace(int ch) {
1183: return ch <= 0x20
1184: && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd);
1185: }
1186:
1187: private boolean isChar(int ch) {
1188: return (ch >= 0x20 && ch <= 0xd7ff || ch == 0x9 || ch == 0xa
1189: || ch == 0xd || ch >= 0xe000 && ch <= 0xfffd);
1190: }
1191:
1192: /**
1193: * Returns the hex representation of a byte.
1194: */
1195: private static String hex(int value) {
1196: CharBuffer cb = CharBuffer.allocate();
1197:
1198: for (int b = 3; b >= 0; b--) {
1199: int v = (value >> (4 * b)) & 0xf;
1200: if (v < 10)
1201: cb.append((char) (v + '0'));
1202: else
1203: cb.append((char) (v - 10 + 'a'));
1204: }
1205:
1206: return cb.close();
1207: }
1208:
/**
 * Returns the current filename.
 *
 * @return the value of {@code _filename}; parseInt falls back to the
 *   system id, the stream's user path, or "stream" when none was set.
 */
public String getFilename() {
  return _filename;
}
1215:
/**
 * Returns the current line.
 *
 * @return the line counter, which starts at 1 and is advanced by the
 *   main parse loop for each '\n' it reads.
 */
public int getLine() {
  return _line;
}
1222:
1223: /**
1224: * Returns the current column.
1225: */
1226: int getColumn() {
1227: return -1;
1228: }
1229:
1230: /**
1231: * Returns the opening line of the current node.
1232: */
1233: int getNodeLine() {
1234: if (_elementTop > 0)
1235: return _elementLines[_elementTop - 1];
1236: else
1237: return 1;
1238: }
1239:
1240: /**
1241: * Returns the current public id being read.
1242: */
1243: public String getPublicId() {
1244: if (_reader != null)
1245: return _reader.getPublicId();
1246: else
1247: return _publicId;
1248: }
1249:
1250: /**
1251: * Returns the current system id being read.
1252: */
1253: public String getSystemId() {
1254: if (_reader != null)
1255: return _reader.getSystemId();
1256: else if (_systemId != null)
1257: return _systemId;
1258: else
1259: return _filename;
1260: }
1261:
1262: public void setLine(int line) {
1263: _line = line;
1264: }
1265:
1266: public int getLineNumber() {
1267: return getLine();
1268: }
1269:
1270: public int getColumnNumber() {
1271: return getColumn();
1272: }
1273:
1274: /**
1275: * Adds a string to the current text buffer.
1276: */
1277: private void addText(String s) throws IOException, SAXException {
1278: int len = s.length();
1279:
1280: for (int i = 0; i < len; i++)
1281: addText(s.charAt(i));
1282: }
1283:
1284: /**
1285: * Adds a character to the current text buffer.
1286: */
1287: private void addText(char ch) throws IOException, SAXException {
1288: if (_textLength > 0 && _textBuffer[_textLength - 1] == '\r') {
1289: _textBuffer[_textLength - 1] = '\n';
1290: if (ch == '\n')
1291: return;
1292: }
1293:
1294: if (_isIgnorableWhitespace && !XmlChar.isWhitespace(ch))
1295: _isIgnorableWhitespace = false;
1296:
1297: _textBuffer[_textLength++] = ch;
1298: }
1299:
1300: /**
1301: * Flushes the text buffer to the SAX callback.
1302: */
1303: private void addText(char[] buffer, int offset, int length,
1304: boolean isWhitespace) throws IOException, SAXException {
1305: if (length <= 0)
1306: return;
1307:
1308: if (_namespace.getDepth() == 1) {
1309: if (!isWhitespace) {
1310: throw error(L.l("expected top element at '{0}'",
1311: new String(buffer, offset, length)));
1312: } else {
1313: _contentHandler.ignorableWhitespace(buffer, offset,
1314: length);
1315: }
1316: } else
1317: _contentHandler.characters(buffer, offset, length);
1318: }
1319:
1320: /**
1321: * Parses a name.
1322: */
1323: private SaxIntern.Entry parseName(int offset, boolean isAttribute)
1324: throws IOException {
1325: char[] inputBuf = _inputBuffer;
1326: int inputLength = _inputLength;
1327: int inputOffset = _inputOffset;
1328:
1329: char[] valueBuf = _valueBuffer;
1330: int valueLength = offset;
1331:
1332: int colon = 0;
1333:
1334: while (true) {
1335: if (inputOffset < inputLength) {
1336: char ch = inputBuf[inputOffset++];
1337:
1338: if (XML_NAME_CHAR[ch]) {
1339: valueBuf[valueLength++] = ch;
1340: } else if (ch == ':') {
1341: if (colon <= 0)
1342: colon = valueLength;
1343:
1344: valueBuf[valueLength++] = ch;
1345: } else {
1346: _inputOffset = inputOffset - 1;
1347:
1348: return _intern.add(valueBuf, offset, valueLength
1349: - offset, colon, isAttribute);
1350: }
1351: } else if (fillBuffer()) {
1352: inputLength = _inputLength;
1353: inputOffset = 0;
1354: } else {
1355: return _intern.add(valueBuf, offset, valueLength
1356: - offset, colon, isAttribute);
1357: }
1358: }
1359: }
1360:
1361: final int skipWhitespace(int ch) throws IOException, SAXException {
1362: while (ch <= 0x20
1363: && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd)) {
1364: ch = read();
1365: }
1366:
1367: return ch;
1368: }
1369:
1370: public void setReader(XmlReader reader) {
1371: _reader = reader;
1372: }
1373:
1374: /**
1375: * Adds text to the macro, escaping attribute values.
1376: */
1377: void setMacroAttr(String text) throws IOException, SAXException {
1378: if (_reader != _macro) {
1379: _macro.init(this , _reader);
1380: _reader = _macro;
1381: }
1382:
1383: int j = _macroIndex;
1384: for (int i = 0; i < text.length(); i++) {
1385: int ch = text.charAt(i);
1386:
1387: if (ch == '\'')
1388: _macro.add("'");
1389: else if (ch == '"')
1390: _macro.add(""");
1391: else
1392: _macro.add((char) ch);
1393: }
1394: }
1395:
1396: void pushInclude(String systemId) throws IOException, SAXException {
1397: pushInclude(null, systemId);
1398: }
1399:
1400: /**
1401: * Pushes the named file as a lexical include.
1402: *
1403: * @param systemId the name of the file to include.
1404: */
1405: void pushInclude(String publicId, String systemId)
1406: throws IOException, SAXException {
1407: InputStream stream = openStream(systemId, publicId);
1408: if (stream == null)
1409: throw new FileNotFoundException(systemId);
1410: _is = Vfs.openRead(stream);
1411: Path oldSearchPath = _searchPath;
1412: Path path = _is.getPath();
1413: if (path != null) {
1414: _owner.addDepend(path);
1415:
1416: if (_searchPath != null) {
1417: _searchPath = path.getParent();
1418: _reader.setSearchPath(oldSearchPath);
1419: }
1420: }
1421:
1422: _filename = systemId;
1423: /*
1424: XmlReader nextReader;
1425: if (_reader instanceof Utf8Reader)
1426: nextReader = new Utf8Reader(this, _is);
1427: else {
1428: _is.setEncoding(_reader.getReadStream().getEncoding());
1429: nextReader = new XmlReader(this, _is);
1430: }
1431: _reader = nextReader;
1432: */
1433:
1434: XmlReader oldReader = _reader;
1435: _reader = null;
1436:
1437: _line = 1;
1438: parseXMLDeclaration(oldReader);
1439: int ch = read();
1440:
1441: XmlReader reader = _reader;
1442:
1443: if (reader instanceof MacroReader)
1444: reader = reader.getNext();
1445:
1446: reader.setSystemId(systemId);
1447: reader.setFilename(systemId);
1448: reader.setPublicId(publicId);
1449: reader.setNext(oldReader);
1450:
1451: unread(ch);
1452: }
1453:
1454: private void popInclude() throws IOException, SAXException {
1455: XmlReader oldReader = _reader;
1456: _reader = _reader.getNext();
1457: oldReader.setNext(null);
1458: _filename = _reader.getFilename();
1459: _line = _reader.getLine();
1460: _is = _reader.getReadStream();
1461: if (_reader.getSearchPath() != null)
1462: _searchPath = _reader.getSearchPath();
1463: }
1464:
1465: void setMacro(String text) throws IOException, SAXException {
1466: if (_reader == _macro) {
1467: } else if (_macro.getNext() == null) {
1468: _macro.init(this , _reader);
1469: _reader = _macro;
1470: } else {
1471: _macro = new MacroReader();
1472: _macro.init(this , _reader);
1473: _reader = _macro;
1474: }
1475:
1476: _macro.add(text);
1477: }
1478:
1479: protected final int read() throws IOException, SAXException {
1480: int inputOffset = _inputOffset;
1481:
1482: if (inputOffset < _inputLength) {
1483: char ch = _inputBuffer[inputOffset];
1484:
1485: _inputOffset = inputOffset + 1;
1486:
1487: return ch;
1488: } else if (fillBuffer()) {
1489: return _inputBuffer[_inputOffset++];
1490: } else
1491: return -1;
1492: }
1493:
1494: public final void unread(int ch) {
1495: if (ch < 0 || _inputOffset <= 0)
1496: return;
1497:
1498: _inputOffset--;
1499: }
1500:
1501: protected boolean fillBuffer() throws IOException {
1502: int len = _is.read(_inputBuffer, 0, _inputBuffer.length);
1503:
1504: if (len >= 0) {
1505: _inputLength = len;
1506: _inputOffset = 0;
1507:
1508: return true;
1509: } else {
1510: _inputLength = 0;
1511: _inputOffset = 0;
1512:
1513: return false;
1514: }
1515: }
1516:
1517: private void parseXMLDeclaration(XmlReader oldReader)
1518: throws IOException, SAXException {
1519: int startOffset = _is.getOffset();
1520: boolean isEBCDIC = false;
1521: int ch = _is.read();
1522:
1523: XmlReader reader = null;
1524:
1525: // utf-16 starts with \xfe \xff
1526: if (ch == 0xfe) {
1527: ch = _is.read();
1528: if (ch == 0xff) {
1529: _owner.setAttribute("encoding", "UTF-16");
1530: _is.setEncoding("utf-16");
1531:
1532: reader = new Utf16Reader(this , _is);
1533:
1534: ch = reader.read();
1535: }
1536: }
1537: // utf-16 rev starts with \xff \xfe
1538: else if (ch == 0xff) {
1539: ch = _is.read();
1540: if (ch == 0xfe) {
1541: _owner.setAttribute("encoding", "UTF-16");
1542: _is.setEncoding("utf-16");
1543:
1544: reader = new Utf16Reader(this , _is);
1545: ((Utf16Reader) reader).setReverse(true);
1546:
1547: ch = reader.read();
1548: }
1549: }
1550: // utf-16 can also start with \x00 <
1551: else if (ch == 0x00) {
1552: ch = _is.read();
1553: _owner.setAttribute("encoding", "UTF-16");
1554: _is.setEncoding("utf-16");
1555:
1556: reader = new Utf16Reader(this , _is);
1557: }
1558: // utf-8 BOM is \xef \xbb \xbf
1559: else if (ch == 0xef) {
1560: ch = _is.read();
1561: if (ch == 0xbb) {
1562: ch = _is.read();
1563:
1564: if (ch == 0xbf) {
1565: ch = _is.read();
1566:
1567: _owner.setAttribute("encoding", "UTF-8");
1568: _is.setEncoding("utf-8");
1569:
1570: reader = new Utf8Reader(this , _is);
1571: }
1572: }
1573: } else if (ch == 0x4c) {
1574: // ebcdic
1575: // xml/00l1
1576: _is.unread();
1577: // _is.setEncoding("cp037");
1578: _is.setEncoding("cp500");
1579:
1580: isEBCDIC = true;
1581:
1582: reader = new XmlReader(this , _is);
1583:
1584: ch = reader.read();
1585: } else {
1586: int ch2 = _is.read();
1587:
1588: if (ch2 == 0x00) {
1589: _owner.setAttribute("encoding", "UTF-16LE");
1590: _is.setEncoding("utf-16le");
1591:
1592: reader = new Utf16Reader(this , _is);
1593: ((Utf16Reader) reader).setReverse(true);
1594: } else if (ch2 > 0)
1595: _is.unread();
1596: }
1597:
1598: if (reader != null && reader != oldReader) {
1599: } else if (_is.getSource() instanceof ReaderWriterStream) {
1600: reader = new XmlReader(this , _is);
1601: } else {
1602: reader = new Utf8Reader(this , _is);
1603: }
1604:
1605: if (ch == '\n')
1606: reader.setLine(2);
1607:
1608: reader.setSystemId(_systemId);
1609: if (_systemId == null)
1610: reader.setSystemId(_filename);
1611: reader.setFilename(_filename);
1612: reader.setPublicId(_publicId);
1613:
1614: reader.setNext(oldReader);
1615:
1616: _reader = reader;
1617:
1618: /* XXX: this might be too strict. */
1619: /*
1620: if (! strictXml) {
1621: for (; XmlChar.isWhitespace(ch); ch = reader.read()) {
1622: }
1623: }
1624: */
1625:
1626: if (ch != '<') {
1627: unreadByte(ch);
1628: return;
1629: }
1630:
1631: if (parseXMLDecl(_reader) && isEBCDIC) {
1632: // EBCDIC requires a re-read
1633: _is.setOffset(startOffset);
1634:
1635: ch = read();
1636: if (ch != '<')
1637: throw new IllegalStateException();
1638:
1639: parseXMLDecl(_reader);
1640: }
1641: }
1642:
1643: private boolean parseXMLDecl(XmlReader reader) throws IOException,
1644: SAXException {
1645: int ch = readByte();
1646: if (ch != '?') {
1647: unreadByte((char) ch);
1648: unreadByte('<');
1649: return false;
1650: }
1651:
1652: ch = read();
1653: if (!XmlChar.isNameStart(ch))
1654: throw error(L
1655: .l(
1656: "expected name after '<?' at {0}. Processing instructions expect a name like <?foo ... ?>",
1657: badChar(ch)));
1658: ch = _reader.parseName(_text, ch);
1659:
1660: String piName = _text.toString();
1661: if (!piName.equals("xml")) {
1662: ch = parsePITail(piName, ch);
1663: unreadByte(ch);
1664: return false;
1665: }
1666:
1667: ch = parseAttributes(ch, false);
1668:
1669: if (ch != '?')
1670: throw error(L
1671: .l(
1672: "expected '?' at {0}. Processing instructions end with '?>' like <?foo ... ?>",
1673: badChar(ch)));
1674: if ((ch = read()) != '>')
1675: throw error(L
1676: .l(
1677: "expected '>' at {0}. Processing instructions end with '?>' like <?foo ... ?>",
1678: ">", badChar(ch)));
1679:
1680: for (int i = 0; i < _attributes.getLength(); i++) {
1681: QName name = _attributes.getName(i);
1682: String value = _attributes.getValue(i);
1683:
1684: if (_owner != null)
1685: _owner.setAttribute(name.getLocalPart(), value);
1686:
1687: if (name.getLocalPart().equals("encoding")) { // xml/00hb // && ! _inDtd) {
1688: String encoding = value;
1689:
1690: if (!_isStaticEncoding
1691: && !encoding.equalsIgnoreCase("UTF-8")
1692: && !encoding.equalsIgnoreCase("UTF-16")
1693: && !(_is.getSource() instanceof ReaderWriterStream)) {
1694: _is.setEncoding(encoding);
1695:
1696: XmlReader oldReader = _reader;
1697:
1698: _reader = new XmlReader(this , _is);
1699: // _reader.setNext(oldReader);
1700:
1701: _reader.setLine(oldReader.getLine());
1702:
1703: _reader.setSystemId(_filename);
1704: _reader.setPublicId(null);
1705: }
1706: }
1707: }
1708:
1709: return true;
1710: }
1711:
1712: protected int readByte() throws IOException {
1713: return _is.read();
1714: }
1715:
1716: protected void unreadByte(int ch) {
1717: _is.unread();
1718: }
1719:
1720: /**
1721: * Returns an error including the current line.
1722: *
1723: * @param text the error message text.
1724: */
1725: XmlParseException error(String text) {
1726: if (_errorHandler != null) {
1727: SAXParseException e = new SAXParseException(text, _locator);
1728:
1729: try {
1730: _errorHandler.fatalError(e);
1731: } catch (SAXException e1) {
1732: }
1733: }
1734:
1735: return new XmlParseException(_filename + ":" + _line + ": "
1736: + text);
1737: }
1738:
1739: public void free() {
1740: }
1741:
1742: int parseName(CharBuffer cb, int ch) throws IOException,
1743: SAXException {
1744: return _reader.parseName(cb, ch);
1745: }
1746:
1747: /**
1748: * Returns a user-readable string for an error character.
1749: */
1750: static String badChar(int ch) {
1751: if (ch < 0 || ch == 0xffff)
1752: return L.l("end of file");
1753: else if (ch == '\n' || ch == '\r')
1754: return L.l("end of line");
1755: else if (ch >= 0x20 && ch <= 0x7f)
1756: return "'" + (char) ch + "'";
1757: else
1758: return "'" + (char) ch + "' (\\u" + hex(ch) + ")";
1759: }
1760:
1761: private void printDebugNode(WriteStream s, Node node, int depth)
1762: throws IOException {
1763: if (node == null)
1764: return;
1765:
1766: for (int i = 0; i < depth; i++)
1767: s.print(' ');
1768:
1769: if (node.getFirstChild() != null) {
1770: s.println("<" + node.getNodeName() + ">");
1771: for (Node child = node.getFirstChild(); child != null; child = child
1772: .getNextSibling()) {
1773: printDebugNode(s, child, depth + 2);
1774: }
1775: for (int i = 0; i < depth; i++)
1776: s.print(' ');
1777: s.println("</" + node.getNodeName() + ">");
1778: } else
1779: s.println("<" + node.getNodeName() + "/>");
1780: }
1781:
1782: public void close() {
1783: TempCharBuffer tempInputBuffer = _tempInputBuffer;
1784: _tempInputBuffer = null;
1785:
1786: _inputBuffer = null;
1787:
1788: if (tempInputBuffer != null)
1789: TempCharBuffer.free(tempInputBuffer);
1790: }
1791:
1792: public static class LocatorImpl implements ExtendedLocator {
1793: XmlParser _parser;
1794:
1795: LocatorImpl(XmlParser parser) {
1796: _parser = parser;
1797: }
1798:
1799: public String getSystemId() {
1800: if (_parser._reader != null
1801: && _parser._reader.getSystemId() != null)
1802: return _parser._reader.getSystemId();
1803: else if (_parser.getSystemId() != null)
1804: return _parser.getSystemId();
1805: else if (_parser._reader != null
1806: && _parser._reader.getFilename() != null)
1807: return _parser._reader.getFilename();
1808: else if (_parser.getFilename() != null)
1809: return _parser.getFilename();
1810: else
1811: return null;
1812: }
1813:
1814: public String getFilename() {
1815: if (_parser._reader != null
1816: && _parser._reader.getFilename() != null)
1817: return _parser._reader.getFilename();
1818: else if (_parser.getFilename() != null)
1819: return _parser.getFilename();
1820: else if (_parser._reader != null
1821: && _parser._reader.getSystemId() != null)
1822: return _parser._reader.getSystemId();
1823: else if (_parser.getSystemId() != null)
1824: return _parser.getSystemId();
1825: else
1826: return null;
1827: }
1828:
1829: public String getPublicId() {
1830: if (_parser._reader != null)
1831: return _parser._reader.getPublicId();
1832: else
1833: return _parser.getPublicId();
1834: }
1835:
1836: public int getLineNumber() {
1837: if (_parser._reader != null)
1838: return _parser._reader.getLine();
1839: else
1840: return _parser.getLineNumber();
1841: }
1842:
1843: public int getColumnNumber() {
1844: return _parser.getColumnNumber();
1845: }
1846: }
1847:
// Precomputes a lookup table over all 65536 char values: true for
// every XML name character except ':', which parseName tracks itself.
static {
  boolean[] nameChars = new boolean[65536];

  for (int code = 0; code < nameChars.length; code++) {
    boolean isName = XmlChar.isNameChar(code);

    nameChars[code] = isName && code != ':';
  }

  XML_NAME_CHAR = nameChars;
}
1855: }
|