001: /*
002: * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
003: *
004: * This file is part of Resin(R) Open Source
005: *
006: * Each copy or derived work must preserve the copyright notice and this
007: * notice unmodified.
008: *
009: * Resin Open Source is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU General Public License as published by
011: * the Free Software Foundation; either version 2 of the License, or
012: * (at your option) any later version.
013: *
014: * Resin Open Source is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
017: * of NON-INFRINGEMENT. See the GNU General Public License for more
018: * details.
019: *
020: * You should have received a copy of the GNU General Public License
021: * along with Resin Open Source; if not, write to the
022: * Free SoftwareFoundation, Inc.
023: * 59 Temple Place, Suite 330
024: * Boston, MA 02111-1307 USA
025: *
026: * @author Scott Ferguson
027: */
028:
029: package com.caucho.xml2;
030:
031: import com.caucho.util.L10N;
032: import com.caucho.vfs.ReadStream;
033: import com.caucho.vfs.TempCharBuffer;
034: import com.caucho.vfs.Vfs;
035: import com.caucho.xml.ExtendedLocator;
036: import com.caucho.xml.QName;
037: import com.caucho.xml.XmlChar;
038:
039: import org.xml.sax.*;
040:
041: import java.io.IOException;
042: import java.io.InputStream;
043: import java.io.Reader;
044: import java.util.HashMap;
045:
046: /**
047: * A fast XML parser.
048: */
049: public class XMLReaderImpl implements XMLReader {
050: private static final L10N L = new L10N(XMLReaderImpl.class);
051:
052: // Xerces uses the following
053: public static final String XMLNS = "http://www.w3.org/2000/xmlns/";
054:
055: static final QName DOC_NAME = new QName(null, "#document", null);
056: static final QName TEXT_NAME = new QName(null, "#text", null);
057: static final QName JSP_NAME = new QName(null, "#jsp", null);
058: static final QName WHITESPACE_NAME = new QName(null, "#whitespace",
059: null);
060: static final QName JSP_ATTRIBUTE_NAME = new QName("xtp",
061: "jsp-attribute", null);
062: static final String LEXICAL_HANDLER = "http://xml.org/sax/properties/lexical-handler";
063:
064: private static final boolean[] XML_NAME_CHAR;
065:
066: private ContentHandler _contentHandler;
067: private EntityResolver _entityResolver;
068: private DTDHandler _dtdHandler;
069: private ErrorHandler _errorHandler;
070:
071: private Reader _reader;
072:
073: private final AttributesImpl _attributes = new AttributesImpl();
074: private final ExtendedLocator _locator = new LocatorImpl();
075:
076: private final Intern _intern = new Intern();
077:
078: private final HashMap<NameKey, QName> _nameMap = new HashMap<NameKey, QName>();
079:
080: private final NameKey _nameKey = new NameKey();
081:
082: private char[] _valueBuf;
083: private char[] _inputBuf;
084: private int _inputOffset;
085: private int _inputLength;
086:
087: private String _filename;
088: private String _systemId;
089: private String _publicId;
090: private int _line;
091:
092: /**
093: * Returns a SAX feature.
094: *
095: * <p>All XMLReaders are required to recognize the
096: * http://xml.org/sax/features/namespaces and the
097: * http://xml.org/sax/features/namespace-prefixes feature names.</p>
098: */
099: public boolean getFeature(String name)
100: throws SAXNotRecognizedException, SAXNotSupportedException {
101: throw new SAXNotRecognizedException(name);
102: }
103:
104: /**
105: * Sets a SAX property.
106: */
107: public void setProperty(String name, Object value)
108: throws SAXNotRecognizedException, SAXNotSupportedException {
109: if (LEXICAL_HANDLER.equals(name)) {
110: } else
111: throw new SAXNotRecognizedException(name);
112: }
113:
114: /**
115: * Returns a SAX property.
116: */
117: public Object getProperty(String name)
118: throws SAXNotRecognizedException, SAXNotSupportedException {
119: throw new SAXNotRecognizedException(name);
120: }
121:
122: /**
123: * Sets a SAX feature.
124: */
125: public void setFeature(String name, boolean value)
126: throws SAXNotRecognizedException, SAXNotSupportedException {
127: throw new SAXNotRecognizedException(name);
128: }
129:
130: /**
131: * Sets the SAX entityResolver.
132: *
133: * @param resolver the entity resolver
134: */
135: public void setEntityResolver(EntityResolver resolver) {
136: _entityResolver = resolver;
137: }
138:
139: /**
140: * Gets the SAX entityResolver.
141: *
142: * @return the entity resolver
143: */
144: public EntityResolver getEntityResolver() {
145: return _entityResolver;
146: }
147:
148: /**
149: * Sets the SAX DTD handler
150: *
151: * @param handler the dtd handler
152: */
153: public void setDTDHandler(DTDHandler handler) {
154: _dtdHandler = handler;
155: }
156:
157: /**
158: * Gets the SAX DTD handler
159: *
160: * @return the dtd handler
161: */
162: public DTDHandler getDTDHandler() {
163: return _dtdHandler;
164: }
165:
166: /**
167: * Sets the SAX content handler
168: *
169: * @param handler the content handler
170: */
171: public void setContentHandler(ContentHandler handler) {
172: _contentHandler = handler;
173: }
174:
175: /**
176: * Gets the SAX content handler
177: *
178: * @param handler the content handler
179: */
180: public ContentHandler getContentHandler() {
181: return _contentHandler;
182: }
183:
184: /**
185: * Sets the SAX errorHandler.
186: *
187: * @param handler the error handler
188: */
189: public void setErrorHandler(ErrorHandler handler) {
190: _errorHandler = handler;
191: }
192:
193: /**
194: * Gets the SAX errorHandler.
195: *
196: * @param handler the error handler
197: */
198: public ErrorHandler getErrorHandler() {
199: return _errorHandler;
200: }
201:
202: /**
203: * parses the input source.
204: *
205: * @param source the source to parse from
206: */
207: public void parse(InputSource source) throws IOException,
208: SAXException {
209: InputStream is = source.getByteStream();
210: if (is != null) {
211: _systemId = source.getSystemId();
212:
213: if (is instanceof ReadStream) {
214: _filename = ((ReadStream) is).getPath().getUserPath();
215: if (_systemId == null)
216: _systemId = ((ReadStream) is).getPath().getURL();
217: } else {
218: _filename = _systemId;
219: }
220:
221: _reader = new java.io.InputStreamReader(is);
222:
223: parseImpl();
224: } else
225: throw new IllegalArgumentException();
226: }
227:
228: /**
229: * Parses the file at the given string
230: *
231: * @param url the source url to parse from
232: */
233: public void parse(String systemId) throws IOException, SAXException {
234: ReadStream is = Vfs.lookup(systemId).openRead();
235:
236: _reader = is.getReader();
237: _systemId = systemId;
238: _filename = systemId;
239: try {
240: parseImpl();
241: } finally {
242: _reader = null;
243: }
244: }
245:
246: /**
247: * Parses the file at the given string
248: *
249: * @param url the source url to parse from
250: */
251: private void parseImpl() throws IOException, SAXException {
252: TempCharBuffer inputBuffer = TempCharBuffer.allocate();
253: TempCharBuffer valueBuffer = TempCharBuffer.allocate();
254: try {
255: _valueBuf = valueBuffer.getBuffer();
256: _inputBuf = inputBuffer.getBuffer();
257: _inputLength = 0;
258: _inputOffset = 0;
259: _line = 1;
260:
261: _contentHandler.setDocumentLocator(_locator);
262: _contentHandler.startDocument();
263:
264: parseContent();
265:
266: _contentHandler.endDocument();
267: } finally {
268: _inputBuf = null;
269: _valueBuf = null;
270:
271: TempCharBuffer.free(inputBuffer);
272: TempCharBuffer.free(valueBuffer);
273: }
274: }
275:
276: /**
277: * Parses XML content.
278: */
279: private void parseContent() throws IOException, SAXException {
280: char[] inputBuf = _inputBuf;
281: char[] valueBuffer = _valueBuf;
282: int valueLength = valueBuffer.length;
283: int valueOffset = 0;
284:
285: boolean isWhitespace = true;
286: boolean seenCr = false;
287:
288: while (true) {
289: if (_inputLength == _inputOffset && !fillBuffer()) {
290: writeText(valueBuffer, valueOffset, isWhitespace);
291: return;
292: }
293:
294: char ch = inputBuf[_inputOffset++];
295:
296: switch (ch) {
297: case ' ':
298: case '\t':
299: if (valueOffset < valueLength)
300: valueBuffer[valueOffset++] = ch;
301: else {
302: writeText(valueBuffer, valueOffset, isWhitespace);
303: valueOffset = 0;
304: }
305: break;
306:
307: case '\n':
308: if (valueOffset < valueLength)
309: valueBuffer[valueOffset++] = ch;
310: else {
311: writeText(valueBuffer, valueOffset, isWhitespace);
312: valueOffset = 0;
313: }
314: _line++;
315: break;
316:
317: case '\r':
318: if (valueOffset < valueLength)
319: valueBuffer[valueOffset++] = ch;
320: else {
321: writeText(valueBuffer, valueOffset, isWhitespace);
322: valueOffset = 0;
323: }
324:
325: addCarriageReturnLine();
326: break;
327:
328: case '<':
329: if (valueOffset > 0) {
330: writeText(valueBuffer, valueOffset, isWhitespace);
331: valueOffset = 0;
332: }
333:
334: if (_inputLength == _inputOffset && !fillBuffer())
335: error("XXX: unexpected eof");
336:
337: ch = inputBuf[_inputOffset];
338: switch (ch) {
339: case '!':
340: break;
341: case '?':
342: break;
343: case '/':
344: _inputOffset++;
345: return;
346: default:
347: parseElement();
348: break;
349: }
350:
351: isWhitespace = true;
352: break;
353:
354: case '&':
355: if (valueOffset > 0) {
356: writeText(valueBuffer, valueOffset, isWhitespace);
357: valueOffset = 0;
358: }
359: isWhitespace = true;
360: break;
361:
362: default:
363: isWhitespace = false;
364: if (valueOffset < valueLength)
365: valueBuffer[valueOffset++] = ch;
366: else {
367: writeText(valueBuffer, valueOffset, false);
368: valueOffset = 0;
369: }
370: break;
371: }
372: }
373: }
374:
375: /**
376: * Parses the element.
377: */
378: private void parseElement() throws IOException, SAXException {
379: InternQName qName = parseName();
380: String name = qName.getName();
381:
382: _attributes.clear();
383:
384: while (true) {
385: int ch = read();
386:
387: switch (ch) {
388: case -1:
389: throw error("XXX: unexpected eof");
390:
391: case ' ':
392: case '\t':
393: break;
394:
395: case '\r':
396: addCarriageReturnLine();
397: break;
398:
399: case '\n':
400: _line++;
401: break;
402:
403: case '/':
404: if ((ch = read()) != '>')
405: throw error("XXX: expected '>'");
406:
407: _contentHandler.startElement("", "", name, _attributes);
408: _contentHandler.endElement("", "", name);
409:
410: return;
411:
412: case '>':
413: _contentHandler.startElement("", "", name, _attributes);
414:
415: parseContent();
416:
417: InternQName tailQName = parseName();
418: String tailName = tailQName.getName();
419:
420: if ((ch = read()) != '>')
421: throw error("XXX: expected '>'");
422:
423: if (!name.equals(tailName))
424: throw error("XXX: mismatch name");
425:
426: _contentHandler.endElement("", "", name);
427:
428: return;
429:
430: default:
431: if (XmlChar.isNameStart(ch)) {
432: unread();
433:
434: InternQName attrName = parseName();
435: ch = skipWhitespace(read());
436:
437: if (ch != '=')
438: throw error(L
439: .l(
440: "Expected '=' for attribute value at {0}.",
441: badChar(ch)));
442:
443: String attrValue = parseValue();
444:
445: _attributes.add(attrName, attrValue);
446: } else
447: throw error(L
448: .l(
449: "{0} is an unexpected character in element.",
450: badChar(ch)));
451: }
452: }
453: }
454:
455: /**
456: * Parses a name.
457: */
458: private QName parseAttrName() throws IOException {
459: int valueOffset = 0;
460:
461: char[] inputBuf = _inputBuf;
462: char[] valueBuf = _valueBuf;
463:
464: int inputLength = _inputLength;
465: int inputOffset = _inputOffset;
466:
467: while (true) {
468: if (inputOffset < inputLength) {
469: } else if (fillBuffer()) {
470: inputLength = _inputLength;
471: inputOffset = 0;
472: } else {
473: _nameKey.init(valueBuf, 0, valueOffset);
474:
475: QName name = _nameMap.get(_nameKey);
476:
477: if (name == null) {
478: name = new QName(new String(valueBuf, 0,
479: valueOffset), null);
480: _nameMap.put(new NameKey(valueBuf, 0, valueOffset),
481: name);
482: }
483:
484: return name;
485: }
486:
487: char ch = inputBuf[inputOffset++];
488:
489: if (XML_NAME_CHAR[ch])
490: valueBuf[valueOffset++] = ch;
491: else if (ch == ':') {
492: valueBuf[valueOffset++] = ch;
493: } else {
494: _inputOffset = inputOffset - 1;
495:
496: QName name = _nameMap.get(_nameKey);
497:
498: if (name == null) {
499: name = new QName(new String(valueBuf, 0,
500: valueOffset), null);
501: _nameMap.put(new NameKey(valueBuf, 0, valueOffset),
502: name);
503: }
504:
505: return name;
506: }
507: }
508: }
509:
510: /**
511: * Parses a name.
512: */
513: private InternQName parseName() throws IOException {
514: int valueOffset = 0;
515:
516: char[] inputBuf = _inputBuf;
517: char[] valueBuf = _valueBuf;
518:
519: int inputLength = _inputLength;
520: int inputOffset = _inputOffset;
521: int colon = 0;
522:
523: while (true) {
524: if (inputOffset < inputLength) {
525: char ch = inputBuf[inputOffset++];
526:
527: if (XML_NAME_CHAR[ch]) {
528: valueBuf[valueOffset++] = ch;
529: } else if (ch == ':') {
530: if (colon <= 0)
531: colon = valueOffset;
532:
533: valueBuf[valueOffset++] = ch;
534: } else {
535: _inputOffset = inputOffset - 1;
536:
537: return _intern.add(valueBuf, 0, valueOffset, colon);
538: }
539: } else if (fillBuffer()) {
540: inputLength = _inputLength;
541: inputOffset = 0;
542: } else {
543: return _intern.add(valueBuf, 0, valueOffset, colon);
544: }
545: }
546: }
547:
548: /**
549: * Writes text data.
550: */
551: private void writeText(char[] buffer, int length,
552: boolean isWhitespace) throws SAXException {
553: }
554:
555: /**
556: * Adds the line for cr
557: */
558: private void addCarriageReturnLine() throws IOException {
559: if (_inputLength == _inputOffset && !fillBuffer())
560: _line++;
561: else if (_inputBuf[_inputOffset] != '\n')
562: _line++;
563: }
564:
565: /**
566: * Parses an attribute value.
567: */
568: private String parseValue() throws IOException, SAXException {
569: int end = skipWhitespace(read());
570:
571: if (end != '\'' && end != '"')
572: throw error(L.l("expected quote at '{0}'", badChar(end)));
573:
574: int index = 0;
575: char[] inputBuf = _inputBuf;
576: char[] valueBuf = _valueBuf;
577:
578: while (true) {
579: if (_inputLength == _inputOffset && !fillBuffer())
580: throw error(L
581: .l("Unexpected end of file in attribute value."));
582:
583: char ch = inputBuf[_inputOffset++];
584:
585: switch (ch) {
586: case '&':
587: throw error(L.l("Can't handle entities yet."));
588:
589: case '\r':
590: addCarriageReturnLine();
591: ch = ' ';
592: break;
593:
594: case '\n':
595: _line++;
596: ch = ' ';
597: break;
598:
599: case '\'':
600: case '"':
601: if (ch == end)
602: return new String(valueBuf, 0, index);
603: break;
604: }
605:
606: valueBuf[index++] = ch;
607: }
608: }
609:
610: /**
611: * Skips whitespace, returning the next character.
612: */
613: private int skipWhitespace(int ch) throws IOException {
614: while (true) {
615: switch (ch) {
616: case -1:
617: return -1;
618:
619: case ' ':
620: case '\t':
621: break;
622:
623: case '\r':
624: addCarriageReturnLine();
625: break;
626:
627: case '\n':
628: _line++;
629: break;
630:
631: default:
632: return ch;
633: }
634:
635: if (_inputLength == _inputOffset && !fillBuffer())
636: return -1;
637:
638: ch = _inputBuf[_inputOffset++];
639: }
640: }
641:
642: /**
643: * Reads a character.
644: */
645: private int read() throws IOException {
646: if (_inputLength == _inputOffset && !fillBuffer())
647: return -1;
648: else
649: return _inputBuf[_inputOffset++];
650: }
651:
652: /**
653: * Reads a character.
654: */
655: private void unread() throws IOException {
656: _inputOffset--;
657: }
658:
659: /**
660: * Fills the input buffer.
661: */
662: private boolean fillBuffer() throws IOException {
663: _inputOffset = 0;
664: _inputLength = _reader.read(_inputBuf, 0, _inputBuf.length);
665:
666: return _inputLength > 0;
667: }
668:
669: /**
670: * Returns a string for a bad char.
671: */
672: private String badChar(int ch) {
673: return "" + (char) ch;
674: }
675:
676: /**
677: * Returns an error.
678: */
679: private SAXException error(String msg) {
680: return new SAXException(msg);
681: }
682:
683: class LocatorImpl implements ExtendedLocator {
684: /**
685: * Returns the parser's system id.
686: */
687: public String getSystemId() {
688: return _systemId;
689: /*
690: if (_parser._reader != null && _parser._reader.getSystemId() != null)
691: return _parser._reader.getSystemId();
692: else if (_parser.getSystemId() != null)
693: return _parser.getSystemId();
694: else if (_parser._reader != null && _parser._reader.getFilename() != null)
695: return _parser._reader.getFilename();
696: else if (_parser.getFilename() != null)
697: return _parser.getFilename();
698: else
699: return null;
700: */
701: }
702:
703: /**
704: * Returns the parser's filename.
705: */
706: public String getFilename() {
707: return _filename;
708: /*
709: if (_parser._reader != null && _parser._reader.getFilename() != null)
710: return _parser._reader.getFilename();
711: else if (_parser.getFilename() != null)
712: return _parser.getFilename();
713: else if (_parser._reader != null && _parser._reader.getSystemId() != null)
714: return _parser._reader.getSystemId();
715: else if (_parser.getSystemId() != null)
716: return _parser.getSystemId();
717: else
718: return null;
719: */
720: }
721:
722: /**
723: * Returns the public id.
724: */
725: public String getPublicId() {
726: return _publicId;
727: /*
728: if (_parser._reader != null)
729: return _parser._reader.getPublicId();
730: else
731: return _parser.getPublicId();
732: */
733: }
734:
735: /**
736: * Returns the line number.
737: */
738: public int getLineNumber() {
739: return _line;
740: /*
741: if (_parser._reader != null)
742: return _parser._reader.getLine();
743: else
744: return _parser.getLineNumber();
745: */
746: }
747:
748: /**
749: * Returns the column.
750: */
751: public int getColumnNumber() {
752: return -1;
753: }
754: }
755:
756: static class NameKey {
757: char[] _buf;
758: int _offset;
759: int _length;
760:
761: NameKey() {
762: }
763:
764: NameKey(char[] buf, int offset, int length) {
765: _buf = new char[length];
766: System.arraycopy(buf, offset, _buf, 0, length);
767: _offset = 0;
768: _length = 0;
769: }
770:
771: void init(char[] buf, int offset, int length) {
772: _buf = buf;
773: _offset = offset;
774: _length = length;
775: }
776:
777: @Override
778: public int hashCode() {
779: int hash = 37;
780:
781: char buf[] = _buf;
782: for (int i = _length - 1; i >= 0; i--)
783: hash = 65537 * hash + buf[i];
784:
785: return hash;
786: }
787:
788: @Override
789: public boolean equals(Object o) {
790: NameKey key = (NameKey) o;
791:
792: int length = _length;
793: if (length != key._length)
794: return false;
795:
796: char[] aBuf = _buf;
797: char[] bBuf = key._buf;
798:
799: int aOffset = _offset;
800: int bOffset = key._offset;
801:
802: for (int i = 0; i < length; i++) {
803: if (aBuf[aOffset + i] != bBuf[bOffset + i])
804: return false;
805: }
806:
807: return true;
808: }
809: }
810:
811: static {
812: XML_NAME_CHAR = new boolean[65536];
813:
814: for (int i = 0; i < 65536; i++) {
815: XML_NAME_CHAR[i] = XmlChar.isNameChar(i) && i != ':';
816: }
817: }
818: }
|