001: /*
002: * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
003: *
004: * This file is part of Resin(R) Open Source
005: *
006: * Each copy or derived work must preserve the copyright notice and this
007: * notice unmodified.
008: *
009: * Resin Open Source is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU General Public License as published by
011: * the Free Software Foundation; either version 2 of the License, or
012: * (at your option) any later version.
013: *
014: * Resin Open Source is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
017: * of NON-INFRINGEMENT. See the GNU General Public License for more
018: * details.
019: *
020: * You should have received a copy of the GNU General Public License
021: * along with Resin Open Source; if not, write to the
022: * Free SoftwareFoundation, Inc.
023: * 59 Temple Place, Suite 330
024: * Boston, MA 02111-1307 USA
025: *
026: * @author Scott Ferguson
027: */
028:
029: package com.caucho.xml2;
030:
031: import com.caucho.server.util.CauchoSystem;
032: import com.caucho.util.L10N;
033: import com.caucho.vfs.Path;
034: import com.caucho.vfs.ReadStream;
035: import com.caucho.vfs.Vfs;
036: import com.caucho.vfs.VfsStream;
037:
038: import org.w3c.dom.Document;
039: import org.xml.sax.*;
040: import org.xml.sax.ext.LexicalHandler;
041:
042: import javax.xml.parsers.DocumentBuilderFactory;
043: import java.io.FileNotFoundException;
044: import java.io.IOException;
045: import java.io.InputStream;
046: import java.util.Hashtable;
047: import java.util.Locale;
048: import java.util.logging.Logger;
049:
050: abstract public class AbstractParser implements XMLReader, Parser {
051: static final Logger log = Logger.getLogger(AbstractParser.class
052: .getName());
053: static final L10N L = new L10N(AbstractParser.class);
054:
055: static Hashtable<String, String> _attrTypes = new Hashtable<String, String>();
056: static Entities _xmlEntities = new XmlEntities();
057:
058: boolean _isCoalescing = true;
059:
060: boolean _optionalTags = true;
061: boolean _skipWhitespace;
062: boolean _skipComments;
063: boolean _strictComments;
064: boolean _strictAttributes;
065: boolean _entitiesAsText = false;
066: boolean _expandEntities = true;
067: boolean _strictCharacters;
068: boolean _strictXml;
069: boolean _singleTopElement;
070:
071: boolean _isNamespaceAware = true;
072: boolean _isNamespacePrefixes = true;
073: boolean _isSAXNamespaces = false;
074:
075: boolean _isXmlnsPrefix;
076: boolean _isXmlnsAttribute;
077:
078: boolean _isValidating = false;
079:
080: boolean _isStaticEncoding = false;
081: String _defaultEncoding = "UTF-8";
082:
083: // sax stuff
084: ContentHandler _contentHandler;
085: EntityResolver _entityResolver;
086: DTDHandler _dtdHandler;
087: LexicalHandler _lexicalHandler;
088: ErrorHandler _errorHandler;
089: Locale _locale;
090:
091: Entities _entities;
092: QDocument _owner;
093: QDocumentType _dtd;
094:
095: DOMBuilder _builder;
096:
097: Path _searchPath;
098:
099: String _publicId;
100: String _systemId;
101: String _filename;
102: int _line = 1;
103:
/**
 * Creates a new parser backed by a newly created QDocumentType.
 */
AbstractParser() {
  this((QDocumentType) null);
}
110:
/**
 * Creates a new parser with the given dtd.
 *
 * @param dtd the parser's dtd; when null a new QDocumentType is created.
 */
AbstractParser(QDocumentType dtd) {
  _dtd = (dtd != null) ? dtd : new QDocumentType(null);

  // every parser starts out with the shared entity table
  _entities = _xmlEntities;
}
124:
125: void clear() {
126: _isCoalescing = true;
127:
128: _isNamespaceAware = true;
129: _isSAXNamespaces = false;
130: _isNamespacePrefixes = false;
131: _optionalTags = true;
132: _skipWhitespace = false;
133: _skipComments = false;
134: _strictComments = false;
135: _strictAttributes = false;
136: _entitiesAsText = false;
137: _expandEntities = true;
138: _strictCharacters = false;
139: _strictXml = false;
140: _singleTopElement = false;
141:
142: _defaultEncoding = "UTF-8";
143: _isStaticEncoding = false;
144: }
145:
146: void init() {
147: /*
148: _isXmlnsPrefix = (_isNamespaceAware ||
149: _isSAXNamespaces ||
150: _isNamespacePrefixes);
151: */
152: _isXmlnsPrefix = _isNamespaceAware || _isNamespacePrefixes;
153: _isXmlnsAttribute = _isNamespacePrefixes || !_isNamespaceAware;
154: }
155:
156: /**
157: * Sets the owner.
158: */
159: public void setOwner(QDocument doc) {
160: _owner = doc;
161: }
162:
163: /**
164: * Sets the configuration for a document builder.
165: */
166: public void setConfig(DocumentBuilderFactory factory) {
167: if (_builder == null)
168: _builder = new DOMBuilder();
169:
170: _isCoalescing = factory.isCoalescing();
171: setExpandEntities(factory.isExpandEntityReferences());
172: setSkipComments(factory.isIgnoringComments());
173: setSkipWhitespace(factory.isIgnoringElementContentWhitespace());
174: setNamespaceAware(factory.isNamespaceAware());
175: setNamespacePrefixes(false);
176: setValidating(factory.isValidating());
177: }
178:
179: public void setEntitiesAsText(boolean entitiesAsText) {
180: _entitiesAsText = entitiesAsText;
181: }
182:
183: public boolean getEntitiesAsText() {
184: return _entitiesAsText;
185: }
186:
187: public void setExpandEntities(boolean expandEntities) {
188: _expandEntities = expandEntities;
189: }
190:
191: /**
192: * Set to true if comments should be skipped. If false events will be
193: * generated for the comments.
194: */
195: public void setSkipComments(boolean skipComments) {
196: _skipComments = skipComments;
197: }
198:
199: /**
200: * Set to true if ignorable-whitespace should be skipped.
201: */
202: public void setSkipWhitespace(boolean skipWhitespace) {
203: _skipWhitespace = skipWhitespace;
204: }
205:
206: /**
207: * Returns true if text and cdata nodes will be combined.
208: */
209: public boolean isCoalescing() {
210: return _isCoalescing;
211: }
212:
213: /**
214: * Set true if text and cdata nodes should be combined.
215: */
216: public void setCoalescing(boolean isCoalescing) {
217: _isCoalescing = isCoalescing;
218: }
219:
220: /**
221: * Returns true if the XML should be validated
222: */
223: public boolean isValidating() {
224: return _isValidating;
225: }
226:
227: /**
228: * Set true if the XML should be validated
229: */
230: public void setValidating(boolean isValidating) {
231: _isValidating = isValidating;
232: }
233:
234: /**
235: * Returns true if the parsing is namespace aware.
236: */
237: public boolean isNamespaceAware() {
238: return _isNamespaceAware;
239: }
240:
241: /**
242: * Set true if the parsing is namespace aware.
243: */
244: public void setNamespaceAware(boolean isNamespaceAware) {
245: _isNamespaceAware = isNamespaceAware;
246: }
247:
248: /**
249: * Returns true if the parsing uses sax namespaces
250: */
251: public boolean isSAXNamespaces() {
252: return _isSAXNamespaces;
253: }
254:
255: /**
256: * Set true if the parsing uses sax namespaces
257: */
258: public void setSAXNamespaces(boolean isNamespaces) {
259: _isSAXNamespaces = isNamespaces;
260: }
261:
262: /**
263: * Returns true if the parsing uses namespace prefixes
264: */
265: public boolean isNamespacePrefixes() {
266: return _isNamespacePrefixes;
267: }
268:
269: /**
270: * Set true if the parsing uses sax namespaces
271: */
272: public void setNamespacePrefixes(boolean isNamespaces) {
273: _isNamespacePrefixes = isNamespaces;
274: }
275:
276: public boolean getSkipComments() {
277: return _skipComments;
278: }
279:
280: /**
281: * Sets the default encoding if none is specified.
282: *
283: * @param encoding the default encoding
284: */
285: public void setDefaultEncoding(String encoding) {
286: _defaultEncoding = encoding;
287: }
288:
289: /**
290: * Gets the default encoding if none is specified.
291: */
292: public String getDefaultEncoding() {
293: return _defaultEncoding;
294: }
295:
296: public Object getProperty(String name)
297: throws SAXNotRecognizedException {
298: if (name
299: .equals("http://xml.org/sax/properties/lexical-handler"))
300: return _lexicalHandler;
301: else if (name.equals("http://xml.org/sax/properties/dom-node"))
302: return null;
303: else if (name
304: .equals("http://xml.org/sax/properties/xml-string"))
305: return null;
306: else
307: throw new SAXNotRecognizedException(name);
308: }
309:
310: public void setProperty(String name, Object obj)
311: throws SAXNotSupportedException {
312: if (name
313: .equals("http://xml.org/sax/properties/lexical-handler"))
314: _lexicalHandler = (LexicalHandler) obj;
315: else if (name
316: .equals("http://xml.org/sax/handlers/LexicalHandler"))
317: _lexicalHandler = (LexicalHandler) obj;
318: else
319: throw new SAXNotSupportedException(name);
320: }
321:
322: public boolean getFeature(String name)
323: throws SAXNotRecognizedException {
324: if (name.equals("http://xml.org/sax/features/namespaces"))
325: return _isSAXNamespaces;
326: else if (name
327: .equals("http://xml.org/sax/features/namespace-prefixes"))
328: return _isNamespacePrefixes;
329: else if (name
330: .equals("http://xml.org/sax/features/string-interning"))
331: return true;
332: else if (name.equals("http://xml.org/sax/features/validation"))
333: return _isValidating;
334: else if (name
335: .equals("http://xml.org/sax/features/external-general-entities"))
336: return true;
337: else if (name
338: .equals("http://xml.org/sax/features/external-parameter-entities"))
339: return false;
340: else if (name
341: .equals("http://caucho.com/xml/features/skip-comments"))
342: return _skipComments;
343: else
344: throw new SAXNotRecognizedException(name);
345: }
346:
347: public void setFeature(String name, boolean value)
348: throws SAXNotSupportedException {
349: if (name.equals("http://xml.org/sax/features/namespaces")) {
350: _isNamespaceAware = value;
351: } else if (name
352: .equals("http://xml.org/sax/features/namespace-prefixes")) {
353: // setting namespace-prefixes, even if false, sets namespace-aware
354: // see xml/032b
355: _isNamespacePrefixes = value;
356: _isNamespaceAware = true;
357: } else if (name
358: .equals("http://caucho.com/xml/features/skip-comments")) {
359: _skipComments = value;
360: } else if (name
361: .equals("http://xml.org/sax/features/validation"))
362: _isValidating = value;
363: else
364: throw new SAXNotSupportedException(name);
365: }
366:
367: public void setLexicalHandler(LexicalHandler handler) {
368: _lexicalHandler = handler;
369: }
370:
371: /**
372: * Sets the callback object to find files.
373: *
374: * @param resolver the object to find files.
375: */
376: public void setEntityResolver(EntityResolver resolver) {
377: _entityResolver = resolver;
378: }
379:
380: /**
381: * Sets the callback object finding files from system ids.
382: *
383: * @return the resolver to find files.
384: */
385: public EntityResolver getEntityResolver() {
386: return _entityResolver;
387: }
388:
389: public void setDTDHandler(DTDHandler handler) {
390: _dtdHandler = handler;
391: }
392:
393: public DTDHandler getDTDHandler() {
394: return _dtdHandler;
395: }
396:
397: public void setContentHandler(ContentHandler handler) {
398: _contentHandler = handler;
399: }
400:
401: public ContentHandler getContentHandler() {
402: return _contentHandler;
403: }
404:
405: /**
406: * Configures the document handler callback.
407: *
408: * @param handler the new document handler.
409: */
410: public void setDocumentHandler(DocumentHandler handler) {
411: if (handler == null)
412: _contentHandler = null;
413: else
414: _contentHandler = new ContentHandlerAdapter(handler);
415: }
416:
417: public void setErrorHandler(ErrorHandler handler) {
418: _errorHandler = handler;
419: }
420:
421: public ErrorHandler getErrorHandler() {
422: return _errorHandler;
423: }
424:
425: public void setLocale(Locale locale) {
426: _locale = locale;
427: }
428:
429: /**
430: * SAX parsing from a SAX InputSource
431: *
432: * @param source source containing the XML
433: */
434: public void parse(InputSource source) throws IOException,
435: SAXException {
436: init();
437:
438: if (_searchPath == null) {
439: if (source.getSystemId() != null)
440: _searchPath = Vfs.lookup(source.getSystemId())
441: .getParent();
442: }
443:
444: _systemId = source.getSystemId();
445: _publicId = source.getPublicId();
446: ReadStream stream;
447: String encoding = null;
448:
449: if (source.getByteStream() != null) {
450: stream = Vfs.openRead(source.getByteStream());
451: encoding = source.getEncoding();
452: } else if (source.getCharacterStream() != null) {
453: encoding = "UTF-8";
454: _isStaticEncoding = true;
455: stream = Vfs.openRead(source.getCharacterStream());
456: } else if (source.getSystemId() != null) {
457: InputStream is = openStream(source.getSystemId(), source
458: .getPublicId(), null, true);
459: stream = Vfs.openRead(is);
460: encoding = source.getEncoding();
461: } else
462: throw new FileNotFoundException(L.l("invalid InputSource"));
463:
464: if (encoding != null)
465: stream.setEncoding(encoding);
466:
467: try {
468: parseInt(stream);
469: } finally {
470: stream.close();
471: }
472: }
473:
474: /**
475: * SAX parsing from an InputStream
476: *
477: * @param is stream containing the XML
478: */
479: public void parse(InputStream is) throws IOException, SAXException {
480: init();
481:
482: _systemId = "stream";
483:
484: if (is instanceof ReadStream) {
485: Path path = ((ReadStream) is).getPath();
486: _systemId = path.getURL();
487: _filename = path.getUserPath();
488:
489: if (_searchPath != null) {
490: } else if (path != null)
491: _searchPath = path.getParent();
492:
493: parseInt((ReadStream) is);
494: } else {
495: ReadStream rs = VfsStream.openRead(is);
496: try {
497: parseInt(rs);
498: } finally {
499: if (rs != is)
500: rs.close();
501: }
502: }
503: }
504:
505: /**
506: * SAX parsing from an InputStream
507: *
508: * @param is stream containing the XML
509: */
510: public void parse(InputStream is, String systemId)
511: throws IOException, SAXException {
512: init();
513:
514: parseImpl(is, systemId);
515: }
516:
517: /**
518: * SAX parsing from an InputStream
519: *
520: * @param is stream containing the XML
521: */
522: public void parseImpl(InputStream is, String systemId)
523: throws IOException, SAXException {
524: if (is instanceof ReadStream) {
525: Path path = ((ReadStream) is).getPath();
526:
527: if (_searchPath != null) {
528: } else if (path != null) {
529: _searchPath = path.getParent();
530: if (systemId != null)
531: _searchPath = _searchPath.lookup(systemId)
532: .getParent();
533: } else if (systemId != null)
534: _searchPath = Vfs.lookup(systemId).getParent();
535:
536: if (systemId == null) {
537: systemId = path.getURL();
538: _filename = ((ReadStream) is).getUserPath();
539: } else
540: _filename = systemId;
541:
542: _systemId = systemId;
543:
544: parseInt((ReadStream) is);
545: } else {
546: if (systemId == null) {
547: _systemId = "anonymous.xml";
548: } else {
549: _searchPath = Vfs.lookup(systemId).getParent();
550: _systemId = systemId;
551: }
552:
553: ReadStream rs = VfsStream.openRead(is);
554: try {
555: parseInt(rs);
556: } finally {
557: if (rs != is)
558: rs.close();
559: }
560: }
561: }
562:
563: /**
564: * SAX parsing from a file path
565: *
566: * @param systemId path to the file containing the XML
567: */
568: public void parse(String systemId) throws IOException, SAXException {
569: InputStream is = openTopStream(systemId, null);
570: try {
571: parse(is);
572: } finally {
573: is.close();
574: }
575: }
576:
577: /**
578: * SAX parsing from a VFS path
579: */
580: public void parse(Path path) throws IOException, SAXException {
581: init();
582:
583: if (_searchPath == null)
584: _searchPath = path.getParent();
585:
586: ReadStream is = path.openRead();
587: try {
588: parseInt(is);
589: } finally {
590: is.close();
591: }
592: }
593:
594: /**
595: * SAX parsing from a string.
596: *
597: * @param string string containing the XML
598: */
599: public void parseString(String string) throws IOException,
600: SAXException {
601: init();
602:
603: ReadStream is = Vfs.openString(string);
604:
605: try {
606: parseInt(is);
607: } finally {
608: is.close();
609: }
610: }
611:
612: /**
613: * Parses a document from a SAX InputSource
614: *
615: * @param source SAX InputSource containing the XML data.
616: */
617: public Document parseDocument(InputSource source)
618: throws IOException, SAXException {
619: init();
620:
621: QDocument doc = new QDocument();
622:
623: if (_builder == null)
624: _builder = new DOMBuilder();
625:
626: _builder.init(doc);
627: setOwner(doc);
628:
629: doc.setSystemId(source.getSystemId());
630: _builder.setSystemId(source.getSystemId());
631: _builder.setStrictXML(_strictXml);
632: _builder.setCoalescing(_isCoalescing);
633: _builder.setSkipWhitespace(_skipWhitespace);
634: _contentHandler = _builder;
635:
636: parse(source);
637:
638: return doc;
639: }
640:
641: /**
642: * Parses a document from system path.
643: *
644: * @param systemId path to the XML data.
645: */
646: public Document parseDocument(String systemId) throws IOException,
647: SAXException {
648: InputStream is = openTopStream(systemId, null);
649: try {
650: return parseDocument(is);
651: } finally {
652: is.close();
653: }
654: }
655:
656: /**
657: * Parses a document from a VFS path
658: *
659: * @param path the VFS path containing the XML document.
660: */
661: public Document parseDocument(Path path) throws IOException,
662: SAXException {
663: if (_searchPath == null)
664: _searchPath = path.getParent();
665:
666: ReadStream is = path.openRead();
667: try {
668: return parseDocument(is);
669: } finally {
670: is.close();
671: }
672: }
673:
674: /**
675: * Parses an input stream into a DOM document
676: *
677: * @param is the input stream containing the XML
678: *
679: * @return the parsed document.
680: */
681: public Document parseDocument(InputStream is) throws IOException,
682: SAXException {
683: return parseDocument(is, null);
684: }
685:
686: /**
687: * Parses an input stream into a DOM document
688: *
689: * @param is the input stream containing the XML
690: * @param systemId the URL of the stream.
691: *
692: * @return the parsed document.
693: */
694: public Document parseDocument(InputStream is, String systemId)
695: throws IOException, SAXException {
696: init();
697:
698: QDocument doc = new QDocument();
699: parseDocument(doc, is, systemId);
700:
701: return doc;
702: }
703:
704: public void parseDocument(QDocument doc, InputStream is,
705: String systemId) throws IOException, SAXException {
706: _owner = doc;
707:
708: if (_builder == null)
709: _builder = new DOMBuilder();
710:
711: _builder.init(_owner);
712: _builder.setSystemId(systemId);
713: _builder.setCoalescing(_isCoalescing);
714: _builder.setSkipWhitespace(_skipWhitespace);
715: _contentHandler = _builder;
716:
717: parseImpl(is, systemId);
718: }
719:
720: /**
721: * Parses a string into a DOM document
722: *
723: * @param string the string containing the XML
724: */
725: public Document parseDocumentString(String string)
726: throws IOException, SAXException {
727: ReadStream is = Vfs.openString(string);
728:
729: try {
730: _isStaticEncoding = true;
731: return parseDocument(is);
732: } finally {
733: is.close();
734: }
735: }
736:
737: /**
738: * Looks up an input stream from the system id.
739: */
740: public InputStream openStream(String systemId, String publicId)
741: throws IOException, SAXException {
742: return openStream(systemId, publicId, _entityResolver, false);
743: }
744:
745: /**
746: * Looks up an input stream from the system id.
747: */
748: public InputStream openTopStream(String systemId, String publicId)
749: throws IOException, SAXException {
750: return openStream(systemId, publicId, _entityResolver, true);
751: }
752:
753: /**
754: * Looks up an input stream from the system id.
755: */
756: public InputStream openStream(String systemId, String publicId,
757: EntityResolver entityResolver) throws IOException,
758: SAXException {
759: return openStream(systemId, publicId, entityResolver, false);
760: }
761:
762: /**
763: * Looks up an input stream from the system id.
764: */
765: protected InputStream openStream(String systemId, String publicId,
766: EntityResolver entityResolver, boolean isTop)
767: throws IOException, SAXException {
768: int colon = systemId.indexOf(':');
769: int slash = systemId.indexOf('/');
770:
771: boolean isAbsolute = colon > 0 && (colon < slash || slash < 0);
772:
773: if (slash == 0 || !isAbsolute) {
774: Path pwd;
775:
776: if (_searchPath != null)
777: pwd = _searchPath;
778: else
779: pwd = Vfs.lookup(systemId).getParent();
780:
781: String newId = pwd.lookup(systemId).getURL();
782: if (!newId.startsWith("error:"))
783: systemId = newId;
784: else {
785: int tail = _systemId.lastIndexOf('/');
786: if (tail >= 0)
787: systemId = _systemId.substring(0, tail + 1)
788: + systemId;
789: }
790: }
791:
792: // xml/03c5 -- must be after the normalization
793: if (entityResolver != null) {
794: InputSource source = entityResolver.resolveEntity(publicId,
795: systemId);
796:
797: if (source != null) {
798: _filename = systemId;
799: _systemId = systemId;
800:
801: return openSource(source);
802: }
803: }
804:
805: int ch;
806: if (CauchoSystem.isWindows()
807: && systemId.startsWith("file:")
808: && systemId.length() > 7
809: && systemId.charAt(6) == ':'
810: && (((ch = systemId.charAt(5)) >= 'a' && ch <= 'z') || ch >= 'A'
811: && ch <= 'Z')) {
812: colon = 1;
813: isAbsolute = false;
814: systemId = "/" + systemId.substring(5);
815: }
816:
817: if (!isTop && isAbsolute && !systemId.startsWith("file:")
818: && !systemId.startsWith("jar:")
819: && !(colon == 1 && CauchoSystem.isWindows())) {
820: throw new RemoteURLException(
821: L
822: .l(
823: "URL `{0}' was not opened because it is a remote URL. Any URL scheme other than file: must be handled by a custom entity resolver.",
824: systemId));
825: } else if (_searchPath != null) {
826: return _searchPath.lookup(systemId).openRead();
827: } else
828: return Vfs.lookup(systemId).openRead();
829: }
830:
831: /**
832: * Opens the source
833: */
834: protected InputStream openSource(InputSource source)
835: throws IOException, SAXException {
836: if (source.getByteStream() != null) {
837: return source.getByteStream();
838: } else if (source.getCharacterStream() != null) {
839: return Vfs.openRead(source.getCharacterStream());
840: } else if (source.getSystemId() != null) {
841: return Vfs.openRead(source.getSystemId());
842: } else
843: throw new FileNotFoundException(L.l(
844: "invalid InputSource {0}", source));
845: }
846:
847: /**
848: * Parse the document from a read stream.
849: *
850: * @param is read stream to parse from.
851: *
852: * @return The parsed document.
853: */
854: abstract Document parseInt(ReadStream is) throws IOException,
855: SAXException;
856:
// Registers each attribute type name in _attrTypes, mapped to itself.
static {
  String[] typeNames = {
    "CDATA", "ID", "IDREF", "IDREFS",
    "ENTITY", "ENTITIES", "NMTOKEN", "NMTOKENS"
  };

  for (int i = 0; i < typeNames.length; i++) {
    _attrTypes.put(typeNames[i], typeNames[i]);
  }
}
867: }
|