001: /*
002: * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved
003: *
004: * This file is part of Resin(R) Open Source
005: *
006: * Each copy or derived work must preserve the copyright notice and this
007: * notice unmodified.
008: *
009: * Resin Open Source is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU General Public License as published by
011: * the Free Software Foundation; either version 2 of the License, or
012: * (at your option) any later version.
013: *
014: * Resin Open Source is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
017: * of NON-INFRINGEMENT. See the GNU General Public License for more
018: * details.
019: *
020: * You should have received a copy of the GNU General Public License
021: * along with Resin Open Source; if not, write to the
022: * Free SoftwareFoundation, Inc.
023: * 59 Temple Place, Suite 330
024: * Boston, MA 02111-1307 USA
025: *
026: * @author Scott Ferguson
027: */
028:
029: package com.caucho.xml;
030:
031: import com.caucho.server.util.CauchoSystem;
032: import com.caucho.util.L10N;
033: import com.caucho.vfs.Path;
034: import com.caucho.vfs.ReadStream;
035: import com.caucho.vfs.Vfs;
036: import com.caucho.vfs.VfsStream;
037:
038: import org.w3c.dom.Document;
039: import org.xml.sax.*;
040: import org.xml.sax.ext.LexicalHandler;
041:
042: import javax.xml.parsers.DocumentBuilderFactory;
043: import java.io.FileNotFoundException;
044: import java.io.IOException;
045: import java.io.InputStream;
046: import java.util.Hashtable;
047: import java.util.Locale;
048: import java.util.logging.Logger;
049:
050: abstract public class AbstractParser implements XMLReader, Parser {
// Class-wide logger and localized-message helper.
static final Logger log = Logger.getLogger(AbstractParser.class
    .getName());
static final L10N L = new L10N(AbstractParser.class);

// DTD attribute type names; the static initializer maps each name to itself.
static Hashtable<String, String> _attrTypes = new Hashtable<String, String>();
// Entity table installed by the (Policy, QDocumentType) constructor unless
// the policy is an HtmlPolicy.
static Entities _xmlEntities = new XmlEntities();

// Parsing policy handed to the constructor.
Policy _policy;

// If true, text and cdata nodes are combined (see setCoalescing).
boolean _isCoalescing = true;

// Parser option flags. clear() assigns a fixed value to each of the flags
// from _optionalTags through _doResinInclude.
boolean _optionalTags = true;
boolean _skipWhitespace;
boolean _skipComments;
boolean _strictComments;
boolean _strictAttributes;
boolean _entitiesAsText = false;
boolean _expandEntities = true;
boolean _strictCharacters;
boolean _strictXml;
boolean _singleTopElement;
boolean _normalizeWhitespace = false;
boolean _forgiving;
boolean _extraForgiving;
// Set through setAutodetectXml().
boolean _switchToXml = false;
boolean _doResinInclude = false;

// Namespace handling options; init() derives the two _isXmlns* flags below
// from _isNamespaceAware and _isNamespacePrefixes.
boolean _isNamespaceAware = true;
boolean _isNamespacePrefixes = true;
boolean _isSAXNamespaces = false;

// Derived in init(); not meant to be set directly.
boolean _isXmlnsPrefix;
boolean _isXmlnsAttribute;

boolean _isValidating = false;

boolean _isJsp;

// Set to true when parsing starts from a character stream or a string.
boolean _isStaticEncoding = false;
String _defaultEncoding = "UTF-8";

// sax stuff
ContentHandler _contentHandler;
EntityResolver _entityResolver;
DTDHandler _dtdHandler;
LexicalHandler _lexicalHandler;
ErrorHandler _errorHandler;
Locale _locale;

// Active entity table, chosen by the constructor from the policy type.
Entities _entities;
// Document being built, if any (setOwner / parseDocument).
QDocument _owner;
// DTD handed to the constructor, or a fresh QDocumentType when none was given.
QDocumentType _dtd;

// Created lazily by setConfig() and the parseDocument() methods.
DOMBuilder _builder;

// Base path used to resolve relative system ids of included documents.
Path _searchPath;

// Identification of the document currently being parsed.
String _publicId;
String _systemId;
String _filename;
// NOTE(review): presumably the current 1-based input line; it is only
// initialized here and maintained outside this section -- confirm.
int _line = 1;
112:
113: /**
114: * Creates a new parser with the XmlPolicy and a new dtd.
115: */
116: AbstractParser() {
117: this (new XmlPolicy(), null);
118:
119: _policy.strictComments = true;
120: _policy.strictAttributes = true;
121: _policy.strictCharacters = true;
122: _policy.strictXml = true;
123: _policy.singleTopElement = true;
124: _policy.optionalTags = false;
125: }
126:
/**
 * Creates a parser using the supplied policy and dtd.
 *
 * @param policy the parsing policy; an {@code HtmlPolicy} selects the
 * HTML 4.0 entity table, any other policy the XML entity table.
 * @param dtd the parser's dtd, or null to start with a new empty
 * {@code QDocumentType}.
 */
AbstractParser(Policy policy, QDocumentType dtd) {
  _policy = policy;
  _dtd = (dtd != null) ? dtd : new QDocumentType(null);

  if (policy instanceof HtmlPolicy)
    _entities = HtmlEntities.create(4.0);
  else
    _entities = _xmlEntities;
}
144:
145: void clear() {
146: _isCoalescing = true;
147:
148: _isNamespaceAware = true;
149: _isSAXNamespaces = false;
150: _isNamespacePrefixes = false;
151: _optionalTags = true;
152: _skipWhitespace = false;
153: _skipComments = false;
154: _strictComments = false;
155: _strictAttributes = false;
156: _entitiesAsText = false;
157: _expandEntities = true;
158: _strictCharacters = false;
159: _strictXml = false;
160: _singleTopElement = false;
161: _normalizeWhitespace = false;
162: _forgiving = false;
163: _extraForgiving = false;
164: _switchToXml = false;
165: _doResinInclude = false;
166:
167: _isJsp = false;
168:
169: _defaultEncoding = "UTF-8";
170: _isStaticEncoding = false;
171: }
172:
173: void init() {
174: /*
175: _isXmlnsPrefix = (_isNamespaceAware ||
176: _isSAXNamespaces ||
177: _isNamespacePrefixes);
178: */
179: _isXmlnsPrefix = _isNamespaceAware || _isNamespacePrefixes;
180: _isXmlnsAttribute = _isNamespacePrefixes || !_isNamespaceAware;
181: }
182:
183: /**
184: * Sets the owner.
185: */
186: public void setOwner(QDocument doc) {
187: _owner = doc;
188: }
189:
190: public void setFilename(String filename) {
191: _filename = filename;
192: }
193:
194: /**
195: * Sets the configuration for a document builder.
196: */
197: public void setConfig(DocumentBuilderFactory factory) {
198: if (_builder == null)
199: _builder = new DOMBuilder();
200:
201: _isCoalescing = factory.isCoalescing();
202: setExpandEntities(factory.isExpandEntityReferences());
203: setSkipComments(factory.isIgnoringComments());
204: setSkipWhitespace(factory.isIgnoringElementContentWhitespace());
205: setNamespaceAware(factory.isNamespaceAware());
206: setNamespacePrefixes(false);
207: setValidating(factory.isValidating());
208: }
209:
210: public void setEntitiesAsText(boolean entitiesAsText) {
211: _entitiesAsText = entitiesAsText;
212: }
213:
214: public boolean getEntitiesAsText() {
215: return _entitiesAsText;
216: }
217:
218: public void setExpandEntities(boolean expandEntities) {
219: _expandEntities = expandEntities;
220: _policy.expandEntities = expandEntities;
221: }
222:
223: /**
224: * Set to true if comments should be skipped. If false events will be
225: * generated for the comments.
226: */
227: public void setSkipComments(boolean skipComments) {
228: _skipComments = skipComments;
229: }
230:
231: /**
232: * Set to true if ignorable-whitespace should be skipped.
233: */
234: public void setSkipWhitespace(boolean skipWhitespace) {
235: _skipWhitespace = skipWhitespace;
236: }
237:
238: /**
239: * Returns true if text and cdata nodes will be combined.
240: */
241: public boolean isCoalescing() {
242: return _isCoalescing;
243: }
244:
245: /**
246: * Set true if text and cdata nodes should be combined.
247: */
248: public void setCoalescing(boolean isCoalescing) {
249: _isCoalescing = isCoalescing;
250: }
251:
252: /**
253: * Returns true if the XML should be validated
254: */
255: public boolean isValidating() {
256: return _isValidating;
257: }
258:
259: /**
260: * Set true if the XML should be validated
261: */
262: public void setValidating(boolean isValidating) {
263: _isValidating = isValidating;
264: }
265:
266: /**
267: * Returns true if the parsing is namespace aware.
268: */
269: public boolean isNamespaceAware() {
270: return _isNamespaceAware;
271: }
272:
273: /**
274: * Set true if the parsing is namespace aware.
275: */
276: public void setNamespaceAware(boolean isNamespaceAware) {
277: _isNamespaceAware = isNamespaceAware;
278: }
279:
280: /**
281: * Returns true if the parsing uses sax namespaces
282: */
283: public boolean isSAXNamespaces() {
284: return _isSAXNamespaces;
285: }
286:
287: /**
288: * Set true if the parsing uses sax namespaces
289: */
290: public void setSAXNamespaces(boolean isNamespaces) {
291: _isSAXNamespaces = isNamespaces;
292: }
293:
294: /**
295: * Returns true if the parsing uses namespace prefixes
296: */
297: public boolean isNamespacePrefixes() {
298: return _isNamespacePrefixes;
299: }
300:
301: /**
302: * Set true if the parsing uses sax namespaces
303: */
304: public void setNamespacePrefixes(boolean isNamespaces) {
305: _isNamespacePrefixes = isNamespaces;
306: }
307:
308: /**
309: * If true, normalizes HTML tags to lower case.
310: */
311: public void setToLower(boolean toLower) {
312: if (_policy instanceof HtmlPolicy)
313: ((HtmlPolicy) _policy).setToLower(toLower);
314: }
315:
316: public boolean getSkipComments() {
317: return _skipComments;
318: }
319:
320: /**
321: * Sets the parser as a forgiving parser, allowing some non-strict
322: * XML.
323: *
324: * @param forgiving if true, forgives non-strict XML.
325: */
326: public void setForgiving(boolean forgiving) {
327: _forgiving = forgiving;
328: }
329:
330: /**
331: * Returns true if the parser is forgiving.
332: *
333: * @return true if the parser forgives non-strict XML.
334: */
335: public boolean getForgiving() {
336: return _forgiving;
337: }
338:
339: /**
340: * Set true if the parser should switch from HTML to XML if it detects
341: * the <?xml ?> header.
342: */
343: public void setAutodetectXml(boolean autodetectXml) {
344: _switchToXml = autodetectXml;
345: }
346:
347: /**
348: * Sets the parser to handle special JSP forgiveness.
349: *
350: * @param isJsp if true, handles special JSP forgiveness.
351: */
352: public void setJsp(boolean isJsp) {
353: _isJsp = isJsp;
354:
355: if (_policy instanceof HtmlPolicy)
356: ((HtmlPolicy) _policy).setJsp(isJsp);
357: }
358:
359: /**
360: * Returns true if the parser should handle special JSP forgiveness.
361: *
362: * @return true if handles special JSP forgiveness.
363: */
364: public boolean getJsp() {
365: return _isJsp;
366: }
367:
368: /**
369: * Sets the search path for included documents. MergePaths are often
370: * used.
371: *
372: * @param path the path to search
373: */
374: public void setSearchPath(Path path) {
375: _searchPath = path;
376: }
377:
378: /**
379: * Gets the search path for included documents. MergePaths are often
380: * used.
381: *
382: * @return the path to search
383: */
384: public Path getSearchPath() {
385: return _searchPath;
386: }
387:
388: /**
389: * Sets the default encoding if none is specified.
390: *
391: * @param encoding the default encoding
392: */
393: public void setDefaultEncoding(String encoding) {
394: _defaultEncoding = encoding;
395: }
396:
397: /**
398: * Gets the default encoding if none is specified.
399: */
400: public String getDefaultEncoding() {
401: return _defaultEncoding;
402: }
403:
404: /**
405: * Enables including of other XML documents with resin:include.
406: *
407: * @param doResinInclude if true, enables the include.
408: */
409: public void setResinInclude(boolean doResinInclude) {
410: _doResinInclude = doResinInclude;
411: }
412:
413: /**
414: * Returns true if resin:include will include other XML documents.
415: *
416: * @param doResinInclude if true, enables the include.
417: */
418: public boolean getResinInclude() {
419: return _doResinInclude;
420: }
421:
422: public Object getProperty(String name)
423: throws SAXNotRecognizedException {
424: if (name
425: .equals("http://xml.org/sax/properties/lexical-handler"))
426: return _lexicalHandler;
427: else if (name.equals("http://xml.org/sax/properties/dom-node"))
428: return null;
429: else if (name
430: .equals("http://xml.org/sax/properties/xml-string"))
431: return null;
432: else
433: throw new SAXNotRecognizedException(name);
434: }
435:
436: public void setProperty(String name, Object obj)
437: throws SAXNotSupportedException {
438: if (name
439: .equals("http://xml.org/sax/properties/lexical-handler"))
440: _lexicalHandler = (LexicalHandler) obj;
441: else if (name
442: .equals("http://xml.org/sax/handlers/LexicalHandler"))
443: _lexicalHandler = (LexicalHandler) obj;
444: else
445: throw new SAXNotSupportedException(name);
446: }
447:
448: public boolean getFeature(String name)
449: throws SAXNotRecognizedException {
450: if (name.equals("http://xml.org/sax/features/namespaces"))
451: return _isSAXNamespaces;
452: else if (name
453: .equals("http://xml.org/sax/features/namespace-prefixes"))
454: return _isNamespacePrefixes;
455: else if (name
456: .equals("http://xml.org/sax/features/string-interning"))
457: return true;
458: else if (name.equals("http://xml.org/sax/features/validation"))
459: return _isValidating;
460: else if (name
461: .equals("http://xml.org/sax/features/external-general-entities"))
462: return true;
463: else if (name
464: .equals("http://xml.org/sax/features/external-parameter-entities"))
465: return false;
466: else if (name
467: .equals("http://caucho.com/xml/features/skip-comments"))
468: return _skipComments;
469: else if (name
470: .equals("http://caucho.com/xml/features/resin-include"))
471: return _doResinInclude;
472: else
473: throw new SAXNotRecognizedException(name);
474: }
475:
476: public void setFeature(String name, boolean value)
477: throws SAXNotSupportedException {
478: if (name.equals("http://xml.org/sax/features/namespaces")) {
479: _isNamespaceAware = value;
480: } else if (name
481: .equals("http://xml.org/sax/features/namespace-prefixes")) {
482: // setting namespace-prefixes, even if false, sets namespace-aware
483: // see xml/032b
484: _isNamespacePrefixes = value;
485: _isNamespaceAware = true;
486: } else if (name
487: .equals("http://caucho.com/xml/features/skip-comments")) {
488: _skipComments = value;
489: } else if (name
490: .equals("http://caucho.com/xml/features/resin-include"))
491: _doResinInclude = value;
492: else if (name.equals("http://xml.org/sax/features/validation"))
493: _isValidating = value;
494: else
495: throw new SAXNotSupportedException(name);
496: }
497:
498: public void setLexicalHandler(LexicalHandler handler) {
499: _lexicalHandler = handler;
500: }
501:
502: /**
503: * Sets the callback object to find files.
504: *
505: * @param resolver the object to find files.
506: */
507: public void setEntityResolver(EntityResolver resolver) {
508: _entityResolver = resolver;
509: }
510:
511: /**
512: * Sets the callback object finding files from system ids.
513: *
514: * @return the resolver to find files.
515: */
516: public EntityResolver getEntityResolver() {
517: return _entityResolver;
518: }
519:
520: public void setDTDHandler(DTDHandler handler) {
521: _dtdHandler = handler;
522: }
523:
524: public DTDHandler getDTDHandler() {
525: return _dtdHandler;
526: }
527:
528: public void setContentHandler(ContentHandler handler) {
529: _contentHandler = handler;
530: }
531:
532: public ContentHandler getContentHandler() {
533: return _contentHandler;
534: }
535:
536: /**
537: * Configures the document handler callback.
538: *
539: * @param handler the new document handler.
540: */
541: public void setDocumentHandler(DocumentHandler handler) {
542: if (handler == null)
543: _contentHandler = null;
544: else
545: _contentHandler = new ContentHandlerAdapter(handler);
546: }
547:
548: public void setErrorHandler(ErrorHandler handler) {
549: _errorHandler = handler;
550: }
551:
552: public ErrorHandler getErrorHandler() {
553: return _errorHandler;
554: }
555:
556: public void setLocale(Locale locale) {
557: _locale = locale;
558: }
559:
560: /**
561: * SAX parsing from a SAX InputSource
562: *
563: * @param source source containing the XML
564: */
565: public void parse(InputSource source) throws IOException,
566: SAXException {
567: init();
568:
569: if (_searchPath == null) {
570: if (source.getSystemId() != null)
571: _searchPath = Vfs.lookup(source.getSystemId())
572: .getParent();
573: }
574:
575: _systemId = source.getSystemId();
576: _publicId = source.getPublicId();
577: ReadStream stream;
578: String encoding = null;
579:
580: if (source.getByteStream() != null) {
581: stream = Vfs.openRead(source.getByteStream());
582: encoding = source.getEncoding();
583: } else if (source.getCharacterStream() != null) {
584: encoding = "UTF-8";
585: _isStaticEncoding = true;
586: stream = Vfs.openRead(source.getCharacterStream());
587: } else if (source.getSystemId() != null) {
588: InputStream is = openStream(source.getSystemId(), source
589: .getPublicId(), null, true);
590: stream = Vfs.openRead(is);
591: encoding = source.getEncoding();
592: } else
593: throw new FileNotFoundException(L.l("invalid InputSource"));
594:
595: if (encoding != null)
596: stream.setEncoding(encoding);
597:
598: try {
599: parseInt(stream);
600: } finally {
601: stream.close();
602: }
603: }
604:
605: /**
606: * SAX parsing from an InputStream
607: *
608: * @param is stream containing the XML
609: */
610: public void parse(InputStream is) throws IOException, SAXException {
611: init();
612:
613: _systemId = "stream";
614:
615: if (is instanceof ReadStream) {
616: Path path = ((ReadStream) is).getPath();
617: _systemId = path.getURL();
618: _filename = path.getUserPath();
619:
620: if (_searchPath != null) {
621: } else if (path != null)
622: _searchPath = path.getParent();
623:
624: parseInt((ReadStream) is);
625: } else {
626: ReadStream rs = VfsStream.openRead(is);
627: try {
628: parseInt(rs);
629: } finally {
630: if (rs != is)
631: rs.close();
632: }
633: }
634: }
635:
636: /**
637: * SAX parsing from an InputStream
638: *
639: * @param is stream containing the XML
640: */
641: public void parse(InputStream is, String systemId)
642: throws IOException, SAXException {
643: init();
644:
645: parseImpl(is, systemId);
646: }
647:
648: /**
649: * SAX parsing from an InputStream
650: *
651: * @param is stream containing the XML
652: */
653: public void parseImpl(InputStream is, String systemId)
654: throws IOException, SAXException {
655: if (is instanceof ReadStream) {
656: Path path = ((ReadStream) is).getPath();
657:
658: if (_searchPath != null) {
659: } else if (path != null) {
660: _searchPath = path.getParent();
661: if (systemId != null)
662: _searchPath = _searchPath.lookup(systemId)
663: .getParent();
664: } else if (systemId != null)
665: _searchPath = Vfs.lookup(systemId).getParent();
666:
667: if (systemId == null) {
668: systemId = path.getURL();
669: _filename = ((ReadStream) is).getUserPath();
670: } else
671: _filename = systemId;
672:
673: _systemId = systemId;
674:
675: parseInt((ReadStream) is);
676: } else {
677: if (systemId == null) {
678: _systemId = "anonymous.xml";
679: } else {
680: _searchPath = Vfs.lookup(systemId).getParent();
681: _systemId = systemId;
682: }
683:
684: ReadStream rs = VfsStream.openRead(is);
685: try {
686: parseInt(rs);
687: } finally {
688: if (rs != is)
689: rs.close();
690: }
691: }
692: }
693:
694: /**
695: * SAX parsing from a file path
696: *
697: * @param systemId path to the file containing the XML
698: */
699: public void parse(String systemId) throws IOException, SAXException {
700: InputStream is = openTopStream(systemId, null);
701: try {
702: parse(is);
703: } finally {
704: is.close();
705: }
706: }
707:
708: /**
709: * SAX parsing from a VFS path
710: */
711: public void parse(Path path) throws IOException, SAXException {
712: init();
713:
714: if (_searchPath == null)
715: _searchPath = path.getParent();
716:
717: ReadStream is = path.openRead();
718: try {
719: parseInt(is);
720: } finally {
721: is.close();
722: }
723: }
724:
725: /**
726: * SAX parsing from a string.
727: *
728: * @param string string containing the XML
729: */
730: public void parseString(String string) throws IOException,
731: SAXException {
732: init();
733:
734: ReadStream is = Vfs.openString(string);
735:
736: try {
737: parseInt(is);
738: } finally {
739: is.close();
740: }
741: }
742:
743: /**
744: * Parses a document from a SAX InputSource
745: *
746: * @param source SAX InputSource containing the XML data.
747: */
748: public Document parseDocument(InputSource source)
749: throws IOException, SAXException {
750: init();
751:
752: QDocument doc = new QDocument();
753:
754: if (_builder == null)
755: _builder = new DOMBuilder();
756:
757: _builder.init(doc);
758: setOwner(doc);
759:
760: doc.setSystemId(source.getSystemId());
761: _builder.setSystemId(source.getSystemId());
762: _builder.setStrictXML(_strictXml);
763: _builder.setCoalescing(_isCoalescing);
764: _builder.setSkipWhitespace(_skipWhitespace);
765: _contentHandler = _builder;
766:
767: parse(source);
768:
769: return doc;
770: }
771:
772: /**
773: * Parses a document from system path.
774: *
775: * @param systemId path to the XML data.
776: */
777: public Document parseDocument(String systemId) throws IOException,
778: SAXException {
779: InputStream is = openTopStream(systemId, null);
780: try {
781: return parseDocument(is);
782: } finally {
783: is.close();
784: }
785: }
786:
787: /**
788: * Parses a document from a VFS path
789: *
790: * @param path the VFS path containing the XML document.
791: */
792: public Document parseDocument(Path path) throws IOException,
793: SAXException {
794: if (_searchPath == null)
795: _searchPath = path.getParent();
796:
797: ReadStream is = path.openRead();
798: try {
799: return parseDocument(is);
800: } finally {
801: is.close();
802: }
803: }
804:
805: /**
806: * Parses an input stream into a DOM document
807: *
808: * @param is the input stream containing the XML
809: *
810: * @return the parsed document.
811: */
812: public Document parseDocument(InputStream is) throws IOException,
813: SAXException {
814: return parseDocument(is, null);
815: }
816:
817: /**
818: * Parses an input stream into a DOM document
819: *
820: * @param is the input stream containing the XML
821: * @param systemId the URL of the stream.
822: *
823: * @return the parsed document.
824: */
825: public Document parseDocument(InputStream is, String systemId)
826: throws IOException, SAXException {
827: init();
828:
829: QDocument doc = new QDocument();
830: parseDocument(doc, is, systemId);
831:
832: return doc;
833: }
834:
835: public void parseDocument(QDocument doc, InputStream is,
836: String systemId) throws IOException, SAXException {
837: _owner = doc;
838:
839: if (_builder == null)
840: _builder = new DOMBuilder();
841:
842: _builder.init(_owner);
843: _builder.setSystemId(systemId);
844: _builder.setCoalescing(_isCoalescing);
845: _builder.setSkipWhitespace(_skipWhitespace);
846: _contentHandler = _builder;
847:
848: parseImpl(is, systemId);
849: }
850:
851: /**
852: * Parses a string into a DOM document
853: *
854: * @param string the string containing the XML
855: */
856: public Document parseDocumentString(String string)
857: throws IOException, SAXException {
858: ReadStream is = Vfs.openString(string);
859:
860: try {
861: _isStaticEncoding = true;
862: return parseDocument(is);
863: } finally {
864: is.close();
865: }
866: }
867:
868: /**
869: * Looks up an input stream from the system id.
870: */
871: public InputStream openStream(String systemId, String publicId)
872: throws IOException, SAXException {
873: return openStream(systemId, publicId, _entityResolver, false);
874: }
875:
876: /**
877: * Looks up an input stream from the system id.
878: */
879: public InputStream openTopStream(String systemId, String publicId)
880: throws IOException, SAXException {
881: return openStream(systemId, publicId, _entityResolver, true);
882: }
883:
884: /**
885: * Looks up an input stream from the system id.
886: */
887: public InputStream openStream(String systemId, String publicId,
888: EntityResolver entityResolver) throws IOException,
889: SAXException {
890: return openStream(systemId, publicId, entityResolver, false);
891: }
892:
893: /**
894: * Looks up an input stream from the system id.
895: */
896: protected InputStream openStream(String systemId, String publicId,
897: EntityResolver entityResolver, boolean isTop)
898: throws IOException, SAXException {
899: int colon = systemId.indexOf(':');
900: int slash = systemId.indexOf('/');
901:
902: boolean isAbsolute = colon > 0 && (colon < slash || slash < 0);
903:
904: if (slash == 0 || !isAbsolute) {
905: Path pwd;
906:
907: if (_searchPath != null)
908: pwd = _searchPath;
909: else
910: pwd = Vfs.lookup(systemId).getParent();
911:
912: String newId = pwd.lookup(systemId).getURL();
913: if (!newId.startsWith("error:"))
914: systemId = newId;
915: else {
916: int tail = _systemId.lastIndexOf('/');
917: if (tail >= 0)
918: systemId = _systemId.substring(0, tail + 1)
919: + systemId;
920: }
921: }
922:
923: // xml/03c5 -- must be after the normalization
924: if (entityResolver != null) {
925: InputSource source = entityResolver.resolveEntity(publicId,
926: systemId);
927:
928: if (source != null) {
929: _filename = systemId;
930: _systemId = systemId;
931:
932: return openSource(source);
933: }
934: }
935:
936: int ch;
937: if (CauchoSystem.isWindows()
938: && systemId.startsWith("file:")
939: && systemId.length() > 7
940: && systemId.charAt(6) == ':'
941: && (((ch = systemId.charAt(5)) >= 'a' && ch <= 'z') || ch >= 'A'
942: && ch <= 'Z')) {
943: colon = 1;
944: isAbsolute = false;
945: systemId = "/" + systemId.substring(5);
946: }
947:
948: if (!isTop && isAbsolute && !systemId.startsWith("file:")
949: && !systemId.startsWith("jar:")
950: && !(colon == 1 && CauchoSystem.isWindows())) {
951: throw new RemoteURLException(
952: L
953: .l(
954: "URL `{0}' was not opened because it is a remote URL. Any URL scheme other than file: must be handled by a custom entity resolver.",
955: systemId));
956: } else if (_searchPath != null) {
957: return _searchPath.lookup(systemId).openRead();
958: } else
959: return Vfs.lookup(systemId).openRead();
960: }
961:
962: /**
963: * Opens the source
964: */
965: protected InputStream openSource(InputSource source)
966: throws IOException, SAXException {
967: if (source.getByteStream() != null) {
968: return source.getByteStream();
969: } else if (source.getCharacterStream() != null) {
970: return Vfs.openRead(source.getCharacterStream());
971: } else if (source.getSystemId() != null) {
972: return Vfs.openRead(source.getSystemId());
973: } else
974: throw new FileNotFoundException(L.l(
975: "invalid InputSource {0}", source));
976: }
977:
/**
 * Parse the document from a read stream. Every public parse entry
 * point in this class ends up here; none of them uses the returned
 * document. The callers in this class, not this method, close the
 * streams they opened.
 *
 * @param is read stream to parse from.
 *
 * @return The parsed document.
 *
 * @throws IOException declared for implementations that fail reading the stream.
 * @throws SAXException declared for implementations that fail parsing.
 */
abstract Document parseInt(ReadStream is) throws IOException,
  SAXException;
987:
// Registers the recognized DTD attribute types; each name maps to itself.
static {
  String[] attrTypeNames = {
    "CDATA",
    "ID", "IDREF", "IDREFS",
    "ENTITY", "ENTITIES",
    "NMTOKEN", "NMTOKENS",
  };

  for (String typeName : attrTypeNames) {
    _attrTypes.put(typeName, typeName);
  }
}
998: }
|