001: /*
002:
003: Licensed to the Apache Software Foundation (ASF) under one or more
004: contributor license agreements. See the NOTICE file distributed with
005: this work for additional information regarding copyright ownership.
006: The ASF licenses this file to You under the Apache License, Version 2.0
007: (the "License"); you may not use this file except in compliance with
008: the License. You may obtain a copy of the License at
009:
010: http://www.apache.org/licenses/LICENSE-2.0
011:
012: Unless required by applicable law or agreed to in writing, software
013: distributed under the License is distributed on an "AS IS" BASIS,
014: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: See the License for the specific language governing permissions and
016: limitations under the License.
017:
018: */
019: package org.apache.batik.dom.util;
020:
021: import java.io.IOException;
022: import java.io.InputStream;
023: import java.io.InterruptedIOException;
024: import java.io.Reader;
025: import java.util.Iterator;
026: import java.util.LinkedList;
027: import java.util.List;
028:
029: import javax.xml.parsers.ParserConfigurationException;
030: import javax.xml.parsers.SAXParser;
031: import javax.xml.parsers.SAXParserFactory;
032:
033: import org.apache.batik.util.HaltingThread;
034: import org.apache.batik.util.XMLConstants;
035:
036: import org.xml.sax.Attributes;
037: import org.xml.sax.ErrorHandler;
038: import org.xml.sax.InputSource;
039: import org.xml.sax.Locator;
040: import org.xml.sax.SAXException;
041: import org.xml.sax.SAXNotRecognizedException;
042: import org.xml.sax.SAXParseException;
043: import org.xml.sax.XMLReader;
044: import org.xml.sax.ext.LexicalHandler;
045: import org.xml.sax.helpers.DefaultHandler;
046: import org.xml.sax.helpers.XMLReaderFactory;
047:
048: import org.w3c.dom.DOMImplementation;
049: import org.w3c.dom.Document;
050: import org.w3c.dom.Element;
051: import org.w3c.dom.Node;
052:
053: /**
 * This class contains methods for creating Document instances
 * from a URI using SAX2.
056: *
057: * @author <a href="mailto:stephane@hillion.org">Stephane Hillion</a>
058: * @version $Id: SAXDocumentFactory.java 509851 2007-02-21 01:12:30Z deweese $
059: */
060: public class SAXDocumentFactory extends DefaultHandler implements
061: LexicalHandler, DocumentFactory {
062:
063: /**
064: * The DOM implementation used to create the document.
065: */
066: protected DOMImplementation implementation;
067:
068: /**
069: * The SAX2 parser classname.
070: */
071: protected String parserClassName;
072:
073: /**
074: * The SAX2 parser object.
075: */
076: protected XMLReader parser;
077:
078: /**
079: * The created document.
080: */
081: protected Document document;
082:
083: /**
084: * The created document descriptor.
085: */
086: protected DocumentDescriptor documentDescriptor;
087:
088: /**
089: * Whether a document descriptor must be generated.
090: */
091: protected boolean createDocumentDescriptor;
092:
093: /**
094: * The current node.
095: */
096: protected Node currentNode;
097:
098: /**
099: * The locator.
100: */
101: protected Locator locator;
102:
103: /**
104: * Contains collected string data. May be Text, CDATA or Comment.
105: */
106: protected StringBuffer stringBuffer = new StringBuffer();
107: /**
108: * Indicates if stringBuffer has content, needed in case of
109: * zero sized "text" content.
110: */
111: protected boolean stringContent;
112:
113: /**
114: * True if the parser is currently parsing a DTD.
115: */
116: protected boolean inDTD;
117:
118: /**
119: * True if the parser is currently parsing a CDATA section.
120: */
121: protected boolean inCDATA;
122:
123: /**
124: * Whether the parser still hasn't read the document element's
125: * opening tag.
126: */
127: protected boolean inProlog;
128:
129: /**
130: * Whether the parser is in validating mode.
131: */
132: protected boolean isValidating;
133:
134: /**
135: * Whether the document just parsed was standalone.
136: */
137: protected boolean isStandalone;
138:
139: /**
140: * XML version of the document just parsed.
141: */
142: protected String xmlVersion;
143:
144: /**
145: * The stack used to store the namespace URIs.
146: */
147: protected HashTableStack namespaces;
148:
149: /**
150: * The error handler.
151: */
152: protected ErrorHandler errorHandler;
153:
154: protected interface PreInfo {
155: Node createNode(Document doc);
156: }
157:
158: static class ProcessingInstructionInfo implements PreInfo {
159: public String target, data;
160:
161: public ProcessingInstructionInfo(String target, String data) {
162: this .target = target;
163: this .data = data;
164: }
165:
166: public Node createNode(Document doc) {
167: return doc.createProcessingInstruction(target, data);
168: }
169: }
170:
171: static class CommentInfo implements PreInfo {
172: public String comment;
173:
174: public CommentInfo(String comment) {
175: this .comment = comment;
176: }
177:
178: public Node createNode(Document doc) {
179: return doc.createComment(comment);
180: }
181: }
182:
183: static class CDataInfo implements PreInfo {
184: public String cdata;
185:
186: public CDataInfo(String cdata) {
187: this .cdata = cdata;
188: }
189:
190: public Node createNode(Document doc) {
191: return doc.createCDATASection(cdata);
192: }
193: }
194:
195: static class TextInfo implements PreInfo {
196: public String text;
197:
198: public TextInfo(String text) {
199: this .text = text;
200: }
201:
202: public Node createNode(Document doc) {
203: return doc.createTextNode(text);
204: }
205: }
206:
207: /**
208: * Various elements encountered prior to real document root element.
209: * List of PreInfo objects.
210: */
211: protected List preInfo;
212:
/**
 * Builds a SAXDocumentFactory that does not generate a document
 * descriptor while creating documents.
 * @param impl The DOM implementation used to build the DOM tree.
 * @param parser The SAX2 parser classname.
 */
public SAXDocumentFactory(DOMImplementation impl, String parser) {
    // Same as the three-argument form with descriptor generation off.
    this(impl, parser, false);
}
223:
/**
 * Builds a SAXDocumentFactory.
 * @param impl The DOM implementation used to build the DOM tree.
 * @param parser The SAX2 parser classname.
 * @param dd True when a document descriptor has to be generated.
 */
public SAXDocumentFactory(DOMImplementation impl, String parser,
                          boolean dd) {
    this.implementation = impl;
    this.parserClassName = parser;
    this.createDocumentDescriptor = dd;
}
236:
237: /**
238: * Creates a Document instance.
239: * @param ns The namespace URI of the root element of the document.
240: * @param root The name of the root element of the document.
241: * @param uri The document URI.
242: * @exception IOException if an error occured while reading the document.
243: */
244: public Document createDocument(String ns, String root, String uri)
245: throws IOException {
246: return createDocument(ns, root, uri, new InputSource(uri));
247: }
248:
249: /**
250: * Creates a Document instance.
251: * @param uri The document URI.
252: * @exception IOException if an error occured while reading the document.
253: */
254: public Document createDocument(String uri) throws IOException {
255: return createDocument(new InputSource(uri));
256: }
257:
258: /**
259: * Creates a Document instance.
260: * @param ns The namespace URI of the root element of the document.
261: * @param root The name of the root element of the document.
262: * @param uri The document URI.
263: * @param is The document input stream.
264: * @exception IOException if an error occured while reading the document.
265: */
266: public Document createDocument(String ns, String root, String uri,
267: InputStream is) throws IOException {
268: InputSource inp = new InputSource(is);
269: inp.setSystemId(uri);
270: return createDocument(ns, root, uri, inp);
271: }
272:
273: /**
274: * Creates a Document instance.
275: * @param uri The document URI.
276: * @param is The document input stream.
277: * @exception IOException if an error occured while reading the document.
278: */
279: public Document createDocument(String uri, InputStream is)
280: throws IOException {
281: InputSource inp = new InputSource(is);
282: inp.setSystemId(uri);
283: return createDocument(inp);
284: }
285:
286: /**
287: * Creates a Document instance.
288: * @param ns The namespace URI of the root element of the document.
289: * @param root The name of the root element of the document.
290: * @param uri The document URI.
291: * @param r The document reader.
292: * @exception IOException if an error occured while reading the document.
293: */
294: public Document createDocument(String ns, String root, String uri,
295: Reader r) throws IOException {
296: InputSource inp = new InputSource(r);
297: inp.setSystemId(uri);
298: return createDocument(ns, root, uri, inp);
299: }
300:
301: /**
302: * Creates a Document instance.
303: * @param ns The namespace URI of the root element of the document.
304: * @param root The name of the root element of the document.
305: * @param uri The document URI.
306: * @param r an XMLReaderInstance
307: * @exception IOException if an error occured while reading the document.
308: */
309: public Document createDocument(String ns, String root, String uri,
310: XMLReader r) throws IOException {
311: r.setContentHandler(this );
312: r.setDTDHandler(this );
313: r.setEntityResolver(this );
314: try {
315: r.parse(uri);
316: } catch (SAXException e) {
317: Exception ex = e.getException();
318: if (ex != null && ex instanceof InterruptedIOException) {
319: throw (InterruptedIOException) ex;
320: }
321: throw new SAXIOException(e);
322: }
323: currentNode = null;
324: Document ret = document;
325: document = null;
326: return ret;
327: }
328:
329: /**
330: * Creates a Document instance.
331: * @param uri The document URI.
332: * @param r The document reader.
333: * @exception IOException if an error occured while reading the document.
334: */
335: public Document createDocument(String uri, Reader r)
336: throws IOException {
337: InputSource inp = new InputSource(r);
338: inp.setSystemId(uri);
339: return createDocument(inp);
340: }
341:
342: /**
343: * Creates a Document.
344: * @param ns The namespace URI of the root element.
345: * @param root The name of the root element.
346: * @param uri The document URI.
347: * @param is The document input source.
348: * @exception IOException if an error occured while reading the document.
349: */
350: protected Document createDocument(String ns, String root,
351: String uri, InputSource is) throws IOException {
352: Document ret = createDocument(is);
353: Element docElem = ret.getDocumentElement();
354:
355: String lname = root;
356: String nsURI = ns;
357: if (ns == null) {
358: int idx = lname.indexOf(':');
359: String nsp = (idx == -1 || idx == lname.length() - 1) ? ""
360: : lname.substring(0, idx);
361: nsURI = namespaces.get(nsp);
362: if (idx != -1 && idx != lname.length() - 1) {
363: lname = lname.substring(idx + 1);
364: }
365: }
366:
367: String docElemNS = docElem.getNamespaceURI();
368: if ((docElemNS != nsURI)
369: && ((docElemNS == null) || (!docElemNS.equals(nsURI))))
370: throw new IOException(
371: "Root element namespace does not match that requested:\n"
372: + "Requested: " + nsURI + "\n" + "Found: "
373: + docElemNS);
374:
375: if (docElemNS != null) {
376: if (!docElem.getLocalName().equals(lname))
377: throw new IOException(
378: "Root element does not match that requested:\n"
379: + "Requested: " + lname + "\n"
380: + "Found: " + docElem.getLocalName());
381: } else {
382: if (!docElem.getNodeName().equals(lname))
383: throw new IOException(
384: "Root element does not match that requested:\n"
385: + "Requested: " + lname + "\n"
386: + "Found: " + docElem.getNodeName());
387: }
388:
389: return ret;
390: }
391:
392: static SAXParserFactory saxFactory;
393: static {
394: saxFactory = SAXParserFactory.newInstance();
395: }
396:
397: /**
398: * Creates a Document.
399: * @param is The document input source.
400: * @exception IOException if an error occured while reading the document.
401: */
402: protected Document createDocument(InputSource is)
403: throws IOException {
404: try {
405: if (parserClassName != null) {
406: parser = XMLReaderFactory
407: .createXMLReader(parserClassName);
408: } else {
409: SAXParser saxParser;
410: try {
411: saxParser = saxFactory.newSAXParser();
412: } catch (ParserConfigurationException pce) {
413: throw new IOException(
414: "Could not create SAXParser: "
415: + pce.getMessage());
416: }
417: parser = saxParser.getXMLReader();
418: }
419:
420: parser.setContentHandler(this );
421: parser.setDTDHandler(this );
422: parser.setEntityResolver(this );
423: parser.setErrorHandler((errorHandler == null) ? this
424: : errorHandler);
425:
426: parser.setFeature("http://xml.org/sax/features/namespaces",
427: true);
428: parser.setFeature(
429: "http://xml.org/sax/features/namespace-prefixes",
430: true);
431: parser.setFeature("http://xml.org/sax/features/validation",
432: isValidating);
433: parser.setProperty(
434: "http://xml.org/sax/properties/lexical-handler",
435: this );
436: parser.parse(is);
437: } catch (SAXException e) {
438: Exception ex = e.getException();
439: if (ex != null && ex instanceof InterruptedIOException) {
440: throw (InterruptedIOException) ex;
441: }
442: throw new SAXIOException(e);
443: }
444:
445: currentNode = null;
446: Document ret = document;
447: document = null;
448: locator = null;
449: parser = null;
450: return ret;
451: }
452:
453: /**
454: * Returns the document descriptor associated with the latest created
455: * document.
456: * @return null if no document or descriptor was previously generated.
457: */
458: public DocumentDescriptor getDocumentDescriptor() {
459: return documentDescriptor;
460: }
461:
462: /**
463: * <b>SAX</b>: Implements {@link
464: * org.xml.sax.ContentHandler#setDocumentLocator(Locator)}.
465: */
466: public void setDocumentLocator(Locator l) {
467: locator = l;
468: }
469:
470: /**
471: * Sets whether or not the XML parser will validate the XML document
472: * depending on the specified parameter.
473: *
474: * @param isValidating indicates that the XML parser will validate the XML
475: * document
476: */
477: public void setValidating(boolean isValidating) {
478: this .isValidating = isValidating;
479: }
480:
481: /**
482: * Returns true if the XML parser validates the XML stream, false
483: * otherwise.
484: */
485: public boolean isValidating() {
486: return isValidating;
487: }
488:
489: /**
490: * Sets a custom error handler.
491: */
492: public void setErrorHandler(ErrorHandler eh) {
493: errorHandler = eh;
494: }
495:
496: public DOMImplementation getDOMImplementation(String ver) {
497: return implementation;
498: }
499:
500: /**
501: * <b>SAX</b>: Implements {@link
502: * org.xml.sax.ErrorHandler#fatalError(SAXParseException)}.
503: */
504: public void fatalError(SAXParseException ex) throws SAXException {
505: throw ex;
506: }
507:
508: /**
509: * <b>SAX</b>: Implements {@link
510: * org.xml.sax.ErrorHandler#error(SAXParseException)}.
511: */
512: public void error(SAXParseException ex) throws SAXException {
513: throw ex;
514: }
515:
516: /**
517: * <b>SAX</b>: Implements {@link
518: * org.xml.sax.ErrorHandler#warning(SAXParseException)}.
519: */
520: public void warning(SAXParseException ex) throws SAXException {
521: }
522:
523: /**
524: * <b>SAX</b>: Implements {@link
525: * org.xml.sax.ContentHandler#startDocument()}.
526: */
527: public void startDocument() throws SAXException {
528: preInfo = new LinkedList();
529: namespaces = new HashTableStack();
530: namespaces.put("xml", XMLSupport.XML_NAMESPACE_URI);
531: namespaces.put("xmlns", XMLSupport.XMLNS_NAMESPACE_URI);
532: namespaces.put("", null);
533:
534: inDTD = false;
535: inCDATA = false;
536: inProlog = true;
537: currentNode = null;
538: document = null;
539: isStandalone = false;
540: xmlVersion = XMLConstants.XML_VERSION_10;
541:
542: stringBuffer.setLength(0);
543: stringContent = false;
544:
545: if (createDocumentDescriptor) {
546: documentDescriptor = new DocumentDescriptor();
547: } else {
548: documentDescriptor = null;
549: }
550: }
551:
552: /**
553: * <b>SAX</b>: Implements {@link
554: * org.xml.sax.ContentHandler#startElement(String,String,String,Attributes)}.
555: */
556: public void startElement(String uri, String localName,
557: String rawName, Attributes attributes) throws SAXException {
558: // Check If we should halt early.
559: if (HaltingThread.hasBeenHalted()) {
560: throw new SAXException(new InterruptedIOException());
561: }
562:
563: if (inProlog) {
564: inProlog = false;
565: try {
566: isStandalone = parser
567: .getFeature("http://xml.org/sax/features/is-standalone");
568: } catch (SAXNotRecognizedException ex) {
569: }
570: try {
571: xmlVersion = (String) parser
572: .getProperty("http://xml.org/sax/properties/document-xml-version");
573: } catch (SAXNotRecognizedException ex) {
574: }
575: }
576:
577: // Namespaces resolution
578: int len = attributes.getLength();
579: namespaces.push();
580: String version = null;
581: for (int i = 0; i < len; i++) {
582: String aname = attributes.getQName(i);
583: int slen = aname.length();
584: if (slen < 5)
585: continue;
586: if (aname.equals("version")) {
587: version = attributes.getValue(i);
588: continue;
589: }
590: if (!aname.startsWith("xmlns"))
591: continue;
592: if (slen == 5) {
593: String ns = attributes.getValue(i);
594: if (ns.length() == 0)
595: ns = null;
596: namespaces.put("", ns);
597: } else if (aname.charAt(5) == ':') {
598: String ns = attributes.getValue(i);
599: if (ns.length() == 0) {
600: ns = null;
601: }
602: namespaces.put(aname.substring(6), ns);
603: }
604: }
605:
606: // Add any collected String Data before element.
607: appendStringData();
608:
609: // Element creation
610: Element e;
611: int idx = rawName.indexOf(':');
612: String nsp = (idx == -1 || idx == rawName.length() - 1) ? ""
613: : rawName.substring(0, idx);
614: String nsURI = namespaces.get(nsp);
615: if (currentNode == null) {
616: implementation = getDOMImplementation(version);
617: document = implementation.createDocument(nsURI, rawName,
618: null);
619: Iterator i = preInfo.iterator();
620: currentNode = e = document.getDocumentElement();
621: while (i.hasNext()) {
622: PreInfo pi = (PreInfo) i.next();
623: Node n = pi.createNode(document);
624: document.insertBefore(n, e);
625: }
626: preInfo = null;
627: } else {
628: e = document.createElementNS(nsURI, rawName);
629: currentNode.appendChild(e);
630: currentNode = e;
631: }
632:
633: // Storage of the line number.
634: if (createDocumentDescriptor && locator != null) {
635: documentDescriptor.setLocation(e, locator.getLineNumber(),
636: locator.getColumnNumber());
637: }
638:
639: // Attributes creation
640: for (int i = 0; i < len; i++) {
641: String aname = attributes.getQName(i);
642: if (aname.equals("xmlns")) {
643: e.setAttributeNS(XMLSupport.XMLNS_NAMESPACE_URI, aname,
644: attributes.getValue(i));
645: } else {
646: idx = aname.indexOf(':');
647: nsURI = (idx == -1) ? null : namespaces.get(aname
648: .substring(0, idx));
649: e.setAttributeNS(nsURI, aname, attributes.getValue(i));
650: }
651: }
652: }
653:
654: /**
655: * <b>SAX</b>: Implements {@link
656: * org.xml.sax.ContentHandler#endElement(String,String,String)}.
657: */
658: public void endElement(String uri, String localName, String rawName)
659: throws SAXException {
660: appendStringData(); // add string data if any.
661:
662: if (currentNode != null)
663: currentNode = currentNode.getParentNode();
664: namespaces.pop();
665: }
666:
667: public void appendStringData() {
668: if (!stringContent)
669: return;
670:
671: String str = stringBuffer.toString();
672: stringBuffer.setLength(0); // reuse buffer.
673: stringContent = false;
674: if (currentNode == null) {
675: if (inCDATA)
676: preInfo.add(new CDataInfo(str));
677: else
678: preInfo.add(new TextInfo(str));
679: } else {
680: Node n;
681: if (inCDATA)
682: n = document.createCDATASection(str);
683: else
684: n = document.createTextNode(str);
685: currentNode.appendChild(n);
686: }
687: }
688:
689: /**
690: * <b>SAX</b>: Implements {@link
691: * org.xml.sax.ContentHandler#characters(char[],int,int)}.
692: */
693: public void characters(char[] ch, int start, int length)
694: throws SAXException {
695: stringBuffer.append(ch, start, length);
696: stringContent = true;
697: }
698:
699: /**
700: * <b>SAX</b>: Implements {@link
701: * org.xml.sax.ContentHandler#ignorableWhitespace(char[],int,int)}.
702: */
703: public void ignorableWhitespace(char[] ch, int start, int length)
704: throws SAXException {
705: stringBuffer.append(ch, start, length);
706: stringContent = true;
707: }
708:
709: /**
710: * <b>SAX</b>: Implements {@link
711: * org.xml.sax.ContentHandler#processingInstruction(String,String)}.
712: */
713: public void processingInstruction(String target, String data)
714: throws SAXException {
715: if (inDTD)
716: return;
717:
718: appendStringData(); // Add any collected String Data before PI
719:
720: if (currentNode == null)
721: preInfo.add(new ProcessingInstructionInfo(target, data));
722: else
723: currentNode.appendChild(document
724: .createProcessingInstruction(target, data));
725: }
726:
727: // LexicalHandler /////////////////////////////////////////////////////////
728:
729: /**
730: * <b>SAX</b>: Implements {@link
731: * org.xml.sax.ext.LexicalHandler#startDTD(String,String,String)}.
732: */
733: public void startDTD(String name, String publicId, String systemId)
734: throws SAXException {
735: appendStringData(); // Add collected string data before entering DTD
736: inDTD = true;
737: }
738:
739: /**
740: * <b>SAX</b>: Implements {@link org.xml.sax.ext.LexicalHandler#endDTD()}.
741: */
742: public void endDTD() throws SAXException {
743: inDTD = false;
744: }
745:
746: /**
747: * <b>SAX</b>: Implements
748: * {@link org.xml.sax.ext.LexicalHandler#startEntity(String)}.
749: */
750: public void startEntity(String name) throws SAXException {
751: }
752:
753: /**
754: * <b>SAX</b>: Implements
755: * {@link org.xml.sax.ext.LexicalHandler#endEntity(String)}.
756: */
757: public void endEntity(String name) throws SAXException {
758: }
759:
760: /**
761: * <b>SAX</b>: Implements {@link
762: * org.xml.sax.ext.LexicalHandler#startCDATA()}.
763: */
764: public void startCDATA() throws SAXException {
765: appendStringData(); // Add any collected String Data before CData
766: inCDATA = true;
767: stringContent = true; // always create CDATA even if empty.
768: }
769:
770: /**
771: * <b>SAX</b>: Implements {@link
772: * org.xml.sax.ext.LexicalHandler#endCDATA()}.
773: */
774: public void endCDATA() throws SAXException {
775: appendStringData(); // Add the CDATA section
776: inCDATA = false;
777: }
778:
779: /**
780: * <b>SAX</b>: Implements
781: * {@link org.xml.sax.ext.LexicalHandler#comment(char[],int,int)}.
782: */
783: public void comment(char[] ch, int start, int length)
784: throws SAXException {
785: if (inDTD)
786: return;
787: appendStringData();
788:
789: String str = new String(ch, start, length);
790: if (currentNode == null) {
791: preInfo.add(new CommentInfo(str));
792: } else {
793: currentNode.appendChild(document.createComment(str));
794: }
795: }
796: }
|