001: /*
002: ******************************************************************
003: Copyright (c) 2001-2007, Jeff Martin, Tim Bacon
004: All rights reserved.
005:
006: Redistribution and use in source and binary forms, with or without
007: modification, are permitted provided that the following conditions
008: are met:
009:
010: * Redistributions of source code must retain the above copyright
011: notice, this list of conditions and the following disclaimer.
012: * Redistributions in binary form must reproduce the above
013: copyright notice, this list of conditions and the following
014: disclaimer in the documentation and/or other materials provided
015: with the distribution.
016: * Neither the name of the xmlunit.sourceforge.net nor the names
017: of its contributors may be used to endorse or promote products
018: derived from this software without specific prior written
019: permission.
020:
021: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
022: "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
023: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
024: FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
025: COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
026: INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
027: BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
028: LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
029: CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
030: LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
031: ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
032: POSSIBILITY OF SUCH DAMAGE.
033:
034: ******************************************************************
035: */
036:
037: package org.custommonkey.xmlunit;
038:
039: import javax.xml.parsers.DocumentBuilder;
040: import javax.xml.parsers.ParserConfigurationException;
041:
042: import org.w3c.dom.Comment;
043: import org.w3c.dom.Document;
044: import org.w3c.dom.Element;
045: import org.w3c.dom.Node;
046: import org.w3c.dom.ProcessingInstruction;
047: import org.w3c.dom.Text;
048:
049: import org.xml.sax.Attributes;
050: import org.xml.sax.Locator;
051: import org.xml.sax.SAXException;
052: import org.xml.sax.ext.LexicalHandler;
053: import org.xml.sax.helpers.DefaultHandler;
054:
/**
 * Uses SAX events from the <code>ContentHandler</code> and
 * <code>LexicalHandler</code> interfaces to build a DOM document in a tolerant
 * fashion -- it can cope with start tags without end tags, and end tags without
 * start tags for example.
 * Although this subverts the idea of XML being well-formed, it is intended
 * for use with HTML pages so that they can be transformed into DOM
 * trees, without being XHTML to start with.
 * Note that this class currently does not handle entity, DTD or CDATA tags.
 * Every callback is recorded in a trace (see {@link #getTrace}); problems with
 * the markup are recorded there as lines starting with <code>WARNING: </code>
 * rather than being thrown as exceptions.
 * <br />Examples and more at <a href="http://xmlunit.sourceforge.net">xmlunit.sourceforge.net</a>
 * @see HTMLDocumentBuilder#parse
 */
public class TolerantSaxDocumentBuilder extends DefaultHandler
    implements LexicalHandler {
    /** Factory for the empty Document created on each startDocument call. */
    private final DocumentBuilder documentBuilder;
    /** Accumulates one line per SAX callback / warning since startDocument. */
    private final StringBuffer traceBuffer;
    /** The Document currently being built; null until startDocument is called. */
    private Document currentDocument;
    /** The Element new nodes are appended to; null until the first startElement. */
    private Element currentElement;

    /**
     * Constructor for specific JAXP parser
     * @param documentBuilder the JAXP parser to use to construct an empty
     * DOM document that will be built up with SAX calls
     * @throws ParserConfigurationException never thrown by this
     * implementation; the declaration is retained so existing callers
     * continue to compile
     */
    public TolerantSaxDocumentBuilder(DocumentBuilder documentBuilder)
        throws ParserConfigurationException {
        this.documentBuilder = documentBuilder;
        this.traceBuffer = new StringBuffer();
    }

    /**
     * @return the Document built up through the SAX calls, or null if
     * startDocument has not been called yet
     */
    public Document getDocument() {
        return currentDocument;
    }

    /**
     * @return the trace of SAX calls that were used to build up the Document,
     * one newline-terminated line per call or warning
     */
    public String getTrace() {
        return traceBuffer.toString();
    }

    /**
     * ContentHandler method.
     * Discards any previous trace, then starts a fresh empty Document with
     * no current element.
     * @throws SAXException
     */
    public void startDocument() throws SAXException {
        traceBuffer.delete(0, traceBuffer.length());
        trace("startDocument");
        currentDocument = documentBuilder.newDocument();
        currentElement = null;
    }

    /**
     * ContentHandler method
     * @throws SAXException
     */
    public void endDocument() throws SAXException {
        trace("endDocument");
    }

    /**
     * ContentHandler method.
     * Appends a Text node to the current element. Text that arrives before
     * any element has been started, or a negative length, only produces a
     * warning in the trace.
     */
    public void characters(char[] data, int start, int length) {
        if (length < 0) {
            warn("characters called with negative length");
            return;
        }

        String characterData = new String(data, start, length);
        trace("characters:" + characterData);
        if (currentElement == null) {
            warn("Can't append text node to null currentElement");
        } else {
            Text textNode = currentDocument.createTextNode(characterData);
            currentElement.appendChild(textNode);
        }
    }

    /**
     * ContentHandler method.
     * Creates a new Element, appends it to the current element (or to the
     * Document if there is none yet) and makes it the current element.
     * @throws SAXException
     */
    public void startElement(String namespaceURI, String localName,
                             String qName, Attributes atts) throws SAXException {
        trace("startElement:" + localName + "~" + qName);
        Element newElement = createElement(namespaceURI, qName, atts);
        appendNode(newElement);
        currentElement = newElement;
    }

    /**
     * ContentHandler method.
     * Looks for the open element that this end tag closes by walking from the
     * current element up through its ancestors:
     * <ul>
     * <li>each element whose parent is also an element is compared with
     * <code>qName</code>; on a match the matched element's parent becomes
     * the current element</li>
     * <li>when the walk reaches the element directly below the Document, that
     * element is only compared with <code>qName</code> if it is also where
     * the walk started; otherwise the current element is restored to where
     * the walk started, so the unmatched end tag has no effect</li>
     * </ul>
     * An end tag that arrives before any start tag, or that matches nothing,
     * only produces a warning in the trace.
     * @throws SAXException
     * @throws IllegalArgumentException if an ancestor is neither an Element
     * nor the Document
     */
    public void endElement(String namespaceURI, String localName,
                           String qName) throws SAXException {
        trace("endElement:" + localName + "~" + qName);
        if (currentElement == null) {
            warn(qName + ": endElement before any startElement");
            return;
        }

        Node parentNode = null;
        boolean atDocumentRoot = false;
        boolean foundTagToEnd = false;
        // remembered so the position can be restored if nothing matches
        Element startElement = currentElement;

        while (!(foundTagToEnd || atDocumentRoot)) {
            parentNode = currentElement.getParentNode();

            if (parentNode.getNodeType() == Node.ELEMENT_NODE) {
                foundTagToEnd = isElementMatching(currentElement, qName);
                // step up whether or not it matched: a match closes this
                // element, a mismatch treats it as implicitly closed
                currentElement = (Element) parentNode;
            } else if (parentNode.getNodeType() == Node.DOCUMENT_NODE) {
                atDocumentRoot = true;
                if (startElement == currentDocument.getDocumentElement()) {
                    // the document element stays current even when closed
                    foundTagToEnd = isElementMatching(startElement, qName);
                } else {
                    // no match on the way up: undo the walk
                    currentElement = startElement;
                }
            } else {
                throw new IllegalArgumentException(
                    "Closing element "
                    + qName
                    + ": expecting a parent ELEMENT_NODE but found "
                    + parentNode);
            }
        }
        if (!foundTagToEnd) {
            warn(qName + ": endElement does not match startElement!");
        }
    }

    /**
     * @param anElement the element to test
     * @param qname the qualified name from the end tag
     * @return true if the element's node name equals the given name
     */
    private boolean isElementMatching(Element anElement, String qname) {
        String nodeName = anElement.getNodeName();
        return nodeName != null && nodeName.equals(qname);
    }

    /**
     * Unhandled ContentHandler method
     * @throws SAXException
     */
    public void endPrefixMapping(String prefix) throws SAXException {
        unhandled("endPrefixMapping");
    }

    /**
     * Unhandled ContentHandler method
     * @throws SAXException
     */
    public void ignorableWhitespace(char ch[], int start, int length)
        throws SAXException {
        unhandled("ignorableWhitespace");
    }

    /**
     * ContentHandler method.
     * Appends a ProcessingInstruction node at the current position.
     * @throws SAXException
     */
    public void processingInstruction(String target, String data)
        throws SAXException {
        trace("processingInstruction");
        ProcessingInstruction instruction = currentDocument
            .createProcessingInstruction(target, data);
        appendNode(instruction);
    }

    /**
     * Unhandled ContentHandler method
     */
    public void setDocumentLocator(Locator locator) {
        unhandled("setDocumentLocator");
    }

    /**
     * Unhandled ContentHandler method
     * @throws SAXException
     */
    public void skippedEntity(String name) throws SAXException {
        unhandled("skippedEntity");
    }

    /**
     * Unhandled ContentHandler method
     * @throws SAXException
     */
    public void startPrefixMapping(String prefix, String uri)
        throws SAXException {
        unhandled("startPrefixMapping");
    }

    /**
     * Unhandled LexicalHandler method.
     * DOM currently doesn't allow DTD to be retrofitted onto a Document.
     * @throws SAXException
     */
    public void startDTD(String name, String publicId, String systemId)
        throws SAXException {
        unhandled("startDTD");
    }

    /**
     * Unhandled LexicalHandler method
     * @throws SAXException
     */
    public void endDTD() throws SAXException {
        unhandled("endDTD");
    }

    /**
     * Unhandled LexicalHandler method
     * @throws SAXException
     */
    public void startEntity(String name) throws SAXException {
        unhandled("startEntity");
    }

    /**
     * Unhandled LexicalHandler method
     * @throws SAXException
     */
    public void endEntity(String name) throws SAXException {
        unhandled("endEntity");
    }

    /**
     * Unhandled LexicalHandler method
     * @throws SAXException
     */
    public void startCDATA() throws SAXException {
        unhandled("startCDATA");
    }

    /**
     * Unhandled LexicalHandler method
     * @throws SAXException
     */
    public void endCDATA() throws SAXException {
        unhandled("endCDATA");
    }

    /**
     * LexicalHandler method.
     * Appends a Comment node at the current position. As with
     * {@link #characters}, a negative length only produces a warning.
     * @throws SAXException
     */
    public void comment(char ch[], int start, int length)
        throws SAXException {
        if (length < 0) {
            warn("comment called with negative length");
            return;
        }

        String commentText = new String(ch, start, length);
        trace("comment:" + commentText);
        Comment comment = currentDocument.createComment(commentText);
        appendNode(comment);
    }

    /**
     * Log an unhandled ContentHandler or LexicalHandler method
     * @param method name of the callback that was ignored
     */
    private void unhandled(String method) {
        trace("Unhandled callback: " + method);
    }

    /**
     * Log a warning about badly formed markup
     * @param msg description of the problem
     */
    private void warn(String msg) {
        trace("WARNING: " + msg);
    }

    /**
     * Log a handled ContentHandler or LexicalHandler method
     * for tracing / debug purposes
     * @param method text to record; a newline is appended
     */
    private void trace(String method) {
        traceBuffer.append(method).append('\n');
    }

    /**
     * Create a DOM Element for insertion into the current document.
     * An element reported with a non-empty namespace URI is created through
     * <code>createElementNS</code> so that the URI is recorded as the
     * element's namespace. (A namespace URI is not a prefix, so it must not
     * be handed to <code>setPrefix</code>.) Elements without a namespace URI
     * are created through <code>createElement</code>.
     * @param namespaceURI namespace of the element; may be null or empty
     * @param qName qualified name used as the element's node name
     * @param attributes attributes to copy onto the element by qualified
     * name; may be null
     * @return the created Element
     */
    private Element createElement(String namespaceURI, String qName,
                                  Attributes attributes) {
        boolean hasNamespace = namespaceURI != null
            && namespaceURI.length() > 0;
        Element newElement = hasNamespace
            ? currentDocument.createElementNS(namespaceURI, qName)
            : currentDocument.createElement(qName);

        if (attributes != null) {
            for (int i = 0; i < attributes.getLength(); ++i) {
                newElement.setAttribute(attributes.getQName(i),
                                        attributes.getValue(i));
            }
        }

        return newElement;
    }

    /**
     * Append a node to the current document or the current element in the document
     * @param appendNode the node to append; it goes directly under the
     * Document while no element has been started yet
     */
    private void appendNode(Node appendNode) {
        if (currentElement == null) {
            currentDocument.appendChild(appendNode);
        } else {
            currentElement.appendChild(appendNode);
        }
    }
}
|