001: /*
002: Copyright (c) 2003-2007, Dennis M. Sosnoski
003: All rights reserved.
004:
005: Redistribution and use in source and binary forms, with or without modification,
006: are permitted provided that the following conditions are met:
007:
008: * Redistributions of source code must retain the above copyright notice, this
009: list of conditions and the following disclaimer.
010: * Redistributions in binary form must reproduce the above copyright notice,
011: this list of conditions and the following disclaimer in the documentation
012: and/or other materials provided with the distribution.
013: * Neither the name of JiBX nor the names of its contributors may be used
014: to endorse or promote products derived from this software without specific
015: prior written permission.
016:
017: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
018: ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
019: WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
021: ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: */
028:
029: package org.jibx.extras;
030:
031: import java.io.IOException;
032: import java.io.PrintStream;
033: import java.io.Reader;
034:
035: import org.xmlpull.v1.XmlPullParser;
036: import org.xmlpull.v1.XmlPullParserException;
037: import org.xmlpull.v1.XmlPullParserFactory;
038:
039: /**
040: * XML document comparator. This uses XMLPull parsers to read a pair of
041: * documents in parallel, comparing the streams of components seen from the two
042: * documents. The comparison ignores differences in whitespace separating
043: * elements, but treats whitespace as significant within elements with only
044: * character data content.
045: *
046: * @author Dennis M. Sosnoski
047: */
048: public class DocumentComparator {
049: /** Parser for first document. */
050: protected XmlPullParser m_parserA;
051:
052: /** Parser for second document. */
053: protected XmlPullParser m_parserB;
054:
055: /** Print stream for reporting differences. */
056: protected PrintStream m_differencePrint;
057:
058: /**
059: * Constructor. Builds the actual parser.
060: *
061: * @param print print stream for reporting differences
062: * @throws XmlPullParserException on error creating parsers
063: */
064: public DocumentComparator(PrintStream print)
065: throws XmlPullParserException {
066: XmlPullParserFactory factory = XmlPullParserFactory
067: .newInstance();
068: factory.setNamespaceAware(true);
069: m_parserA = factory.newPullParser();
070: m_parserB = factory.newPullParser();
071: m_differencePrint = print;
072: }
073:
074: /**
075: * Build parse input position description.
076: *
077: * @param parser for which to build description
078: * @return text description of current parse position
079: */
080: protected String buildPositionString(XmlPullParser parser) {
081: return " line " + parser.getLineNumber() + ", col "
082: + parser.getColumnNumber();
083: }
084:
085: /**
086: * Build name string.
087: *
088: * @param ns namespace URI
089: * @param name local name
090: * @return printable names string
091: */
092: protected String buildName(String ns, String name) {
093: if ("".equals(ns)) {
094: return name;
095: } else {
096: return "{" + ns + '}' + name;
097: }
098: }
099:
100: /**
101: * Prints error description text. The generated text include position
102: * information from both documents.
103: *
104: * @param msg error message text
105: */
106: protected void printError(String msg) {
107: if (m_differencePrint != null) {
108: m_differencePrint.println(msg + " - from "
109: + buildPositionString(m_parserA) + " to "
110: + buildPositionString(m_parserB));
111: }
112: }
113:
114: /**
115: * Verifies that the attributes on the current start tags match. Any
116: * mismatches are printed immediately.
117: *
118: * @return <code>true</code> if the attributes match, <code>false</code> if
119: * not
120: */
121: protected boolean matchAttributes() {
122: int counta = m_parserA.getAttributeCount();
123: int countb = m_parserB.getAttributeCount();
124: boolean[] flags = new boolean[countb];
125: boolean match = true;
126: for (int i = 0; i < counta; i++) {
127: String name = m_parserA.getAttributeName(i);
128: String ns = m_parserA.getAttributeNamespace(i);
129: String value = m_parserA.getAttributeValue(i);
130: boolean found = false;
131: for (int j = 0; j < countb; j++) {
132: if (name.equals(m_parserB.getAttributeName(j))
133: && ns
134: .equals(m_parserB
135: .getAttributeNamespace(j))) {
136: flags[j] = true;
137: if (!value.equals(m_parserB.getAttributeValue(j))) {
138: if (match) {
139: printError("Attribute mismatch");
140: match = false;
141: }
142: m_differencePrint
143: .println(" attribute "
144: + buildName(ns, name)
145: + " value '"
146: + value
147: + "' != '"
148: + m_parserB
149: .getAttributeValue(j)
150: + '\'');
151: }
152: found = true;
153: break;
154: }
155: }
156: if (!found) {
157: if (match) {
158: printError("Attribute mismatch");
159: match = false;
160: }
161: m_differencePrint.println(" attribute "
162: + buildName(ns, name)
163: + " is missing from second document");
164: }
165: }
166: for (int i = 0; i < countb; i++) {
167: if (!flags[i]) {
168: if (match) {
169: printError("Attribute mismatch");
170: match = false;
171: }
172: m_differencePrint.println(" attribute "
173: + buildName(m_parserB.getAttributeNamespace(i),
174: m_parserB.getAttributeName(i))
175: + " is missing from first document");
176: }
177: }
178: return match;
179: }
180:
181: /**
182: * Check if two text strings match, ignoring leading and trailing spaces.
183: * Any mismatch is printed immediately, with the supplied lead text.
184: *
185: * @param texta
186: * @param textb
187: * @param lead error text lead
188: * @return <code>true</code> if the texts match, <code>false</code> if
189: * not
190: */
191: protected boolean matchText(String texta, String textb, String lead) {
192: if (texta.trim().equals(textb.trim())) {
193: return true;
194: } else {
195: printError(lead);
196: if (m_differencePrint != null) {
197: m_differencePrint.println(" \"" + texta + "\" (length "
198: + texta.length() + " vs. \"" + textb
199: + "\" (length " + textb.length() + ')');
200:
201: }
202: return false;
203: }
204: }
205:
206: /**
207: * Verifies that the current start or end tag names match.
208: *
209: * @return <code>true</code> if the names match, <code>false</code> if not
210: */
211: protected boolean matchNames() {
212: return m_parserA.getName().equals(m_parserB.getName())
213: && m_parserA.getNamespace().equals(
214: m_parserB.getNamespace());
215: }
216:
217: /**
218: * Compares a pair of documents by reading them in parallel from a pair of
219: * parsers. The comparison ignores differences in whitespace separating
220: * elements, but treats whitespace as significant within elements with only
221: * character data content.
222: *
223: * @param rdra reader for first document to be compared
224: * @param rdrb reader for second document to be compared
225: * @return <code>true</code> if the documents are the same,
226: * <code>false</code> if they're different
227: */
228: public boolean compare(Reader rdra, Reader rdrb) {
229: try {
230:
231: // set the documents and initialize
232: m_parserA.setInput(rdra);
233: m_parserB.setInput(rdrb);
234: boolean content = false;
235: String texta = "";
236: String textb = "";
237: boolean same = true;
238: while (true) {
239:
240: // start by collecting and moving past text content
241: if (m_parserA.getEventType() == XmlPullParser.TEXT) {
242: texta = m_parserA.getText();
243: m_parserA.next();
244: }
245: if (m_parserB.getEventType() == XmlPullParser.TEXT) {
246: textb = m_parserB.getText();
247: m_parserB.next();
248: }
249:
250: // now check actual tag state
251: int typea = m_parserA.getEventType();
252: int typeb = m_parserB.getEventType();
253: if (typea != typeb) {
254: printError("Different document structure");
255: return false;
256: } else if (typea == XmlPullParser.START_TAG) {
257:
258: // compare start tags, attributes, and prior text
259: content = true;
260: if (!matchNames()) {
261: printError("Different start tags");
262: return false;
263: } else {
264: if (!matchAttributes()) {
265: same = false;
266: }
267: if (!matchText(texta, textb,
268: "Different text content between elements")) {
269: same = false;
270: }
271: }
272: texta = textb = "";
273:
274: } else if (typea == XmlPullParser.END_TAG) {
275:
276: // compare end tags and prior text
277: if (!matchNames()) {
278: printError("Different end tags");
279: return false;
280: }
281: if (content) {
282: if (!matchText(texta, textb,
283: "Different text content")) {
284: same = false;
285: }
286: content = false;
287: } else {
288: if (!matchText(texta, textb,
289: "Different text content between elements")) {
290: same = false;
291: }
292: }
293: texta = textb = "";
294:
295: } else if (typea == XmlPullParser.END_DOCUMENT) {
296: return same;
297: }
298:
299: // advance both parsers to next component
300: m_parserA.next();
301: m_parserB.next();
302:
303: }
304: } catch (IOException ex) {
305: if (m_differencePrint != null) {
306: ex.printStackTrace(m_differencePrint);
307: }
308: return false;
309: } catch (XmlPullParserException ex) {
310: if (m_differencePrint != null) {
311: ex.printStackTrace(m_differencePrint);
312: }
313: return false;
314: }
315: }
316: }
|