001: /*
002: * Copyright (c) 2000-2001 Sosnoski Software Solutions, Inc.
003: *
004: * Permission is hereby granted, free of charge, to any person obtaining a copy
005: * of this software and associated documentation files (the "Software"), to deal
006: * in the Software without restriction, including without limitation the rights
007: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
008: * copies of the Software, and to permit persons to whom the Software is
009: * furnished to do so, subject to the following conditions:
010: *
011: * The above copyright notice and this permission notice shall be included in
012: * all copies or substantial portions of the Software.
013: *
014: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
017: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019: * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
020: * IN THE SOFTWARE.
021: */
022:
023: package com.sosnoski.xmlbench;
024:
025: import java.io.*;
026: import java.util.*;
027:
028: import org.dom4j.*;
029: import org.dom4j.io.*;
030:
031: /**
032: * Benchmark for measuring performance of the dom4j document representation.
033: *
034: * @author Dennis M. Sosnoski
035: * @version 1.2
036: */
037:
038: public class BenchDOM4J extends BenchDocBase {
039: /** SAX reader used within a test run. */
040: private SAXReader m_reader;
041:
042: /** Document factory used within a test run (copied from reader). */
043: private DocumentFactory m_factory;
044:
045: /** XML output serializer used within a test run. */
046: private XMLWriter m_writer;
047:
048: /**
049: * Constructor.
050: */
051:
052: public BenchDOM4J() {
053: super ("dom4j");
054: }
055:
056: /**
057: * Build document representation by parsing XML. This implementation
058: * creates a SAX reader if one does not already exist, then reuses
059: * that reader for the duration of a test run..
060: *
061: * @param in XML document input stream
062: * @return document representation
063: */
064:
065: protected Object build(InputStream in) {
066: if (m_reader == null) {
067: m_reader = new SAXReader(false);
068: m_factory = m_reader.getDocumentFactory();
069: }
070: Object doc = null;
071: try {
072: doc = m_reader.read(in);
073: } catch (Exception ex) {
074: ex.printStackTrace(System.out);
075: System.exit(0);
076: }
077: return doc;
078: }
079:
080: /**
081: * Walk subtree for element. This recursively walks through the document
082: * nodes under an element, accumulating summary information.
083: *
084: * @param element element to be walked
085: * @param summary document summary information
086: */
087:
088: protected void walkElement(Element element, DocumentSummary summary) {
089:
090: // include attribute values in summary
091: int acnt = element.attributeCount();
092: for (int i = 0; i < acnt; i++) {
093: summary.addAttribute(element.attribute(i).getValue()
094: .length());
095: }
096:
097: // loop through children
098: int ncnt = element.nodeCount();
099: for (int i = 0; i < ncnt; i++) {
100:
101: // handle child by type
102: Node child = element.node(i);
103: int type = child.getNodeType();
104: if (type == Node.TEXT_NODE) {
105: summary.addContent(child.getText().length());
106: } else if (type == Node.ELEMENT_NODE) {
107: summary.addElements(1);
108: walkElement((Element) child, summary);
109: }
110:
111: }
112: }
113:
114: /**
115: * Walk and summarize document. This method walks through the nodes
116: * of the document, accumulating summary information.
117: *
118: * @param doc document representation to be walked
119: * @param summary output document summary information
120: */
121:
122: protected void walk(Object doc, DocumentSummary summary) {
123: summary.addElements(1);
124: walkElement(((Document) doc).getRootElement(), summary);
125: }
126:
127: /**
128: * Output a document as XML text. This method uses the method defined
129: * by dom4j to output a text representation of the document.
130: *
131: * @param doc document representation to be output
132: * @param out XML document output stream
133: */
134:
135: protected void output(Object doc, OutputStream out) {
136: try {
137: if (m_writer == null) {
138: m_writer = new XMLWriter();
139: }
140: m_writer.setOutputStream(out);
141: m_writer.write((Document) doc);
142: m_writer.flush();
143: } catch (Exception ex) {
144: ex.printStackTrace(System.err);
145: System.exit(0);
146: }
147: }
148:
149: /**
150: * Modify subtree for element. This recursively walks through the document
151: * nodes under an element, performing the modifications.
152: *
153: * @param element element to be walked
154: */
155:
156: protected void modifyElement(Element element) {
157:
158: // check for children present
159: if (element.nodeCount() > 0) {
160:
161: // loop through child nodes
162: List children = element.content();
163: int ccnt = children.size();
164: QName qname = null;
165: boolean content = false;
166: for (int i = 0; i < ccnt; i++) {
167:
168: // handle child by node type
169: Node child = (Node) children.get(i);
170: if (child.getNodeType() == Node.TEXT_NODE) {
171:
172: // trim whitespace from content text
173: String trimmed = child.getText().trim();
174: if (trimmed.length() == 0) {
175:
176: // delete child if only whitespace (adjusting index)
177: children.remove(i--);
178: --ccnt;
179:
180: } else {
181:
182: // construct qualified name for wrapper element
183: if (!content) {
184: qname = m_factory.createQName("text",
185: element.getNamespace());
186: content = true;
187: }
188:
189: // wrap the trimmed content with new element
190: Element text = m_factory.createElement(qname);
191: text.addText(trimmed);
192: children.set(i, text);
193:
194: }
195: } else if (child.getNodeType() == Node.ELEMENT_NODE) {
196:
197: // handle child elements with recursive call
198: modifyElement((Element) child);
199:
200: }
201: }
202:
203: // check if we've seen any non-whitespace content for element
204: if (content) {
205:
206: // add attribute flagging content found
207: element.addAttribute(qname, "true");
208:
209: }
210: }
211: }
212:
213: /**
214: * Modify a document representation. This implementation of the abstract
215: * superclass method walks the document representation performing the
216: * following modifications: remove all content segments which consist only
217: * of whitespace; add an attribute "text" set to "true" to any elements
218: * which directly contain non-whitespace text content; and replace each
219: * non-whitespace text content segment with a "text" element which wraps
220: * the trimmed content.
221: *
222: * @param doc document representation to be modified
223: */
224:
225: protected void modify(Object doc) {
226: modifyElement(((Document) doc).getRootElement());
227: }
228:
229: /**
230: * Reset test class instance. This discards the SAX reader used
231: * within a test pass.
232: */
233:
234: protected void reset() {
235: m_reader = null;
236: m_factory = null;
237: m_writer = null;
238: }
239: }
|