001: /*
002: * Copyright (c) 2000-2001 Sosnoski Software Solutions, Inc.
003: *
004: * Permission is hereby granted, free of charge, to any person obtaining a copy
005: * of this software and associated documentation files (the "Software"), to deal
006: * in the Software without restriction, including without limitation the rights
007: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
008: * copies of the Software, and to permit persons to whom the Software is
009: * furnished to do so, subject to the following conditions:
010: *
011: * The above copyright notice and this permission notice shall be included in
012: * all copies or substantial portions of the Software.
013: *
014: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
017: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019: * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
020: * IN THE SOFTWARE.
021: */
022:
023: package com.sosnoski.xmlbench;
024:
025: import java.io.*;
026: import java.util.*;
027:
028: import electric.xml.*;
029:
030: /**
031: * Benchmark for measuring performance of the Electric XML document
032: * representation.
033: *
034: * @author Dennis M. Sosnoski
035: * @version 1.2
036: */
037:
038: public class BenchElectric extends BenchDocBase {
039: /**
040: * Constructor.
041: */
042:
043: public BenchElectric() {
044: super ("EXML");
045: }
046:
047: /**
048: * Build document representation by parsing XML. This implementation uses
049: * the method defined by Electric DOM to build the document from an input
050: * stream. Note that Electric DOM supports other methods for constructing
051: * the document, but an input stream is considered the most representative
052: * of real applications.
053: *
054: * @param in XML document input stream
055: * @return document representation
056: */
057:
058: protected Object build(InputStream in) {
059: Object doc = null;
060: try {
061: doc = new Document(in);
062: } catch (Exception ex) {
063: ex.printStackTrace(System.err);
064: System.exit(0);
065: }
066: return doc;
067: }
068:
069: /**
070: * Walk subtree for element. This recursively walks through the document
071: * nodes under an element, accumulating summary information.
072: *
073: * @param element element to be walked
074: * @param summary document summary information
075: */
076:
077: protected void walkElement(Element element, DocumentSummary summary) {
078:
079: // include attribute values in summary
080: if (element.hasAttributes()) {
081: Attributes attrs = element.getAttributes();
082: Attribute attr;
083: while ((attr = attrs.next()) != null) {
084: summary.addAttribute(attr.getValue().length());
085: }
086: }
087:
088: // loop through children
089: if (element.hasChildren()) {
090: Child child = element.getChildren().first();
091: while (child != null) {
092:
093: // handle child by type
094: if (child instanceof Text) {
095: summary.addContent(((Text) child).getString()
096: .length());
097: } else if (child instanceof Element) {
098: summary.addElements(1);
099: walkElement((Element) child, summary);
100: }
101: child = child.getNextSibling();
102:
103: }
104: }
105: }
106:
107: /**
108: * Walk and summarize document. This method walks through the nodes
109: * of the document, accumulating summary information.
110: *
111: * @param doc document representation to be walked
112: * @param summary output document summary information
113: */
114:
115: protected void walk(Object doc, DocumentSummary summary) {
116: summary.addElements(1);
117: walkElement(((Document) doc).getRoot(), summary);
118: }
119:
120: /**
121: * Output a document as XML text. This implementation uses the method
122: * defined by Electric DOM to output a text representation of the document.
123: *
124: * @param doc document representation to be output
125: * @param out XML document output stream
126: */
127:
128: protected void output(Object doc, OutputStream out) {
129: Document edoc = (Document) doc;
130: try {
131: edoc.write(out);
132: } catch (Exception ex) {
133: ex.printStackTrace(System.err);
134: System.exit(0);
135: }
136: }
137:
138: /**
139: * Modify subtree for element. This recursively walks through the document
140: * nodes under an element, performing the modifications.
141: *
142: * @param element element to be walked
143: */
144:
145: protected void modifyElement(Element element) {
146:
147: // check for children present
148: if (element.hasChildren()) {
149:
150: // loop through child nodes
151: Child child;
152: Child next = element.getChildren().first();
153: String prefix = null;
154: boolean content = false;
155: while ((child = next) != null) {
156:
157: // set next before we change anything
158: next = child.getNextSibling();
159:
160: // handle child by node type
161: if (child instanceof Text) {
162:
163: // trim whitespace from content text
164: String trimmed = ((Text) child).getString().trim();
165: if (trimmed.length() == 0) {
166:
167: // delete child if only whitespace (adjusting index)
168: child.remove();
169:
170: } else {
171:
172: // construct qualified name for wrapper element
173: if (!content) {
174: prefix = element.getPrefix();
175: content = true;
176: }
177:
178: // wrap the trimmed content with new element
179: Element text = new Element();
180: text.addText(trimmed);
181: child.replaceWith(text);
182: text.setName(prefix, "text");
183:
184: }
185: } else if (child instanceof Element) {
186:
187: // handle child elements with recursive call
188: modifyElement((Element) child);
189:
190: }
191: }
192:
193: // check if we've seen any non-whitespace content for element
194: if (content) {
195:
196: // add attribute flagging content found
197: element.setAttribute(prefix, "text", "true");
198:
199: }
200: }
201: }
202:
203: /**
204: * Modify a document representation. This implementation of the abstract
205: * superclass method walks the document representation performing the
206: * following modifications: remove all content segments which consist only
207: * of whitespace; add an attribute "text" set to "true" to any elements
208: * which directly contain non-whitespace text content; and replace each
209: * non-whitespace text content segment with a "text" element which wraps
210: * the trimmed content.
211: *
212: * @param doc document representation to be modified
213: */
214:
215: protected void modify(Object doc) {
216: modifyElement(((Document) doc).getRoot());
217: }
218: }
|