001: /*
002: * Copyright (c) 2000-2001 Sosnoski Software Solutions, Inc.
003: *
004: * Permission is hereby granted, free of charge, to any person obtaining a copy
005: * of this software and associated documentation files (the "Software"), to deal
006: * in the Software without restriction, including without limitation the rights
007: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
008: * copies of the Software, and to permit persons to whom the Software is
009: * furnished to do so, subject to the following conditions:
010: *
011: * The above copyright notice and this permission notice shall be included in
012: * all copies or substantial portions of the Software.
013: *
014: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
017: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019: * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
020: * IN THE SOFTWARE.
021: */
022:
023: package com.sosnoski.xmlbench;
024:
025: import java.io.*;
026:
027: import java.util.*;
028:
029: /**
030: * Abstract base class for document representation benchmark tests. This class
031: * defines the basic tests along with some implementation methods which must
032: * be defined by the subclass for each particular document representation to
033: * be tested.
034: *
035: * @author Dennis M. Sosnoski
036: * @version 1.2
037: */
038:
039: public abstract class BenchDocBase extends BenchBase {
040: /**
041: * Constructor.
042: *
043: * @param config test configuration name
044: */
045:
046: protected BenchDocBase(String config) {
047: super (config);
048: }
049:
050: /**
051: * Build document representation by parsing XML. This method must be
052: * implemented by each subclass to use the appropriate construction
053: * technique.
054: *
055: * @param in XML document input stream
056: * @return document representation
057: */
058:
059: protected abstract Object build(InputStream in);
060:
061: /**
062: * Walk and summarize document. This method should walk through the nodes
063: * of the document, accumulating summary information. It must be
064: * implemented by each subclass.
065: *
066: * @param doc document representation to be walked
067: * @param summary output document summary information
068: */
069:
070: protected abstract void walk(Object doc, DocumentSummary summary);
071:
072: /**
073: * Output a document as XML text. This method must be implemented by each
074: * subclass to use the appropriate output technique.
075: *
076: * @param doc document representation to be output
077: * @param out XML document output stream
078: */
079:
080: protected abstract void output(Object document, OutputStream out);
081:
082: /**
083: * Modify a document representation. This method must be implemented by each
084: * subclass to walk the document representation performing the following
085: * modifications: remove all content segments which consist only of
086: * whitespace; add an attribute "text" set to "true" to any elements which
087: * directly contain non-whitespace text content; and replace each
088: * non-whitespace text content segment with a "text" element which wraps
089: * the content.
090: *
091: * @param doc document representation to be modified
092: */
093:
094: protected abstract void modify(Object document);
095:
096: /**
097: * Reset test class instance. This method should be overridden by any
098: * subclasses which retain state information during the execution of a
099: * test. Any such information should be cleared when this method is called.
100: */
101:
102: protected void reset() {
103: }
104:
105: /**
106: * Serialize a document to a byte array.
107: *
108: * @param doc document representation to be serialized
109: * @param out serialized document output stream
110: * @return <code>true</code> if successful, <code>false</code> if
111: * representation does not support serialization
112: */
113:
114: protected boolean serialize(Object doc, OutputStream out) {
115: try {
116: ObjectOutputStream os = new ObjectOutputStream(out);
117: os.writeObject(doc);
118: os.close();
119: return true;
120: } catch (NotSerializableException ex) {
121: } catch (Exception ex) {
122: ex.printStackTrace(System.err);
123: System.exit(0);
124: }
125: return false;
126: }
127:
128: /**
129: * Unserialize a document from a byte array.
130: *
131: * @param in serialized document input stream
132: * @return unserialized document representation
133: */
134:
135: protected Object unserialize(InputStream in) {
136: Object restored = null;
137: try {
138: ObjectInputStream os = new ObjectInputStream(in);
139: restored = os.readObject();
140: } catch (Exception ex) {
141: ex.printStackTrace(System.err);
142: System.exit(0);
143: }
144: return restored;
145: }
146:
147: /**
148: * Main time test method. This implementation of the abstract base class
149: * method performs the normal sequence of speed tests. Subclasses which
150: * cannot use the normal test sequence must override this method with
151: * their own variation.
152: *
153: * @param passes number of passes of each test
154: * @param excludes number of initialization passes excluded from averages
155: * @param texts document texts for test
156: * @return result times array
157: */
158:
159: // private boolean s_firstTime = true;
160: public int[] runTimeTest(int passes, int excludes, byte[][] texts) {
161:
162: // allocate array for result values
163: int doccnt = texts.length;
164: int[] results = new int[TIME_RESULT_COUNT];
165: for (int i = 0; i < results.length; i++) {
166: results[i] = Integer.MIN_VALUE;
167: }
168:
169: // create the reusable objects
170: ByteArrayInputStream[][] ins = new ByteArrayInputStream[passes][];
171: for (int i = 0; i < passes; i++) {
172: ins[i] = new ByteArrayInputStream[doccnt];
173: for (int j = 0; j < doccnt; j++) {
174: ins[i][j] = new ByteArrayInputStream(texts[j]);
175: }
176: }
177: ByteArrayOutputStream[] outs = new ByteArrayOutputStream[doccnt];
178: for (int i = 0; i < doccnt; i++) {
179: outs[i] = new ByteArrayOutputStream(texts[i].length * 2);
180: }
181:
182: // set start time for tests
183: initTime();
184:
185: // first build the specified number of copies of the documents
186: Object[][] docs = new Object[passes][doccnt];
187: int best = Integer.MAX_VALUE;
188: int sum = 0;
189: for (int i = 0; i < passes; i++) {
190: for (int j = 0; j < doccnt; j++) {
191: docs[i][j] = build(ins[i][j]);
192: }
193: int time = testPassTime();
194: if (m_printPass) {
195: reportValue("Build document pass " + i, time);
196: }
197: if (best > time) {
198: best = time;
199: }
200: if (i >= excludes) {
201: sum += time;
202: }
203: }
204: results[BUILD_MIN_INDEX] = best;
205: results[BUILD_AVERAGE_INDEX] = sum / (passes - excludes);
206:
207: // walk the constructed document copies
208: DocumentSummary info = new DocumentSummary();
209: best = Integer.MAX_VALUE;
210: sum = 0;
211: for (int i = 0; i < passes; i++) {
212: info.reset();
213: for (int j = 0; j < doccnt; j++) {
214: walk(docs[i][j], info);
215: }
216: int time = testPassTime();
217: if (m_printPass) {
218: reportValue("Walk document pass " + i, time);
219: }
220: if (best > time) {
221: best = time;
222: }
223: if (i >= excludes) {
224: sum += time;
225: }
226: }
227: results[WALK_MIN_INDEX] = best;
228: results[WALK_AVERAGE_INDEX] = sum / (passes - excludes);
229:
230: // generate text representation of document copies
231: byte[][] outputs = new byte[doccnt][];
232: best = Integer.MAX_VALUE;
233: sum = 0;
234: for (int i = 0; i < passes; i++) {
235: for (int j = 0; j < doccnt; j++) {
236: outs[j].reset();
237: output(docs[i][j], outs[j]);
238: }
239: int time = testPassTime();
240: if (m_printPass) {
241: reportValue("Generate text pass " + i, time);
242: }
243: if (best > time) {
244: best = time;
245: }
246: if (i >= excludes) {
247: sum += time;
248: }
249: }
250: results[TEXT_MIN_INDEX] = best;
251: results[TEXT_AVERAGE_INDEX] = sum / (passes - excludes);
252:
253: // save copy of output for later check parse
254: for (int i = 0; i < doccnt; i++) {
255: outputs[i] = outs[i].toByteArray();
256: outs[i].reset();
257: }
258: initTime();
259:
260: // check serialization support for document
261: byte[][] serials = null;
262: Object[] restores = null;
263: int slength = 0;
264: if (!(docs[0][0] instanceof Serializable)) {
265: if (m_printPass) {
266: m_printStream
267: .println(" **Serialization not supported by model**");
268: }
269: } else {
270:
271: // serialize with printing of times
272: best = Integer.MAX_VALUE;
273: sum = 0;
274: for (int i = 0; i < passes; i++) {
275: for (int j = 0; j < doccnt; j++) {
276: outs[j].reset();
277: serialize(docs[i][j], outs[j]);
278: }
279: int time = testPassTime();
280: if (m_printPass) {
281: reportValue("Serialize pass " + i, time);
282: }
283: if (best > time) {
284: best = time;
285: }
286: if (i >= excludes) {
287: sum += time;
288: }
289: }
290: results[SERIALIZE_MIN_INDEX] = best;
291: results[SERIALIZE_AVERAGE_INDEX] = sum
292: / (passes - excludes);
293:
294: // restore from serialized form
295: serials = new byte[doccnt][];
296: ByteArrayInputStream[] sins = new ByteArrayInputStream[doccnt];
297: restores = new Object[doccnt];
298: for (int i = 0; i < doccnt; i++) {
299: serials[i] = outs[i].toByteArray();
300: sins[i] = new ByteArrayInputStream(serials[i]);
301: slength += serials[i].length;
302: }
303: results[SERIALIZE_SIZE_INDEX] = slength;
304: best = Integer.MAX_VALUE;
305: sum = 0;
306: for (int i = 0; i < passes; i++) {
307: for (int j = 0; j < doccnt; j++) {
308: sins[j].reset();
309: restores[j] = unserialize(sins[j]);
310: }
311: int time = testPassTime();
312: if (m_printPass) {
313: reportValue("Unserialize pass " + i, time);
314: }
315: if (best > time) {
316: best = time;
317: }
318: if (i >= excludes) {
319: sum += time;
320: }
321: }
322: results[UNSERIALIZE_MIN_INDEX] = best;
323: results[UNSERIALIZE_AVERAGE_INDEX] = sum
324: / (passes - excludes);
325: }
326:
327: // modify the document representation
328: initTime();
329: best = Integer.MAX_VALUE;
330: sum = 0;
331: for (int i = 0; i < passes; i++) {
332: for (int j = 0; j < doccnt; j++) {
333: modify(docs[i][j]);
334: }
335: int time = testPassTime();
336: if (m_printPass) {
337: reportValue("Modify pass " + i, time);
338: }
339: if (best > time) {
340: best = time;
341: }
342: if (i >= excludes) {
343: sum += time;
344: }
345: }
346: results[MODIFY_MIN_INDEX] = best;
347: results[MODIFY_AVERAGE_INDEX] = sum / (passes - excludes);
348:
349: // make sure generated text matches original document (outside timing)
350: Object[] checks = new Object[doccnt];
351: DocumentSummary verify = new DocumentSummary();
352: for (int i = 0; i < doccnt; i++) {
353: checks[i] = build(new ByteArrayInputStream(outputs[i]));
354: walk(checks[i], verify);
355: }
356: if (!info.structureEquals(verify)) {
357: PrintStream err = m_printStream != null ? m_printStream
358: : System.err;
359: err.println(" **" + getName() + " Error: "
360: + "Document built from output text does "
361: + "not match original document**");
362: printSummary(" Original", info, err);
363: printSummary(" Rebuild", verify, err);
364: }
365:
366: // check if restored from serialized form
367: if (restores != null) {
368:
369: // validate the serialization for exact match (outside timing)
370: verify.reset();
371: for (int i = 0; i < doccnt; i++) {
372: walk(restores[i], verify);
373: }
374: if (!info.equals(verify)) {
375: PrintStream err = m_printStream != null ? m_printStream
376: : System.err;
377: err.println(" **" + getName() + " Error: "
378: + "Document built from output text does "
379: + "not match original document**");
380: printSummary(" Original", info, err);
381: printSummary(" Rebuild", verify, err);
382: }
383: }
384:
385: // copy document summary values for return
386: results[ELEMENT_COUNT_INDEX] = info.getElementCount();
387: results[ATTRIBUTE_COUNT_INDEX] = info.getAttributeCount();
388: results[CONTENT_COUNT_INDEX] = info.getContentCount();
389: results[TEXTCHAR_COUNT_INDEX] = info.getTextCharCount();
390: results[ATTRCHAR_COUNT_INDEX] = info.getAttrCharCount();
391:
392: // print summary for document
393: if (m_printSummary) {
394: printSummary(" Document", info, m_printStream);
395: int ilength = 0;
396: int olength = 0;
397: for (int i = 0; i < doccnt; i++) {
398: ilength += texts[i].length;
399: olength += outputs[i].length;
400: }
401: m_printStream.println(" Original text size was " + ilength
402: + ", output text size was " + olength);
403: if (serials != null) {
404: m_printStream.println(" Serialized length was "
405: + slength);
406: }
407: info.reset();
408: walk(docs[0][0], info);
409: printSummary(" Modified document", info, m_printStream);
410: /* if (s_firstTime) {
411: out.reset();
412: output(docs[0][0], out);
413: m_printStream.println(" Text of modified document:");
414: m_printStream.println(out.toString());
415: s_firstTime = false;
416: } */
417: }
418: reset();
419: return results;
420: }
421:
422: /**
423: * Main space test method. This implementation of the abstract base class
424: * method performs the normal sequence of space tests.
425: *
426: * @param passes number of passes of each test
427: * @param excludes number of initialization passes excluded from averages
428: * @param texts document texts for test
429: * @return result values array
430: */
431:
432: public int[] runSpaceTest(int passes, int excludes, byte[][] texts) {
433:
434: // allocate array for result values
435: int[] results = new int[SPACE_RESULT_COUNT];
436: for (int i = 0; i < results.length; i++) {
437: results[i] = Integer.MIN_VALUE;
438: }
439:
440: // create the reusable objects
441: int doccnt = texts.length;
442: ByteArrayInputStream[][] ins = new ByteArrayInputStream[passes][];
443: for (int i = 0; i < passes; i++) {
444: ins[i] = new ByteArrayInputStream[doccnt];
445: for (int j = 0; j < doccnt; j++) {
446: ins[i][j] = new ByteArrayInputStream(texts[j]);
447: }
448: }
449: DocumentSummary info = new DocumentSummary();
450:
451: // initialize memory information for tests
452: initMemory();
453: results[INITIAL_MEMORY_INDEX] = (int) m_lastMemory;
454:
455: // first build the documents
456: Object[][] docs = new Object[passes][doccnt];
457: int base = (int) m_lastMemory;
458: for (int i = 0; i < passes; i++) {
459: for (int j = 0; j < doccnt; j++) {
460: docs[i][j] = build(ins[i][j]);
461: }
462: if (i == 0) {
463: results[FIRST_SPACE_INDEX] = testPassSpace();
464: if (excludes == 1) {
465: base = (int) m_lastMemory;
466: }
467: } else if ((i + 1) == excludes) {
468: testPassSpace();
469: base = (int) m_lastMemory;
470: }
471: if (m_printPass) {
472: reportValue("Build document pass " + i, testPassSpace());
473: }
474: }
475: testPassSpace();
476: results[AVERAGE_SPACE_INDEX] = ((int) m_lastMemory - base)
477: / (passes - excludes);
478:
479: // now walk the constructed document copies
480: base = (int) m_lastMemory;
481: for (int i = 0; i < passes; i++) {
482: info.reset();
483: for (int j = 0; j < doccnt; j++) {
484: walk(docs[i][j], info);
485: }
486: if ((i + 1) == excludes) {
487: testPassSpace();
488: base = (int) m_lastMemory;
489: }
490: if (m_printPass) {
491: reportValue("Walk document pass " + i, testPassSpace());
492: }
493: }
494: testPassSpace();
495: results[WALKED_SPACE_INDEX] = ((int) m_lastMemory - base)
496: / (passes - excludes);
497:
498: // free all constructed objects to find final space
499: docs = null;
500: reset();
501: initMemory();
502: results[DELTA_MEMORY_INDEX] = (int) m_lastMemory
503: - results[INITIAL_MEMORY_INDEX];
504: return results;
505: }
506: }
|