001: package org.apache.lucene.benchmark.stats;
002:
003: /**
004: * Copyright 2005 The Apache Software Foundation
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018:
019: import java.io.File;
020: import java.text.NumberFormat;
021: import java.util.ArrayList;
022: import java.util.Collection;
023: import java.util.Date;
024: import java.util.HashMap;
025: import java.util.Iterator;
026: import java.util.LinkedHashMap;
027: import java.util.List;
028: import java.util.Vector;
029:
030: import org.apache.lucene.analysis.Analyzer;
031: import org.apache.lucene.benchmark.Constants;
032: import org.apache.lucene.store.Directory;
033:
034: /**
 * This class holds together all parameters related to a test. A single test is
 * performed several times, and all results are averaged.
037: *
038: * @author Andrzej Bialecki <ab@getopt.org>
039: */
040: public class TestData {
041: public static int[] MAX_BUFFERED_DOCS_COUNTS = new int[] { 10, 20,
042: 50, 100, 200, 500 };
043: public static int[] MERGEFACTOR_COUNTS = new int[] { 10, 20, 50,
044: 100, 200, 500 };
045:
046: /**
047: * ID of this test data.
048: */
049: private String id;
050: /**
051: * Heap size.
052: */
053: private long heap;
054: /**
055: * List of results for each test run with these parameters.
056: */
057: private Vector runData = new Vector();
058: private int maxBufferedDocs, mergeFactor;
059: /**
060: * Directory containing source files.
061: */
062: private File source;
063: /**
064: * Lucene Directory implementation for creating an index.
065: */
066: private Directory directory;
067: /**
068: * Analyzer to use when adding documents.
069: */
070: private Analyzer analyzer;
071: /**
072: * If true, use compound file format.
073: */
074: private boolean compound;
075: /**
076: * If true, optimize index when finished adding documents.
077: */
078: private boolean optimize;
079: /**
080: * Data for search benchmarks.
081: */
082: private QueryData[] queries;
083:
084: public TestData() {
085: heap = Runtime.getRuntime().maxMemory();
086: }
087:
088: private static class DCounter {
089: double total;
090: int count, recordCount;
091: }
092:
093: private static class LCounter {
094: long total;
095: int count;
096: }
097:
098: private static class LDCounter {
099: double Dtotal;
100: int Dcount, DrecordCount;
101: long Ltotal0;
102: int Lcount0;
103: long Ltotal1;
104: int Lcount1;
105: }
106:
107: /**
108: * Get a textual summary of the benchmark results, average from all test runs.
109: */
110: static final String ID = "# testData id ";
111: static final String OP = "operation ";
112: static final String RUNCNT = " runCnt";
113: static final String RECCNT = " recCnt";
114: static final String RECSEC = " rec/s";
115: static final String FREEMEM = " avgFreeMem";
116: static final String TOTMEM = " avgTotalMem";
117: static final String COLS[] = { ID, OP, RUNCNT, RECCNT, RECSEC,
118: FREEMEM, TOTMEM };
119:
120: public String showRunData(String prefix) {
121: if (runData.size() == 0) {
122: return "# [NO RUN DATA]";
123: }
124: HashMap resByTask = new HashMap();
125: StringBuffer sb = new StringBuffer();
126: String lineSep = System.getProperty("line.separator");
127: sb
128: .append("warm = Warm Index Reader")
129: .append(lineSep)
130: .append("srch = Search Index")
131: .append(lineSep)
132: .append(
133: "trav = Traverse Hits list, optionally retrieving document")
134: .append(lineSep).append(lineSep);
135: for (int i = 0; i < COLS.length; i++) {
136: sb.append(COLS[i]);
137: }
138: sb.append("\n");
139: LinkedHashMap mapMem = new LinkedHashMap();
140: LinkedHashMap mapSpeed = new LinkedHashMap();
141: for (int i = 0; i < runData.size(); i++) {
142: TestRunData trd = (TestRunData) runData.get(i);
143: Collection labels = trd.getLabels();
144: Iterator it = labels.iterator();
145: while (it.hasNext()) {
146: String label = (String) it.next();
147: MemUsage mem = trd.getMemUsage(label);
148: if (mem != null) {
149: TestData.LCounter[] tm = (TestData.LCounter[]) mapMem
150: .get(label);
151: if (tm == null) {
152: tm = new TestData.LCounter[2];
153: tm[0] = new TestData.LCounter();
154: tm[1] = new TestData.LCounter();
155: mapMem.put(label, tm);
156: }
157: tm[0].total += mem.avgFree;
158: tm[0].count++;
159: tm[1].total += mem.avgTotal;
160: tm[1].count++;
161: }
162: TimeData td = trd.getTotals(label);
163: if (td != null) {
164: TestData.DCounter dc = (TestData.DCounter) mapSpeed
165: .get(label);
166: if (dc == null) {
167: dc = new TestData.DCounter();
168: mapSpeed.put(label, dc);
169: }
170: dc.count++;
171: //dc.total += td.getRate();
172: dc.total += (td.count > 0 && td.elapsed <= 0 ? 1
173: : td.elapsed); // assume atleast 1ms for any countable op
174: dc.recordCount += td.count;
175: }
176: }
177: }
178: LinkedHashMap res = new LinkedHashMap();
179: Iterator it = mapSpeed.keySet().iterator();
180: while (it.hasNext()) {
181: String label = (String) it.next();
182: TestData.DCounter dc = (TestData.DCounter) mapSpeed
183: .get(label);
184: res
185: .put(
186: label,
187: format(dc.count, RUNCNT)
188: + format(dc.recordCount / dc.count,
189: RECCNT)
190: + format(
191: 1,
192: (float) (dc.recordCount * 1000.0 / (dc.total > 0 ? dc.total
193: : 1.0)), RECSEC)
194: //format((float) (dc.total / (double) dc.count), RECSEC)
195: );
196:
197: // also sum by task
198: String task = label.substring(label.lastIndexOf("-") + 1);
199: LDCounter ldc = (LDCounter) resByTask.get(task);
200: if (ldc == null) {
201: ldc = new LDCounter();
202: resByTask.put(task, ldc);
203: }
204: ldc.Dcount += dc.count;
205: ldc.DrecordCount += dc.recordCount;
206: ldc.Dtotal += (dc.count > 0 && dc.total <= 0 ? 1 : dc.total); // assume atleast 1ms for any countable op
207: }
208: it = mapMem.keySet().iterator();
209: while (it.hasNext()) {
210: String label = (String) it.next();
211: TestData.LCounter[] lc = (TestData.LCounter[]) mapMem
212: .get(label);
213: String speed = (String) res.get(label);
214: boolean makeSpeed = false;
215: if (speed == null) {
216: makeSpeed = true;
217: speed = format(lc[0].count, RUNCNT) + format(0, RECCNT)
218: + format(0, (float) 0.0, RECSEC);
219: }
220: res.put(label, speed
221: + format(0, lc[0].total / lc[0].count, FREEMEM)
222: + format(0, lc[1].total / lc[1].count, TOTMEM));
223:
224: // also sum by task
225: String task = label.substring(label.lastIndexOf("-") + 1);
226: LDCounter ldc = (LDCounter) resByTask.get(task);
227: if (ldc == null) {
228: ldc = new LDCounter();
229: resByTask.put(task, ldc);
230: makeSpeed = true;
231: }
232: if (makeSpeed) {
233: ldc.Dcount += lc[0].count;
234: }
235: ldc.Lcount0 += lc[0].count;
236: ldc.Lcount1 += lc[1].count;
237: ldc.Ltotal0 += lc[0].total;
238: ldc.Ltotal1 += lc[1].total;
239: }
240: it = res.keySet().iterator();
241: while (it.hasNext()) {
242: String label = (String) it.next();
243: sb.append(format(prefix, ID));
244: sb.append(format(label, OP));
245: sb.append(res.get(label)).append("\n");
246: }
247: // show results by task (srch, optimize, etc.)
248: sb.append("\n");
249: for (int i = 0; i < COLS.length; i++) {
250: sb.append(COLS[i]);
251: }
252: sb.append("\n");
253: it = resByTask.keySet().iterator();
254: while (it.hasNext()) {
255: String task = (String) it.next();
256: LDCounter ldc = (LDCounter) resByTask.get(task);
257: sb.append(format(" ", ID));
258: sb.append(format(task, OP));
259: sb.append(format(ldc.Dcount, RUNCNT));
260: sb.append(format(ldc.DrecordCount / ldc.Dcount, RECCNT));
261: sb
262: .append(format(
263: 1,
264: (float) (ldc.DrecordCount * 1000.0 / (ldc.Dtotal > 0 ? ldc.Dtotal
265: : 1.0)), RECSEC));
266: sb.append(format(0, ldc.Ltotal0 / ldc.Lcount0, FREEMEM));
267: sb.append(format(0, ldc.Ltotal1 / ldc.Lcount1, TOTMEM));
268: sb.append("\n");
269: }
270: return sb.toString();
271: }
272:
273: private static NumberFormat numFormat[] = {
274: NumberFormat.getInstance(), NumberFormat.getInstance() };
275: private static final String padd = " ";
276: static {
277: numFormat[0].setMaximumFractionDigits(0);
278: numFormat[0].setMinimumFractionDigits(0);
279: numFormat[1].setMaximumFractionDigits(1);
280: numFormat[1].setMinimumFractionDigits(1);
281: }
282:
283: // padd number from left
284: // numFracDigits must be 0 or 1.
285: static String format(int numFracDigits, float f, String col) {
286: String res = padd + numFormat[numFracDigits].format(f);
287: return res.substring(res.length() - col.length());
288: }
289:
290: // padd number from left
291: static String format(int n, String col) {
292: String res = padd + n;
293: return res.substring(res.length() - col.length());
294: }
295:
296: // padd string from right
297: static String format(String s, String col) {
298: return (s + padd).substring(0, col.length());
299: }
300:
301: /**
302: * Prepare a list of benchmark data, using all possible combinations of
303: * benchmark parameters.
304: *
305: * @param sources list of directories containing different source document
306: * collections
307: * @param analyzers of analyzers to use.
308: */
309: public static TestData[] getAll(File[] sources, Analyzer[] analyzers) {
310: List res = new ArrayList(50);
311: TestData ref = new TestData();
312: for (int q = 0; q < analyzers.length; q++) {
313: for (int m = 0; m < sources.length; m++) {
314: for (int i = 0; i < MAX_BUFFERED_DOCS_COUNTS.length; i++) {
315: for (int k = 0; k < MERGEFACTOR_COUNTS.length; k++) {
316: for (int n = 0; n < Constants.BOOLEANS.length; n++) {
317: for (int p = 0; p < Constants.BOOLEANS.length; p++) {
318: ref.id = "td-" + q + m + i + k + n + p;
319: ref.source = sources[m];
320: ref.analyzer = analyzers[q];
321: ref.maxBufferedDocs = MAX_BUFFERED_DOCS_COUNTS[i];
322: ref.mergeFactor = MERGEFACTOR_COUNTS[k];
323: ref.compound = Constants.BOOLEANS[n]
324: .booleanValue();
325: ref.optimize = Constants.BOOLEANS[p]
326: .booleanValue();
327: try {
328: res.add(ref.clone());
329: } catch (Exception e) {
330: e.printStackTrace();
331: }
332: }
333: }
334: }
335: }
336: }
337: }
338: return (TestData[]) res.toArray(new TestData[0]);
339: }
340:
341: /**
342: * Similar to {@link #getAll(java.io.File[], org.apache.lucene.analysis.Analyzer[])} but only uses
343: * maxBufferedDocs of 10 and 100 and same for mergeFactor, thus reducing the number of permutations significantly.
344: * It also only uses compund file and optimize is always true.
345: *
346: * @param sources
347: * @param analyzers
348: * @return An Array of {@link TestData}
349: */
350: public static TestData[] getTestDataMinMaxMergeAndMaxBuffered(
351: File[] sources, Analyzer[] analyzers) {
352: List res = new ArrayList(50);
353: TestData ref = new TestData();
354: for (int q = 0; q < analyzers.length; q++) {
355: for (int m = 0; m < sources.length; m++) {
356: ref.id = "td-" + q + m + "_" + 10 + "_" + 10;
357: ref.source = sources[m];
358: ref.analyzer = analyzers[q];
359: ref.maxBufferedDocs = 10;
360: ref.mergeFactor = 10;//MERGEFACTOR_COUNTS[k];
361: ref.compound = true;
362: ref.optimize = true;
363: try {
364: res.add(ref.clone());
365: } catch (Exception e) {
366: e.printStackTrace();
367: }
368: ref.id = "td-" + q + m + "_" + 10 + "_" + 100;
369: ref.source = sources[m];
370: ref.analyzer = analyzers[q];
371: ref.maxBufferedDocs = 10;
372: ref.mergeFactor = 100;//MERGEFACTOR_COUNTS[k];
373: ref.compound = true;
374: ref.optimize = true;
375: try {
376: res.add(ref.clone());
377: } catch (Exception e) {
378: e.printStackTrace();
379: }
380: ref.id = "td-" + q + m + "_" + 100 + "_" + 10;
381: ref.source = sources[m];
382: ref.analyzer = analyzers[q];
383: ref.maxBufferedDocs = 100;
384: ref.mergeFactor = 10;//MERGEFACTOR_COUNTS[k];
385: ref.compound = true;
386: ref.optimize = true;
387: try {
388: res.add(ref.clone());
389: } catch (Exception e) {
390: e.printStackTrace();
391: }
392: ref.id = "td-" + q + m + "_" + 100 + "_" + 100;
393: ref.source = sources[m];
394: ref.analyzer = analyzers[q];
395: ref.maxBufferedDocs = 100;
396: ref.mergeFactor = 100;//MERGEFACTOR_COUNTS[k];
397: ref.compound = true;
398: ref.optimize = true;
399: try {
400: res.add(ref.clone());
401: } catch (Exception e) {
402: e.printStackTrace();
403: }
404: }
405: }
406: return (TestData[]) res.toArray(new TestData[0]);
407: }
408:
409: protected Object clone() {
410: TestData cl = new TestData();
411: cl.id = id;
412: cl.compound = compound;
413: cl.heap = heap;
414: cl.mergeFactor = mergeFactor;
415: cl.maxBufferedDocs = maxBufferedDocs;
416: cl.optimize = optimize;
417: cl.source = source;
418: cl.directory = directory;
419: cl.analyzer = analyzer;
420: // don't clone runData
421: return cl;
422: }
423:
424: public String toString() {
425: StringBuffer res = new StringBuffer();
426: res.append("#-- ID: ").append(id).append(", ").append(
427: new Date()).append(", heap=").append(heap).append(
428: " --\n");
429: res.append("# source=").append(source).append(", directory=")
430: .append(directory).append("\n");
431: res.append("# maxBufferedDocs=").append(maxBufferedDocs)
432: .append(", mergeFactor=").append(mergeFactor);
433: res.append(", compound=").append(compound)
434: .append(", optimize=").append(optimize).append("\n");
435: if (queries != null) {
436: res.append(QueryData.getLabels()).append("\n");
437: for (int i = 0; i < queries.length; i++) {
438: res.append("# ").append(queries[i].toString()).append(
439: "\n");
440: }
441: }
442: return res.toString();
443: }
444:
445: public Analyzer getAnalyzer() {
446: return analyzer;
447: }
448:
449: public void setAnalyzer(Analyzer analyzer) {
450: this .analyzer = analyzer;
451: }
452:
453: public boolean isCompound() {
454: return compound;
455: }
456:
457: public void setCompound(boolean compound) {
458: this .compound = compound;
459: }
460:
461: public Directory getDirectory() {
462: return directory;
463: }
464:
465: public void setDirectory(Directory directory) {
466: this .directory = directory;
467: }
468:
469: public long getHeap() {
470: return heap;
471: }
472:
473: public void setHeap(long heap) {
474: this .heap = heap;
475: }
476:
477: public String getId() {
478: return id;
479: }
480:
481: public void setId(String id) {
482: this .id = id;
483: }
484:
485: public int getMaxBufferedDocs() {
486: return maxBufferedDocs;
487: }
488:
489: public void setMaxBufferedDocs(int maxBufferedDocs) {
490: this .maxBufferedDocs = maxBufferedDocs;
491: }
492:
493: public int getMergeFactor() {
494: return mergeFactor;
495: }
496:
497: public void setMergeFactor(int mergeFactor) {
498: this .mergeFactor = mergeFactor;
499: }
500:
501: public boolean isOptimize() {
502: return optimize;
503: }
504:
505: public void setOptimize(boolean optimize) {
506: this .optimize = optimize;
507: }
508:
509: public QueryData[] getQueries() {
510: return queries;
511: }
512:
513: public void setQueries(QueryData[] queries) {
514: this .queries = queries;
515: }
516:
517: public Vector getRunData() {
518: return runData;
519: }
520:
521: public void setRunData(Vector runData) {
522: this .runData = runData;
523: }
524:
525: public File getSource() {
526: return source;
527: }
528:
529: public void setSource(File source) {
530: this.source = source;
531: }
532: }
|