/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.benchmark.byTask;

import java.io.StringReader;
import java.io.File;
import java.io.FileReader;
import java.io.BufferedReader;
import java.util.List;
import java.util.Iterator;

import org.apache.lucene.benchmark.byTask.Benchmark;
import org.apache.lucene.benchmark.byTask.feeds.DocData;
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
import org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker;
import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermDocs;

import junit.framework.TestCase;

/**
 * Test very simply that perf tasks - simple algorithms - are doing what they should.
 */
public class TestPerfTasksLogic extends TestCase {

  private static final boolean DEBUG = false;
  static final String NEW_LINE = System.getProperty("line.separator");

  // properties in effect in all tests here
  static final String propLines[] = {
      "directory=RAMDirectory",
      "print.props=false",
  };

  /**
   * @param name test name
   */
  public TestPerfTasksLogic(String name) {
    super(name);
  }

  /**
   * Test index creation logic
   */
  public void testIndexAndSearchTasks() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "ResetSystemErase",
        "CreateIndex",
        "{ AddDoc } : 1000",
        "Optimize",
        "CloseIndex",
        "OpenReader",
        "{ CountingSearchTest } : 200",
        "CloseReader",
        "[ CountingSearchTest > : 70",
        "[ CountingSearchTest > : 9",
    };

    // 2. we test this value later
    CountingSearchTestTask.numSearches = 0;

    // 3. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 4. test specific checks after the benchmark run completed.
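    // 200 sequential searches plus 70 + 9 searches from the two parallel
    // sequences: 200 + 70 + 9 = 279.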
    assertEquals("CountingSearchTest was supposed to be called!", 279,
        CountingSearchTestTask.numSearches);
    assertTrue("Index does not exist!", IndexReader
        .indexExists(benchmark.getRunData().getDirectory()));
    // now we should be able to open the index for write.
    IndexWriter iw = new IndexWriter(
        benchmark.getRunData().getDirectory(), null, false);
    iw.close();
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
    assertEquals(
        "1000 docs were added to the index; this is what we expect to find!",
        1000, ir.numDocs());
    ir.close();
  }

  /**
   * Test Exhausting Doc Maker logic
   */
  public void testExhaustDocMaker() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker",
        "doc.add.log.step=1",
        "doc.term.vector=false",
        "doc.maker.forever=false",
        "directory=RAMDirectory",
        "doc.stored=false",
        "doc.tokenized=false",
        "# ----- alg ",
        "CreateIndex",
        "{ AddDoc } : * ",
        "Optimize",
        "CloseIndex",
        "OpenReader",
        "{ CountingSearchTest } : 100",
        "CloseReader",
        "[ CountingSearchTest > : 30",
        "[ CountingSearchTest > : 9",
    };

    // 2. we test this value later
    CountingSearchTestTask.numSearches = 0;

    // 3. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 4. test specific checks after the benchmark run completed.
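    // 100 sequential searches plus 30 + 9 searches from the two parallel
    // sequences: 100 + 30 + 9 = 139.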
    assertEquals("CountingSearchTest was supposed to be called!", 139,
        CountingSearchTestTask.numSearches);
    assertTrue("Index does not exist!", IndexReader
        .indexExists(benchmark.getRunData().getDirectory()));
    // now we should be able to open the index for write.
    IndexWriter iw = new IndexWriter(
        benchmark.getRunData().getDirectory(), null, false);
    iw.close();
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
    assertEquals(
        "1 doc was added to the index; this is what we expect to find!",
        1, ir.numDocs());
    ir.close();
  }

  /**
   * Test Parallel Doc Maker logic (for LUCENE-940)
   */
  public void testParallelDocMaker() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "doc.maker=" + Reuters20DocMaker.class.getName(),
        "doc.add.log.step=3",
        "doc.term.vector=false",
        "doc.maker.forever=false",
        "directory=FSDirectory",
        "doc.stored=false",
        "doc.tokenized=false",
        "# ----- alg ",
        "CreateIndex",
        "[ { AddDoc } : * ] : 4 ",
        "CloseIndex",
    };

    // 2. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 3. test number of docs in the index
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
    int ndocsExpected = 20; // Reuters20DocMaker exhausts after 20 docs.
    assertEquals("wrong number of docs in the index!",
        ndocsExpected, ir.numDocs());
    ir.close();
  }

  /**
   * Test WriteLineDoc and LineDocMaker.
   */
  public void testLineDocFile() throws Exception {
    File lineFile = new File(System.getProperty("tempDir"),
        "test.reuters.lines.txt");

    // We will call WriteLineDoc this many times
    final int NUM_TRY_DOCS = 500;

    // Create a line file with the first 500 docs from Reuters
    String algLines1[] = {
        "# ----- properties ",
        "doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",
        "doc.maker.forever=false",
        "line.file.out=" + lineFile.getAbsolutePath().replace('\\', '/'),
        "# ----- alg ",
        "{WriteLineDoc()}:" + NUM_TRY_DOCS,
    };

    // Run algo
    Benchmark benchmark = execBenchmark(algLines1);

    // Verify we got somewhere between 1-500 lines (some
    // Reuters docs have no body, which the WriteLineDoc task
    // skips).
    BufferedReader r = new BufferedReader(new FileReader(lineFile));
    int numLines = 0;
    while (r.readLine() != null) {
      numLines++;
    }
    r.close();
    assertTrue(
        "did not see the right number of docs; should be > 0 and <= "
            + NUM_TRY_DOCS + " but was " + numLines,
        numLines > 0 && numLines <= NUM_TRY_DOCS);

    // Index the line docs
    String algLines2[] = {
        "# ----- properties ",
        "analyzer=org.apache.lucene.analysis.SimpleAnalyzer",
        "doc.maker=org.apache.lucene.benchmark.byTask.feeds.LineDocMaker",
        "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'),
        "doc.maker.forever=false",
        "autocommit=false",
        "ram.flush.mb=4",
        "# ----- alg ",
        "ResetSystemErase",
        "CreateIndex",
        "{AddDoc}: *",
        "CloseIndex",
    };

    // Run algo
    benchmark = execBenchmark(algLines2);

    // now we should be able to open the index for write.
    IndexWriter iw = new IndexWriter(
        benchmark.getRunData().getDirectory(), null, false);
    iw.close();

    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
    assertEquals(numLines + " lines were created but " + ir.numDocs()
        + " docs are in the index", numLines, ir.numDocs());
    ir.close();

    lineFile.delete();
  }

  /**
   * Test ReadTokensTask
   */
  public void testReadTokens() throws Exception {

    // We will call ReadTokens on this many docs
    final int NUM_DOCS = 100;

    // Read tokens from the first NUM_DOCS docs from Reuters and
    // then build an index from the same docs
    String algLines1[] = {
        "# ----- properties ",
        "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
        "doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",
        "# ----- alg ",
        "{ReadTokens}: " + NUM_DOCS,
        "ResetSystemErase",
        "CreateIndex",
        "{AddDoc}: " + NUM_DOCS,
        "CloseIndex",
    };

    // Run algo
    Benchmark benchmark = execBenchmark(algLines1);

    List stats = benchmark.getRunData().getPoints().taskStats();

    // Count how many tokens all ReadTokens tasks saw
    int totalTokenCount1 = 0;
    for (Iterator it = stats.iterator(); it.hasNext();) {
      TaskStats stat = (TaskStats) it.next();
      if (stat.getTask().getName().equals("ReadTokens")) {
        totalTokenCount1 += stat.getCount();
      }
    }

    // Separately count how many tokens are actually in the index:
    IndexReader reader = IndexReader.open(benchmark.getRunData().getDirectory());
    assertEquals(NUM_DOCS, reader.numDocs());

    TermEnum terms = reader.terms();
    TermDocs termDocs = reader.termDocs();
    int totalTokenCount2 = 0;
    while (terms.next()) {
      termDocs.seek(terms.term());
      while (termDocs.next()) {
        totalTokenCount2 += termDocs.freq();
      }
    }
    reader.close();

    // Make sure the two counts are the same
    assertEquals(totalTokenCount1, totalTokenCount2);
  }

  /**
   * Test that " {[AddDoc(4000)]: 4} : * " works correctly (for LUCENE-941)
   */
  public void testParallelExhausted() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "doc.maker=" + Reuters20DocMaker.class.getName(),
        "doc.add.log.step=3",
        "doc.term.vector=false",
        "doc.maker.forever=false",
        "directory=RAMDirectory",
        "doc.stored=false",
        "doc.tokenized=false",
        "debug.level=1",
        "# ----- alg ",
        "CreateIndex",
        "{ [ AddDoc]: 4} : * ",
        "ResetInputs ",
        "{ [ AddDoc]: 4} : * ",
        "CloseIndex",
    };

    // 2. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 3. test number of docs in the index
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
    int ndocsExpected = 2 * 20; // Reuters20DocMaker exhausts after 20 docs.
    assertEquals("wrong number of docs in the index!",
        ndocsExpected, ir.numDocs());
    ir.close();
  }

  // create the benchmark and execute it.
  public static Benchmark execBenchmark(String[] algLines) throws Exception {
    String algText = algLinesToText(algLines);
    logTstLogic(algText);
    Benchmark benchmark = new Benchmark(new StringReader(algText));
    benchmark.execute();
    return benchmark;
  }

  // concatenate the alg lines to make the alg text
  private static String algLinesToText(String[] algLines) {
    String indent = " ";
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < propLines.length; i++) {
      sb.append(indent).append(propLines[i]).append(NEW_LINE);
    }
    for (int i = 0; i < algLines.length; i++) {
      sb.append(indent).append(algLines[i]).append(NEW_LINE);
    }
    return sb.toString();
  }

  private static void logTstLogic(String txt) {
    if (!DEBUG) {
      return;
    }
    System.out.println("Test logic of:");
    System.out.println(txt);
  }

  /** Use Reuters and the exhaust mechanism, but to keep the tests fast, add only 20 docs. */
  public static class Reuters20DocMaker extends ReutersDocMaker {
    private int nDocs = 0;

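    // Synchronized because the parallel tests above may run several AddDoc
    // tasks against this doc maker concurrently.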
    protected synchronized DocData getNextDocData() throws Exception {
      if (nDocs >= 20 && !forever) {
        throw new NoMoreDataException();
      }
      nDocs++;
      return super.getNextDocData();
    }

    public synchronized void resetInputs() {
      super.resetInputs();
      nDocs = 0;
    }
  }

  /**
   * Test that exhaust in loop works as expected (LUCENE-1115).
   */
  public void testExhaustedLooped() throws Exception {
    // 1. alg definition (required in every "logic" test)
    String algLines[] = {
        "# ----- properties ",
        "doc.maker=" + Reuters20DocMaker.class.getName(),
        "doc.add.log.step=3",
        "doc.term.vector=false",
        "doc.maker.forever=false",
        "directory=RAMDirectory",
        "doc.stored=false",
        "doc.tokenized=false",
        "debug.level=1",
        "# ----- alg ",
        "{ \"Rounds\"",
        "    ResetSystemErase",
        "    CreateIndex",
        "    { \"AddDocs\"  AddDoc > : * ",
        "    CloseIndex",
        "} : 2",
    };

    // 2. execute the algorithm (required in every "logic" test)
    Benchmark benchmark = execBenchmark(algLines);

    // 3. test number of docs in the index
    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
    int ndocsExpected = 20; // Reuters20DocMaker exhausts after 20 docs.
    assertEquals("wrong number of docs in the index!",
        ndocsExpected, ir.numDocs());
    ir.close();
  }
}