001: package org.apache.lucene.search;
002:
003: import org.apache.lucene.util.LuceneTestCase;
004:
005: import java.util.Random;
006: import java.util.BitSet;
007: import java.util.Set;
008: import java.io.IOException;
009:
010: import org.apache.lucene.index.IndexReader;
011: import org.apache.lucene.index.IndexWriter;
012: import org.apache.lucene.index.Term;
013: import org.apache.lucene.store.RAMDirectory;
014: import org.apache.lucene.store.Directory;
015: import org.apache.lucene.analysis.WhitespaceAnalyzer;
016: import org.apache.lucene.document.Document;
017: import org.apache.lucene.document.Field;
018:
019: /**
020: * Licensed to the Apache Software Foundation (ASF) under one or more
021: * contributor license agreements. See the NOTICE file distributed with
022: * this work for additional information regarding copyright ownership.
023: * The ASF licenses this file to You under the Apache License, Version 2.0
024: * (the "License"); you may not use this file except in compliance with
025: * the License. You may obtain a copy of the License at
026: *
027: * http://www.apache.org/licenses/LICENSE-2.0
028: *
029: * Unless required by applicable law or agreed to in writing, software
030: * distributed under the License is distributed on an "AS IS" BASIS,
031: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
032: * See the License for the specific language governing permissions and
033: * limitations under the License.
034: */
035:
036: /**
037: *
038: * @version $Id$
039: */
040: public class TestScorerPerf extends LuceneTestCase {
041: Random r = new Random(0);
042: boolean validate = true; // set to false when doing performance testing
043:
044: BitSet[] sets;
045: Term[] terms;
046: IndexSearcher s;
047:
048: public void createDummySearcher() throws Exception {
049: // Create a dummy index with nothing in it.
050: // This could possibly fail if Lucene starts checking for docid ranges...
051: RAMDirectory rd = new RAMDirectory();
052: IndexWriter iw = new IndexWriter(rd, new WhitespaceAnalyzer(),
053: true);
054: iw.close();
055: s = new IndexSearcher(rd);
056: }
057:
058: public void createRandomTerms(int nDocs, int nTerms, double power,
059: Directory dir) throws Exception {
060: int[] freq = new int[nTerms];
061: terms = new Term[nTerms];
062: for (int i = 0; i < nTerms; i++) {
063: int f = (nTerms + 1) - i; // make first terms less frequent
064: freq[i] = (int) Math.ceil(Math.pow(f, power));
065: terms[i] = new Term("f", Character
066: .toString((char) ('A' + i)));
067: }
068:
069: IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(),
070: true);
071: for (int i = 0; i < nDocs; i++) {
072: Document d = new Document();
073: for (int j = 0; j < nTerms; j++) {
074: if (r.nextInt(freq[j]) == 0) {
075: d.add(new Field("f", terms[j].text(),
076: Field.Store.NO, Field.Index.UN_TOKENIZED));
077: //System.out.println(d);
078: }
079: }
080: iw.addDocument(d);
081: }
082: iw.optimize();
083: iw.close();
084: }
085:
086: public BitSet randBitSet(int sz, int numBitsToSet) {
087: BitSet set = new BitSet(sz);
088: for (int i = 0; i < numBitsToSet; i++) {
089: set.set(r.nextInt(sz));
090: }
091: return set;
092: }
093:
094: public BitSet[] randBitSets(int numSets, int setSize) {
095: BitSet[] sets = new BitSet[numSets];
096: for (int i = 0; i < sets.length; i++) {
097: sets[i] = randBitSet(setSize, r.nextInt(setSize));
098: }
099: return sets;
100: }
101:
102: public static class BitSetFilter extends Filter {
103: public BitSet set;
104:
105: public BitSetFilter(BitSet set) {
106: this .set = set;
107: }
108:
109: public BitSet bits(IndexReader reader) throws IOException {
110: return set;
111: }
112: }
113:
114: public static class CountingHitCollector extends HitCollector {
115: int count = 0;
116: int sum = 0;
117:
118: public void collect(int doc, float score) {
119: count++;
120: sum += doc; // use it to avoid any possibility of being optimized away
121: }
122:
123: public int getCount() {
124: return count;
125: }
126:
127: public int getSum() {
128: return sum;
129: }
130: }
131:
132: public static class MatchingHitCollector extends
133: CountingHitCollector {
134: BitSet answer;
135: int pos = -1;
136:
137: public MatchingHitCollector(BitSet answer) {
138: this .answer = answer;
139: }
140:
141: public void collect(int doc, float score) {
142: pos = answer.nextSetBit(pos + 1);
143: if (pos != doc) {
144: throw new RuntimeException("Expected doc " + pos
145: + " but got " + doc);
146: }
147: super .collect(doc, score);
148: }
149: }
150:
151: BitSet addClause(BooleanQuery bq, BitSet result) {
152: BitSet rnd = sets[r.nextInt(sets.length)];
153: Query q = new ConstantScoreQuery(new BitSetFilter(rnd));
154: bq.add(q, BooleanClause.Occur.MUST);
155: if (validate) {
156: if (result == null)
157: result = (BitSet) rnd.clone();
158: else
159: result.and(rnd);
160: }
161: return result;
162: }
163:
164: public int doConjunctions(int iter, int maxClauses)
165: throws IOException {
166: int ret = 0;
167:
168: for (int i = 0; i < iter; i++) {
169: int nClauses = r.nextInt(maxClauses - 1) + 2; // min 2 clauses
170: BooleanQuery bq = new BooleanQuery();
171: BitSet result = null;
172: for (int j = 0; j < nClauses; j++) {
173: result = addClause(bq, result);
174: }
175:
176: CountingHitCollector hc = validate ? new MatchingHitCollector(
177: result)
178: : new CountingHitCollector();
179: s.search(bq, hc);
180: ret += hc.getSum();
181: if (validate)
182: assertEquals(result.cardinality(), hc.getCount());
183: // System.out.println(hc.getCount());
184: }
185:
186: return ret;
187: }
188:
189: public int doNestedConjunctions(int iter, int maxOuterClauses,
190: int maxClauses) throws IOException {
191: int ret = 0;
192: long nMatches = 0;
193:
194: for (int i = 0; i < iter; i++) {
195: int oClauses = r.nextInt(maxOuterClauses - 1) + 2;
196: BooleanQuery oq = new BooleanQuery();
197: BitSet result = null;
198:
199: for (int o = 0; o < oClauses; o++) {
200:
201: int nClauses = r.nextInt(maxClauses - 1) + 2; // min 2 clauses
202: BooleanQuery bq = new BooleanQuery();
203: for (int j = 0; j < nClauses; j++) {
204: result = addClause(bq, result);
205: }
206:
207: oq.add(bq, BooleanClause.Occur.MUST);
208: } // outer
209:
210: CountingHitCollector hc = validate ? new MatchingHitCollector(
211: result)
212: : new CountingHitCollector();
213: s.search(oq, hc);
214: nMatches += hc.getCount();
215: ret += hc.getSum();
216: if (validate)
217: assertEquals(result.cardinality(), hc.getCount());
218: // System.out.println(hc.getCount());
219: }
220: System.out.println("Average number of matches="
221: + (nMatches / iter));
222: return ret;
223: }
224:
225: public int doTermConjunctions(IndexSearcher s, int termsInIndex,
226: int maxClauses, int iter) throws IOException {
227: int ret = 0;
228:
229: long nMatches = 0;
230: for (int i = 0; i < iter; i++) {
231: int nClauses = r.nextInt(maxClauses - 1) + 2; // min 2 clauses
232: BooleanQuery bq = new BooleanQuery();
233: BitSet termflag = new BitSet(termsInIndex);
234: for (int j = 0; j < nClauses; j++) {
235: int tnum;
236: // don't pick same clause twice
237: tnum = r.nextInt(termsInIndex);
238: if (termflag.get(tnum))
239: tnum = termflag.nextClearBit(tnum);
240: if (tnum < 0 || tnum >= termsInIndex)
241: tnum = termflag.nextClearBit(0);
242: termflag.set(tnum);
243: Query tq = new TermQuery(terms[tnum]);
244: bq.add(tq, BooleanClause.Occur.MUST);
245: }
246:
247: CountingHitCollector hc = new CountingHitCollector();
248: s.search(bq, hc);
249: nMatches += hc.getCount();
250: ret += hc.getSum();
251: }
252: System.out.println("Average number of matches="
253: + (nMatches / iter));
254:
255: return ret;
256: }
257:
258: public int doNestedTermConjunctions(IndexSearcher s,
259: int termsInIndex, int maxOuterClauses, int maxClauses,
260: int iter) throws IOException {
261: int ret = 0;
262: long nMatches = 0;
263: for (int i = 0; i < iter; i++) {
264: int oClauses = r.nextInt(maxOuterClauses - 1) + 2;
265: BooleanQuery oq = new BooleanQuery();
266: for (int o = 0; o < oClauses; o++) {
267:
268: int nClauses = r.nextInt(maxClauses - 1) + 2; // min 2 clauses
269: BooleanQuery bq = new BooleanQuery();
270: BitSet termflag = new BitSet(termsInIndex);
271: for (int j = 0; j < nClauses; j++) {
272: int tnum;
273: // don't pick same clause twice
274: tnum = r.nextInt(termsInIndex);
275: if (termflag.get(tnum))
276: tnum = termflag.nextClearBit(tnum);
277: if (tnum < 0 || tnum >= 25)
278: tnum = termflag.nextClearBit(0);
279: termflag.set(tnum);
280: Query tq = new TermQuery(terms[tnum]);
281: bq.add(tq, BooleanClause.Occur.MUST);
282: } // inner
283:
284: oq.add(bq, BooleanClause.Occur.MUST);
285: } // outer
286:
287: CountingHitCollector hc = new CountingHitCollector();
288: s.search(oq, hc);
289: nMatches += hc.getCount();
290: ret += hc.getSum();
291: }
292: System.out.println("Average number of matches="
293: + (nMatches / iter));
294: return ret;
295: }
296:
297: public int doSloppyPhrase(IndexSearcher s, int termsInIndex,
298: int maxClauses, int iter) throws IOException {
299: int ret = 0;
300:
301: for (int i = 0; i < iter; i++) {
302: int nClauses = r.nextInt(maxClauses - 1) + 2; // min 2 clauses
303: PhraseQuery q = new PhraseQuery();
304: for (int j = 0; j < nClauses; j++) {
305: int tnum = r.nextInt(termsInIndex);
306: q.add(new Term("f", Character
307: .toString((char) (tnum + 'A'))), j);
308: }
309: q.setSlop(termsInIndex); // this could be random too
310:
311: CountingHitCollector hc = new CountingHitCollector();
312: s.search(q, hc);
313: ret += hc.getSum();
314: }
315:
316: return ret;
317: }
318:
319: public void testConjunctions() throws Exception {
320: // test many small sets... the bugs will be found on boundary conditions
321: createDummySearcher();
322: validate = true;
323: sets = randBitSets(1000, 10);
324: doConjunctions(10000, 5);
325: doNestedConjunctions(10000, 3, 3);
326: s.close();
327: }
328:
329: /***
330: int bigIter=10;
331:
332: public void testConjunctionPerf() throws Exception {
333: createDummySearcher();
334: validate=false;
335: sets=randBitSets(32,1000000);
336: for (int i=0; i<bigIter; i++) {
337: long start = System.currentTimeMillis();
338: doConjunctions(500,6);
339: long end = System.currentTimeMillis();
340: System.out.println("milliseconds="+(end-start));
341: }
342: s.close();
343: }
344:
345: public void testNestedConjunctionPerf() throws Exception {
346: createDummySearcher();
347: validate=false;
348: sets=randBitSets(32,1000000);
349: for (int i=0; i<bigIter; i++) {
350: long start = System.currentTimeMillis();
351: doNestedConjunctions(500,3,3);
352: long end = System.currentTimeMillis();
353: System.out.println("milliseconds="+(end-start));
354: }
355: s.close();
356: }
357:
358:
359: public void testConjunctionTerms() throws Exception {
360: validate=false;
361: RAMDirectory dir = new RAMDirectory();
362: System.out.println("Creating index");
363: createRandomTerms(100000,25,.5, dir);
364: s = new IndexSearcher(dir);
365: System.out.println("Starting performance test");
366: for (int i=0; i<bigIter; i++) {
367: long start = System.currentTimeMillis();
368: doTermConjunctions(s,25,5,1000);
369: long end = System.currentTimeMillis();
370: System.out.println("milliseconds="+(end-start));
371: }
372: s.close();
373: }
374:
375: public void testNestedConjunctionTerms() throws Exception {
376: validate=false;
377: RAMDirectory dir = new RAMDirectory();
378: System.out.println("Creating index");
379: createRandomTerms(100000,25,.2, dir);
380: s = new IndexSearcher(dir);
381: System.out.println("Starting performance test");
382: for (int i=0; i<bigIter; i++) {
383: long start = System.currentTimeMillis();
384: doNestedTermConjunctions(s,25,3,3,200);
385: long end = System.currentTimeMillis();
386: System.out.println("milliseconds="+(end-start));
387: }
388: s.close();
389: }
390:
391:
392: public void testSloppyPhrasePerf() throws Exception {
393: validate=false;
394: RAMDirectory dir = new RAMDirectory();
395: System.out.println("Creating index");
396: createRandomTerms(100000,25,2,dir);
397: s = new IndexSearcher(dir);
398: System.out.println("Starting performance test");
399: for (int i=0; i<bigIter; i++) {
400: long start = System.currentTimeMillis();
401: doSloppyPhrase(s,25,2,1000);
402: long end = System.currentTimeMillis();
403: System.out.println("milliseconds="+(end-start));
404: }
405: s.close();
406: }
407: ***/
408:
409: }
|