package org.apache.lucene.index;

/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import java.io.Reader;
import java.io.IOException;
import java.util.Random;

/**
 * A TokenStream that returns the same token a preset number of times, letting
 * the test control the term frequency of each indexed document.
 *
 * @author yonik
 * @version $Id$
 */
class RepeatingTokenStream extends TokenStream {
  public int num;  // how many more times next() will return the token
  Token t;

  public RepeatingTokenStream(String val) {
    t = new Token(val, 0, val.length());
  }

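  /** Returns the shared token until the counter runs out, then null to end the stream. */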
  public Token next() throws IOException {
    return --num < 0 ? null : t;
  }
}

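/**
 * Micro-benchmark of TermDocs iteration over a synthetic index in which a
 * single term appears in a configurable fraction of documents.
 */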
public class TestTermdocPerf extends LuceneTestCase {

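  /**
   * Indexes ndocs documents.  For each one, the analyzer decides whether the
   * term appears at all (with probability percentDocs) and, if it does,
   * repeats it between 1 and maxTF times.
   */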
  void addDocs(Directory dir, final int ndocs, String field,
               final String val, final int maxTF, final float percentDocs)
          throws IOException {
    final Random random = new Random(0);  // fixed seed keeps runs comparable
    final RepeatingTokenStream ts = new RepeatingTokenStream(val);

    Analyzer analyzer = new Analyzer() {
      public TokenStream tokenStream(String fieldName, Reader reader) {
        // Decide per document whether the term occurs, and with what frequency.
        if (random.nextFloat() < percentDocs)
          ts.num = random.nextInt(maxTF) + 1;
        else
          ts.num = 0;
        return ts;
      }
    };

    // The field must be tokenized, otherwise the value is indexed as a single
    // term and the analyzer above (and hence maxTF/percentDocs) is bypassed.
    // Norms are still omitted, preserving the intent of Field.Index.NO_NORMS.
    Field f = new Field(field, val, Field.Store.NO, Field.Index.TOKENIZED);
    f.setOmitNorms(true);
    Document doc = new Document();
    doc.add(f);

    IndexWriter writer = new IndexWriter(dir, analyzer, true);
    writer.setMaxBufferedDocs(100);
    writer.setMergeFactor(100);

    for (int i = 0; i < ndocs; i++) {
      writer.addDocument(doc);
    }

    // Merge down to a single segment so iteration measures one posting list.
    writer.optimize();
    writer.close();
  }

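  /**
   * Builds the index, then times iter passes over the posting list of the
   * term.  The accumulated return value keeps the loop from being optimized
   * away as dead code.
   */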
  public int doTest(int iter, int ndocs, int maxTF, float percentDocs)
          throws IOException {
    Directory dir = new RAMDirectory();

    long start = System.currentTimeMillis();
    addDocs(dir, ndocs, "foo", "val", maxTF, percentDocs);
    long end = System.currentTimeMillis();
    System.out.println("milliseconds for creation of " + ndocs
                       + " docs = " + (end - start));

    IndexReader reader = IndexReader.open(dir);
    // Position a TermEnum at the target term; seek() below reuses it.
    TermEnum tenum = reader.terms(new Term("foo", "val"));
    TermDocs tdocs = reader.termDocs();

    start = System.currentTimeMillis();

    int ret = 0;
    for (int i = 0; i < iter; i++) {
      tdocs.seek(tenum);
      while (tdocs.next()) {
        ret += tdocs.doc();  // consume doc ids so the loop has a side effect
      }
    }

    end = System.currentTimeMillis();
    System.out.println("milliseconds for " + iter
                       + " TermDocs iterations: " + (end - start));

    reader.close();
    return ret;
  }

  public void testTermDocPerf() throws IOException {
    // Performance test with 10% of documents containing the term; commented
    // out so routine test runs stay fast.
    // doTest(100000, 10000, 3, .1f);
  }

}