001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.IOException;
021:
022: import org.apache.lucene.analysis.WhitespaceAnalyzer;
023: import org.apache.lucene.document.Document;
024: import org.apache.lucene.document.Field;
025: import org.apache.lucene.search.Hits;
026: import org.apache.lucene.search.IndexSearcher;
027: import org.apache.lucene.search.PhraseQuery;
028: import org.apache.lucene.search.Searcher;
029: import org.apache.lucene.store.Directory;
030: import org.apache.lucene.store.IndexInput;
031: import org.apache.lucene.store.RAMDirectory;
032:
033: import org.apache.lucene.util.LuceneTestCase;
034:
035: /**
036: * Tests lazy skipping on the proximity file.
037: *
038: */
039: public class TestLazyProxSkipping extends LuceneTestCase {
040: private Searcher searcher;
041: private int seeksCounter = 0;
042:
043: private String field = "tokens";
044: private String term1 = "xx";
045: private String term2 = "yy";
046: private String term3 = "zz";
047:
048: private void createIndex(int numHits) throws IOException {
049: int numDocs = 500;
050:
051: Directory directory = new RAMDirectory();
052: IndexWriter writer = new IndexWriter(directory,
053: new WhitespaceAnalyzer(), true);
054: writer.setMaxBufferedDocs(10);
055: for (int i = 0; i < numDocs; i++) {
056: Document doc = new Document();
057: String content;
058: if (i % (numDocs / numHits) == 0) {
059: // add a document that matches the query "term1 term2"
060: content = this .term1 + " " + this .term2;
061: } else if (i % 15 == 0) {
062: // add a document that only contains term1
063: content = this .term1 + " " + this .term1;
064: } else {
065: // add a document that contains term2 but not term 1
066: content = this .term3 + " " + this .term2;
067: }
068:
069: doc.add(new Field(this .field, content, Field.Store.YES,
070: Field.Index.TOKENIZED));
071: writer.addDocument(doc);
072: }
073:
074: // make sure the index has only a single segment
075: writer.optimize();
076: writer.close();
077:
078: // the index is a single segment, thus IndexReader.open() returns an instance of SegmentReader
079: SegmentReader reader = (SegmentReader) IndexReader
080: .open(directory);
081:
082: // we decorate the proxStream with a wrapper class that allows to count the number of calls of seek()
083: reader.proxStream = new SeeksCountingStream(reader.proxStream);
084:
085: this .searcher = new IndexSearcher(reader);
086: }
087:
088: private Hits search() throws IOException {
089: // create PhraseQuery "term1 term2" and search
090: PhraseQuery pq = new PhraseQuery();
091: pq.add(new Term(this .field, this .term1));
092: pq.add(new Term(this .field, this .term2));
093: return this .searcher.search(pq);
094: }
095:
096: private void performTest(int numHits) throws IOException {
097: createIndex(numHits);
098: this .seeksCounter = 0;
099: Hits hits = search();
100: // verify that the right number of docs was found
101: assertEquals(numHits, hits.length());
102:
103: // check if the number of calls of seek() does not exceed the number of hits
104: assertTrue(this .seeksCounter <= numHits + 1);
105: }
106:
107: public void testLazySkipping() throws IOException {
108: // test whether only the minimum amount of seeks() are performed
109: performTest(5);
110: performTest(10);
111: }
112:
113: public void testSeek() throws IOException {
114: Directory directory = new RAMDirectory();
115: IndexWriter writer = new IndexWriter(directory,
116: new WhitespaceAnalyzer(), true);
117: for (int i = 0; i < 10; i++) {
118: Document doc = new Document();
119: doc.add(new Field(this .field, "a b", Field.Store.YES,
120: Field.Index.TOKENIZED));
121: writer.addDocument(doc);
122: }
123:
124: writer.close();
125: IndexReader reader = IndexReader.open(directory);
126: TermPositions tp = reader.termPositions();
127: tp.seek(new Term(this .field, "b"));
128: for (int i = 0; i < 10; i++) {
129: tp.next();
130: assertEquals(tp.doc(), i);
131: assertEquals(tp.nextPosition(), 1);
132: }
133: tp.seek(new Term(this .field, "a"));
134: for (int i = 0; i < 10; i++) {
135: tp.next();
136: assertEquals(tp.doc(), i);
137: assertEquals(tp.nextPosition(), 0);
138: }
139:
140: }
141:
142: // Simply extends IndexInput in a way that we are able to count the number
143: // of invocations of seek()
144: class SeeksCountingStream extends IndexInput {
145: private IndexInput input;
146:
147: SeeksCountingStream(IndexInput input) {
148: this .input = input;
149: }
150:
151: public byte readByte() throws IOException {
152: return this .input.readByte();
153: }
154:
155: public void readBytes(byte[] b, int offset, int len)
156: throws IOException {
157: this .input.readBytes(b, offset, len);
158: }
159:
160: public void close() throws IOException {
161: this .input.close();
162: }
163:
164: public long getFilePointer() {
165: return this .input.getFilePointer();
166: }
167:
168: public void seek(long pos) throws IOException {
169: TestLazyProxSkipping.this .seeksCounter++;
170: this .input.seek(pos);
171: }
172:
173: public long length() {
174: return this .input.length();
175: }
176:
177: public Object clone() {
178: return new SeeksCountingStream((IndexInput) this.input
179: .clone());
180: }
181:
182: }
183: }
|