001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.util.LuceneTestCase;
021: import org.apache.lucene.store.RAMDirectory;
022: import org.apache.lucene.store.MockRAMDirectory;
023: import org.apache.lucene.store.Directory;
024: import org.apache.lucene.analysis.WhitespaceAnalyzer;
025: import org.apache.lucene.document.Document;
026: import org.apache.lucene.document.Field;
027:
028: import java.io.IOException;
029: import org.apache.lucene.search.Similarity;
030:
031: public class TestSegmentTermDocs extends LuceneTestCase {
032: private Document testDoc = new Document();
033: private Directory dir = new RAMDirectory();
034: private SegmentInfo info;
035:
036: public TestSegmentTermDocs(String s) {
037: super (s);
038: }
039:
040: protected void setUp() throws Exception {
041: super .setUp();
042: DocHelper.setupDoc(testDoc);
043: info = DocHelper.writeDoc(dir, testDoc);
044: }
045:
046: public void test() {
047: assertTrue(dir != null);
048: }
049:
050: public void testTermDocs() throws IOException {
051: testTermDocs(1);
052: }
053:
054: public void testTermDocs(int indexDivisor) throws IOException {
055: //After adding the document, we should be able to read it back in
056: SegmentReader reader = SegmentReader.get(info);
057: reader.setTermInfosIndexDivisor(indexDivisor);
058: assertTrue(reader != null);
059: SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
060: assertTrue(segTermDocs != null);
061: segTermDocs.seek(new Term(DocHelper.TEXT_FIELD_2_KEY, "field"));
062: if (segTermDocs.next() == true) {
063: int docId = segTermDocs.doc();
064: assertTrue(docId == 0);
065: int freq = segTermDocs.freq();
066: assertTrue(freq == 3);
067: }
068: reader.close();
069: }
070:
071: public void testBadSeek() throws IOException {
072: testBadSeek(1);
073: }
074:
075: public void testBadSeek(int indexDivisor) throws IOException {
076: {
077: //After adding the document, we should be able to read it back in
078: SegmentReader reader = SegmentReader.get(info);
079: reader.setTermInfosIndexDivisor(indexDivisor);
080: assertTrue(reader != null);
081: SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
082: assertTrue(segTermDocs != null);
083: segTermDocs.seek(new Term("textField2", "bad"));
084: assertTrue(segTermDocs.next() == false);
085: reader.close();
086: }
087: {
088: //After adding the document, we should be able to read it back in
089: SegmentReader reader = SegmentReader.get(info);
090: reader.setTermInfosIndexDivisor(indexDivisor);
091: assertTrue(reader != null);
092: SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
093: assertTrue(segTermDocs != null);
094: segTermDocs.seek(new Term("junk", "bad"));
095: assertTrue(segTermDocs.next() == false);
096: reader.close();
097: }
098: }
099:
100: public void testSkipTo() throws IOException {
101: testSkipTo(1);
102: }
103:
104: public void testSkipTo(int indexDivisor) throws IOException {
105: Directory dir = new RAMDirectory();
106: IndexWriter writer = new IndexWriter(dir,
107: new WhitespaceAnalyzer(), true);
108:
109: Term ta = new Term("content", "aaa");
110: for (int i = 0; i < 10; i++)
111: addDoc(writer, "aaa aaa aaa aaa");
112:
113: Term tb = new Term("content", "bbb");
114: for (int i = 0; i < 16; i++)
115: addDoc(writer, "bbb bbb bbb bbb");
116:
117: Term tc = new Term("content", "ccc");
118: for (int i = 0; i < 50; i++)
119: addDoc(writer, "ccc ccc ccc ccc");
120:
121: // assure that we deal with a single segment
122: writer.optimize();
123: writer.close();
124:
125: IndexReader reader = IndexReader.open(dir);
126: reader.setTermInfosIndexDivisor(indexDivisor);
127: assertEquals(indexDivisor, reader.getTermInfosIndexDivisor());
128:
129: TermDocs tdocs = reader.termDocs();
130:
131: // without optimization (assumption skipInterval == 16)
132:
133: // with next
134: tdocs.seek(ta);
135: assertTrue(tdocs.next());
136: assertEquals(0, tdocs.doc());
137: assertEquals(4, tdocs.freq());
138: assertTrue(tdocs.next());
139: assertEquals(1, tdocs.doc());
140: assertEquals(4, tdocs.freq());
141: assertTrue(tdocs.skipTo(0));
142: assertEquals(2, tdocs.doc());
143: assertTrue(tdocs.skipTo(4));
144: assertEquals(4, tdocs.doc());
145: assertTrue(tdocs.skipTo(9));
146: assertEquals(9, tdocs.doc());
147: assertFalse(tdocs.skipTo(10));
148:
149: // without next
150: tdocs.seek(ta);
151: assertTrue(tdocs.skipTo(0));
152: assertEquals(0, tdocs.doc());
153: assertTrue(tdocs.skipTo(4));
154: assertEquals(4, tdocs.doc());
155: assertTrue(tdocs.skipTo(9));
156: assertEquals(9, tdocs.doc());
157: assertFalse(tdocs.skipTo(10));
158:
159: // exactly skipInterval documents and therefore with optimization
160:
161: // with next
162: tdocs.seek(tb);
163: assertTrue(tdocs.next());
164: assertEquals(10, tdocs.doc());
165: assertEquals(4, tdocs.freq());
166: assertTrue(tdocs.next());
167: assertEquals(11, tdocs.doc());
168: assertEquals(4, tdocs.freq());
169: assertTrue(tdocs.skipTo(5));
170: assertEquals(12, tdocs.doc());
171: assertTrue(tdocs.skipTo(15));
172: assertEquals(15, tdocs.doc());
173: assertTrue(tdocs.skipTo(24));
174: assertEquals(24, tdocs.doc());
175: assertTrue(tdocs.skipTo(25));
176: assertEquals(25, tdocs.doc());
177: assertFalse(tdocs.skipTo(26));
178:
179: // without next
180: tdocs.seek(tb);
181: assertTrue(tdocs.skipTo(5));
182: assertEquals(10, tdocs.doc());
183: assertTrue(tdocs.skipTo(15));
184: assertEquals(15, tdocs.doc());
185: assertTrue(tdocs.skipTo(24));
186: assertEquals(24, tdocs.doc());
187: assertTrue(tdocs.skipTo(25));
188: assertEquals(25, tdocs.doc());
189: assertFalse(tdocs.skipTo(26));
190:
191: // much more than skipInterval documents and therefore with optimization
192:
193: // with next
194: tdocs.seek(tc);
195: assertTrue(tdocs.next());
196: assertEquals(26, tdocs.doc());
197: assertEquals(4, tdocs.freq());
198: assertTrue(tdocs.next());
199: assertEquals(27, tdocs.doc());
200: assertEquals(4, tdocs.freq());
201: assertTrue(tdocs.skipTo(5));
202: assertEquals(28, tdocs.doc());
203: assertTrue(tdocs.skipTo(40));
204: assertEquals(40, tdocs.doc());
205: assertTrue(tdocs.skipTo(57));
206: assertEquals(57, tdocs.doc());
207: assertTrue(tdocs.skipTo(74));
208: assertEquals(74, tdocs.doc());
209: assertTrue(tdocs.skipTo(75));
210: assertEquals(75, tdocs.doc());
211: assertFalse(tdocs.skipTo(76));
212:
213: //without next
214: tdocs.seek(tc);
215: assertTrue(tdocs.skipTo(5));
216: assertEquals(26, tdocs.doc());
217: assertTrue(tdocs.skipTo(40));
218: assertEquals(40, tdocs.doc());
219: assertTrue(tdocs.skipTo(57));
220: assertEquals(57, tdocs.doc());
221: assertTrue(tdocs.skipTo(74));
222: assertEquals(74, tdocs.doc());
223: assertTrue(tdocs.skipTo(75));
224: assertEquals(75, tdocs.doc());
225: assertFalse(tdocs.skipTo(76));
226:
227: tdocs.close();
228: reader.close();
229: dir.close();
230: }
231:
232: public void testIndexDivisor() throws IOException {
233: dir = new MockRAMDirectory();
234: testDoc = new Document();
235: DocHelper.setupDoc(testDoc);
236: DocHelper.writeDoc(dir, testDoc);
237: testTermDocs(2);
238: testBadSeek(2);
239: testSkipTo(2);
240: }
241:
242: public void testIndexDivisorAfterLoad() throws IOException {
243: dir = new MockRAMDirectory();
244: testDoc = new Document();
245: DocHelper.setupDoc(testDoc);
246: SegmentInfo si = DocHelper.writeDoc(dir, testDoc);
247: SegmentReader reader = SegmentReader.get(si);
248: assertEquals(1, reader.docFreq(new Term("keyField", "Keyword")));
249: try {
250: reader.setTermInfosIndexDivisor(2);
251: fail("did not hit IllegalStateException exception");
252: } catch (IllegalStateException ise) {
253: // expected
254: }
255: }
256:
257: private void addDoc(IndexWriter writer, String value)
258: throws IOException {
259: Document doc = new Document();
260: doc.add(new Field("content", value, Field.Store.NO,
261: Field.Index.TOKENIZED));
262: writer.addDocument(doc);
263: }
264: }
|