001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.IOException;
021:
022: import org.apache.lucene.util.LuceneTestCase;
023:
024: import org.apache.lucene.analysis.WhitespaceAnalyzer;
025: import org.apache.lucene.document.Document;
026: import org.apache.lucene.document.Field;
027: import org.apache.lucene.index.IndexReader;
028: import org.apache.lucene.index.IndexWriter;
029: import org.apache.lucene.index.Term;
030: import org.apache.lucene.index.TermEnum;
031: import org.apache.lucene.store.Directory;
032: import org.apache.lucene.store.RAMDirectory;
033: import org.apache.lucene.store.MockRAMDirectory;
034:
035: /**
036: * @author goller
037: */
038: public class TestSegmentTermEnum extends LuceneTestCase {
039: Directory dir = new RAMDirectory();
040:
041: public void testTermEnum() throws IOException {
042: IndexWriter writer = null;
043:
044: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
045:
046: // add 100 documents with term : aaa
047: // add 100 documents with terms: aaa bbb
048: // Therefore, term 'aaa' has document frequency of 200 and term 'bbb' 100
049: for (int i = 0; i < 100; i++) {
050: addDoc(writer, "aaa");
051: addDoc(writer, "aaa bbb");
052: }
053:
054: writer.close();
055:
056: // verify document frequency of terms in an unoptimized index
057: verifyDocFreq();
058:
059: // merge segments by optimizing the index
060: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
061: writer.optimize();
062: writer.close();
063:
064: // verify document frequency of terms in an optimized index
065: verifyDocFreq();
066: }
067:
068: public void testPrevTermAtEnd() throws IOException {
069: Directory dir = new MockRAMDirectory();
070: IndexWriter writer = new IndexWriter(dir,
071: new WhitespaceAnalyzer(), true);
072: addDoc(writer, "aaa bbb");
073: writer.close();
074: IndexReader reader = IndexReader.open(dir);
075: SegmentTermEnum termEnum = (SegmentTermEnum) reader.terms();
076: assertTrue(termEnum.next());
077: assertEquals("aaa", termEnum.term().text());
078: assertTrue(termEnum.next());
079: assertEquals("aaa", termEnum.prev().text());
080: assertEquals("bbb", termEnum.term().text());
081: assertFalse(termEnum.next());
082: assertEquals("bbb", termEnum.prev().text());
083: }
084:
085: private void verifyDocFreq() throws IOException {
086: IndexReader reader = IndexReader.open(dir);
087: TermEnum termEnum = null;
088:
089: // create enumeration of all terms
090: termEnum = reader.terms();
091: // go to the first term (aaa)
092: termEnum.next();
093: // assert that term is 'aaa'
094: assertEquals("aaa", termEnum.term().text());
095: assertEquals(200, termEnum.docFreq());
096: // go to the second term (bbb)
097: termEnum.next();
098: // assert that term is 'bbb'
099: assertEquals("bbb", termEnum.term().text());
100: assertEquals(100, termEnum.docFreq());
101:
102: termEnum.close();
103:
104: // create enumeration of terms after term 'aaa', including 'aaa'
105: termEnum = reader.terms(new Term("content", "aaa"));
106: // assert that term is 'aaa'
107: assertEquals("aaa", termEnum.term().text());
108: assertEquals(200, termEnum.docFreq());
109: // go to term 'bbb'
110: termEnum.next();
111: // assert that term is 'bbb'
112: assertEquals("bbb", termEnum.term().text());
113: assertEquals(100, termEnum.docFreq());
114:
115: termEnum.close();
116: }
117:
118: private void addDoc(IndexWriter writer, String value)
119: throws IOException {
120: Document doc = new Document();
121: doc.add(new Field("content", value, Field.Store.NO,
122: Field.Index.TOKENIZED));
123: writer.addDocument(doc);
124: }
125: }
|