package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;

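/**
 * Tests that IndexWriter, configured with LogDocMergePolicy, keeps the index
 * within the merge policy's invariants as documents are added, flushed and
 * merged; each test calls checkInvariants to verify the segment structure.
 */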
public class TestIndexWriterMergePolicy extends LuceneTestCase {

  // Test the normal case
  public void testNormalCase() throws IOException {
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(10);
    writer.setMergeFactor(10);
    writer.setMergePolicy(new LogDocMergePolicy());

    for (int i = 0; i < 100; i++) {
      addDoc(writer);
      checkInvariants(writer);
    }

    writer.close();
  }

  // Test that the writer does not over-merge: with maxBufferedDocs and
  // mergeFactor both 10, small segments should be allowed to accumulate
  // (buffered docs + segments reaching at least 18) before merges kick in.
  public void testNoOverMerge() throws IOException {
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(10);
    writer.setMergeFactor(10);
    writer.setMergePolicy(new LogDocMergePolicy());

    boolean noOverMerge = false;
    for (int i = 0; i < 100; i++) {
      addDoc(writer);
      checkInvariants(writer);
      if (writer.getNumBufferedDocuments() + writer.getSegmentCount() >= 18) {
        noOverMerge = true;
      }
    }
    assertTrue(noOverMerge);

    writer.close();
  }

  // Test the case where a flush is forced after every addDoc: closing and
  // reopening the writer flushes the single buffered doc into its own segment.
  public void testForceFlush() throws IOException {
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(10);
    writer.setMergeFactor(10);
    LogDocMergePolicy mp = new LogDocMergePolicy();
    mp.setMinMergeDocs(100);
    writer.setMergePolicy(mp);

    for (int i = 0; i < 100; i++) {
      addDoc(writer);
      writer.close();

      writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
      writer.setMaxBufferedDocs(10);
      writer.setMergePolicy(mp);
      mp.setMinMergeDocs(100);
      writer.setMergeFactor(10);
      checkInvariants(writer);
    }

    writer.close();
  }

  // Test the case where mergeFactor changes
  public void testMergeFactorChange() throws IOException {
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(10);
    writer.setMergeFactor(100);
    writer.setMergePolicy(new LogDocMergePolicy());

    for (int i = 0; i < 250; i++) {
      addDoc(writer);
      checkInvariants(writer);
    }

    writer.setMergeFactor(5);

    // merge policy only fixes segments on levels where merges
    // have been triggered, so check invariants after all adds
    for (int i = 0; i < 10; i++) {
      addDoc(writer);
    }
    checkInvariants(writer);

    writer.close();
  }

  // Test the case where both mergeFactor and maxBufferedDocs change
  public void testMaxBufferedDocsChange() throws IOException {
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(101);
    writer.setMergeFactor(101);
    writer.setMergePolicy(new LogDocMergePolicy());

    // Build up segments of increasing size: after this loop the leftmost
    // (oldest) segment has 1 doc and the rightmost (newest) has 100 docs.
    for (int i = 1; i <= 100; i++) {
      for (int j = 0; j < i; j++) {
        addDoc(writer);
        checkInvariants(writer);
      }
      writer.close();

      writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
      writer.setMaxBufferedDocs(101);
      writer.setMergeFactor(101);
      writer.setMergePolicy(new LogDocMergePolicy());
    }

    writer.setMaxBufferedDocs(10);
    writer.setMergeFactor(10);

    // merge policy only fixes segments on levels where merges
    // have been triggered, so check invariants after all adds
    for (int i = 0; i < 100; i++) {
      addDoc(writer);
    }
    checkInvariants(writer);

    for (int i = 100; i < 1000; i++) {
      addDoc(writer);
    }
    checkInvariants(writer);

    writer.close();
  }

  // Test the case where a merge results in no doc at all
  public void testMergeDocCount0() throws IOException {
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMergePolicy(new LogDocMergePolicy());
    writer.setMaxBufferedDocs(10);
    writer.setMergeFactor(100);

    for (int i = 0; i < 250; i++) {
      addDoc(writer);
      checkInvariants(writer);
    }
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    reader.deleteDocuments(new Term("content", "aaa"));
    reader.close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
    writer.setMergePolicy(new LogDocMergePolicy());
    writer.setMaxBufferedDocs(10);
    writer.setMergeFactor(5);

    // merge factor is changed, so check invariants after all adds
    for (int i = 0; i < 10; i++) {
      addDoc(writer);
    }
    checkInvariants(writer);
    assertEquals(10, writer.docCount());

    writer.close();
  }

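  // Adds a single document with one tokenized field ("content": "aaa").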
  private void addDoc(IndexWriter writer) throws IOException {
    Document doc = new Document();
    doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
    writer.addDocument(doc);
  }

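  /*
   * Checks the invariants the log doc merge policy is expected to maintain:
   *  - the number of buffered (not yet flushed) documents stays below
   *    maxBufferedDocs;
   *  - walking the segments from newest to oldest, segment sizes fall into
   *    increasing levels (maxBufferedDocs, maxBufferedDocs*mergeFactor, ...),
   *    and each level holds fewer than mergeFactor segments as long as
   *    merging that level would not exceed maxMergeDocs;
   *  - every segment is written in compound file format, so the number of
   *    .cfs files in the directory equals the segment count.
   */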
  private void checkInvariants(IndexWriter writer) throws IOException {
    _TestUtil.syncConcurrentMerges(writer);
    int maxBufferedDocs = writer.getMaxBufferedDocs();
    int mergeFactor = writer.getMergeFactor();
    int maxMergeDocs = writer.getMaxMergeDocs();

    int ramSegmentCount = writer.getNumBufferedDocuments();
    assertTrue(ramSegmentCount < maxBufferedDocs);

    int lowerBound = -1;
    int upperBound = maxBufferedDocs;
    int numSegments = 0;

    int segmentCount = writer.getSegmentCount();
    for (int i = segmentCount - 1; i >= 0; i--) {
      int docCount = writer.getDocCount(i);
      assertTrue(docCount > lowerBound);

      if (docCount <= upperBound) {
        numSegments++;
      } else {
        if (upperBound * mergeFactor <= maxMergeDocs) {
          assertTrue(numSegments < mergeFactor);
        }

        do {
          lowerBound = upperBound;
          upperBound *= mergeFactor;
        } while (docCount > upperBound);
        numSegments = 1;
      }
    }
    if (upperBound * mergeFactor <= maxMergeDocs) {
      assertTrue(numSegments < mergeFactor);
    }

    String[] files = writer.getDirectory().list();
    int segmentCfsCount = 0;
    for (int i = 0; i < files.length; i++) {
      if (files[i].endsWith(".cfs")) {
        segmentCfsCount++;
      }
    }
    assertEquals(segmentCount, segmentCfsCount);
  }

  // Debug helper (not called by the tests above): prints the doc count of
  // every segment currently in the index.
  private void printSegmentDocCounts(IndexWriter writer) {
    int segmentCount = writer.getSegmentCount();
    System.out.println("" + segmentCount + " segments total");
    for (int i = 0; i < segmentCount; i++) {
      System.out.println("  segment " + i + " has " + writer.getDocCount(i)
          + " docs");
    }
  }
}