001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.IOException;
021:
022: import org.apache.lucene.util.LuceneTestCase;
023:
024: import org.apache.lucene.analysis.WhitespaceAnalyzer;
025: import org.apache.lucene.document.Document;
026: import org.apache.lucene.document.Field;
027: import org.apache.lucene.index.IndexReader;
028: import org.apache.lucene.index.IndexWriter;
029: import org.apache.lucene.store.Directory;
030: import org.apache.lucene.store.RAMDirectory;
031:
/**
 * Tests for {@code IndexWriter.addIndexesNoOptimize}: copying external
 * indexes into an existing index without forcing a full optimize.
 * <p>
 * Every writer is created through {@link #newWriter}, which installs a
 * {@link LogDocMergePolicy}; merge decisions therefore depend only on
 * per-segment document counts, which makes the exact segment-count
 * assertions below deterministic.
 */
public class TestAddIndexesNoOptimize extends LuceneTestCase {
  /**
   * Basic round trip: build a main index plus several auxiliary indexes,
   * add them in (one or two directories at a time), and verify document
   * counts and per-term doc frequencies both before and after an optimize.
   */
  public void testSimpleCase() throws IOException {
    // main directory
    Directory dir = new RAMDirectory();
    // two auxiliary directories
    Directory aux = new RAMDirectory();
    Directory aux2 = new RAMDirectory();

    IndexWriter writer = null;

    writer = newWriter(dir, true);
    // add 100 documents
    addDocs(writer, 100);
    assertEquals(100, writer.docCount());
    writer.close();

    writer = newWriter(aux, true);
    writer.setUseCompoundFile(false); // use one without a compound file
    // add 40 documents in separate files
    addDocs(writer, 40);
    assertEquals(40, writer.docCount());
    writer.close();

    writer = newWriter(aux2, true);
    // add 50 documents (field content="bbb") in compound files
    addDocs2(writer, 50);
    assertEquals(50, writer.docCount());
    writer.close();

    // test doc count before segments are merged
    writer = newWriter(dir, false);
    assertEquals(100, writer.docCount());
    writer.addIndexesNoOptimize(new Directory[] { aux, aux2 });
    assertEquals(190, writer.docCount());
    writer.close();

    // make sure the old index is correct (adding must not modify the source)
    verifyNumDocs(aux, 40);

    // make sure the new index is correct: 100 + 40 + 50
    verifyNumDocs(dir, 190);

    // now add another set in.
    Directory aux3 = new RAMDirectory();
    writer = newWriter(aux3, true);
    // add 40 documents
    addDocs(writer, 40);
    assertEquals(40, writer.docCount());
    writer.close();

    // test doc count before segments are merged/index is optimized
    writer = newWriter(dir, false);
    assertEquals(190, writer.docCount());
    writer.addIndexesNoOptimize(new Directory[] { aux3 });
    assertEquals(230, writer.docCount());
    writer.close();

    // make sure the new index is correct: 190 + 40
    verifyNumDocs(dir, 230);

    // 100 + 40 + 40 docs carry content="aaa" (from addDocs)
    verifyTermDocs(dir, new Term("content", "aaa"), 180);

    // 50 docs carry content="bbb" (from addDocs2)
    verifyTermDocs(dir, new Term("content", "bbb"), 50);

    // now optimize it.
    writer = newWriter(dir, false);
    writer.optimize();
    writer.close();

    // make sure the new index is correct after collapsing to one segment
    verifyNumDocs(dir, 230);

    verifyTermDocs(dir, new Term("content", "aaa"), 180);

    verifyTermDocs(dir, new Term("content", "bbb"), 50);

    // now add a single document
    Directory aux4 = new RAMDirectory();
    writer = newWriter(aux4, true);
    addDocs2(writer, 1);
    writer.close();

    writer = newWriter(dir, false);
    assertEquals(230, writer.docCount());
    writer.addIndexesNoOptimize(new Directory[] { aux4 });
    assertEquals(231, writer.docCount());
    writer.close();

    verifyNumDocs(dir, 231);

    verifyTermDocs(dir, new Term("content", "bbb"), 51);
  }

  // case 0: add self or exceed maxMergeDocs, expect exception
  /**
   * Error paths: adding an index whose segment size exceeds
   * {@code maxMergeDocs}, and adding the writer's own directory, must both
   * throw {@code IllegalArgumentException} and leave the target index
   * unchanged.
   */
  public void testAddSelf() throws IOException {
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directory
    Directory aux = new RAMDirectory();

    IndexWriter writer = null;

    writer = newWriter(dir, true);
    // add 100 documents
    addDocs(writer, 100);
    assertEquals(100, writer.docCount());
    writer.close();

    writer = newWriter(aux, true);
    writer.setUseCompoundFile(false); // use one without a compound file
    writer.setMaxBufferedDocs(1000);
    // add 40 documents in separate files
    addDocs(writer, 40);
    writer.close();
    // NOTE(review): create=true here appears to recreate aux, discarding the
    // 40 docs above, so aux ends up holding the 100 docs added next — confirm
    // against the IndexWriter create semantics of this Lucene version.
    writer = newWriter(aux, true);
    writer.setUseCompoundFile(false); // use one without a compound file
    writer.setMaxBufferedDocs(1000);
    addDocs(writer, 100);
    writer.close();

    writer = newWriter(dir, false);
    // remember the default so it can be restored for the second check
    int maxMergeDocs = writer.getMaxMergeDocs();
    writer.setMaxMergeDocs(99);

    try {
      // upper bound cannot exceed maxMergeDocs
      writer.addIndexesNoOptimize(new Directory[] { aux });
      assertTrue(false);
    } catch (IllegalArgumentException e) {
      // target index must be untouched by the failed add
      assertEquals(100, writer.docCount());
      assertEquals(1, writer.getSegmentCount());
    }

    writer.setMaxMergeDocs(maxMergeDocs);
    try {
      // cannot add self
      writer.addIndexesNoOptimize(new Directory[] { aux, dir });
      assertTrue(false);
    } catch (IllegalArgumentException e) {
      assertEquals(100, writer.docCount());
    }
    writer.close();

    // make sure the index is correct
    verifyNumDocs(dir, 100);
  }

  // in all the remaining tests, make the doc count of the oldest segment
  // in dir large so that it is never merged in addIndexesNoOptimize()
  // case 1: no tail segments
  public void testNoTailSegments() throws IOException {
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directory
    Directory aux = new RAMDirectory();

    // dir: 1000 docs in 1 segment; aux: 30 docs in 3 segments (see setUpDirs)
    setUpDirs(dir, aux);

    IndexWriter writer = newWriter(dir, false);
    writer.setMaxBufferedDocs(10);
    writer.setMergeFactor(4);
    addDocs(writer, 10);

    writer.addIndexesNoOptimize(new Directory[] { aux });
    assertEquals(1040, writer.docCount());
    assertEquals(2, writer.getSegmentCount());
    // the large 1000-doc segment must have been left unmerged
    assertEquals(1000, writer.getDocCount(0));
    writer.close();

    // make sure the index is correct
    verifyNumDocs(dir, 1040);
  }

  // case 2: tail segments, invariants hold, no copy
  public void testNoCopySegments() throws IOException {
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directory
    Directory aux = new RAMDirectory();

    setUpDirs(dir, aux);

    IndexWriter writer = newWriter(dir, false);
    writer.setMaxBufferedDocs(9);
    writer.setMergeFactor(4);
    // only 2 docs buffered: leaves a small tail segment behind the 1000-doc one
    addDocs(writer, 2);

    writer.addIndexesNoOptimize(new Directory[] { aux });
    assertEquals(1032, writer.docCount());
    assertEquals(2, writer.getSegmentCount());
    assertEquals(1000, writer.getDocCount(0));
    writer.close();

    // make sure the index is correct
    verifyNumDocs(dir, 1032);
  }

  // case 3: tail segments, invariants hold, copy, invariants hold
  public void testNoMergeAfterCopy() throws IOException {
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directory
    Directory aux = new RAMDirectory();

    setUpDirs(dir, aux);

    IndexWriter writer = newWriter(dir, false);
    writer.setMaxBufferedDocs(10);
    writer.setMergeFactor(4);

    // the same auxiliary directory may be passed more than once: 1000 + 2*30
    writer.addIndexesNoOptimize(new Directory[] { aux, aux });
    assertEquals(1060, writer.docCount());
    assertEquals(1000, writer.getDocCount(0));
    writer.close();

    // make sure the index is correct
    verifyNumDocs(dir, 1060);
  }

  // case 4: tail segments, invariants hold, copy, invariants not hold
  public void testMergeAfterCopy() throws IOException {
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directory
    Directory aux = new RAMDirectory();

    setUpDirs(dir, aux);

    // delete 20 of aux's 30 docs so the copied segments carry deletions
    IndexReader reader = IndexReader.open(aux);
    for (int i = 0; i < 20; i++) {
      reader.deleteDocument(i);
    }
    assertEquals(10, reader.numDocs());
    reader.close();

    IndexWriter writer = newWriter(dir, false);
    writer.setMaxBufferedDocs(4);
    writer.setMergeFactor(4);

    // 1000 + 2*10 live docs; deleted docs are dropped during the merge
    writer.addIndexesNoOptimize(new Directory[] { aux, aux });
    assertEquals(1020, writer.docCount());
    assertEquals(1000, writer.getDocCount(0));
    writer.close();

    // make sure the index is correct
    verifyNumDocs(dir, 1020);
  }

  // case 5: tail segments, invariants not hold
  public void testMoreMerges() throws IOException {
    // main directory
    Directory dir = new RAMDirectory();
    // auxiliary directory
    Directory aux = new RAMDirectory();
    Directory aux2 = new RAMDirectory();

    setUpDirs(dir, aux);

    // build aux2 by copying aux into a fresh index: 30 docs in 3 segments
    IndexWriter writer = newWriter(aux2, true);
    writer.setMaxBufferedDocs(100);
    writer.setMergeFactor(10);
    writer.addIndexesNoOptimize(new Directory[] { aux });
    assertEquals(30, writer.docCount());
    assertEquals(3, writer.getSegmentCount());
    writer.close();

    // leave only 3 live docs in aux ...
    IndexReader reader = IndexReader.open(aux);
    for (int i = 0; i < 27; i++) {
      reader.deleteDocument(i);
    }
    assertEquals(3, reader.numDocs());
    reader.close();

    // ... and 22 live docs in aux2
    reader = IndexReader.open(aux2);
    for (int i = 0; i < 8; i++) {
      reader.deleteDocument(i);
    }
    assertEquals(22, reader.numDocs());
    reader.close();

    writer = newWriter(dir, false);
    writer.setMaxBufferedDocs(6);
    writer.setMergeFactor(4);

    // 1000 + 3 + 22 live docs survive the copy/merge
    writer.addIndexesNoOptimize(new Directory[] { aux, aux2 });
    assertEquals(1025, writer.docCount());
    assertEquals(1000, writer.getDocCount(0));
    writer.close();

    // make sure the index is correct
    verifyNumDocs(dir, 1025);
  }

  /**
   * Opens an {@code IndexWriter} on {@code dir} with a whitespace analyzer
   * and installs a {@code LogDocMergePolicy} so merge selection is driven
   * purely by document counts.
   *
   * @param dir    directory to write to
   * @param create true to (re)create the index, false to append
   */
  private IndexWriter newWriter(Directory dir, boolean create)
      throws IOException {
    final IndexWriter writer = new IndexWriter(dir,
        new WhitespaceAnalyzer(), create);
    writer.setMergePolicy(new LogDocMergePolicy());
    return writer;
  }

  /** Adds {@code numDocs} documents, each with a tokenized field content="aaa". */
  private void addDocs(IndexWriter writer, int numDocs)
      throws IOException {
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(new Field("content", "aaa", Field.Store.NO,
          Field.Index.TOKENIZED));
      writer.addDocument(doc);
    }
  }

  /** Adds {@code numDocs} documents, each with a tokenized field content="bbb". */
  private void addDocs2(IndexWriter writer, int numDocs)
      throws IOException {
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(new Field("content", "bbb", Field.Store.NO,
          Field.Index.TOKENIZED));
      writer.addDocument(doc);
    }
  }

  /**
   * Asserts that {@code dir} holds exactly {@code numDocs} documents and no
   * deletions (maxDoc == numDocs).
   */
  private void verifyNumDocs(Directory dir, int numDocs)
      throws IOException {
    IndexReader reader = IndexReader.open(dir);
    assertEquals(numDocs, reader.maxDoc());
    assertEquals(numDocs, reader.numDocs());
    reader.close();
  }

  /** Asserts that exactly {@code numDocs} documents in {@code dir} contain {@code term}. */
  private void verifyTermDocs(Directory dir, Term term, int numDocs)
      throws IOException {
    IndexReader reader = IndexReader.open(dir);
    TermDocs termDocs = reader.termDocs(term);
    int count = 0;
    while (termDocs.next())
      count++;
    assertEquals(numDocs, count);
    reader.close();
  }

  /**
   * Fixture for the "case" tests: fills {@code dir} with 1000 docs in a
   * single segment (so it is never merged by addIndexesNoOptimize) and
   * {@code aux} with 30 docs spread across 3 segments.
   */
  private void setUpDirs(Directory dir, Directory aux)
      throws IOException {
    IndexWriter writer = null;

    writer = newWriter(dir, true);
    writer.setMaxBufferedDocs(1000);
    // add 1000 documents in 1 segment
    addDocs(writer, 1000);
    assertEquals(1000, writer.docCount());
    assertEquals(1, writer.getSegmentCount());
    writer.close();

    writer = newWriter(aux, true);
    writer.setUseCompoundFile(false); // use one without a compound file
    writer.setMaxBufferedDocs(100);
    writer.setMergeFactor(10);
    // add 30 documents in 3 segments: closing the writer between 10-doc
    // batches flushes each batch into its own segment (verified below)
    for (int i = 0; i < 3; i++) {
      addDocs(writer, 10);
      writer.close();
      writer = newWriter(aux, false);
      writer.setUseCompoundFile(false); // use one without a compound file
      writer.setMaxBufferedDocs(100);
      writer.setMergeFactor(10);
    }
    assertEquals(30, writer.docCount());
    assertEquals(3, writer.getSegmentCount());
    writer.close();
  }
}
|