001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.util.LuceneTestCase;
021:
022: import java.io.IOException;
023:
024: import org.apache.lucene.analysis.WhitespaceAnalyzer;
025: import org.apache.lucene.store.Directory;
026: import org.apache.lucene.store.RAMDirectory;
027: import org.apache.lucene.search.Query;
028: import org.apache.lucene.search.TermQuery;
029: import org.apache.lucene.search.Hits;
030: import org.apache.lucene.search.IndexSearcher;
031: import org.apache.lucene.document.Document;
032: import org.apache.lucene.document.Field;
033: import java.util.List;
034: import java.util.Iterator;
035: import java.util.Set;
036: import java.util.HashSet;
037:
/*
 Verify that custom IndexDeletionPolicy implementations are
 consulted when an index is opened and committed, and that only
 the commit points they choose to keep remain in the directory.
 */
042:
043: public class TestDeletionPolicy extends LuceneTestCase {
044: private void verifyCommitOrder(List commits) {
045: long last = SegmentInfos
046: .generationFromSegmentsFileName(((IndexCommitPoint) commits
047: .get(0)).getSegmentsFileName());
048: for (int i = 1; i < commits.size(); i++) {
049: long now = SegmentInfos
050: .generationFromSegmentsFileName(((IndexCommitPoint) commits
051: .get(i)).getSegmentsFileName());
052: assertTrue("SegmentInfos commits are out-of-order",
053: now > last);
054: last = now;
055: }
056: }
057:
058: class KeepAllDeletionPolicy implements IndexDeletionPolicy {
059: int numOnInit;
060: int numOnCommit;
061:
062: public void onInit(List commits) {
063: verifyCommitOrder(commits);
064: numOnInit++;
065: }
066:
067: public void onCommit(List commits) {
068: verifyCommitOrder(commits);
069: numOnCommit++;
070: }
071: }
072:
073: /**
074: * This is useful for adding to a big index w/ autoCommit
075: * false when you know readers are not using it.
076: */
077: class KeepNoneOnInitDeletionPolicy implements IndexDeletionPolicy {
078: int numOnInit;
079: int numOnCommit;
080:
081: public void onInit(List commits) {
082: verifyCommitOrder(commits);
083: numOnInit++;
084: // On init, delete all commit points:
085: Iterator it = commits.iterator();
086: while (it.hasNext()) {
087: ((IndexCommitPoint) it.next()).delete();
088: }
089: }
090:
091: public void onCommit(List commits) {
092: verifyCommitOrder(commits);
093: int size = commits.size();
094: // Delete all but last one:
095: for (int i = 0; i < size - 1; i++) {
096: ((IndexCommitPoint) commits.get(i)).delete();
097: }
098: numOnCommit++;
099: }
100: }
101:
102: class KeepLastNDeletionPolicy implements IndexDeletionPolicy {
103: int numOnInit;
104: int numOnCommit;
105: int numToKeep;
106: int numDelete;
107: Set seen = new HashSet();
108:
109: public KeepLastNDeletionPolicy(int numToKeep) {
110: this .numToKeep = numToKeep;
111: }
112:
113: public void onInit(List commits) {
114: verifyCommitOrder(commits);
115: numOnInit++;
116: // do no deletions on init
117: doDeletes(commits, false);
118: }
119:
120: public void onCommit(List commits) {
121: verifyCommitOrder(commits);
122: doDeletes(commits, true);
123: }
124:
125: private void doDeletes(List commits, boolean isCommit) {
126:
127: // Assert that we really are only called for each new
128: // commit:
129: if (isCommit) {
130: String fileName = ((IndexCommitPoint) commits
131: .get(commits.size() - 1)).getSegmentsFileName();
132: if (seen.contains(fileName)) {
133: throw new RuntimeException(
134: "onCommit was called twice on the same commit point: "
135: + fileName);
136: }
137: seen.add(fileName);
138: numOnCommit++;
139: }
140: int size = commits.size();
141: for (int i = 0; i < size - numToKeep; i++) {
142: ((IndexCommitPoint) commits.get(i)).delete();
143: numDelete++;
144: }
145: }
146: }
147:
148: /*
149: * Delete a commit only when it has been obsoleted by N
150: * seconds.
151: */
152: class ExpirationTimeDeletionPolicy implements IndexDeletionPolicy {
153:
154: Directory dir;
155: double expirationTimeSeconds;
156: int numDelete;
157:
158: public ExpirationTimeDeletionPolicy(Directory dir,
159: double seconds) {
160: this .dir = dir;
161: this .expirationTimeSeconds = seconds;
162: }
163:
164: public void onInit(List commits) throws IOException {
165: verifyCommitOrder(commits);
166: onCommit(commits);
167: }
168:
169: public void onCommit(List commits) throws IOException {
170: verifyCommitOrder(commits);
171:
172: IndexCommitPoint lastCommit = (IndexCommitPoint) commits
173: .get(commits.size() - 1);
174:
175: // Any commit older than expireTime should be deleted:
176: double expireTime = dir.fileModified(lastCommit
177: .getSegmentsFileName())
178: / 1000.0 - expirationTimeSeconds;
179:
180: Iterator it = commits.iterator();
181:
182: while (it.hasNext()) {
183: IndexCommitPoint commit = (IndexCommitPoint) it.next();
184: double modTime = dir.fileModified(commit
185: .getSegmentsFileName()) / 1000.0;
186: if (commit != lastCommit && modTime < expireTime) {
187: commit.delete();
188: numDelete += 1;
189: }
190: }
191: }
192: }
193:
194: /*
195: * Test "by time expiration" deletion policy:
196: */
197: public void testExpirationTimeDeletionPolicy() throws IOException,
198: InterruptedException {
199:
200: final double SECONDS = 2.0;
201:
202: boolean autoCommit = false;
203: boolean useCompoundFile = true;
204:
205: Directory dir = new RAMDirectory();
206: ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(
207: dir, SECONDS);
208: IndexWriter writer = new IndexWriter(dir, autoCommit,
209: new WhitespaceAnalyzer(), true, policy);
210: writer.setUseCompoundFile(useCompoundFile);
211: writer.close();
212:
213: long lastDeleteTime = 0;
214: for (int i = 0; i < 7; i++) {
215: // Record last time when writer performed deletes of
216: // past commits
217: lastDeleteTime = System.currentTimeMillis();
218: writer = new IndexWriter(dir, autoCommit,
219: new WhitespaceAnalyzer(), false, policy);
220: writer.setUseCompoundFile(useCompoundFile);
221: for (int j = 0; j < 17; j++) {
222: addDoc(writer);
223: }
224: writer.close();
225:
226: // Make sure to sleep long enough so that some commit
227: // points will be deleted:
228: Thread.sleep((int) (1000.0 * (SECONDS / 5.0)));
229: }
230:
231: // First, make sure the policy in fact deleted something:
232: assertTrue("no commits were deleted", policy.numDelete > 0);
233:
234: // Then simplistic check: just verify that the
235: // segments_N's that still exist are in fact within SECONDS
236: // seconds of the last one's mod time, and, that I can
237: // open a reader on each:
238: long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
239:
240: String fileName = IndexFileNames.fileNameFromGeneration(
241: IndexFileNames.SEGMENTS, "", gen);
242: dir.deleteFile(IndexFileNames.SEGMENTS_GEN);
243: while (gen > 0) {
244: try {
245: IndexReader reader = IndexReader.open(dir);
246: reader.close();
247: fileName = IndexFileNames.fileNameFromGeneration(
248: IndexFileNames.SEGMENTS, "", gen);
249: long modTime = dir.fileModified(fileName);
250: assertTrue("commit point was older than " + SECONDS
251: + " seconds (" + (lastDeleteTime - modTime)
252: + " msec) but did not get deleted",
253: lastDeleteTime - modTime <= (SECONDS * 1000));
254: } catch (IOException e) {
255: // OK
256: break;
257: }
258:
259: dir.deleteFile(IndexFileNames.fileNameFromGeneration(
260: IndexFileNames.SEGMENTS, "", gen));
261: gen--;
262: }
263:
264: dir.close();
265: }
266:
267: /*
268: * Test a silly deletion policy that keeps all commits around.
269: */
270: public void testKeepAllDeletionPolicy() throws IOException {
271:
272: for (int pass = 0; pass < 4; pass++) {
273:
274: boolean autoCommit = pass < 2;
275: boolean useCompoundFile = (pass % 2) > 0;
276:
277: KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy();
278:
279: Directory dir = new RAMDirectory();
280:
281: IndexWriter writer = new IndexWriter(dir, autoCommit,
282: new WhitespaceAnalyzer(), true, policy);
283: writer.setMaxBufferedDocs(10);
284: writer.setUseCompoundFile(useCompoundFile);
285: for (int i = 0; i < 107; i++) {
286: addDoc(writer);
287: }
288: writer.close();
289:
290: writer = new IndexWriter(dir, autoCommit,
291: new WhitespaceAnalyzer(), false, policy);
292: writer.setUseCompoundFile(useCompoundFile);
293: writer.optimize();
294: writer.close();
295:
296: assertEquals(2, policy.numOnInit);
297: if (autoCommit) {
298: assertTrue(policy.numOnCommit > 2);
299: } else {
300: // If we are not auto committing then there should
301: // be exactly 2 commits (one per close above):
302: assertEquals(2, policy.numOnCommit);
303: }
304:
305: // Simplistic check: just verify all segments_N's still
306: // exist, and, I can open a reader on each:
307: dir.deleteFile(IndexFileNames.SEGMENTS_GEN);
308: long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
309: while (gen > 0) {
310: IndexReader reader = IndexReader.open(dir);
311: reader.close();
312: dir.deleteFile(IndexFileNames.fileNameFromGeneration(
313: IndexFileNames.SEGMENTS, "", gen));
314: gen--;
315:
316: if (gen > 0) {
317: // Now that we've removed a commit point, which
318: // should have orphan'd at least one index file.
319: // Open & close a writer and assert that it
320: // actually removed something:
321: int preCount = dir.list().length;
322: writer = new IndexWriter(dir, false,
323: new WhitespaceAnalyzer(), false, policy);
324: writer.close();
325: int postCount = dir.list().length;
326: assertTrue(postCount < preCount);
327: }
328: }
329:
330: dir.close();
331: }
332: }
333:
334: /* Test keeping NO commit points. This is a viable and
335: * useful case eg where you want to build a big index with
336: * autoCommit false and you know there are no readers.
337: */
338: public void testKeepNoneOnInitDeletionPolicy() throws IOException {
339:
340: for (int pass = 0; pass < 4; pass++) {
341:
342: boolean autoCommit = pass < 2;
343: boolean useCompoundFile = (pass % 2) > 0;
344:
345: KeepNoneOnInitDeletionPolicy policy = new KeepNoneOnInitDeletionPolicy();
346:
347: Directory dir = new RAMDirectory();
348:
349: IndexWriter writer = new IndexWriter(dir, autoCommit,
350: new WhitespaceAnalyzer(), true, policy);
351: writer.setMaxBufferedDocs(10);
352: writer.setUseCompoundFile(useCompoundFile);
353: for (int i = 0; i < 107; i++) {
354: addDoc(writer);
355: }
356: writer.close();
357:
358: writer = new IndexWriter(dir, autoCommit,
359: new WhitespaceAnalyzer(), false, policy);
360: writer.setUseCompoundFile(useCompoundFile);
361: writer.optimize();
362: writer.close();
363:
364: assertEquals(2, policy.numOnInit);
365: if (autoCommit) {
366: assertTrue(policy.numOnCommit > 2);
367: } else {
368: // If we are not auto committing then there should
369: // be exactly 2 commits (one per close above):
370: assertEquals(2, policy.numOnCommit);
371: }
372:
373: // Simplistic check: just verify the index is in fact
374: // readable:
375: IndexReader reader = IndexReader.open(dir);
376: reader.close();
377:
378: dir.close();
379: }
380: }
381:
382: /*
383: * Test a deletion policy that keeps last N commits.
384: */
385: public void testKeepLastNDeletionPolicy() throws IOException {
386:
387: final int N = 5;
388:
389: for (int pass = 0; pass < 4; pass++) {
390:
391: boolean autoCommit = pass < 2;
392: boolean useCompoundFile = (pass % 2) > 0;
393:
394: Directory dir = new RAMDirectory();
395:
396: KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(
397: N);
398:
399: for (int j = 0; j < N + 1; j++) {
400: IndexWriter writer = new IndexWriter(dir, autoCommit,
401: new WhitespaceAnalyzer(), true, policy);
402: writer.setMaxBufferedDocs(10);
403: writer.setUseCompoundFile(useCompoundFile);
404: for (int i = 0; i < 17; i++) {
405: addDoc(writer);
406: }
407: writer.optimize();
408: writer.close();
409: }
410:
411: assertTrue(policy.numDelete > 0);
412: assertEquals(N + 1, policy.numOnInit);
413: if (autoCommit) {
414: assertTrue(policy.numOnCommit > 1);
415: } else {
416: assertEquals(N + 1, policy.numOnCommit);
417: }
418:
419: // Simplistic check: just verify only the past N segments_N's still
420: // exist, and, I can open a reader on each:
421: dir.deleteFile(IndexFileNames.SEGMENTS_GEN);
422: long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
423: for (int i = 0; i < N + 1; i++) {
424: try {
425: IndexReader reader = IndexReader.open(dir);
426: reader.close();
427: if (i == N) {
428: fail("should have failed on commits prior to last "
429: + N);
430: }
431: } catch (IOException e) {
432: if (i != N) {
433: throw e;
434: }
435: }
436: if (i < N) {
437: dir.deleteFile(IndexFileNames
438: .fileNameFromGeneration(
439: IndexFileNames.SEGMENTS, "", gen));
440: }
441: gen--;
442: }
443:
444: dir.close();
445: }
446: }
447:
448: /*
449: * Test a deletion policy that keeps last N commits
450: * around, with reader doing deletes.
451: */
452: public void testKeepLastNDeletionPolicyWithReader()
453: throws IOException {
454:
455: final int N = 10;
456:
457: for (int pass = 0; pass < 4; pass++) {
458:
459: boolean autoCommit = pass < 2;
460: boolean useCompoundFile = (pass % 2) > 0;
461:
462: KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(
463: N);
464:
465: Directory dir = new RAMDirectory();
466: IndexWriter writer = new IndexWriter(dir, autoCommit,
467: new WhitespaceAnalyzer(), true, policy);
468: writer.setUseCompoundFile(useCompoundFile);
469: writer.close();
470: Term searchTerm = new Term("content", "aaa");
471: Query query = new TermQuery(searchTerm);
472:
473: for (int i = 0; i < N + 1; i++) {
474: writer = new IndexWriter(dir, autoCommit,
475: new WhitespaceAnalyzer(), false, policy);
476: writer.setUseCompoundFile(useCompoundFile);
477: for (int j = 0; j < 17; j++) {
478: addDoc(writer);
479: }
480: // this is a commit when autoCommit=false:
481: writer.close();
482: IndexReader reader = IndexReader.open(dir, policy);
483: reader.deleteDocument(3 * i + 1);
484: reader.setNorm(4 * i + 1, "content", 2.0F);
485: IndexSearcher searcher = new IndexSearcher(reader);
486: Hits hits = searcher.search(query);
487: assertEquals(16 * (1 + i), hits.length());
488: // this is a commit when autoCommit=false:
489: reader.close();
490: searcher.close();
491: }
492: writer = new IndexWriter(dir, autoCommit,
493: new WhitespaceAnalyzer(), false, policy);
494: writer.setUseCompoundFile(useCompoundFile);
495: writer.optimize();
496: // this is a commit when autoCommit=false:
497: writer.close();
498:
499: assertEquals(2 * (N + 2), policy.numOnInit);
500: if (autoCommit) {
501: assertTrue(policy.numOnCommit > 2 * (N + 2) - 1);
502: } else {
503: assertEquals(2 * (N + 2) - 1, policy.numOnCommit);
504: }
505:
506: IndexSearcher searcher = new IndexSearcher(dir);
507: Hits hits = searcher.search(query);
508: assertEquals(176, hits.length());
509:
510: // Simplistic check: just verify only the past N segments_N's still
511: // exist, and, I can open a reader on each:
512: long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
513:
514: dir.deleteFile(IndexFileNames.SEGMENTS_GEN);
515: int expectedCount = 176;
516:
517: for (int i = 0; i < N + 1; i++) {
518: try {
519: IndexReader reader = IndexReader.open(dir);
520:
521: // Work backwards in commits on what the expected
522: // count should be. Only check this in the
523: // autoCommit false case:
524: if (!autoCommit) {
525: searcher = new IndexSearcher(reader);
526: hits = searcher.search(query);
527: if (i > 1) {
528: if (i % 2 == 0) {
529: expectedCount += 1;
530: } else {
531: expectedCount -= 17;
532: }
533: }
534: assertEquals(expectedCount, hits.length());
535: searcher.close();
536: }
537: reader.close();
538: if (i == N) {
539: fail("should have failed on commits before last 5");
540: }
541: } catch (IOException e) {
542: if (i != N) {
543: throw e;
544: }
545: }
546: if (i < N) {
547: dir.deleteFile(IndexFileNames
548: .fileNameFromGeneration(
549: IndexFileNames.SEGMENTS, "", gen));
550: }
551: gen--;
552: }
553:
554: dir.close();
555: }
556: }
557:
558: /*
559: * Test a deletion policy that keeps last N commits
560: * around, through creates.
561: */
562: public void testKeepLastNDeletionPolicyWithCreates()
563: throws IOException {
564:
565: final int N = 10;
566:
567: for (int pass = 0; pass < 4; pass++) {
568:
569: boolean autoCommit = pass < 2;
570: boolean useCompoundFile = (pass % 2) > 0;
571:
572: KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(
573: N);
574:
575: Directory dir = new RAMDirectory();
576: IndexWriter writer = new IndexWriter(dir, autoCommit,
577: new WhitespaceAnalyzer(), true, policy);
578: writer.setMaxBufferedDocs(10);
579: writer.setUseCompoundFile(useCompoundFile);
580: writer.close();
581: Term searchTerm = new Term("content", "aaa");
582: Query query = new TermQuery(searchTerm);
583:
584: for (int i = 0; i < N + 1; i++) {
585:
586: writer = new IndexWriter(dir, autoCommit,
587: new WhitespaceAnalyzer(), false, policy);
588: writer.setMaxBufferedDocs(10);
589: writer.setUseCompoundFile(useCompoundFile);
590: for (int j = 0; j < 17; j++) {
591: addDoc(writer);
592: }
593: // this is a commit when autoCommit=false:
594: writer.close();
595: IndexReader reader = IndexReader.open(dir, policy);
596: reader.deleteDocument(3);
597: reader.setNorm(5, "content", 2.0F);
598: IndexSearcher searcher = new IndexSearcher(reader);
599: Hits hits = searcher.search(query);
600: assertEquals(16, hits.length());
601: // this is a commit when autoCommit=false:
602: reader.close();
603: searcher.close();
604:
605: writer = new IndexWriter(dir, autoCommit,
606: new WhitespaceAnalyzer(), true, policy);
607: // This will not commit: there are no changes
608: // pending because we opened for "create":
609: writer.close();
610: }
611:
612: assertEquals(1 + 3 * (N + 1), policy.numOnInit);
613: if (autoCommit) {
614: assertTrue(policy.numOnCommit > 3 * (N + 1) - 1);
615: } else {
616: assertEquals(2 * (N + 1), policy.numOnCommit);
617: }
618:
619: IndexSearcher searcher = new IndexSearcher(dir);
620: Hits hits = searcher.search(query);
621: assertEquals(0, hits.length());
622:
623: // Simplistic check: just verify only the past N segments_N's still
624: // exist, and, I can open a reader on each:
625: long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
626:
627: dir.deleteFile(IndexFileNames.SEGMENTS_GEN);
628: int expectedCount = 0;
629:
630: for (int i = 0; i < N + 1; i++) {
631: try {
632: IndexReader reader = IndexReader.open(dir);
633:
634: // Work backwards in commits on what the expected
635: // count should be. Only check this in the
636: // autoCommit false case:
637: if (!autoCommit) {
638: searcher = new IndexSearcher(reader);
639: hits = searcher.search(query);
640: assertEquals(expectedCount, hits.length());
641: searcher.close();
642: if (expectedCount == 0) {
643: expectedCount = 16;
644: } else if (expectedCount == 16) {
645: expectedCount = 17;
646: } else if (expectedCount == 17) {
647: expectedCount = 0;
648: }
649: }
650: reader.close();
651: if (i == N) {
652: fail("should have failed on commits before last "
653: + N);
654: }
655: } catch (IOException e) {
656: if (i != N) {
657: throw e;
658: }
659: }
660: if (i < N) {
661: dir.deleteFile(IndexFileNames
662: .fileNameFromGeneration(
663: IndexFileNames.SEGMENTS, "", gen));
664: }
665: gen--;
666: }
667:
668: dir.close();
669: }
670: }
671:
672: private void addDoc(IndexWriter writer) throws IOException {
673: Document doc = new Document();
674: doc.add(new Field("content", "aaa", Field.Store.NO,
675: Field.Index.TOKENIZED));
676: writer.addDocument(doc);
677: }
678: }
|