0001: package org.apache.lucene.index;
0002:
0003: /**
0004: * Licensed to the Apache Software Foundation (ASF) under one or more
0005: * contributor license agreements. See the NOTICE file distributed with
0006: * this work for additional information regarding copyright ownership.
0007: * The ASF licenses this file to You under the Apache License, Version 2.0
0008: * (the "License"); you may not use this file except in compliance with
0009: * the License. You may obtain a copy of the License at
0010: *
0011: * http://www.apache.org/licenses/LICENSE-2.0
0012: *
0013: * Unless required by applicable law or agreed to in writing, software
0014: * distributed under the License is distributed on an "AS IS" BASIS,
0015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016: * See the License for the specific language governing permissions and
0017: * limitations under the License.
0018: */
0019:
0020: import java.io.IOException;
0021: import java.io.Reader;
0022: import java.io.File;
0023: import java.util.Arrays;
0024: import java.util.ArrayList;
0025: import java.util.Random;
0026:
0027: import org.apache.lucene.util.LuceneTestCase;
0028:
0029: import org.apache.lucene.analysis.WhitespaceAnalyzer;
0030: import org.apache.lucene.analysis.WhitespaceTokenizer;
0031: import org.apache.lucene.analysis.Analyzer;
0032: import org.apache.lucene.analysis.TokenFilter;
0033: import org.apache.lucene.analysis.TokenStream;
0034: import org.apache.lucene.analysis.standard.StandardAnalyzer;
0035: import org.apache.lucene.analysis.standard.StandardTokenizer;
0036: import org.apache.lucene.analysis.Token;
0037: import org.apache.lucene.document.Document;
0038: import org.apache.lucene.document.Field;
0039: import org.apache.lucene.search.IndexSearcher;
0040: import org.apache.lucene.search.Hits;
0041: import org.apache.lucene.search.TermQuery;
0042: import org.apache.lucene.store.Directory;
0043: import org.apache.lucene.store.FSDirectory;
0044: import org.apache.lucene.store.RAMDirectory;
0045: import org.apache.lucene.store.IndexInput;
0046: import org.apache.lucene.store.IndexOutput;
0047: import org.apache.lucene.store.AlreadyClosedException;
0048: import org.apache.lucene.util._TestUtil;
0049:
0050: import org.apache.lucene.store.MockRAMDirectory;
0051: import org.apache.lucene.store.LockFactory;
0052: import org.apache.lucene.store.Lock;
0053: import org.apache.lucene.store.SingleInstanceLockFactory;
0054:
0055: /**
0056: *
0057: * @version $Id: TestIndexWriter.java 628085 2008-02-15 15:18:22Z mikemccand $
0058: */
0059: public class TestIndexWriter extends LuceneTestCase {
0060: public void testDocCount() throws IOException {
0061: Directory dir = new RAMDirectory();
0062:
0063: IndexWriter writer = null;
0064: IndexReader reader = null;
0065: int i;
0066:
0067: IndexWriter.setDefaultWriteLockTimeout(2000);
0068: assertEquals(2000, IndexWriter.getDefaultWriteLockTimeout());
0069:
0070: writer = new IndexWriter(dir, new WhitespaceAnalyzer());
0071:
0072: IndexWriter.setDefaultWriteLockTimeout(1000);
0073:
0074: // add 100 documents
0075: for (i = 0; i < 100; i++) {
0076: addDoc(writer);
0077: }
0078: assertEquals(100, writer.docCount());
0079: writer.close();
0080:
0081: // delete 40 documents
0082: reader = IndexReader.open(dir);
0083: for (i = 0; i < 40; i++) {
0084: reader.deleteDocument(i);
0085: }
0086: reader.close();
0087:
0088: // test doc count before segments are merged/index is optimized
0089: writer = new IndexWriter(dir, new WhitespaceAnalyzer());
0090: assertEquals(100, writer.docCount());
0091: writer.close();
0092:
0093: reader = IndexReader.open(dir);
0094: assertEquals(100, reader.maxDoc());
0095: assertEquals(60, reader.numDocs());
0096: reader.close();
0097:
0098: // optimize the index and check that the new doc count is correct
0099: writer = new IndexWriter(dir, true, new WhitespaceAnalyzer());
0100: writer.optimize();
0101: assertEquals(60, writer.docCount());
0102: writer.close();
0103:
0104: // check that the index reader gives the same numbers.
0105: reader = IndexReader.open(dir);
0106: assertEquals(60, reader.maxDoc());
0107: assertEquals(60, reader.numDocs());
0108: reader.close();
0109:
0110: // make sure opening a new index for create over
0111: // this existing one works correctly:
0112: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
0113: assertEquals(0, writer.docCount());
0114: writer.close();
0115: }
0116:
0117: private void addDoc(IndexWriter writer) throws IOException {
0118: Document doc = new Document();
0119: doc.add(new Field("content", "aaa", Field.Store.NO,
0120: Field.Index.TOKENIZED));
0121: writer.addDocument(doc);
0122: }
0123:
0124: private void addDocWithIndex(IndexWriter writer, int index)
0125: throws IOException {
0126: Document doc = new Document();
0127: doc.add(new Field("content", "aaa " + index, Field.Store.YES,
0128: Field.Index.TOKENIZED));
0129: doc.add(new Field("id", "" + index, Field.Store.YES,
0130: Field.Index.TOKENIZED));
0131: writer.addDocument(doc);
0132: }
0133:
0134: /*
0135: Test: make sure when we run out of disk space or hit
0136: random IOExceptions in any of the addIndexes(*) calls
0137: that 1) index is not corrupt (searcher can open/search
0138: it) and 2) transactional semantics are followed:
0139: either all or none of the incoming documents were in
0140: fact added.
0141: */
0142: public void testAddIndexOnDiskFull() throws IOException {
0143: int START_COUNT = 57;
0144: int NUM_DIR = 50;
0145: int END_COUNT = START_COUNT + NUM_DIR * 25;
0146:
0147: boolean debug = false;
0148:
0149: // Build up a bunch of dirs that have indexes which we
0150: // will then merge together by calling addIndexes(*):
0151: Directory[] dirs = new Directory[NUM_DIR];
0152: long inputDiskUsage = 0;
0153: for (int i = 0; i < NUM_DIR; i++) {
0154: dirs[i] = new RAMDirectory();
0155: IndexWriter writer = new IndexWriter(dirs[i],
0156: new WhitespaceAnalyzer(), true);
0157: for (int j = 0; j < 25; j++) {
0158: addDocWithIndex(writer, 25 * i + j);
0159: }
0160: writer.close();
0161: String[] files = dirs[i].list();
0162: for (int j = 0; j < files.length; j++) {
0163: inputDiskUsage += dirs[i].fileLength(files[j]);
0164: }
0165: }
0166:
0167: // Now, build a starting index that has START_COUNT docs. We
0168: // will then try to addIndexes into a copy of this:
0169: RAMDirectory startDir = new RAMDirectory();
0170: IndexWriter writer = new IndexWriter(startDir,
0171: new WhitespaceAnalyzer(), true);
0172: for (int j = 0; j < START_COUNT; j++) {
0173: addDocWithIndex(writer, j);
0174: }
0175: writer.close();
0176:
0177: // Make sure starting index seems to be working properly:
0178: Term searchTerm = new Term("content", "aaa");
0179: IndexReader reader = IndexReader.open(startDir);
0180: assertEquals("first docFreq", 57, reader.docFreq(searchTerm));
0181:
0182: IndexSearcher searcher = new IndexSearcher(reader);
0183: Hits hits = searcher.search(new TermQuery(searchTerm));
0184: assertEquals("first number of hits", 57, hits.length());
0185: searcher.close();
0186: reader.close();
0187:
0188: // Iterate with larger and larger amounts of free
0189: // disk space. With little free disk space,
0190: // addIndexes will certainly run out of space &
0191: // fail. Verify that when this happens, index is
0192: // not corrupt and index in fact has added no
0193: // documents. Then, we increase disk space by 2000
0194: // bytes each iteration. At some point there is
0195: // enough free disk space and addIndexes should
0196: // succeed and index should show all documents were
0197: // added.
0198:
0199: // String[] files = startDir.list();
0200: long diskUsage = startDir.sizeInBytes();
0201:
0202: long startDiskUsage = 0;
0203: String[] files = startDir.list();
0204: for (int i = 0; i < files.length; i++) {
0205: startDiskUsage += startDir.fileLength(files[i]);
0206: }
0207:
0208: for (int iter = 0; iter < 6; iter++) {
0209:
0210: if (debug)
0211: System.out.println("TEST: iter=" + iter);
0212:
0213: // Start with 100 bytes more than we are currently using:
0214: long diskFree = diskUsage + 100;
0215:
0216: boolean autoCommit = iter % 2 == 0;
0217: int method = iter / 2;
0218:
0219: boolean success = false;
0220: boolean done = false;
0221:
0222: String methodName;
0223: if (0 == method) {
0224: methodName = "addIndexes(Directory[])";
0225: } else if (1 == method) {
0226: methodName = "addIndexes(IndexReader[])";
0227: } else {
0228: methodName = "addIndexesNoOptimize(Directory[])";
0229: }
0230:
0231: while (!done) {
0232:
0233: // Make a new dir that will enforce disk usage:
0234: MockRAMDirectory dir = new MockRAMDirectory(startDir);
0235: writer = new IndexWriter(dir, autoCommit,
0236: new WhitespaceAnalyzer(), false);
0237: IOException err = null;
0238:
0239: MergeScheduler ms = writer.getMergeScheduler();
0240: for (int x = 0; x < 2; x++) {
0241: if (ms instanceof ConcurrentMergeScheduler)
0242: // This test intentionally produces exceptions
0243: // in the threads that CMS launches; we don't
0244: // want to pollute test output with these.
0245: if (0 == x)
0246: ((ConcurrentMergeScheduler) ms)
0247: .setSuppressExceptions();
0248: else
0249: ((ConcurrentMergeScheduler) ms)
0250: .clearSuppressExceptions();
0251:
0252: // Two loops: first time, limit disk space &
0253: // throw random IOExceptions; second time, no
0254: // disk space limit:
0255:
0256: double rate = 0.05;
0257: double diskRatio = ((double) diskFree) / diskUsage;
0258: long thisDiskFree;
0259:
0260: String testName = null;
0261:
0262: if (0 == x) {
0263: thisDiskFree = diskFree;
0264: if (diskRatio >= 2.0) {
0265: rate /= 2;
0266: }
0267: if (diskRatio >= 4.0) {
0268: rate /= 2;
0269: }
0270: if (diskRatio >= 6.0) {
0271: rate = 0.0;
0272: }
0273: if (debug)
0274: testName = "disk full test " + methodName
0275: + " with disk full at " + diskFree
0276: + " bytes autoCommit=" + autoCommit;
0277: } else {
0278: thisDiskFree = 0;
0279: rate = 0.0;
0280: if (debug)
0281: testName = "disk full test "
0282: + methodName
0283: + " with unlimited disk space autoCommit="
0284: + autoCommit;
0285: }
0286:
0287: if (debug)
0288: System.out.println("\ncycle: " + testName);
0289:
0290: dir.setMaxSizeInBytes(thisDiskFree);
0291: dir.setRandomIOExceptionRate(rate, diskFree);
0292:
0293: try {
0294:
0295: if (0 == method) {
0296: writer.addIndexes(dirs);
0297: } else if (1 == method) {
0298: IndexReader readers[] = new IndexReader[dirs.length];
0299: for (int i = 0; i < dirs.length; i++) {
0300: readers[i] = IndexReader.open(dirs[i]);
0301: }
0302: try {
0303: writer.addIndexes(readers);
0304: } finally {
0305: for (int i = 0; i < dirs.length; i++) {
0306: readers[i].close();
0307: }
0308: }
0309: } else {
0310: writer.addIndexesNoOptimize(dirs);
0311: }
0312:
0313: success = true;
0314: if (debug) {
0315: System.out.println(" success!");
0316: }
0317:
0318: if (0 == x) {
0319: done = true;
0320: }
0321:
0322: } catch (IOException e) {
0323: success = false;
0324: err = e;
0325: if (debug) {
0326: System.out.println(" hit IOException: "
0327: + e);
0328: e.printStackTrace(System.out);
0329: }
0330:
0331: if (1 == x) {
0332: e.printStackTrace(System.out);
0333: fail(methodName
0334: + " hit IOException after disk space was freed up");
0335: }
0336: }
0337:
0338: // Make sure all threads from
0339: // ConcurrentMergeScheduler are done
0340: _TestUtil.syncConcurrentMerges(writer);
0341:
0342: if (autoCommit) {
0343:
0344: // Whether we succeeded or failed, check that
0345: // all un-referenced files were in fact
0346: // deleted (ie, we did not create garbage).
0347: // Only check this when autoCommit is true:
0348: // when it's false, it's expected that there
0349: // are unreferenced files (ie they won't be
0350: // referenced until the "commit on close").
0351: // Just create a new IndexFileDeleter, have it
0352: // delete unreferenced files, then verify that
0353: // in fact no files were deleted:
0354:
0355: String successStr;
0356: if (success) {
0357: successStr = "success";
0358: } else {
0359: successStr = "IOException";
0360: }
0361: String message = methodName
0362: + " failed to delete unreferenced files after "
0363: + successStr + " (" + diskFree
0364: + " bytes)";
0365: assertNoUnreferencedFiles(dir, message);
0366: }
0367:
0368: if (debug) {
0369: System.out.println(" now test readers");
0370: }
0371:
0372: // Finally, verify index is not corrupt, and, if
0373: // we succeeded, we see all docs added, and if we
0374: // failed, we see either all docs or no docs added
0375: // (transactional semantics):
0376: try {
0377: reader = IndexReader.open(dir);
0378: } catch (IOException e) {
0379: e.printStackTrace(System.out);
0380: fail(testName
0381: + ": exception when creating IndexReader: "
0382: + e);
0383: }
0384: int result = reader.docFreq(searchTerm);
0385: if (success) {
0386: if (autoCommit && result != END_COUNT) {
0387: fail(testName
0388: + ": method did not throw exception but docFreq('aaa') is "
0389: + result + " instead of expected "
0390: + END_COUNT);
0391: } else if (!autoCommit && result != START_COUNT) {
0392: fail(testName
0393: + ": method did not throw exception but docFreq('aaa') is "
0394: + result + " instead of expected "
0395: + START_COUNT
0396: + " [autoCommit = false]");
0397: }
0398: } else {
0399: // On hitting exception we still may have added
0400: // all docs:
0401: if (result != START_COUNT
0402: && result != END_COUNT) {
0403: err.printStackTrace(System.out);
0404: fail(testName
0405: + ": method did throw exception but docFreq('aaa') is "
0406: + result + " instead of expected "
0407: + START_COUNT + " or " + END_COUNT);
0408: }
0409: }
0410:
0411: searcher = new IndexSearcher(reader);
0412: try {
0413: hits = searcher
0414: .search(new TermQuery(searchTerm));
0415: } catch (IOException e) {
0416: e.printStackTrace(System.out);
0417: fail(testName + ": exception when searching: "
0418: + e);
0419: }
0420: int result2 = hits.length();
0421: if (success) {
0422: if (result2 != result) {
0423: fail(testName
0424: + ": method did not throw exception but hits.length for search on term 'aaa' is "
0425: + result2 + " instead of expected "
0426: + result);
0427: }
0428: } else {
0429: // On hitting exception we still may have added
0430: // all docs:
0431: if (result2 != result) {
0432: err.printStackTrace(System.out);
0433: fail(testName
0434: + ": method did throw exception but hits.length for search on term 'aaa' is "
0435: + result2 + " instead of expected "
0436: + result);
0437: }
0438: }
0439:
0440: searcher.close();
0441: reader.close();
0442: if (debug) {
0443: System.out.println(" count is " + result);
0444: }
0445:
0446: if (done || result == END_COUNT) {
0447: break;
0448: }
0449: }
0450:
0451: if (debug) {
0452: System.out.println(" start disk = "
0453: + startDiskUsage + "; input disk = "
0454: + inputDiskUsage + "; max used = "
0455: + dir.getMaxUsedSizeInBytes());
0456: }
0457:
0458: if (done) {
0459: // Javadocs state that temp free Directory space
0460: // required is at most 2X total input size of
0461: // indices so let's make sure:
0462: assertTrue(
0463: "max free Directory space required exceeded 1X the total input index sizes during "
0464: + methodName
0465: + ": max temp usage = "
0466: + (dir.getMaxUsedSizeInBytes() - startDiskUsage)
0467: + " bytes; "
0468: + "starting disk usage = "
0469: + startDiskUsage
0470: + " bytes; "
0471: + "input index disk usage = "
0472: + inputDiskUsage + " bytes",
0473: (dir.getMaxUsedSizeInBytes() - startDiskUsage) < 2 * (startDiskUsage + inputDiskUsage));
0474: }
0475:
0476: writer.close();
0477:
0478: // Wait for all BG threads to finish else
0479: // dir.close() will throw IOException because
0480: // there are still open files
0481: _TestUtil.syncConcurrentMerges(ms);
0482:
0483: dir.close();
0484:
0485: // Try again with 2000 more bytes of free space:
0486: diskFree += 2000;
0487: }
0488: }
0489:
0490: startDir.close();
0491: }
0492:
0493: /*
0494: * Make sure IndexWriter cleans up on hitting a disk
0495: * full exception in addDocument.
0496: */
0497: public void testAddDocumentOnDiskFull() throws IOException {
0498:
0499: boolean debug = false;
0500:
0501: for (int pass = 0; pass < 3; pass++) {
0502: if (debug)
0503: System.out.println("TEST: pass=" + pass);
0504: boolean autoCommit = pass == 0;
0505: boolean doAbort = pass == 2;
0506: long diskFree = 200;
0507: while (true) {
0508: if (debug)
0509: System.out.println("TEST: cycle: diskFree="
0510: + diskFree);
0511: MockRAMDirectory dir = new MockRAMDirectory();
0512: dir.setMaxSizeInBytes(diskFree);
0513: IndexWriter writer = new IndexWriter(dir, autoCommit,
0514: new WhitespaceAnalyzer(), true);
0515:
0516: MergeScheduler ms = writer.getMergeScheduler();
0517: if (ms instanceof ConcurrentMergeScheduler)
0518: // This test intentionally produces exceptions
0519: // in the threads that CMS launches; we don't
0520: // want to pollute test output with these.
0521: ((ConcurrentMergeScheduler) ms)
0522: .setSuppressExceptions();
0523:
0524: boolean hitError = false;
0525: try {
0526: for (int i = 0; i < 200; i++) {
0527: addDoc(writer);
0528: }
0529: } catch (IOException e) {
0530: if (debug) {
0531: System.out.println("TEST: exception on addDoc");
0532: e.printStackTrace(System.out);
0533: }
0534: hitError = true;
0535: }
0536:
0537: if (hitError) {
0538: if (doAbort) {
0539: writer.abort();
0540: } else {
0541: try {
0542: writer.close();
0543: } catch (IOException e) {
0544: if (debug) {
0545: System.out
0546: .println("TEST: exception on close");
0547: e.printStackTrace(System.out);
0548: }
0549: dir.setMaxSizeInBytes(0);
0550: writer.close();
0551: }
0552: }
0553:
0554: _TestUtil.syncConcurrentMerges(ms);
0555:
0556: assertNoUnreferencedFiles(dir,
0557: "after disk full during addDocument with autoCommit="
0558: + autoCommit);
0559:
0560: // Make sure reader can open the index:
0561: IndexReader.open(dir).close();
0562:
0563: dir.close();
0564:
0565: // Now try again w/ more space:
0566: diskFree += 500;
0567: } else {
0568: _TestUtil.syncConcurrentMerges(writer);
0569: dir.close();
0570: break;
0571: }
0572: }
0573: }
0574: }
0575:
0576: public static void assertNoUnreferencedFiles(Directory dir,
0577: String message) throws IOException {
0578: String[] startFiles = dir.list();
0579: SegmentInfos infos = new SegmentInfos();
0580: infos.read(dir);
0581: new IndexFileDeleter(dir,
0582: new KeepOnlyLastCommitDeletionPolicy(), infos, null,
0583: null);
0584: String[] endFiles = dir.list();
0585:
0586: Arrays.sort(startFiles);
0587: Arrays.sort(endFiles);
0588:
0589: if (!Arrays.equals(startFiles, endFiles)) {
0590: fail(message + ": before delete:\n "
0591: + arrayToString(startFiles)
0592: + "\n after delete:\n "
0593: + arrayToString(endFiles));
0594: }
0595: }
0596:
0597: /**
0598: * Make sure we skip wicked long terms.
0599: */
0600: public void testWickedLongTerm() throws IOException {
0601: RAMDirectory dir = new RAMDirectory();
0602: IndexWriter writer = new IndexWriter(dir,
0603: new StandardAnalyzer(), true);
0604:
0605: char[] chars = new char[16383];
0606: Arrays.fill(chars, 'x');
0607: Document doc = new Document();
0608: final String bigTerm = new String(chars);
0609:
0610: // Max length term is 16383, so this content produces
0611: // a too-long term:
0612: String contents = "abc xyz x" + bigTerm + " another term";
0613: doc.add(new Field("content", contents, Field.Store.NO,
0614: Field.Index.TOKENIZED));
0615: writer.addDocument(doc);
0616:
0617: // Make sure we can add another normal document
0618: doc = new Document();
0619: doc.add(new Field("content", "abc bbb ccc", Field.Store.NO,
0620: Field.Index.TOKENIZED));
0621: writer.addDocument(doc);
0622: writer.close();
0623:
0624: IndexReader reader = IndexReader.open(dir);
0625:
0626: // Make sure all terms < max size were indexed
0627: assertEquals(2, reader.docFreq(new Term("content", "abc")));
0628: assertEquals(1, reader.docFreq(new Term("content", "bbb")));
0629: assertEquals(1, reader.docFreq(new Term("content", "term")));
0630: assertEquals(1, reader.docFreq(new Term("content", "another")));
0631:
0632: // Make sure position is still incremented when
0633: // massive term is skipped:
0634: TermPositions tps = reader.termPositions(new Term("content",
0635: "another"));
0636: assertTrue(tps.next());
0637: assertEquals(1, tps.freq());
0638: assertEquals(3, tps.nextPosition());
0639:
0640: // Make sure the doc that has the massive term is in
0641: // the index:
0642: assertEquals(
0643: "document with wicked long term should is not in the index!",
0644: 2, reader.numDocs());
0645:
0646: reader.close();
0647:
0648: // Make sure we can add a document with exactly the
0649: // maximum length term, and search on that term:
0650: doc = new Document();
0651: doc.add(new Field("content", bigTerm, Field.Store.NO,
0652: Field.Index.TOKENIZED));
0653: StandardAnalyzer sa = new StandardAnalyzer();
0654: sa.setMaxTokenLength(100000);
0655: writer = new IndexWriter(dir, sa);
0656: writer.addDocument(doc);
0657: writer.close();
0658: reader = IndexReader.open(dir);
0659: assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
0660: reader.close();
0661:
0662: dir.close();
0663: }
0664:
0665: public void testOptimizeMaxNumSegments() throws IOException {
0666:
0667: MockRAMDirectory dir = new MockRAMDirectory();
0668:
0669: final Document doc = new Document();
0670: doc.add(new Field("content", "aaa", Field.Store.YES,
0671: Field.Index.TOKENIZED));
0672:
0673: for (int numDocs = 38; numDocs < 500; numDocs += 38) {
0674: IndexWriter writer = new IndexWriter(dir,
0675: new WhitespaceAnalyzer(), true);
0676: LogDocMergePolicy ldmp = new LogDocMergePolicy();
0677: ldmp.setMinMergeDocs(1);
0678: writer.setMergePolicy(ldmp);
0679: writer.setMergeFactor(5);
0680: writer.setMaxBufferedDocs(2);
0681: for (int j = 0; j < numDocs; j++)
0682: writer.addDocument(doc);
0683: writer.close();
0684:
0685: SegmentInfos sis = new SegmentInfos();
0686: sis.read(dir);
0687: final int segCount = sis.size();
0688:
0689: writer = new IndexWriter(dir, new WhitespaceAnalyzer());
0690: writer.setMergePolicy(ldmp);
0691: writer.setMergeFactor(5);
0692: writer.optimize(3);
0693: writer.close();
0694:
0695: sis = new SegmentInfos();
0696: sis.read(dir);
0697: final int optSegCount = sis.size();
0698:
0699: if (segCount < 3)
0700: assertEquals(segCount, optSegCount);
0701: else
0702: assertEquals(3, optSegCount);
0703: }
0704: }
0705:
0706: public void testOptimizeMaxNumSegments2() throws IOException {
0707: MockRAMDirectory dir = new MockRAMDirectory();
0708:
0709: final Document doc = new Document();
0710: doc.add(new Field("content", "aaa", Field.Store.YES,
0711: Field.Index.TOKENIZED));
0712:
0713: IndexWriter writer = new IndexWriter(dir,
0714: new WhitespaceAnalyzer(), true);
0715: LogDocMergePolicy ldmp = new LogDocMergePolicy();
0716: ldmp.setMinMergeDocs(1);
0717: writer.setMergePolicy(ldmp);
0718: writer.setMergeFactor(4);
0719: writer.setMaxBufferedDocs(2);
0720:
0721: for (int iter = 0; iter < 10; iter++) {
0722:
0723: for (int i = 0; i < 19; i++)
0724: writer.addDocument(doc);
0725:
0726: writer.flush();
0727:
0728: SegmentInfos sis = new SegmentInfos();
0729: ((ConcurrentMergeScheduler) writer.getMergeScheduler())
0730: .sync();
0731: sis.read(dir);
0732:
0733: final int segCount = sis.size();
0734:
0735: writer.optimize(7);
0736:
0737: sis = new SegmentInfos();
0738: ((ConcurrentMergeScheduler) writer.getMergeScheduler())
0739: .sync();
0740: sis.read(dir);
0741: final int optSegCount = sis.size();
0742:
0743: if (segCount < 7)
0744: assertEquals(segCount, optSegCount);
0745: else
0746: assertEquals(7, optSegCount);
0747: }
0748: }
0749:
0750: /**
0751: * Make sure optimize doesn't use any more than 1X
0752: * starting index size as its temporary free space
0753: * required.
0754: */
0755: public void testOptimizeTempSpaceUsage() throws IOException {
0756:
0757: MockRAMDirectory dir = new MockRAMDirectory();
0758: IndexWriter writer = new IndexWriter(dir,
0759: new WhitespaceAnalyzer(), true);
0760: for (int j = 0; j < 500; j++) {
0761: addDocWithIndex(writer, j);
0762: }
0763: writer.close();
0764:
0765: long startDiskUsage = 0;
0766: String[] files = dir.list();
0767: for (int i = 0; i < files.length; i++) {
0768: startDiskUsage += dir.fileLength(files[i]);
0769: }
0770:
0771: dir.resetMaxUsedSizeInBytes();
0772: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
0773: writer.optimize();
0774: writer.close();
0775: long maxDiskUsage = dir.getMaxUsedSizeInBytes();
0776:
0777: assertTrue(
0778: "optimized used too much temporary space: starting usage was "
0779: + startDiskUsage
0780: + " bytes; max temp usage was " + maxDiskUsage
0781: + " but should have been "
0782: + (2 * startDiskUsage)
0783: + " (= 2X starting usage)",
0784: maxDiskUsage <= 2 * startDiskUsage);
0785: dir.close();
0786: }
0787:
0788: static String arrayToString(String[] l) {
0789: String s = "";
0790: for (int i = 0; i < l.length; i++) {
0791: if (i > 0) {
0792: s += "\n ";
0793: }
0794: s += l[i];
0795: }
0796: return s;
0797: }
0798:
0799: // Make sure we can open an index for create even when a
0800: // reader holds it open (this fails pre lock-less
0801: // commits on windows):
0802: public void testCreateWithReader() throws IOException {
0803: String tempDir = System.getProperty("java.io.tmpdir");
0804: if (tempDir == null)
0805: throw new IOException(
0806: "java.io.tmpdir undefined, cannot run test");
0807: File indexDir = new File(tempDir, "lucenetestindexwriter");
0808:
0809: try {
0810: Directory dir = FSDirectory.getDirectory(indexDir);
0811:
0812: // add one document & close writer
0813: IndexWriter writer = new IndexWriter(dir,
0814: new WhitespaceAnalyzer(), true);
0815: addDoc(writer);
0816: writer.close();
0817:
0818: // now open reader:
0819: IndexReader reader = IndexReader.open(dir);
0820: assertEquals("should be one document", reader.numDocs(), 1);
0821:
0822: // now open index for create:
0823: writer = new IndexWriter(dir, new WhitespaceAnalyzer(),
0824: true);
0825: assertEquals("should be zero documents", writer.docCount(),
0826: 0);
0827: addDoc(writer);
0828: writer.close();
0829:
0830: assertEquals("should be one document", reader.numDocs(), 1);
0831: IndexReader reader2 = IndexReader.open(dir);
0832: assertEquals("should be one document", reader2.numDocs(), 1);
0833: reader.close();
0834: reader2.close();
0835: } finally {
0836: rmDir(indexDir);
0837: }
0838: }
0839:
0840: // Same test as above, but use IndexWriter constructor
0841: // that takes File:
0842: public void testCreateWithReader2() throws IOException {
0843: String tempDir = System.getProperty("java.io.tmpdir");
0844: if (tempDir == null)
0845: throw new IOException(
0846: "java.io.tmpdir undefined, cannot run test");
0847: File indexDir = new File(tempDir, "lucenetestindexwriter");
0848: try {
0849: // add one document & close writer
0850: IndexWriter writer = new IndexWriter(indexDir,
0851: new WhitespaceAnalyzer(), true);
0852: addDoc(writer);
0853: writer.close();
0854:
0855: // now open reader:
0856: IndexReader reader = IndexReader.open(indexDir);
0857: assertEquals("should be one document", reader.numDocs(), 1);
0858:
0859: // now open index for create:
0860: writer = new IndexWriter(indexDir,
0861: new WhitespaceAnalyzer(), true);
0862: assertEquals("should be zero documents", writer.docCount(),
0863: 0);
0864: addDoc(writer);
0865: writer.close();
0866:
0867: assertEquals("should be one document", reader.numDocs(), 1);
0868: IndexReader reader2 = IndexReader.open(indexDir);
0869: assertEquals("should be one document", reader2.numDocs(), 1);
0870: reader.close();
0871: reader2.close();
0872: } finally {
0873: rmDir(indexDir);
0874: }
0875: }
0876:
0877: // Same test as above, but use IndexWriter constructor
0878: // that takes String:
0879: public void testCreateWithReader3() throws IOException {
0880: String tempDir = System.getProperty("tempDir");
0881: if (tempDir == null)
0882: throw new IOException(
0883: "java.io.tmpdir undefined, cannot run test");
0884:
0885: String dirName = tempDir + "/lucenetestindexwriter";
0886: try {
0887:
0888: // add one document & close writer
0889: IndexWriter writer = new IndexWriter(dirName,
0890: new WhitespaceAnalyzer(), true);
0891: addDoc(writer);
0892: writer.close();
0893:
0894: // now open reader:
0895: IndexReader reader = IndexReader.open(dirName);
0896: assertEquals("should be one document", reader.numDocs(), 1);
0897:
0898: // now open index for create:
0899: writer = new IndexWriter(dirName, new WhitespaceAnalyzer(),
0900: true);
0901: assertEquals("should be zero documents", writer.docCount(),
0902: 0);
0903: addDoc(writer);
0904: writer.close();
0905:
0906: assertEquals("should be one document", reader.numDocs(), 1);
0907: IndexReader reader2 = IndexReader.open(dirName);
0908: assertEquals("should be one document", reader2.numDocs(), 1);
0909: reader.close();
0910: reader2.close();
0911: } finally {
0912: rmDir(new File(dirName));
0913: }
0914: }
0915:
0916: // Simulate a writer that crashed while writing segments
0917: // file: make sure we can still open the index (ie,
0918: // gracefully fall back to the previous segments file),
0919: // and that we can add to the index:
0920: public void testSimulatedCrashedWriter() throws IOException {
0921: Directory dir = new RAMDirectory();
0922:
0923: IndexWriter writer = null;
0924:
0925: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
0926:
0927: // add 100 documents
0928: for (int i = 0; i < 100; i++) {
0929: addDoc(writer);
0930: }
0931:
0932: // close
0933: writer.close();
0934:
0935: long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
0936: assertTrue("segment generation should be > 1 but got " + gen,
0937: gen > 1);
0938:
0939: // Make the next segments file, with last byte
0940: // missing, to simulate a writer that crashed while
0941: // writing segments file:
0942: String fileNameIn = SegmentInfos.getCurrentSegmentFileName(dir);
0943: String fileNameOut = IndexFileNames.fileNameFromGeneration(
0944: IndexFileNames.SEGMENTS, "", 1 + gen);
0945: IndexInput in = dir.openInput(fileNameIn);
0946: IndexOutput out = dir.createOutput(fileNameOut);
0947: long length = in.length();
0948: for (int i = 0; i < length - 1; i++) {
0949: out.writeByte(in.readByte());
0950: }
0951: in.close();
0952: out.close();
0953:
0954: IndexReader reader = null;
0955: try {
0956: reader = IndexReader.open(dir);
0957: } catch (Exception e) {
0958: fail("reader failed to open on a crashed index");
0959: }
0960: reader.close();
0961:
0962: try {
0963: writer = new IndexWriter(dir, new WhitespaceAnalyzer(),
0964: true);
0965: } catch (Exception e) {
0966: fail("writer failed to open on a crashed index");
0967: }
0968:
0969: // add 100 documents
0970: for (int i = 0; i < 100; i++) {
0971: addDoc(writer);
0972: }
0973:
0974: // close
0975: writer.close();
0976: }
0977:
0978: // Simulate a corrupt index by removing last byte of
0979: // latest segments file and make sure we get an
0980: // IOException trying to open the index:
0981: public void testSimulatedCorruptIndex1() throws IOException {
0982: Directory dir = new RAMDirectory();
0983:
0984: IndexWriter writer = null;
0985:
0986: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
0987:
0988: // add 100 documents
0989: for (int i = 0; i < 100; i++) {
0990: addDoc(writer);
0991: }
0992:
0993: // close
0994: writer.close();
0995:
0996: long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
0997: assertTrue("segment generation should be > 1 but got " + gen,
0998: gen > 1);
0999:
1000: String fileNameIn = SegmentInfos.getCurrentSegmentFileName(dir);
1001: String fileNameOut = IndexFileNames.fileNameFromGeneration(
1002: IndexFileNames.SEGMENTS, "", 1 + gen);
1003: IndexInput in = dir.openInput(fileNameIn);
1004: IndexOutput out = dir.createOutput(fileNameOut);
1005: long length = in.length();
1006: for (int i = 0; i < length - 1; i++) {
1007: out.writeByte(in.readByte());
1008: }
1009: in.close();
1010: out.close();
1011: dir.deleteFile(fileNameIn);
1012:
1013: IndexReader reader = null;
1014: try {
1015: reader = IndexReader.open(dir);
1016: fail("reader did not hit IOException on opening a corrupt index");
1017: } catch (Exception e) {
1018: }
1019: if (reader != null) {
1020: reader.close();
1021: }
1022: }
1023:
1024: public void testChangesAfterClose() throws IOException {
1025: Directory dir = new RAMDirectory();
1026:
1027: IndexWriter writer = null;
1028:
1029: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
1030: addDoc(writer);
1031:
1032: // close
1033: writer.close();
1034: try {
1035: addDoc(writer);
1036: fail("did not hit AlreadyClosedException");
1037: } catch (AlreadyClosedException e) {
1038: // expected
1039: }
1040: }
1041:
1042: // Simulate a corrupt index by removing one of the cfs
1043: // files and make sure we get an IOException trying to
1044: // open the index:
1045: public void testSimulatedCorruptIndex2() throws IOException {
1046: Directory dir = new RAMDirectory();
1047:
1048: IndexWriter writer = null;
1049:
1050: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
1051:
1052: // add 100 documents
1053: for (int i = 0; i < 100; i++) {
1054: addDoc(writer);
1055: }
1056:
1057: // close
1058: writer.close();
1059:
1060: long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
1061: assertTrue("segment generation should be > 1 but got " + gen,
1062: gen > 1);
1063:
1064: String[] files = dir.list();
1065: for (int i = 0; i < files.length; i++) {
1066: if (files[i].endsWith(".cfs")) {
1067: dir.deleteFile(files[i]);
1068: break;
1069: }
1070: }
1071:
1072: IndexReader reader = null;
1073: try {
1074: reader = IndexReader.open(dir);
1075: fail("reader did not hit IOException on opening a corrupt index");
1076: } catch (Exception e) {
1077: }
1078: if (reader != null) {
1079: reader.close();
1080: }
1081: }
1082:
1083: /*
1084: * Simple test for "commit on close": open writer with
1085: * autoCommit=false, so it will only commit on close,
1086: * then add a bunch of docs, making sure reader does not
1087: * see these docs until writer is closed.
1088: */
1089: public void testCommitOnClose() throws IOException {
1090: Directory dir = new RAMDirectory();
1091: IndexWriter writer = new IndexWriter(dir,
1092: new WhitespaceAnalyzer(), true);
1093: for (int i = 0; i < 14; i++) {
1094: addDoc(writer);
1095: }
1096: writer.close();
1097:
1098: Term searchTerm = new Term("content", "aaa");
1099: IndexSearcher searcher = new IndexSearcher(dir);
1100: Hits hits = searcher.search(new TermQuery(searchTerm));
1101: assertEquals("first number of hits", 14, hits.length());
1102: searcher.close();
1103:
1104: IndexReader reader = IndexReader.open(dir);
1105:
1106: writer = new IndexWriter(dir, false, new WhitespaceAnalyzer());
1107: for (int i = 0; i < 3; i++) {
1108: for (int j = 0; j < 11; j++) {
1109: addDoc(writer);
1110: }
1111: searcher = new IndexSearcher(dir);
1112: hits = searcher.search(new TermQuery(searchTerm));
1113: assertEquals(
1114: "reader incorrectly sees changes from writer with autoCommit disabled",
1115: 14, hits.length());
1116: searcher.close();
1117: assertTrue("reader should have still been current", reader
1118: .isCurrent());
1119: }
1120:
1121: // Now, close the writer:
1122: writer.close();
1123: assertFalse("reader should not be current now", reader
1124: .isCurrent());
1125:
1126: searcher = new IndexSearcher(dir);
1127: hits = searcher.search(new TermQuery(searchTerm));
1128: assertEquals(
1129: "reader did not see changes after writer was closed",
1130: 47, hits.length());
1131: searcher.close();
1132: }
1133:
1134: /*
1135: * Simple test for "commit on close": open writer with
1136: * autoCommit=false, so it will only commit on close,
1137: * then add a bunch of docs, making sure reader does not
1138: * see them until writer has closed. Then instead of
1139: * closing the writer, call abort and verify reader sees
1140: * nothing was added. Then verify we can open the index
1141: * and add docs to it.
1142: */
1143: public void testCommitOnCloseAbort() throws IOException {
1144: Directory dir = new RAMDirectory();
1145: IndexWriter writer = new IndexWriter(dir,
1146: new WhitespaceAnalyzer(), true);
1147: writer.setMaxBufferedDocs(10);
1148: for (int i = 0; i < 14; i++) {
1149: addDoc(writer);
1150: }
1151: writer.close();
1152:
1153: Term searchTerm = new Term("content", "aaa");
1154: IndexSearcher searcher = new IndexSearcher(dir);
1155: Hits hits = searcher.search(new TermQuery(searchTerm));
1156: assertEquals("first number of hits", 14, hits.length());
1157: searcher.close();
1158:
1159: writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(),
1160: false);
1161: writer.setMaxBufferedDocs(10);
1162: for (int j = 0; j < 17; j++) {
1163: addDoc(writer);
1164: }
1165: // Delete all docs:
1166: writer.deleteDocuments(searchTerm);
1167:
1168: searcher = new IndexSearcher(dir);
1169: hits = searcher.search(new TermQuery(searchTerm));
1170: assertEquals(
1171: "reader incorrectly sees changes from writer with autoCommit disabled",
1172: 14, hits.length());
1173: searcher.close();
1174:
1175: // Now, close the writer:
1176: writer.abort();
1177:
1178: assertNoUnreferencedFiles(dir,
1179: "unreferenced files remain after abort()");
1180:
1181: searcher = new IndexSearcher(dir);
1182: hits = searcher.search(new TermQuery(searchTerm));
1183: assertEquals("saw changes after writer.abort", 14, hits
1184: .length());
1185: searcher.close();
1186:
1187: // Now make sure we can re-open the index, add docs,
1188: // and all is good:
1189: writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(),
1190: false);
1191: writer.setMaxBufferedDocs(10);
1192: for (int i = 0; i < 12; i++) {
1193: for (int j = 0; j < 17; j++) {
1194: addDoc(writer);
1195: }
1196: searcher = new IndexSearcher(dir);
1197: hits = searcher.search(new TermQuery(searchTerm));
1198: assertEquals(
1199: "reader incorrectly sees changes from writer with autoCommit disabled",
1200: 14, hits.length());
1201: searcher.close();
1202: }
1203:
1204: writer.close();
1205: searcher = new IndexSearcher(dir);
1206: hits = searcher.search(new TermQuery(searchTerm));
1207: assertEquals("didn't see changes after close", 218, hits
1208: .length());
1209: searcher.close();
1210:
1211: dir.close();
1212: }
1213:
1214: /*
1215: * Verify that a writer with "commit on close" indeed
1216: * cleans up the temp segments created after opening
1217: * that are not referenced by the starting segments
1218: * file. We check this by using MockRAMDirectory to
1219: * measure max temp disk space used.
1220: */
1221: public void testCommitOnCloseDiskUsage() throws IOException {
1222: MockRAMDirectory dir = new MockRAMDirectory();
1223: IndexWriter writer = new IndexWriter(dir,
1224: new WhitespaceAnalyzer(), true);
1225: for (int j = 0; j < 30; j++) {
1226: addDocWithIndex(writer, j);
1227: }
1228: writer.close();
1229: dir.resetMaxUsedSizeInBytes();
1230:
1231: long startDiskUsage = dir.getMaxUsedSizeInBytes();
1232: writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(),
1233: false);
1234: for (int j = 0; j < 1470; j++) {
1235: addDocWithIndex(writer, j);
1236: }
1237: long midDiskUsage = dir.getMaxUsedSizeInBytes();
1238: dir.resetMaxUsedSizeInBytes();
1239: writer.optimize();
1240: writer.close();
1241: long endDiskUsage = dir.getMaxUsedSizeInBytes();
1242:
1243: // Ending index is 50X as large as starting index; due
1244: // to 2X disk usage normally we allow 100X max
1245: // transient usage. If something is wrong w/ deleter
1246: // and it doesn't delete intermediate segments then it
1247: // will exceed this 100X:
1248: // System.out.println("start " + startDiskUsage + "; mid " + midDiskUsage + ";end " + endDiskUsage);
1249: assertTrue(
1250: "writer used to much space while adding documents when autoCommit=false",
1251: midDiskUsage < 100 * startDiskUsage);
1252: assertTrue(
1253: "writer used to much space after close when autoCommit=false",
1254: endDiskUsage < 100 * startDiskUsage);
1255: }
1256:
1257: /*
1258: * Verify that calling optimize when writer is open for
1259: * "commit on close" works correctly both for abort()
1260: * and close().
1261: */
1262: public void testCommitOnCloseOptimize() throws IOException {
1263: RAMDirectory dir = new RAMDirectory();
1264: IndexWriter writer = new IndexWriter(dir,
1265: new WhitespaceAnalyzer(), true);
1266: writer.setMaxBufferedDocs(10);
1267: for (int j = 0; j < 17; j++) {
1268: addDocWithIndex(writer, j);
1269: }
1270: writer.close();
1271:
1272: writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(),
1273: false);
1274: writer.optimize();
1275:
1276: // Open a reader before closing (committing) the writer:
1277: IndexReader reader = IndexReader.open(dir);
1278:
1279: // Reader should see index as unoptimized at this
1280: // point:
1281: assertFalse(
1282: "Reader incorrectly sees that the index is optimized",
1283: reader.isOptimized());
1284: reader.close();
1285:
1286: // Abort the writer:
1287: writer.abort();
1288: assertNoUnreferencedFiles(dir, "aborted writer after optimize");
1289:
1290: // Open a reader after aborting writer:
1291: reader = IndexReader.open(dir);
1292:
1293: // Reader should still see index as unoptimized:
1294: assertFalse(
1295: "Reader incorrectly sees that the index is optimized",
1296: reader.isOptimized());
1297: reader.close();
1298:
1299: writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(),
1300: false);
1301: writer.optimize();
1302: writer.close();
1303: assertNoUnreferencedFiles(dir, "closed writer after optimize");
1304:
1305: // Open a reader after closing (committing) the writer:
1306: reader = IndexReader.open(dir);
1307:
1308: // Reader should now see index as optimized:
1309: assertTrue(
1310: "Reader incorrectly sees that the index is unoptimized",
1311: reader.isOptimized());
1312: reader.close();
1313: }
1314:
1315: public void testIndexNoDocuments() throws IOException {
1316: RAMDirectory dir = new RAMDirectory();
1317: IndexWriter writer = new IndexWriter(dir,
1318: new WhitespaceAnalyzer(), true);
1319: writer.flush();
1320: writer.close();
1321:
1322: IndexReader reader = IndexReader.open(dir);
1323: assertEquals(0, reader.maxDoc());
1324: assertEquals(0, reader.numDocs());
1325: reader.close();
1326:
1327: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
1328: writer.flush();
1329: writer.close();
1330:
1331: reader = IndexReader.open(dir);
1332: assertEquals(0, reader.maxDoc());
1333: assertEquals(0, reader.numDocs());
1334: reader.close();
1335: }
1336:
1337: public void testManyFields() throws IOException {
1338: RAMDirectory dir = new RAMDirectory();
1339: IndexWriter writer = new IndexWriter(dir,
1340: new WhitespaceAnalyzer(), true);
1341: writer.setMaxBufferedDocs(10);
1342: for (int j = 0; j < 100; j++) {
1343: Document doc = new Document();
1344: doc.add(new Field("a" + j, "aaa" + j, Field.Store.YES,
1345: Field.Index.TOKENIZED));
1346: doc.add(new Field("b" + j, "aaa" + j, Field.Store.YES,
1347: Field.Index.TOKENIZED));
1348: doc.add(new Field("c" + j, "aaa" + j, Field.Store.YES,
1349: Field.Index.TOKENIZED));
1350: doc.add(new Field("d" + j, "aaa", Field.Store.YES,
1351: Field.Index.TOKENIZED));
1352: doc.add(new Field("e" + j, "aaa", Field.Store.YES,
1353: Field.Index.TOKENIZED));
1354: doc.add(new Field("f" + j, "aaa", Field.Store.YES,
1355: Field.Index.TOKENIZED));
1356: writer.addDocument(doc);
1357: }
1358: writer.close();
1359:
1360: IndexReader reader = IndexReader.open(dir);
1361: assertEquals(100, reader.maxDoc());
1362: assertEquals(100, reader.numDocs());
1363: for (int j = 0; j < 100; j++) {
1364: assertEquals(1, reader
1365: .docFreq(new Term("a" + j, "aaa" + j)));
1366: assertEquals(1, reader
1367: .docFreq(new Term("b" + j, "aaa" + j)));
1368: assertEquals(1, reader
1369: .docFreq(new Term("c" + j, "aaa" + j)));
1370: assertEquals(1, reader.docFreq(new Term("d" + j, "aaa")));
1371: assertEquals(1, reader.docFreq(new Term("e" + j, "aaa")));
1372: assertEquals(1, reader.docFreq(new Term("f" + j, "aaa")));
1373: }
1374: reader.close();
1375: dir.close();
1376: }
1377:
1378: public void testSmallRAMBuffer() throws IOException {
1379: RAMDirectory dir = new RAMDirectory();
1380: IndexWriter writer = new IndexWriter(dir,
1381: new WhitespaceAnalyzer(), true);
1382: writer.setRAMBufferSizeMB(0.000001);
1383: int lastNumFile = dir.list().length;
1384: for (int j = 0; j < 9; j++) {
1385: Document doc = new Document();
1386: doc.add(new Field("field", "aaa" + j, Field.Store.YES,
1387: Field.Index.TOKENIZED));
1388: writer.addDocument(doc);
1389: int numFile = dir.list().length;
1390: // Verify that with a tiny RAM buffer we see new
1391: // segment after every doc
1392: assertTrue(numFile > lastNumFile);
1393: lastNumFile = numFile;
1394: }
1395: writer.close();
1396: dir.close();
1397: }
1398:
1399: // Make sure it's OK to change RAM buffer size and
1400: // maxBufferedDocs in a write session
1401: public void testChangingRAMBuffer() throws IOException {
1402: RAMDirectory dir = new RAMDirectory();
1403: IndexWriter writer = new IndexWriter(dir,
1404: new WhitespaceAnalyzer(), true);
1405: writer.setMaxBufferedDocs(10);
1406: writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
1407:
1408: long lastGen = -1;
1409: for (int j = 1; j < 52; j++) {
1410: Document doc = new Document();
1411: doc.add(new Field("field", "aaa" + j, Field.Store.YES,
1412: Field.Index.TOKENIZED));
1413: writer.addDocument(doc);
1414: _TestUtil.syncConcurrentMerges(writer);
1415: long gen = SegmentInfos
1416: .generationFromSegmentsFileName(SegmentInfos
1417: .getCurrentSegmentFileName(dir.list()));
1418: if (j == 1)
1419: lastGen = gen;
1420: else if (j < 10)
1421: // No new files should be created
1422: assertEquals(gen, lastGen);
1423: else if (10 == j) {
1424: assertTrue(gen > lastGen);
1425: lastGen = gen;
1426: writer.setRAMBufferSizeMB(0.000001);
1427: writer
1428: .setMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
1429: } else if (j < 20) {
1430: assertTrue(gen > lastGen);
1431: lastGen = gen;
1432: } else if (20 == j) {
1433: writer.setRAMBufferSizeMB(16);
1434: writer
1435: .setMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
1436: lastGen = gen;
1437: } else if (j < 30) {
1438: assertEquals(gen, lastGen);
1439: } else if (30 == j) {
1440: writer.setRAMBufferSizeMB(0.000001);
1441: writer
1442: .setMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH);
1443: } else if (j < 40) {
1444: assertTrue(gen > lastGen);
1445: lastGen = gen;
1446: } else if (40 == j) {
1447: writer.setMaxBufferedDocs(10);
1448: writer
1449: .setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
1450: lastGen = gen;
1451: } else if (j < 50) {
1452: assertEquals(gen, lastGen);
1453: writer.setMaxBufferedDocs(10);
1454: writer
1455: .setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
1456: } else if (50 == j) {
1457: assertTrue(gen > lastGen);
1458: }
1459: }
1460: writer.close();
1461: dir.close();
1462: }
1463:
1464: public void testChangingRAMBuffer2() throws IOException {
1465: RAMDirectory dir = new RAMDirectory();
1466: IndexWriter writer = new IndexWriter(dir,
1467: new WhitespaceAnalyzer(), true);
1468: writer.setMaxBufferedDocs(10);
1469: writer.setMaxBufferedDeleteTerms(10);
1470: writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
1471:
1472: for (int j = 1; j < 52; j++) {
1473: Document doc = new Document();
1474: doc.add(new Field("field", "aaa" + j, Field.Store.YES,
1475: Field.Index.TOKENIZED));
1476: writer.addDocument(doc);
1477: }
1478:
1479: long lastGen = -1;
1480: for (int j = 1; j < 52; j++) {
1481: writer.deleteDocuments(new Term("field", "aaa" + j));
1482: _TestUtil.syncConcurrentMerges(writer);
1483: long gen = SegmentInfos
1484: .generationFromSegmentsFileName(SegmentInfos
1485: .getCurrentSegmentFileName(dir.list()));
1486: if (j == 1)
1487: lastGen = gen;
1488: else if (j < 10) {
1489: // No new files should be created
1490: assertEquals(gen, lastGen);
1491: } else if (10 == j) {
1492: assertTrue(gen > lastGen);
1493: lastGen = gen;
1494: writer.setRAMBufferSizeMB(0.000001);
1495: writer
1496: .setMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH);
1497: } else if (j < 20) {
1498: assertTrue(gen > lastGen);
1499: lastGen = gen;
1500: } else if (20 == j) {
1501: writer.setRAMBufferSizeMB(16);
1502: writer
1503: .setMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH);
1504: lastGen = gen;
1505: } else if (j < 30) {
1506: assertEquals(gen, lastGen);
1507: } else if (30 == j) {
1508: writer.setRAMBufferSizeMB(0.000001);
1509: writer
1510: .setMaxBufferedDeleteTerms(IndexWriter.DISABLE_AUTO_FLUSH);
1511: } else if (j < 40) {
1512: assertTrue(gen > lastGen);
1513: lastGen = gen;
1514: } else if (40 == j) {
1515: writer.setMaxBufferedDeleteTerms(10);
1516: writer
1517: .setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
1518: lastGen = gen;
1519: } else if (j < 50) {
1520: assertEquals(gen, lastGen);
1521: writer.setMaxBufferedDeleteTerms(10);
1522: writer
1523: .setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
1524: } else if (50 == j) {
1525: assertTrue(gen > lastGen);
1526: }
1527: }
1528: writer.close();
1529: dir.close();
1530: }
1531:
1532: public void testDiverseDocs() throws IOException {
1533: RAMDirectory dir = new RAMDirectory();
1534: IndexWriter writer = new IndexWriter(dir,
1535: new WhitespaceAnalyzer(), true);
1536: writer.setRAMBufferSizeMB(0.5);
1537: Random rand = new Random(31415);
1538: for (int i = 0; i < 3; i++) {
1539: // First, docs where every term is unique (heavy on
1540: // Posting instances)
1541: for (int j = 0; j < 100; j++) {
1542: Document doc = new Document();
1543: for (int k = 0; k < 100; k++) {
1544: doc.add(new Field("field", Integer.toString(rand
1545: .nextInt()), Field.Store.YES,
1546: Field.Index.TOKENIZED));
1547: }
1548: writer.addDocument(doc);
1549: }
1550:
1551: // Next, many single term docs where only one term
1552: // occurs (heavy on byte blocks)
1553: for (int j = 0; j < 100; j++) {
1554: Document doc = new Document();
1555: doc.add(new Field("field",
1556: "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa",
1557: Field.Store.YES, Field.Index.TOKENIZED));
1558: writer.addDocument(doc);
1559: }
1560:
1561: // Next, many single term docs where only one term
1562: // occurs but the terms are very long (heavy on
1563: // char[] arrays)
1564: for (int j = 0; j < 100; j++) {
1565: StringBuffer b = new StringBuffer();
1566: String x = Integer.toString(j) + ".";
1567: for (int k = 0; k < 1000; k++)
1568: b.append(x);
1569: String longTerm = b.toString();
1570:
1571: Document doc = new Document();
1572: doc.add(new Field("field", longTerm, Field.Store.YES,
1573: Field.Index.TOKENIZED));
1574: writer.addDocument(doc);
1575: }
1576: }
1577: writer.close();
1578:
1579: IndexSearcher searcher = new IndexSearcher(dir);
1580: Hits hits = searcher.search(new TermQuery(new Term("field",
1581: "aaa")));
1582: assertEquals(300, hits.length());
1583: searcher.close();
1584:
1585: dir.close();
1586: }
1587:
1588: public void testEnablingNorms() throws IOException {
1589: RAMDirectory dir = new RAMDirectory();
1590: IndexWriter writer = new IndexWriter(dir,
1591: new WhitespaceAnalyzer(), true);
1592: writer.setMaxBufferedDocs(10);
1593: // Enable norms for only 1 doc, pre flush
1594: for (int j = 0; j < 10; j++) {
1595: Document doc = new Document();
1596: Field f = new Field("field", "aaa", Field.Store.YES,
1597: Field.Index.TOKENIZED);
1598: if (j != 8) {
1599: f.setOmitNorms(true);
1600: }
1601: doc.add(f);
1602: writer.addDocument(doc);
1603: }
1604: writer.close();
1605:
1606: Term searchTerm = new Term("field", "aaa");
1607:
1608: IndexSearcher searcher = new IndexSearcher(dir);
1609: Hits hits = searcher.search(new TermQuery(searchTerm));
1610: assertEquals(10, hits.length());
1611: searcher.close();
1612:
1613: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
1614: writer.setMaxBufferedDocs(10);
1615: // Enable norms for only 1 doc, post flush
1616: for (int j = 0; j < 27; j++) {
1617: Document doc = new Document();
1618: Field f = new Field("field", "aaa", Field.Store.YES,
1619: Field.Index.TOKENIZED);
1620: if (j != 26) {
1621: f.setOmitNorms(true);
1622: }
1623: doc.add(f);
1624: writer.addDocument(doc);
1625: }
1626: writer.close();
1627: searcher = new IndexSearcher(dir);
1628: hits = searcher.search(new TermQuery(searchTerm));
1629: assertEquals(27, hits.length());
1630: searcher.close();
1631:
1632: IndexReader reader = IndexReader.open(dir);
1633: reader.close();
1634:
1635: dir.close();
1636: }
1637:
1638: public void testHighFreqTerm() throws IOException {
1639: RAMDirectory dir = new RAMDirectory();
1640: IndexWriter writer = new IndexWriter(dir,
1641: new WhitespaceAnalyzer(), true);
1642: writer.setRAMBufferSizeMB(0.01);
1643: writer.setMaxFieldLength(100000000);
1644: // Massive doc that has 128 K a's
1645: StringBuffer b = new StringBuffer(1024 * 1024);
1646: for (int i = 0; i < 4096; i++) {
1647: b.append(" a a a a a a a a");
1648: b.append(" a a a a a a a a");
1649: b.append(" a a a a a a a a");
1650: b.append(" a a a a a a a a");
1651: }
1652: Document doc = new Document();
1653: doc.add(new Field("field", b.toString(), Field.Store.YES,
1654: Field.Index.TOKENIZED,
1655: Field.TermVector.WITH_POSITIONS_OFFSETS));
1656: writer.addDocument(doc);
1657: writer.close();
1658:
1659: IndexReader reader = IndexReader.open(dir);
1660: assertEquals(1, reader.maxDoc());
1661: assertEquals(1, reader.numDocs());
1662: Term t = new Term("field", "a");
1663: assertEquals(1, reader.docFreq(t));
1664: TermDocs td = reader.termDocs(t);
1665: td.next();
1666: assertEquals(128 * 1024, td.freq());
1667: reader.close();
1668: dir.close();
1669: }
1670:
1671: // Make sure that a Directory implementation that does
1672: // not use LockFactory at all (ie overrides makeLock and
1673: // implements its own private locking) works OK. This
1674: // was raised on java-dev as loss of backwards
1675: // compatibility.
1676: public void testNullLockFactory() throws IOException {
1677:
1678: final class MyRAMDirectory extends RAMDirectory {
1679: private LockFactory myLockFactory;
1680:
1681: MyRAMDirectory() {
1682: lockFactory = null;
1683: myLockFactory = new SingleInstanceLockFactory();
1684: }
1685:
1686: public Lock makeLock(String name) {
1687: return myLockFactory.makeLock(name);
1688: }
1689: }
1690:
1691: Directory dir = new MyRAMDirectory();
1692: IndexWriter writer = new IndexWriter(dir,
1693: new WhitespaceAnalyzer(), true);
1694: for (int i = 0; i < 100; i++) {
1695: addDoc(writer);
1696: }
1697: writer.close();
1698: Term searchTerm = new Term("content", "aaa");
1699: IndexSearcher searcher = new IndexSearcher(dir);
1700: Hits hits = searcher.search(new TermQuery(searchTerm));
1701: assertEquals("did not get right number of hits", 100, hits
1702: .length());
1703: writer.close();
1704:
1705: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
1706: writer.close();
1707:
1708: dir.close();
1709: }
1710:
1711: public void testFlushWithNoMerging() throws IOException {
1712: Directory dir = new RAMDirectory();
1713: IndexWriter writer = new IndexWriter(dir,
1714: new WhitespaceAnalyzer(), true);
1715: writer.setMaxBufferedDocs(2);
1716: Document doc = new Document();
1717: doc.add(new Field("field", "aaa", Field.Store.YES,
1718: Field.Index.TOKENIZED,
1719: Field.TermVector.WITH_POSITIONS_OFFSETS));
1720: for (int i = 0; i < 19; i++)
1721: writer.addDocument(doc);
1722: writer.flush(false, true);
1723: writer.close();
1724: SegmentInfos sis = new SegmentInfos();
1725: sis.read(dir);
1726: // Since we flushed w/o allowing merging we should now
1727: // have 10 segments
1728: assertEquals(10, sis.size());
1729: }
1730:
1731: // Make sure we can flush segment w/ norms, then add
1732: // empty doc (no norms) and flush
1733: public void testEmptyDocAfterFlushingRealDoc() throws IOException {
1734: Directory dir = new RAMDirectory();
1735: IndexWriter writer = new IndexWriter(dir,
1736: new WhitespaceAnalyzer(), true);
1737: Document doc = new Document();
1738: doc.add(new Field("field", "aaa", Field.Store.YES,
1739: Field.Index.TOKENIZED,
1740: Field.TermVector.WITH_POSITIONS_OFFSETS));
1741: writer.addDocument(doc);
1742: writer.flush();
1743: writer.addDocument(new Document());
1744: writer.close();
1745: IndexReader reader = IndexReader.open(dir);
1746: assertEquals(2, reader.numDocs());
1747: }
1748:
1749: // Test calling optimize(false) whereby optimize is kicked
1750: // off but we don't wait for it to finish (but
1751: // writer.close() does wait)
1752: public void testBackgroundOptimize() throws IOException {
1753:
1754: Directory dir = new MockRAMDirectory();
1755: for (int pass = 0; pass < 2; pass++) {
1756: IndexWriter writer = new IndexWriter(dir,
1757: new WhitespaceAnalyzer(), true);
1758: writer.setMergeScheduler(new ConcurrentMergeScheduler());
1759: Document doc = new Document();
1760: doc.add(new Field("field", "aaa", Field.Store.YES,
1761: Field.Index.TOKENIZED,
1762: Field.TermVector.WITH_POSITIONS_OFFSETS));
1763: writer.setMaxBufferedDocs(2);
1764: writer.setMergeFactor(101);
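// With maxBufferedDocs=2 and mergeFactor=101, the 200 adds below
// flush 100 segments without ever triggering a merge; optimize(false)
// then kicks the merge work off in the background.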
1765: for (int i = 0; i < 200; i++)
1766: writer.addDocument(doc);
1767: writer.optimize(false);
1768:
1769: if (0 == pass) {
1770: writer.close();
1771: IndexReader reader = IndexReader.open(dir);
1772: assertTrue(reader.isOptimized());
1773: reader.close();
1774: } else {
1775: // Get another segment to flush so we can verify it is
1776: // NOT included in the optimization
1777: writer.addDocument(doc);
1778: writer.addDocument(doc);
1779: writer.close();
1780:
1781: IndexReader reader = IndexReader.open(dir);
1782: assertTrue(!reader.isOptimized());
1783: reader.close();
1784:
1785: SegmentInfos infos = new SegmentInfos();
1786: infos.read(dir);
1787: assertEquals(2, infos.size());
1788: }
1789: }
1790:
1791: dir.close();
1792: }
1793:
1794: private void rmDir(File dir) {
1795: File[] files = dir.listFiles();
1796: if (files != null) {
1797: for (int i = 0; i < files.length; i++) {
1798: files[i].delete();
1799: }
1800: }
1801: dir.delete();
1802: }
1803:
1804: /**
* Test that no NullPointerException will be raised
1806: * when adding one document with a single, empty field
1807: * and term vectors enabled.
1808: * @throws IOException
1809: *
1810: */
1811: public void testBadSegment() throws IOException {
1812: MockRAMDirectory dir = new MockRAMDirectory();
1813: IndexWriter ir = new IndexWriter(dir, new StandardAnalyzer(),
1814: true);
1815:
1816: Document document = new Document();
1817: document.add(new Field("tvtest", "", Field.Store.NO,
1818: Field.Index.TOKENIZED, Field.TermVector.YES));
1819: ir.addDocument(document);
1820: ir.close();
1821: dir.close();
1822: }
1823:
1824: // LUCENE-1008
1825: public void testNoTermVectorAfterTermVector() throws IOException {
1826: MockRAMDirectory dir = new MockRAMDirectory();
1827: IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(),
1828: true);
1829: Document document = new Document();
1830: document.add(new Field("tvtest", "a b c", Field.Store.NO,
1831: Field.Index.TOKENIZED, Field.TermVector.YES));
1832: iw.addDocument(document);
1833: document = new Document();
1834: document.add(new Field("tvtest", "x y z", Field.Store.NO,
1835: Field.Index.TOKENIZED, Field.TermVector.NO));
1836: iw.addDocument(document);
1837: // Make first segment
1838: iw.flush();
1839:
1840: document.add(new Field("tvtest", "a b c", Field.Store.NO,
1841: Field.Index.TOKENIZED, Field.TermVector.YES));
1842: iw.addDocument(document);
1843: // Make 2nd segment
1844: iw.flush();
1845:
1846: iw.optimize();
1847: iw.close();
1848: dir.close();
1849: }
1850:
1851: // LUCENE-1010
1852: public void testNoTermVectorAfterTermVectorMerge()
1853: throws IOException {
1854: MockRAMDirectory dir = new MockRAMDirectory();
1855: IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(),
1856: true);
1857: Document document = new Document();
1858: document.add(new Field("tvtest", "a b c", Field.Store.NO,
1859: Field.Index.TOKENIZED, Field.TermVector.YES));
1860: iw.addDocument(document);
1861: iw.flush();
1862:
1863: document = new Document();
1864: document.add(new Field("tvtest", "x y z", Field.Store.NO,
1865: Field.Index.TOKENIZED, Field.TermVector.NO));
1866: iw.addDocument(document);
1867: // Make first segment
1868: iw.flush();
1869:
1870: iw.optimize();
1871:
1872: document.add(new Field("tvtest", "a b c", Field.Store.NO,
1873: Field.Index.TOKENIZED, Field.TermVector.YES));
1874: iw.addDocument(document);
1875: // Make 2nd segment
1876: iw.flush();
1877: iw.optimize();
1878:
1879: iw.close();
1880: dir.close();
1881: }
1882:
1883: // LUCENE-1036
1884: public void testMaxThreadPriority() throws IOException {
1885: int pri = Thread.currentThread().getPriority();
1886: try {
1887: MockRAMDirectory dir = new MockRAMDirectory();
1888: IndexWriter iw = new IndexWriter(dir,
1889: new StandardAnalyzer(), true);
1890: Document document = new Document();
1891: document.add(new Field("tvtest", "a b c", Field.Store.NO,
1892: Field.Index.TOKENIZED, Field.TermVector.YES));
1893: iw.setMaxBufferedDocs(2);
1894: iw.setMergeFactor(2);
1895: Thread.currentThread().setPriority(Thread.MAX_PRIORITY);
1896: for (int i = 0; i < 4; i++)
1897: iw.addDocument(document);
1898: iw.close();
1899:
1900: } finally {
1901: Thread.currentThread().setPriority(pri);
1902: }
1903: }
1904:
1905: // Just intercepts all merges & verifies that we are never
1906: // merging a segment with >= 20 (maxMergeDocs) docs
1907: private class MyMergeScheduler extends MergeScheduler {
1908: synchronized public void merge(IndexWriter writer)
1909: throws CorruptIndexException, IOException {
1910:
1911: while (true) {
1912: MergePolicy.OneMerge merge = writer.getNextMerge();
1913: if (merge == null)
1914: break;
1915: for (int i = 0; i < merge.segments.size(); i++)
assertTrue(merge.segments.info(i).docCount < 20);
1917: writer.merge(merge);
1918: }
1919: }
1920:
1921: public void close() {
1922: }
1923: }
1924:
1925: // LUCENE-1013
1926: public void testSetMaxMergeDocs() throws IOException {
1927: MockRAMDirectory dir = new MockRAMDirectory();
1928: IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(),
1929: true);
1930: iw.setMergeScheduler(new MyMergeScheduler());
1931: iw.setMaxMergeDocs(20);
1932: iw.setMaxBufferedDocs(2);
1933: iw.setMergeFactor(2);
1934: Document document = new Document();
1935: document.add(new Field("tvtest", "a b c", Field.Store.NO,
1936: Field.Index.TOKENIZED, Field.TermVector.YES));
1937: for (int i = 0; i < 177; i++)
1938: iw.addDocument(document);
1939: iw.close();
1940: }
1941:
1942: // LUCENE-1072
1943: public void testExceptionFromTokenStream() throws IOException {
1944: RAMDirectory dir = new MockRAMDirectory();
1945: IndexWriter writer = new IndexWriter(dir, new Analyzer() {
1946:
1947: public TokenStream tokenStream(String fieldName,
1948: Reader reader) {
1949: return new TokenFilter(new StandardTokenizer(reader)) {
1950: private int count = 0;
1951:
1952: public Token next() throws IOException {
1953: if (count++ == 5) {
1954: throw new IOException();
1955: }
1956: return input.next();
1957: }
1958: };
1959: }
1960:
1961: }, true);
1962:
1963: Document doc = new Document();
1964: String contents = "aa bb cc dd ee ff gg hh ii jj kk";
1965: doc.add(new Field("content", contents, Field.Store.NO,
1966: Field.Index.TOKENIZED));
1967: try {
1968: writer.addDocument(doc);
1969: fail("did not hit expected exception");
1970: } catch (Exception e) {
1971: }
1972:
1973: // Make sure we can add another normal document
1974: doc = new Document();
1975: doc.add(new Field("content", "aa bb cc dd", Field.Store.NO,
1976: Field.Index.TOKENIZED));
1977: writer.addDocument(doc);
1978:
1979: // Make sure we can add another normal document
1980: doc = new Document();
1981: doc.add(new Field("content", "aa bb cc dd", Field.Store.NO,
1982: Field.Index.TOKENIZED));
1983: writer.addDocument(doc);
1984:
1985: writer.close();
1986: IndexReader reader = IndexReader.open(dir);
1987: final Term t = new Term("content", "aa");
1988: assertEquals(reader.docFreq(t), 3);
1989:
1990: // Make sure the doc that hit the exception was marked
1991: // as deleted:
1992: TermDocs tdocs = reader.termDocs(t);
1993: int count = 0;
1994: while (tdocs.next()) {
1995: count++;
1996: }
1997: assertEquals(2, count);
1998:
1999: assertEquals(reader.docFreq(new Term("content", "gg")), 0);
2000: reader.close();
2001: dir.close();
2002: }
2003:
2004: private static class FailOnlyOnFlush extends
2005: MockRAMDirectory.Failure {
2006: boolean doFail = false;
2007: int count;
2008:
2009: public void setDoFail() {
this.doFail = true;
2011: }
2012:
2013: public void clearDoFail() {
this.doFail = false;
2015: }
2016:
2017: public void eval(MockRAMDirectory dir) throws IOException {
2018: if (doFail) {
2019: StackTraceElement[] trace = new Exception()
2020: .getStackTrace();
2021: for (int i = 0; i < trace.length; i++) {
2022: if ("org.apache.lucene.index.DocumentsWriter"
2023: .equals(trace[i].getClassName())
2024: && "appendPostings".equals(trace[i]
2025: .getMethodName()) && count++ == 30) {
2026: doFail = false;
2027: throw new IOException(
2028: "now failing during flush");
2029: }
2030: }
2031: }
2032: }
2033: }
2034:
2035: // LUCENE-1072: make sure an errant exception on flushing
2036: // one segment only takes out those docs in that one flush
2037: public void testDocumentsWriterAbort() throws IOException {
2038: MockRAMDirectory dir = new MockRAMDirectory();
2039: FailOnlyOnFlush failure = new FailOnlyOnFlush();
2040: failure.setDoFail();
2041: dir.failOn(failure);
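// MockRAMDirectory will now invoke failure.eval(...) on its file
// operations; FailOnlyOnFlush throws only while
// DocumentsWriter.appendPostings is on the stack and disables itself
// after throwing, so exactly one flush below should fail.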
2042:
2043: IndexWriter writer = new IndexWriter(dir,
2044: new WhitespaceAnalyzer());
2045: writer.setMaxBufferedDocs(2);
2046: Document doc = new Document();
2047: String contents = "aa bb cc dd ee ff gg hh ii jj kk";
2048: doc.add(new Field("content", contents, Field.Store.NO,
2049: Field.Index.TOKENIZED));
2050: boolean hitError = false;
2051: for (int i = 0; i < 200; i++) {
2052: try {
2053: writer.addDocument(doc);
2054: } catch (IOException ioe) {
2055: // only one flush should fail:
2056: assertFalse(hitError);
2057: hitError = true;
2058: }
2059: }
2060: assertTrue(hitError);
2061: writer.close();
2062: IndexReader reader = IndexReader.open(dir);
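// One 2-doc flush was aborted by the injected failure, so 198 of the
// 200 added docs should have made it into the index: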
2063: assertEquals(198, reader.docFreq(new Term("content", "aa")));
2064: reader.close();
2065: }
2066:
2067: private class CrashingFilter extends TokenFilter {
2068: String fieldName;
2069: int count;
2070:
2071: public CrashingFilter(String fieldName, TokenStream input) {
super(input);
this.fieldName = fieldName;
2074: }
2075:
2076: public Token next(Token result) throws IOException {
2077: if (this .fieldName.equals("crash") && count++ >= 4)
2078: throw new IOException("I'm experiencing problems");
2079: return input.next(result);
2080: }
2081:
2082: public void reset() throws IOException {
super.reset();
2084: count = 0;
2085: }
2086: }
2087:
2088: public void testDocumentsWriterExceptions() throws IOException {
2089: Analyzer analyzer = new Analyzer() {
2090: public TokenStream tokenStream(String fieldName,
2091: Reader reader) {
2092: return new CrashingFilter(fieldName,
2093: new WhitespaceTokenizer(reader));
2094: }
2095: };
2096:
2097: for (int i = 0; i < 2; i++) {
2098: MockRAMDirectory dir = new MockRAMDirectory();
2099: IndexWriter writer = new IndexWriter(dir, analyzer);
2100: //writer.setInfoStream(System.out);
2101: Document doc = new Document();
2102: doc.add(new Field("contents", "here are some contents",
2103: Field.Store.YES, Field.Index.TOKENIZED,
2104: Field.TermVector.WITH_POSITIONS_OFFSETS));
2105: writer.addDocument(doc);
2106: writer.addDocument(doc);
2107: doc.add(new Field("crash",
2108: "this should crash after 4 terms", Field.Store.YES,
2109: Field.Index.TOKENIZED,
2110: Field.TermVector.WITH_POSITIONS_OFFSETS));
2111: doc.add(new Field("other", "this will not get indexed",
2112: Field.Store.YES, Field.Index.TOKENIZED,
2113: Field.TermVector.WITH_POSITIONS_OFFSETS));
2114: try {
2115: writer.addDocument(doc);
2116: fail("did not hit expected exception");
2117: } catch (IOException ioe) {
2118: }
2119:
2120: if (0 == i) {
2121: doc = new Document();
2122: doc.add(new Field("contents", "here are some contents",
2123: Field.Store.YES, Field.Index.TOKENIZED,
2124: Field.TermVector.WITH_POSITIONS_OFFSETS));
2125: writer.addDocument(doc);
2126: writer.addDocument(doc);
2127: }
2128: writer.close();
2129:
2130: IndexReader reader = IndexReader.open(dir);
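// Expected: the two good docs, plus the doc that hit the exception
// (it still counts toward maxDoc but is marked deleted), plus two
// more good docs on iteration 0 only.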
2131: int expected = 3 + (1 - i) * 2;
2132: assertEquals(expected, reader.docFreq(new Term("contents",
2133: "here")));
2134: assertEquals(expected, reader.maxDoc());
2135: int numDel = 0;
2136: for (int j = 0; j < reader.maxDoc(); j++) {
2137: if (reader.isDeleted(j))
2138: numDel++;
2139: else
2140: reader.document(j);
2141: reader.getTermFreqVectors(j);
2142: }
2143: reader.close();
2144:
2145: assertEquals(1, numDel);
2146:
2147: writer = new IndexWriter(dir, analyzer);
2148: writer.setMaxBufferedDocs(10);
2149: doc = new Document();
2150: doc.add(new Field("contents", "here are some contents",
2151: Field.Store.YES, Field.Index.TOKENIZED,
2152: Field.TermVector.WITH_POSITIONS_OFFSETS));
2153: for (int j = 0; j < 17; j++)
2154: writer.addDocument(doc);
2155: writer.optimize();
2156: writer.close();
2157:
2158: reader = IndexReader.open(dir);
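// optimize() merged away the single deleted doc, so the expected
// count is the previous maxDoc minus one, plus the 17 docs just added.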
2159: expected = 19 + (1 - i) * 2;
2160: assertEquals(expected, reader.docFreq(new Term("contents",
2161: "here")));
2162: assertEquals(expected, reader.maxDoc());
2163: numDel = 0;
2164: for (int j = 0; j < reader.maxDoc(); j++) {
2165: if (reader.isDeleted(j))
2166: numDel++;
2167: else
2168: reader.document(j);
2169: reader.getTermFreqVectors(j);
2170: }
2171: reader.close();
2172: assertEquals(0, numDel);
2173:
2174: dir.close();
2175: }
2176: }
2177:
2178: public void testDocumentsWriterExceptionThreads()
2179: throws IOException {
2180: Analyzer analyzer = new Analyzer() {
2181: public TokenStream tokenStream(String fieldName,
2182: Reader reader) {
2183: return new CrashingFilter(fieldName,
2184: new WhitespaceTokenizer(reader));
2185: }
2186: };
2187:
2188: final int NUM_THREAD = 3;
2189: final int NUM_ITER = 100;
2190:
2191: for (int i = 0; i < 2; i++) {
2192: MockRAMDirectory dir = new MockRAMDirectory();
2193:
2194: {
2195: final IndexWriter writer = new IndexWriter(dir,
2196: analyzer);
2197:
2198: final int finalI = i;
2199:
2200: Thread[] threads = new Thread[NUM_THREAD];
2201: for (int t = 0; t < NUM_THREAD; t++) {
2202: threads[t] = new Thread() {
2203: public void run() {
2204: try {
for (int iter = 0; iter < NUM_ITER; iter++) {
Document doc = new Document();
doc.add(new Field("contents",
"here are some contents", Field.Store.YES,
Field.Index.TOKENIZED,
Field.TermVector.WITH_POSITIONS_OFFSETS));
writer.addDocument(doc);
writer.addDocument(doc);
doc.add(new Field("crash",
"this should crash after 4 terms",
Field.Store.YES, Field.Index.TOKENIZED,
Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("other",
"this will not get indexed", Field.Store.YES,
Field.Index.TOKENIZED,
Field.TermVector.WITH_POSITIONS_OFFSETS));
try {
writer.addDocument(doc);
fail("did not hit expected exception");
} catch (IOException ioe) {
}

if (0 == finalI) {
doc = new Document();
doc.add(new Field("contents",
"here are some contents", Field.Store.YES,
Field.Index.TOKENIZED,
Field.TermVector.WITH_POSITIONS_OFFSETS));
writer.addDocument(doc);
writer.addDocument(doc);
}
}
2249: } catch (Throwable t) {
synchronized (this) {
System.out.println(Thread.currentThread().getName()
+ ": ERROR: hit unexpected exception");
t.printStackTrace(System.out);
}
2258: fail();
2259: }
2260: }
2261: };
2262: threads[t].start();
2263: }
2264:
2265: for (int t = 0; t < NUM_THREAD; t++)
2266: while (true)
2267: try {
2268: threads[t].join();
2269: break;
2270: } catch (InterruptedException ie) {
2271: Thread.currentThread().interrupt();
2272: }
2273:
2274: writer.close();
2275: }
2276:
2277: IndexReader reader = IndexReader.open(dir);
2278: int expected = (3 + (1 - i) * 2) * NUM_THREAD * NUM_ITER;
2279: assertEquals(expected, reader.docFreq(new Term("contents",
2280: "here")));
2281: assertEquals(expected, reader.maxDoc());
2282: int numDel = 0;
2283: for (int j = 0; j < reader.maxDoc(); j++) {
2284: if (reader.isDeleted(j))
2285: numDel++;
2286: else
2287: reader.document(j);
2288: reader.getTermFreqVectors(j);
2289: }
2290: reader.close();
2291:
2292: assertEquals(NUM_THREAD * NUM_ITER, numDel);
2293:
2294: IndexWriter writer = new IndexWriter(dir, analyzer);
2295: writer.setMaxBufferedDocs(10);
2296: Document doc = new Document();
2297: doc.add(new Field("contents", "here are some contents",
2298: Field.Store.YES, Field.Index.TOKENIZED,
2299: Field.TermVector.WITH_POSITIONS_OFFSETS));
2300: for (int j = 0; j < 17; j++)
2301: writer.addDocument(doc);
2302: writer.optimize();
2303: writer.close();
2304:
2305: reader = IndexReader.open(dir);
2306: expected += 17 - NUM_THREAD * NUM_ITER;
2307: assertEquals(expected, reader.docFreq(new Term("contents",
2308: "here")));
2309: assertEquals(expected, reader.maxDoc());
2310: numDel = 0;
2311: for (int j = 0; j < reader.maxDoc(); j++) {
2312: if (reader.isDeleted(j))
2313: numDel++;
2314: else
2315: reader.document(j);
2316: reader.getTermFreqVectors(j);
2317: }
2318: reader.close();
2319: assertEquals(0, numDel);
2320:
2321: dir.close();
2322: }
2323: }
2324:
2325: public void testVariableSchema() throws IOException {
2326: MockRAMDirectory dir = new MockRAMDirectory();
2327: int delID = 0;
2328: for (int i = 0; i < 20; i++) {
2329: IndexWriter writer = new IndexWriter(dir, false,
2330: new WhitespaceAnalyzer());
2331: writer.setMaxBufferedDocs(2);
2332: writer.setMergeFactor(2);
2333: writer.setUseCompoundFile(false);
2334: Document doc = new Document();
2335: String contents = "aa bb cc dd ee ff gg hh ii jj kk";
2336:
2337: if (i == 7) {
2338: // Add empty docs here
2339: doc.add(new Field("content3", "", Field.Store.NO,
2340: Field.Index.TOKENIZED));
2341: } else {
2342: Field.Store storeVal;
2343: if (i % 2 == 0) {
2344: doc.add(new Field("content4", contents,
2345: Field.Store.YES, Field.Index.TOKENIZED));
2346: storeVal = Field.Store.YES;
2347: } else
2348: storeVal = Field.Store.NO;
2349: doc.add(new Field("content1", contents, storeVal,
2350: Field.Index.TOKENIZED));
2351: doc.add(new Field("content3", "", Field.Store.YES,
2352: Field.Index.TOKENIZED));
2353: doc.add(new Field("content5", "", storeVal,
2354: Field.Index.TOKENIZED));
2355: }
2356:
2357: for (int j = 0; j < 4; j++)
2358: writer.addDocument(doc);
2359:
2360: writer.close();
2361: IndexReader reader = IndexReader.open(dir);
2362: reader.deleteDocument(delID++);
2363: reader.close();
2364:
2365: if (0 == i % 4) {
2366: writer = new IndexWriter(dir, false,
2367: new WhitespaceAnalyzer());
2368: writer.setUseCompoundFile(false);
2369: writer.optimize();
2370: writer.close();
2371: }
2372: }
2373: }
2374:
2375: public void testNoWaitClose() throws Throwable {
2376: RAMDirectory directory = new MockRAMDirectory();
2377:
2378: final Document doc = new Document();
2379: Field idField = new Field("id", "", Field.Store.YES,
2380: Field.Index.UN_TOKENIZED);
2381: doc.add(idField);
2382:
2383: for (int pass = 0; pass < 3; pass++) {
2384: boolean autoCommit = pass % 2 == 0;
2385: IndexWriter writer = new IndexWriter(directory, autoCommit,
2386: new WhitespaceAnalyzer(), true);
2387:
2388: //System.out.println("TEST: pass=" + pass + " ac=" + autoCommit + " cms=" + (pass >= 2));
2389: for (int iter = 0; iter < 10; iter++) {
2390: //System.out.println("TEST: iter=" + iter);
2391: MergeScheduler ms;
2392: if (pass >= 2)
2393: ms = new ConcurrentMergeScheduler();
2394: else
2395: ms = new SerialMergeScheduler();
2396:
2397: writer.setMergeScheduler(ms);
2398: writer.setMaxBufferedDocs(2);
2399: writer.setMergeFactor(100);
2400:
2401: for (int j = 0; j < 199; j++) {
2402: idField.setValue(Integer.toString(iter * 201 + j));
2403: writer.addDocument(doc);
2404: }
2405:
2406: int delID = iter * 199;
2407: for (int j = 0; j < 20; j++) {
2408: writer.deleteDocuments(new Term("id", Integer
2409: .toString(delID)));
2410: delID += 5;
2411: }
2412:
2413: // Force a bunch of merge threads to kick off so we
2414: // stress out aborting them on close:
2415: writer.setMergeFactor(2);
2416:
2417: final IndexWriter finalWriter = writer;
2418: final ArrayList failure = new ArrayList();
2419: Thread t1 = new Thread() {
2420: public void run() {
2421: boolean done = false;
2422: while (!done) {
2423: for (int i = 0; i < 100; i++) {
2424: try {
2425: finalWriter.addDocument(doc);
2426: } catch (AlreadyClosedException e) {
2427: done = true;
2428: break;
2429: } catch (NullPointerException e) {
2430: done = true;
2431: break;
2432: } catch (Throwable e) {
2433: e.printStackTrace(System.out);
2434: failure.add(e);
2435: done = true;
2436: break;
2437: }
2438: }
2439: Thread.yield();
2440: }
2441:
2442: }
2443: };
2444:
t1.start();
2449:
2450: writer.close(false);
2451: while (true) {
2452: try {
2453: t1.join();
2454: break;
2455: } catch (InterruptedException ie) {
2456: Thread.currentThread().interrupt();
2457: }
}

if (failure.size() > 0)
throw (Throwable) failure.get(0);
2459:
2460: // Make sure reader can read
2461: IndexReader reader = IndexReader.open(directory);
2462: reader.close();
2463:
2464: // Reopen
2465: writer = new IndexWriter(directory, autoCommit,
2466: new WhitespaceAnalyzer(), false);
2467: }
2468: writer.close();
2469: }
2470:
2471: directory.close();
2472: }
2473:
2474: // Used by test cases below
2475: private class IndexerThread extends Thread {
2476:
2477: boolean diskFull;
2478: Throwable error;
2479: AlreadyClosedException ace;
2480: IndexWriter writer;
2481: boolean noErrors;
2482:
2483: public IndexerThread(IndexWriter writer, boolean noErrors) {
this.writer = writer;
this.noErrors = noErrors;
2486: }
2487:
2488: public void run() {
2489:
2490: final Document doc = new Document();
2491: doc.add(new Field("field",
2492: "aaa bbb ccc ddd eee fff ggg hhh iii jjj",
2493: Field.Store.YES, Field.Index.TOKENIZED,
2494: Field.TermVector.WITH_POSITIONS_OFFSETS));
2495:
2496: int idUpto = 0;
2497: int fullCount = 0;
2498: final long stopTime = System.currentTimeMillis() + 500;
2499:
2500: while (System.currentTimeMillis() < stopTime) {
2501: try {
2502: writer.updateDocument(new Term("id", ""
2503: + (idUpto++)), doc);
2504: } catch (IOException ioe) {
2505: if (ioe.getMessage()
2506: .startsWith("fake disk full at")
2507: || ioe.getMessage().equals(
2508: "now failing on purpose")) {
2509: diskFull = true;
2510: try {
2511: Thread.sleep(1);
2512: } catch (InterruptedException ie) {
2513: Thread.currentThread().interrupt();
2514: }
2515: if (fullCount++ >= 5)
2516: break;
2517: } else {
2518: if (noErrors) {
System.out.println(Thread.currentThread().getName()
+ ": ERROR: unexpected IOException:");
2523: ioe.printStackTrace(System.out);
2524: error = ioe;
2525: }
2526: break;
2527: }
2528: } catch (Throwable t) {
2529: if (noErrors) {
System.out.println(Thread.currentThread().getName()
+ ": ERROR: unexpected Throwable:");
2533: t.printStackTrace(System.out);
2534: error = t;
2535: }
2536: break;
2537: }
2538: }
2539: }
2540: }
2541:
2542: // LUCENE-1130: make sure we can close() even while
2543: // threads are trying to add documents. Strictly
// speaking, this isn't valid use of Lucene's APIs, but we
2545: // still want to be robust to this case:
2546: public void testCloseWithThreads() throws IOException {
2547: int NUM_THREADS = 3;
2548:
2549: for (int iter = 0; iter < 50; iter++) {
2550: MockRAMDirectory dir = new MockRAMDirectory();
2551: IndexWriter writer = new IndexWriter(dir,
2552: new WhitespaceAnalyzer());
2553: ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
2554:
2555: writer.setMergeScheduler(cms);
2556: writer.setMaxBufferedDocs(10);
2557: writer.setMergeFactor(4);
2558:
2559: IndexerThread[] threads = new IndexerThread[NUM_THREADS];
2560: boolean diskFull = false;
2561:
2562: for (int i = 0; i < NUM_THREADS; i++)
2563: threads[i] = new IndexerThread(writer, false);
2564:
2565: for (int i = 0; i < NUM_THREADS; i++)
2566: threads[i].start();
2567:
2568: try {
2569: Thread.sleep(50);
2570: } catch (InterruptedException ie) {
2571: Thread.currentThread().interrupt();
2572: }
2573:
2574: writer.close(false);
2575:
2576: // Make sure threads that are adding docs are not hung:
2577: for (int i = 0; i < NUM_THREADS; i++) {
2578: while (true) {
2579: try {
2580: // Without fix for LUCENE-1130: one of the
2581: // threads will hang
2582: threads[i].join();
2583: break;
2584: } catch (InterruptedException ie) {
2585: Thread.currentThread().interrupt();
2586: }
2587: }
2588: if (threads[i].isAlive())
2589: fail("thread seems to be hung");
2590: }
2591:
2592: // Quick test to make sure index is not corrupt:
2593: IndexReader reader = IndexReader.open(dir);
2594: TermDocs tdocs = reader.termDocs(new Term("field", "aaa"));
2595: int count = 0;
2596: while (tdocs.next()) {
2597: count++;
2598: }
2599: assertTrue(count > 0);
2600: reader.close();
2601:
2602: dir.close();
2603: }
2604: }
2605:
// LUCENE-1130: make sure immediate disk full on creating
2607: // an IndexWriter (hit during DW.ThreadState.init()) is
2608: // OK:
2609: public void testImmediateDiskFull() throws IOException {
2610: MockRAMDirectory dir = new MockRAMDirectory();
2611: IndexWriter writer = new IndexWriter(dir,
2612: new WhitespaceAnalyzer());
2613: dir.setMaxSizeInBytes(dir.getRecomputedActualSizeInBytes());
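// Cap the directory at its current size: any further write will hit
// MockRAMDirectory's simulated disk full.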
2614: writer.setMaxBufferedDocs(2);
2615: final Document doc = new Document();
2616: doc.add(new Field("field",
2617: "aaa bbb ccc ddd eee fff ggg hhh iii jjj",
2618: Field.Store.YES, Field.Index.TOKENIZED,
2619: Field.TermVector.WITH_POSITIONS_OFFSETS));
2620: try {
2621: writer.addDocument(doc);
2622: fail("did not hit disk full");
2623: } catch (IOException ioe) {
2624: }
2625: // Without fix for LUCENE-1130: this call will hang:
2626: try {
2627: writer.addDocument(doc);
2628: fail("did not hit disk full");
2629: } catch (IOException ioe) {
2630: }
2631: try {
2632: writer.close(false);
2633: fail("did not hit disk full");
2634: } catch (IOException ioe) {
2635: }
2636: }
2637:
// LUCENE-1130: make sure immediate disk full on creating
2639: // an IndexWriter (hit during DW.ThreadState.init()), with
2640: // multiple threads, is OK:
2641: public void testImmediateDiskFullWithThreads() throws IOException {
2642:
2643: int NUM_THREADS = 3;
2644:
2645: for (int iter = 0; iter < 10; iter++) {
2646: MockRAMDirectory dir = new MockRAMDirectory();
2647: IndexWriter writer = new IndexWriter(dir,
2648: new WhitespaceAnalyzer());
2649: ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
2650: // We expect disk full exceptions in the merge threads
2651: cms.setSuppressExceptions();
2652: writer.setMergeScheduler(cms);
2653: writer.setMaxBufferedDocs(2);
2654: writer.setMergeFactor(4);
2655: dir.setMaxSizeInBytes(4 * 1024 + 20 * iter);
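// Give each iteration a slightly larger size budget so the simulated
// disk full strikes at a different point each time.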
2656:
2657: IndexerThread[] threads = new IndexerThread[NUM_THREADS];
2658: boolean diskFull = false;
2659:
2660: for (int i = 0; i < NUM_THREADS; i++)
2661: threads[i] = new IndexerThread(writer, true);
2662:
2663: for (int i = 0; i < NUM_THREADS; i++)
2664: threads[i].start();
2665:
2666: for (int i = 0; i < NUM_THREADS; i++) {
2667: while (true) {
2668: try {
2669: // Without fix for LUCENE-1130: one of the
2670: // threads will hang
2671: threads[i].join();
2672: break;
2673: } catch (InterruptedException ie) {
2674: Thread.currentThread().interrupt();
2675: }
2676: }
2677: if (threads[i].isAlive())
2678: fail("thread seems to be hung");
2679: else
2680: assertTrue("hit unexpected Throwable",
2681: threads[i].error == null);
2682: }
2683:
2684: try {
2685: writer.close(false);
2686: } catch (IOException ioe) {
2687: }
2688:
2689: dir.close();
2690: }
2691: }
2692:
2693: // Throws IOException during FieldsWriter.flushDocument and during DocumentsWriter.abort
2694: private static class FailOnlyOnAbortOrFlush extends
2695: MockRAMDirectory.Failure {
2696: private boolean onlyOnce;
2697:
2698: public FailOnlyOnAbortOrFlush(boolean onlyOnce) {
this.onlyOnce = onlyOnce;
2700: }
2701:
2702: public void eval(MockRAMDirectory dir) throws IOException {
2703: if (doFail) {
2704: StackTraceElement[] trace = new Exception()
2705: .getStackTrace();
2706: for (int i = 0; i < trace.length; i++) {
2707: if ("abort".equals(trace[i].getMethodName())
2708: || "flushDocument".equals(trace[i]
2709: .getMethodName())) {
2710: if (onlyOnce)
2711: doFail = false;
2712: throw new IOException("now failing on purpose");
2713: }
2714: }
2715: }
2716: }
2717: }
2718:
2719: // Runs test, with one thread, using the specific failure
2720: // to trigger an IOException
2721: public void _testSingleThreadFailure(
2722: MockRAMDirectory.Failure failure) throws IOException {
2723: MockRAMDirectory dir = new MockRAMDirectory();
2724:
2725: IndexWriter writer = new IndexWriter(dir,
2726: new WhitespaceAnalyzer());
2727: writer.setMaxBufferedDocs(2);
2728: final Document doc = new Document();
2729: doc.add(new Field("field",
2730: "aaa bbb ccc ddd eee fff ggg hhh iii jjj",
2731: Field.Store.YES, Field.Index.TOKENIZED,
2732: Field.TermVector.WITH_POSITIONS_OFFSETS));
2733:
2734: for (int i = 0; i < 6; i++)
2735: writer.addDocument(doc);
2736:
2737: dir.failOn(failure);
2738: failure.setDoFail();
2739: try {
2740: writer.addDocument(doc);
2741: writer.addDocument(doc);
2742: fail("did not hit exception");
2743: } catch (IOException ioe) {
2744: }
2745: failure.clearDoFail();
2746: writer.addDocument(doc);
2747: writer.close(false);
2748: }
2749:
2750: // Runs test, with multiple threads, using the specific
2751: // failure to trigger an IOException
2752: public void _testMultipleThreadsFailure(
2753: MockRAMDirectory.Failure failure) throws IOException {
2754:
2755: int NUM_THREADS = 3;
2756:
2757: for (int iter = 0; iter < 5; iter++) {
2758: MockRAMDirectory dir = new MockRAMDirectory();
2759: IndexWriter writer = new IndexWriter(dir,
2760: new WhitespaceAnalyzer());
2761: ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
2762: // We expect disk full exceptions in the merge threads
2763: cms.setSuppressExceptions();
2764: writer.setMergeScheduler(cms);
2765: writer.setMaxBufferedDocs(2);
2766: writer.setMergeFactor(4);
2767:
2768: IndexerThread[] threads = new IndexerThread[NUM_THREADS];
2769: boolean diskFull = false;
2770:
2771: for (int i = 0; i < NUM_THREADS; i++)
2772: threads[i] = new IndexerThread(writer, true);
2773:
2774: for (int i = 0; i < NUM_THREADS; i++)
2775: threads[i].start();
2776:
2777: try {
2778: Thread.sleep(10);
2779: } catch (InterruptedException ie) {
2780: Thread.currentThread().interrupt();
2781: }
2782:
2783: dir.failOn(failure);
2784: failure.setDoFail();
2785:
2786: for (int i = 0; i < NUM_THREADS; i++) {
2787: while (true) {
2788: try {
2789: threads[i].join();
2790: break;
2791: } catch (InterruptedException ie) {
2792: Thread.currentThread().interrupt();
2793: }
2794: }
2795: if (threads[i].isAlive())
2796: fail("thread seems to be hung");
2797: else
2798: assertTrue("hit unexpected Throwable",
2799: threads[i].error == null);
2800: }
2801:
2802: boolean success = false;
2803: try {
2804: writer.close(false);
2805: success = true;
2806: } catch (IOException ioe) {
2807: }
2808:
2809: if (success) {
2810: IndexReader reader = IndexReader.open(dir);
2811: for (int j = 0; j < reader.maxDoc(); j++) {
2812: if (!reader.isDeleted(j)) {
2813: reader.document(j);
2814: reader.getTermFreqVectors(j);
2815: }
2816: }
2817: reader.close();
2818: }
2819:
2820: dir.close();
2821: }
2822: }
2823:
2824: // LUCENE-1130: make sure initial IOException, and then 2nd
2825: // IOException during abort(), is OK:
2826: public void testIOExceptionDuringAbort() throws IOException {
2827: _testSingleThreadFailure(new FailOnlyOnAbortOrFlush(false));
2828: }
2829:
2830: // LUCENE-1130: make sure initial IOException, and then 2nd
2831: // IOException during abort(), is OK:
2832: public void testIOExceptionDuringAbortOnlyOnce() throws IOException {
2833: _testSingleThreadFailure(new FailOnlyOnAbortOrFlush(true));
2834: }
2835:
2836: // LUCENE-1130: make sure initial IOException, and then 2nd
2837: // IOException during abort(), with multiple threads, is OK:
2838: public void testIOExceptionDuringAbortWithThreads()
2839: throws IOException {
2840: _testMultipleThreadsFailure(new FailOnlyOnAbortOrFlush(false));
2841: }
2842:
2843: // LUCENE-1130: make sure initial IOException, and then 2nd
2844: // IOException during abort(), with multiple threads, is OK:
2845: public void testIOExceptionDuringAbortWithThreadsOnlyOnce()
2846: throws IOException {
2847: _testMultipleThreadsFailure(new FailOnlyOnAbortOrFlush(true));
2848: }
2849:
2850: // Throws IOException during DocumentsWriter.closeDocStore
2851: private static class FailOnlyInCloseDocStore extends
2852: MockRAMDirectory.Failure {
2853: private boolean onlyOnce;
2854:
2855: public FailOnlyInCloseDocStore(boolean onlyOnce) {
this.onlyOnce = onlyOnce;
2857: }
2858:
2859: public void eval(MockRAMDirectory dir) throws IOException {
2860: if (doFail) {
2861: StackTraceElement[] trace = new Exception()
2862: .getStackTrace();
2863: for (int i = 0; i < trace.length; i++) {
2864: if ("closeDocStore"
2865: .equals(trace[i].getMethodName())) {
2866: if (onlyOnce)
2867: doFail = false;
2868: throw new IOException("now failing on purpose");
2869: }
2870: }
2871: }
2872: }
2873: }
2874:
2875: // LUCENE-1130: test IOException in closeDocStore
2876: public void testIOExceptionDuringCloseDocStore() throws IOException {
2877: _testSingleThreadFailure(new FailOnlyInCloseDocStore(false));
2878: }
2879:
2880: // LUCENE-1130: test IOException in closeDocStore
2881: public void testIOExceptionDuringCloseDocStoreOnlyOnce()
2882: throws IOException {
2883: _testSingleThreadFailure(new FailOnlyInCloseDocStore(true));
2884: }
2885:
2886: // LUCENE-1130: test IOException in closeDocStore, with threads
2887: public void testIOExceptionDuringCloseDocStoreWithThreads()
2888: throws IOException {
2889: _testMultipleThreadsFailure(new FailOnlyInCloseDocStore(false));
2890: }
2891:
2892: // LUCENE-1130: test IOException in closeDocStore, with threads
2893: public void testIOExceptionDuringCloseDocStoreWithThreadsOnlyOnce()
2894: throws IOException {
2895: _testMultipleThreadsFailure(new FailOnlyInCloseDocStore(true));
2896: }
2897:
2898: // Throws IOException during DocumentsWriter.writeSegment
2899: private static class FailOnlyInWriteSegment extends
2900: MockRAMDirectory.Failure {
2901: private boolean onlyOnce;
2902:
2903: public FailOnlyInWriteSegment(boolean onlyOnce) {
this.onlyOnce = onlyOnce;
2905: }
2906:
2907: public void eval(MockRAMDirectory dir) throws IOException {
2908: if (doFail) {
2909: StackTraceElement[] trace = new Exception()
2910: .getStackTrace();
2911: for (int i = 0; i < trace.length; i++) {
2912: if ("writeSegment".equals(trace[i].getMethodName())) {
2913: if (onlyOnce)
2914: doFail = false;
2915: // new RuntimeException().printStackTrace(System.out);
2916: throw new IOException("now failing on purpose");
2917: }
2918: }
2919: }
2920: }
2921: }
2922:
2923: // LUCENE-1130: test IOException in writeSegment
2924: public void testIOExceptionDuringWriteSegment() throws IOException {
2925: _testSingleThreadFailure(new FailOnlyInWriteSegment(false));
2926: }
2927:
2928: // LUCENE-1130: test IOException in writeSegment
2929: public void testIOExceptionDuringWriteSegmentOnlyOnce()
2930: throws IOException {
2931: _testSingleThreadFailure(new FailOnlyInWriteSegment(true));
2932: }
2933:
2934: // LUCENE-1130: test IOException in writeSegment, with threads
2935: public void testIOExceptionDuringWriteSegmentWithThreads()
2936: throws IOException {
2937: _testMultipleThreadsFailure(new FailOnlyInWriteSegment(false));
2938: }
2939:
2940: // LUCENE-1130: test IOException in writeSegment, with threads
2941: public void testIOExceptionDuringWriteSegmentWithThreadsOnlyOnce()
2942: throws IOException {
2943: _testMultipleThreadsFailure(new FailOnlyInWriteSegment(true));
2944: }
2945:
2946: // LUCENE-1168
2947: public void testTermVectorCorruption() throws IOException {
2948:
2949: Directory dir = new MockRAMDirectory();
2950: for (int iter = 0; iter < 4; iter++) {
2951: final boolean autoCommit = 1 == iter / 2;
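// Iterations 0-1 run with autoCommit=false, iterations 2-3 with
// autoCommit=true.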
2952: IndexWriter writer = new IndexWriter(dir, autoCommit,
2953: new StandardAnalyzer());
2954: writer.setMaxBufferedDocs(2);
2955: writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
2956: writer.setMergeScheduler(new SerialMergeScheduler());
2957: writer.setMergePolicy(new LogDocMergePolicy());
2958:
2959: Document document = new Document();
2960:
2961: Field storedField = new Field("stored", "stored",
2962: Field.Store.YES, Field.Index.NO);
2963: document.add(storedField);
2964: writer.addDocument(document);
2965: writer.addDocument(document);
2966:
2967: document = new Document();
2968: document.add(storedField);
2969: Field termVectorField = new Field("termVector",
2970: "termVector", Field.Store.NO,
2971: Field.Index.UN_TOKENIZED,
2972: Field.TermVector.WITH_POSITIONS_OFFSETS);
2973:
2974: document.add(termVectorField);
2975: writer.addDocument(document);
2976: writer.optimize();
2977: writer.close();
2978:
2979: IndexReader reader = IndexReader.open(dir);
2980: for (int i = 0; i < reader.numDocs(); i++) {
2981: reader.document(i);
2982: reader.getTermFreqVectors(i);
2983: }
2984: reader.close();
2985:
2986: writer = new IndexWriter(dir, autoCommit,
2987: new StandardAnalyzer());
2988: writer.setMaxBufferedDocs(2);
2989: writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
2990: writer.setMergeScheduler(new SerialMergeScheduler());
2991: writer.setMergePolicy(new LogDocMergePolicy());
2992:
2993: Directory[] indexDirs = { dir };
2994: writer.addIndexes(indexDirs);
2995: writer.close();
2996: }
2997: dir.close();
2998: }
2999:
3000: // LUCENE-1168
3001: public void testTermVectorCorruption2() throws IOException {
3002: Directory dir = new MockRAMDirectory();
3003: for (int iter = 0; iter < 4; iter++) {
3004: final boolean autoCommit = 1 == iter / 2;
3005: IndexWriter writer = new IndexWriter(dir, autoCommit,
3006: new StandardAnalyzer());
3007: writer.setMaxBufferedDocs(2);
3008: writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
3009: writer.setMergeScheduler(new SerialMergeScheduler());
3010: writer.setMergePolicy(new LogDocMergePolicy());
3011:
3012: Document document = new Document();
3013:
3014: Field storedField = new Field("stored", "stored",
3015: Field.Store.YES, Field.Index.NO);
3016: document.add(storedField);
3017: writer.addDocument(document);
3018: writer.addDocument(document);
3019:
3020: document = new Document();
3021: document.add(storedField);
3022: Field termVectorField = new Field("termVector",
3023: "termVector", Field.Store.NO,
3024: Field.Index.UN_TOKENIZED,
3025: Field.TermVector.WITH_POSITIONS_OFFSETS);
3026: document.add(termVectorField);
3027: writer.addDocument(document);
3028: writer.optimize();
3029: writer.close();
3030:
3031: IndexReader reader = IndexReader.open(dir);
3032: assertTrue(reader.getTermFreqVectors(0) == null);
3033: assertTrue(reader.getTermFreqVectors(1) == null);
3034: assertTrue(reader.getTermFreqVectors(2) != null);
3035: reader.close();
3036: }
3037: dir.close();
3038: }
3039:
3040: // LUCENE-1168
3041: public void testTermVectorCorruption3() throws IOException {
3042: Directory dir = new MockRAMDirectory();
3043: IndexWriter writer = new IndexWriter(dir, false,
3044: new StandardAnalyzer());
3045: writer.setMaxBufferedDocs(2);
3046: writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
3047: writer.setMergeScheduler(new SerialMergeScheduler());
3048: writer.setMergePolicy(new LogDocMergePolicy());
3049:
Document document = new Document();
3053: Field storedField = new Field("stored", "stored",
3054: Field.Store.YES, Field.Index.NO);
3055: document.add(storedField);
3056: Field termVectorField = new Field("termVector", "termVector",
3057: Field.Store.NO, Field.Index.UN_TOKENIZED,
3058: Field.TermVector.WITH_POSITIONS_OFFSETS);
3059: document.add(termVectorField);
3060: for (int i = 0; i < 10; i++)
3061: writer.addDocument(document);
3062: writer.close();
3063:
3064: writer = new IndexWriter(dir, false, new StandardAnalyzer());
3065: writer.setMaxBufferedDocs(2);
3066: writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
3067: writer.setMergeScheduler(new SerialMergeScheduler());
3068: writer.setMergePolicy(new LogDocMergePolicy());
3069: for (int i = 0; i < 6; i++)
3070: writer.addDocument(document);
3071:
3072: writer.optimize();
3073: writer.close();
3074:
3075: IndexReader reader = IndexReader.open(dir);
3076: for (int i = 0; i < 10; i++) {
3077: reader.getTermFreqVectors(i);
3078: reader.document(i);
3079: }
3080: reader.close();
3081: dir.close();
3082: }
3083:
// Counts how many merges the writer performs; used by
// testOptimizeOverMerge to verify how many merges optimize() runs
3086: private class MyIndexWriter extends IndexWriter {
3087: int mergeCount;
3088: Directory myDir;
3089:
3090: public MyIndexWriter(Directory dir) throws IOException {
super(dir, new StandardAnalyzer());
3092: myDir = dir;
3093: }
3094:
3095: synchronized MergePolicy.OneMerge getNextMerge() {
MergePolicy.OneMerge merge = super.getNextMerge();
3097: if (merge != null)
3098: mergeCount++;
3099: return merge;
3100: }
3101: }
3102:
3103: public void testOptimizeOverMerge() throws IOException {
3104: Directory dir = new MockRAMDirectory();
3105: IndexWriter writer = new IndexWriter(dir, false,
3106: new StandardAnalyzer());
3107: writer.setMaxBufferedDocs(2);
3108: writer.setMergeFactor(100);
3109: writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
3110:
Document document = new Document();
3114: Field storedField = new Field("stored", "stored",
3115: Field.Store.YES, Field.Index.NO);
3116: document.add(storedField);
3117: Field termVectorField = new Field("termVector", "termVector",
3118: Field.Store.NO, Field.Index.UN_TOKENIZED,
3119: Field.TermVector.WITH_POSITIONS_OFFSETS);
3120: document.add(termVectorField);
3121: for (int i = 0; i < 170; i++)
3122: writer.addDocument(document);
3123:
3124: writer.close();
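// 170 docs with maxBufferedDocs=2 and mergeFactor=100 leave 85
// unmerged segments; optimizing with MyIndexWriter's default
// mergeFactor of 10 should cascade through exactly 10 merges.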
3125: MyIndexWriter myWriter = new MyIndexWriter(dir);
3126: myWriter.optimize();
3127: assertEquals(10, myWriter.mergeCount);
3128: }
3129:
3130: // LUCENE-1179
3131: public void testEmptyFieldName() throws IOException {
3132: MockRAMDirectory dir = new MockRAMDirectory();
3133: IndexWriter writer = new IndexWriter(dir,
3134: new WhitespaceAnalyzer());
3135: Document doc = new Document();
3136: doc.add(new Field("", "a b c", Field.Store.NO,
3137: Field.Index.TOKENIZED));
3138: writer.addDocument(doc);
3139: writer.close();
3140: }
3141: }