0001: package org.apache.lucene.index;
0002:
0003: /**
0004: * Licensed to the Apache Software Foundation (ASF) under one or more
0005: * contributor license agreements. See the NOTICE file distributed with
0006: * this work for additional information regarding copyright ownership.
0007: * The ASF licenses this file to You under the Apache License, Version 2.0
0008: * (the "License"); you may not use this file except in compliance with
0009: * the License. You may obtain a copy of the License at
0010: *
0011: * http://www.apache.org/licenses/LICENSE-2.0
0012: *
0013: * Unless required by applicable law or agreed to in writing, software
0014: * distributed under the License is distributed on an "AS IS" BASIS,
0015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016: * See the License for the specific language governing permissions and
0017: * limitations under the License.
0018: */
0019:
0020: import org.apache.lucene.analysis.Analyzer;
0021: import org.apache.lucene.document.Document;
0022: import org.apache.lucene.search.Similarity;
0023: import org.apache.lucene.store.Directory;
0024: import org.apache.lucene.store.FSDirectory;
0025: import org.apache.lucene.store.Lock;
0026: import org.apache.lucene.store.LockObtainFailedException;
0027: import org.apache.lucene.store.AlreadyClosedException;
0028: import org.apache.lucene.util.BitVector;
0029:
0030: import java.io.File;
0031: import java.io.IOException;
0032: import java.io.PrintStream;
0033: import java.util.List;
0034: import java.util.ArrayList;
0035: import java.util.HashMap;
0036: import java.util.Set;
0037: import java.util.HashSet;
0038: import java.util.LinkedList;
0039: import java.util.Iterator;
0040: import java.util.Map.Entry;
0041:
0042: /**
0043: An <code>IndexWriter</code> creates and maintains an index.
0044:
0045: <p>The <code>create</code> argument to the
0046: <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer, boolean)"><b>constructor</b></a>
0047: determines whether a new index is created, or whether an existing index is
0048: opened. Note that you
0049: can open an index with <code>create=true</code> even while readers are
0050: using the index. The old readers will continue to search
0051: the "point in time" snapshot they had opened, and won't
0052: see the newly created index until they re-open. There are
0053: also <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer)"><b>constructors</b></a>
0054: with no <code>create</code> argument which
0055: will create a new index if there is not already an index at the
0056: provided path and otherwise open the existing index.</p>
0057:
0058: <p>In either case, documents are added with <a
0059: href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a>
0060: and removed with <a
0061: href="#deleteDocuments(org.apache.lucene.index.Term)"><b>deleteDocuments</b></a>.
0062: A document can be updated with <a href="#updateDocument(org.apache.lucene.index.Term, org.apache.lucene.document.Document)"><b>updateDocument</b></a>
0063: (which just deletes and then adds the entire document).
0064: When finished adding, deleting and updating documents, <a href="#close()"><b>close</b></a> should be called.</p>
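
<p>As a rough usage sketch (the analyzer and field options shown here,
such as {@link org.apache.lucene.analysis.standard.StandardAnalyzer} and
<code>Field.Index.TOKENIZED</code>, are illustrative choices only):</p>

<pre>
IndexWriter writer = new IndexWriter("/tmp/testindex",
                                     new StandardAnalyzer(), true);
Document doc = new Document();
doc.add(new Field("contents", "some text to index",
                  Field.Store.YES, Field.Index.TOKENIZED));
writer.addDocument(doc);   // buffered in RAM, flushed periodically
writer.close();            // commit the changes and release the write lock
</pre>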
0065:
0066: <p>These changes are buffered in memory and periodically
0067: flushed to the {@link Directory} (during the above method
0068: calls). A flush is triggered when there are enough
0069: buffered deletes (see {@link #setMaxBufferedDeleteTerms})
0070: or enough added documents since the last flush, whichever
0071: is sooner. For the added documents, flushing is triggered
0072: either by RAM usage of the documents (see {@link
0073: #setRAMBufferSizeMB}) or the number of added documents.
0074: The default is to flush when RAM usage hits 16 MB. For
0075: best indexing speed you should flush by RAM usage with a
0076: large RAM buffer. You can also force a flush by calling
0077: {@link #flush}. When a flush occurs, both pending deletes
0078: and added documents are flushed to the index. A flush may
0079: also trigger one or more segment merges which by default
0080: run with a background thread so as not to block the
0081: addDocument calls (see <a href="#mergePolicy">below</a>
0082: for changing the {@link MergeScheduler}).</p>
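
<p>For example, a writer tuned to flush only by RAM usage might be
configured like this (48 MB is just an illustrative value):</p>

<pre>
writer.setRAMBufferSizeMB(48.0); // flush whenever buffered docs use ~48 MB
                                 // (doc-count flushing is off by default)
</pre>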
0083:
0084: <a name="autoCommit"></a>
0085: <p>The optional <code>autoCommit</code> argument to the
0086: <a href="#IndexWriter(org.apache.lucene.store.Directory, boolean, org.apache.lucene.analysis.Analyzer)"><b>constructors</b></a>
0087: controls visibility of the changes to {@link IndexReader} instances reading the same index.
0088: When this is <code>false</code>, changes are not
0089: visible until {@link #close()} is called.
0090: Note that changes will still be flushed to the
0091: {@link org.apache.lucene.store.Directory} as new files,
0092: but are not committed (no new <code>segments_N</code> file
0093: is written referencing the new files) until {@link #close} is
0094: called. If something goes terribly wrong (for example the
0095: JVM crashes) before {@link #close()}, then
0096: the index will reflect none of the changes made (it will
0097: remain in its starting state).
0098: You can also call {@link #abort()}, which closes the writer without committing any
0099: changes, and removes any index
0100: files that had been flushed but are now unreferenced.
0101: This mode is useful for preventing readers from refreshing
0102: at a bad time (for example after you've done all your
0103: deletes but before you've done your adds).
0104: It can also be used to implement simple single-writer
0105: transactional semantics ("all or none").</p>
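
<p>A minimal "all or none" sketch with <code>autoCommit=false</code>
(the <code>dir</code> and <code>analyzer</code> variables and the
clean-up style are illustrative only):</p>

<pre>
IndexWriter writer = new IndexWriter(dir, false, analyzer);
boolean committed = false;
try {
  // ... addDocument / deleteDocuments / updateDocument calls ...
  writer.close();     // commit: readers can now see all of the changes
  committed = true;
} finally {
  if (!committed)
    writer.abort();   // discard everything done since the writer was opened
}
</pre>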
0106:
0107: <p>When <code>autoCommit</code> is <code>true</code> then
0108: every flush is also a commit ({@link IndexReader}
0109: instances will see each flush as changes to the index).
0110: This is the default, to match the behavior before 2.2.
0111: When running in this mode, be careful not to refresh your
0112: readers while optimize or segment merges are taking place
0113: as this can tie up substantial disk space.</p>
0114:
0115: <p>Regardless of <code>autoCommit</code>, an {@link
0116: IndexReader} or {@link org.apache.lucene.search.IndexSearcher} will only see the
0117: index as of the "point in time" that it was opened. Any
0118: changes committed to the index after the reader was opened
0119: are not visible until the reader is re-opened.</p>
0120:
0121: <p>If an index will not have more documents added for a while and optimal search
0122: performance is desired, then the <a href="#optimize()"><b>optimize</b></a>
0123: method should be called before the index is closed.</p>
0124:
0125: <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
0126: another <code>IndexWriter</code> on the same directory will lead to a
0127: {@link LockObtainFailedException}. The {@link LockObtainFailedException}
0128: is also thrown if an IndexReader on the same directory is used to delete documents
0129: from the index.</p>
0130:
0131: <a name="deletionPolicy"></a>
0132: <p>Expert: <code>IndexWriter</code> allows an optional
0133: {@link IndexDeletionPolicy} implementation to be
0134: specified. You can use this to control when prior commits
0135: are deleted from the index. The default policy is {@link
0136: KeepOnlyLastCommitDeletionPolicy} which removes all prior
0137: commits as soon as a new commit is done (this matches
0138: behavior before 2.2). Creating your own policy can allow
0139: you to explicitly keep previous "point in time" commits
0140: alive in the index for some time, to allow readers to
0141: refresh to the new commit without having the old commit
0142: deleted out from under them. This is necessary on
0143: filesystems like NFS that do not support "delete on last
0144: close" semantics, which Lucene's "point in time" search
0145: normally relies on. </p>
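
<p>For instance, a policy that never deletes prior commits (leaving
clean-up to some external process) could be sketched roughly as below;
check the exact method signatures against {@link IndexDeletionPolicy}:</p>

<pre>
public class KeepAllCommitsDeletionPolicy implements IndexDeletionPolicy {
  public void onInit(List commits) {}    // keep every commit found at startup
  public void onCommit(List commits) {}  // keep every commit made by the writer
}

IndexWriter writer = new IndexWriter(dir, true, analyzer,
                                     new KeepAllCommitsDeletionPolicy());
</pre>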
0146:
0147: <a name="mergePolicy"></a> <p>Expert:
0148: <code>IndexWriter</code> allows you to separately change
0149: the {@link MergePolicy} and the {@link MergeScheduler}.
0150: The {@link MergePolicy} is invoked whenever there are
0151: changes to the segments in the index. Its role is to
0152: select which merges to do, if any, and return a {@link
0153: MergePolicy.MergeSpecification} describing the merges. It
also selects merges to do for optimize(). (The default is
{@link LogByteSizeMergePolicy}.) Then, the {@link
0156: MergeScheduler} is invoked with the requested merges and
0157: it decides when and how to run the merges. The default is
0158: {@link ConcurrentMergeScheduler}. </p>
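
<p>For example, to use a doc-count based merge policy and run merges
sequentially in the calling thread (assuming the serial scheduler,
SerialMergeScheduler, that accompanies {@link ConcurrentMergeScheduler}):</p>

<pre>
writer.setMergePolicy(new LogDocMergePolicy());
writer.setMergeScheduler(new SerialMergeScheduler());
</pre>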
0159: */
0160:
0161: /*
0162: * Clarification: Check Points (and commits)
0163: * Being able to set autoCommit=false allows IndexWriter to flush and
0164: * write new index files to the directory without writing a new segments_N
0165: * file which references these new files. It also means that the state of
0166: * the in memory SegmentInfos object is different than the most recent
0167: * segments_N file written to the directory.
0168: *
0169: * Each time the SegmentInfos is changed, and matches the (possibly
0170: * modified) directory files, we have a new "check point".
0171: * If the modified/new SegmentInfos is written to disk - as a new
0172: * (generation of) segments_N file - this check point is also an
0173: * IndexCommitPoint.
0174: *
0175: * With autoCommit=true, every checkPoint is also a CommitPoint.
0176: * With autoCommit=false, some checkPoints may not be commits.
0177: *
0178: * A new checkpoint always replaces the previous checkpoint and
0179: * becomes the new "front" of the index. This allows the IndexFileDeleter
0180: * to delete files that are referenced only by stale checkpoints.
0181: * (files that were created since the last commit, but are no longer
0182: * referenced by the "front" of the index). For this, IndexFileDeleter
0183: * keeps track of the last non commit checkpoint.
0184: */
0185: public class IndexWriter {
0186:
0187: /**
* Default value for the write lock timeout (1,000 milliseconds).
0189: * @see #setDefaultWriteLockTimeout
0190: */
0191: public static long WRITE_LOCK_TIMEOUT = 1000;
0192:
0193: private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
0194:
0195: /**
0196: * Name of the write lock in the index.
0197: */
0198: public static final String WRITE_LOCK_NAME = "write.lock";
0199:
0200: /**
0201: * @deprecated
0202: * @see LogMergePolicy#DEFAULT_MERGE_FACTOR
0203: */
0204: public final static int DEFAULT_MERGE_FACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
0205:
0206: /**
0207: * Value to denote a flush trigger is disabled
0208: */
0209: public final static int DISABLE_AUTO_FLUSH = -1;
0210:
0211: /**
0212: * Disabled by default (because IndexWriter flushes by RAM usage
0213: * by default). Change using {@link #setMaxBufferedDocs(int)}.
0214: */
0215: public final static int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
0216:
0217: /**
0218: * Default value is 16 MB (which means flush when buffered
0219: * docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}.
0220: */
0221: public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
0222:
0223: /**
0224: * Disabled by default (because IndexWriter flushes by RAM usage
0225: * by default). Change using {@link #setMaxBufferedDeleteTerms(int)}.
0226: */
0227: public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
0228:
0229: /**
0230: * @deprecated
0231: * @see LogDocMergePolicy#DEFAULT_MAX_MERGE_DOCS
0232: */
0233: public final static int DEFAULT_MAX_MERGE_DOCS = LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS;
0234:
0235: /**
0236: * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
0237: */
0238: public final static int DEFAULT_MAX_FIELD_LENGTH = 10000;
0239:
0240: /**
0241: * Default value is 128. Change using {@link #setTermIndexInterval(int)}.
0242: */
0243: public final static int DEFAULT_TERM_INDEX_INTERVAL = 128;
0244:
0245: /**
0246: * Absolute hard maximum length for a term. If a term
0247: * arrives from the analyzer longer than this length, it
0248: * is skipped and a message is printed to infoStream, if
0249: * set (see {@link #setInfoStream}).
0250: */
0251: public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;
0252:
0253: // The normal read buffer size defaults to 1024, but
0254: // increasing this during merging seems to yield
0255: // performance gains. However we don't want to increase
0256: // it too much because there are quite a few
0257: // BufferedIndexInputs created during merging. See
0258: // LUCENE-888 for details.
0259: private final static int MERGE_READ_BUFFER_SIZE = 4096;
0260:
0261: // Used for printing messages
0262: private static Object MESSAGE_ID_LOCK = new Object();
0263: private static int MESSAGE_ID = 0;
0264: private int messageID = -1;
0265:
0266: private Directory directory; // where this index resides
0267: private Analyzer analyzer; // how to analyze text
0268:
0269: private Similarity similarity = Similarity.getDefault(); // how to normalize
0270:
0271: private boolean commitPending; // true if segmentInfos has changes not yet committed
0272: private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
0273:
0274: private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
0275: private boolean localAutoCommit; // saved autoCommit during local transaction
0276: private boolean autoCommit = true; // false if we should commit only on close
0277:
0278: private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
0279: private DocumentsWriter docWriter;
0280: private IndexFileDeleter deleter;
0281:
0282: private Set segmentsToOptimize = new HashSet(); // used by optimize to note those needing optimization
0283:
0284: private Lock writeLock;
0285:
0286: private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
0287:
0288: private boolean closeDir;
0289: private boolean closed;
0290: private boolean closing;
0291:
0292: // Holds all SegmentInfo instances currently involved in
0293: // merges
0294: private HashSet mergingSegments = new HashSet();
0295:
0296: private MergePolicy mergePolicy = new LogByteSizeMergePolicy();
0297: private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
0298: private LinkedList pendingMerges = new LinkedList();
0299: private Set runningMerges = new HashSet();
0300: private List mergeExceptions = new ArrayList();
0301: private long mergeGen;
0302: private boolean stopMerges;
0303:
0304: /**
0305: * Used internally to throw an {@link
0306: * AlreadyClosedException} if this IndexWriter has been
0307: * closed.
* @throws AlreadyClosedException if this IndexWriter is closed
0309: */
0310: protected final void ensureOpen() throws AlreadyClosedException {
0311: if (closed) {
0312: throw new AlreadyClosedException(
0313: "this IndexWriter is closed");
0314: }
0315: }
0316:
0317: /**
0318: * Prints a message to the infoStream (if non-null),
0319: * prefixed with the identifying information for this
0320: * writer and the thread that's calling it.
0321: */
0322: public void message(String message) {
0323: if (infoStream != null)
0324: infoStream.println("IW " + messageID + " ["
0325: + Thread.currentThread().getName() + "]: "
0326: + message);
0327: }
0328:
0329: private synchronized void setMessageID() {
0330: if (infoStream != null && messageID == -1) {
0331: synchronized (MESSAGE_ID_LOCK) {
0332: messageID = MESSAGE_ID++;
0333: }
0334: }
0335: }
0336:
0337: /**
0338: * Casts current mergePolicy to LogMergePolicy, and throws
0339: * an exception if the mergePolicy is not a LogMergePolicy.
0340: */
0341: private LogMergePolicy getLogMergePolicy() {
0342: if (mergePolicy instanceof LogMergePolicy)
0343: return (LogMergePolicy) mergePolicy;
0344: else
0345: throw new IllegalArgumentException(
0346: "this method can only be called when the merge policy is the default LogMergePolicy");
0347: }
0348:
0349: /** <p>Get the current setting of whether newly flushed
0350: * segments will use the compound file format. Note that
0351: * this just returns the value previously set with
0352: * setUseCompoundFile(boolean), or the default value
0353: * (true). You cannot use this to query the status of
0354: * previously flushed segments.</p>
0355: *
0356: * <p>Note that this method is a convenience method: it
0357: * just calls mergePolicy.getUseCompoundFile as long as
0358: * mergePolicy is an instance of {@link LogMergePolicy}.
0359: * Otherwise an IllegalArgumentException is thrown.</p>
0360: *
0361: * @see #setUseCompoundFile(boolean)
0362: */
0363: public boolean getUseCompoundFile() {
0364: return getLogMergePolicy().getUseCompoundFile();
0365: }
0366:
0367: /** <p>Setting to turn on usage of a compound file. When on,
0368: * multiple files for each segment are merged into a
0369: * single file when a new segment is flushed.</p>
0370: *
0371: * <p>Note that this method is a convenience method: it
0372: * just calls mergePolicy.setUseCompoundFile as long as
0373: * mergePolicy is an instance of {@link LogMergePolicy}.
0374: * Otherwise an IllegalArgumentException is thrown.</p>
0375: */
0376: public void setUseCompoundFile(boolean value) {
0377: getLogMergePolicy().setUseCompoundFile(value);
0378: getLogMergePolicy().setUseCompoundDocStore(value);
0379: }
0380:
0381: /** Expert: Set the Similarity implementation used by this IndexWriter.
0382: *
0383: * @see Similarity#setDefault(Similarity)
0384: */
0385: public void setSimilarity(Similarity similarity) {
0386: ensureOpen();
this.similarity = similarity;
0388: }
0389:
0390: /** Expert: Return the Similarity implementation used by this IndexWriter.
0391: *
0392: * <p>This defaults to the current value of {@link Similarity#getDefault()}.
0393: */
0394: public Similarity getSimilarity() {
0395: ensureOpen();
return this.similarity;
0397: }
0398:
0399: /** Expert: Set the interval between indexed terms. Large values cause less
0400: * memory to be used by IndexReader, but slow random-access to terms. Small
0401: * values cause more memory to be used by an IndexReader, and speed
0402: * random-access to terms.
0403: *
0404: * This parameter determines the amount of computation required per query
0405: * term, regardless of the number of documents that contain that term. In
0406: * particular, it is the maximum number of other terms that must be
0407: * scanned before a term is located and its frequency and position information
0408: * may be processed. In a large index with user-entered query terms, query
0409: * processing time is likely to be dominated not by term lookup but rather
0410: * by the processing of frequency and positional data. In a small index
0411: * or when many uncommon query terms are generated (e.g., by wildcard
0412: * queries) term lookup may become a dominant cost.
0413: *
0414: * In particular, <code>numUniqueTerms/interval</code> terms are read into
0415: * memory by an IndexReader, and, on average, <code>interval/2</code> terms
0416: * must be scanned for each random term access.
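*
* For example, with 1,000,000 unique terms and the default interval of
* 128, roughly 7,800 index terms are held in memory and, on average,
* about 64 terms are scanned per lookup.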
0417: *
0418: * @see #DEFAULT_TERM_INDEX_INTERVAL
0419: */
0420: public void setTermIndexInterval(int interval) {
0421: ensureOpen();
this.termIndexInterval = interval;
0423: }
0424:
0425: /** Expert: Return the interval between indexed terms.
0426: *
0427: * @see #setTermIndexInterval(int)
0428: */
0429: public int getTermIndexInterval() {
0430: ensureOpen();
0431: return termIndexInterval;
0432: }
0433:
0434: /**
0435: * Constructs an IndexWriter for the index in <code>path</code>.
0436: * Text will be analyzed with <code>a</code>. If <code>create</code>
0437: * is true, then a new, empty index will be created in
0438: * <code>path</code>, replacing the index already there, if any.
0439: *
0440: * @param path the path to the index directory
0441: * @param a the analyzer to use
0442: * @param create <code>true</code> to create the index or overwrite
0443: * the existing one; <code>false</code> to append to the existing
0444: * index
0445: * @throws CorruptIndexException if the index is corrupt
0446: * @throws LockObtainFailedException if another writer
0447: * has this index open (<code>write.lock</code> could not
0448: * be obtained)
0449: * @throws IOException if the directory cannot be read/written to, or
0450: * if it does not exist and <code>create</code> is
0451: * <code>false</code> or if there is any other low-level
0452: * IO error
0453: */
0454: public IndexWriter(String path, Analyzer a, boolean create)
0455: throws CorruptIndexException, LockObtainFailedException,
0456: IOException {
0457: init(FSDirectory.getDirectory(path), a, create, true, null,
0458: true);
0459: }
0460:
0461: /**
0462: * Constructs an IndexWriter for the index in <code>path</code>.
0463: * Text will be analyzed with <code>a</code>. If <code>create</code>
0464: * is true, then a new, empty index will be created in
0465: * <code>path</code>, replacing the index already there, if any.
0466: *
0467: * @param path the path to the index directory
0468: * @param a the analyzer to use
0469: * @param create <code>true</code> to create the index or overwrite
0470: * the existing one; <code>false</code> to append to the existing
0471: * index
0472: * @throws CorruptIndexException if the index is corrupt
0473: * @throws LockObtainFailedException if another writer
0474: * has this index open (<code>write.lock</code> could not
0475: * be obtained)
0476: * @throws IOException if the directory cannot be read/written to, or
0477: * if it does not exist and <code>create</code> is
0478: * <code>false</code> or if there is any other low-level
0479: * IO error
0480: */
0481: public IndexWriter(File path, Analyzer a, boolean create)
0482: throws CorruptIndexException, LockObtainFailedException,
0483: IOException {
0484: init(FSDirectory.getDirectory(path), a, create, true, null,
0485: true);
0486: }
0487:
0488: /**
0489: * Constructs an IndexWriter for the index in <code>d</code>.
0490: * Text will be analyzed with <code>a</code>. If <code>create</code>
0491: * is true, then a new, empty index will be created in
0492: * <code>d</code>, replacing the index already there, if any.
0493: *
0494: * @param d the index directory
0495: * @param a the analyzer to use
0496: * @param create <code>true</code> to create the index or overwrite
0497: * the existing one; <code>false</code> to append to the existing
0498: * index
0499: * @throws CorruptIndexException if the index is corrupt
0500: * @throws LockObtainFailedException if another writer
0501: * has this index open (<code>write.lock</code> could not
0502: * be obtained)
0503: * @throws IOException if the directory cannot be read/written to, or
0504: * if it does not exist and <code>create</code> is
0505: * <code>false</code> or if there is any other low-level
0506: * IO error
0507: */
0508: public IndexWriter(Directory d, Analyzer a, boolean create)
0509: throws CorruptIndexException, LockObtainFailedException,
0510: IOException {
0511: init(d, a, create, false, null, true);
0512: }
0513:
0514: /**
0515: * Constructs an IndexWriter for the index in
0516: * <code>path</code>, first creating it if it does not
0517: * already exist. Text will be analyzed with
0518: * <code>a</code>.
0519: *
0520: * @param path the path to the index directory
0521: * @param a the analyzer to use
0522: * @throws CorruptIndexException if the index is corrupt
0523: * @throws LockObtainFailedException if another writer
0524: * has this index open (<code>write.lock</code> could not
0525: * be obtained)
0526: * @throws IOException if the directory cannot be
0527: * read/written to or if there is any other low-level
0528: * IO error
0529: */
0530: public IndexWriter(String path, Analyzer a)
0531: throws CorruptIndexException, LockObtainFailedException,
0532: IOException {
0533: init(FSDirectory.getDirectory(path), a, true, null, true);
0534: }
0535:
0536: /**
0537: * Constructs an IndexWriter for the index in
0538: * <code>path</code>, first creating it if it does not
0539: * already exist. Text will be analyzed with
0540: * <code>a</code>.
0541: *
0542: * @param path the path to the index directory
0543: * @param a the analyzer to use
0544: * @throws CorruptIndexException if the index is corrupt
0545: * @throws LockObtainFailedException if another writer
0546: * has this index open (<code>write.lock</code> could not
0547: * be obtained)
0548: * @throws IOException if the directory cannot be
0549: * read/written to or if there is any other low-level
0550: * IO error
0551: */
0552: public IndexWriter(File path, Analyzer a)
0553: throws CorruptIndexException, LockObtainFailedException,
0554: IOException {
0555: init(FSDirectory.getDirectory(path), a, true, null, true);
0556: }
0557:
0558: /**
0559: * Constructs an IndexWriter for the index in
0560: * <code>d</code>, first creating it if it does not
0561: * already exist. Text will be analyzed with
0562: * <code>a</code>.
0563: *
0564: * @param d the index directory
0565: * @param a the analyzer to use
0566: * @throws CorruptIndexException if the index is corrupt
0567: * @throws LockObtainFailedException if another writer
0568: * has this index open (<code>write.lock</code> could not
0569: * be obtained)
0570: * @throws IOException if the directory cannot be
0571: * read/written to or if there is any other low-level
0572: * IO error
0573: */
0574: public IndexWriter(Directory d, Analyzer a)
0575: throws CorruptIndexException, LockObtainFailedException,
0576: IOException {
0577: init(d, a, false, null, true);
0578: }
0579:
0580: /**
0581: * Constructs an IndexWriter for the index in
0582: * <code>d</code>, first creating it if it does not
0583: * already exist. Text will be analyzed with
0584: * <code>a</code>.
0585: *
0586: * @param d the index directory
0587: * @param autoCommit see <a href="#autoCommit">above</a>
0588: * @param a the analyzer to use
0589: * @throws CorruptIndexException if the index is corrupt
0590: * @throws LockObtainFailedException if another writer
0591: * has this index open (<code>write.lock</code> could not
0592: * be obtained)
0593: * @throws IOException if the directory cannot be
0594: * read/written to or if there is any other low-level
0595: * IO error
0596: */
0597: public IndexWriter(Directory d, boolean autoCommit, Analyzer a)
0598: throws CorruptIndexException, LockObtainFailedException,
0599: IOException {
0600: init(d, a, false, null, autoCommit);
0601: }
0602:
0603: /**
0604: * Constructs an IndexWriter for the index in <code>d</code>.
0605: * Text will be analyzed with <code>a</code>. If <code>create</code>
0606: * is true, then a new, empty index will be created in
0607: * <code>d</code>, replacing the index already there, if any.
0608: *
0609: * @param d the index directory
0610: * @param autoCommit see <a href="#autoCommit">above</a>
0611: * @param a the analyzer to use
0612: * @param create <code>true</code> to create the index or overwrite
0613: * the existing one; <code>false</code> to append to the existing
0614: * index
0615: * @throws CorruptIndexException if the index is corrupt
0616: * @throws LockObtainFailedException if another writer
0617: * has this index open (<code>write.lock</code> could not
0618: * be obtained)
0619: * @throws IOException if the directory cannot be read/written to, or
0620: * if it does not exist and <code>create</code> is
0621: * <code>false</code> or if there is any other low-level
0622: * IO error
0623: */
0624: public IndexWriter(Directory d, boolean autoCommit, Analyzer a,
0625: boolean create) throws CorruptIndexException,
0626: LockObtainFailedException, IOException {
0627: init(d, a, create, false, null, autoCommit);
0628: }
0629:
0630: /**
0631: * Expert: constructs an IndexWriter with a custom {@link
0632: * IndexDeletionPolicy}, for the index in <code>d</code>,
0633: * first creating it if it does not already exist. Text
0634: * will be analyzed with <code>a</code>.
0635: *
0636: * @param d the index directory
0637: * @param autoCommit see <a href="#autoCommit">above</a>
0638: * @param a the analyzer to use
0639: * @param deletionPolicy see <a href="#deletionPolicy">above</a>
0640: * @throws CorruptIndexException if the index is corrupt
0641: * @throws LockObtainFailedException if another writer
0642: * has this index open (<code>write.lock</code> could not
0643: * be obtained)
0644: * @throws IOException if the directory cannot be
0645: * read/written to or if there is any other low-level
0646: * IO error
0647: */
0648: public IndexWriter(Directory d, boolean autoCommit, Analyzer a,
0649: IndexDeletionPolicy deletionPolicy)
0650: throws CorruptIndexException, LockObtainFailedException,
0651: IOException {
0652: init(d, a, false, deletionPolicy, autoCommit);
0653: }
0654:
0655: /**
0656: * Expert: constructs an IndexWriter with a custom {@link
0657: * IndexDeletionPolicy}, for the index in <code>d</code>.
0658: * Text will be analyzed with <code>a</code>. If
0659: * <code>create</code> is true, then a new, empty index
0660: * will be created in <code>d</code>, replacing the index
0661: * already there, if any.
0662: *
0663: * @param d the index directory
0664: * @param autoCommit see <a href="#autoCommit">above</a>
0665: * @param a the analyzer to use
0666: * @param create <code>true</code> to create the index or overwrite
0667: * the existing one; <code>false</code> to append to the existing
0668: * index
0669: * @param deletionPolicy see <a href="#deletionPolicy">above</a>
0670: * @throws CorruptIndexException if the index is corrupt
0671: * @throws LockObtainFailedException if another writer
0672: * has this index open (<code>write.lock</code> could not
0673: * be obtained)
0674: * @throws IOException if the directory cannot be read/written to, or
0675: * if it does not exist and <code>create</code> is
0676: * <code>false</code> or if there is any other low-level
0677: * IO error
0678: */
0679: public IndexWriter(Directory d, boolean autoCommit, Analyzer a,
0680: boolean create, IndexDeletionPolicy deletionPolicy)
0681: throws CorruptIndexException, LockObtainFailedException,
0682: IOException {
0683: init(d, a, create, false, deletionPolicy, autoCommit);
0684: }
0685:
0686: private void init(Directory d, Analyzer a, boolean closeDir,
0687: IndexDeletionPolicy deletionPolicy, boolean autoCommit)
0688: throws CorruptIndexException, LockObtainFailedException,
0689: IOException {
0690: if (IndexReader.indexExists(d)) {
0691: init(d, a, false, closeDir, deletionPolicy, autoCommit);
0692: } else {
0693: init(d, a, true, closeDir, deletionPolicy, autoCommit);
0694: }
0695: }
0696:
0697: private void init(Directory d, Analyzer a, final boolean create,
0698: boolean closeDir, IndexDeletionPolicy deletionPolicy,
0699: boolean autoCommit) throws CorruptIndexException,
0700: LockObtainFailedException, IOException {
this.closeDir = closeDir;
directory = d;
analyzer = a;
this.infoStream = defaultInfoStream;
0705: setMessageID();
0706:
0707: if (create) {
0708: // Clear the write lock in case it's leftover:
0709: directory.clearLock(IndexWriter.WRITE_LOCK_NAME);
0710: }
0711:
0712: Lock writeLock = directory
0713: .makeLock(IndexWriter.WRITE_LOCK_NAME);
0714: if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
0715: throw new LockObtainFailedException(
0716: "Index locked for write: " + writeLock);
this.writeLock = writeLock; // save it
0718:
0719: try {
0720: if (create) {
0721: // Try to read first. This is to allow create
0722: // against an index that's currently open for
0723: // searching. In this case we write the next
0724: // segments_N file with no segments:
0725: try {
0726: segmentInfos.read(directory);
0727: segmentInfos.clear();
0728: } catch (IOException e) {
0729: // Likely this means it's a fresh directory
0730: }
0731: segmentInfos.write(directory);
0732: } else {
0733: segmentInfos.read(directory);
0734: }
0735:
this.autoCommit = autoCommit;
0737: if (!autoCommit) {
0738: rollbackSegmentInfos = (SegmentInfos) segmentInfos
0739: .clone();
0740: }
0741:
docWriter = new DocumentsWriter(directory, this);
0743: docWriter.setInfoStream(infoStream);
0744:
0745: // Default deleter (for backwards compatibility) is
0746: // KeepOnlyLastCommitDeleter:
0747: deleter = new IndexFileDeleter(
0748: directory,
0749: deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy()
0750: : deletionPolicy, segmentInfos, infoStream,
0751: docWriter);
0752:
0753: pushMaxBufferedDocs();
0754:
0755: if (infoStream != null) {
0756: message("init: create=" + create);
0757: messageState();
0758: }
0759:
0760: } catch (IOException e) {
this.writeLock.release();
this.writeLock = null;
0763: throw e;
0764: }
0765: }
0766:
0767: /**
0768: * Expert: set the merge policy used by this writer.
0769: */
0770: public void setMergePolicy(MergePolicy mp) {
0771: ensureOpen();
0772: if (mp == null)
0773: throw new NullPointerException(
0774: "MergePolicy must be non-null");
0775:
0776: if (mergePolicy != mp)
0777: mergePolicy.close();
0778: mergePolicy = mp;
0779: pushMaxBufferedDocs();
0780: if (infoStream != null)
0781: message("setMergePolicy " + mp);
0782: }
0783:
0784: /**
0785: * Expert: returns the current MergePolicy in use by this writer.
0786: * @see #setMergePolicy
0787: */
0788: public MergePolicy getMergePolicy() {
0789: ensureOpen();
0790: return mergePolicy;
0791: }
0792:
0793: /**
0794: * Expert: set the merge scheduler used by this writer.
0795: */
0796: public void setMergeScheduler(MergeScheduler mergeScheduler)
0797: throws CorruptIndexException, IOException {
0798: ensureOpen();
0799: if (mergeScheduler == null)
0800: throw new NullPointerException(
0801: "MergeScheduler must be non-null");
0802:
if (this.mergeScheduler != mergeScheduler) {
finishMerges(true);
this.mergeScheduler.close();
}
this.mergeScheduler = mergeScheduler;
0808: if (infoStream != null)
0809: message("setMergeScheduler " + mergeScheduler);
0810: }
0811:
0812: /**
* Expert: returns the current MergeScheduler in use by this
* writer.
* @see #setMergeScheduler
0816: */
0817: public MergeScheduler getMergeScheduler() {
0818: ensureOpen();
0819: return mergeScheduler;
0820: }
0821:
0822: /** <p>Determines the largest segment (measured by
0823: * document count) that may be merged with other segments.
0824: * Small values (e.g., less than 10,000) are best for
0825: * interactive indexing, as this limits the length of
0826: * pauses while indexing to a few seconds. Larger values
0827: * are best for batched indexing and speedier
0828: * searches.</p>
0829: *
0830: * <p>The default value is {@link Integer#MAX_VALUE}.</p>
0831: *
0832: * <p>Note that this method is a convenience method: it
0833: * just calls mergePolicy.setMaxMergeDocs as long as
0834: * mergePolicy is an instance of {@link LogMergePolicy}.
0835: * Otherwise an IllegalArgumentException is thrown.</p>
0836: *
0837: * <p>The default merge policy ({@link
0838: * LogByteSizeMergePolicy}) also allows you to set this
0839: * limit by net size (in MB) of the segment, using {@link
0840: * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
0841: */
0842: public void setMaxMergeDocs(int maxMergeDocs) {
0843: getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
0844: }
0845:
0846: /**
0847: * <p>Returns the largest segment (measured by document
0848: * count) that may be merged with other segments.</p>
0849: *
0850: * <p>Note that this method is a convenience method: it
0851: * just calls mergePolicy.getMaxMergeDocs as long as
0852: * mergePolicy is an instance of {@link LogMergePolicy}.
0853: * Otherwise an IllegalArgumentException is thrown.</p>
0854: *
0855: * @see #setMaxMergeDocs
0856: */
0857: public int getMaxMergeDocs() {
0858: return getLogMergePolicy().getMaxMergeDocs();
0859: }
0860:
0861: /**
0862: * The maximum number of terms that will be indexed for a single field in a
0863: * document. This limits the amount of memory required for indexing, so that
0864: * collections with very large files will not crash the indexing process by
0865: * running out of memory. This setting refers to the number of running terms,
0866: * not to the number of different terms.<p/>
0867: * <strong>Note:</strong> this silently truncates large documents, excluding from the
0868: * index all terms that occur further in the document. If you know your source
* documents are large, be sure to set this value high enough to accommodate
0870: * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
0871: * is your memory, but you should anticipate an OutOfMemoryError.<p/>
0872: * By default, no more than 10,000 terms will be indexed for a field.
0873: */
0874: public void setMaxFieldLength(int maxFieldLength) {
0875: ensureOpen();
this.maxFieldLength = maxFieldLength;
0877: if (infoStream != null)
0878: message("setMaxFieldLength " + maxFieldLength);
0879: }
0880:
0881: /**
0882: * Returns the maximum number of terms that will be
0883: * indexed for a single field in a document.
0884: * @see #setMaxFieldLength
0885: */
0886: public int getMaxFieldLength() {
0887: ensureOpen();
0888: return maxFieldLength;
0889: }
0890:
0891: /** Determines the minimal number of documents required
0892: * before the buffered in-memory documents are flushed as
* a new Segment. Large values generally give faster
0894: * indexing.
0895: *
0896: * <p>When this is set, the writer will flush every
0897: * maxBufferedDocs added documents. Pass in {@link
0898: * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
0899: * to number of buffered documents. Note that if flushing
0900: * by RAM usage is also enabled, then the flush will be
0901: * triggered by whichever comes first.</p>
0902: *
0903: * <p>Disabled by default (writer flushes by RAM usage).</p>
0904: *
0905: * @throws IllegalArgumentException if maxBufferedDocs is
* enabled but smaller than 2, or if it disables maxBufferedDocs
* when ramBufferSize is already disabled
0908: * @see #setRAMBufferSizeMB
0909: */
0910: public void setMaxBufferedDocs(int maxBufferedDocs) {
0911: ensureOpen();
0912: if (maxBufferedDocs != DISABLE_AUTO_FLUSH
0913: && maxBufferedDocs < 2)
0914: throw new IllegalArgumentException(
0915: "maxBufferedDocs must at least be 2 when enabled");
0916: if (maxBufferedDocs == DISABLE_AUTO_FLUSH
0917: && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
0918: throw new IllegalArgumentException(
0919: "at least one of ramBufferSize and maxBufferedDocs must be enabled");
0920: docWriter.setMaxBufferedDocs(maxBufferedDocs);
0921: pushMaxBufferedDocs();
0922: if (infoStream != null)
0923: message("setMaxBufferedDocs " + maxBufferedDocs);
0924: }
0925:
0926: /**
0927: * If we are flushing by doc count (not by RAM usage), and
0928: * using LogDocMergePolicy then push maxBufferedDocs down
0929: * as its minMergeDocs, to keep backwards compatibility.
0930: */
0931: private void pushMaxBufferedDocs() {
0932: if (docWriter.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
0933: final MergePolicy mp = mergePolicy;
0934: if (mp instanceof LogDocMergePolicy) {
0935: LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
0936: final int maxBufferedDocs = docWriter
0937: .getMaxBufferedDocs();
0938: if (lmp.getMinMergeDocs() != maxBufferedDocs) {
0939: if (infoStream != null)
0940: message("now push maxBufferedDocs "
0941: + maxBufferedDocs
0942: + " to LogDocMergePolicy");
0943: lmp.setMinMergeDocs(maxBufferedDocs);
0944: }
0945: }
0946: }
0947: }
0948:
0949: /**
0950: * Returns the number of buffered added documents that will
0951: * trigger a flush if enabled.
0952: * @see #setMaxBufferedDocs
0953: */
0954: public int getMaxBufferedDocs() {
0955: ensureOpen();
0956: return docWriter.getMaxBufferedDocs();
0957: }
0958:
0959: /** Determines the amount of RAM that may be used for
0960: * buffering added documents before they are flushed as a
0961: * new Segment. Generally for faster indexing performance
0962: * it's best to flush by RAM usage instead of document
0963: * count and use as large a RAM buffer as you can.
0964: *
0965: * <p>When this is set, the writer will flush whenever
0966: * buffered documents use this much RAM. Pass in {@link
0967: * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
0968: * to RAM usage. Note that if flushing by document count
0969: * is also enabled, then the flush will be triggered by
0970: * whichever comes first.</p>
0971: *
0972: * <p> The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
0973: *
0974: * @throws IllegalArgumentException if ramBufferSize is
* enabled but non-positive, or if it disables ramBufferSize
0976: * when maxBufferedDocs is already disabled
0977: */
0978: public void setRAMBufferSizeMB(double mb) {
0979: if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
0980: throw new IllegalArgumentException(
0981: "ramBufferSize should be > 0.0 MB when enabled");
0982: if (mb == DISABLE_AUTO_FLUSH
0983: && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
0984: throw new IllegalArgumentException(
0985: "at least one of ramBufferSize and maxBufferedDocs must be enabled");
0986: docWriter.setRAMBufferSizeMB(mb);
0987: if (infoStream != null)
0988: message("setRAMBufferSizeMB " + mb);
0989: }
0990:
0991: /**
0992: * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
0993: */
0994: public double getRAMBufferSizeMB() {
0995: return docWriter.getRAMBufferSizeMB();
0996: }
0997:
0998: /**
0999: * <p>Determines the minimal number of delete terms required before the buffered
1000: * in-memory delete terms are applied and flushed. If there are documents
1001: * buffered in memory at the time, they are merged and a new segment is
1002: * created.</p>
1003:
1004: * <p>Disabled by default (writer flushes by RAM usage).</p>
1005: *
1006: * @throws IllegalArgumentException if maxBufferedDeleteTerms
1007: * is enabled but smaller than 1
1008: * @see #setRAMBufferSizeMB
1009: */
1010: public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
1011: ensureOpen();
1012: if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH
1013: && maxBufferedDeleteTerms < 1)
1014: throw new IllegalArgumentException(
1015: "maxBufferedDeleteTerms must at least be 1 when enabled");
1016: docWriter.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
1017: if (infoStream != null)
1018: message("setMaxBufferedDeleteTerms "
1019: + maxBufferedDeleteTerms);
1020: }
1021:
1022: /**
1023: * Returns the number of buffered deleted terms that will
1024: * trigger a flush if enabled.
1025: * @see #setMaxBufferedDeleteTerms
1026: */
1027: public int getMaxBufferedDeleteTerms() {
1028: ensureOpen();
1029: return docWriter.getMaxBufferedDeleteTerms();
1030: }
1031:
1032: /** Determines how often segment indices are merged by addDocument(). With
1033: * smaller values, less RAM is used while indexing, and searches on
1034: * unoptimized indices are faster, but indexing speed is slower. With larger
1035: * values, more RAM is used during indexing, and while searches on unoptimized
1036: * indices are slower, indexing is faster. Thus larger values (> 10) are best
1037: * for batch index creation, and smaller values (< 10) for indices that are
1038: * interactively maintained.
1039: *
1040: * <p>Note that this method is a convenience method: it
1041: * just calls mergePolicy.setMergeFactor as long as
1042: * mergePolicy is an instance of {@link LogMergePolicy}.
1043: * Otherwise an IllegalArgumentException is thrown.</p>
1044: *
1045: * <p>This must never be less than 2. The default value is 10.
1046: */
1047: public void setMergeFactor(int mergeFactor) {
1048: getLogMergePolicy().setMergeFactor(mergeFactor);
1049: }
1050:
1051: /**
1052: * <p>Returns the number of segments that are merged at
1053: * once and also controls the total number of segments
1054: * allowed to accumulate in the index.</p>
1055: *
1056: * <p>Note that this method is a convenience method: it
1057: * just calls mergePolicy.getMergeFactor as long as
1058: * mergePolicy is an instance of {@link LogMergePolicy}.
1059: * Otherwise an IllegalArgumentException is thrown.</p>
1060: *
1061: * @see #setMergeFactor
1062: */
1063: public int getMergeFactor() {
1064: return getLogMergePolicy().getMergeFactor();
1065: }
1066:
1067: /** If non-null, this will be the default infoStream used
1068: * by a newly instantiated IndexWriter.
1069: * @see #setInfoStream
1070: */
1071: public static void setDefaultInfoStream(PrintStream infoStream) {
1072: IndexWriter.defaultInfoStream = infoStream;
1073: }
1074:
1075: /**
1076: * Returns the current default infoStream for newly
1077: * instantiated IndexWriters.
1078: * @see #setDefaultInfoStream
1079: */
1080: public static PrintStream getDefaultInfoStream() {
1081: return IndexWriter.defaultInfoStream;
1082: }
1083:
1084: /** If non-null, information about merges, deletes and a
1085: * message when maxFieldLength is reached will be printed
1086: * to this.
1087: */
1088: public void setInfoStream(PrintStream infoStream) {
1089: ensureOpen();
this.infoStream = infoStream;
1091: setMessageID();
1092: docWriter.setInfoStream(infoStream);
1093: deleter.setInfoStream(infoStream);
1094: if (infoStream != null)
1095: messageState();
1096: }
1097:
1098: private void messageState() {
1099: message("setInfoStream: dir=" + directory + " autoCommit="
1100: + autoCommit + " mergePolicy=" + mergePolicy
1101: + " mergeScheduler=" + mergeScheduler
1102: + " ramBufferSizeMB=" + docWriter.getRAMBufferSizeMB()
+ " maxBufferedDocs=" + docWriter.getMaxBufferedDocs()
+ " maxBufferedDeleteTerms="
1105: + docWriter.getMaxBufferedDeleteTerms()
1106: + " maxFieldLength=" + maxFieldLength + " index="
1107: + segString());
1108: }
1109:
1110: /**
1111: * Returns the current infoStream in use by this writer.
1112: * @see #setInfoStream
1113: */
1114: public PrintStream getInfoStream() {
1115: ensureOpen();
1116: return infoStream;
1117: }
1118:
1119: /**
* Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
* @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
1122: */
1123: public void setWriteLockTimeout(long writeLockTimeout) {
1124: ensureOpen();
this.writeLockTimeout = writeLockTimeout;
1126: }
1127:
1128: /**
1129: * Returns allowed timeout when acquiring the write lock.
1130: * @see #setWriteLockTimeout
1131: */
1132: public long getWriteLockTimeout() {
1133: ensureOpen();
1134: return writeLockTimeout;
1135: }
1136:
1137: /**
1138: * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
1139: * milliseconds).
1140: */
1141: public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
1142: IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout;
1143: }
1144:
1145: /**
1146: * Returns default write lock timeout for newly
1147: * instantiated IndexWriters.
1148: * @see #setDefaultWriteLockTimeout
1149: */
1150: public static long getDefaultWriteLockTimeout() {
1151: return IndexWriter.WRITE_LOCK_TIMEOUT;
1152: }
1153:
1154: /**
1155: * Flushes all changes to an index and closes all
1156: * associated files.
1157: *
1158: * <p> If an Exception is hit during close, eg due to disk
1159: * full or some other reason, then both the on-disk index
1160: * and the internal state of the IndexWriter instance will
1161: * be consistent. However, the close will not be complete
1162: * even though part of it (flushing buffered documents)
1163: * may have succeeded, so the write lock will still be
1164: * held.</p>
1165: *
1166: * <p> If you can correct the underlying cause (eg free up
1167: * some disk space) then you can call close() again.
1168: * Failing that, if you want to force the write lock to be
1169: * released (dangerous, because you may then lose buffered
1170: * docs in the IndexWriter instance) then you can do
1171: * something like this:</p>
1172: *
1173: * <pre>
1174: * try {
1175: * writer.close();
1176: * } finally {
1177: * if (IndexReader.isLocked(directory)) {
1178: * IndexReader.unlock(directory);
1179: * }
1180: * }
1181: * </pre>
1182: *
* <p>after which, you must be certain not to use the writer
1184: * instance anymore.</p>
1185: * @throws CorruptIndexException if the index is corrupt
1186: * @throws IOException if there is a low-level IO error
1187: */
1188: public void close() throws CorruptIndexException, IOException {
1189: close(true);
1190: }
1191:
1192: /**
1193: * Closes the index with or without waiting for currently
1194: * running merges to finish. This is only meaningful when
1195: * using a MergeScheduler that runs merges in background
1196: * threads.
1197: * @param waitForMerges if true, this call will block
1198: * until all merges complete; else, it will ask all
1199: * running merges to abort, wait until those merges have
1200: * finished (which should be at most a few seconds), and
1201: * then return.
1202: */
1203: public void close(boolean waitForMerges)
1204: throws CorruptIndexException, IOException {
1205: boolean doClose;
synchronized (this) {
1207: // Ensure that only one thread actually gets to do the closing:
1208: if (!closing) {
1209: doClose = true;
1210: closing = true;
1211: } else
1212: doClose = false;
1213: }
1214: if (doClose)
1215: closeInternal(waitForMerges);
1216: else
1217: // Another thread beat us to it (is actually doing the
1218: // close), so we will block until that other thread
1219: // has finished closing
1220: waitForClose();
1221: }
1222:
1223: synchronized private void waitForClose() {
1224: while (!closed && closing) {
1225: try {
1226: wait();
1227: } catch (InterruptedException ie) {
1228: }
1229: }
1230: }
1231:
1232: private void closeInternal(boolean waitForMerges)
1233: throws CorruptIndexException, IOException {
1234: try {
1235: if (infoStream != null)
1236: message("now flush at close");
1237:
1238: docWriter.close();
1239:
1240: // Only allow a new merge to be triggered if we are
1241: // going to wait for merges:
1242: flush(waitForMerges, true);
1243:
1244: mergePolicy.close();
1245:
1246: finishMerges(waitForMerges);
1247:
1248: mergeScheduler.close();
1249:
synchronized (this) {
1251: if (commitPending) {
1252: boolean success = false;
1253: try {
1254: segmentInfos.write(directory); // now commit changes
1255: success = true;
1256: } finally {
1257: if (!success) {
1258: if (infoStream != null)
1259: message("hit exception committing segments file during close");
1260: deletePartialSegmentsFile();
1261: }
1262: }
1263: if (infoStream != null)
1264: message("close: wrote segments file \""
1265: + segmentInfos
1266: .getCurrentSegmentFileName()
1267: + "\"");
1268:
1269: deleter.checkpoint(segmentInfos, true);
1270:
1271: commitPending = false;
1272: rollbackSegmentInfos = null;
1273: }
1274:
1275: if (infoStream != null)
1276: message("at close: " + segString());
1277:
1278: docWriter = null;
1279:
1280: deleter.close();
1281: }
1282:
1283: if (closeDir)
1284: directory.close();
1285:
1286: if (writeLock != null) {
1287: writeLock.release(); // release write lock
1288: writeLock = null;
1289: }
1290: closed = true;
1291:
1292: } finally {
synchronized (this) {
1294: if (!closed)
1295: closing = false;
1296: notifyAll();
1297: }
1298: }
1299: }
1300:
1301: /** Tells the docWriter to close its currently open shared
* doc stores (stored fields &amp; vectors files).
* The return value specifies whether the new doc store files are compound or not.
1304: */
1305: private synchronized boolean flushDocStores() throws IOException {
1306:
1307: List files = docWriter.files();
1308:
1309: boolean useCompoundDocStore = false;
1310:
1311: if (files.size() > 0) {
1312: String docStoreSegment;
1313:
1314: boolean success = false;
1315: try {
1316: docStoreSegment = docWriter.closeDocStore();
1317: success = true;
1318: } finally {
1319: if (!success) {
1320: if (infoStream != null)
1321: message("hit exception closing doc store segment");
1322: docWriter.abort(null);
1323: }
1324: }
1325:
1326: useCompoundDocStore = mergePolicy
1327: .useCompoundDocStore(segmentInfos);
1328:
1329: if (useCompoundDocStore && docStoreSegment != null) {
1330: // Now build compound doc store file
1331:
1332: success = false;
1333:
1334: final int numSegments = segmentInfos.size();
1335: final String compoundFileName = docStoreSegment + "."
1336: + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
1337:
1338: try {
1339: CompoundFileWriter cfsWriter = new CompoundFileWriter(
1340: directory, compoundFileName);
1341: final int size = files.size();
1342: for (int i = 0; i < size; i++)
1343: cfsWriter.addFile((String) files.get(i));
1344:
1345: // Perform the merge
1346: cfsWriter.close();
1347:
1348: for (int i = 0; i < numSegments; i++) {
1349: SegmentInfo si = segmentInfos.info(i);
1350: if (si.getDocStoreOffset() != -1
1351: && si.getDocStoreSegment().equals(
1352: docStoreSegment))
1353: si.setDocStoreIsCompoundFile(true);
1354: }
1355: checkpoint();
1356: success = true;
1357: } finally {
1358: if (!success) {
1359:
1360: if (infoStream != null)
1361: message("hit exception building compound file doc store for segment "
1362: + docStoreSegment);
1363:
1364: // Rollback to no compound file
1365: for (int i = 0; i < numSegments; i++) {
1366: SegmentInfo si = segmentInfos.info(i);
1367: if (si.getDocStoreOffset() != -1
1368: && si.getDocStoreSegment().equals(
1369: docStoreSegment))
1370: si.setDocStoreIsCompoundFile(false);
1371: }
1372: deleter.deleteFile(compoundFileName);
1373: deletePartialSegmentsFile();
1374: }
1375: }
1376:
1377: deleter.checkpoint(segmentInfos, false);
1378: }
1379: }
1380:
1381: return useCompoundDocStore;
1382: }
1383:
1384: /** Release the write lock, if needed. */
1385: protected void finalize() throws Throwable {
1386: try {
1387: if (writeLock != null) {
1388: writeLock.release(); // release write lock
1389: writeLock = null;
1390: }
1391: } finally {
super.finalize();
1393: }
1394: }
1395:
1396: /** Returns the Directory used by this index. */
1397: public Directory getDirectory() {
1398: ensureOpen();
1399: return directory;
1400: }
1401:
1402: /** Returns the analyzer used by this index. */
1403: public Analyzer getAnalyzer() {
1404: ensureOpen();
1405: return analyzer;
1406: }
1407:
1408: /** Returns the number of documents currently in this index. */
1409: public synchronized int docCount() {
1410: ensureOpen();
1411: int count = docWriter.getNumDocsInRAM();
1412: for (int i = 0; i < segmentInfos.size(); i++) {
1413: SegmentInfo si = segmentInfos.info(i);
1414: count += si.docCount;
1415: }
1416: return count;
1417: }
1418:
1419: /**
1420: * The maximum number of terms that will be indexed for a single field in a
1421: * document. This limits the amount of memory required for indexing, so that
1422: * collections with very large files will not crash the indexing process by
1423: * running out of memory.<p/>
1424: * Note that this effectively truncates large documents, excluding from the
1425: * index terms that occur further in the document. If you know your source
* documents are large, be sure to set this value high enough to accommodate
1427: * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
1428: * is your memory, but you should anticipate an OutOfMemoryError.<p/>
1429: * By default, no more than 10,000 terms will be indexed for a field.
1430: *
1431: */
1432: private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
1433:
1434: /**
1435: * Adds a document to this index. If the document contains more than
1436: * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
1437: * discarded.
1438: *
1439: * <p> Note that if an Exception is hit (for example disk full)
1440: * then the index will be consistent, but this document
1441: * may not have been added. Furthermore, it's possible
1442: * the index will have one segment in non-compound format
1443: * even when using compound files (when a merge has
1444: * partially succeeded).</p>
1445: *
1446: * <p> This method periodically flushes pending documents
1447: * to the Directory (every {@link #setMaxBufferedDocs}),
1448: * and also periodically merges segments in the index
1449: * (every {@link #setMergeFactor} flushes). When this
1450: * occurs, the method will take more time to run (possibly
1451: * a long time if the index is large), and will require
1452: * free temporary space in the Directory to do the
1453: * merging.</p>
1454: *
1455: * <p>The amount of free space required when a merge is triggered is
1456: * up to 1X the size of all segments being merged, when no
1457: * readers/searchers are open against the index, and up to 2X the
1458: * size of all segments being merged when readers/searchers are open
1459: * against the index (see {@link #optimize()} for details). The
1460: * sequence of primitive merge operations performed is governed by
1461: * the merge policy.
1462: *
1463: * <p>Note that each term in the document can be no longer
1464: * than 16383 characters, otherwise an
1465: * IllegalArgumentException will be thrown.</p>
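*
* <p>A minimal usage sketch; the <code>directory</code>, analyzer and
* field names here are illustrative assumptions, not requirements:</p>
* <pre>
*   IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer());
*   Document doc = new Document();
*   doc.add(new Field("contents", "some text", Field.Store.YES, Field.Index.TOKENIZED));
*   writer.addDocument(doc);
*   writer.close();
* </pre>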
1466: *
1467: * @throws CorruptIndexException if the index is corrupt
1468: * @throws IOException if there is a low-level IO error
1469: */
1470: public void addDocument(Document doc) throws CorruptIndexException,
1471: IOException {
1472: addDocument(doc, analyzer);
1473: }
1474:
1475: /**
1476: * Adds a document to this index, using the provided analyzer instead of the
1477: * value of {@link #getAnalyzer()}. If the document contains more than
1478: * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
1479: * discarded.
1480: *
1481: * <p>See {@link #addDocument(Document)} for details on
1482: * index and IndexWriter state after an Exception, and
1483: * flushing/merging temporary free space requirements.</p>
1484: *
1485: * @throws CorruptIndexException if the index is corrupt
1486: * @throws IOException if there is a low-level IO error
1487: */
1488: public void addDocument(Document doc, Analyzer analyzer)
1489: throws CorruptIndexException, IOException {
1490: ensureOpen();
1491: boolean doFlush = false;
1492: boolean success = false;
1493: try {
1494: doFlush = docWriter.addDocument(doc, analyzer);
1495: success = true;
1496: } finally {
1497: if (!success) {
1498:
1499: if (infoStream != null)
1500: message("hit exception adding document");
1501:
1502: synchronized (this ) {
1503: // If docWriter has some aborted files that were
1504: // never incref'd, then we clean them up here
1505: if (docWriter != null) {
1506: final List files = docWriter.abortedFiles();
1507: if (files != null)
1508: deleter.deleteNewFiles(files);
1509: }
1510: }
1511: }
1512: }
1513: if (doFlush)
1514: flush(true, false);
1515: }
1516:
1517: /**
1518: * Deletes the document(s) containing <code>term</code>.
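* <p>For example, assuming documents were indexed with a unique
* <code>"id"</code> field (an illustrative convention, not a requirement):</p>
* <pre>
*   writer.deleteDocuments(new Term("id", "42"));
* </pre>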
1519: * @param term the term to identify the documents to be deleted
1520: * @throws CorruptIndexException if the index is corrupt
1521: * @throws IOException if there is a low-level IO error
1522: */
1523: public void deleteDocuments(Term term)
1524: throws CorruptIndexException, IOException {
1525: ensureOpen();
1526: boolean doFlush = docWriter.bufferDeleteTerm(term);
1527: if (doFlush)
1528: flush(true, false);
1529: }
1530:
1531: /**
1532: * Deletes the document(s) containing any of the
1533: * terms. All deletes are flushed at the same time.
1534: * @param terms array of terms to identify the documents
1535: * to be deleted
1536: * @throws CorruptIndexException if the index is corrupt
1537: * @throws IOException if there is a low-level IO error
1538: */
1539: public void deleteDocuments(Term[] terms)
1540: throws CorruptIndexException, IOException {
1541: ensureOpen();
1542: boolean doFlush = docWriter.bufferDeleteTerms(terms);
1543: if (doFlush)
1544: flush(true, false);
1545: }
1546:
1547: /**
1548: * Updates a document by first deleting the document(s)
1549: * containing <code>term</code> and then adding the new
1550: * document. The delete and then add are atomic as seen
1551: * by a reader on the same index (flush may happen only after
1552: * the add).
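* <p>A minimal sketch, assuming each document carries a unique
* <code>"id"</code> field (an illustrative convention, not a requirement):</p>
* <pre>
*   Document newDoc = new Document();
*   newDoc.add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
*   newDoc.add(new Field("contents", "updated text", Field.Store.YES, Field.Index.TOKENIZED));
*   writer.updateDocument(new Term("id", "42"), newDoc);
* </pre>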
1553: * @param term the term to identify the document(s) to be
1554: * deleted
1555: * @param doc the document to be added
1556: * @throws CorruptIndexException if the index is corrupt
1557: * @throws IOException if there is a low-level IO error
1558: */
1559: public void updateDocument(Term term, Document doc)
1560: throws CorruptIndexException, IOException {
1561: ensureOpen();
1562: updateDocument(term, doc, getAnalyzer());
1563: }
1564:
1565: /**
1566: * Updates a document by first deleting the document(s)
1567: * containing <code>term</code> and then adding the new
1568: * document. The delete and then add are atomic as seen
1569: * by a reader on the same index (flush may happen only after
1570: * the add).
1571: * @param term the term to identify the document(s) to be
1572: * deleted
1573: * @param doc the document to be added
1574: * @param analyzer the analyzer to use when analyzing the document
1575: * @throws CorruptIndexException if the index is corrupt
1576: * @throws IOException if there is a low-level IO error
1577: */
1578: public void updateDocument(Term term, Document doc,
1579: Analyzer analyzer) throws CorruptIndexException,
1580: IOException {
1581: ensureOpen();
1582: boolean doFlush = false;
1583: boolean success = false;
1584: try {
1585: doFlush = docWriter.updateDocument(term, doc, analyzer);
1586: success = true;
1587: } finally {
1588: if (!success) {
1589:
1590: if (infoStream != null)
1591: message("hit exception updating document");
1592:
1593: synchronized (this ) {
1594: // If docWriter has some aborted files that were
1595: // never incref'd, then we clean them up here
1596: final List files = docWriter.abortedFiles();
1597: if (files != null)
1598: deleter.deleteNewFiles(files);
1599: }
1600: }
1601: }
1602: if (doFlush)
1603: flush(true, false);
1604: }
1605:
1606: // for test purpose
1607: final synchronized int getSegmentCount() {
1608: return segmentInfos.size();
1609: }
1610:
1611: // for test purpose
1612: final synchronized int getNumBufferedDocuments() {
1613: return docWriter.getNumDocsInRAM();
1614: }
1615:
1616: // for test purpose
1617: final synchronized int getDocCount(int i) {
1618: if (i >= 0 && i < segmentInfos.size()) {
1619: return segmentInfos.info(i).docCount;
1620: } else {
1621: return -1;
1622: }
1623: }
1624:
1625: final String newSegmentName() {
1626: // Cannot synchronize on IndexWriter because that causes
1627: // deadlock
1628: synchronized (segmentInfos) {
1629: // Important to set commitPending so that the
1630: // segmentInfos is written on close. Otherwise we
1631: // could close, re-open and re-return the same segment
1632: // name that was previously returned which can cause
1633: // problems at least with ConcurrentMergeScheduler.
1634: commitPending = true;
1635: return "_"
1636: + Integer.toString(segmentInfos.counter++,
1637: Character.MAX_RADIX);
1638: }
1639: }
1640:
1641: /** If non-null, information about merges will be printed to this.
1642: */
1643: private PrintStream infoStream = null;
1644: private static PrintStream defaultInfoStream = null;
1645:
1646: /**
1647: * Requests an "optimize" operation on an index, priming the index
1648: * for the fastest available search. Traditionally this has meant
1649: * merging all segments into a single segment as is done in the
* default merge policy, but individual merge policies may implement
1651: * optimize in different ways.
1652: *
1653: * @see LogMergePolicy#findMergesForOptimize
1654: *
1655: * <p>It is recommended that this method be called upon completion of indexing. In
1656: * environments with frequent updates, optimize is best done during low volume times, if at all.
1657: *
1658: * </p>
1659: * <p>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion. </p>
1660: *
1661: * <p>Note that this can require substantial temporary free
1662: * space in the Directory (see <a target="_top"
1663: * href="http://issues.apache.org/jira/browse/LUCENE-764">LUCENE-764</a>
1664: * for details):</p>
1665: *
1666: * <ul>
1667: * <li>
1668: *
1669: * <p>If no readers/searchers are open against the index,
1670: * then free space required is up to 1X the total size of
1671: * the starting index. For example, if the starting
1672: * index is 10 GB, then you must have up to 10 GB of free
1673: * space before calling optimize.</p>
1674: *
1675: * <li>
1676: *
1677: * <p>If readers/searchers are using the index, then free
1678: * space required is up to 2X the size of the starting
1679: * index. This is because in addition to the 1X used by
1680: * optimize, the original 1X of the starting index is
1681: * still consuming space in the Directory as the readers
1682: * are holding the segments files open. Even on Unix,
1683: * where it will appear as if the files are gone ("ls"
1684: * won't list them), they still consume storage due to
1685: * "delete on last close" semantics.</p>
1686: *
1687: * <p>Furthermore, if some but not all readers re-open
1688: * while the optimize is underway, this will cause > 2X
1689: * temporary space to be consumed as those new readers
1690: * will then hold open the partially optimized segments at
1691: * that time. It is best not to re-open readers while
1692: * optimize is running.</p>
1693: *
1694: * </ul>
1695: *
1696: * <p>The actual temporary usage could be much less than
1697: * these figures (it depends on many factors).</p>
1698: *
1699: * <p>In general, once the optimize completes, the total size of the
1700: * index will be less than the size of the starting index.
1701: * It could be quite a bit smaller (if there were many
1702: * pending deletes) or just slightly smaller.</p>
1703: *
1704: * <p>If an Exception is hit during optimize(), for example
1705: * due to disk full, the index will not be corrupt and no
1706: * documents will have been lost. However, it may have
1707: * been partially optimized (some segments were merged but
1708: * not all), and it's possible that one of the segments in
1709: * the index will be in non-compound format even when
1710: * using compound file format. This will occur when the
1711: * Exception is hit during conversion of the segment into
1712: * compound format.</p>
1713: *
1714: * <p>This call will optimize those segments present in
1715: * the index when the call started. If other threads are
1716: * still adding documents and flushing segments, those
1717: * newly created segments will not be optimized unless you
1718: * call optimize again.</p>
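*
* <p>A typical sequence, sketched under the assumption that all adds,
* deletes and updates are finished and <code>writer</code> is still open:</p>
* <pre>
*   writer.optimize();   // merge down to a single segment
*   writer.close();      // commit and release the write lock
* </pre>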
1719: *
1720: * @throws CorruptIndexException if the index is corrupt
1721: * @throws IOException if there is a low-level IO error
1722: */
1723: public void optimize() throws CorruptIndexException, IOException {
1724: optimize(true);
1725: }
1726:
1727: /**
1728: * Optimize the index down to <= maxNumSegments. If
1729: * maxNumSegments==1 then this is the same as {@link
1730: * #optimize()}.
1731: * @param maxNumSegments maximum number of segments left
1732: * in the index after optimization finishes
1733: */
1734: public void optimize(int maxNumSegments)
1735: throws CorruptIndexException, IOException {
1736: optimize(maxNumSegments, true);
1737: }
1738:
1739: /** Just like {@link #optimize()}, except you can specify
1740: * whether the call should block until the optimize
1741: * completes. This is only meaningful with a
1742: * {@link MergeScheduler} that is able to run merges in
1743: * background threads. */
1744: public void optimize(boolean doWait) throws CorruptIndexException,
1745: IOException {
optimize(1, doWait);
1747: }
1748:
1749: /** Just like {@link #optimize(int)}, except you can
1750: * specify whether the call should block until the
1751: * optimize completes. This is only meaningful with a
1752: * {@link MergeScheduler} that is able to run merges in
1753: * background threads. */
1754: public void optimize(int maxNumSegments, boolean doWait)
1755: throws CorruptIndexException, IOException {
1756: ensureOpen();
1757:
1758: if (maxNumSegments < 1)
1759: throw new IllegalArgumentException(
1760: "maxNumSegments must be >= 1; got "
1761: + maxNumSegments);
1762:
1763: if (infoStream != null)
1764: message("optimize: index now " + segString());
1765:
1766: flush();
1767:
1768: synchronized (this ) {
1769: resetMergeExceptions();
1770: segmentsToOptimize = new HashSet();
1771: final int numSegments = segmentInfos.size();
1772: for (int i = 0; i < numSegments; i++)
1773: segmentsToOptimize.add(segmentInfos.info(i));
1774:
1775: // Now mark all pending & running merges as optimize
1776: // merge:
1777: Iterator it = pendingMerges.iterator();
1778: while (it.hasNext()) {
1779: final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it
1780: .next();
1781: merge.optimize = true;
1782: merge.maxNumSegmentsOptimize = maxNumSegments;
1783: }
1784:
1785: it = runningMerges.iterator();
1786: while (it.hasNext()) {
1787: final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it
1788: .next();
1789: merge.optimize = true;
1790: merge.maxNumSegmentsOptimize = maxNumSegments;
1791: }
1792: }
1793:
1794: maybeMerge(maxNumSegments, true);
1795:
1796: if (doWait) {
1797: synchronized (this ) {
1798: while (optimizeMergesPending()) {
1799: try {
1800: wait();
1801: } catch (InterruptedException ie) {
1802: }
1803:
1804: if (mergeExceptions.size() > 0) {
1805: // Forward any exceptions in background merge
1806: // threads to the current thread:
1807: final int size = mergeExceptions.size();
1808: for (int i = 0; i < size; i++) {
final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) mergeExceptions
.get(i);
1811: if (merge.optimize) {
1812: IOException err = new IOException(
1813: "background merge hit exception: "
1814: + merge
1815: .segString(directory));
1816: err.initCause(merge.getException());
1817: throw err;
1818: }
1819: }
1820: }
1821: }
1822: }
1823: }
1824:
1825: // NOTE: in the ConcurrentMergeScheduler case, when
1826: // doWait is false, we can return immediately while
1827: // background threads accomplish the optimization
1828: }
1829:
1830: /** Returns true if any merges in pendingMerges or
1831: * runningMerges are optimization merges. */
1832: private synchronized boolean optimizeMergesPending() {
1833: Iterator it = pendingMerges.iterator();
1834: while (it.hasNext())
1835: if (((MergePolicy.OneMerge) it.next()).optimize)
1836: return true;
1837:
1838: it = runningMerges.iterator();
1839: while (it.hasNext())
1840: if (((MergePolicy.OneMerge) it.next()).optimize)
1841: return true;
1842:
1843: return false;
1844: }
1845:
1846: /**
1847: * Expert: asks the mergePolicy whether any merges are
1848: * necessary now and if so, runs the requested merges and
* then iterates (testing again whether merges are needed) until no
1850: * more merges are returned by the mergePolicy.
1851: *
1852: * Explicit calls to maybeMerge() are usually not
1853: * necessary. The most common case is when merge policy
1854: * parameters have changed.
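*
* <p>For example (a sketch; the new mergeFactor value is arbitrary):</p>
* <pre>
*   writer.setMergeFactor(5);   // merge policy parameter changed...
*   writer.maybeMerge();        // ...so ask for merges to be re-evaluated now
* </pre>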
1855: */
1856: public final void maybeMerge() throws CorruptIndexException,
1857: IOException {
1858: maybeMerge(false);
1859: }
1860:
1861: private final void maybeMerge(boolean optimize)
1862: throws CorruptIndexException, IOException {
1863: maybeMerge(1, optimize);
1864: }
1865:
1866: private final void maybeMerge(int maxNumSegmentsOptimize,
1867: boolean optimize) throws CorruptIndexException, IOException {
1868: updatePendingMerges(maxNumSegmentsOptimize, optimize);
1869: mergeScheduler.merge(this );
1870: }
1871:
1872: private synchronized void updatePendingMerges(
1873: int maxNumSegmentsOptimize, boolean optimize)
1874: throws CorruptIndexException, IOException {
1875: assert !optimize || maxNumSegmentsOptimize > 0;
1876:
1877: if (stopMerges)
1878: return;
1879:
1880: final MergePolicy.MergeSpecification spec;
1881: if (optimize) {
1882: spec = mergePolicy.findMergesForOptimize(segmentInfos,
1883: this , maxNumSegmentsOptimize, segmentsToOptimize);
1884:
1885: if (spec != null) {
1886: final int numMerges = spec.merges.size();
1887: for (int i = 0; i < numMerges; i++) {
1888: final MergePolicy.OneMerge merge = ((MergePolicy.OneMerge) spec.merges
1889: .get(i));
1890: merge.optimize = true;
1891: merge.maxNumSegmentsOptimize = maxNumSegmentsOptimize;
1892: }
1893: }
1894:
1895: } else
1896: spec = mergePolicy.findMerges(segmentInfos, this );
1897:
1898: if (spec != null) {
1899: final int numMerges = spec.merges.size();
1900: for (int i = 0; i < numMerges; i++)
1901: registerMerge((MergePolicy.OneMerge) spec.merges.get(i));
1902: }
1903: }
1904:
1905: /** Expert: the {@link MergeScheduler} calls this method
1906: * to retrieve the next merge requested by the
1907: * MergePolicy */
1908: synchronized MergePolicy.OneMerge getNextMerge() {
1909: if (pendingMerges.size() == 0)
1910: return null;
1911: else {
1912: // Advance the merge from pending to running
1913: MergePolicy.OneMerge merge = (MergePolicy.OneMerge) pendingMerges
1914: .removeFirst();
1915: runningMerges.add(merge);
1916: return merge;
1917: }
1918: }
1919:
1920: /*
1921: * Begin a transaction. During a transaction, any segment
1922: * merges that happen (or ram segments flushed) will not
1923: * write a new segments file and will not remove any files
1924: * that were present at the start of the transaction. You
1925: * must make a matched (try/finally) call to
1926: * commitTransaction() or rollbackTransaction() to finish
1927: * the transaction.
1928: *
1929: * Note that buffered documents and delete terms are not handled
1930: * within the transactions, so they must be flushed before the
1931: * transaction is started.
1932: */
1933: private void startTransaction() throws IOException {
1934:
1935: if (infoStream != null)
1936: message("now start transaction");
1937:
1938: assert docWriter.getNumBufferedDeleteTerms() == 0 : "calling startTransaction with buffered delete terms not supported";
1939: assert docWriter.getNumDocsInRAM() == 0 : "calling startTransaction with buffered documents not supported";
1940:
1941: localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
1942: localAutoCommit = autoCommit;
1943:
1944: if (localAutoCommit) {
1945:
1946: if (infoStream != null)
1947: message("flush at startTransaction");
1948:
1949: flush();
1950: // Turn off auto-commit during our local transaction:
1951: autoCommit = false;
1952: } else
1953: // We must "protect" our files at this point from
1954: // deletion in case we need to rollback:
1955: deleter.incRef(segmentInfos, false);
1956: }
1957:
1958: /*
1959: * Rolls back the transaction and restores state to where
1960: * we were at the start.
1961: */
1962: private void rollbackTransaction() throws IOException {
1963:
1964: if (infoStream != null)
1965: message("now rollback transaction");
1966:
1967: // First restore autoCommit in case we hit an exception below:
1968: autoCommit = localAutoCommit;
1969:
1970: // Keep the same segmentInfos instance but replace all
1971: // of its SegmentInfo instances. This is so the next
1972: // attempt to commit using this instance of IndexWriter
1973: // will always write to a new generation ("write once").
1974: segmentInfos.clear();
1975: segmentInfos.addAll(localRollbackSegmentInfos);
1976: localRollbackSegmentInfos = null;
1977:
1978: // Ask deleter to locate unreferenced files we had
1979: // created & remove them:
1980: deleter.checkpoint(segmentInfos, false);
1981:
1982: if (!autoCommit)
1983: // Remove the incRef we did in startTransaction:
1984: deleter.decRef(segmentInfos);
1985:
1986: deleter.refresh();
1987: finishMerges(false);
1988: stopMerges = false;
1989: }
1990:
1991: /*
1992: * Commits the transaction. This will write the new
* segments file and remove any pending deletions we have
1994: * accumulated during the transaction
1995: */
1996: private void commitTransaction() throws IOException {
1997:
1998: if (infoStream != null)
1999: message("now commit transaction");
2000:
2001: // First restore autoCommit in case we hit an exception below:
2002: autoCommit = localAutoCommit;
2003:
2004: boolean success = false;
2005: try {
2006: checkpoint();
2007: success = true;
2008: } finally {
2009: if (!success) {
2010: if (infoStream != null)
2011: message("hit exception committing transaction");
2012:
2013: rollbackTransaction();
2014: }
2015: }
2016:
2017: if (!autoCommit)
2018: // Remove the incRef we did in startTransaction.
2019: deleter.decRef(localRollbackSegmentInfos);
2020:
2021: localRollbackSegmentInfos = null;
2022:
2023: // Give deleter a chance to remove files now:
2024: deleter.checkpoint(segmentInfos, autoCommit);
2025: }
2026:
2027: /**
2028: * Close the <code>IndexWriter</code> without committing
2029: * any of the changes that have occurred since it was
2030: * opened. This removes any temporary files that had been
2031: * created, after which the state of the index will be the
2032: * same as it was when this writer was first opened. This
2033: * can only be called when this IndexWriter was opened
2034: * with <code>autoCommit=false</code>.
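* <p>A sketch of the intended pattern; the constructor shown takes
* <code>autoCommit</code> as its second argument, and <code>directory</code>,
* <code>analyzer</code> and <code>doc</code> are assumed to exist:</p>
* <pre>
*   IndexWriter writer = new IndexWriter(directory, false, analyzer);
*   boolean success = false;
*   try {
*     writer.addDocument(doc);
*     success = true;
*   } finally {
*     if (success)
*       writer.close();   // commits the buffered changes
*     else
*       writer.abort();   // discards everything done since open
*   }
* </pre>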
2035: * @throws IllegalStateException if this is called when
2036: * the writer was opened with <code>autoCommit=true</code>.
2037: * @throws IOException if there is a low-level IO error
2038: */
2039: public void abort() throws IOException {
2040: ensureOpen();
2041: if (autoCommit)
2042: throw new IllegalStateException(
2043: "abort() can only be called when IndexWriter was opened with autoCommit=false");
2044:
2045: boolean doClose;
2046: synchronized (this ) {
2047: // Ensure that only one thread actually gets to do the closing:
2048: if (!closing) {
2049: doClose = true;
2050: closing = true;
2051: } else
2052: doClose = false;
2053: }
2054:
2055: if (doClose) {
2056:
2057: finishMerges(false);
2058:
2059: // Must pre-close these two, in case they set
2060: // commitPending=true, so that we can then set it to
2061: // false before calling closeInternal
2062: mergePolicy.close();
2063: mergeScheduler.close();
2064:
2065: synchronized (this ) {
2066: // Keep the same segmentInfos instance but replace all
2067: // of its SegmentInfo instances. This is so the next
2068: // attempt to commit using this instance of IndexWriter
2069: // will always write to a new generation ("write
2070: // once").
2071: segmentInfos.clear();
2072: segmentInfos.addAll(rollbackSegmentInfos);
2073:
2074: docWriter.abort(null);
2075:
2076: // Ask deleter to locate unreferenced files & remove
2077: // them:
2078: deleter.checkpoint(segmentInfos, false);
2079: deleter.refresh();
2080: }
2081:
2082: commitPending = false;
2083: closeInternal(false);
2084: } else
2085: waitForClose();
2086: }
2087:
2088: private synchronized void finishMerges(boolean waitForMerges)
2089: throws IOException {
2090: if (!waitForMerges) {
2091:
2092: stopMerges = true;
2093:
2094: // Abort all pending & running merges:
2095: Iterator it = pendingMerges.iterator();
2096: while (it.hasNext()) {
2097: final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it
2098: .next();
2099: if (infoStream != null)
2100: message("now abort pending merge "
2101: + merge.segString(directory));
2102: merge.abort();
2103: mergeFinish(merge);
2104: }
2105: pendingMerges.clear();
2106:
2107: it = runningMerges.iterator();
2108: while (it.hasNext()) {
2109: final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it
2110: .next();
2111: if (infoStream != null)
2112: message("now abort running merge "
2113: + merge.segString(directory));
2114: merge.abort();
2115: }
2116:
2117: // These merges periodically check whether they have
2118: // been aborted, and stop if so. We wait here to make
2119: // sure they all stop. It should not take very long
2120: // because the merge threads periodically check if
2121: // they are aborted.
2122: while (runningMerges.size() > 0) {
2123: if (infoStream != null)
2124: message("now wait for " + runningMerges.size()
2125: + " running merge to abort");
2126: try {
2127: wait();
2128: } catch (InterruptedException ie) {
2129: Thread.currentThread().interrupt();
2130: }
2131: }
2132:
2133: assert 0 == mergingSegments.size();
2134:
2135: if (infoStream != null)
2136: message("all running merges have aborted");
2137:
2138: } else {
2139: while (pendingMerges.size() > 0 || runningMerges.size() > 0) {
2140: try {
2141: wait();
2142: } catch (InterruptedException ie) {
2143: }
2144: }
2145: assert 0 == mergingSegments.size();
2146: }
2147: }
2148:
2149: /*
2150: * Called whenever the SegmentInfos has been updated and
2151: * the index files referenced exist (correctly) in the
2152: * index directory. If we are in autoCommit mode, we
2153: * commit the change immediately. Else, we mark
2154: * commitPending.
2155: */
2156: private synchronized void checkpoint() throws IOException {
2157: if (autoCommit) {
2158: segmentInfos.write(directory);
2159: commitPending = false;
2160: if (infoStream != null)
2161: message("checkpoint: wrote segments file \""
2162: + segmentInfos.getCurrentSegmentFileName()
2163: + "\"");
2164: } else {
2165: commitPending = true;
2166: }
2167: }
2168:
2169: /** Merges all segments from an array of indexes into this index.
2170: *
2171: * <p>This may be used to parallelize batch indexing. A large document
2172: * collection can be broken into sub-collections. Each sub-collection can be
2173: * indexed in parallel, on a different thread, process or machine. The
2174: * complete index can then be created by merging sub-collection indexes
2175: * with this method.
2176: *
2177: * <p><b>NOTE:</b> the index in each Directory must not be
2178: * changed (opened by a writer) while this method is
2179: * running. This method does not acquire a write lock in
2180: * each input Directory, so it is up to the caller to
2181: * enforce this.
2182: *
2183: * <p>After this completes, the index is optimized.
2184: *
2185: * <p>This method is transactional in how Exceptions are
2186: * handled: it does not commit a new segments_N file until
2187: * all indexes are added. This means if an Exception
2188: * occurs (for example disk full), then either no indexes
2189: * will have been added or they all will have been.</p>
2190: *
2191: * <p>If an Exception is hit, it's still possible that all
2192: * indexes were successfully added. This happens when the
2193: * Exception is hit when trying to build a CFS file. In
2194: * this case, one segment in the index will be in non-CFS
2195: * format, even when using compound file format.</p>
2196: *
2197: * <p>Also note that on an Exception, the index may still
2198: * have been partially or fully optimized even though none
2199: * of the input indexes were added. </p>
2200: *
2201: * <p>Note that this requires temporary free space in the
2202: * Directory up to 2X the sum of all input indexes
2203: * (including the starting index). If readers/searchers
2204: * are open against the starting index, then temporary
2205: * free space required will be higher by the size of the
2206: * starting index (see {@link #optimize()} for details).
2207: * </p>
2208: *
2209: * <p>Once this completes, the final size of the index
2210: * will be less than the sum of all input index sizes
2211: * (including the starting index). It could be quite a
2212: * bit smaller (if there were many pending deletes) or
2213: * just slightly smaller.</p>
2214: *
2215: * <p>See <a target="_top"
2216: * href="http://issues.apache.org/jira/browse/LUCENE-702">LUCENE-702</a>
2217: * for details.</p>
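*
* <p>A sketch of the parallel-indexing pattern described above; the
* paths are illustrative, and each sub-index is assumed to have been
* built (and closed) by its own IndexWriter:</p>
* <pre>
*   Directory part1 = FSDirectory.getDirectory("/index/part1");
*   Directory part2 = FSDirectory.getDirectory("/index/part2");
*   writer.addIndexes(new Directory[] { part1, part2 });
* </pre>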
2218: * @throws CorruptIndexException if the index is corrupt
2219: * @throws IOException if there is a low-level IO error
2220: */
2221: public synchronized void addIndexes(Directory[] dirs)
2222: throws CorruptIndexException, IOException {
2223:
2224: ensureOpen();
2225: if (infoStream != null)
2226: message("flush at addIndexes");
2227: flush();
2228:
2229: boolean success = false;
2230:
2231: startTransaction();
2232:
2233: try {
2234: for (int i = 0; i < dirs.length; i++) {
2235: SegmentInfos sis = new SegmentInfos(); // read infos from dir
2236: sis.read(dirs[i]);
2237: for (int j = 0; j < sis.size(); j++) {
2238: segmentInfos.addElement(sis.info(j)); // add each info
2239: }
2240: }
2241:
2242: optimize();
2243:
2244: success = true;
2245: } finally {
2246: if (success) {
2247: commitTransaction();
2248: } else {
2249: rollbackTransaction();
2250: }
2251: }
2252: }
2253:
2254: private synchronized void resetMergeExceptions() {
2255: mergeExceptions = new ArrayList();
2256: mergeGen++;
2257: }
2258:
2259: /**
2260: * Merges all segments from an array of indexes into this index.
2261: * <p>
2262: * This is similar to addIndexes(Directory[]). However, no optimize()
2263: * is called either at the beginning or at the end. Instead, merges
2264: * are carried out as necessary.
2265: *
2266: * <p><b>NOTE:</b> the index in each Directory must not be
2267: * changed (opened by a writer) while this method is
2268: * running. This method does not acquire a write lock in
2269: * each input Directory, so it is up to the caller to
2270: * enforce this.
2271: *
2272: * <p>
* This requires this index not be among those to be added and that the
* upper bound of those segments' doc counts not exceed maxMergeDocs.
2275: *
2276: * <p>See {@link #addIndexes(Directory[])} for
2277: * details on transactional semantics, temporary free
2278: * space required in the Directory, and non-CFS segments
2279: * on an Exception.</p>
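*
* <p>Usage mirrors {@link #addIndexes(Directory[])}; a minimal sketch,
* reusing the illustrative <code>part1</code>/<code>part2</code>
* sub-indexes from that example:</p>
* <pre>
*   writer.addIndexesNoOptimize(new Directory[] { part1, part2 });
*   // segments are merged only as the merge policy dictates; no optimize
* </pre>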
2280: * @throws CorruptIndexException if the index is corrupt
2281: * @throws IOException if there is a low-level IO error
2282: */
2283: public synchronized void addIndexesNoOptimize(Directory[] dirs)
2284: throws CorruptIndexException, IOException {
2285:
2286: ensureOpen();
2287: if (infoStream != null)
2288: message("flush at addIndexesNoOptimize");
2289: flush();
2290:
2291: boolean success = false;
2292:
2293: startTransaction();
2294:
2295: try {
2296:
2297: for (int i = 0; i < dirs.length; i++) {
2298: if (directory == dirs[i]) {
2299: // cannot add this index: segments may be deleted in merge before added
2300: throw new IllegalArgumentException(
2301: "Cannot add this index to itself");
2302: }
2303:
2304: SegmentInfos sis = new SegmentInfos(); // read infos from dir
2305: sis.read(dirs[i]);
2306: for (int j = 0; j < sis.size(); j++) {
2307: SegmentInfo info = sis.info(j);
2308: segmentInfos.addElement(info); // add each info
2309: }
2310: }
2311:
2312: maybeMerge();
2313:
2314: // If after merging there remain segments in the index
2315: // that are in a different directory, just copy these
2316: // over into our index. This is necessary (before
2317: // finishing the transaction) to avoid leaving the
2318: // index in an unusable (inconsistent) state.
2319: copyExternalSegments();
2320:
2321: success = true;
2322:
2323: } finally {
2324: if (success) {
2325: commitTransaction();
2326: } else {
2327: rollbackTransaction();
2328: }
2329: }
2330: }
2331:
2332: /* If any of our segments are using a directory != ours
2333: * then copy them over. Currently this is only used by
2334: * addIndexesNoOptimize(). */
2335: private synchronized void copyExternalSegments()
2336: throws CorruptIndexException, IOException {
2337: final int numSegments = segmentInfos.size();
2338: for (int i = 0; i < numSegments; i++) {
2339: SegmentInfo info = segmentInfos.info(i);
2340: if (info.dir != directory) {
2341: MergePolicy.OneMerge merge = new MergePolicy.OneMerge(
2342: segmentInfos.range(i, 1 + i), info
2343: .getUseCompoundFile());
2344: if (registerMerge(merge)) {
2345: pendingMerges.remove(merge);
2346: runningMerges.add(merge);
2347: merge(merge);
2348: } else
2349: // This means there is a bug in the
2350: // MergeScheduler. MergeSchedulers in general are
2351: // not allowed to run a merge involving segments
2352: // external to this IndexWriter's directory in the
2353: // background because this would put the index
2354: // into an inconsistent state (where segmentInfos
2355: // has been written with such external segments
2356: // that an IndexReader would fail to load).
2357: throw new MergePolicy.MergeException(
2358: "segment \""
2359: + info.name
2360: + " exists in external directory yet the MergeScheduler executed the merge in a separate thread");
2361: }
2362: }
2363: }
2364:
2365: /** Merges the provided indexes into this index.
2366: * <p>After this completes, the index is optimized. </p>
2367: * <p>The provided IndexReaders are not closed.</p>
2368:
2369: * <p>See {@link #addIndexes(Directory[])} for
2370: * details on transactional semantics, temporary free
2371: * space required in the Directory, and non-CFS segments
2372: * on an Exception.</p>
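*
* <p>A minimal sketch; <code>otherDirectory</code> is an assumed,
* already-built index, and closing the reader remains the caller's job:</p>
* <pre>
*   IndexReader reader = IndexReader.open(otherDirectory);
*   writer.addIndexes(new IndexReader[] { reader });
*   reader.close();
* </pre>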
2373: * @throws CorruptIndexException if the index is corrupt
2374: * @throws IOException if there is a low-level IO error
2375: */
2376: public synchronized void addIndexes(IndexReader[] readers)
2377: throws CorruptIndexException, IOException {
2378:
2379: ensureOpen();
2380: optimize(); // start with zero or 1 seg
2381:
2382: final String mergedName = newSegmentName();
2383: SegmentMerger merger = new SegmentMerger(this , mergedName, null);
2384:
2385: SegmentInfo info;
2386:
2387: IndexReader sReader = null;
2388: try {
2389: if (segmentInfos.size() == 1) { // add existing index, if any
2390: sReader = SegmentReader.get(segmentInfos.info(0));
2391: merger.add(sReader);
2392: }
2393:
2394: for (int i = 0; i < readers.length; i++)
2395: // add new indexes
2396: merger.add(readers[i]);
2397:
2398: boolean success = false;
2399:
2400: startTransaction();
2401:
2402: try {
2403: int docCount = merger.merge(); // merge 'em
2404:
2405: if (sReader != null) {
2406: sReader.close();
2407: sReader = null;
2408: }
2409:
2410: segmentInfos.setSize(0); // pop old infos & add new
2411: info = new SegmentInfo(mergedName, docCount, directory,
2412: false, true, -1, null, false);
2413: segmentInfos.addElement(info);
2414:
2415: success = true;
2416:
2417: } finally {
2418: if (!success) {
2419: if (infoStream != null)
2420: message("hit exception in addIndexes during merge");
2421:
2422: rollbackTransaction();
2423: } else {
2424: commitTransaction();
2425: }
2426: }
2427: } finally {
2428: if (sReader != null) {
2429: sReader.close();
2430: }
2431: }
2432:
2433: if (mergePolicy instanceof LogMergePolicy
2434: && getUseCompoundFile()) {
2435:
2436: boolean success = false;
2437:
2438: startTransaction();
2439:
2440: try {
merger.createCompoundFile(mergedName + ".cfs");
info.setUseCompoundFile(true);
success = true;
2443: } finally {
2444: if (!success) {
2445: if (infoStream != null)
2446: message("hit exception building compound file in addIndexes during merge");
2447:
2448: rollbackTransaction();
2449: } else {
2450: commitTransaction();
2451: }
2452: }
2453: }
2454: }
2455:
2456: // This is called after pending added and deleted
2457: // documents have been flushed to the Directory but before
2458: // the change is committed (new segments_N file written).
2459: void doAfterFlush() throws IOException {
2460: }
2461:
2462: /**
2463: * Flush all in-memory buffered updates (adds and deletes)
2464: * to the Directory.
* <p>Note: if <code>autoCommit=false</code>, flushed data is still
* not visible to readers until {@link #close} is called.
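* <p>For example, when <code>autoCommit=true</code> a newly opened
* reader sees the flushed documents (a sketch; <code>directory</code>
* and <code>doc</code> are assumed to exist):</p>
* <pre>
*   writer.addDocument(doc);
*   writer.flush();
*   IndexReader reader = IndexReader.open(directory);  // sees doc
* </pre>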
2467: * @throws CorruptIndexException if the index is corrupt
2468: * @throws IOException if there is a low-level IO error
2469: */
2470: public final void flush() throws CorruptIndexException, IOException {
2471: flush(true, false);
2472: }
2473:
2474: /**
* Flush all in-memory buffered updates (adds and deletes)
2476: * to the Directory.
2477: * @param triggerMerge if true, we may merge segments (if
2478: * deletes or docs were flushed) if necessary
2479: * @param flushDocStores if false we are allowed to keep
2480: * doc stores open to share with the next segment
2481: */
2482: protected final void flush(boolean triggerMerge,
2483: boolean flushDocStores) throws CorruptIndexException,
2484: IOException {
2485: ensureOpen();
2486:
2487: if (doFlush(flushDocStores) && triggerMerge)
2488: maybeMerge();
2489: }
2490:
2491: private synchronized final boolean doFlush(boolean flushDocStores)
2492: throws CorruptIndexException, IOException {
2493:
2494: // Make sure no threads are actively adding a document
2495:
2496: // Returns true if docWriter is currently aborting, in
2497: // which case we skip flushing this segment
2498: if (docWriter.pauseAllThreads()) {
2499: docWriter.resumeAllThreads();
2500: return false;
2501: }
2502:
2503: try {
2504:
2505: SegmentInfo newSegment = null;
2506:
2507: final int numDocs = docWriter.getNumDocsInRAM();
2508:
2509: // Always flush docs if there are any
2510: boolean flushDocs = numDocs > 0;
2511:
2512: // With autoCommit=true we always must flush the doc
2513: // stores when we flush
2514: flushDocStores |= autoCommit;
2515: String docStoreSegment = docWriter.getDocStoreSegment();
2516: if (docStoreSegment == null)
2517: flushDocStores = false;
2518:
2519: // Always flush deletes if there are any delete terms.
2520: // TODO: when autoCommit=false we don't have to flush
2521: // deletes with every flushed segment; we can save
2522: // CPU/IO by buffering longer & flushing deletes only
2523: // when they are full or writer is being closed. We
2524: // have to fix the "applyDeletesSelectively" logic to
2525: // apply to more than just the last flushed segment
2526: boolean flushDeletes = docWriter.hasDeletes();
2527:
2528: if (infoStream != null) {
2529: message(" flush: segment=" + docWriter.getSegment()
2530: + " docStoreSegment="
2531: + docWriter.getDocStoreSegment()
2532: + " docStoreOffset="
2533: + docWriter.getDocStoreOffset() + " flushDocs="
2534: + flushDocs + " flushDeletes=" + flushDeletes
2535: + " flushDocStores=" + flushDocStores
2536: + " numDocs=" + numDocs + " numBufDelTerms="
2537: + docWriter.getNumBufferedDeleteTerms());
2538: message(" index before flush " + segString());
2539: }
2540:
2541: int docStoreOffset = docWriter.getDocStoreOffset();
2542:
2543: // docStoreOffset should only be non-zero when
2544: // autoCommit == false
2545: assert !autoCommit || 0 == docStoreOffset;
2546:
2547: boolean docStoreIsCompoundFile = false;
2548:
2549: // Check if the doc stores must be separately flushed
2550: // because other segments, besides the one we are about
2551: // to flush, reference it
2552: if (flushDocStores
2553: && (!flushDocs || !docWriter.getSegment().equals(
2554: docWriter.getDocStoreSegment()))) {
2555: // We must separately flush the doc store
2556: if (infoStream != null)
2557: message(" flush shared docStore segment "
2558: + docStoreSegment);
2559:
2560: docStoreIsCompoundFile = flushDocStores();
2561: flushDocStores = false;
2562: }
2563:
2564: String segment = docWriter.getSegment();
2565:
2566: // If we are flushing docs, segment must not be null:
2567: assert segment != null || !flushDocs;
2568:
2569: if (flushDocs || flushDeletes) {
2570:
2571: SegmentInfos rollback = null;
2572:
2573: if (flushDeletes)
2574: rollback = (SegmentInfos) segmentInfos.clone();
2575:
2576: boolean success = false;
2577:
2578: try {
2579: if (flushDocs) {
2580:
2581: if (0 == docStoreOffset && flushDocStores) {
2582: // This means we are flushing private doc stores
2583: // with this segment, so it will not be shared
2584: // with other segments
2585: assert docStoreSegment != null;
2586: assert docStoreSegment.equals(segment);
2587: docStoreOffset = -1;
2588: docStoreIsCompoundFile = false;
2589: docStoreSegment = null;
2590: }
2591:
2592: int flushedDocCount = docWriter
2593: .flush(flushDocStores);
2594:
2595: newSegment = new SegmentInfo(segment,
2596: flushedDocCount, directory, false,
2597: true, docStoreOffset, docStoreSegment,
2598: docStoreIsCompoundFile);
2599: segmentInfos.addElement(newSegment);
2600: }
2601:
2602: if (flushDeletes) {
2603: // we should be able to change this so we can
2604: // buffer deletes longer and then flush them to
2605: // multiple flushed segments, when
2606: // autoCommit=false
2607: applyDeletes(flushDocs);
2608: doAfterFlush();
2609: }
2610:
2611: checkpoint();
2612: success = true;
2613: } finally {
2614: if (!success) {
2615:
2616: if (infoStream != null)
2617: message("hit exception flushing segment "
2618: + segment);
2619:
2620: if (flushDeletes) {
2621:
2622: // Carefully check if any partial .del files
2623: // should be removed:
2624: final int size = rollback.size();
2625: for (int i = 0; i < size; i++) {
2626: final String newDelFileName = segmentInfos
2627: .info(i).getDelFileName();
2628: final String delFileName = rollback
2629: .info(i).getDelFileName();
2630: if (newDelFileName != null
2631: && !newDelFileName
2632: .equals(delFileName))
2633: deleter.deleteFile(newDelFileName);
2634: }
2635:
2636: // Fully replace the segmentInfos since flushed
2637: // deletes could have changed any of the
2638: // SegmentInfo instances:
2639: segmentInfos.clear();
2640: segmentInfos.addAll(rollback);
2641:
2642: } else {
2643: // Remove segment we added, if any:
2644: if (newSegment != null
2645: && segmentInfos.size() > 0
2646: && segmentInfos.info(segmentInfos
2647: .size() - 1) == newSegment)
2648: segmentInfos
2649: .remove(segmentInfos.size() - 1);
2650: }
2651: if (flushDocs)
2652: docWriter.abort(null);
2653: deletePartialSegmentsFile();
2654: deleter.checkpoint(segmentInfos, false);
2655:
2656: if (segment != null)
2657: deleter.refresh(segment);
2658: }
2659: }
2660:
2661: deleter.checkpoint(segmentInfos, autoCommit);
2662:
2663: if (flushDocs
2664: && mergePolicy.useCompoundFile(segmentInfos,
2665: newSegment)) {
2666: success = false;
2667: try {
2668: docWriter.createCompoundFile(segment);
2669: newSegment.setUseCompoundFile(true);
2670: checkpoint();
2671: success = true;
2672: } finally {
2673: if (!success) {
2674: if (infoStream != null)
2675: message("hit exception creating compound file for newly flushed segment "
2676: + segment);
2677: newSegment.setUseCompoundFile(false);
2678: deleter
2679: .deleteFile(segment
2680: + "."
2681: + IndexFileNames.COMPOUND_FILE_EXTENSION);
2682: deletePartialSegmentsFile();
2683: }
2684: }
2685:
2686: deleter.checkpoint(segmentInfos, autoCommit);
2687: }
2688:
2689: return true;
2690: } else {
2691: return false;
2692: }
2693:
2694: } finally {
2695: docWriter.clearFlushPending();
2696: docWriter.resumeAllThreads();
2697: }
2698: }
2699:
2700: /** Expert: Return the total size of all index files currently cached in memory.
* Useful for size management when deciding when to call {@link #flush()}.
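* <p>A sketch of RAM-based flushing (the 48 MB budget is an arbitrary
* illustrative value):</p>
* <pre>
*   writer.addDocument(doc);
*   if (writer.ramSizeInBytes() > 48 * 1024 * 1024)
*     writer.flush();
* </pre>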
2702: */
2703: public final long ramSizeInBytes() {
2704: ensureOpen();
2705: return docWriter.getRAMUsed();
2706: }
2707:
/** Expert: Return the number of documents currently buffered in RAM.
2709: * Useful when calling flush()
2710: */
2711: public final synchronized int numRamDocs() {
2712: ensureOpen();
2713: return docWriter.getNumDocsInRAM();
2714: }
2715:
2716: private int ensureContiguousMerge(MergePolicy.OneMerge merge) {
2717:
2718: int first = segmentInfos.indexOf(merge.segments.info(0));
2719: if (first == -1)
2720: throw new MergePolicy.MergeException(
2721: "could not find segment "
2722: + merge.segments.info(0).name
2723: + " in current segments");
2724:
2725: final int numSegments = segmentInfos.size();
2726:
2727: final int numSegmentsToMerge = merge.segments.size();
2728: for (int i = 0; i < numSegmentsToMerge; i++) {
2729: final SegmentInfo info = merge.segments.info(i);
2730:
2731: if (first + i >= numSegments
2732: || !segmentInfos.info(first + i).equals(info)) {
2733: if (segmentInfos.indexOf(info) == -1)
2734: throw new MergePolicy.MergeException(
2735: "MergePolicy selected a segment ("
2736: + info.name
2737: + ") that is not in the index");
2738: else
2739: throw new MergePolicy.MergeException(
2740: "MergePolicy selected non-contiguous segments to merge ("
2741: + merge
2742: + " vs "
2743: + segString()
2744: + "), which IndexWriter (currently) cannot handle");
2745: }
2746: }
2747:
2748: return first;
2749: }
2750:
2751: /* FIXME if we want to support non-contiguous segment merges */
2752: synchronized private boolean commitMerge(MergePolicy.OneMerge merge)
2753: throws IOException {
2754:
2755: assert merge.registerDone;
2756:
2757: // If merge was explicitly aborted, or, if abort() or
2758: // rollbackTransaction() had been called since our merge
2759: // started (which results in an unqualified
2760: // deleter.refresh() call that will remove any index
2761: // file that current segments does not reference), we
2762: // abort this merge
2763: if (merge.isAborted()) {
2764: if (infoStream != null)
2765: message("commitMerge: skipping merge "
2766: + merge.segString(directory)
2767: + ": it was aborted");
2768:
2769: assert merge.increfDone;
2770: decrefMergeSegments(merge);
2771: deleter.refresh(merge.info.name);
2772: return false;
2773: }
2774:
2775: boolean success = false;
2776:
2777: int start;
2778:
2779: try {
2780: SegmentInfos sourceSegmentsClone = merge.segmentsClone;
2781: SegmentInfos sourceSegments = merge.segments;
2782:
2783: start = ensureContiguousMerge(merge);
2784: if (infoStream != null)
2785: message("commitMerge " + merge.segString(directory));
2786:
2787: // Carefully merge deletes that occurred after we
2788: // started merging:
2789:
2790: BitVector deletes = null;
2791: int docUpto = 0;
2792:
2793: final int numSegmentsToMerge = sourceSegments.size();
2794: for (int i = 0; i < numSegmentsToMerge; i++) {
2795: final SegmentInfo previousInfo = sourceSegmentsClone
2796: .info(i);
2797: final SegmentInfo currentInfo = sourceSegments.info(i);
2798:
2799: assert currentInfo.docCount == previousInfo.docCount;
2800:
2801: final int docCount = currentInfo.docCount;
2802:
2803: if (previousInfo.hasDeletions()) {
2804:
2805: // There were deletes on this segment when the merge
2806: // started. The merge has collapsed away those
2807: // deletes, but, if new deletes were flushed since
2808: // the merge started, we must now carefully keep any
2809: // newly flushed deletes but mapping them to the new
2810: // docIDs.
2811:
2812: assert currentInfo.hasDeletions();
2813:
2814: // Load deletes present @ start of merge, for this segment:
2815: BitVector previousDeletes = new BitVector(
2816: previousInfo.dir, previousInfo
2817: .getDelFileName());
2818:
2819: if (!currentInfo.getDelFileName().equals(
2820: previousInfo.getDelFileName())) {
2821: // This means this segment has had new deletes
2822: // committed since we started the merge, so we
2823: // must merge them:
2824: if (deletes == null)
2825: deletes = new BitVector(merge.info.docCount);
2826:
2827: BitVector currentDeletes = new BitVector(
2828: currentInfo.dir, currentInfo
2829: .getDelFileName());
2830: for (int j = 0; j < docCount; j++) {
2831: if (previousDeletes.get(j))
2832: assert currentDeletes.get(j);
2833: else {
2834: if (currentDeletes.get(j))
2835: deletes.set(docUpto);
2836: docUpto++;
2837: }
2838: }
2839: } else
2840: docUpto += docCount - previousDeletes.count();
2841:
2842: } else if (currentInfo.hasDeletions()) {
2843: // This segment had no deletes before but now it
2844: // does:
2845: if (deletes == null)
2846: deletes = new BitVector(merge.info.docCount);
2847: BitVector currentDeletes = new BitVector(directory,
2848: currentInfo.getDelFileName());
2849:
2850: for (int j = 0; j < docCount; j++) {
2851: if (currentDeletes.get(j))
2852: deletes.set(docUpto);
2853: docUpto++;
2854: }
2855:
2856: } else
2857: // No deletes before or after
2858: docUpto += currentInfo.docCount;
2859:
2860: merge.checkAborted(directory);
2861: }
2862:
2863: if (deletes != null) {
2864: merge.info.advanceDelGen();
2865: deletes.write(directory, merge.info.getDelFileName());
2866: }
2867: success = true;
2868: } finally {
2869: if (!success) {
2870: if (infoStream != null)
2871: message("hit exception creating merged deletes file");
2872: deleter.refresh(merge.info.name);
2873: }
2874: }
2875:
2876: // Simple optimization: if the doc store we are using
// has been closed and is now in compound format (but
2878: // wasn't when we started), then we will switch to the
2879: // compound format as well:
2880: final String mergeDocStoreSegment = merge.info
2881: .getDocStoreSegment();
2882: if (mergeDocStoreSegment != null
2883: && !merge.info.getDocStoreIsCompoundFile()) {
2884: final int size = segmentInfos.size();
2885: for (int i = 0; i < size; i++) {
2886: final SegmentInfo info = segmentInfos.info(i);
2887: final String docStoreSegment = info
2888: .getDocStoreSegment();
2889: if (docStoreSegment != null
2890: && docStoreSegment.equals(mergeDocStoreSegment)
2891: && info.getDocStoreIsCompoundFile()) {
2892: merge.info.setDocStoreIsCompoundFile(true);
2893: break;
2894: }
2895: }
2896: }
2897:
2898: success = false;
2899: SegmentInfos rollback = null;
2900: try {
2901: rollback = (SegmentInfos) segmentInfos.clone();
2902: segmentInfos.subList(start, start + merge.segments.size())
2903: .clear();
2904: segmentInfos.add(start, merge.info);
2905: checkpoint();
2906: success = true;
2907: } finally {
2908: if (!success && rollback != null) {
2909: if (infoStream != null)
2910: message("hit exception when checkpointing after merge");
2911: segmentInfos.clear();
2912: segmentInfos.addAll(rollback);
2913: deletePartialSegmentsFile();
2914: deleter.refresh(merge.info.name);
2915: }
2916: }
2917:
2918: if (merge.optimize)
2919: segmentsToOptimize.add(merge.info);
2920:
2921: // Must checkpoint before decrefing so any newly
2922: // referenced files in the new merge.info are incref'd
2923: // first:
2924: deleter.checkpoint(segmentInfos, autoCommit);
2925:
2926: decrefMergeSegments(merge);
2927:
2928: return true;
2929: }
2930:
2931: private void decrefMergeSegments(MergePolicy.OneMerge merge)
2932: throws IOException {
2933: final SegmentInfos sourceSegmentsClone = merge.segmentsClone;
2934: final int numSegmentsToMerge = sourceSegmentsClone.size();
2935: assert merge.increfDone;
2936: merge.increfDone = false;
2937: for (int i = 0; i < numSegmentsToMerge; i++) {
2938: final SegmentInfo previousInfo = sourceSegmentsClone
2939: .info(i);
2940: // Decref all files for this SegmentInfo (this
2941: // matches the incref in mergeInit):
2942: if (previousInfo.dir == directory)
2943: deleter.decRef(previousInfo.files());
2944: }
2945: }
2946:
2947: /**
2948: * Merges the indicated segments, replacing them in the stack with a
2949: * single segment.
2950: */
2951:
2952: final void merge(MergePolicy.OneMerge merge)
2953: throws CorruptIndexException, IOException {
2954:
2955: assert merge.registerDone;
2956: assert !merge.optimize || merge.maxNumSegmentsOptimize > 0;
2957:
2958: boolean success = false;
2959:
2960: try {
2961:
2962: try {
2963: if (merge.info == null)
2964: mergeInit(merge);
2965:
2966: if (infoStream != null)
2967: message("now merge\n merge="
2968: + merge.segString(directory) + "\n index="
2969: + segString());
2970:
2971: mergeMiddle(merge);
2972: success = true;
2973: } catch (MergePolicy.MergeAbortedException e) {
2974: merge.setException(e);
2975: addMergeException(merge);
2976: // We can ignore this exception, unless the merge
2977: // involves segments from external directories, in
2978: // which case we must throw it so, for example, the
2979: // rollbackTransaction code in addIndexes* is
2980: // executed.
2981: if (merge.isExternal)
2982: throw e;
2983: }
2984: } finally {
2985: synchronized (this ) {
2986: try {
2987: if (!success && infoStream != null)
2988: message("hit exception during merge");
2989:
2990: mergeFinish(merge);
2991:
2992: // This merge (and, generally, any change to the
2993: // segments) may now enable new merges, so we call
2994: // merge policy & update pending merges.
2995: if (success && !merge.isAborted() && !closed
2996: && !closing)
2997: updatePendingMerges(
2998: merge.maxNumSegmentsOptimize,
2999: merge.optimize);
3000: } finally {
3001: runningMerges.remove(merge);
3002: // Optimize may be waiting on the final optimize
3003: // merge to finish; and finishMerges() may be
3004: // waiting for all merges to finish:
3005: notifyAll();
3006: }
3007: }
3008: }
3009: }
3010:
3011: /** Checks whether this merge involves any segments
3012: * already participating in a merge. If not, this merge
3013: * is "registered", meaning we record that its segments
3014: * are now participating in a merge, and true is
3015: * returned. Else (the merge conflicts) false is
3016: * returned. */
3017: final synchronized boolean registerMerge(MergePolicy.OneMerge merge) {
3018:
3019: if (merge.registerDone)
3020: return true;
3021:
3022: final int count = merge.segments.size();
3023: boolean isExternal = false;
3024: for (int i = 0; i < count; i++) {
3025: final SegmentInfo info = merge.segments.info(i);
3026: if (mergingSegments.contains(info))
3027: return false;
3028: if (segmentInfos.indexOf(info) == -1)
3029: return false;
3030: if (info.dir != directory)
3031: isExternal = true;
3032: }
3033:
3034: pendingMerges.add(merge);
3035:
3036: if (infoStream != null)
3037: message("add merge to pendingMerges: "
3038: + merge.segString(directory) + " [total "
3039: + pendingMerges.size() + " pending]");
3040:
3041: merge.mergeGen = mergeGen;
3042: merge.isExternal = isExternal;
3043:
3044: // OK it does not conflict; now record that this merge
3045: // is running (while synchronized) to avoid race
3046: // condition where two conflicting merges from different
3047: // threads, start
3048: for (int i = 0; i < count; i++)
3049: mergingSegments.add(merge.segments.info(i));
3050:
3051: // Merge is now registered
3052: merge.registerDone = true;
3053: return true;
3054: }
3055:
3056: /** Does initial setup for a merge, which is fast but holds
3057: * the synchronized lock on IndexWriter instance. */
3058: final synchronized void mergeInit(MergePolicy.OneMerge merge)
3059: throws IOException {
3060:
3061: assert merge.registerDone;
3062:
3063: if (merge.isAborted())
3064: return;
3065:
3066: final SegmentInfos sourceSegments = merge.segments;
3067: final int end = sourceSegments.size();
3068:
3069: ensureContiguousMerge(merge);
3070:
3071: // Check whether this merge will allow us to skip
3072: // merging the doc stores (stored field & vectors).
3073: // This is a very substantial optimization (saves tons
3074: // of IO) that can only be applied with
3075: // autoCommit=false.
3076:
3077: Directory lastDir = directory;
3078: String lastDocStoreSegment = null;
3079: int next = -1;
3080:
3081: boolean mergeDocStores = false;
3082: boolean doFlushDocStore = false;
3083: final String currentDocStoreSegment = docWriter
3084: .getDocStoreSegment();
3085:
3086: // Test each segment to be merged: check if we need to
3087: // flush/merge doc stores
3088: for (int i = 0; i < end; i++) {
3089: SegmentInfo si = sourceSegments.info(i);
3090:
3091: // If it has deletions we must merge the doc stores
3092: if (si.hasDeletions())
3093: mergeDocStores = true;
3094:
3095: // If it has its own (private) doc stores we must
3096: // merge the doc stores
3097: if (-1 == si.getDocStoreOffset())
3098: mergeDocStores = true;
3099:
3100: // If it has a different doc store segment than
3101: // previous segments, we must merge the doc stores
3102: String docStoreSegment = si.getDocStoreSegment();
3103: if (docStoreSegment == null)
3104: mergeDocStores = true;
3105: else if (lastDocStoreSegment == null)
3106: lastDocStoreSegment = docStoreSegment;
3107: else if (!lastDocStoreSegment.equals(docStoreSegment))
3108: mergeDocStores = true;
3109:
// Segments' docStoreOffsets must be in-order,
3111: // contiguous. For the default merge policy now
3112: // this will always be the case but for an arbitrary
3113: // merge policy this may not be the case
3114: if (-1 == next)
3115: next = si.getDocStoreOffset() + si.docCount;
3116: else if (next != si.getDocStoreOffset())
3117: mergeDocStores = true;
3118: else
3119: next = si.getDocStoreOffset() + si.docCount;
3120:
3121: // If the segment comes from a different directory
3122: // we must merge
3123: if (lastDir != si.dir)
3124: mergeDocStores = true;
3125:
3126: // If the segment is referencing the current "live"
3127: // doc store outputs then we must merge
3128: if (si.getDocStoreOffset() != -1
3129: && currentDocStoreSegment != null
3130: && si.getDocStoreSegment().equals(
3131: currentDocStoreSegment))
3132: doFlushDocStore = true;
3133: }
3134:
3135: final int docStoreOffset;
3136: final String docStoreSegment;
3137: final boolean docStoreIsCompoundFile;
3138:
3139: if (mergeDocStores) {
3140: docStoreOffset = -1;
3141: docStoreSegment = null;
3142: docStoreIsCompoundFile = false;
3143: } else {
3144: SegmentInfo si = sourceSegments.info(0);
3145: docStoreOffset = si.getDocStoreOffset();
3146: docStoreSegment = si.getDocStoreSegment();
3147: docStoreIsCompoundFile = si.getDocStoreIsCompoundFile();
3148: }
3149:
3150: if (mergeDocStores && doFlushDocStore) {
3151: // SegmentMerger intends to merge the doc stores
3152: // (stored fields, vectors), and at least one of the
3153: // segments to be merged refers to the currently
3154: // live doc stores.
3155:
3156: // TODO: if we know we are about to merge away these
3157: // newly flushed doc store files then we should not
3158: // make compound file out of them...
3159: if (infoStream != null)
3160: message("flush at merge");
3161: flush(false, true);
3162: }
3163:
3164: // We must take a full copy at this point so that we can
3165: // properly merge deletes in commitMerge()
3166: merge.segmentsClone = (SegmentInfos) merge.segments.clone();
3167:
3168: for (int i = 0; i < end; i++) {
3169: SegmentInfo si = merge.segmentsClone.info(i);
3170:
3171: // IncRef all files for this segment info to make sure
3172: // they are not removed while we are trying to merge.
3173: if (si.dir == directory)
3174: deleter.incRef(si.files());
3175: }
3176:
3177: merge.increfDone = true;
3178:
3179: merge.mergeDocStores = mergeDocStores;
3180:
3181: // Bind a new segment name here so that even with
3182: // ConcurrentMergeScheduler we keep deterministic
3183: // segment names.
3184: merge.info = new SegmentInfo(newSegmentName(), 0, directory,
3185: false, true, docStoreOffset, docStoreSegment,
3186: docStoreIsCompoundFile);
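// (Editorial note: docCount is passed as 0 here and is set to the
// real merged count later in mergeMiddle(); the docStore* arguments
// carry over the shared doc store decided above, or the "no shared
// doc store" defaults when mergeDocStores is true.)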
3187: // Also enroll the merged segment into mergingSegments;
3188: // this prevents it from getting selected for a merge
3189: // after our merge is done but while we are building the
3190: // CFS:
3191: mergingSegments.add(merge.info);
3192: }
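// (Editorial sketch, not from the original source: the setup above,
// the slow merge work, and the cleanup below are typically driven
// from call sites outside this excerpt roughly as
//
//   try {
//     mergeMiddle(merge);   // time-consuming, no IndexWriter lock held
//   } finally {
//     mergeFinish(merge);   // fast, synchronized: decRef files, unregister
//   }
//
// as described by the javadoc of those two methods below.)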
3193:
3194: /** Does the finishing work for a merge, which is fast but
3195: * holds the synchronized lock on the IndexWriter instance. */
3196: final synchronized void mergeFinish(MergePolicy.OneMerge merge)
3197: throws IOException {
3198:
3199: if (merge.increfDone)
3200: decrefMergeSegments(merge);
3201:
3202: assert merge.registerDone;
3203:
3204: final SegmentInfos sourceSegments = merge.segments;
3205: final int end = sourceSegments.size();
3206: for (int i = 0; i < end; i++)
3207: mergingSegments.remove(sourceSegments.info(i));
3208: mergingSegments.remove(merge.info);
3209: merge.registerDone = false;
3210: }
3211:
3212: /** Does the actual (time-consuming) work of the merge,
3213: * but without holding the synchronized lock on the
3214: * IndexWriter instance. */
3215: final private int mergeMiddle(MergePolicy.OneMerge merge)
3216: throws CorruptIndexException, IOException {
3217:
3218: merge.checkAborted(directory);
3219:
3220: final String mergedName = merge.info.name;
3221:
3222: SegmentMerger merger = null;
3223:
3224: int mergedDocCount = 0;
3225:
3226: SegmentInfos sourceSegments = merge.segments;
3227: SegmentInfos sourceSegmentsClone = merge.segmentsClone;
3228: final int numSegments = sourceSegments.size();
3229:
3230: if (infoStream != null)
3231: message("merging " + merge.segString(directory));
3232:
3233: merger = new SegmentMerger(this, mergedName, merge);
3234:
3235: // This try/finally ensures the merger's readers are
3236: // closed:
3237:
3238: boolean success = false;
3239:
3240: try {
3241: int totDocCount = 0;
3242:
3243: for (int i = 0; i < numSegments; i++) {
3244: SegmentInfo si = sourceSegmentsClone.info(i);
3245: IndexReader reader = SegmentReader.get(si,
3246: MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet)
3247: merger.add(reader);
3248: totDocCount += reader.numDocs();
3249: }
3250: if (infoStream != null) {
3251: message("merge: total " + totDocCount + " docs");
3252: }
3253:
3254: merge.checkAborted(directory);
3255:
3256: mergedDocCount = merge.info.docCount = merger
3257: .merge(merge.mergeDocStores);
3258:
3259: assert mergedDocCount == totDocCount;
3260:
3261: success = true;
3262:
3263: } finally {
3264: // close readers before we attempt to delete
3265: // now-obsolete segments
3266: if (merger != null) {
3267: merger.closeReaders();
3268: }
3269: if (!success) {
3270: if (infoStream != null)
3271: message("hit exception during merge; now refresh deleter on segment "
3272: + mergedName);
3273: synchronized (this) {
3274: addMergeException(merge);
3275: deleter.refresh(mergedName);
3276: }
3277: }
3278: }
3279:
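// (Editorial note: commitMerge(), defined elsewhere in this class,
// installs merge.info into segmentInfos in place of the merged
// segments and, per the segmentsClone comment above, properly merges
// any deletes; as noted below, it returns false if the merge was
// aborted.)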
3280: if (!commitMerge(merge))
3281: // commitMerge will return false if this merge was aborted
3282: return 0;
3283:
3284: if (merge.useCompoundFile) {
3285:
3286: success = false;
3287: boolean skip = false;
3288: final String compoundFileName = mergedName + "."
3289: + IndexFileNames.COMPOUND_FILE_EXTENSION;
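// (Editorial note: e.g. "_7.cfs" for a hypothetical merged segment
// named "_7", assuming COMPOUND_FILE_EXTENSION is "cfs".)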
3290:
3291: try {
3292: try {
3293: merger.createCompoundFile(compoundFileName);
3294: success = true;
3295: } catch (IOException ioe) {
3296: synchronized (this) {
3297: if (segmentInfos.indexOf(merge.info) == -1) {
3298: // If another merge kicked in and merged our
3299: // new segment away while we were trying to
3300: // build the compound file, we can hit a
3301: // FileNotFoundException and possibly an
3302: // IOException over NFS. We can tell this has
3303: // happened because our SegmentInfo is no
3304: // longer in segmentInfos; if so, it is safe
3305: // to ignore the exception and skip
3306: // finishing/committing our compound file
3307: // creation.
3308: if (infoStream != null)
3309: message("hit exception creating compound file; ignoring it because our info (segment "
3310: + merge.info.name
3311: + ") has been merged away");
3312: skip = true;
3313: } else
3314: throw ioe;
3315: }
3316: }
3317: } finally {
3318: if (!success) {
3319: if (infoStream != null)
3320: message("hit exception creating compound file during merge: skip="
3321: + skip);
3322:
3323: synchronized (this) {
3324: if (!skip)
3325: addMergeException(merge);
3326: deleter.deleteFile(compoundFileName);
3327: }
3328: }
3329: }
3330:
3331: if (!skip) {
3332:
3333: synchronized (this) {
3334: if (skip || segmentInfos.indexOf(merge.info) == -1
3335: || merge.isAborted()) {
3336: // Our segment (committed in non-compound
3337: // format) got merged away while we were
3338: // building the compound format.
3339: deleter.deleteFile(compoundFileName);
3340: } else {
3341: success = false;
3342: try {
3343: merge.info.setUseCompoundFile(true);
3344: checkpoint();
3345: success = true;
3346: } finally {
3347: if (!success) {
3348: if (infoStream != null)
3349: message("hit exception checkpointing compound file during merge");
3350:
3351: // Must rollback:
3352: addMergeException(merge);
3353: merge.info.setUseCompoundFile(false);
3354: deletePartialSegmentsFile();
3355: deleter.deleteFile(compoundFileName);
3356: }
3357: }
3358:
3359: // Give deleter a chance to remove files now.
3360: deleter.checkpoint(segmentInfos, autoCommit);
3361: }
3362: }
3363: }
3364: }
3365:
3366: return mergedDocCount;
3367: }
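// (Editorial usage sketch, not part of the original source: the merge
// machinery above is normally reached indirectly through the public
// API, for example
//
//   Directory dir = FSDirectory.getDirectory("/path/to/index");  // hypothetical path
//   IndexWriter writer = new IndexWriter(dir, analyzer);         // analyzer: any existing Analyzer
//   writer.addDocument(doc);                                     // doc: an existing Document
//   writer.optimize();   // forces merges, which eventually run through the code above
//   writer.close();
//
// where the path, analyzer and doc are placeholders.)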
3368:
3369: synchronized void addMergeException(MergePolicy.OneMerge merge) {
3370: if (!mergeExceptions.contains(merge)
3371: && mergeGen == merge.mergeGen)
3372: mergeExceptions.add(merge);
3373: }
3374:
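// (Editorial note: a "partial" segments file is a segments_N whose
// generation was advanced for a commit that never completed, i.e.
// getLastGeneration() is behind getGeneration(); it is deleted during
// rollback, e.g. from the compound-file rollback path above.)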
3375: private void deletePartialSegmentsFile() throws IOException {
3376: if (segmentInfos.getLastGeneration() != segmentInfos
3377: .getGeneration()) {
3378: String segmentFileName = IndexFileNames
3379: .fileNameFromGeneration(IndexFileNames.SEGMENTS,
3380: "", segmentInfos.getGeneration());
3381: if (infoStream != null)
3382: message("now delete partial segments file \""
3383: + segmentFileName + "\"");
3384:
3385: deleter.deleteFile(segmentFileName);
3386: }
3387: }
3388:
3389: // Called during flush to apply any buffered deletes. If
3390: // flushedNewSegment is true then a new segment was just
3391: // created and flushed from the in-memory buffer, so we will
3392: // selectively apply the deletes to that new segment.
3393: private final void applyDeletes(boolean flushedNewSegment)
3394: throws CorruptIndexException, IOException {
3395:
3396: final HashMap bufferedDeleteTerms = docWriter
3397: .getBufferedDeleteTerms();
3398: final List bufferedDeleteDocIDs = docWriter
3399: .getBufferedDeleteDocIDs();
3400:
3401: if (infoStream != null)
3402: message("flush " + docWriter.getNumBufferedDeleteTerms()
3403: + " buffered deleted terms and "
3404: + bufferedDeleteDocIDs.size()
3405: + " deleted docIDs on " + segmentInfos.size()
3406: + " segments.");
3407:
3408: if (flushedNewSegment) {
3409: IndexReader reader = null;
3410: try {
3411: // Open readers w/o opening the stored fields /
3412: // vectors because these files may still be held
3413: // open for writing by docWriter
3414: reader = SegmentReader.get(segmentInfos
3415: .info(segmentInfos.size() - 1), false);
3416:
3417: // Apply delete terms to the segment just flushed from RAM,
3418: // applying them selectively so that a delete term is only applied
3419: // to the documents buffered before it, not those buffered after it.
3420: applyDeletesSelectively(bufferedDeleteTerms,
3421: bufferedDeleteDocIDs, reader);
3422: } finally {
3423: if (reader != null) {
3424: try {
3425: reader.doCommit();
3426: } finally {
3427: reader.doClose();
3428: }
3429: }
3430: }
3431: }
3432:
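// If a new segment was just flushed, it has already been handled
// selectively above, so exclude it from the plain per-term pass below.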
3433: int infosEnd = segmentInfos.size();
3434: if (flushedNewSegment) {
3435: infosEnd--;
3436: }
3437:
3438: for (int i = 0; i < infosEnd; i++) {
3439: IndexReader reader = null;
3440: try {
3441: reader = SegmentReader.get(segmentInfos.info(i), false);
3442:
3443: // Apply delete terms to the on-disk segments,
3444: // except the one just flushed from RAM.
3445: applyDeletes(bufferedDeleteTerms, reader);
3446: } finally {
3447: if (reader != null) {
3448: try {
3449: reader.doCommit();
3450: } finally {
3451: reader.doClose();
3452: }
3453: }
3454: }
3455: }
3456:
3457: // Clean up bufferedDeleteTerms.
3458: docWriter.clearBufferedDeletes();
3459: }
3460:
3461: // For test purposes.
3462: final synchronized int getBufferedDeleteTermsSize() {
3463: return docWriter.getBufferedDeleteTerms().size();
3464: }
3465:
3466: // For test purposes.
3467: final synchronized int getNumBufferedDeleteTerms() {
3468: return docWriter.getNumBufferedDeleteTerms();
3469: }
3470:
3471: // Apply buffered delete terms to the segment just flushed from RAM,
3472: // applying them selectively so that a delete term is only applied to
3473: // the documents buffered before it, not those buffered after it.
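// (Editorial worked example with hypothetical numbers: if 10 documents
// were buffered and a delete for term T arrived after the first 4,
// DocumentsWriter records num=4 for T; the loop below then deletes
// only docs 0..3 of the freshly flushed segment and stops as soon as
// docs.doc() >= 4, leaving docs 4..9 untouched.)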
3474: private final void applyDeletesSelectively(HashMap deleteTerms,
3475: List deleteIds, IndexReader reader)
3476: throws CorruptIndexException, IOException {
3477: Iterator iter = deleteTerms.entrySet().iterator();
3478: while (iter.hasNext()) {
3479: Entry entry = (Entry) iter.next();
3480: Term term = (Term) entry.getKey();
3481:
3482: TermDocs docs = reader.termDocs(term);
3483: if (docs != null) {
3484: int num = ((DocumentsWriter.Num) entry.getValue())
3485: .getNum();
3486: try {
3487: while (docs.next()) {
3488: int doc = docs.doc();
3489: if (doc >= num) {
3490: break;
3491: }
3492: reader.deleteDocument(doc);
3493: }
3494: } finally {
3495: docs.close();
3496: }
3497: }
3498: }
3499:
3500: if (deleteIds.size() > 0) {
3501: iter = deleteIds.iterator();
3502: while (iter.hasNext())
3503: reader.deleteDocument(((Integer) iter.next())
3504: .intValue());
3505: }
3506: }
3507:
3508: // Apply buffered delete terms to this reader.
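// (Unlike applyDeletesSelectively above, no doc-id cutoff is needed
// here: these segments existed before the deletes were buffered, so
// every matching document is deleted.)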
3509: private final void applyDeletes(HashMap deleteTerms,
3510: IndexReader reader) throws CorruptIndexException,
3511: IOException {
3512: Iterator iter = deleteTerms.entrySet().iterator();
3513: while (iter.hasNext()) {
3514: Entry entry = (Entry) iter.next();
3515: reader.deleteDocuments((Term) entry.getKey());
3516: }
3517: }
3518:
3519: // utility routines for tests
3520: SegmentInfo newestSegment() {
3521: return segmentInfos.info(segmentInfos.size() - 1);
3522: }
3523:
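/** Returns a space-separated, human-readable description of the
 *  current segments, e.g. for infoStream/debugging output. */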
3524: public synchronized String segString() {
3525: StringBuffer buffer = new StringBuffer();
3526: for (int i = 0; i < segmentInfos.size(); i++) {
3527: if (i > 0) {
3528: buffer.append(' ');
3529: }
3530: buffer.append(segmentInfos.info(i).segString(directory));
3531: }
3532:
3533: return buffer.toString();
3534: }
3535: }