Source Code Cross Referenced for IndexWriter.java in  » Net » lucene-connector » org » apache » lucene » index » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Net » lucene connector » org.apache.lucene.index 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        package org.apache.lucene.index;
0002:
0003:        /**
0004:         * Licensed to the Apache Software Foundation (ASF) under one or more
0005:         * contributor license agreements.  See the NOTICE file distributed with
0006:         * this work for additional information regarding copyright ownership.
0007:         * The ASF licenses this file to You under the Apache License, Version 2.0
0008:         * (the "License"); you may not use this file except in compliance with
0009:         * the License.  You may obtain a copy of the License at
0010:         *
0011:         *     http://www.apache.org/licenses/LICENSE-2.0
0012:         *
0013:         * Unless required by applicable law or agreed to in writing, software
0014:         * distributed under the License is distributed on an "AS IS" BASIS,
0015:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016:         * See the License for the specific language governing permissions and
0017:         * limitations under the License.
0018:         */
0019:
0020:        import org.apache.lucene.analysis.Analyzer;
0021:        import org.apache.lucene.document.Document;
0022:        import org.apache.lucene.search.Similarity;
0023:        import org.apache.lucene.store.Directory;
0024:        import org.apache.lucene.store.FSDirectory;
0025:        import org.apache.lucene.store.Lock;
0026:        import org.apache.lucene.store.LockObtainFailedException;
0027:        import org.apache.lucene.store.AlreadyClosedException;
0028:        import org.apache.lucene.util.BitVector;
0029:
0030:        import java.io.File;
0031:        import java.io.IOException;
0032:        import java.io.PrintStream;
0033:        import java.util.List;
0034:        import java.util.ArrayList;
0035:        import java.util.HashMap;
0036:        import java.util.Set;
0037:        import java.util.HashSet;
0038:        import java.util.LinkedList;
0039:        import java.util.Iterator;
0040:        import java.util.Map.Entry;
0041:
0042:        /**
0043:         An <code>IndexWriter</code> creates and maintains an index.
0044:
0045:         <p>The <code>create</code> argument to the 
0046:         <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer, boolean)"><b>constructor</b></a>
0047:         determines whether a new index is created, or whether an existing index is
0048:         opened.  Note that you
0049:         can open an index with <code>create=true</code> even while readers are
0050:         using the index.  The old readers will continue to search
0051:         the "point in time" snapshot they had opened, and won't
0052:         see the newly created index until they re-open.  There are
0053:         also <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer)"><b>constructors</b></a>
0054:         with no <code>create</code> argument which
0055:         will create a new index if there is not already an index at the
0056:         provided path and otherwise open the existing index.</p>
0057:
0058:         <p>In either case, documents are added with <a
0059:         href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a>
0060:         and removed with <a
0061:         href="#deleteDocuments(org.apache.lucene.index.Term)"><b>deleteDocuments</b></a>.
0062:         A document can be updated with <a href="#updateDocument(org.apache.lucene.index.Term, org.apache.lucene.document.Document)"><b>updateDocument</b></a> 
0063:         (which just deletes and then adds the entire document).
0064:         When finished adding, deleting and updating documents, <a href="#close()"><b>close</b></a> should be called.</p>
0065:
0066:         <p>These changes are buffered in memory and periodically
0067:         flushed to the {@link Directory} (during the above method
0068:         calls).  A flush is triggered when there are enough
0069:         buffered deletes (see {@link #setMaxBufferedDeleteTerms})
0070:         or enough added documents since the last flush, whichever
0071:         is sooner.  For the added documents, flushing is triggered
0072:         either by RAM usage of the documents (see {@link
0073:         #setRAMBufferSizeMB}) or the number of added documents.
0074:         The default is to flush when RAM usage hits 16 MB.  For
0075:         best indexing speed you should flush by RAM usage with a
0076:         large RAM buffer.  You can also force a flush by calling
0077:         {@link #flush}.  When a flush occurs, both pending deletes
0078:         and added documents are flushed to the index.  A flush may
0079:         also trigger one or more segment merges which by default
0080:         run with a background thread so as not to block the
0081:         addDocument calls (see <a href="#mergePolicy">below</a>
0082:         for changing the {@link MergeScheduler}).</p>
0083:
0084:         <a name="autoCommit"></a>
0085:         <p>The optional <code>autoCommit</code> argument to the
0086:         <a href="#IndexWriter(org.apache.lucene.store.Directory, boolean, org.apache.lucene.analysis.Analyzer)"><b>constructors</b></a>
0087:         controls visibility of the changes to {@link IndexReader} instances reading the same index.
0088:         When this is <code>false</code>, changes are not
0089:         visible until {@link #close()} is called.
0090:         Note that changes will still be flushed to the
0091:         {@link org.apache.lucene.store.Directory} as new files,
0092:         but are not committed (no new <code>segments_N</code> file
0093:         is written referencing the new files) until {@link #close} is
0094:         called.  If something goes terribly wrong (for example the
0095:         JVM crashes) before {@link #close()}, then
0096:         the index will reflect none of the changes made (it will
0097:         remain in its starting state).
0098:         You can also call {@link #abort()}, which closes the writer without committing any
0099:         changes, and removes any index
0100:         files that had been flushed but are now unreferenced.
0101:         This mode is useful for preventing readers from refreshing
0102:         at a bad time (for example after you've done all your
0103:         deletes but before you've done your adds).
0104:         It can also be used to implement simple single-writer
0105:         transactional semantics ("all or none").</p>
0106:
0107:         <p>When <code>autoCommit</code> is <code>true</code> then
0108:         every flush is also a commit ({@link IndexReader}
0109:         instances will see each flush as changes to the index).
0110:         This is the default, to match the behavior before 2.2.
0111:         When running in this mode, be careful not to refresh your
0112:         readers while optimize or segment merges are taking place
0113:         as this can tie up substantial disk space.</p>
0114:        
0115:         <p>Regardless of <code>autoCommit</code>, an {@link
0116:         IndexReader} or {@link org.apache.lucene.search.IndexSearcher} will only see the
0117:         index as of the "point in time" that it was opened.  Any
0118:         changes committed to the index after the reader was opened
0119:         are not visible until the reader is re-opened.</p>
0120:
0121:         <p>If an index will not have more documents added for a while and optimal search
0122:         performance is desired, then the <a href="#optimize()"><b>optimize</b></a>
0123:         method should be called before the index is closed.</p>
0124:
0125:         <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
0126:         another <code>IndexWriter</code> on the same directory will lead to a
0127:         {@link LockObtainFailedException}. The {@link LockObtainFailedException}
0128:         is also thrown if an IndexReader on the same directory is used to delete documents
0129:         from the index.</p>
0130:        
0131:         <a name="deletionPolicy"></a>
0132:         <p>Expert: <code>IndexWriter</code> allows an optional
0133:         {@link IndexDeletionPolicy} implementation to be
0134:         specified.  You can use this to control when prior commits
0135:         are deleted from the index.  The default policy is {@link
0136:         KeepOnlyLastCommitDeletionPolicy} which removes all prior
0137:         commits as soon as a new commit is done (this matches
0138:         behavior before 2.2).  Creating your own policy can allow
0139:         you to explicitly keep previous "point in time" commits
0140:         alive in the index for some time, to allow readers to
0141:         refresh to the new commit without having the old commit
0142:         deleted out from under them.  This is necessary on
0143:         filesystems like NFS that do not support "delete on last
0144:         close" semantics, which Lucene's "point in time" search
0145:         normally relies on. </p>
0146:
0147:         <a name="mergePolicy"></a> <p>Expert:
0148:         <code>IndexWriter</code> allows you to separately change
0149:         the {@link MergePolicy} and the {@link MergeScheduler}.
0150:         The {@link MergePolicy} is invoked whenever there are
0151:         changes to the segments in the index.  Its role is to
0152:         select which merges to do, if any, and return a {@link
0153:         MergePolicy.MergeSpecification} describing the merges.  It
0154:         also selects merges to do for optimize().  (The default is
0155:         {@link LogByteSizeMergePolicy}.)  Then, the {@link
0156:         MergeScheduler} is invoked with the requested merges and
0157:         it decides when and how to run the merges.  The default is
0158:         {@link ConcurrentMergeScheduler}. </p>
0159:         */
0160:
0161:        /*
0162:         * Clarification: Check Points (and commits)
0163:         * Being able to set autoCommit=false allows IndexWriter to flush and 
0164:         * write new index files to the directory without writing a new segments_N
0165:         * file which references these new files. It also means that the state of 
0166:         * the in memory SegmentInfos object is different than the most recent
0167:         * segments_N file written to the directory.
0168:         * 
0169:         * Each time the SegmentInfos is changed, and matches the (possibly 
0170:         * modified) directory files, we have a new "check point". 
0171:         * If the modified/new SegmentInfos is written to disk - as a new 
0172:         * (generation of) segments_N file - this check point is also an 
0173:         * IndexCommitPoint.
0174:         * 
0175:         * With autoCommit=true, every checkPoint is also a CommitPoint.
0176:         * With autoCommit=false, some checkPoints may not be commits.
0177:         * 
0178:         * A new checkpoint always replaces the previous checkpoint and 
0179:         * becomes the new "front" of the index. This allows the IndexFileDeleter 
0180:         * to delete files that are referenced only by stale checkpoints.
0181:         * (files that were created since the last commit, but are no longer
0182:         * referenced by the "front" of the index). For this, IndexFileDeleter 
0183:         * keeps track of the last non commit checkpoint.
0184:         */
0185:        public class IndexWriter {
0186:
0187:            /**
0188:             * Default value for the write lock timeout (1,000).
0189:             * @see #setDefaultWriteLockTimeout
0190:             */
0191:            public static long WRITE_LOCK_TIMEOUT = 1000; // non-final static; NOTE(review): presumably changed via setDefaultWriteLockTimeout — confirm
0192:
0193:            private long writeLockTimeout = WRITE_LOCK_TIMEOUT; // instance copy, initialized from the static default above
0194:
0195:            /**
0196:             * Name of the write lock in the index.
0197:             */
0198:            public static final String WRITE_LOCK_NAME = "write.lock";
0199:
0200:            /**
0201:             * @deprecated Alias kept for compatibility; refer to {@link LogMergePolicy#DEFAULT_MERGE_FACTOR} instead.
0202:             * @see LogMergePolicy#DEFAULT_MERGE_FACTOR
0203:             */
0204:            public final static int DEFAULT_MERGE_FACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
0205:
0206:            /**
0207:             * Sentinel value (-1) denoting that a flush trigger is disabled.
0208:             */
0209:            public final static int DISABLE_AUTO_FLUSH = -1;
0210:
0211:            /**
0212:             * Disabled by default (because IndexWriter flushes by RAM usage
0213:             * by default). Change using {@link #setMaxBufferedDocs(int)}.
0214:             */
0215:            public final static int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
0216:
0217:            /**
0218:             * Default value is 16 MB (which means flush when buffered
0219:             * docs consume 16 MB RAM).  Change using {@link #setRAMBufferSizeMB}.
0220:             */
0221:            public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
0222:
0223:            /**
0224:             * Disabled by default (because IndexWriter flushes by RAM usage
0225:             * by default). Change using {@link #setMaxBufferedDeleteTerms(int)}.
0226:             */
0227:            public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
0228:
0229:            /**
0230:             * @deprecated Alias kept for compatibility; refer to {@link LogDocMergePolicy#DEFAULT_MAX_MERGE_DOCS} instead.
0231:             * @see LogDocMergePolicy#DEFAULT_MAX_MERGE_DOCS
0232:             */
0233:            public final static int DEFAULT_MAX_MERGE_DOCS = LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS;
0234:
0235:            /**
0236:             * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.
0237:             */
0238:            public final static int DEFAULT_MAX_FIELD_LENGTH = 10000;
0239:
0240:            /**
0241:             * Default value is 128. Change using {@link #setTermIndexInterval(int)}.
0242:             */
0243:            public final static int DEFAULT_TERM_INDEX_INTERVAL = 128;
0244:
0245:            /**
0246:             * Absolute hard maximum length for a term.  If a term
0247:             * arrives from the analyzer longer than this length, it
0248:             * is skipped and a message is printed to infoStream, if
0249:             * set (see {@link #setInfoStream}).
0250:             */
0251:            public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH;
0252:
0253:            // The normal read buffer size defaults to 1024, but
0254:            // increasing this during merging seems to yield
0255:            // performance gains.  However we don't want to increase
0256:            // it too much because there are quite a few
0257:            // BufferedIndexInputs created during merging.  See
0258:            // LUCENE-888 for details.
0259:            private final static int MERGE_READ_BUFFER_SIZE = 4096;
0260:
0261:            // Used for printing messages
0262:            private static Object MESSAGE_ID_LOCK = new Object(); // monitor held while MESSAGE_ID is incremented (see setMessageID)
0263:            private static int MESSAGE_ID = 0; // next id to hand out; post-incremented in setMessageID
0264:            private int messageID = -1; // id printed by message(); stays -1 until setMessageID assigns one
0265:
0266:            private Directory directory; // where this index resides
0267:            private Analyzer analyzer; // how to analyze text
0268:
0269:            private Similarity similarity = Similarity.getDefault(); // how to normalize
0270:
0271:            private boolean commitPending; // true if segmentInfos has changes not yet committed
0272:            private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
0273:
0274:            private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails -- NOTE(review): same wording as rollbackSegmentInfos; presumably the fallback for a local transaction (cf. localAutoCommit) — confirm
0275:            private boolean localAutoCommit; // saved autoCommit during local transaction
0276:            private boolean autoCommit = true; // false if we should commit only on close
0277:
0278:            private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
0279:            private DocumentsWriter docWriter;
0280:            private IndexFileDeleter deleter;
0281:
0282:            private Set segmentsToOptimize = new HashSet(); // used by optimize to note those needing optimization
0283:
0284:            private Lock writeLock;
0285:
0286:            private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // read/written via get/setTermIndexInterval
0287:
0288:            private boolean closeDir;
0289:            private boolean closed; // once true, ensureOpen() throws AlreadyClosedException
0290:            private boolean closing;
0291:
0292:            // Holds all SegmentInfo instances currently involved in
0293:            // merges
0294:            private HashSet mergingSegments = new HashSet();
0295:
0296:            private MergePolicy mergePolicy = new LogByteSizeMergePolicy(); // initial policy; getLogMergePolicy() requires a LogMergePolicy
0297:            private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler(); // initial scheduler
0298:            private LinkedList pendingMerges = new LinkedList();
0299:            private Set runningMerges = new HashSet();
0300:            private List mergeExceptions = new ArrayList();
0301:            private long mergeGen;
0302:            private boolean stopMerges;
0303:
0304:            /**
0305:             * Used internally to throw an {@link
0306:             * AlreadyClosedException} if this IndexWriter has been
0307:             * closed.
0308:             * @throws AlreadyClosedException if this IndexWriter is closed
0309:             */
0310:            protected final void ensureOpen() throws AlreadyClosedException {
0311:                if (!closed) {
0312:                    return; // writer is still open: nothing to report
0313:                }
0314:                throw new AlreadyClosedException("this IndexWriter is closed");
0315:            }
0316:
0317:            /**
0318:             * Prints a message to the infoStream (if non-null),
0319:             * prefixed with the identifying information for this
0320:             * writer and the thread that's calling it.
0321:             */
0322:            public void message(String message) {
0323:                if (infoStream == null) {
0324:                    return; // no info stream configured: stay silent
0325:                }
0326:                infoStream.println("IW " + messageID + " [" + Thread.currentThread().getName() + "]: " + message);
0327:            }
0328:
0329:            private synchronized void setMessageID() {
0330:                if (infoStream == null || messageID != -1)
0331:                    return; // no info stream, or this writer already has an id
0332:                synchronized (MESSAGE_ID_LOCK) { // MESSAGE_ID is static, so take the static lock as well
0333:                    messageID = MESSAGE_ID++;
0334:                }
0335:            }
0336:
0337:            /**
0338:             * Casts current mergePolicy to LogMergePolicy, and throws
0339:             * an exception if the mergePolicy is not a LogMergePolicy.
0340:             */
0341:            private LogMergePolicy getLogMergePolicy() {
0342:                if (!(mergePolicy instanceof LogMergePolicy)) {
0343:                    throw new IllegalArgumentException(
0344:                            "this method can only be called when the merge policy is the default LogMergePolicy");
0345:                }
0346:                return (LogMergePolicy) mergePolicy; // checked above, so the cast cannot fail for this read
0347:            }
0348:
0349:            /** <p>Get the current setting of whether newly flushed
0350:             *  segments will use the compound file format.  Note that
0351:             *  this just returns the value previously set with
0352:             *  setUseCompoundFile(boolean), or the default value
0353:             *  (true).  You cannot use this to query the status of
0354:             *  previously flushed segments.</p>
0355:             *
0356:             *  <p>Note that this method is a convenience method: it
0357:             *  just calls mergePolicy.getUseCompoundFile as long as
0358:             *  mergePolicy is an instance of {@link LogMergePolicy}.
0359:             *  Otherwise an IllegalArgumentException is thrown.</p>
0360:             *
0361:             *  @see #setUseCompoundFile(boolean)
0362:             */
0363:            public boolean getUseCompoundFile() {
0364:                return getLogMergePolicy().getUseCompoundFile(); // getLogMergePolicy() throws IllegalArgumentException unless mergePolicy is a LogMergePolicy
0365:            }
0366:
0367:            /** <p>Setting to turn on usage of a compound file. When on,
0368:             *  multiple files for each segment are merged into a
0369:             *  single file when a new segment is flushed.</p>
0370:             *
0371:             *  <p>Note that this method is a convenience method: it
0372:             *  calls mergePolicy.setUseCompoundFile and mergePolicy.setUseCompoundDocStore as long as
0373:             *  mergePolicy is an instance of {@link LogMergePolicy}.
0374:             *  Otherwise an IllegalArgumentException is thrown.</p>
0375:             */
0376:            public void setUseCompoundFile(boolean value) {
0377:                getLogMergePolicy().setUseCompoundFile(value); // throws IllegalArgumentException unless mergePolicy is a LogMergePolicy
0378:                getLogMergePolicy().setUseCompoundDocStore(value); // the same value is also applied to the compound doc-store setting
0379:            }
0380:
0381:            /** Expert: Set the Similarity implementation used by this IndexWriter.
0382:             *
0383:             * @see Similarity#setDefault(Similarity)
0384:             */
0385:            public void setSimilarity(Similarity similarity) {
0386:                ensureOpen(); // reject the call once the writer is closed
0387:                this .similarity = similarity; // "this" is required: the parameter shadows the field
0388:            }
0389:
0390:            /** Expert: Return the Similarity implementation used by this IndexWriter.
0391:             *
0392:             * <p>This defaults to the current value of {@link Similarity#getDefault()}.
0393:             */
0394:            public Similarity getSimilarity() {
0395:                ensureOpen(); // fails with AlreadyClosedException once the writer is closed
0396:                return similarity;
0397:            }
0398:
0399:            /** Expert: Set the interval between indexed terms.  Large values cause less
0400:             * memory to be used by IndexReader, but slow random-access to terms.  Small
0401:             * values cause more memory to be used by an IndexReader, and speed
0402:             * random-access to terms.
0403:             *
0404:             * This parameter determines the amount of computation required per query
0405:             * term, regardless of the number of documents that contain that term.  In
0406:             * particular, it is the maximum number of other terms that must be
0407:             * scanned before a term is located and its frequency and position information
0408:             * may be processed.  In a large index with user-entered query terms, query
0409:             * processing time is likely to be dominated not by term lookup but rather
0410:             * by the processing of frequency and positional data.  In a small index
0411:             * or when many uncommon query terms are generated (e.g., by wildcard
0412:             * queries) term lookup may become a dominant cost.
0413:             *
0414:             * In particular, <code>numUniqueTerms/interval</code> terms are read into
0415:             * memory by an IndexReader, and, on average, <code>interval/2</code> terms
0416:             * must be scanned for each random term access.
0417:             *
0418:             * @see #DEFAULT_TERM_INDEX_INTERVAL
0419:             */
0420:            public void setTermIndexInterval(int interval) {
0421:                ensureOpen(); // fails with AlreadyClosedException once the writer is closed
0422:                termIndexInterval = interval; // stored as given; no range check is applied here
0423:            }
0424:
0425:            /** Expert: Return the interval between indexed terms.
0426:             *
0427:             * @see #setTermIndexInterval(int)
0428:             */
0429:            public int getTermIndexInterval() {
0430:                ensureOpen(); // fails with AlreadyClosedException once the writer is closed
0431:                return this.termIndexInterval;
0432:            }
0433:
0434:            /**
0435:             * Constructs an IndexWriter for the index in <code>path</code>.
0436:             * Text will be analyzed with <code>a</code>.  If <code>create</code>
0437:             * is true, then a new, empty index will be created in
0438:             * <code>path</code>, replacing the index already there, if any.
0439:             *
0440:             * @param path the path to the index directory
0441:             * @param a the analyzer to use
0442:             * @param create <code>true</code> to create the index or overwrite
0443:             *  the existing one; <code>false</code> to append to the existing
0444:             *  index
0445:             * @throws CorruptIndexException if the index is corrupt
0446:             * @throws LockObtainFailedException if another writer
0447:             *  has this index open (<code>write.lock</code> could not
0448:             *  be obtained)
0449:             * @throws IOException if the directory cannot be read/written to, or
0450:             *  if it does not exist and <code>create</code> is
0451:             *  <code>false</code> or if there is any other low-level
0452:             *  IO error
0453:             */
0454:            public IndexWriter(String path, Analyzer a, boolean create)
0455:                    throws CorruptIndexException, LockObtainFailedException,
0456:                    IOException {
0457:                init(FSDirectory.getDirectory(path), a, create, true, null, // path is resolved to a directory via FSDirectory before delegating
0458:                        true); // NOTE(review): init is outside this view; the trailing true/null/true are presumably closeDir, deletionPolicy, autoCommit — confirm
0459:            }
0460:
0461:            /**
0462:             * Constructs an IndexWriter for the index in <code>path</code>.
0463:             * Text will be analyzed with <code>a</code>.  If <code>create</code>
0464:             * is true, then a new, empty index will be created in
0465:             * <code>path</code>, replacing the index already there, if any.
0466:             *
0467:             * @param path the path to the index directory
0468:             * @param a the analyzer to use
0469:             * @param create <code>true</code> to create the index or overwrite
0470:             *  the existing one; <code>false</code> to append to the existing
0471:             *  index
0472:             * @throws CorruptIndexException if the index is corrupt
0473:             * @throws LockObtainFailedException if another writer
0474:             *  has this index open (<code>write.lock</code> could not
0475:             *  be obtained)
0476:             * @throws IOException if the directory cannot be read/written to, or
0477:             *  if it does not exist and <code>create</code> is
0478:             *  <code>false</code> or if there is any other low-level
0479:             *  IO error
0480:             */
0481:            public IndexWriter(File path, Analyzer a, boolean create)
0482:                    throws CorruptIndexException, LockObtainFailedException,
0483:                    IOException {
0484:                init(FSDirectory.getDirectory(path), a, create, true, null, // File variant: same delegation as the String-path constructor
0485:                        true); // NOTE(review): init is outside this view; the trailing true/null/true are presumably closeDir, deletionPolicy, autoCommit — confirm
0486:            }
0487:
0488:            /**
0489:             * Constructs an IndexWriter for the index in <code>d</code>.
0490:             * Text will be analyzed with <code>a</code>.  If <code>create</code>
0491:             * is true, then a new, empty index will be created in
0492:             * <code>d</code>, replacing the index already there, if any.
0493:             *
0494:             * @param d the index directory
0495:             * @param a the analyzer to use
0496:             * @param create <code>true</code> to create the index or overwrite
0497:             *  the existing one; <code>false</code> to append to the existing
0498:             *  index
0499:             * @throws CorruptIndexException if the index is corrupt
0500:             * @throws LockObtainFailedException if another writer
0501:             *  has this index open (<code>write.lock</code> could not
0502:             *  be obtained)
0503:             * @throws IOException if the directory cannot be read/written to, or
0504:             *  if it does not exist and <code>create</code> is
0505:             *  <code>false</code> or if there is any other low-level
0506:             *  IO error
0507:             */
0508:            public IndexWriter(Directory d, Analyzer a, boolean create)
0509:                    throws CorruptIndexException, LockObtainFailedException,
0510:                    IOException {
0511:                init(d, a, create, false, null, true); // caller-supplied Directory; fourth argument is false here but true in the path-based constructors. NOTE(review): presumably closeDir — confirm against init
0512:            }
0513:
0514:            /**
0515:             * Constructs an IndexWriter for the index in
0516:             * <code>path</code>, first creating it if it does not
0517:             * already exist.  Text will be analyzed with
0518:             * <code>a</code>.
0519:             *
0520:             * @param path the path to the index directory
0521:             * @param a the analyzer to use
0522:             * @throws CorruptIndexException if the index is corrupt
0523:             * @throws LockObtainFailedException if another writer
0524:             *  has this index open (<code>write.lock</code> could not
0525:             *  be obtained)
0526:             * @throws IOException if the directory cannot be
0527:             *  read/written to or if there is any other low-level
0528:             *  IO error
0529:             */
0530:            public IndexWriter(String path, Analyzer a)
0531:                    throws CorruptIndexException, LockObtainFailedException,
0532:                    IOException {
0533:                init(FSDirectory.getDirectory(path), a, true, null, true); // five-argument init: no create flag is passed. NOTE(review): remaining args presumably closeDir, deletionPolicy, autoCommit — confirm against init
0534:            }
0535:
0536:            /**
0537:             * Constructs an IndexWriter for the index in
0538:             * <code>path</code>, first creating it if it does not
0539:             * already exist.  Text will be analyzed with
0540:             * <code>a</code>.
0541:             *
0542:             * @param path the path to the index directory
0543:             * @param a the analyzer to use
0544:             * @throws CorruptIndexException if the index is corrupt
0545:             * @throws LockObtainFailedException if another writer
0546:             *  has this index open (<code>write.lock</code> could not
0547:             *  be obtained)
0548:             * @throws IOException if the directory cannot be
0549:             *  read/written to or if there is any other low-level
0550:             *  IO error
0551:             */
0552:            public IndexWriter(File path, Analyzer a)
0553:                    throws CorruptIndexException, LockObtainFailedException,
0554:                    IOException {
0555:                init(FSDirectory.getDirectory(path), a, true, null, true);
0556:            }
0557:
0558:            /**
0559:             * Constructs an IndexWriter for the index in
0560:             * <code>d</code>, first creating it if it does not
0561:             * already exist.  Text will be analyzed with
0562:             * <code>a</code>.
0563:             *
0564:             * @param d the index directory
0565:             * @param a the analyzer to use
0566:             * @throws CorruptIndexException if the index is corrupt
0567:             * @throws LockObtainFailedException if another writer
0568:             *  has this index open (<code>write.lock</code> could not
0569:             *  be obtained)
0570:             * @throws IOException if the directory cannot be
0571:             *  read/written to or if there is any other low-level
0572:             *  IO error
0573:             */
0574:            public IndexWriter(Directory d, Analyzer a)
0575:                    throws CorruptIndexException, LockObtainFailedException, IOException {
0576:                // Fixed arguments: closeDir=false, deletionPolicy=null, autoCommit=true.
0577:                this.init(d, a, false, null, true);
0578:            }
0579:
0580:            /**
0581:             * Constructs an IndexWriter for the index in
0582:             * <code>d</code>, first creating it if it does not
0583:             * already exist.  Text will be analyzed with
0584:             * <code>a</code>.
0585:             *
0586:             * @param d the index directory
0587:             * @param autoCommit see <a href="#autoCommit">above</a>
0588:             * @param a the analyzer to use
0589:             * @throws CorruptIndexException if the index is corrupt
0590:             * @throws LockObtainFailedException if another writer
0591:             *  has this index open (<code>write.lock</code> could not
0592:             *  be obtained)
0593:             * @throws IOException if the directory cannot be
0594:             *  read/written to or if there is any other low-level
0595:             *  IO error
0596:             */
0597:            public IndexWriter(Directory d, boolean autoCommit, Analyzer a)
0598:                    throws CorruptIndexException, LockObtainFailedException, IOException {
0599:                // Fixed arguments: closeDir=false, deletionPolicy=null.
0600:                this.init(d, a, false, null, autoCommit);
0601:            }
0602:
0603:            /**
0604:             * Constructs an IndexWriter for the index in <code>d</code>.
0605:             * Text will be analyzed with <code>a</code>.  If <code>create</code>
0606:             * is true, then a new, empty index will be created in
0607:             * <code>d</code>, replacing the index already there, if any.
0608:             *
0609:             * @param d the index directory
0610:             * @param autoCommit see <a href="#autoCommit">above</a>
0611:             * @param a the analyzer to use
0612:             * @param create <code>true</code> to create the index or overwrite
0613:             *  the existing one; <code>false</code> to append to the existing
0614:             *  index
0615:             * @throws CorruptIndexException if the index is corrupt
0616:             * @throws LockObtainFailedException if another writer
0617:             *  has this index open (<code>write.lock</code> could not
0618:             *  be obtained)
0619:             * @throws IOException if the directory cannot be read/written to, or
0620:             *  if it does not exist and <code>create</code> is
0621:             *  <code>false</code> or if there is any other low-level
0622:             *  IO error
0623:             */
0624:            public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create)
0625:                    throws CorruptIndexException, LockObtainFailedException, IOException {
0626:                // Fixed arguments: closeDir=false, deletionPolicy=null.
0627:                this.init(d, a, create, false, null, autoCommit);
0628:            }
0629:
0630:            /**
0631:             * Expert: constructs an IndexWriter with a custom {@link
0632:             * IndexDeletionPolicy}, for the index in <code>d</code>,
0633:             * first creating it if it does not already exist.  Text
0634:             * will be analyzed with <code>a</code>.
0635:             *
0636:             * @param d the index directory
0637:             * @param autoCommit see <a href="#autoCommit">above</a>
0638:             * @param a the analyzer to use
0639:             * @param deletionPolicy see <a href="#deletionPolicy">above</a>
0640:             * @throws CorruptIndexException if the index is corrupt
0641:             * @throws LockObtainFailedException if another writer
0642:             *  has this index open (<code>write.lock</code> could not
0643:             *  be obtained)
0644:             * @throws IOException if the directory cannot be
0645:             *  read/written to or if there is any other low-level
0646:             *  IO error
0647:             */
0648:            public IndexWriter(Directory d, boolean autoCommit, Analyzer a,
0649:                    IndexDeletionPolicy deletionPolicy)
0650:                    throws CorruptIndexException, LockObtainFailedException,
0651:                    IOException {
0652:                init(d, a, false, deletionPolicy, autoCommit);
0653:            }
0654:
0655:            /**
0656:             * Expert: constructs an IndexWriter with a custom {@link
0657:             * IndexDeletionPolicy}, for the index in <code>d</code>.
0658:             * Text will be analyzed with <code>a</code>.  If
0659:             * <code>create</code> is true, then a new, empty index
0660:             * will be created in <code>d</code>, replacing the index
0661:             * already there, if any.
0662:             *
0663:             * @param d the index directory
0664:             * @param autoCommit see <a href="#autoCommit">above</a>
0665:             * @param a the analyzer to use
0666:             * @param create <code>true</code> to create the index or overwrite
0667:             *  the existing one; <code>false</code> to append to the existing
0668:             *  index
0669:             * @param deletionPolicy see <a href="#deletionPolicy">above</a>
0670:             * @throws CorruptIndexException if the index is corrupt
0671:             * @throws LockObtainFailedException if another writer
0672:             *  has this index open (<code>write.lock</code> could not
0673:             *  be obtained)
0674:             * @throws IOException if the directory cannot be read/written to, or
0675:             *  if it does not exist and <code>create</code> is
0676:             *  <code>false</code> or if there is any other low-level
0677:             *  IO error
0678:             */
0679:            public IndexWriter(Directory d, boolean autoCommit, Analyzer a,
0680:                    boolean create, IndexDeletionPolicy deletionPolicy)
0681:                    throws CorruptIndexException, LockObtainFailedException,
0682:                    IOException {
0683:                init(d, a, create, false, deletionPolicy, autoCommit);
0684:            }
0685:
0686:            private void init(Directory d, Analyzer a, boolean closeDir,
0687:                    IndexDeletionPolicy deletionPolicy, boolean autoCommit)
0688:                    throws CorruptIndexException, LockObtainFailedException,
0689:                    IOException {
0690:                if (IndexReader.indexExists(d)) {
0691:                    init(d, a, false, closeDir, deletionPolicy, autoCommit);
0692:                } else {
0693:                    init(d, a, true, closeDir, deletionPolicy, autoCommit);
0694:                }
0695:            }
0696:
0697:            /**
0698:             * Shared constructor body: obtains the write lock, reads or creates the
0699:             * segments file, then builds the DocumentsWriter and IndexFileDeleter.
0700:             * If any step after locking fails (IOException, RuntimeException or
0701:             * Error) the lock is released again before the failure propagates.
0702:             */
0703:            private void init(Directory d, Analyzer a, final boolean create, boolean closeDir,
0704:                    IndexDeletionPolicy deletionPolicy, boolean autoCommit)
0705:                    throws CorruptIndexException, LockObtainFailedException, IOException {
0706:                this.closeDir = closeDir;
0707:                directory = d;
0708:                analyzer = a;
0709:                this.infoStream = defaultInfoStream;
0710:                setMessageID();
0711:
0712:                if (create) {
0713:                    // Clear the write lock in case it's leftover:
0714:                    directory.clearLock(IndexWriter.WRITE_LOCK_NAME);
0715:                }
0716:
0717:                Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME);
0718:                if (!writeLock.obtain(writeLockTimeout)) { // obtain write lock
0719:                    throw new LockObtainFailedException("Index locked for write: " + writeLock);
0720:                }
0721:                this.writeLock = writeLock; // save it
0722:
0723:                boolean success = false;
0724:                try {
0725:                    if (create) {
0726:                        // Try to read first.  This is to allow create against an
0727:                        // index that's currently open for searching.  In this case
0728:                        // we write the next segments_N file with no segments:
0729:                        try {
0730:                            segmentInfos.read(directory);
0731:                            segmentInfos.clear();
0732:                        } catch (IOException ignored) {
0733:                            // Likely this means it's a fresh directory
0734:                        }
0735:                        segmentInfos.write(directory);
0736:                    } else {
0737:                        segmentInfos.read(directory);
0738:                    }
0739:
0740:                    this.autoCommit = autoCommit;
0741:                    if (!autoCommit) {
0742:                        rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
0743:                    }
0744:
0745:                    docWriter = new DocumentsWriter(directory, this);
0746:                    docWriter.setInfoStream(infoStream);
0747:
0748:                    // A null deletionPolicy falls back to KeepOnlyLastCommitDeletionPolicy:
0749:                    deleter = new IndexFileDeleter(directory,
0750:                            deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
0751:                            segmentInfos, infoStream, docWriter);
0752:                    pushMaxBufferedDocs();
0753:                    if (infoStream != null) {
0754:                        message("init: create=" + create);
0755:                        messageState();
0756:                    }
0757:                    success = true;
0758:                } finally {
0759:                    if (!success) {
0760:                        // Covers unchecked failures too, not only IOException.
0761:                        this.writeLock.release();
0762:                        this.writeLock = null;
0763:                    }
0764:                }
0765:            }
0766:
0767:            /**
0768:             * Expert: set the merge policy used by this writer.
0769:             */
0770:            public void setMergePolicy(MergePolicy mp) {
0771:                ensureOpen();
0772:                if (mp == null) {
0773:                    throw new NullPointerException("MergePolicy must be non-null");
0774:                }
0775:                // Close the outgoing policy unless the same instance is being re-installed.
0776:                if (mergePolicy != mp) {
0777:                    mergePolicy.close();
0778:                }
0779:                mergePolicy = mp;
0780:                pushMaxBufferedDocs();
0781:                if (infoStream != null) { message("setMergePolicy " + mp); }
0782:            }
0783:
0784:            /**
0785:             * Expert: returns the current MergePolicy in use by this writer.
0786:             * @see #setMergePolicy
0787:             */
0788:            public MergePolicy getMergePolicy() {
0789:                ensureOpen(); // same check the setter runs first
0790:                return this.mergePolicy;
0791:            }
0792:
0793:            /**
0794:             * Expert: set the merge scheduler used by this writer.
0795:             */
0796:            public void setMergeScheduler(MergeScheduler mergeScheduler)
0797:                    throws CorruptIndexException, IOException {
0798:                ensureOpen();
0799:                if (mergeScheduler == null) {
0800:                    throw new NullPointerException("MergeScheduler must be non-null");
0801:                }
0802:                // Finish merges and close the outgoing scheduler only when it is really replaced.
0803:                final boolean replacing = (this.mergeScheduler != mergeScheduler);
0804:                if (replacing) {
0805:                    finishMerges(true);
0806:                    this.mergeScheduler.close();
0807:                }
0808:                this.mergeScheduler = mergeScheduler;
0809:                if (infoStream != null) { message("setMergeScheduler " + mergeScheduler); }
0810:            }
0811:
0812:            /**
0813:             * Expert: returns the current MergeScheduler in use by this
0814:             * writer.
0815:             * @see #setMergeScheduler
0816:             */
0817:            public MergeScheduler getMergeScheduler() {
0818:                ensureOpen(); // same check the setter runs first
0819:                return this.mergeScheduler;
0820:            }
0821:
0822:            /** <p>Determines the largest segment (measured by
0823:             * document count) that may be merged with other segments.
0824:             * Small values (e.g., less than 10,000) are best for
0825:             * interactive indexing, as this limits the length of
0826:             * pauses while indexing to a few seconds.  Larger values
0827:             * are best for batched indexing and speedier
0828:             * searches.</p>
0829:             *
0830:             * <p>The default value is {@link Integer#MAX_VALUE}.</p>
0831:             *
0832:             * <p>Note that this method is a convenience method: it
0833:             * just calls mergePolicy.setMaxMergeDocs as long as
0834:             * mergePolicy is an instance of {@link LogMergePolicy}.
0835:             * Otherwise an IllegalArgumentException is thrown.</p>
0836:             *
0837:             * <p>The default merge policy ({@link
0838:             * LogByteSizeMergePolicy}) also allows you to set this
0839:             * limit by net size (in MB) of the segment, using {@link
0840:             * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
0841:             */
0842:            public void setMaxMergeDocs(int maxMergeDocs) {
0843:                this.getLogMergePolicy().setMaxMergeDocs(maxMergeDocs); // pure delegation
0844:            }
0845:
0846:            /**
0847:             * <p>Returns the largest segment (measured by document
0848:             * count) that may be merged with other segments.</p>
0849:             *
0850:             * <p>Note that this method is a convenience method: it
0851:             * just calls mergePolicy.getMaxMergeDocs as long as
0852:             * mergePolicy is an instance of {@link LogMergePolicy}.
0853:             * Otherwise an IllegalArgumentException is thrown.</p>
0854:             *
0855:             * @see #setMaxMergeDocs
0856:             */
0857:            public int getMaxMergeDocs() {
0858:                return this.getLogMergePolicy().getMaxMergeDocs(); // pure delegation
0859:            }
0860:
0861:            /**
0862:             * The maximum number of terms that will be indexed for a single field in a
0863:             * document.  This limits the amount of memory required for indexing, so that
0864:             * collections with very large files will not crash the indexing process by
0865:             * running out of memory.  This setting refers to the number of running terms,
0866:             * not to the number of different terms.<p/>
0867:             * <strong>Note:</strong> this silently truncates large documents, excluding from the
0868:             * index all terms that occur further in the document.  If you know your source
0869:             * documents are large, be sure to set this value high enough to accommodate
0870:             * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
0871:             * is your memory, but you should anticipate an OutOfMemoryError.<p/>
0872:             * By default, no more than 10,000 terms will be indexed for a field.
0873:             */
0874:            public void setMaxFieldLength(int maxFieldLength) {
0875:                ensureOpen();
0876:                this.maxFieldLength = maxFieldLength;
0877:                // Echo the new limit to the info stream when one is installed.
0878:                if (infoStream != null) { message("setMaxFieldLength " + maxFieldLength); }
0879:            }
0880:
0881:            /**
0882:             * Returns the maximum number of terms that will be
0883:             * indexed for a single field in a document.
0884:             * @see #setMaxFieldLength
0885:             */
0886:            public int getMaxFieldLength() {
0887:                ensureOpen(); // same check the setter runs first
0888:                return this.maxFieldLength;
0889:            }
0890:
0891:            /** Determines the minimal number of documents required
0892:             * before the buffered in-memory documents are flushed as
0893:             * a new Segment.  Large values generally give faster
0894:             * indexing.
0895:             *
0896:             * <p>When this is set, the writer will flush every
0897:             * maxBufferedDocs added documents.  Pass in {@link
0898:             * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
0899:             * to number of buffered documents.  Note that if flushing
0900:             * by RAM usage is also enabled, then the flush will be
0901:             * triggered by whichever comes first.</p>
0902:             *
0903:             * <p>Disabled by default (writer flushes by RAM usage).</p>
0904:             *
0905:             * @throws IllegalArgumentException if maxBufferedDocs is
0906:             * enabled but smaller than 2, or it disables maxBufferedDocs
0907:             * when ramBufferSize is already disabled
0908:             * @see #setRAMBufferSizeMB
0909:             */
0910:            public void setMaxBufferedDocs(int maxBufferedDocs) {
0911:                ensureOpen();
0912:                final boolean disabling = (maxBufferedDocs == DISABLE_AUTO_FLUSH);
0913:                if (!disabling && maxBufferedDocs < 2) {
0914:                    throw new IllegalArgumentException("maxBufferedDocs must at least be 2 when enabled");
0915:                }
0916:                // The RAM trigger is consulted only when the doc-count trigger is being switched off.
0917:                if (disabling && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH) {
0918:                    throw new IllegalArgumentException(
0919:                            "at least one of ramBufferSize and maxBufferedDocs must be enabled");
0920:                }
0921:                docWriter.setMaxBufferedDocs(maxBufferedDocs);
0922:                pushMaxBufferedDocs();
0923:                if (infoStream != null) { message("setMaxBufferedDocs " + maxBufferedDocs); }
0924:            }
0925:
0926:            /**
0927:             * If we are flushing by doc count (not by RAM usage), and
0928:             * using LogDocMergePolicy then push maxBufferedDocs down
0929:             * as its minMergeDocs, to keep backwards compatibility.
0930:             */
0931:            private void pushMaxBufferedDocs() {
0932:                if (docWriter.getMaxBufferedDocs() == DISABLE_AUTO_FLUSH) {
0933:                    return; // not flushing by doc count: nothing to push
0934:                }
0935:                final MergePolicy current = mergePolicy; // read the field once
0936:                if (!(current instanceof LogDocMergePolicy)) {
0937:                    return; // only a LogDocMergePolicy receives the value
0938:                }
0939:                final LogDocMergePolicy docPolicy = (LogDocMergePolicy) current;
0940:                final int bufferedDocs = docWriter.getMaxBufferedDocs();
0941:                if (docPolicy.getMinMergeDocs() != bufferedDocs) {
0942:                    if (infoStream != null) {
0943:                        message("now push maxBufferedDocs " + bufferedDocs + " to LogDocMergePolicy");
0944:                    }
0945:                    docPolicy.setMinMergeDocs(bufferedDocs);
0946:                }
0947:            }
0948:
0949:            /**
0950:             * Returns the number of buffered added documents that will
0951:             * trigger a flush if enabled.
0952:             * @see #setMaxBufferedDocs
0953:             */
0954:            public int getMaxBufferedDocs() {
0955:                ensureOpen(); // checked before docWriter is touched
0956:                return this.docWriter.getMaxBufferedDocs();
0957:            }
0958:
0959:            /** Determines the amount of RAM that may be used for
0960:             * buffering added documents before they are flushed as a
0961:             * new Segment.  Generally for faster indexing performance
0962:             * it's best to flush by RAM usage instead of document
0963:             * count and use as large a RAM buffer as you can.
0964:             *
0965:             * <p>When this is set, the writer will flush whenever
0966:             * buffered documents use this much RAM.  Pass in {@link
0967:             * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
0968:             * to RAM usage.  Note that if flushing by document count
0969:             * is also enabled, then the flush will be triggered by
0970:             * whichever comes first.</p>
0971:             *
0972:             * <p> The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
0973:             * 
0974:             * @throws IllegalArgumentException if ramBufferSize is
0975:             * enabled but non-positive, or it disables ramBufferSize
0976:             * when maxBufferedDocs is already disabled
0977:             */
0978:            public void setRAMBufferSizeMB(double mb) {
0979:                final boolean disabling = (mb == DISABLE_AUTO_FLUSH);
0980:                if (!disabling && mb <= 0.0) {
0981:                    throw new IllegalArgumentException("ramBufferSize should be > 0.0 MB when enabled");
0982:                }
0983:                if (disabling && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH) {
0984:                    throw new IllegalArgumentException(
0985:                            "at least one of ramBufferSize and maxBufferedDocs must be enabled");
0986:                }
0987:                docWriter.setRAMBufferSizeMB(mb);
0988:                if (infoStream != null) { message("setRAMBufferSizeMB " + mb); }
0989:            }
0990:
0991:            /**
0992:             * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
0993:             */
0994:            public double getRAMBufferSizeMB() {
0995:                return this.docWriter.getRAMBufferSizeMB(); // note: no ensureOpen() call here
0996:            }
0997:
0998:            /**
0999:             * <p>Determines the minimal number of delete terms required before the buffered
1000:             * in-memory delete terms are applied and flushed. If there are documents
1001:             * buffered in memory at the time, they are merged and a new segment is
1002:             * created.</p>
1003:             *
1004:             * <p>Disabled by default (writer flushes by RAM usage).</p>
1005:             * 
1006:             * @throws IllegalArgumentException if maxBufferedDeleteTerms
1007:             * is enabled but smaller than 1
1008:             * @see #setRAMBufferSizeMB
1009:             */
1010:            public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
1011:                ensureOpen();
1012:                // DISABLE_AUTO_FLUSH is exempt from the lower-bound check.
1013:                final boolean enabled = (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH);
1014:                if (enabled && maxBufferedDeleteTerms < 1) {
1015:                    throw new IllegalArgumentException(
1016:                            "maxBufferedDeleteTerms must at least be 1 when enabled");
1017:                }
1018:                docWriter.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
1019:                if (infoStream != null) { message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms); }
1020:            }
1021:
1022:            /**
1023:             * Returns the number of buffered deleted terms that will
1024:             * trigger a flush if enabled.
1025:             * @see #setMaxBufferedDeleteTerms
1026:             */
1027:            public int getMaxBufferedDeleteTerms() {
1028:                ensureOpen(); // checked before docWriter is touched
1029:                return this.docWriter.getMaxBufferedDeleteTerms();
1030:            }
1031:
1032:            /** Determines how often segment indices are merged by addDocument().  With
1033:             * smaller values, less RAM is used while indexing, and searches on
1034:             * unoptimized indices are faster, but indexing speed is slower.  With larger
1035:             * values, more RAM is used during indexing, and while searches on unoptimized
1036:             * indices are slower, indexing is faster.  Thus larger values (&gt; 10) are best
1037:             * for batch index creation, and smaller values (&lt; 10) for indices that are
1038:             * interactively maintained.
1039:             *
1040:             * <p>Note that this method is a convenience method: it
1041:             * just calls mergePolicy.setMergeFactor as long as
1042:             * mergePolicy is an instance of {@link LogMergePolicy}.
1043:             * Otherwise an IllegalArgumentException is thrown.</p>
1044:             *
1045:             * <p>This must never be less than 2.  The default value is 10.</p>
1046:             */
1047:            public void setMergeFactor(int mergeFactor) {
1048:                this.getLogMergePolicy().setMergeFactor(mergeFactor); // pure delegation
1049:            }
1050:
1051:            /**
1052:             * <p>Returns the number of segments that are merged at
1053:             * once and also controls the total number of segments
1054:             * allowed to accumulate in the index.</p>
1055:             *
1056:             * <p>Note that this method is a convenience method: it
1057:             * just calls mergePolicy.getMergeFactor as long as
1058:             * mergePolicy is an instance of {@link LogMergePolicy}.
1059:             * Otherwise an IllegalArgumentException is thrown.</p>
1060:             *
1061:             * @see #setMergeFactor
1062:             */
1063:            public int getMergeFactor() {
1064:                return this.getLogMergePolicy().getMergeFactor(); // pure delegation
1065:            }
1066:
1067:            /** If non-null, this will be the default infoStream used
1068:             * by a newly instantiated IndexWriter.
1069:             * @see #setInfoStream
1070:             */
1071:            public static void setDefaultInfoStream(PrintStream infoStream) {
1072:                defaultInfoStream = infoStream; // static field; init() copies it into each new writer
1073:            }
1074:
1075:            /**
1076:             * Returns the current default infoStream for newly
1077:             * instantiated IndexWriters.
1078:             * @see #setDefaultInfoStream
1079:             */
1080:            public static PrintStream getDefaultInfoStream() {
1081:                return defaultInfoStream; // static field shared by all writers
1082:            }
1083:
1084:            /** If non-null, information about merges, deletes and a
1085:             * message when maxFieldLength is reached will be printed
1086:             * to this.
1087:             */
1088:            public void setInfoStream(PrintStream infoStream) {
1089:                ensureOpen();
1090:                this.infoStream = infoStream;
1091:                setMessageID();
1092:                // Propagate the stream to the DocumentsWriter and the IndexFileDeleter.
1093:                docWriter.setInfoStream(infoStream);
1094:                deleter.setInfoStream(infoStream);
1095:                if (infoStream != null) { messageState(); }
1096:            }
1097:
1098:            /** Reports the current settings and segString() through message(). */
1099:            private void messageState() {
1100:                // The "setInfoStream:" prefix is emitted for every caller, including init().
1101:                message("setInfoStream: dir=" + directory + " autoCommit=" + autoCommit
1102:                        + " mergePolicy=" + mergePolicy + " mergeScheduler=" + mergeScheduler
1103:                        + " ramBufferSizeMB=" + docWriter.getRAMBufferSizeMB()
1104:                        + " maxBufferedDocs=" + docWriter.getMaxBufferedDocs()
1105:                        + " maxBufferedDeleteTerms=" + docWriter.getMaxBufferedDeleteTerms()
1106:                        + " maxFieldLength=" + maxFieldLength
1107:                        + " index=" + segString());
1108:            }
1109:
1110:            /**
1111:             * Returns the current infoStream in use by this writer.
1112:             * @see #setInfoStream
1113:             */
1114:            public PrintStream getInfoStream() {
1115:                ensureOpen(); // same check the setter runs first
1116:                return this.infoStream;
1117:            }
1118:
1119:            /**
1120:             * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
1121:             * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
1122:             */
1123:            public void setWriteLockTimeout(long writeLockTimeout) {
1124:                ensureOpen();
1125:                this.writeLockTimeout = writeLockTimeout; // NOTE(review): init() has already obtained the lock by now
1126:            }
1127:
1128:            /**
1129:             * Returns allowed timeout when acquiring the write lock.
1130:             * @see #setWriteLockTimeout
1131:             */
1132:            public long getWriteLockTimeout() {
1133:                ensureOpen(); // same check the setter runs first
1134:                return this.writeLockTimeout;
1135:            }
1136:
1137:            /**
1138:             * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
1139:             * milliseconds).
1140:             */
1141:            public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
1142:                WRITE_LOCK_TIMEOUT = writeLockTimeout; // static, despite the constant-style name
1143:            }
1144:
1145:            /**
1146:             * Returns default write lock timeout for newly
1147:             * instantiated IndexWriters.
1148:             * @see #setDefaultWriteLockTimeout
1149:             */
1150:            public static long getDefaultWriteLockTimeout() {
1151:                return WRITE_LOCK_TIMEOUT; // static, despite the constant-style name
1152:            }
1153:
1154:            /**
1155:             * Flushes all changes to an index and closes all
1156:             * associated files.
1157:             *
1158:             * <p> If an Exception is hit during close, eg due to disk
1159:             * full or some other reason, then both the on-disk index
1160:             * and the internal state of the IndexWriter instance will
1161:             * be consistent.  However, the close will not be complete
1162:             * even though part of it (flushing buffered documents)
1163:             * may have succeeded, so the write lock will still be
1164:             * held.</p>
1165:             * 
1166:             * <p> If you can correct the underlying cause (eg free up
1167:             * some disk space) then you can call close() again.
1168:             * Failing that, if you want to force the write lock to be
1169:             * released (dangerous, because you may then lose buffered
1170:             * docs in the IndexWriter instance) then you can do
1171:             * something like this:</p>
1172:             *
1173:             * <pre>
1174:             * try {
1175:             *   writer.close();
1176:             * } finally {
1177:             *   if (IndexReader.isLocked(directory)) {
1178:             *     IndexReader.unlock(directory);
1179:             *   }
1180:             * }
1181:             * </pre>
1182:             *
1183:             * after which, you must be certain not to use the writer
1184:             * instance anymore.</p>
1185:             * @throws CorruptIndexException if the index is corrupt
1186:             * @throws IOException if there is a low-level IO error
1187:             */
1188:            public void close() throws CorruptIndexException, IOException {
1189:                close(true);
1190:            }
1191:
1192:            /**
1193:             * Closes the index with or without waiting for currently
1194:             * running merges to finish.  This is only meaningful when
1195:             * using a MergeScheduler that runs merges in background
1196:             * threads.
1197:             * @param waitForMerges if true, this call will block
1198:             * until all merges complete; else, it will ask all
1199:             * running merges to abort, wait until those merges have
1200:             * finished (which should be at most a few seconds), and
1201:             * then return.
1202:             */
1203:            public void close(boolean waitForMerges)
1204:                    throws CorruptIndexException, IOException {
1205:                boolean doClose;
1206:                synchronized (this ) {
1207:                    // Ensure that only one thread actually gets to do the closing:
1208:                    if (!closing) {
1209:                        doClose = true;
1210:                        closing = true;
1211:                    } else
1212:                        doClose = false;
1213:                }
1214:                if (doClose)
1215:                    closeInternal(waitForMerges);
1216:                else
1217:                    // Another thread beat us to it (is actually doing the
1218:                    // close), so we will block until that other thread
1219:                    // has finished closing
1220:                    waitForClose();
1221:            }
1222:
1223:            synchronized private void waitForClose() {
1224:                while (!closed && closing) {
1225:                    try {
1226:                        wait();
1227:                    } catch (InterruptedException ie) {
1228:                    }
1229:                }
1230:            }
1231:
1232:            private void closeInternal(boolean waitForMerges)
1233:                    throws CorruptIndexException, IOException {
1234:                try {
1235:                    if (infoStream != null)
1236:                        message("now flush at close");
1237:
1238:                    docWriter.close();
1239:
1240:                    // Only allow a new merge to be triggered if we are
1241:                    // going to wait for merges:
1242:                    flush(waitForMerges, true);
1243:
1244:                    mergePolicy.close();
1245:
1246:                    finishMerges(waitForMerges);
1247:
1248:                    mergeScheduler.close();
1249:
1250:                    synchronized (this ) {
1251:                        if (commitPending) {
1252:                            boolean success = false;
1253:                            try {
1254:                                segmentInfos.write(directory); // now commit changes
1255:                                success = true;
1256:                            } finally {
1257:                                if (!success) {
1258:                                    if (infoStream != null)
1259:                                        message("hit exception committing segments file during close");
1260:                                    deletePartialSegmentsFile();
1261:                                }
1262:                            }
1263:                            if (infoStream != null)
1264:                                message("close: wrote segments file \""
1265:                                        + segmentInfos
1266:                                                .getCurrentSegmentFileName()
1267:                                        + "\"");
1268:
1269:                            deleter.checkpoint(segmentInfos, true);
1270:
1271:                            commitPending = false;
1272:                            rollbackSegmentInfos = null;
1273:                        }
1274:
1275:                        if (infoStream != null)
1276:                            message("at close: " + segString());
1277:
1278:                        docWriter = null;
1279:
1280:                        deleter.close();
1281:                    }
1282:
1283:                    if (closeDir)
1284:                        directory.close();
1285:
1286:                    if (writeLock != null) {
1287:                        writeLock.release(); // release write lock
1288:                        writeLock = null;
1289:                    }
1290:                    closed = true;
1291:
1292:                } finally {
1293:                    synchronized (this ) {
1294:                        if (!closed)
1295:                            closing = false;
1296:                        notifyAll();
1297:                    }
1298:                }
1299:            }
1300:
1301:            /** Tells the docWriter to close its currently open shared
1302:             *  doc stores (stored fields & vectors files).
1303:             *  Return value specifices whether new doc store files are compound or not.
1304:             */
1305:            private synchronized boolean flushDocStores() throws IOException {
1306:
1307:                List files = docWriter.files();
1308:
1309:                boolean useCompoundDocStore = false;
1310:
1311:                if (files.size() > 0) {
1312:                    String docStoreSegment;
1313:
1314:                    boolean success = false;
1315:                    try {
1316:                        docStoreSegment = docWriter.closeDocStore();
1317:                        success = true;
1318:                    } finally {
1319:                        if (!success) {
1320:                            if (infoStream != null)
1321:                                message("hit exception closing doc store segment");
1322:                            docWriter.abort(null);
1323:                        }
1324:                    }
1325:
1326:                    useCompoundDocStore = mergePolicy
1327:                            .useCompoundDocStore(segmentInfos);
1328:
1329:                    if (useCompoundDocStore && docStoreSegment != null) {
1330:                        // Now build compound doc store file
1331:
1332:                        success = false;
1333:
1334:                        final int numSegments = segmentInfos.size();
1335:                        final String compoundFileName = docStoreSegment + "."
1336:                                + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
1337:
1338:                        try {
1339:                            CompoundFileWriter cfsWriter = new CompoundFileWriter(
1340:                                    directory, compoundFileName);
1341:                            final int size = files.size();
1342:                            for (int i = 0; i < size; i++)
1343:                                cfsWriter.addFile((String) files.get(i));
1344:
1345:                            // Perform the merge
1346:                            cfsWriter.close();
1347:
1348:                            for (int i = 0; i < numSegments; i++) {
1349:                                SegmentInfo si = segmentInfos.info(i);
1350:                                if (si.getDocStoreOffset() != -1
1351:                                        && si.getDocStoreSegment().equals(
1352:                                                docStoreSegment))
1353:                                    si.setDocStoreIsCompoundFile(true);
1354:                            }
1355:                            checkpoint();
1356:                            success = true;
1357:                        } finally {
1358:                            if (!success) {
1359:
1360:                                if (infoStream != null)
1361:                                    message("hit exception building compound file doc store for segment "
1362:                                            + docStoreSegment);
1363:
1364:                                // Rollback to no compound file
1365:                                for (int i = 0; i < numSegments; i++) {
1366:                                    SegmentInfo si = segmentInfos.info(i);
1367:                                    if (si.getDocStoreOffset() != -1
1368:                                            && si.getDocStoreSegment().equals(
1369:                                                    docStoreSegment))
1370:                                        si.setDocStoreIsCompoundFile(false);
1371:                                }
1372:                                deleter.deleteFile(compoundFileName);
1373:                                deletePartialSegmentsFile();
1374:                            }
1375:                        }
1376:
1377:                        deleter.checkpoint(segmentInfos, false);
1378:                    }
1379:                }
1380:
1381:                return useCompoundDocStore;
1382:            }
1383:
1384:            /** Release the write lock, if needed. */
1385:            protected void finalize() throws Throwable {
1386:                try {
1387:                    if (writeLock != null) {
1388:                        writeLock.release(); // release write lock
1389:                        writeLock = null;
1390:                    }
1391:                } finally {
1392:                    super .finalize();
1393:                }
1394:            }
1395:
1396:            /** Returns the Directory used by this index. */
1397:            public Directory getDirectory() {
1398:                ensureOpen();
1399:                return directory;
1400:            }
1401:
1402:            /** Returns the analyzer used by this index. */
1403:            public Analyzer getAnalyzer() {
1404:                ensureOpen();
1405:                return analyzer;
1406:            }
1407:
1408:            /** Returns the number of documents currently in this index. */
1409:            public synchronized int docCount() {
1410:                ensureOpen();
1411:                int count = docWriter.getNumDocsInRAM();
1412:                for (int i = 0; i < segmentInfos.size(); i++) {
1413:                    SegmentInfo si = segmentInfos.info(i);
1414:                    count += si.docCount;
1415:                }
1416:                return count;
1417:            }
1418:
1419:            /**
1420:             * The maximum number of terms that will be indexed for a single field in a
1421:             * document.  This limits the amount of memory required for indexing, so that
1422:             * collections with very large files will not crash the indexing process by
1423:             * running out of memory.<p/>
1424:             * Note that this effectively truncates large documents, excluding from the
1425:             * index terms that occur further in the document.  If you know your source
1426:             * documents are large, be sure to set this value high enough to accommodate
1427:             * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
1428:             * is your memory, but you should anticipate an OutOfMemoryError.<p/>
1429:             * By default, no more than 10,000 terms will be indexed for a field.
1430:             *
1431:             */
1432:            private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
1433:
1434:            /**
1435:             * Adds a document to this index.  If the document contains more than
1436:             * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
1437:             * discarded.
1438:             *
1439:             * <p> Note that if an Exception is hit (for example disk full)
1440:             * then the index will be consistent, but this document
1441:             * may not have been added.  Furthermore, it's possible
1442:             * the index will have one segment in non-compound format
1443:             * even when using compound files (when a merge has
1444:             * partially succeeded).</p>
1445:             *
1446:             * <p> This method periodically flushes pending documents
1447:             * to the Directory (every {@link #setMaxBufferedDocs}),
1448:             * and also periodically merges segments in the index
1449:             * (every {@link #setMergeFactor} flushes).  When this
1450:             * occurs, the method will take more time to run (possibly
1451:             * a long time if the index is large), and will require
1452:             * free temporary space in the Directory to do the
1453:             * merging.</p>
1454:             *
1455:             * <p>The amount of free space required when a merge is triggered is
1456:             * up to 1X the size of all segments being merged, when no
1457:             * readers/searchers are open against the index, and up to 2X the
1458:             * size of all segments being merged when readers/searchers are open
1459:             * against the index (see {@link #optimize()} for details). The
1460:             * sequence of primitive merge operations performed is governed by
1461:             * the merge policy.
1462:             *
1463:             * <p>Note that each term in the document can be no longer
1464:             * than 16383 characters, otherwise an
1465:             * IllegalArgumentException will be thrown.</p>
1466:             *
1467:             * @throws CorruptIndexException if the index is corrupt
1468:             * @throws IOException if there is a low-level IO error
1469:             */
1470:            public void addDocument(Document doc) throws CorruptIndexException,
1471:                    IOException {
1472:                addDocument(doc, analyzer);
1473:            }
1474:
1475:            /**
1476:             * Adds a document to this index, using the provided analyzer instead of the
1477:             * value of {@link #getAnalyzer()}.  If the document contains more than
1478:             * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
1479:             * discarded.
1480:             *
1481:             * <p>See {@link #addDocument(Document)} for details on
1482:             * index and IndexWriter state after an Exception, and
1483:             * flushing/merging temporary free space requirements.</p>
1484:             *
1485:             * @throws CorruptIndexException if the index is corrupt
1486:             * @throws IOException if there is a low-level IO error
1487:             */
1488:            public void addDocument(Document doc, Analyzer analyzer)
1489:                    throws CorruptIndexException, IOException {
1490:                ensureOpen();
1491:                boolean doFlush = false;
1492:                boolean success = false;
1493:                try {
1494:                    doFlush = docWriter.addDocument(doc, analyzer);
1495:                    success = true;
1496:                } finally {
1497:                    if (!success) {
1498:
1499:                        if (infoStream != null)
1500:                            message("hit exception adding document");
1501:
1502:                        synchronized (this ) {
1503:                            // If docWriter has some aborted files that were
1504:                            // never incref'd, then we clean them up here
1505:                            if (docWriter != null) {
1506:                                final List files = docWriter.abortedFiles();
1507:                                if (files != null)
1508:                                    deleter.deleteNewFiles(files);
1509:                            }
1510:                        }
1511:                    }
1512:                }
1513:                if (doFlush)
1514:                    flush(true, false);
1515:            }
1516:
1517:            /**
1518:             * Deletes the document(s) containing <code>term</code>.
1519:             * @param term the term to identify the documents to be deleted
1520:             * @throws CorruptIndexException if the index is corrupt
1521:             * @throws IOException if there is a low-level IO error
1522:             */
1523:            public void deleteDocuments(Term term)
1524:                    throws CorruptIndexException, IOException {
1525:                ensureOpen();
1526:                boolean doFlush = docWriter.bufferDeleteTerm(term);
1527:                if (doFlush)
1528:                    flush(true, false);
1529:            }
1530:
1531:            /**
1532:             * Deletes the document(s) containing any of the
1533:             * terms. All deletes are flushed at the same time.
1534:             * @param terms array of terms to identify the documents
1535:             * to be deleted
1536:             * @throws CorruptIndexException if the index is corrupt
1537:             * @throws IOException if there is a low-level IO error
1538:             */
1539:            public void deleteDocuments(Term[] terms)
1540:                    throws CorruptIndexException, IOException {
1541:                ensureOpen();
1542:                boolean doFlush = docWriter.bufferDeleteTerms(terms);
1543:                if (doFlush)
1544:                    flush(true, false);
1545:            }
1546:
1547:            /**
1548:             * Updates a document by first deleting the document(s)
1549:             * containing <code>term</code> and then adding the new
1550:             * document.  The delete and then add are atomic as seen
1551:             * by a reader on the same index (flush may happen only after
1552:             * the add).
1553:             * @param term the term to identify the document(s) to be
1554:             * deleted
1555:             * @param doc the document to be added
1556:             * @throws CorruptIndexException if the index is corrupt
1557:             * @throws IOException if there is a low-level IO error
1558:             */
1559:            public void updateDocument(Term term, Document doc)
1560:                    throws CorruptIndexException, IOException {
1561:                ensureOpen();
1562:                updateDocument(term, doc, getAnalyzer());
1563:            }
1564:
1565:            /**
1566:             * Updates a document by first deleting the document(s)
1567:             * containing <code>term</code> and then adding the new
1568:             * document.  The delete and then add are atomic as seen
1569:             * by a reader on the same index (flush may happen only after
1570:             * the add).
1571:             * @param term the term to identify the document(s) to be
1572:             * deleted
1573:             * @param doc the document to be added
1574:             * @param analyzer the analyzer to use when analyzing the document
1575:             * @throws CorruptIndexException if the index is corrupt
1576:             * @throws IOException if there is a low-level IO error
1577:             */
1578:            public void updateDocument(Term term, Document doc,
1579:                    Analyzer analyzer) throws CorruptIndexException,
1580:                    IOException {
1581:                ensureOpen();
1582:                boolean doFlush = false;
1583:                boolean success = false;
1584:                try {
1585:                    doFlush = docWriter.updateDocument(term, doc, analyzer);
1586:                    success = true;
1587:                } finally {
1588:                    if (!success) {
1589:
1590:                        if (infoStream != null)
1591:                            message("hit exception updating document");
1592:
1593:                        synchronized (this ) {
1594:                            // If docWriter has some aborted files that were
1595:                            // never incref'd, then we clean them up here
1596:                            final List files = docWriter.abortedFiles();
1597:                            if (files != null)
1598:                                deleter.deleteNewFiles(files);
1599:                        }
1600:                    }
1601:                }
1602:                if (doFlush)
1603:                    flush(true, false);
1604:            }
1605:
1606:            // for test purpose
1607:            final synchronized int getSegmentCount() {
1608:                return segmentInfos.size();
1609:            }
1610:
1611:            // for test purpose
1612:            final synchronized int getNumBufferedDocuments() {
1613:                return docWriter.getNumDocsInRAM();
1614:            }
1615:
1616:            // for test purpose
1617:            final synchronized int getDocCount(int i) {
1618:                if (i >= 0 && i < segmentInfos.size()) {
1619:                    return segmentInfos.info(i).docCount;
1620:                } else {
1621:                    return -1;
1622:                }
1623:            }
1624:
1625:            final String newSegmentName() {
1626:                // Cannot synchronize on IndexWriter because that causes
1627:                // deadlock
1628:                synchronized (segmentInfos) {
1629:                    // Important to set commitPending so that the
1630:                    // segmentInfos is written on close.  Otherwise we
1631:                    // could close, re-open and re-return the same segment
1632:                    // name that was previously returned which can cause
1633:                    // problems at least with ConcurrentMergeScheduler.
1634:                    commitPending = true;
1635:                    return "_"
1636:                            + Integer.toString(segmentInfos.counter++,
1637:                                    Character.MAX_RADIX);
1638:                }
1639:            }
1640:
1641:            /** If non-null, information about merges will be printed to this.
1642:             */
1643:            private PrintStream infoStream = null;
1644:            private static PrintStream defaultInfoStream = null;
1645:
1646:            /**
1647:             * Requests an "optimize" operation on an index, priming the index
1648:             * for the fastest available search. Traditionally this has meant
1649:             * merging all segments into a single segment as is done in the
1650:             * default merge policy, but individaul merge policies may implement
1651:             * optimize in different ways.
1652:             *
1653:             * @see LogMergePolicy#findMergesForOptimize
1654:             *
1655:             * <p>It is recommended that this method be called upon completion of indexing.  In
1656:             * environments with frequent updates, optimize is best done during low volume times, if at all. 
1657:             * 
1658:             * </p>
1659:             * <p>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion. </p>
1660:             *
1661:             * <p>Note that this can require substantial temporary free
1662:             * space in the Directory (see <a target="_top"
1663:             * href="http://issues.apache.org/jira/browse/LUCENE-764">LUCENE-764</a>
1664:             * for details):</p>
1665:             *
1666:             * <ul>
1667:             * <li>
1668:             * 
1669:             * <p>If no readers/searchers are open against the index,
1670:             * then free space required is up to 1X the total size of
1671:             * the starting index.  For example, if the starting
1672:             * index is 10 GB, then you must have up to 10 GB of free
1673:             * space before calling optimize.</p>
1674:             *
1675:             * <li>
1676:             * 
1677:             * <p>If readers/searchers are using the index, then free
1678:             * space required is up to 2X the size of the starting
1679:             * index.  This is because in addition to the 1X used by
1680:             * optimize, the original 1X of the starting index is
1681:             * still consuming space in the Directory as the readers
1682:             * are holding the segments files open.  Even on Unix,
1683:             * where it will appear as if the files are gone ("ls"
1684:             * won't list them), they still consume storage due to
1685:             * "delete on last close" semantics.</p>
1686:             * 
1687:             * <p>Furthermore, if some but not all readers re-open
1688:             * while the optimize is underway, this will cause > 2X
1689:             * temporary space to be consumed as those new readers
1690:             * will then hold open the partially optimized segments at
1691:             * that time.  It is best not to re-open readers while
1692:             * optimize is running.</p>
1693:             *
1694:             * </ul>
1695:             *
1696:             * <p>The actual temporary usage could be much less than
1697:             * these figures (it depends on many factors).</p>
1698:             *
1699:             * <p>In general, once the optimize completes, the total size of the
1700:             * index will be less than the size of the starting index.
1701:             * It could be quite a bit smaller (if there were many
1702:             * pending deletes) or just slightly smaller.</p>
1703:             *
1704:             * <p>If an Exception is hit during optimize(), for example
1705:             * due to disk full, the index will not be corrupt and no
1706:             * documents will have been lost.  However, it may have
1707:             * been partially optimized (some segments were merged but
1708:             * not all), and it's possible that one of the segments in
1709:             * the index will be in non-compound format even when
1710:             * using compound file format.  This will occur when the
1711:             * Exception is hit during conversion of the segment into
1712:             * compound format.</p>
1713:             *
1714:             * <p>This call will optimize those segments present in
1715:             * the index when the call started.  If other threads are
1716:             * still adding documents and flushing segments, those
1717:             * newly created segments will not be optimized unless you
1718:             * call optimize again.</p>
1719:             *
1720:             * @throws CorruptIndexException if the index is corrupt
1721:             * @throws IOException if there is a low-level IO error
1722:             */
1723:            public void optimize() throws CorruptIndexException, IOException {
1724:                optimize(true);
1725:            }
1726:
1727:            /**
1728:             * Optimize the index down to <= maxNumSegments.  If
1729:             * maxNumSegments==1 then this is the same as {@link
1730:             * #optimize()}.
1731:             * @param maxNumSegments maximum number of segments left
1732:             * in the index after optimization finishes
1733:             */
1734:            public void optimize(int maxNumSegments)
1735:                    throws CorruptIndexException, IOException {
1736:                optimize(maxNumSegments, true);
1737:            }
1738:
1739:            /** Just like {@link #optimize()}, except you can specify
1740:             *  whether the call should block until the optimize
1741:             *  completes.  This is only meaningful with a
1742:             *  {@link MergeScheduler} that is able to run merges in
1743:             *  background threads. */
1744:            public void optimize(boolean doWait) throws CorruptIndexException,
1745:                    IOException {
1746:                optimize(1, true);
1747:            }
1748:
1749:            /** Just like {@link #optimize(int)}, except you can
1750:             *  specify whether the call should block until the
1751:             *  optimize completes.  This is only meaningful with a
1752:             *  {@link MergeScheduler} that is able to run merges in
1753:             *  background threads. */
1754:            public void optimize(int maxNumSegments, boolean doWait)
1755:                    throws CorruptIndexException, IOException {
1756:                ensureOpen();
1757:
1758:                if (maxNumSegments < 1)
1759:                    throw new IllegalArgumentException(
1760:                            "maxNumSegments must be >= 1; got "
1761:                                    + maxNumSegments);
1762:
1763:                if (infoStream != null)
1764:                    message("optimize: index now " + segString());
1765:
1766:                flush();
1767:
1768:                synchronized (this ) {
1769:                    resetMergeExceptions();
1770:                    segmentsToOptimize = new HashSet();
1771:                    final int numSegments = segmentInfos.size();
1772:                    for (int i = 0; i < numSegments; i++)
1773:                        segmentsToOptimize.add(segmentInfos.info(i));
1774:
1775:                    // Now mark all pending & running merges as optimize
1776:                    // merge:
1777:                    Iterator it = pendingMerges.iterator();
1778:                    while (it.hasNext()) {
1779:                        final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it
1780:                                .next();
1781:                        merge.optimize = true;
1782:                        merge.maxNumSegmentsOptimize = maxNumSegments;
1783:                    }
1784:
1785:                    it = runningMerges.iterator();
1786:                    while (it.hasNext()) {
1787:                        final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it
1788:                                .next();
1789:                        merge.optimize = true;
1790:                        merge.maxNumSegmentsOptimize = maxNumSegments;
1791:                    }
1792:                }
1793:
1794:                maybeMerge(maxNumSegments, true);
1795:
1796:                if (doWait) {
1797:                    synchronized (this ) {
1798:                        while (optimizeMergesPending()) {
1799:                            try {
1800:                                wait();
1801:                            } catch (InterruptedException ie) {
1802:                            }
1803:
1804:                            if (mergeExceptions.size() > 0) {
1805:                                // Forward any exceptions in background merge
1806:                                // threads to the current thread:
1807:                                final int size = mergeExceptions.size();
1808:                                for (int i = 0; i < size; i++) {
1809:                                    final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) mergeExceptions
1810:                                            .get(0);
1811:                                    if (merge.optimize) {
1812:                                        IOException err = new IOException(
1813:                                                "background merge hit exception: "
1814:                                                        + merge
1815:                                                                .segString(directory));
1816:                                        err.initCause(merge.getException());
1817:                                        throw err;
1818:                                    }
1819:                                }
1820:                            }
1821:                        }
1822:                    }
1823:                }
1824:
1825:                // NOTE: in the ConcurrentMergeScheduler case, when
1826:                // doWait is false, we can return immediately while
1827:                // background threads accomplish the optimization
1828:            }
1829:
1830:            /** Returns true if any merges in pendingMerges or
1831:             *  runningMerges are optimization merges. */
1832:            private synchronized boolean optimizeMergesPending() {
1833:                Iterator it = pendingMerges.iterator();
1834:                while (it.hasNext())
1835:                    if (((MergePolicy.OneMerge) it.next()).optimize)
1836:                        return true;
1837:
1838:                it = runningMerges.iterator();
1839:                while (it.hasNext())
1840:                    if (((MergePolicy.OneMerge) it.next()).optimize)
1841:                        return true;
1842:
1843:                return false;
1844:            }
1845:
1846:            /**
1847:             * Expert: asks the mergePolicy whether any merges are
1848:             * necessary now and if so, runs the requested merges and
1849:             * then iterate (test again if merges are needed) until no
1850:             * more merges are returned by the mergePolicy.
1851:             *
1852:             * Explicit calls to maybeMerge() are usually not
1853:             * necessary. The most common case is when merge policy
1854:             * parameters have changed.
1855:             */
1856:            public final void maybeMerge() throws CorruptIndexException,
1857:                    IOException {
1858:                maybeMerge(false);
1859:            }
1860:
1861:            private final void maybeMerge(boolean optimize)
1862:                    throws CorruptIndexException, IOException {
1863:                maybeMerge(1, optimize);
1864:            }
1865:
1866:            private final void maybeMerge(int maxNumSegmentsOptimize,
1867:                    boolean optimize) throws CorruptIndexException, IOException {
1868:                updatePendingMerges(maxNumSegmentsOptimize, optimize);
1869:                mergeScheduler.merge(this );
1870:            }
1871:
1872:            private synchronized void updatePendingMerges(
1873:                    int maxNumSegmentsOptimize, boolean optimize)
1874:                    throws CorruptIndexException, IOException {
1875:                assert !optimize || maxNumSegmentsOptimize > 0;
1876:
1877:                if (stopMerges)
1878:                    return;
1879:
1880:                final MergePolicy.MergeSpecification spec;
1881:                if (optimize) {
1882:                    spec = mergePolicy.findMergesForOptimize(segmentInfos,
1883:                            this , maxNumSegmentsOptimize, segmentsToOptimize);
1884:
1885:                    if (spec != null) {
1886:                        final int numMerges = spec.merges.size();
1887:                        for (int i = 0; i < numMerges; i++) {
1888:                            final MergePolicy.OneMerge merge = ((MergePolicy.OneMerge) spec.merges
1889:                                    .get(i));
1890:                            merge.optimize = true;
1891:                            merge.maxNumSegmentsOptimize = maxNumSegmentsOptimize;
1892:                        }
1893:                    }
1894:
1895:                } else
1896:                    spec = mergePolicy.findMerges(segmentInfos, this );
1897:
1898:                if (spec != null) {
1899:                    final int numMerges = spec.merges.size();
1900:                    for (int i = 0; i < numMerges; i++)
1901:                        registerMerge((MergePolicy.OneMerge) spec.merges.get(i));
1902:                }
1903:            }
1904:
1905:            /** Expert: the {@link MergeScheduler} calls this method
1906:             *  to retrieve the next merge requested by the
1907:             *  MergePolicy */
1908:            synchronized MergePolicy.OneMerge getNextMerge() {
1909:                if (pendingMerges.size() == 0)
1910:                    return null;
1911:                else {
1912:                    // Advance the merge from pending to running
1913:                    MergePolicy.OneMerge merge = (MergePolicy.OneMerge) pendingMerges
1914:                            .removeFirst();
1915:                    runningMerges.add(merge);
1916:                    return merge;
1917:                }
1918:            }
1919:
1920:            /*
1921:             * Begin a transaction.  During a transaction, any segment
1922:             * merges that happen (or ram segments flushed) will not
1923:             * write a new segments file and will not remove any files
1924:             * that were present at the start of the transaction.  You
1925:             * must make a matched (try/finally) call to
1926:             * commitTransaction() or rollbackTransaction() to finish
1927:             * the transaction.
1928:             *
1929:             * Note that buffered documents and delete terms are not handled
1930:             * within the transactions, so they must be flushed before the
1931:             * transaction is started.
1932:             */
1933:            private void startTransaction() throws IOException {
1934:
1935:                if (infoStream != null)
1936:                    message("now start transaction");
1937:
1938:                assert docWriter.getNumBufferedDeleteTerms() == 0 : "calling startTransaction with buffered delete terms not supported";
1939:                assert docWriter.getNumDocsInRAM() == 0 : "calling startTransaction with buffered documents not supported";
1940:
1941:                localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
1942:                localAutoCommit = autoCommit;
1943:
1944:                if (localAutoCommit) {
1945:
1946:                    if (infoStream != null)
1947:                        message("flush at startTransaction");
1948:
1949:                    flush();
1950:                    // Turn off auto-commit during our local transaction:
1951:                    autoCommit = false;
1952:                } else
1953:                    // We must "protect" our files at this point from
1954:                    // deletion in case we need to rollback:
1955:                    deleter.incRef(segmentInfos, false);
1956:            }
1957:
1958:            /*
1959:             * Rolls back the transaction and restores state to where
1960:             * we were at the start.
1961:             */
1962:            private void rollbackTransaction() throws IOException {
1963:
1964:                if (infoStream != null)
1965:                    message("now rollback transaction");
1966:
1967:                // First restore autoCommit in case we hit an exception below:
1968:                autoCommit = localAutoCommit;
1969:
1970:                // Keep the same segmentInfos instance but replace all
1971:                // of its SegmentInfo instances.  This is so the next
1972:                // attempt to commit using this instance of IndexWriter
1973:                // will always write to a new generation ("write once").
1974:                segmentInfos.clear();
1975:                segmentInfos.addAll(localRollbackSegmentInfos);
1976:                localRollbackSegmentInfos = null;
1977:
1978:                // Ask deleter to locate unreferenced files we had
1979:                // created & remove them:
1980:                deleter.checkpoint(segmentInfos, false);
1981:
1982:                if (!autoCommit)
1983:                    // Remove the incRef we did in startTransaction:
1984:                    deleter.decRef(segmentInfos);
1985:
1986:                deleter.refresh();
1987:                finishMerges(false);
1988:                stopMerges = false;
1989:            }
1990:
1991:            /*
1992:             * Commits the transaction.  This will write the new
1993:             * segments file and remove and pending deletions we have
1994:             * accumulated during the transaction
1995:             */
1996:            private void commitTransaction() throws IOException {
1997:
1998:                if (infoStream != null)
1999:                    message("now commit transaction");
2000:
2001:                // First restore autoCommit in case we hit an exception below:
2002:                autoCommit = localAutoCommit;
2003:
2004:                boolean success = false;
2005:                try {
2006:                    checkpoint();
2007:                    success = true;
2008:                } finally {
2009:                    if (!success) {
2010:                        if (infoStream != null)
2011:                            message("hit exception committing transaction");
2012:
2013:                        rollbackTransaction();
2014:                    }
2015:                }
2016:
2017:                if (!autoCommit)
2018:                    // Remove the incRef we did in startTransaction.
2019:                    deleter.decRef(localRollbackSegmentInfos);
2020:
2021:                localRollbackSegmentInfos = null;
2022:
2023:                // Give deleter a chance to remove files now:
2024:                deleter.checkpoint(segmentInfos, autoCommit);
2025:            }
2026:
2027:            /**
2028:             * Close the <code>IndexWriter</code> without committing
2029:             * any of the changes that have occurred since it was
2030:             * opened. This removes any temporary files that had been
2031:             * created, after which the state of the index will be the
2032:             * same as it was when this writer was first opened.  This
2033:             * can only be called when this IndexWriter was opened
2034:             * with <code>autoCommit=false</code>.
2035:             * @throws IllegalStateException if this is called when
2036:             *  the writer was opened with <code>autoCommit=true</code>.
2037:             * @throws IOException if there is a low-level IO error
2038:             */
2039:            public void abort() throws IOException {
2040:                ensureOpen();
2041:                if (autoCommit)
2042:                    throw new IllegalStateException(
2043:                            "abort() can only be called when IndexWriter was opened with autoCommit=false");
2044:
2045:                boolean doClose;
2046:                synchronized (this ) {
2047:                    // Ensure that only one thread actually gets to do the closing:
2048:                    if (!closing) {
2049:                        doClose = true;
2050:                        closing = true;
2051:                    } else
2052:                        doClose = false;
2053:                }
2054:
2055:                if (doClose) {
2056:
2057:                    finishMerges(false);
2058:
2059:                    // Must pre-close these two, in case they set
2060:                    // commitPending=true, so that we can then set it to
2061:                    // false before calling closeInternal
2062:                    mergePolicy.close();
2063:                    mergeScheduler.close();
2064:
2065:                    synchronized (this ) {
2066:                        // Keep the same segmentInfos instance but replace all
2067:                        // of its SegmentInfo instances.  This is so the next
2068:                        // attempt to commit using this instance of IndexWriter
2069:                        // will always write to a new generation ("write
2070:                        // once").
2071:                        segmentInfos.clear();
2072:                        segmentInfos.addAll(rollbackSegmentInfos);
2073:
2074:                        docWriter.abort(null);
2075:
2076:                        // Ask deleter to locate unreferenced files & remove
2077:                        // them:
2078:                        deleter.checkpoint(segmentInfos, false);
2079:                        deleter.refresh();
2080:                    }
2081:
2082:                    commitPending = false;
2083:                    closeInternal(false);
2084:                } else
2085:                    waitForClose();
2086:            }
2087:
2088:            private synchronized void finishMerges(boolean waitForMerges)
2089:                    throws IOException {
2090:                if (!waitForMerges) {
2091:
2092:                    stopMerges = true;
2093:
2094:                    // Abort all pending & running merges:
2095:                    Iterator it = pendingMerges.iterator();
2096:                    while (it.hasNext()) {
2097:                        final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it
2098:                                .next();
2099:                        if (infoStream != null)
2100:                            message("now abort pending merge "
2101:                                    + merge.segString(directory));
2102:                        merge.abort();
2103:                        mergeFinish(merge);
2104:                    }
2105:                    pendingMerges.clear();
2106:
2107:                    it = runningMerges.iterator();
2108:                    while (it.hasNext()) {
2109:                        final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it
2110:                                .next();
2111:                        if (infoStream != null)
2112:                            message("now abort running merge "
2113:                                    + merge.segString(directory));
2114:                        merge.abort();
2115:                    }
2116:
2117:                    // These merges periodically check whether they have
2118:                    // been aborted, and stop if so.  We wait here to make
2119:                    // sure they all stop.  It should not take very long
2120:                    // because the merge threads periodically check if
2121:                    // they are aborted.
2122:                    while (runningMerges.size() > 0) {
2123:                        if (infoStream != null)
2124:                            message("now wait for " + runningMerges.size()
2125:                                    + " running merge to abort");
2126:                        try {
2127:                            wait();
2128:                        } catch (InterruptedException ie) {
2129:                            Thread.currentThread().interrupt();
2130:                        }
2131:                    }
2132:
2133:                    assert 0 == mergingSegments.size();
2134:
2135:                    if (infoStream != null)
2136:                        message("all running merges have aborted");
2137:
2138:                } else {
2139:                    while (pendingMerges.size() > 0 || runningMerges.size() > 0) {
2140:                        try {
2141:                            wait();
2142:                        } catch (InterruptedException ie) {
2143:                        }
2144:                    }
2145:                    assert 0 == mergingSegments.size();
2146:                }
2147:            }
2148:
2149:            /*
2150:             * Called whenever the SegmentInfos has been updated and
2151:             * the index files referenced exist (correctly) in the
2152:             * index directory.  If we are in autoCommit mode, we
2153:             * commit the change immediately.  Else, we mark
2154:             * commitPending.
2155:             */
2156:            private synchronized void checkpoint() throws IOException {
2157:                if (autoCommit) {
2158:                    segmentInfos.write(directory);
2159:                    commitPending = false;
2160:                    if (infoStream != null)
2161:                        message("checkpoint: wrote segments file \""
2162:                                + segmentInfos.getCurrentSegmentFileName()
2163:                                + "\"");
2164:                } else {
2165:                    commitPending = true;
2166:                }
2167:            }
2168:
2169:            /** Merges all segments from an array of indexes into this index.
2170:             *
2171:             * <p>This may be used to parallelize batch indexing.  A large document
2172:             * collection can be broken into sub-collections.  Each sub-collection can be
2173:             * indexed in parallel, on a different thread, process or machine.  The
2174:             * complete index can then be created by merging sub-collection indexes
2175:             * with this method.
2176:             *
2177:             * <p><b>NOTE:</b> the index in each Directory must not be
2178:             * changed (opened by a writer) while this method is
2179:             * running.  This method does not acquire a write lock in
2180:             * each input Directory, so it is up to the caller to
2181:             * enforce this.
2182:             *
2183:             * <p>After this completes, the index is optimized.
2184:             *
2185:             * <p>This method is transactional in how Exceptions are
2186:             * handled: it does not commit a new segments_N file until
2187:             * all indexes are added.  This means if an Exception
2188:             * occurs (for example disk full), then either no indexes
2189:             * will have been added or they all will have been.</p>
2190:             *
2191:             * <p>If an Exception is hit, it's still possible that all
2192:             * indexes were successfully added.  This happens when the
2193:             * Exception is hit when trying to build a CFS file.  In
2194:             * this case, one segment in the index will be in non-CFS
2195:             * format, even when using compound file format.</p>
2196:             *
2197:             * <p>Also note that on an Exception, the index may still
2198:             * have been partially or fully optimized even though none
2199:             * of the input indexes were added. </p>
2200:             *
2201:             * <p>Note that this requires temporary free space in the
2202:             * Directory up to 2X the sum of all input indexes
2203:             * (including the starting index).  If readers/searchers
2204:             * are open against the starting index, then temporary
2205:             * free space required will be higher by the size of the
2206:             * starting index (see {@link #optimize()} for details).
2207:             * </p>
2208:             *
2209:             * <p>Once this completes, the final size of the index
2210:             * will be less than the sum of all input index sizes
2211:             * (including the starting index).  It could be quite a
2212:             * bit smaller (if there were many pending deletes) or
2213:             * just slightly smaller.</p>
2214:             *
2215:             * <p>See <a target="_top"
2216:             * href="http://issues.apache.org/jira/browse/LUCENE-702">LUCENE-702</a>
2217:             * for details.</p>
2218:             * @throws CorruptIndexException if the index is corrupt
2219:             * @throws IOException if there is a low-level IO error
2220:             */
2221:            public synchronized void addIndexes(Directory[] dirs)
2222:                    throws CorruptIndexException, IOException {
2223:
2224:                ensureOpen();
2225:                if (infoStream != null)
2226:                    message("flush at addIndexes");
2227:                flush();
2228:
2229:                boolean success = false;
2230:
2231:                startTransaction();
2232:
2233:                try {
2234:                    for (int i = 0; i < dirs.length; i++) {
2235:                        SegmentInfos sis = new SegmentInfos(); // read infos from dir
2236:                        sis.read(dirs[i]);
2237:                        for (int j = 0; j < sis.size(); j++) {
2238:                            segmentInfos.addElement(sis.info(j)); // add each info
2239:                        }
2240:                    }
2241:
2242:                    optimize();
2243:
2244:                    success = true;
2245:                } finally {
2246:                    if (success) {
2247:                        commitTransaction();
2248:                    } else {
2249:                        rollbackTransaction();
2250:                    }
2251:                }
2252:            }
2253:
2254:            private synchronized void resetMergeExceptions() {
2255:                mergeExceptions = new ArrayList();
2256:                mergeGen++;
2257:            }
2258:
2259:            /**
2260:             * Merges all segments from an array of indexes into this index.
2261:             * <p>
2262:             * This is similar to addIndexes(Directory[]). However, no optimize()
2263:             * is called either at the beginning or at the end. Instead, merges
2264:             * are carried out as necessary.
2265:             *
2266:             * <p><b>NOTE:</b> the index in each Directory must not be
2267:             * changed (opened by a writer) while this method is
2268:             * running.  This method does not acquire a write lock in
2269:             * each input Directory, so it is up to the caller to
2270:             * enforce this.
2271:             *
2272:             * <p>
2273:             * This requires this index not be among those to be added, and the
2274:             * upper bound* of those segment doc counts not exceed maxMergeDocs.
2275:             *
2276:             * <p>See {@link #addIndexes(Directory[])} for
2277:             * details on transactional semantics, temporary free
2278:             * space required in the Directory, and non-CFS segments
2279:             * on an Exception.</p>
2280:             * @throws CorruptIndexException if the index is corrupt
2281:             * @throws IOException if there is a low-level IO error
2282:             */
2283:            public synchronized void addIndexesNoOptimize(Directory[] dirs)
2284:                    throws CorruptIndexException, IOException {
2285:
2286:                ensureOpen();
2287:                if (infoStream != null)
2288:                    message("flush at addIndexesNoOptimize");
2289:                flush();
2290:
2291:                boolean success = false;
2292:
2293:                startTransaction();
2294:
2295:                try {
2296:
2297:                    for (int i = 0; i < dirs.length; i++) {
2298:                        if (directory == dirs[i]) {
2299:                            // cannot add this index: segments may be deleted in merge before added
2300:                            throw new IllegalArgumentException(
2301:                                    "Cannot add this index to itself");
2302:                        }
2303:
2304:                        SegmentInfos sis = new SegmentInfos(); // read infos from dir
2305:                        sis.read(dirs[i]);
2306:                        for (int j = 0; j < sis.size(); j++) {
2307:                            SegmentInfo info = sis.info(j);
2308:                            segmentInfos.addElement(info); // add each info
2309:                        }
2310:                    }
2311:
2312:                    maybeMerge();
2313:
2314:                    // If after merging there remain segments in the index
2315:                    // that are in a different directory, just copy these
2316:                    // over into our index.  This is necessary (before
2317:                    // finishing the transaction) to avoid leaving the
2318:                    // index in an unusable (inconsistent) state.
2319:                    copyExternalSegments();
2320:
2321:                    success = true;
2322:
2323:                } finally {
2324:                    if (success) {
2325:                        commitTransaction();
2326:                    } else {
2327:                        rollbackTransaction();
2328:                    }
2329:                }
2330:            }
2331:
2332:            /* If any of our segments are using a directory != ours
2333:             * then copy them over.  Currently this is only used by
2334:             * addIndexesNoOptimize(). */
2335:            private synchronized void copyExternalSegments()
2336:                    throws CorruptIndexException, IOException {
2337:                final int numSegments = segmentInfos.size();
2338:                for (int i = 0; i < numSegments; i++) {
2339:                    SegmentInfo info = segmentInfos.info(i);
2340:                    if (info.dir != directory) {
2341:                        MergePolicy.OneMerge merge = new MergePolicy.OneMerge(
2342:                                segmentInfos.range(i, 1 + i), info
2343:                                        .getUseCompoundFile());
2344:                        if (registerMerge(merge)) {
2345:                            pendingMerges.remove(merge);
2346:                            runningMerges.add(merge);
2347:                            merge(merge);
2348:                        } else
2349:                            // This means there is a bug in the
2350:                            // MergeScheduler.  MergeSchedulers in general are
2351:                            // not allowed to run a merge involving segments
2352:                            // external to this IndexWriter's directory in the
2353:                            // background because this would put the index
2354:                            // into an inconsistent state (where segmentInfos
2355:                            // has been written with such external segments
2356:                            // that an IndexReader would fail to load).
2357:                            throw new MergePolicy.MergeException(
2358:                                    "segment \""
2359:                                            + info.name
2360:                                            + " exists in external directory yet the MergeScheduler executed the merge in a separate thread");
2361:                    }
2362:                }
2363:            }
2364:
2365:            /** Merges the provided indexes into this index.
2366:             * <p>After this completes, the index is optimized. </p>
2367:             * <p>The provided IndexReaders are not closed.</p>
2368:
2369:             * <p>See {@link #addIndexes(Directory[])} for
2370:             * details on transactional semantics, temporary free
2371:             * space required in the Directory, and non-CFS segments
2372:             * on an Exception.</p>
2373:             * @throws CorruptIndexException if the index is corrupt
2374:             * @throws IOException if there is a low-level IO error
2375:             */
2376:            public synchronized void addIndexes(IndexReader[] readers)
2377:                    throws CorruptIndexException, IOException {
2378:
2379:                ensureOpen();
2380:                optimize(); // start with zero or 1 seg
2381:
2382:                final String mergedName = newSegmentName();
2383:                SegmentMerger merger = new SegmentMerger(this , mergedName, null);
2384:
2385:                SegmentInfo info;
2386:
2387:                IndexReader sReader = null;
2388:                try {
2389:                    if (segmentInfos.size() == 1) { // add existing index, if any
2390:                        sReader = SegmentReader.get(segmentInfos.info(0));
2391:                        merger.add(sReader);
2392:                    }
2393:
2394:                    for (int i = 0; i < readers.length; i++)
2395:                        // add new indexes
2396:                        merger.add(readers[i]);
2397:
2398:                    boolean success = false;
2399:
2400:                    startTransaction();
2401:
2402:                    try {
2403:                        int docCount = merger.merge(); // merge 'em
2404:
2405:                        if (sReader != null) {
2406:                            sReader.close();
2407:                            sReader = null;
2408:                        }
2409:
2410:                        segmentInfos.setSize(0); // pop old infos & add new
2411:                        info = new SegmentInfo(mergedName, docCount, directory,
2412:                                false, true, -1, null, false);
2413:                        segmentInfos.addElement(info);
2414:
2415:                        success = true;
2416:
2417:                    } finally {
2418:                        if (!success) {
2419:                            if (infoStream != null)
2420:                                message("hit exception in addIndexes during merge");
2421:
2422:                            rollbackTransaction();
2423:                        } else {
2424:                            commitTransaction();
2425:                        }
2426:                    }
2427:                } finally {
2428:                    if (sReader != null) {
2429:                        sReader.close();
2430:                    }
2431:                }
2432:
2433:                if (mergePolicy instanceof  LogMergePolicy
2434:                        && getUseCompoundFile()) {
2435:
2436:                    boolean success = false;
2437:
2438:                    startTransaction();
2439:
2440:                    try {
2441:                        merger.createCompoundFile(mergedName + ".cfs");
2442:                        info.setUseCompoundFile(true);
2443:                    } finally {
2444:                        if (!success) {
2445:                            if (infoStream != null)
2446:                                message("hit exception building compound file in addIndexes during merge");
2447:
2448:                            rollbackTransaction();
2449:                        } else {
2450:                            commitTransaction();
2451:                        }
2452:                    }
2453:                }
2454:            }
2455:
2456:            // This is called after pending added and deleted
2457:            // documents have been flushed to the Directory but before
2458:            // the change is committed (new segments_N file written).
2459:            void doAfterFlush() throws IOException {
2460:            }
2461:
2462:            /**
2463:             * Flush all in-memory buffered updates (adds and deletes)
2464:             * to the Directory. 
2465:             * <p>Note: if <code>autoCommit=false</code>, flushed data would still 
2466:             * not be visible to readers, until {@link #close} is called.
2467:             * @throws CorruptIndexException if the index is corrupt
2468:             * @throws IOException if there is a low-level IO error
2469:             */
2470:            public final void flush() throws CorruptIndexException, IOException {
2471:                flush(true, false);
2472:            }
2473:
2474:            /**
2475:             * Flush all in-memory buffered updates (adds and deletes)
2476:             * to the Directory.
                  * <p>Calls ensureOpen() first, then delegates the flush itself to
                  * doFlush.
2477:             * @param triggerMerge if true, we may merge segments (if
2478:             *  deletes or docs were flushed) if necessary
2479:             * @param flushDocStores if false we are allowed to keep
2480:             *  doc stores open to share with the next segment
                  * @throws CorruptIndexException if the index is corrupt
                  * @throws IOException if there is a low-level IO error
2481:             */
2482:            protected final void flush(boolean triggerMerge,
2483:                    boolean flushDocStores) throws CorruptIndexException,
2484:                    IOException {
2485:                ensureOpen();
2486:
                     // doFlush returns true only when docs or deletes were actually
                     // flushed; maybeMerge() is skipped otherwise, and also when
                     // the caller did not ask for merging.
2487:                if (doFlush(flushDocStores) && triggerMerge)
2488:                    maybeMerge();
2489:            }
2490:
                 /**
                  * Flushes buffered documents and/or buffered deletes while all
                  * indexing threads are paused.
                  * <p>Steps, in order: pause the indexing threads (returning false
                  * right away if docWriter reports it is aborting); decide what
                  * needs flushing; separately flush the doc store when it is shared
                  * with other segments; flush the buffered docs into a new
                  * SegmentInfo and apply buffered deletes; checkpoint; and, if the
                  * merge policy asks for it, build a compound file for the new
                  * segment and checkpoint again.  If the flush step fails, the
                  * in-memory segmentInfos is restored before the exception
                  * propagates.
                  *
                  * @param flushDocStores if true the doc stores are flushed too; this
                  *  is forced to true when autoCommit is set and forced to false
                  *  when docWriter has no doc store segment
                  * @return true if docs or deletes were flushed; false if there was
                  *  nothing to flush or docWriter is aborting
                  * @throws CorruptIndexException if the index is corrupt
                  * @throws IOException if there is a low-level IO error
                  */
2491:            private synchronized final boolean doFlush(boolean flushDocStores)
2492:                    throws CorruptIndexException, IOException {
2493:
2494:                // Make sure no threads are actively adding a document
2495:
2496:                // Returns true if docWriter is currently aborting, in
2497:                // which case we skip flushing this segment
2498:                if (docWriter.pauseAllThreads()) {
2499:                    docWriter.resumeAllThreads();
2500:                    return false;
2501:                }
2502:
2503:                try {
2504:
2505:                    SegmentInfo newSegment = null;
2506:
2507:                    final int numDocs = docWriter.getNumDocsInRAM();
2508:
2509:                    // Always flush docs if there are any
2510:                    boolean flushDocs = numDocs > 0;
2511:
2512:                    // With autoCommit=true we always must flush the doc
2513:                    // stores when we flush
2514:                    flushDocStores |= autoCommit;
2515:                    String docStoreSegment = docWriter.getDocStoreSegment();
                         // Nothing to flush if no doc store segment exists
2516:                    if (docStoreSegment == null)
2517:                        flushDocStores = false;
2518:
2519:                    // Always flush deletes if there are any delete terms.
2520:                    // TODO: when autoCommit=false we don't have to flush
2521:                    // deletes with every flushed segment; we can save
2522:                    // CPU/IO by buffering longer & flushing deletes only
2523:                    // when they are full or writer is being closed.  We
2524:                    // have to fix the "applyDeletesSelectively" logic to
2525:                    // apply to more than just the last flushed segment
2526:                    boolean flushDeletes = docWriter.hasDeletes();
2527:
2528:                    if (infoStream != null) {
2529:                        message("  flush: segment=" + docWriter.getSegment()
2530:                                + " docStoreSegment="
2531:                                + docWriter.getDocStoreSegment()
2532:                                + " docStoreOffset="
2533:                                + docWriter.getDocStoreOffset() + " flushDocs="
2534:                                + flushDocs + " flushDeletes=" + flushDeletes
2535:                                + " flushDocStores=" + flushDocStores
2536:                                + " numDocs=" + numDocs + " numBufDelTerms="
2537:                                + docWriter.getNumBufferedDeleteTerms());
2538:                        message("  index before flush " + segString());
2539:                    }
2540:
2541:                    int docStoreOffset = docWriter.getDocStoreOffset();
2542:
2543:                    // docStoreOffset should only be non-zero when
2544:                    // autoCommit == false
2545:                    assert !autoCommit || 0 == docStoreOffset;
2546:
2547:                    boolean docStoreIsCompoundFile = false;
2548:
2549:                    // Check if the doc stores must be separately flushed
2550:                    // because other segments, besides the one we are about
2551:                    // to flush, reference it
2552:                    if (flushDocStores
2553:                            && (!flushDocs || !docWriter.getSegment().equals(
2554:                                    docWriter.getDocStoreSegment()))) {
2555:                        // We must separately flush the doc store
2556:                        if (infoStream != null)
2557:                            message("  flush shared docStore segment "
2558:                                    + docStoreSegment);
2559:
2560:                        docStoreIsCompoundFile = flushDocStores();
                             // Already flushed above, so docWriter.flush() below
                             // must not flush the doc stores a second time
2561:                        flushDocStores = false;
2562:                    }
2563:
2564:                    String segment = docWriter.getSegment();
2565:
2566:                    // If we are flushing docs, segment must not be null:
2567:                    assert segment != null || !flushDocs;
2568:
2569:                    if (flushDocs || flushDeletes) {
2570:
                             // Snapshot of segmentInfos, taken only when deletes
                             // will be applied; used to restore state on failure
2571:                        SegmentInfos rollback = null;
2572:
2573:                        if (flushDeletes)
2574:                            rollback = (SegmentInfos) segmentInfos.clone();
2575:
2576:                        boolean success = false;
2577:
2578:                        try {
2579:                            if (flushDocs) {
2580:
2581:                                if (0 == docStoreOffset && flushDocStores) {
2582:                                    // This means we are flushing private doc stores
2583:                                    // with this segment, so it will not be shared
2584:                                    // with other segments
2585:                                    assert docStoreSegment != null;
2586:                                    assert docStoreSegment.equals(segment);
2587:                                    docStoreOffset = -1;
2588:                                    docStoreIsCompoundFile = false;
2589:                                    docStoreSegment = null;
2590:                                }
2591:
2592:                                int flushedDocCount = docWriter
2593:                                        .flush(flushDocStores);
2594:
2595:                                newSegment = new SegmentInfo(segment,
2596:                                        flushedDocCount, directory, false,
2597:                                        true, docStoreOffset, docStoreSegment,
2598:                                        docStoreIsCompoundFile);
2599:                                segmentInfos.addElement(newSegment);
2600:                            }
2601:
2602:                            if (flushDeletes) {
2603:                                // we should be able to change this so we can
2604:                                // buffer deletes longer and then flush them to
2605:                                // multiple flushed segments, when
2606:                                // autoCommit=false
2607:                                applyDeletes(flushDocs);
2608:                                doAfterFlush();
2609:                            }
2610:
2611:                            checkpoint();
2612:                            success = true;
2613:                        } finally {
2614:                            if (!success) {
2615:
2616:                                if (infoStream != null)
2617:                                    message("hit exception flushing segment "
2618:                                            + segment);
2619:
2620:                                if (flushDeletes) {
2621:
2622:                                    // Carefully check if any partial .del files
2623:                                    // should be removed:
2624:                                    final int size = rollback.size();
2625:                                    for (int i = 0; i < size; i++) {
2626:                                        final String newDelFileName = segmentInfos
2627:                                                .info(i).getDelFileName();
2628:                                        final String delFileName = rollback
2629:                                                .info(i).getDelFileName();
2630:                                        if (newDelFileName != null
2631:                                                && !newDelFileName
2632:                                                        .equals(delFileName))
2633:                                            deleter.deleteFile(newDelFileName);
2634:                                    }
2635:
2636:                                    // Fully replace the segmentInfos since flushed
2637:                                    // deletes could have changed any of the
2638:                                    // SegmentInfo instances:
2639:                                    segmentInfos.clear();
2640:                                    segmentInfos.addAll(rollback);
2641:
2642:                                } else {
2643:                                    // Remove segment we added, if any:
2644:                                    if (newSegment != null
2645:                                            && segmentInfos.size() > 0
2646:                                            && segmentInfos.info(segmentInfos
2647:                                                    .size() - 1) == newSegment)
2648:                                        segmentInfos
2649:                                                .remove(segmentInfos.size() - 1);
2650:                                }
                                     // NOTE(review): presumably discards docWriter's
                                     // buffered state for the docs that failed to
                                     // flush -- confirm against docWriter.abort
2651:                                if (flushDocs)
2652:                                    docWriter.abort(null);
2653:                                deletePartialSegmentsFile();
2654:                                deleter.checkpoint(segmentInfos, false);
2655:
2656:                                if (segment != null)
2657:                                    deleter.refresh(segment);
2658:                            }
2659:                        }
2660:
2661:                        deleter.checkpoint(segmentInfos, autoCommit);
2662:
2663:                        if (flushDocs
2664:                                && mergePolicy.useCompoundFile(segmentInfos,
2665:                                        newSegment)) {
                                 // Reuse the flag to track the compound-file step
2666:                            success = false;
2667:                            try {
2668:                                docWriter.createCompoundFile(segment);
2669:                                newSegment.setUseCompoundFile(true);
2670:                                checkpoint();
2671:                                success = true;
2672:                            } finally {
2673:                                if (!success) {
2674:                                    if (infoStream != null)
2675:                                        message("hit exception creating compound file for newly flushed segment "
2676:                                                + segment);
                                         // Revert the flag and delete the compound
                                         // file; the segment itself stays in
                                         // segmentInfos in non-compound form
2677:                                    newSegment.setUseCompoundFile(false);
2678:                                    deleter
2679:                                            .deleteFile(segment
2680:                                                    + "."
2681:                                                    + IndexFileNames.COMPOUND_FILE_EXTENSION);
2682:                                    deletePartialSegmentsFile();
2683:                                }
2684:                            }
2685:
2686:                            deleter.checkpoint(segmentInfos, autoCommit);
2687:                        }
2688:
2689:                        return true;
2690:                    } else {
2691:                        return false;
2692:                    }
2693:
2694:                } finally {
                         // Runs on every exit from the try above (normal return or
                         // exception): undo the pauseAllThreads() done at the top
2695:                    docWriter.clearFlushPending();
2696:                    docWriter.resumeAllThreads();
2697:                }
2698:            }
2699:
2700:            /** Expert:  Return the total size of all index files currently cached in memory.
2701:             * Useful for size management with flushRamDocs()
                  * <p>Calls ensureOpen() first, then returns whatever
                  * docWriter.getRAMUsed() reports.
                  * NOTE(review): no flushRamDocs() method is visible in this part of
                  * the file -- the reference above may be stale; confirm.
                  * @return the RAM usage reported by docWriter (presumably in bytes,
                  *  going by this method's name)
2702:             */
2703:            public final long ramSizeInBytes() {
2704:                ensureOpen();
2705:                return docWriter.getRAMUsed();
2706:            }
2707:
2708:            /** Expert:  Return the number of documents whose segments are currently cached in memory.
2709:             * Useful when calling flush()
                  * <p>Calls ensureOpen() first, then returns
                  * docWriter.getNumDocsInRAM().  Unlike ramSizeInBytes(), this
                  * method is synchronized on the writer.
                  * @return the number of documents docWriter reports as held in RAM
2710:             */
2711:            public final synchronized int numRamDocs() {
2712:                ensureOpen();
2713:                return docWriter.getNumDocsInRAM();
2714:            }
2715:
2716:            /**
2717:             * Verifies that the segments of <code>merge</code> appear in
2718:             * <code>segmentInfos</code> as one unbroken run, in the same order.
2719:             *
2720:             * @param merge the merge whose source segments are checked
2721:             * @return index in <code>segmentInfos</code> of the merge's first segment
2722:             * @throws MergePolicy.MergeException if a segment is not in the index
2723:             *  or the segments are not adjacent
2724:             */
2725:            private int ensureContiguousMerge(MergePolicy.OneMerge merge) {
2726:                final SegmentInfo head = merge.segments.info(0);
2727:                final int first = segmentInfos.indexOf(head);
2728:                if (first == -1) {
2729:                    throw new MergePolicy.MergeException("could not find segment "
2730:                            + head.name + " in current segments");
2731:                }
2732:
2733:                final int indexSize = segmentInfos.size();
2734:                final int mergeSize = merge.segments.size();
2735:                for (int offset = 0; offset < mergeSize; offset++) {
2736:                    final SegmentInfo wanted = merge.segments.info(offset);
2737:                    final int slot = first + offset;
2738:                    if (slot < indexSize && segmentInfos.info(slot).equals(wanted))
2739:                        continue; // segment sits exactly where it should
2740:
2741:                    if (segmentInfos.indexOf(wanted) == -1)
2742:                        throw new MergePolicy.MergeException("MergePolicy selected a segment ("
2743:                                + wanted.name + ") that is not in the index");
2744:                    throw new MergePolicy.MergeException(
2745:                            "MergePolicy selected non-contiguous segments to merge (" + merge + " vs "
2746:                                    + segString() + "), which IndexWriter (currently) cannot handle");
2747:                }
2748:                return first;
2749:            }
2750:
2751:            /* FIXME if we want to support non-contiguous segment merges */
                 /**
                  * Installs the result of a finished merge into segmentInfos.
                  * <p>Steps, in order: (1) if the merge was aborted, release the
                  * source segments, refresh the deleter and return false; (2) carry
                  * over any deletes that were flushed against the source segments
                  * after the merge started, remapping them to doc IDs of the merged
                  * segment and writing them as a new deletes file for merge.info;
                  * (3) mark merge.info's doc store as compound if another segment
                  * sharing the same doc store already is; (4) replace the source
                  * segments in segmentInfos with merge.info and checkpoint,
                  * restoring the previous segmentInfos on failure; (5) checkpoint
                  * the deleter and then decref the source segments.
                  *
                  * @param merge a registered merge whose merged segment is merge.info
                  * @return true if the merge was committed, false if it was aborted
                  * @throws IOException if reading/writing deletes or checkpointing fails
                  */
2752:            synchronized private boolean commitMerge(MergePolicy.OneMerge merge)
2753:                    throws IOException {
2754:
2755:                assert merge.registerDone;
2756:
2757:                // If merge was explicitly aborted, or, if abort() or
2758:                // rollbackTransaction() had been called since our merge
2759:                // started (which results in an unqualified
2760:                // deleter.refresh() call that will remove any index
2761:                // file that current segments does not reference), we
2762:                // abort this merge
2763:                if (merge.isAborted()) {
2764:                    if (infoStream != null)
2765:                        message("commitMerge: skipping merge "
2766:                                + merge.segString(directory)
2767:                                + ": it was aborted");
2768:
2769:                    assert merge.increfDone;
2770:                    decrefMergeSegments(merge);
2771:                    deleter.refresh(merge.info.name);
2772:                    return false;
2773:                }
2774:
2775:                boolean success = false;
2776:
                     // Position of the merge's first segment within segmentInfos;
                     // assigned inside the try below
2777:                int start;
2778:
2779:                try {
                         // segmentsClone holds the source segments as they were when
                         // the merge started; merge.segments holds their current state
2780:                    SegmentInfos sourceSegmentsClone = merge.segmentsClone;
2781:                    SegmentInfos sourceSegments = merge.segments;
2782:
2783:                    start = ensureContiguousMerge(merge);
2784:                    if (infoStream != null)
2785:                        message("commitMerge " + merge.segString(directory));
2786:
2787:                    // Carefully merge deletes that occurred after we
2788:                    // started merging:
2789:
                         // Allocated lazily: stays null if no new deletes are found
2790:                    BitVector deletes = null;
                         // Running doc ID in the merged segment: advances once for
                         // every source doc that was not already deleted when the
                         // merge started
2791:                    int docUpto = 0;
2792:
2793:                    final int numSegmentsToMerge = sourceSegments.size();
2794:                    for (int i = 0; i < numSegmentsToMerge; i++) {
2795:                        final SegmentInfo previousInfo = sourceSegmentsClone
2796:                                .info(i);
2797:                        final SegmentInfo currentInfo = sourceSegments.info(i);
2798:
2799:                        assert currentInfo.docCount == previousInfo.docCount;
2800:
2801:                        final int docCount = currentInfo.docCount;
2802:
2803:                        if (previousInfo.hasDeletions()) {
2804:
2805:                            // There were deletes on this segment when the merge
2806:                            // started.  The merge has collapsed away those
2807:                            // deletes, but, if new deletes were flushed since
2808:                            // the merge started, we must now carefully keep any
2809:                            // newly flushed deletes but mapping them to the new
2810:                            // docIDs.
2811:
2812:                            assert currentInfo.hasDeletions();
2813:
2814:                            // Load deletes present @ start of merge, for this segment:
2815:                            BitVector previousDeletes = new BitVector(
2816:                                    previousInfo.dir, previousInfo
2817:                                            .getDelFileName());
2818:
2819:                            if (!currentInfo.getDelFileName().equals(
2820:                                    previousInfo.getDelFileName())) {
2821:                                // This means this segment has had new deletes
2822:                                // committed since we started the merge, so we
2823:                                // must merge them:
2824:                                if (deletes == null)
2825:                                    deletes = new BitVector(merge.info.docCount);
2826:
2827:                                BitVector currentDeletes = new BitVector(
2828:                                        currentInfo.dir, currentInfo
2829:                                                .getDelFileName());
2830:                                for (int j = 0; j < docCount; j++) {
2831:                                    if (previousDeletes.get(j))
2832:                                        assert currentDeletes.get(j);
2833:                                    else {
2834:                                        if (currentDeletes.get(j))
2835:                                            deletes.set(docUpto);
2836:                                        docUpto++;
2837:                                    }
2838:                                }
2839:                            } else
                                     // Same deletes file as before: just skip over
                                     // the docs that survived the merge
2840:                                docUpto += docCount - previousDeletes.count();
2841:
2842:                        } else if (currentInfo.hasDeletions()) {
2843:                            // This segment had no deletes before but now it
2844:                            // does:
2845:                            if (deletes == null)
2846:                                deletes = new BitVector(merge.info.docCount);
                                 // NOTE(review): this reads from "directory" whereas
                                 // the branch above reads from currentInfo.dir --
                                 // confirm the two are always the same here
2847:                            BitVector currentDeletes = new BitVector(directory,
2848:                                    currentInfo.getDelFileName());
2849:
2850:                            for (int j = 0; j < docCount; j++) {
2851:                                if (currentDeletes.get(j))
2852:                                    deletes.set(docUpto);
2853:                                docUpto++;
2854:                            }
2855:
2856:                        } else
2857:                            // No deletes before or after
2858:                            docUpto += currentInfo.docCount;
2859:
2860:                        merge.checkAborted(directory);
2861:                    }
2862:
2863:                    if (deletes != null) {
2864:                        merge.info.advanceDelGen();
2865:                        deletes.write(directory, merge.info.getDelFileName());
2866:                    }
2867:                    success = true;
2868:                } finally {
2869:                    if (!success) {
2870:                        if (infoStream != null)
2871:                            message("hit exception creating merged deletes file");
2872:                        deleter.refresh(merge.info.name);
2873:                    }
2874:                }
2875:
2876:                // Simple optimization: if the doc store we are using
2877:                // has been closed and is in now compound format (but
2878:                // wasn't when we started), then we will switch to the
2879:                // compound format as well:
2880:                final String mergeDocStoreSegment = merge.info
2881:                        .getDocStoreSegment();
2882:                if (mergeDocStoreSegment != null
2883:                        && !merge.info.getDocStoreIsCompoundFile()) {
2884:                    final int size = segmentInfos.size();
2885:                    for (int i = 0; i < size; i++) {
2886:                        final SegmentInfo info = segmentInfos.info(i);
2887:                        final String docStoreSegment = info
2888:                                .getDocStoreSegment();
2889:                        if (docStoreSegment != null
2890:                                && docStoreSegment.equals(mergeDocStoreSegment)
2891:                                && info.getDocStoreIsCompoundFile()) {
2892:                            merge.info.setDocStoreIsCompoundFile(true);
2893:                            break;
2894:                        }
2895:                    }
2896:                }
2897:
                     // Second guarded step: swap the source segments for merge.info
2898:                success = false;
2899:                SegmentInfos rollback = null;
2900:                try {
2901:                    rollback = (SegmentInfos) segmentInfos.clone();
2902:                    segmentInfos.subList(start, start + merge.segments.size())
2903:                            .clear();
2904:                    segmentInfos.add(start, merge.info);
2905:                    checkpoint();
2906:                    success = true;
2907:                } finally {
                         // rollback is still null only if clone() itself failed, in
                         // which case segmentInfos has not been touched yet
2908:                    if (!success && rollback != null) {
2909:                        if (infoStream != null)
2910:                            message("hit exception when checkpointing after merge");
2911:                        segmentInfos.clear();
2912:                        segmentInfos.addAll(rollback);
2913:                        deletePartialSegmentsFile();
2914:                        deleter.refresh(merge.info.name);
2915:                    }
2916:                }
2917:
2918:                if (merge.optimize)
2919:                    segmentsToOptimize.add(merge.info);
2920:
2921:                // Must checkpoint before decrefing so any newly
2922:                // referenced files in the new merge.info are incref'd
2923:                // first:
2924:                deleter.checkpoint(segmentInfos, autoCommit);
2925:
2926:                decrefMergeSegments(merge);
2927:
2928:                return true;
2929:            }
2930:
2931:            /** Drops the file references held on each source segment snapshotted in
2932:             *  merge.segmentsClone (matching the incref in mergeInit) and clears increfDone. */
2933:            private void decrefMergeSegments(MergePolicy.OneMerge merge) throws IOException {
2934:                final SegmentInfos snapshot = merge.segmentsClone;
2935:                final int total = snapshot.size();
2936:                assert merge.increfDone;
2937:                merge.increfDone = false;
2938:                for (int idx = 0; idx < total; idx++) {
2939:                    final SegmentInfo source = snapshot.info(idx);
2940:                    // Only segments that live in this writer's directory are decref'd
2941:                    if (source.dir != directory)
2942:                        continue;
2943:                    deleter.decRef(source.files());
2944:                }
2945:            }
2946:
2947:            /**
2948:             * Merges the indicated segments, replacing them in the stack with a
2949:             * single segment.
                  * <p>Runs mergeInit (only if merge.info is not set yet) and then
                  * mergeMiddle.  A MergePolicy.MergeAbortedException is recorded on
                  * the merge and swallowed, unless the merge is external, in which
                  * case it is rethrown.  Afterwards, while holding this writer's
                  * monitor, mergeFinish is called, follow-on merges are scheduled if
                  * the merge succeeded and the writer is not closing, the merge is
                  * removed from runningMerges and waiting threads are notified.
                  *
                  * @param merge a merge that has already been registered
                  * @throws CorruptIndexException if the index is corrupt
                  * @throws IOException if there is a low-level IO error
2950:             */
2951:
2952:            final void merge(MergePolicy.OneMerge merge)
2953:                    throws CorruptIndexException, IOException {
2954:
2955:                assert merge.registerDone;
2956:                assert !merge.optimize || merge.maxNumSegmentsOptimize > 0;
2957:
                     // Stays false on any exception, including a swallowed
                     // MergeAbortedException
2958:                boolean success = false;
2959:
2960:                try {
2961:
2962:                    try {
2963:                        if (merge.info == null)
2964:                            mergeInit(merge);
2965:
2966:                        if (infoStream != null)
2967:                            message("now merge\n  merge="
2968:                                    + merge.segString(directory) + "\n  index="
2969:                                    + segString());
2970:
2971:                        mergeMiddle(merge);
2972:                        success = true;
2973:                    } catch (MergePolicy.MergeAbortedException e) {
2974:                        merge.setException(e);
2975:                        addMergeException(merge);
2976:                        // We can ignore this exception, unless the merge
2977:                        // involves segments from external directories, in
2978:                        // which case we must throw it so, for example, the
2979:                        // rollbackTransaction code in addIndexes* is
2980:                        // executed.
2981:                        if (merge.isExternal)
2982:                            throw e;
2983:                    }
2984:                } finally {
2985:                    synchronized (this ) {
2986:                        try {
2987:                            if (!success && infoStream != null)
2988:                                message("hit exception during merge");
2989:
2990:                            mergeFinish(merge);
2991:
2992:                            // This merge (and, generally, any change to the
2993:                            // segments) may now enable new merges, so we call
2994:                            // merge policy & update pending merges.
2995:                            if (success && !merge.isAborted() && !closed
2996:                                    && !closing)
2997:                                updatePendingMerges(
2998:                                        merge.maxNumSegmentsOptimize,
2999:                                        merge.optimize);
3000:                        } finally {
                                 // Inner finally: runs even if mergeFinish or
                                 // updatePendingMerges throws
3001:                            runningMerges.remove(merge);
3002:                            // Optimize may be waiting on the final optimize
3003:                            // merge to finish; and finishMerges() may be
3004:                            // waiting for all merges to finish:
3005:                            notifyAll();
3006:                        }
3007:                    }
3008:                }
3009:            }
3010:
3011:            /**
3012:             * Tries to register a merge.  A merge that is already registered
3013:             * is accepted immediately.  Otherwise it is rejected if any of its
3014:             * segments is already part of another registered merge or is missing
3015:             * from the current segmentInfos; if accepted it is appended to
3016:             * pendingMerges and its segments are added to mergingSegments.
3017:             *
3018:             * @param merge the candidate merge
3019:             * @return true if the merge is (now) registered, false if it conflicts
3020:             */
3021:            final synchronized boolean registerMerge(MergePolicy.OneMerge merge) {
3022:                if (merge.registerDone) {
3023:                    return true;
3024:                }
3025:
3026:                final int segmentCount = merge.segments.size();
3027:                boolean external = false;
3028:                for (int idx = 0; idx < segmentCount; idx++) {
3029:                    final SegmentInfo candidate = merge.segments.info(idx);
3030:                    // Reject on a clash with a registered merge or an unknown segment
3031:                    if (mergingSegments.contains(candidate) || segmentInfos.indexOf(candidate) == -1) {
3032:                        return false;
3033:                    }
3034:                    // A segment living in another Directory makes the merge external
3035:                    external |= candidate.dir != directory;
3036:                }
3037:
3038:                pendingMerges.add(merge);
3039:                if (infoStream != null) {
3040:                    final String description = merge.segString(directory);
3041:                    message("add merge to pendingMerges: " + description + " [total "
3042:                            + pendingMerges.size() + " pending]");
3043:                }
3044:                merge.mergeGen = mergeGen;
3045:                merge.isExternal = external;
3046:
3047:                // Mark every segment as merging while we still hold the monitor,
3048:                // so two threads cannot start conflicting merges
3049:                for (int idx = 0; idx < segmentCount; idx++) {
3050:                    mergingSegments.add(merge.segments.info(idx));
3051:                }
3052:                merge.registerDone = true;
3053:                return true;
3054:            }
3055:
3056:            /** Does initial setup for a merge, which is fast but holds
3057:             *  the synchronized lock on IndexWriter instance. */
3058:            final synchronized void mergeInit(MergePolicy.OneMerge merge)
3059:                    throws IOException {
3060:
3061:                assert merge.registerDone;
3062:
3063:                if (merge.isAborted())
3064:                    return;
3065:
3066:                final SegmentInfos sourceSegments = merge.segments;
3067:                final int end = sourceSegments.size();
3068:
3069:                ensureContiguousMerge(merge);
3070:
3071:                // Check whether this merge will allow us to skip
3072:                // merging the doc stores (stored field & vectors).
3073:                // This is a very substantial optimization (saves tons
3074:                // of IO) that can only be applied with
3075:                // autoCommit=false.
3076:
3077:                Directory lastDir = directory;
3078:                String lastDocStoreSegment = null;
3079:                int next = -1;
3080:
3081:                boolean mergeDocStores = false;
3082:                boolean doFlushDocStore = false;
3083:                final String currentDocStoreSegment = docWriter
3084:                        .getDocStoreSegment();
3085:
3086:                // Test each segment to be merged: check if we need to
3087:                // flush/merge doc stores
3088:                for (int i = 0; i < end; i++) {
3089:                    SegmentInfo si = sourceSegments.info(i);
3090:
3091:                    // If it has deletions we must merge the doc stores
3092:                    if (si.hasDeletions())
3093:                        mergeDocStores = true;
3094:
3095:                    // If it has its own (private) doc stores we must
3096:                    // merge the doc stores
3097:                    if (-1 == si.getDocStoreOffset())
3098:                        mergeDocStores = true;
3099:
3100:                    // If it has a different doc store segment than
3101:                    // previous segments, we must merge the doc stores
3102:                    String docStoreSegment = si.getDocStoreSegment();
3103:                    if (docStoreSegment == null)
3104:                        mergeDocStores = true;
3105:                    else if (lastDocStoreSegment == null)
3106:                        lastDocStoreSegment = docStoreSegment;
3107:                    else if (!lastDocStoreSegment.equals(docStoreSegment))
3108:                        mergeDocStores = true;
3109:
3110:                    // Segments' docScoreOffsets must be in-order,
3111:                    // contiguous.  For the default merge policy now
3112:                    // this will always be the case but for an arbitrary
3113:                    // merge policy this may not be the case
3114:                    if (-1 == next)
3115:                        next = si.getDocStoreOffset() + si.docCount;
3116:                    else if (next != si.getDocStoreOffset())
3117:                        mergeDocStores = true;
3118:                    else
3119:                        next = si.getDocStoreOffset() + si.docCount;
3120:
3121:                    // If the segment comes from a different directory
3122:                    // we must merge
3123:                    if (lastDir != si.dir)
3124:                        mergeDocStores = true;
3125:
3126:                    // If the segment is referencing the current "live"
3127:                    // doc store outputs then we must merge
3128:                    if (si.getDocStoreOffset() != -1
3129:                            && currentDocStoreSegment != null
3130:                            && si.getDocStoreSegment().equals(
3131:                                    currentDocStoreSegment))
3132:                        doFlushDocStore = true;
3133:                }
3134:
3135:                final int docStoreOffset;
3136:                final String docStoreSegment;
3137:                final boolean docStoreIsCompoundFile;
3138:
3139:                if (mergeDocStores) {
3140:                    docStoreOffset = -1;
3141:                    docStoreSegment = null;
3142:                    docStoreIsCompoundFile = false;
3143:                } else {
3144:                    SegmentInfo si = sourceSegments.info(0);
3145:                    docStoreOffset = si.getDocStoreOffset();
3146:                    docStoreSegment = si.getDocStoreSegment();
3147:                    docStoreIsCompoundFile = si.getDocStoreIsCompoundFile();
3148:                }
3149:
3150:                if (mergeDocStores && doFlushDocStore) {
3151:                    // SegmentMerger intends to merge the doc stores
3152:                    // (stored fields, vectors), and at least one of the
3153:                    // segments to be merged refers to the currently
3154:                    // live doc stores.
3155:
3156:                    // TODO: if we know we are about to merge away these
3157:                    // newly flushed doc store files then we should not
3158:                    // make compound file out of them...
3159:                    if (infoStream != null)
3160:                        message("flush at merge");
3161:                    flush(false, true);
3162:                }
3163:
3164:                // We must take a full copy at this point so that we can
3165:                // properly merge deletes in commitMerge()
3166:                merge.segmentsClone = (SegmentInfos) merge.segments.clone();
3167:
3168:                for (int i = 0; i < end; i++) {
3169:                    SegmentInfo si = merge.segmentsClone.info(i);
3170:
3171:                    // IncRef all files for this segment info to make sure
3172:                    // they are not removed while we are trying to merge.
3173:                    if (si.dir == directory)
3174:                        deleter.incRef(si.files());
3175:                }
3176:
3177:                merge.increfDone = true;
3178:
3179:                merge.mergeDocStores = mergeDocStores;
3180:
3181:                // Bind a new segment name here so even with
3182:                // ConcurrentMergePolicy we keep deterministic segment
3183:                // names.
3184:                merge.info = new SegmentInfo(newSegmentName(), 0, directory,
3185:                        false, true, docStoreOffset, docStoreSegment,
3186:                        docStoreIsCompoundFile);
3187:                // Also enroll the merged segment into mergingSegments;
3188:                // this prevents it from getting selected for a merge
3189:                // after our merge is done but while we are building the
3190:                // CFS:
3191:                mergingSegments.add(merge.info);
3192:            }
3193:
3194:            /** Does fininishing for a merge, which is fast but holds
3195:             *  the synchronized lock on IndexWriter instance. */
3196:            final synchronized void mergeFinish(MergePolicy.OneMerge merge)
3197:                    throws IOException {
3198:
3199:                if (merge.increfDone)
3200:                    decrefMergeSegments(merge);
3201:
3202:                assert merge.registerDone;
3203:
3204:                final SegmentInfos sourceSegments = merge.segments;
3205:                final int end = sourceSegments.size();
3206:                for (int i = 0; i < end; i++)
3207:                    mergingSegments.remove(sourceSegments.info(i));
3208:                mergingSegments.remove(merge.info);
3209:                merge.registerDone = false;
3210:            }
3211:
3212:            /** Does the actual (time-consuming) work of the merge,
3213:             *  but without holding synchronized lock on IndexWriter
3214:             *  instance */
3215:            final private int mergeMiddle(MergePolicy.OneMerge merge)
3216:                    throws CorruptIndexException, IOException {
3217:
3218:                merge.checkAborted(directory);
3219:
3220:                final String mergedName = merge.info.name;
3221:
3222:                SegmentMerger merger = null;
3223:
3224:                int mergedDocCount = 0;
3225:
3226:                SegmentInfos sourceSegments = merge.segments;
3227:                SegmentInfos sourceSegmentsClone = merge.segmentsClone;
3228:                final int numSegments = sourceSegments.size();
3229:
3230:                if (infoStream != null)
3231:                    message("merging " + merge.segString(directory));
3232:
3233:                merger = new SegmentMerger(this , mergedName, merge);
3234:
3235:                // This is try/finally to make sure merger's readers are
3236:                // closed:
3237:
3238:                boolean success = false;
3239:
3240:                try {
3241:                    int totDocCount = 0;
3242:
3243:                    for (int i = 0; i < numSegments; i++) {
3244:                        SegmentInfo si = sourceSegmentsClone.info(i);
3245:                        IndexReader reader = SegmentReader.get(si,
3246:                                MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet)
3247:                        merger.add(reader);
3248:                        totDocCount += reader.numDocs();
3249:                    }
3250:                    if (infoStream != null) {
3251:                        message("merge: total " + totDocCount + " docs");
3252:                    }
3253:
3254:                    merge.checkAborted(directory);
3255:
3256:                    mergedDocCount = merge.info.docCount = merger
3257:                            .merge(merge.mergeDocStores);
3258:
3259:                    assert mergedDocCount == totDocCount;
3260:
3261:                    success = true;
3262:
3263:                } finally {
3264:                    // close readers before we attempt to delete
3265:                    // now-obsolete segments
3266:                    if (merger != null) {
3267:                        merger.closeReaders();
3268:                    }
3269:                    if (!success) {
3270:                        if (infoStream != null)
3271:                            message("hit exception during merge; now refresh deleter on segment "
3272:                                    + mergedName);
3273:                        synchronized (this ) {
3274:                            addMergeException(merge);
3275:                            deleter.refresh(mergedName);
3276:                        }
3277:                    }
3278:                }
3279:
3280:                if (!commitMerge(merge))
3281:                    // commitMerge will return false if this merge was aborted
3282:                    return 0;
3283:
3284:                if (merge.useCompoundFile) {
3285:
3286:                    success = false;
3287:                    boolean skip = false;
3288:                    final String compoundFileName = mergedName + "."
3289:                            + IndexFileNames.COMPOUND_FILE_EXTENSION;
3290:
3291:                    try {
3292:                        try {
3293:                            merger.createCompoundFile(compoundFileName);
3294:                            success = true;
3295:                        } catch (IOException ioe) {
3296:                            synchronized (this ) {
3297:                                if (segmentInfos.indexOf(merge.info) == -1) {
3298:                                    // If another merge kicked in and merged our
3299:                                    // new segment away while we were trying to
3300:                                    // build the compound file, we can hit a
3301:                                    // FileNotFoundException and possibly
3302:                                    // IOException over NFS.  We can tell this has
3303:                                    // happened because our SegmentInfo is no
3304:                                    // longer in the segments; if this has
3305:                                    // happened it is safe to ignore the exception
3306:                                    // & skip finishing/committing our compound
3307:                                    // file creating.
3308:                                    if (infoStream != null)
3309:                                        message("hit exception creating compound file; ignoring it because our info (segment "
3310:                                                + merge.info.name
3311:                                                + ") has been merged away");
3312:                                    skip = true;
3313:                                } else
3314:                                    throw ioe;
3315:                            }
3316:                        }
3317:                    } finally {
3318:                        if (!success) {
3319:                            if (infoStream != null)
3320:                                message("hit exception creating compound file during merge: skip="
3321:                                        + skip);
3322:
3323:                            synchronized (this ) {
3324:                                if (!skip)
3325:                                    addMergeException(merge);
3326:                                deleter.deleteFile(compoundFileName);
3327:                            }
3328:                        }
3329:                    }
3330:
3331:                    if (!skip) {
3332:
3333:                        synchronized (this ) {
3334:                            if (skip || segmentInfos.indexOf(merge.info) == -1
3335:                                    || merge.isAborted()) {
3336:                                // Our segment (committed in non-compound
3337:                                // format) got merged away while we were
3338:                                // building the compound format.
3339:                                deleter.deleteFile(compoundFileName);
3340:                            } else {
3341:                                success = false;
3342:                                try {
3343:                                    merge.info.setUseCompoundFile(true);
3344:                                    checkpoint();
3345:                                    success = true;
3346:                                } finally {
3347:                                    if (!success) {
3348:                                        if (infoStream != null)
3349:                                            message("hit exception checkpointing compound file during merge");
3350:
3351:                                        // Must rollback:
3352:                                        addMergeException(merge);
3353:                                        merge.info.setUseCompoundFile(false);
3354:                                        deletePartialSegmentsFile();
3355:                                        deleter.deleteFile(compoundFileName);
3356:                                    }
3357:                                }
3358:
3359:                                // Give deleter a chance to remove files now.
3360:                                deleter.checkpoint(segmentInfos, autoCommit);
3361:                            }
3362:                        }
3363:                    }
3364:                }
3365:
3366:                return mergedDocCount;
3367:            }
3368:
3369:            synchronized void addMergeException(MergePolicy.OneMerge merge) {
3370:                if (!mergeExceptions.contains(merge)
3371:                        && mergeGen == merge.mergeGen)
3372:                    mergeExceptions.add(merge);
3373:            }
3374:
3375:            private void deletePartialSegmentsFile() throws IOException {
3376:                if (segmentInfos.getLastGeneration() != segmentInfos
3377:                        .getGeneration()) {
3378:                    String segmentFileName = IndexFileNames
3379:                            .fileNameFromGeneration(IndexFileNames.SEGMENTS,
3380:                                    "", segmentInfos.getGeneration());
3381:                    if (infoStream != null)
3382:                        message("now delete partial segments file \""
3383:                                + segmentFileName + "\"");
3384:
3385:                    deleter.deleteFile(segmentFileName);
3386:                }
3387:            }
3388:
3389:            // Called during flush to apply any buffered deletes.  If
3390:            // flushedNewSegment is true then a new segment was just
3391:            // created and flushed from the ram segments, so we will
3392:            // selectively apply the deletes to that new segment.
3393:            private final void applyDeletes(boolean flushedNewSegment)
3394:                    throws CorruptIndexException, IOException {
3395:
3396:                final HashMap bufferedDeleteTerms = docWriter
3397:                        .getBufferedDeleteTerms();
3398:                final List bufferedDeleteDocIDs = docWriter
3399:                        .getBufferedDeleteDocIDs();
3400:
3401:                if (infoStream != null)
3402:                    message("flush " + docWriter.getNumBufferedDeleteTerms()
3403:                            + " buffered deleted terms and "
3404:                            + bufferedDeleteDocIDs.size()
3405:                            + " deleted docIDs on " + segmentInfos.size()
3406:                            + " segments.");
3407:
3408:                if (flushedNewSegment) {
3409:                    IndexReader reader = null;
3410:                    try {
3411:                        // Open readers w/o opening the stored fields /
3412:                        // vectors because these files may still be held
3413:                        // open for writing by docWriter
3414:                        reader = SegmentReader.get(segmentInfos
3415:                                .info(segmentInfos.size() - 1), false);
3416:
3417:                        // Apply delete terms to the segment just flushed from ram
3418:                        // apply appropriately so that a delete term is only applied to
3419:                        // the documents buffered before it, not those buffered after it.
3420:                        applyDeletesSelectively(bufferedDeleteTerms,
3421:                                bufferedDeleteDocIDs, reader);
3422:                    } finally {
3423:                        if (reader != null) {
3424:                            try {
3425:                                reader.doCommit();
3426:                            } finally {
3427:                                reader.doClose();
3428:                            }
3429:                        }
3430:                    }
3431:                }
3432:
3433:                int infosEnd = segmentInfos.size();
3434:                if (flushedNewSegment) {
3435:                    infosEnd--;
3436:                }
3437:
3438:                for (int i = 0; i < infosEnd; i++) {
3439:                    IndexReader reader = null;
3440:                    try {
3441:                        reader = SegmentReader.get(segmentInfos.info(i), false);
3442:
3443:                        // Apply delete terms to disk segments
3444:                        // except the one just flushed from ram.
3445:                        applyDeletes(bufferedDeleteTerms, reader);
3446:                    } finally {
3447:                        if (reader != null) {
3448:                            try {
3449:                                reader.doCommit();
3450:                            } finally {
3451:                                reader.doClose();
3452:                            }
3453:                        }
3454:                    }
3455:                }
3456:
3457:                // Clean up bufferedDeleteTerms.
3458:                docWriter.clearBufferedDeletes();
3459:            }
3460:
3461:            // For test purposes.
3462:            final synchronized int getBufferedDeleteTermsSize() {
3463:                return docWriter.getBufferedDeleteTerms().size();
3464:            }
3465:
3466:            // For test purposes.
3467:            final synchronized int getNumBufferedDeleteTerms() {
3468:                return docWriter.getNumBufferedDeleteTerms();
3469:            }
3470:
3471:            // Apply buffered delete terms to the segment just flushed from ram
3472:            // apply appropriately so that a delete term is only applied to
3473:            // the documents buffered before it, not those buffered after it.
3474:            private final void applyDeletesSelectively(HashMap deleteTerms,
3475:                    List deleteIds, IndexReader reader)
3476:                    throws CorruptIndexException, IOException {
3477:                Iterator iter = deleteTerms.entrySet().iterator();
3478:                while (iter.hasNext()) {
3479:                    Entry entry = (Entry) iter.next();
3480:                    Term term = (Term) entry.getKey();
3481:
3482:                    TermDocs docs = reader.termDocs(term);
3483:                    if (docs != null) {
3484:                        int num = ((DocumentsWriter.Num) entry.getValue())
3485:                                .getNum();
3486:                        try {
3487:                            while (docs.next()) {
3488:                                int doc = docs.doc();
3489:                                if (doc >= num) {
3490:                                    break;
3491:                                }
3492:                                reader.deleteDocument(doc);
3493:                            }
3494:                        } finally {
3495:                            docs.close();
3496:                        }
3497:                    }
3498:                }
3499:
3500:                if (deleteIds.size() > 0) {
3501:                    iter = deleteIds.iterator();
3502:                    while (iter.hasNext())
3503:                        reader.deleteDocument(((Integer) iter.next())
3504:                                .intValue());
3505:                }
3506:            }
3507:
3508:            // Apply buffered delete terms to this reader.
3509:            private final void applyDeletes(HashMap deleteTerms,
3510:                    IndexReader reader) throws CorruptIndexException,
3511:                    IOException {
3512:                Iterator iter = deleteTerms.entrySet().iterator();
3513:                while (iter.hasNext()) {
3514:                    Entry entry = (Entry) iter.next();
3515:                    reader.deleteDocuments((Term) entry.getKey());
3516:                }
3517:            }
3518:
3519:            // utility routines for tests
3520:            SegmentInfo newestSegment() {
3521:                return segmentInfos.info(segmentInfos.size() - 1);
3522:            }
3523:
3524:            public synchronized String segString() {
3525:                StringBuffer buffer = new StringBuffer();
3526:                for (int i = 0; i < segmentInfos.size(); i++) {
3527:                    if (i > 0) {
3528:                        buffer.append(' ');
3529:                    }
3530:                    buffer.append(segmentInfos.info(i).segString(directory));
3531:                }
3532:
3533:                return buffer.toString();
3534:            }
3535:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.