001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.store.Directory;
021:
022: import java.io.IOException;
023: import java.util.List;
024: import java.util.ArrayList;
025: import java.util.Set;
026:
027: /**
028: * <p>Expert: a MergePolicy determines the sequence of
029: * primitive merge operations to be used for overall merge
030: * and optimize operations.</p>
031: *
032: * <p>Whenever the segments in an index have been altered by
033: * {@link IndexWriter}, either the addition of a newly
034: * flushed segment, addition of many segments from
035: * addIndexes* calls, or a previous merge that may now need
036: * to cascade, {@link IndexWriter} invokes {@link
037: * #findMerges} to give the MergePolicy a chance to pick
038: * merges that are now required. This method returns a
039: * {@link MergeSpecification} instance describing the set of
040: * merges that should be done, or null if no merges are
041: * necessary. When IndexWriter.optimize is called, it calls
042: * {@link #findMergesForOptimize} and the MergePolicy should
043: * then return the necessary merges.</p>
044: *
045: * <p>Note that the policy can return more than one merge at
046: * a time. In this case, if the writer is using {@link
047: * SerialMergeScheduler}, the merges will be run
048: * sequentially but if it is using {@link
049: * ConcurrentMergeScheduler} they will be run concurrently.</p>
050: *
051: * <p>The default MergePolicy is {@link
052: * LogByteSizeMergePolicy}.</p>
053: * <p><b>NOTE:</b> This API is new and still experimental
054: * (subject to change suddenly in the next release)</p>
055: */
056:
057: public abstract class MergePolicy {
058:
059: /** OneMerge provides the information necessary to perform
060: * an individual primitive merge operation, resulting in
061: * a single new segment. The merge spec includes the
062: * subset of segments to be merged as well as whether the
063: * new segment should use the compound file format. */
064:
065: public static class OneMerge {
066:
067: SegmentInfo info; // used by IndexWriter
068: boolean mergeDocStores; // used by IndexWriter
069: boolean optimize; // used by IndexWriter
070: SegmentInfos segmentsClone; // used by IndexWriter
071: boolean increfDone; // used by IndexWriter
072: boolean registerDone; // used by IndexWriter
073: long mergeGen; // used by IndexWriter
074: boolean isExternal; // used by IndexWriter
075: int maxNumSegmentsOptimize; // used by IndexWriter
076:
077: final SegmentInfos segments;
078: final boolean useCompoundFile;
079: boolean aborted;
080: Throwable error;
081:
082: public OneMerge(SegmentInfos segments, boolean useCompoundFile) {
083: if (0 == segments.size())
084: throw new RuntimeException(
085: "segments must include at least one segment");
086: this .segments = segments;
087: this .useCompoundFile = useCompoundFile;
088: }
089:
090: /** Record that an exception occurred while executing
091: * this merge */
092: synchronized void setException(Throwable error) {
093: this .error = error;
094: }
095:
096: /** Retrieve previous exception set by {@link
097: * #setException}. */
098: synchronized Throwable getException() {
099: return error;
100: }
101:
102: /** Mark this merge as aborted. If this is called
103: * before the merge is committed then the merge will
104: * not be committed. */
105: synchronized void abort() {
106: aborted = true;
107: }
108:
109: /** Returns true if this merge was aborted. */
110: synchronized boolean isAborted() {
111: return aborted;
112: }
113:
114: synchronized void checkAborted(Directory dir)
115: throws MergeAbortedException {
116: if (aborted)
117: throw new MergeAbortedException("merge is aborted: "
118: + segString(dir));
119: }
120:
121: String segString(Directory dir) {
122: StringBuffer b = new StringBuffer();
123: final int numSegments = segments.size();
124: for (int i = 0; i < numSegments; i++) {
125: if (i > 0)
126: b.append(" ");
127: b.append(segments.info(i).segString(dir));
128: }
129: if (info != null)
130: b.append(" into ").append(info.name);
131: if (optimize)
132: b.append(" [optimize]");
133: return b.toString();
134: }
135: }
136:
137: /**
138: * A MergeSpecification instance provides the information
139: * necessary to perform multiple merges. It simply
140: * contains a list of {@link OneMerge} instances.
141: */
142:
143: public static class MergeSpecification {
144:
145: /**
146: * The subset of segments to be included in the primitive merge.
147: */
148:
149: public List merges = new ArrayList();
150:
151: public void add(OneMerge merge) {
152: merges.add(merge);
153: }
154:
155: public String segString(Directory dir) {
156: StringBuffer b = new StringBuffer();
157: b.append("MergeSpec:\n");
158: final int count = merges.size();
159: for (int i = 0; i < count; i++)
160: b.append(" ").append(1 + i).append(": ").append(
161: ((OneMerge) merges.get(i)).segString(dir));
162: return b.toString();
163: }
164: }
165:
166: /** Exception thrown if there are any problems while
167: * executing a merge. */
168: public static class MergeException extends RuntimeException {
169: public MergeException(String message) {
170: super (message);
171: }
172:
173: public MergeException(Throwable exc) {
174: super (exc);
175: }
176: }
177:
178: public static class MergeAbortedException extends IOException {
179: public MergeAbortedException() {
180: super ("merge is aborted");
181: }
182:
183: public MergeAbortedException(String message) {
184: super (message);
185: }
186: }
187:
188: /**
189: * Determine what set of merge operations are now
190: * necessary on the index. The IndexWriter calls this
191: * whenever there is a change to the segments. This call
192: * is always synchronized on the IndexWriter instance so
193: * only one thread at a time will call this method.
194: *
195: * @param segmentInfos the total set of segments in the index
196: * @param writer IndexWriter instance
197: */
198: abstract MergeSpecification findMerges(SegmentInfos segmentInfos,
199: IndexWriter writer) throws CorruptIndexException,
200: IOException;
201:
202: /**
203: * Determine what set of merge operations are necessary in
204: * order to optimize the index. The IndexWriter calls
205: * this when its optimize() method is called. This call
206: * is always synchronized on the IndexWriter instance so
207: * only one thread at a time will call this method.
208: *
209: * @param segmentInfos the total set of segments in the index
210: * @param writer IndexWriter instance
211: * @param maxSegmentCount requested maximum number of
212: * segments in the index (currently this is always 1)
213: * @param segmentsToOptimize contains the specific
214: * SegmentInfo instances that must be merged away. This
215: * may be a subset of all SegmentInfos.
216: */
217: abstract MergeSpecification findMergesForOptimize(
218: SegmentInfos segmentInfos, IndexWriter writer,
219: int maxSegmentCount, Set segmentsToOptimize)
220: throws CorruptIndexException, IOException;
221:
222: /**
223: * Release all resources for the policy.
224: */
225: abstract void close();
226:
227: /**
228: * Returns true if a newly flushed (not from merge)
229: * segment should use the compound file format.
230: */
231: abstract boolean useCompoundFile(SegmentInfos segments,
232: SegmentInfo newSegment);
233:
234: /**
235: * Returns true if the doc store files should use the
236: * compound file format.
237: */
238: abstract boolean useCompoundDocStore(SegmentInfos segments);
239: }
|