Source Code Cross Referenced for LogMergePolicy.java (org.apache.lucene.index) - Lucene Search Engine



package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Set;

import org.apache.lucene.store.Directory;

/** <p>This class implements a {@link MergePolicy} that tries
 *  to merge segments into levels of exponentially
 *  increasing size, where each level has < mergeFactor
 *  segments in it.  Whenever a given level has mergeFactor
 *  segments or more in it, they will be merged.</p>
 *
 * <p>This class is abstract and requires a subclass to
 * define the {@link #size} method which specifies how a
 * segment's size is determined.  {@link LogDocMergePolicy}
 * is one subclass that measures size by document count in
 * the segment.  {@link LogByteSizeMergePolicy} is another
 * subclass that measures size as the total byte size of the
 * file(s) for the segment.</p>
 */

public abstract class LogMergePolicy extends MergePolicy {

    /** Defines the allowed range of log(size) for each
     *  level.  A level is computed by taking the max segment
     *  log size, minus LEVEL_LOG_SPAN, and finding all
     *  segments falling within that range. */
    public static final double LEVEL_LOG_SPAN = 0.75;
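    // Illustrative note (added, not in the original source): a level
    // is log(size)/log(mergeFactor).  With mergeFactor=10, a
    // 1,000,000-doc segment (under LogDocMergePolicy) sits at level
    // 6.0, and with LEVEL_LOG_SPAN=0.75 the top level spans [5.25,
    // 6.0], i.e. roughly all segments of 10^5.25 ~= 178,000 docs or
    // more.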

    /** Default merge factor, which is how many segments are
     *  merged at a time */
    public static final int DEFAULT_MERGE_FACTOR = 10;

    /** Default maximum segment size.  A segment of this size
     *  or larger will never be merged.  @see setMaxMergeDocs */
    public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;

    private int mergeFactor = DEFAULT_MERGE_FACTOR;

    long minMergeSize;
    long maxMergeSize;
    int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;

    private boolean useCompoundFile = true;
    private boolean useCompoundDocStore = true;
    private IndexWriter writer;

    private void message(String message) {
        if (writer != null)
            writer.message("LMP: " + message);
    }

    /** <p>Returns the number of segments that are merged at
     * once and also controls the total number of segments
     * allowed to accumulate in the index.</p> */
    public int getMergeFactor() {
        return mergeFactor;
    }

    /** Determines how often segment indices are merged by
     * addDocument().  With smaller values, less RAM is used
     * while indexing, and searches on unoptimized indices are
     * faster, but indexing speed is slower.  With larger
     * values, more RAM is used during indexing, and while
     * searches on unoptimized indices are slower, indexing is
     * faster.  Thus larger values (> 10) are best for batch
     * index creation, and smaller values (< 10) for indices
     * that are interactively maintained. */
    public void setMergeFactor(int mergeFactor) {
        if (mergeFactor < 2)
            throw new IllegalArgumentException(
                    "mergeFactor cannot be less than 2");
        this.mergeFactor = mergeFactor;
    }

    // Javadoc inherited
    public boolean useCompoundFile(SegmentInfos infos, SegmentInfo info) {
        return useCompoundFile;
    }

    /** Sets whether compound file format should be used for
     *  newly flushed and newly merged segments. */
    public void setUseCompoundFile(boolean useCompoundFile) {
        this.useCompoundFile = useCompoundFile;
    }

    /** Returns true if newly flushed and newly merged segments
     *  are written in compound file format. @see
     *  #setUseCompoundFile */
    public boolean getUseCompoundFile() {
        return useCompoundFile;
    }

    // Javadoc inherited
    public boolean useCompoundDocStore(SegmentInfos infos) {
        return useCompoundDocStore;
    }

    /** Sets whether compound file format should be used for
     *  newly flushed and newly merged doc store
     *  segment files (term vectors and stored fields). */
    public void setUseCompoundDocStore(boolean useCompoundDocStore) {
        this.useCompoundDocStore = useCompoundDocStore;
    }

    /** Returns true if newly flushed and newly merged doc
     *  store segment files (term vectors and stored fields)
     *  are written in compound file format. @see
     *  #setUseCompoundDocStore */
    public boolean getUseCompoundDocStore() {
        return useCompoundDocStore;
    }

    public void close() {
    }

    abstract protected long size(SegmentInfo info) throws IOException;

    private boolean isOptimized(SegmentInfos infos, IndexWriter writer,
            int maxNumSegments, Set segmentsToOptimize)
            throws IOException {
        final int numSegments = infos.size();
        int numToOptimize = 0;
        SegmentInfo optimizeInfo = null;
        for (int i = 0; i < numSegments
                && numToOptimize <= maxNumSegments; i++) {
            final SegmentInfo info = infos.info(i);
            if (segmentsToOptimize.contains(info)) {
                numToOptimize++;
                optimizeInfo = info;
            }
        }

        return numToOptimize <= maxNumSegments
                && (numToOptimize != 1 || isOptimized(writer,
                        optimizeInfo));
    }

    /** Returns true if this single info is optimized (has no
     *  pending norms or deletes, is in the same dir as the
     *  writer, and matches the current compound file setting). */
    private boolean isOptimized(IndexWriter writer, SegmentInfo info)
            throws IOException {
        return !info.hasDeletions() && !info.hasSeparateNorms()
                && info.dir == writer.getDirectory()
                && info.getUseCompoundFile() == useCompoundFile;
    }

    /** Returns the merges necessary to optimize the index.
     *  This merge policy defines "optimized" to mean only one
     *  segment in the index, where that segment has no
     *  deletions pending nor separate norms, and it is in
     *  compound file format if the current useCompoundFile
     *  setting is true.  This method returns multiple merges
     *  (mergeFactor at a time) so the {@link MergeScheduler}
     *  in use may make use of concurrency. */
    public MergeSpecification findMergesForOptimize(SegmentInfos infos,
            IndexWriter writer, int maxNumSegments,
            Set segmentsToOptimize) throws IOException {
        MergeSpecification spec;

        assert maxNumSegments > 0;

        if (!isOptimized(infos, writer, maxNumSegments,
                segmentsToOptimize)) {

            // Find the newest (rightmost) segment that needs to
            // be optimized (other segments may have been flushed
            // since optimize started):
            int last = infos.size();
            while (last > 0) {
                final SegmentInfo info = infos.info(--last);
                if (segmentsToOptimize.contains(info)) {
                    last++;
                    break;
                }
            }

            if (last > 0) {

                spec = new MergeSpecification();

                // First, enroll all "full" merges (size
                // mergeFactor) to potentially be run concurrently:
                while (last - maxNumSegments + 1 >= mergeFactor) {
                    spec.add(new OneMerge(infos.range(last
                            - mergeFactor, last), useCompoundFile));
                    last -= mergeFactor;
                }

                // Only if there are no full merges pending do we
                // add a final partial (< mergeFactor segments) merge:
                if (0 == spec.merges.size()) {
                    if (maxNumSegments == 1) {

                        // Since we must optimize down to 1 segment, the
                        // choice is simple:
                        if (last > 1
                                || !isOptimized(writer, infos.info(0)))
                            spec.add(new OneMerge(infos.range(0, last),
                                    useCompoundFile));
                    } else if (last > maxNumSegments) {

                        // Take care to pick a partial merge that is
                        // least cost, but does not make the index too
                        // lopsided.  If we always just picked the
                        // partial tail then we could produce a highly
                        // lopsided index over time:

                        // We must merge this many segments to leave
                        // maxNumSegments in the index (from when
                        // optimize was first kicked off):
                        final int finalMergeSize = last
                                - maxNumSegments + 1;

                        // Consider all possible starting points:
                        long bestSize = 0;
                        int bestStart = 0;

                        for (int i = 0; i < last - finalMergeSize + 1; i++) {
                            long sumSize = 0;
                            for (int j = 0; j < finalMergeSize; j++)
                                sumSize += size(infos.info(j + i));
                            if (i == 0
                                    || (sumSize < 2 * size(infos
                                            .info(i - 1)) && sumSize < bestSize)) {
                                bestStart = i;
                                bestSize = sumSize;
                            }
                        }

                        spec.add(new OneMerge(infos.range(bestStart,
                                bestStart + finalMergeSize),
                                useCompoundFile));
                    }
                }

            } else
                spec = null;
        } else
            spec = null;

        return spec;
    }

    /** Checks if any merges are now necessary and returns a
     *  {@link MergePolicy.MergeSpecification} if so.  A merge
     *  is necessary when there are more than {@link
     *  #setMergeFactor} segments at a given level.  When
     *  multiple levels have too many segments, this method
     *  will return multiple merges, allowing the {@link
     *  MergeScheduler} to use concurrency. */
    public MergeSpecification findMerges(SegmentInfos infos,
            IndexWriter writer) throws IOException {

        final int numSegments = infos.size();
        this.writer = writer;
        message("findMerges: " + numSegments + " segments");

        // Compute levels, which is just log (base mergeFactor)
        // of the size of each segment
        float[] levels = new float[numSegments];
        final float norm = (float) Math.log(mergeFactor);

        final Directory directory = writer.getDirectory();

        for (int i = 0; i < numSegments; i++) {
            final SegmentInfo info = infos.info(i);
            long size = size(info);

            // Refuse to import a segment that's too large
            if (info.docCount > maxMergeDocs && info.dir != directory)
                throw new IllegalArgumentException(
                        "Segment is too large (" + info.docCount
                                + " docs vs max docs " + maxMergeDocs
                                + ")");

            if (size >= maxMergeSize && info.dir != directory)
                throw new IllegalArgumentException(
                        "Segment is too large (" + size
                                + " vs max size " + maxMergeSize + ")");

            // Floor tiny segments
            if (size < 1)
                size = 1;
            levels[i] = (float) Math.log(size) / norm;
        }

        final float levelFloor;
        if (minMergeSize <= 0)
            levelFloor = (float) 0.0;
        else
            levelFloor = (float) (Math.log(minMergeSize) / norm);
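        // Note (added commentary, not in the original source):
        // minMergeSize acts as a floor; all segments smaller than it
        // collapse into one bottom level, so many tiny segments are
        // still grouped into a single merge rather than each forming
        // its own level.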

        // Now, we quantize the log values into levels.  The
        // first level is any segment whose log size is within
        // LEVEL_LOG_SPAN of the max size, or which has such a
        // segment "to the right".  Then, we find the max of all
        // other segments and use that to define the next level,
        // and so on.
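        //
        // Illustrative example (added, not in the original source):
        // with mergeFactor=10, ten segments of ~1,000 docs each all
        // land near level 3; once ten such segments accumulate at
        // that level, a single OneMerge covering all ten is returned,
        // producing one ~10,000-doc segment at the next level up.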

        MergeSpecification spec = null;

        int start = 0;
        while (start < numSegments) {

            // Find max level of all segments not already
            // quantized.
            float maxLevel = levels[start];
            for (int i = 1 + start; i < numSegments; i++) {
                final float level = levels[i];
                if (level > maxLevel)
                    maxLevel = level;
            }

            // Now search backwards for the rightmost segment that
            // falls into this level:
            float levelBottom;
            if (maxLevel < levelFloor)
                // All remaining segments fall into the min level
                levelBottom = -1.0F;
            else {
                levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);

                // Force a boundary at the level floor
                if (levelBottom < levelFloor && maxLevel >= levelFloor)
                    levelBottom = levelFloor;
            }

            int upto = numSegments - 1;
            while (upto >= start) {
                if (levels[upto] >= levelBottom) {
                    break;
                }
                upto--;
            }
            message("  level " + levelBottom + " to " + maxLevel + ": "
                    + (1 + upto - start) + " segments");

            // Finally, record all merges that are viable at this level:
            int end = start + mergeFactor;
            while (end <= 1 + upto) {
                boolean anyTooLarge = false;
                for (int i = start; i < end; i++) {
                    final SegmentInfo info = infos.info(i);
                    anyTooLarge |= (size(info) >= maxMergeSize || info.docCount >= maxMergeDocs);
                }

                if (!anyTooLarge) {
                    if (spec == null)
                        spec = new MergeSpecification();
                    message("    " + start + " to " + end
                            + ": add this merge");
                    spec.add(new OneMerge(infos.range(start, end),
                            useCompoundFile));
                } else
                    message("    "
                            + start
                            + " to "
                            + end
                            + ": contains segment over maxMergeSize or maxMergeDocs; skipping");

                start = end;
                end = start + mergeFactor;
            }

            start = 1 + upto;
        }

        return spec;
    }

    /** <p>Determines the largest segment (measured by
     * document count) that may be merged with other segments.
     * Small values (e.g., less than 10,000) are best for
     * interactive indexing, as this limits the length of
     * pauses while indexing to a few seconds.  Larger values
     * are best for batched indexing and speedier
     * searches.</p>
     *
     * <p>The default value is {@link Integer#MAX_VALUE}.</p>
     *
     * <p>The default merge policy ({@link
     * LogByteSizeMergePolicy}) also allows you to set this
     * limit by net size (in MB) of the segment, using {@link
     * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
     */
    public void setMaxMergeDocs(int maxMergeDocs) {
        this.maxMergeDocs = maxMergeDocs;
    }

    /** Returns the largest segment (measured by document
     *  count) that may be merged with other segments.
     *  @see #setMaxMergeDocs */
    public int getMaxMergeDocs() {
        return maxMergeDocs;
    }

}
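
A usage sketch (added commentary, not part of the listing above; it assumes the Lucene 2.3-era API, in which IndexWriter exposes setMergePolicy(MergePolicy), and the index path is hypothetical). Since LogMergePolicy is abstract, a concrete subclass such as LogDocMergePolicy is installed on the IndexWriter and tuned through the setters defined above:

        import org.apache.lucene.analysis.standard.StandardAnalyzer;
        import org.apache.lucene.index.IndexWriter;
        import org.apache.lucene.index.LogDocMergePolicy;
        import org.apache.lucene.store.FSDirectory;

        public class LogMergePolicyExample {
            public static void main(String[] args) throws Exception {
                // Open a writer over a hypothetical index directory.
                IndexWriter writer = new IndexWriter(FSDirectory
                        .getDirectory("/tmp/example-index"),
                        new StandardAnalyzer(), true);

                // LogDocMergePolicy measures segment size by document count.
                LogDocMergePolicy policy = new LogDocMergePolicy();
                policy.setMergeFactor(20);       // larger factor favors batch indexing
                policy.setMaxMergeDocs(100000);  // never merge segments past 100k docs
                policy.setUseCompoundFile(true); // write compound-format segments

                writer.setMergePolicy(policy);
                // ... writer.addDocument(...) calls go here; merges now
                // follow the level-based policy implemented above.
                writer.close();
            }
        }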