Source Code Cross Referenced for CheckIndex.java in » Net » lucene-connector » org » apache » lucene » index » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Net » lucene connector » org.apache.lucene.index
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        package org.apache.lucene.index;
002:
003:        /**
004:         * Licensed to the Apache Software Foundation (ASF) under one or more
005:         * contributor license agreements.  See the NOTICE file distributed with
006:         * this work for additional information regarding copyright ownership.
007:         * The ASF licenses this file to You under the Apache License, Version 2.0
008:         * (the "License"); you may not use this file except in compliance with
009:         * the License.  You may obtain a copy of the License at
010:         *
011:         *     http://www.apache.org/licenses/LICENSE-2.0
012:         *
013:         * Unless required by applicable law or agreed to in writing, software
014:         * distributed under the License is distributed on an "AS IS" BASIS,
015:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016:         * See the License for the specific language governing permissions and
017:         * limitations under the License.
018:         */
019:
020:        import org.apache.lucene.store.FSDirectory;
021:        import org.apache.lucene.store.Directory;
022:        import org.apache.lucene.store.IndexInput;
023:        import org.apache.lucene.document.Document;
024:
025:        import java.text.NumberFormat;
026:        import java.io.PrintStream;
027:        import java.io.IOException;
028:        import java.util.Collection;
029:        import java.util.Iterator;
030:
031:        /**
032:         * Basic tool to check the health of an index and write a
033:         * new segments file that removes reference to problematic
034:         * segments.  There are many more checks that this tool
035:         * could do but does not yet, eg: reconstructing a segments
036:         * file by looking for all loadable segments (if no segments
037:         * file is found), removing specifically specified segments,
038:         * listing files that exist but are not referenced, etc.
039:         */
040:
041:        public class CheckIndex {
042:
043:            public static PrintStream out = System.out;
044:
045:            private static class MySegmentTermDocs extends SegmentTermDocs {
046:
047:                int delCount;
048:
049:                MySegmentTermDocs(SegmentReader p) {
050:                    super (p);
051:                }
052:
053:                public void seek(Term term) throws IOException {
054:                    super .seek(term);
055:                    delCount = 0;
056:                }
057:
058:                protected void skippingDoc() throws IOException {
059:                    delCount++;
060:                }
061:            }
062:
063:            /** Returns true if index is clean, else false.*/
064:            public static boolean check(Directory dir, boolean doFix)
065:                    throws IOException {
066:                NumberFormat nf = NumberFormat.getInstance();
067:                SegmentInfos sis = new SegmentInfos();
068:
069:                try {
070:                    sis.read(dir);
071:                } catch (Throwable t) {
072:                    out
073:                            .println("ERROR: could not read any segments file in directory");
074:                    t.printStackTrace(out);
075:                    return false;
076:                }
077:
078:                final int numSegments = sis.size();
079:                final String segmentsFileName = sis.getCurrentSegmentFileName();
080:                IndexInput input = null;
081:                try {
082:                    input = dir.openInput(segmentsFileName);
083:                } catch (Throwable t) {
084:                    out
085:                            .println("ERROR: could not open segments file in directory");
086:                    t.printStackTrace(out);
087:                    return false;
088:                }
089:                int format = 0;
090:                try {
091:                    format = input.readInt();
092:                } catch (Throwable t) {
093:                    out
094:                            .println("ERROR: could not read segment file version in directory");
095:                    t.printStackTrace(out);
096:                    return false;
097:                } finally {
098:                    if (input != null)
099:                        input.close();
100:                }
101:
102:                String sFormat = "";
103:                boolean skip = false;
104:
105:                if (format == SegmentInfos.FORMAT)
106:                    sFormat = "FORMAT [Lucene Pre-2.1]";
107:                if (format == SegmentInfos.FORMAT_LOCKLESS)
108:                    sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
109:                else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
110:                    sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
111:                else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
112:                    sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
113:                else if (format < SegmentInfos.FORMAT_SHARED_DOC_STORE) {
114:                    sFormat = "int=" + format
115:                            + " [newer version of Lucene than this tool]";
116:                    skip = true;
117:                } else {
118:                    sFormat = format + " [Lucene 1.3 or prior]";
119:                }
120:
121:                out
122:                        .println("Segments file=" + segmentsFileName
123:                                + " numSegments=" + numSegments + " version="
124:                                + sFormat);
125:
126:                if (skip) {
127:                    out
128:                            .println("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
129:                    return false;
130:                }
131:
132:                SegmentInfos newSIS = (SegmentInfos) sis.clone();
133:                newSIS.clear();
134:                boolean changed = false;
135:                int totLoseDocCount = 0;
136:                int numBadSegments = 0;
137:                for (int i = 0; i < numSegments; i++) {
138:                    final SegmentInfo info = sis.info(i);
139:                    out.println("  " + (1 + i) + " of " + numSegments
140:                            + ": name=" + info.name + " docCount="
141:                            + info.docCount);
142:                    int toLoseDocCount = info.docCount;
143:
144:                    SegmentReader reader = null;
145:
146:                    try {
147:                        out
148:                                .println("    compound="
149:                                        + info.getUseCompoundFile());
150:                        out.println("    numFiles=" + info.files().size());
151:                        out.println("    size (MB)="
152:                                + nf.format(info.sizeInBytes()
153:                                        / (1024. * 1024.)));
154:                        final int docStoreOffset = info.getDocStoreOffset();
155:                        if (docStoreOffset != -1) {
156:                            out.println("    docStoreOffset=" + docStoreOffset);
157:                            out.println("    docStoreSegment="
158:                                    + info.getDocStoreSegment());
159:                            out.println("    docStoreIsCompoundFile="
160:                                    + info.getDocStoreIsCompoundFile());
161:                        }
162:                        final String delFileName = info.getDelFileName();
163:                        if (delFileName == null)
164:                            out.println("    no deletions");
165:                        else
166:                            out.println("    has deletions [delFileName="
167:                                    + delFileName + "]");
168:                        out.print("    test: open reader.........");
169:                        reader = SegmentReader.get(info);
170:                        final int numDocs = reader.numDocs();
171:                        toLoseDocCount = numDocs;
172:                        if (reader.hasDeletions())
173:                            out.println("OK [" + (info.docCount - numDocs)
174:                                    + " deleted docs]");
175:                        else
176:                            out.println("OK");
177:
178:                        out.print("    test: fields, norms.......");
179:                        Collection fieldNames = reader
180:                                .getFieldNames(IndexReader.FieldOption.ALL);
181:                        Iterator it = fieldNames.iterator();
182:                        while (it.hasNext()) {
183:                            final String fieldName = (String) it.next();
184:                            byte[] b = reader.norms(fieldName);
185:                            if (b.length != info.docCount)
186:                                throw new RuntimeException("norms for field \""
187:                                        + fieldName + "\" is length "
188:                                        + b.length + " != maxDoc "
189:                                        + info.docCount);
190:
191:                        }
192:                        out.println("OK [" + fieldNames.size() + " fields]");
193:
194:                        out.print("    test: terms, freq, prox...");
195:                        final TermEnum termEnum = reader.terms();
196:                        final TermPositions termPositions = reader
197:                                .termPositions();
198:
199:                        // Used only to count up # deleted docs for this
200:                        // term
201:                        final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(
202:                                reader);
203:
204:                        long termCount = 0;
205:                        long totFreq = 0;
206:                        long totPos = 0;
207:                        while (termEnum.next()) {
208:                            termCount++;
209:                            final Term term = termEnum.term();
210:                            final int docFreq = termEnum.docFreq();
211:                            termPositions.seek(term);
212:                            int lastDoc = -1;
213:                            int freq0 = 0;
214:                            totFreq += docFreq;
215:                            while (termPositions.next()) {
216:                                freq0++;
217:                                final int doc = termPositions.doc();
218:                                final int freq = termPositions.freq();
219:                                if (doc <= lastDoc)
220:                                    throw new RuntimeException("term " + term
221:                                            + ": doc " + doc + " < lastDoc "
222:                                            + lastDoc);
223:                                lastDoc = doc;
224:                                if (freq <= 0)
225:                                    throw new RuntimeException("term " + term
226:                                            + ": doc " + doc + ": freq " + freq
227:                                            + " is out of bounds");
228:
229:                                int lastPos = -1;
230:                                totPos += freq;
231:                                for (int j = 0; j < freq; j++) {
232:                                    final int pos = termPositions
233:                                            .nextPosition();
234:                                    if (pos < 0)
235:                                        throw new RuntimeException("term "
236:                                                + term + ": doc " + doc
237:                                                + ": pos " + pos
238:                                                + " is out of bounds");
239:                                    if (pos <= lastPos)
240:                                        throw new RuntimeException("term "
241:                                                + term + ": doc " + doc
242:                                                + ": pos " + pos
243:                                                + " < lastPos " + lastPos);
244:                                }
245:                            }
246:
247:                            // Now count how many deleted docs occurred in
248:                            // this term:
249:                            final int delCount;
250:                            if (reader.hasDeletions()) {
251:                                myTermDocs.seek(term);
252:                                while (myTermDocs.next()) {
253:                                }
254:                                delCount = myTermDocs.delCount;
255:                            } else
256:                                delCount = 0;
257:
258:                            if (freq0 + delCount != docFreq)
259:                                throw new RuntimeException("term " + term
260:                                        + " docFreq=" + docFreq
261:                                        + " != num docs seen " + freq0
262:                                        + " + num docs deleted " + delCount);
263:                        }
264:
265:                        out.println("OK [" + termCount + " terms; " + totFreq
266:                                + " terms/docs pairs; " + totPos + " tokens]");
267:
268:                        out.print("    test: stored fields.......");
269:                        int docCount = 0;
270:                        long totFields = 0;
271:                        for (int j = 0; j < info.docCount; j++)
272:                            if (!reader.isDeleted(j)) {
273:                                docCount++;
274:                                Document doc = reader.document(j);
275:                                totFields += doc.getFields().size();
276:                            }
277:
278:                        if (docCount != reader.numDocs())
279:                            throw new RuntimeException("docCount=" + docCount
280:                                    + " but saw " + docCount
281:                                    + " undeleted docs");
282:
283:                        out.println("OK [" + totFields
284:                                + " total field count; avg "
285:                                + nf.format((((float) totFields) / docCount))
286:                                + " fields per doc]");
287:
288:                        out.print("    test: term vectors........");
289:                        int totVectors = 0;
290:                        for (int j = 0; j < info.docCount; j++)
291:                            if (!reader.isDeleted(j)) {
292:                                TermFreqVector[] tfv = reader
293:                                        .getTermFreqVectors(j);
294:                                if (tfv != null)
295:                                    totVectors += tfv.length;
296:                            }
297:
298:                        out.println("OK [" + totVectors
299:                                + " total vector count; avg "
300:                                + nf.format((((float) totVectors) / docCount))
301:                                + " term/freq vector fields per doc]");
302:                        out.println("");
303:
304:                    } catch (Throwable t) {
305:                        out.println("FAILED");
306:                        String comment;
307:                        if (doFix)
308:                            comment = "will remove reference to this segment (-fix is specified)";
309:                        else
310:                            comment = "would remove reference to this segment (-fix was not specified)";
311:                        out.println("    WARNING: " + comment
312:                                + "; full exception:");
313:                        t.printStackTrace(out);
314:                        out.println("");
315:                        totLoseDocCount += toLoseDocCount;
316:                        numBadSegments++;
317:                        changed = true;
318:                        continue;
319:                    } finally {
320:                        if (reader != null)
321:                            reader.close();
322:                    }
323:
324:                    // Keeper
325:                    newSIS.add(info.clone());
326:                }
327:
328:                if (!changed) {
329:                    out.println("No problems were detected with this index.\n");
330:                    return true;
331:                } else {
332:                    out.println("WARNING: " + numBadSegments
333:                            + " broken segments detected");
334:                    if (doFix)
335:                        out.println("WARNING: " + totLoseDocCount
336:                                + " documents will be lost");
337:                    else
338:                        out
339:                                .println("WARNING: "
340:                                        + totLoseDocCount
341:                                        + " documents would be lost if -fix were specified");
342:                    out.println();
343:                }
344:
345:                if (doFix) {
346:                    out
347:                            .println("NOTE: will write new segments file in 5 seconds; this will remove "
348:                                    + totLoseDocCount
349:                                    + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
350:                    for (int i = 0; i < 5; i++) {
351:                        try {
352:                            Thread.sleep(1000);
353:                        } catch (InterruptedException ie) {
354:                            Thread.currentThread().interrupt();
355:                            i--;
356:                            continue;
357:                        }
358:
359:                        out.println("  " + (5 - i) + "...");
360:                    }
361:                    out.print("Writing...");
362:                    try {
363:                        newSIS.write(dir);
364:                    } catch (Throwable t) {
365:                        out.println("FAILED; exiting");
366:                        t.printStackTrace(out);
367:                        return false;
368:                    }
369:                    out.println("OK");
370:                    out.println("Wrote new segments file \""
371:                            + newSIS.getCurrentSegmentFileName() + "\"");
372:                } else {
373:                    out
374:                            .println("NOTE: would write new segments file [-fix was not specified]");
375:                }
376:                out.println("");
377:
378:                return false;
379:            }
380:
381:            public static void main(String[] args) throws Throwable {
382:
383:                boolean doFix = false;
384:                for (int i = 0; i < args.length; i++)
385:                    if (args[i].equals("-fix")) {
386:                        doFix = true;
387:                        break;
388:                    }
389:
390:                if (args.length != (doFix ? 2 : 1)) {
391:                    out
392:                            .println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix]\n"
393:                                    + "\n"
394:                                    + "  -fix: actually write a new segments_N file, removing any problematic segments\n"
395:                                    + "\n"
396:                                    + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n"
397:                                    + "documents (perhaps many) to be permanently removed from the index.  Always make\n"
398:                                    + "a backup copy of your index before running this!  Do not run this tool on an index\n"
399:                                    + "that is actively being written to.  You have been warned!\n"
400:                                    + "\n"
401:                                    + "Run without -fix, this tool will open the index, report version information\n"
402:                                    + "and report any exceptions it hits and what action it would take if -fix were\n"
403:                                    + "specified.  With -fix, this tool will remove any segments that have issues and\n"
404:                                    + "write a new segments_N file.  This means all documents contained in the affected\n"
405:                                    + "segments will be removed.\n"
406:                                    + "\n"
407:                                    + "This tool exits with exit code 1 if the index cannot be opened or has has any\n"
408:                                    + "corruption, else 0.\n");
409:                    System.exit(1);
410:                }
411:
412:                final String dirName = args[0];
413:                out.println("\nOpening index @ " + dirName + "\n");
414:                Directory dir = null;
415:                try {
416:                    dir = FSDirectory.getDirectory(dirName);
417:                } catch (Throwable t) {
418:                    out.println("ERROR: could not open directory \"" + dirName
419:                            + "\"; exiting");
420:                    t.printStackTrace(out);
421:                    System.exit(1);
422:                }
423:
424:                boolean isClean = check(dir, doFix);
425:
426:                final int exitCode;
427:                if (isClean)
428:                    exitCode = 0;
429:                else
430:                    exitCode = 1;
431:                System.exit(exitCode);
432:            }
433:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.