Source Code Cross Referenced for IndexReader.java in » Net » lucene-connector » org » apache » lucene » index » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Net » lucene connector » org.apache.lucene.index
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        package org.apache.lucene.index;
0002:
0003:        /**
0004:         * Licensed to the Apache Software Foundation (ASF) under one or more
0005:         * contributor license agreements.  See the NOTICE file distributed with
0006:         * this work for additional information regarding copyright ownership.
0007:         * The ASF licenses this file to You under the Apache License, Version 2.0
0008:         * (the "License"); you may not use this file except in compliance with
0009:         * the License.  You may obtain a copy of the License at
0010:         *
0011:         *     http://www.apache.org/licenses/LICENSE-2.0
0012:         *
0013:         * Unless required by applicable law or agreed to in writing, software
0014:         * distributed under the License is distributed on an "AS IS" BASIS,
0015:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016:         * See the License for the specific language governing permissions and
0017:         * limitations under the License.
0018:         */
0019:
0020:        import org.apache.lucene.document.Document;
0021:        import org.apache.lucene.document.FieldSelector;
0022:        import org.apache.lucene.search.Similarity;
0023:        import org.apache.lucene.store.*;
0024:
0025:        import java.io.File;
0026:        import java.io.FileOutputStream;
0027:        import java.io.IOException;
0028:        import java.util.Arrays;
0029:        import java.util.Collection;
0030:
0031:        /** IndexReader is an abstract class, providing an interface for accessing an
0032:         index.  Search of an index is done entirely through this abstract interface,
0033:         so that any subclass which implements it is searchable.
0034:
0035:         <p> Concrete subclasses of IndexReader are usually constructed with a call to
0036:         one of the static <code>open()</code> methods, e.g. {@link #open(String)}.
0037:
0038:         <p> For efficiency, in this API documents are often referred to via
0039:         <i>document numbers</i>, non-negative integers which each name a unique
0040:         document in the index.  These document numbers are ephemeral--they may change
0041:         as documents are added to and deleted from an index.  Clients should thus not
0042:         rely on a given document having the same number between sessions.
0043:
0044:         <p> An IndexReader can be opened on a directory for which an IndexWriter is
0045:         opened already, but it cannot be used to delete documents from the index then.
0046:
0047:         <p>
0048:         NOTE: for backwards API compatibility, several methods are not listed 
0049:         as abstract, but have no useful implementations in this base class and 
0050:         instead always throw UnsupportedOperationException.  Subclasses are 
0051:         strongly encouraged to override these methods, but in many cases may not 
0052:         need to.
0053:         </p>
0054:
0055:         @version $Id: IndexReader.java 598462 2007-11-26 23:31:39Z dnaber $
0056:         */
0057:        public abstract class IndexReader {
0058:
0059:            /**
0060:             * Constants describing field properties, for example used for
0061:             * {@link IndexReader#getFieldNames(FieldOption)}.
0062:             */
0063:            public static final class FieldOption {
0064:                private String option;
0065:
0066:                private FieldOption() {
0067:                }
0068:
0069:                private FieldOption(String option) {
0070:                    this .option = option;
0071:                }
0072:
0073:                public String toString() {
0074:                    return this .option;
0075:                }
0076:
0077:                /** All fields */
0078:                public static final FieldOption ALL = new FieldOption("ALL");
0079:                /** All indexed fields */
0080:                public static final FieldOption INDEXED = new FieldOption(
0081:                        "INDEXED");
0082:                /** All fields that store payloads */
0083:                public static final FieldOption STORES_PAYLOADS = new FieldOption(
0084:                        "STORES_PAYLOADS");
0085:                /** All fields which are not indexed */
0086:                public static final FieldOption UNINDEXED = new FieldOption(
0087:                        "UNINDEXED");
0088:                /** All fields which are indexed with termvectors enabled */
0089:                public static final FieldOption INDEXED_WITH_TERMVECTOR = new FieldOption(
0090:                        "INDEXED_WITH_TERMVECTOR");
0091:                /** All fields which are indexed but don't have termvectors enabled */
0092:                public static final FieldOption INDEXED_NO_TERMVECTOR = new FieldOption(
0093:                        "INDEXED_NO_TERMVECTOR");
0094:                /** All fields with termvectors enabled. Please note that only standard termvector fields are returned */
0095:                public static final FieldOption TERMVECTOR = new FieldOption(
0096:                        "TERMVECTOR");
0097:                /** All fields with termvectors with position values enabled */
0098:                public static final FieldOption TERMVECTOR_WITH_POSITION = new FieldOption(
0099:                        "TERMVECTOR_WITH_POSITION");
0100:                /** All fields with termvectors with offset values enabled */
0101:                public static final FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption(
0102:                        "TERMVECTOR_WITH_OFFSET");
0103:                /** All fields with termvectors with offset values and position values enabled */
0104:                public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption(
0105:                        "TERMVECTOR_WITH_POSITION_OFFSET");
0106:            }
0107:
// Neither read nor written in the part of the class visible here.
// NOTE(review): presumably records that close() has already run -- confirm.
private boolean closed;
// Neither read nor written in the part of the class visible here.
// NOTE(review): presumably set when there are uncommitted changes -- confirm.
protected boolean hasChanges;

// Reference count: set to 1 by the no-arg constructor, adjusted by
// incRef()/decRef(); ensureOpen() treats a value <= 0 as "closed".
private volatile int refCount;
0112:
0113:            // for testing
0114:            synchronized int getRefCount() {
0115:                return refCount;
0116:            }
0117:
0118:            /**
0119:             * Increments the refCount of this IndexReader instance. RefCounts are used to determine
0120:             * when a reader can be closed safely, i. e. as soon as no other IndexReader is referencing
0121:             * it anymore.
0122:             */
0123:            protected synchronized void incRef() {
0124:                assert refCount > 0;
0125:                refCount++;
0126:            }
0127:
0128:            /**
0129:             * Decreases the refCount of this IndexReader instance. If the refCount drops
0130:             * to 0, then pending changes are committed to the index and this reader is closed.
0131:             * 
0132:             * @throws IOException in case an IOException occurs in commit() or doClose()
0133:             */
0134:            protected synchronized void decRef() throws IOException {
0135:                assert refCount > 0;
0136:                if (refCount == 1) {
0137:                    commit();
0138:                    doClose();
0139:                }
0140:                refCount--;
0141:            }
0142:
/**
 * Directory handed to the legacy {@link #IndexReader(Directory)} constructor;
 * it stays <code>null</code> when the no-arg constructor is used. The default
 * {@link #directory()} implementation returns this value.
 *
 * @deprecated will be deleted when IndexReader(Directory) is deleted
 * @see #directory()
 */
private Directory directory;
0148:
/**
 * Legacy constructor, kept for backwards compatibility only.
 *
 * <p>
 * It supports old subclasses that did not "own" a specific directory but
 * still needed something for {@link #directory()} to return. New subclasses
 * should call the no-arg constructor and implement directory() themselves.
 *
 * @param directory Directory to be returned by the directory() method
 * @see #directory()
 * @deprecated - use IndexReader()
 */
protected IndexReader(Directory directory) {
    this(); // initialises the reference count
    this.directory = directory;
}
0167:
/**
 * Creates a reader holding a single reference (its own), so the first
 * {@link #decRef()} commits and closes it.
 */
protected IndexReader() {
    this.refCount = 1;
}
0171:
0172:            /**
0173:             * @throws AlreadyClosedException if this IndexReader is closed
0174:             */
0175:            protected final void ensureOpen() throws AlreadyClosedException {
0176:                if (refCount <= 0) {
0177:                    throw new AlreadyClosedException(
0178:                            "this IndexReader is closed");
0179:                }
0180:            }
0181:
0182:            /** Returns an IndexReader reading the index in an FSDirectory in the named
0183:             path.
0184:             * @throws CorruptIndexException if the index is corrupt
0185:             * @throws IOException if there is a low-level IO error
0186:             * @param path the path to the index directory */
0187:            public static IndexReader open(String path)
0188:                    throws CorruptIndexException, IOException {
0189:                return open(FSDirectory.getDirectory(path), true, null);
0190:            }
0191:
0192:            /** Returns an IndexReader reading the index in an FSDirectory in the named
0193:             * path.
0194:             * @param path the path to the index directory
0195:             * @throws CorruptIndexException if the index is corrupt
0196:             * @throws IOException if there is a low-level IO error
0197:             */
0198:            public static IndexReader open(File path)
0199:                    throws CorruptIndexException, IOException {
0200:                return open(FSDirectory.getDirectory(path), true, null);
0201:            }
0202:
0203:            /** Returns an IndexReader reading the index in the given Directory.
0204:             * @param directory the index directory
0205:             * @throws CorruptIndexException if the index is corrupt
0206:             * @throws IOException if there is a low-level IO error
0207:             */
0208:            public static IndexReader open(final Directory directory)
0209:                    throws CorruptIndexException, IOException {
0210:                return open(directory, false, null);
0211:            }
0212:
0213:            /** Expert: returns an IndexReader reading the index in the given
0214:             * Directory, with a custom {@link IndexDeletionPolicy}.
0215:             * @param directory the index directory
0216:             * @param deletionPolicy a custom deletion policy (only used
0217:             *  if you use this reader to perform deletes or to set
0218:             *  norms); see {@link IndexWriter} for details.
0219:             * @throws CorruptIndexException if the index is corrupt
0220:             * @throws IOException if there is a low-level IO error
0221:             */
0222:            public static IndexReader open(final Directory directory,
0223:                    IndexDeletionPolicy deletionPolicy)
0224:                    throws CorruptIndexException, IOException {
0225:                return open(directory, false, deletionPolicy);
0226:            }
0227:
0228:            private static IndexReader open(final Directory directory,
0229:                    final boolean closeDirectory,
0230:                    final IndexDeletionPolicy deletionPolicy)
0231:                    throws CorruptIndexException, IOException {
0232:                return DirectoryIndexReader.open(directory, closeDirectory,
0233:                        deletionPolicy);
0234:            }
0235:
0236:            /**
0237:             * Refreshes an IndexReader if the index has changed since this instance 
0238:             * was (re)opened. 
0239:             * <p>
0240:             * Opening an IndexReader is an expensive operation. This method can be used
0241:             * to refresh an existing IndexReader to reduce these costs. This method 
0242:             * tries to only load segments that have changed or were created after the 
0243:             * IndexReader was (re)opened.
0244:             * <p>
0245:             * If the index has not changed since this instance was (re)opened, then this
0246:             * call is a NOOP and returns this instance. Otherwise, a new instance is 
0247:             * returned. The old instance is <b>not</b> closed and remains usable.<br>
0248:             * <b>Note:</b> The re-opened reader instance and the old instance might share
0249:             * the same resources. For this reason no index modification operations 
0250:             * (e. g. {@link #deleteDocument(int)}, {@link #setNorm(int, String, byte)}) 
0251:             * should be performed using one of the readers until the old reader instance
0252:             * is closed. <b>Otherwise, the behavior of the readers is undefined.</b> 
0253:             * <p>   
0254:             * You can determine whether a reader was actually reopened by comparing the
0255:             * old instance with the instance returned by this method: 
0256:             * <pre>
0257:             * IndexReader reader = ... 
0258:             * ...
0259:             * IndexReader new = r.reopen();
0260:             * if (new != reader) {
0261:             *   ...     // reader was reopened
0262:             *   reader.close(); 
0263:             * }
0264:             * reader = new;
0265:             * ...
0266:             * </pre>
0267:             * 
0268:             * @throws CorruptIndexException if the index is corrupt
0269:             * @throws IOException if there is a low-level IO error
0270:             */
0271:            public synchronized IndexReader reopen()
0272:                    throws CorruptIndexException, IOException {
0273:                throw new UnsupportedOperationException(
0274:                        "This reader does not support reopen().");
0275:            }
0276:
0277:            /** 
0278:             * Returns the directory associated with this index.  The Default 
0279:             * implementation returns the directory specified by subclasses when 
0280:             * delegating to the IndexReader(Directory) constructor, or throws an 
0281:             * UnsupportedOperationException if one was not specified.
0282:             * @throws UnsupportedOperationException if no directory
0283:             */
0284:            public Directory directory() {
0285:                ensureOpen();
0286:                if (null != directory) {
0287:                    return directory;
0288:                } else {
0289:                    throw new UnsupportedOperationException(
0290:                            "This reader does not support this method.");
0291:                }
0292:            }
0293:
0294:            /**
0295:             * Returns the time the index in the named directory was last modified.
0296:             * Do not use this to check whether the reader is still up-to-date, use
0297:             * {@link #isCurrent()} instead. 
0298:             * @throws CorruptIndexException if the index is corrupt
0299:             * @throws IOException if there is a low-level IO error
0300:             */
0301:            public static long lastModified(String directory)
0302:                    throws CorruptIndexException, IOException {
0303:                return lastModified(new File(directory));
0304:            }
0305:
0306:            /**
0307:             * Returns the time the index in the named directory was last modified. 
0308:             * Do not use this to check whether the reader is still up-to-date, use
0309:             * {@link #isCurrent()} instead. 
0310:             * @throws CorruptIndexException if the index is corrupt
0311:             * @throws IOException if there is a low-level IO error
0312:             */
0313:            public static long lastModified(File fileDirectory)
0314:                    throws CorruptIndexException, IOException {
0315:                return ((Long) new SegmentInfos.FindSegmentsFile(fileDirectory) {
0316:                    public Object doBody(String segmentFileName) {
0317:                        return new Long(FSDirectory.fileModified(fileDirectory,
0318:                                segmentFileName));
0319:                    }
0320:                }.run()).longValue();
0321:            }
0322:
0323:            /**
0324:             * Returns the time the index in the named directory was last modified. 
0325:             * Do not use this to check whether the reader is still up-to-date, use
0326:             * {@link #isCurrent()} instead. 
0327:             * @throws CorruptIndexException if the index is corrupt
0328:             * @throws IOException if there is a low-level IO error
0329:             */
0330:            public static long lastModified(final Directory directory2)
0331:                    throws CorruptIndexException, IOException {
0332:                return ((Long) new SegmentInfos.FindSegmentsFile(directory2) {
0333:                    public Object doBody(String segmentFileName)
0334:                            throws IOException {
0335:                        return new Long(directory2
0336:                                .fileModified(segmentFileName));
0337:                    }
0338:                }.run()).longValue();
0339:            }
0340:
0341:            /**
0342:             * Reads version number from segments files. The version number is
0343:             * initialized with a timestamp and then increased by one for each change of
0344:             * the index.
0345:             * 
0346:             * @param directory where the index resides.
0347:             * @return version number.
0348:             * @throws CorruptIndexException if the index is corrupt
0349:             * @throws IOException if there is a low-level IO error
0350:             */
0351:            public static long getCurrentVersion(String directory)
0352:                    throws CorruptIndexException, IOException {
0353:                return getCurrentVersion(new File(directory));
0354:            }
0355:
0356:            /**
0357:             * Reads version number from segments files. The version number is
0358:             * initialized with a timestamp and then increased by one for each change of
0359:             * the index.
0360:             * 
0361:             * @param directory where the index resides.
0362:             * @return version number.
0363:             * @throws CorruptIndexException if the index is corrupt
0364:             * @throws IOException if there is a low-level IO error
0365:             */
0366:            public static long getCurrentVersion(File directory)
0367:                    throws CorruptIndexException, IOException {
0368:                Directory dir = FSDirectory.getDirectory(directory);
0369:                long version = getCurrentVersion(dir);
0370:                dir.close();
0371:                return version;
0372:            }
0373:
0374:            /**
0375:             * Reads version number from segments files. The version number is
0376:             * initialized with a timestamp and then increased by one for each change of
0377:             * the index.
0378:             * 
0379:             * @param directory where the index resides.
0380:             * @return version number.
0381:             * @throws CorruptIndexException if the index is corrupt
0382:             * @throws IOException if there is a low-level IO error
0383:             */
0384:            public static long getCurrentVersion(Directory directory)
0385:                    throws CorruptIndexException, IOException {
0386:                return SegmentInfos.readCurrentVersion(directory);
0387:            }
0388:
0389:            /**
0390:             * Version number when this IndexReader was opened. Not implemented in the IndexReader base class.
0391:             * @throws UnsupportedOperationException unless overridden in subclass
0392:             */
0393:            public long getVersion() {
0394:                throw new UnsupportedOperationException(
0395:                        "This reader does not support this method.");
0396:            }
0397:
0398:            /**<p>For IndexReader implementations that use
0399:             * TermInfosReader to read terms, this sets the
0400:             * indexDivisor to subsample the number of indexed terms
0401:             * loaded into memory.  This has the same effect as {@link
0402:             * IndexWriter#setTermIndexInterval} except that setting
0403:             * must be done at indexing time while this setting can be
0404:             * set per reader.  When set to N, then one in every
0405:             * N*termIndexInterval terms in the index is loaded into
0406:             * memory.  By setting this to a value > 1 you can reduce
0407:             * memory usage, at the expense of higher latency when
0408:             * loading a TermInfo.  The default value is 1.</p>
0409:             *
0410:             * <b>NOTE:</b> you must call this before the term
0411:             * index is loaded.  If the index is already loaded, 
0412:             * an IllegalStateException is thrown.
0413:             * @throws IllegalStateException if the term index has already been loaded into memory
0414:             */
0415:            public void setTermInfosIndexDivisor(int indexDivisor)
0416:                    throws IllegalStateException {
0417:                throw new UnsupportedOperationException(
0418:                        "This reader does not support this method.");
0419:            }
0420:
0421:            /** <p>For IndexReader implementations that use
0422:             *  TermInfosReader to read terms, this returns the
0423:             *  current indexDivisor.
0424:             *  @see #setTermInfosIndexDivisor */
0425:            public int getTermInfosIndexDivisor() {
0426:                throw new UnsupportedOperationException(
0427:                        "This reader does not support this method.");
0428:            }
0429:
0430:            /**
0431:             * Check whether this IndexReader is still using the
0432:             * current (i.e., most recently committed) version of the
0433:             * index.  If a writer has committed any changes to the
0434:             * index since this reader was opened, this will return
0435:             * <code>false</code>, in which case you must open a new
0436:             * IndexReader in order to see the changes.  See the
0437:             * description of the <a href="IndexWriter.html#autoCommit"><code>autoCommit</code></a>
0438:             * flag which controls when the {@link IndexWriter}
0439:             * actually commits changes to the index.
0440:             * 
0441:             * <p>
0442:             * Not implemented in the IndexReader base class.
0443:             * </p>
0444:             * @throws CorruptIndexException if the index is corrupt
0445:             * @throws IOException if there is a low-level IO error
0446:             * @throws UnsupportedOperationException unless overridden in subclass
0447:             */
0448:            public boolean isCurrent() throws CorruptIndexException,
0449:                    IOException {
0450:                throw new UnsupportedOperationException(
0451:                        "This reader does not support this method.");
0452:            }
0453:
0454:            /**
0455:             * Checks is the index is optimized (if it has a single segment and 
0456:             * no deletions).  Not implemented in the IndexReader base class.
0457:             * @return <code>true</code> if the index is optimized; <code>false</code> otherwise
0458:             * @throws UnsupportedOperationException unless overridden in subclass
0459:             */
0460:            public boolean isOptimized() {
0461:                throw new UnsupportedOperationException(
0462:                        "This reader does not support this method.");
0463:            }
0464:
0465:            /**
0466:             *  Return an array of term frequency vectors for the specified document.
0467:             *  The array contains a vector for each vectorized field in the document.
0468:             *  Each vector contains terms and frequencies for all terms in a given vectorized field.
0469:             *  If no such fields existed, the method returns null. The term vectors that are
0470:             * returned my either be of type TermFreqVector or of type TermPositionsVector if
0471:             * positions or offsets have been stored.
0472:             * 
0473:             * @param docNumber document for which term frequency vectors are returned
0474:             * @return array of term frequency vectors. May be null if no term vectors have been
0475:             *  stored for the specified document.
0476:             * @throws IOException if index cannot be accessed
0477:             * @see org.apache.lucene.document.Field.TermVector
0478:             */
0479:            abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
0480:                    throws IOException;
0481:
0482:            /**
0483:             *  Return a term frequency vector for the specified document and field. The
0484:             *  returned vector contains terms and frequencies for the terms in
0485:             *  the specified field of this document, if the field had the storeTermVector
0486:             *  flag set. If termvectors had been stored with positions or offsets, a 
0487:             *  TermPositionsVector is returned.
0488:             * 
0489:             * @param docNumber document for which the term frequency vector is returned
0490:             * @param field field for which the term frequency vector is returned.
0491:             * @return term frequency vector May be null if field does not exist in the specified
0492:             * document or term vector was not stored.
0493:             * @throws IOException if index cannot be accessed
0494:             * @see org.apache.lucene.document.Field.TermVector
0495:             */
0496:            abstract public TermFreqVector getTermFreqVector(int docNumber,
0497:                    String field) throws IOException;
0498:
0499:            /**
0500:             * Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of
0501:             * the {@link TermFreqVector}.
0502:             * @param docNumber The number of the document to load the vector for
0503:             * @param field The name of the field to load
0504:             * @param mapper The {@link TermVectorMapper} to process the vector.  Must not be null
0505:             * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified.
0506:             * 
0507:             */
0508:            abstract public void getTermFreqVector(int docNumber, String field,
0509:                    TermVectorMapper mapper) throws IOException;
0510:
0511:            /**
0512:             * Map all the term vectors for all fields in a Document
0513:             * @param docNumber The number of the document to load the vector for
0514:             * @param mapper The {@link TermVectorMapper} to process the vector.  Must not be null
0515:             * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified.
0516:             */
0517:            abstract public void getTermFreqVector(int docNumber,
0518:                    TermVectorMapper mapper) throws IOException;
0519:
0520:            /**
0521:             * Returns <code>true</code> if an index exists at the specified directory.
0522:             * If the directory does not exist or if there is no index in it.
0523:             * <code>false</code> is returned.
0524:             * @param  directory the directory to check for an index
0525:             * @return <code>true</code> if an index exists; <code>false</code> otherwise
0526:             */
0527:            public static boolean indexExists(String directory) {
0528:                return indexExists(new File(directory));
0529:            }
0530:
0531:            /**
0532:             * Returns <code>true</code> if an index exists at the specified directory.
0533:             * If the directory does not exist or if there is no index in it.
0534:             * @param  directory the directory to check for an index
0535:             * @return <code>true</code> if an index exists; <code>false</code> otherwise
0536:             */
0537:
0538:            public static boolean indexExists(File directory) {
0539:                return SegmentInfos.getCurrentSegmentGeneration(directory
0540:                        .list()) != -1;
0541:            }
0542:
0543:            /**
0544:             * Returns <code>true</code> if an index exists at the specified directory.
0545:             * If the directory does not exist or if there is no index in it.
0546:             * @param  directory the directory to check for an index
0547:             * @return <code>true</code> if an index exists; <code>false</code> otherwise
0548:             * @throws IOException if there is a problem with accessing the index
0549:             */
0550:            public static boolean indexExists(Directory directory)
0551:                    throws IOException {
0552:                return SegmentInfos.getCurrentSegmentGeneration(directory) != -1;
0553:            }
0554:
0555:            /** Returns the number of documents in this index. */
0556:            public abstract int numDocs();
0557:
0558:            /** Returns one greater than the largest possible document number.
0559:             * This may be used to, e.g., determine how big to allocate an array which
0560:             * will have an element for every document number in an index.
0561:             */
0562:            public abstract int maxDoc();
0563:
0564:            /** Returns the stored fields of the <code>n</code><sup>th</sup>
0565:             <code>Document</code> in this index.
0566:             * @throws CorruptIndexException if the index is corrupt
0567:             * @throws IOException if there is a low-level IO error
0568:             */
0569:            public Document document(int n) throws CorruptIndexException,
0570:                    IOException {
0571:                ensureOpen();
0572:                return document(n, null);
0573:            }
0574:
0575:            /**
0576:             * Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position. The {@link org.apache.lucene.document.FieldSelector}
0577:             * may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded.
0578:             * 
0579:             * <b>NOTE:</b> If this Reader (more specifically, the underlying <code>FieldsReader</code>) is closed before the lazy {@link org.apache.lucene.document.Field} is
0580:             * loaded an exception may be thrown.  If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing you must
0581:             * explicitly load it or fetch the Document again with a new loader.
0582:             * 
0583:             *  
0584:             * @param n Get the document at the <code>n</code><sup>th</sup> position
0585:             * @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document.  May be null, in which case all Fields will be loaded.
0586:             * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
0587:             * @throws CorruptIndexException if the index is corrupt
0588:             * @throws IOException if there is a low-level IO error
0589:             * 
0590:             * @see org.apache.lucene.document.Fieldable
0591:             * @see org.apache.lucene.document.FieldSelector
0592:             * @see org.apache.lucene.document.SetBasedFieldSelector
0593:             * @see org.apache.lucene.document.LoadFirstFieldSelector
0594:             */
0595:            //When we convert to JDK 1.5 make this Set<String>
0596:            public abstract Document document(int n, FieldSelector fieldSelector)
0597:                    throws CorruptIndexException, IOException;
0598:
0599:            /** Returns true if document <i>n</i> has been deleted */
0600:            public abstract boolean isDeleted(int n);
0601:
0602:            /** Returns true if any documents have been deleted */
0603:            public abstract boolean hasDeletions();
0604:
0605:            /** Returns true if there are norms stored for this field. */
0606:            public boolean hasNorms(String field) throws IOException {
0607:                // backward compatible implementation.
0608:                // SegmentReader has an efficient implementation.
0609:                ensureOpen();
0610:                return norms(field) != null;
0611:            }
0612:
0613:            /** Returns the byte-encoded normalization factor for the named field of
0614:             * every document.  This is used by the search code to score documents.
0615:             *
0616:             * @see org.apache.lucene.document.Field#setBoost(float)
0617:             */
0618:            public abstract byte[] norms(String field) throws IOException;
0619:
0620:            /** Reads the byte-encoded normalization factor for the named field of every
0621:             *  document.  This is used by the search code to score documents.
0622:             *
0623:             * @see org.apache.lucene.document.Field#setBoost(float)
0624:             */
0625:            public abstract void norms(String field, byte[] bytes, int offset)
0626:                    throws IOException;
0627:
0628:            /** Expert: Resets the normalization factor for the named field of the named
0629:             * document.  The norm represents the product of the field's {@link
0630:             * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
0631:             * int) length normalization}.  Thus, to preserve the length normalization
0632:             * values when resetting this, one should base the new value upon the old.
0633:             *
0634:             * @see #norms(String)
0635:             * @see Similarity#decodeNorm(byte)
0636:             * @throws StaleReaderException if the index has changed
0637:             *  since this reader was opened
0638:             * @throws CorruptIndexException if the index is corrupt
0639:             * @throws LockObtainFailedException if another writer
0640:             *  has this index open (<code>write.lock</code> could not
0641:             *  be obtained)
0642:             * @throws IOException if there is a low-level IO error
0643:             */
0644:            public final synchronized void setNorm(int doc, String field,
0645:                    byte value) throws StaleReaderException,
0646:                    CorruptIndexException, LockObtainFailedException,
0647:                    IOException {
0648:                ensureOpen();
0649:                acquireWriteLock();
0650:                hasChanges = true;
0651:                doSetNorm(doc, field, value);
0652:            }
0653:
0654:            /** Implements setNorm in subclass.*/
0655:            protected abstract void doSetNorm(int doc, String field, byte value)
0656:                    throws CorruptIndexException, IOException;
0657:
0658:            /** Expert: Resets the normalization factor for the named field of the named
0659:             * document.
0660:             *
0661:             * @see #norms(String)
0662:             * @see Similarity#decodeNorm(byte)
0663:             * 
0664:             * @throws StaleReaderException if the index has changed
0665:             *  since this reader was opened
0666:             * @throws CorruptIndexException if the index is corrupt
0667:             * @throws LockObtainFailedException if another writer
0668:             *  has this index open (<code>write.lock</code> could not
0669:             *  be obtained)
0670:             * @throws IOException if there is a low-level IO error
0671:             */
0672:            public void setNorm(int doc, String field, float value)
0673:                    throws StaleReaderException, CorruptIndexException,
0674:                    LockObtainFailedException, IOException {
0675:                ensureOpen();
0676:                setNorm(doc, field, Similarity.encodeNorm(value));
0677:            }
0678:
0679:            /** Returns an enumeration of all the terms in the index. The
0680:             * enumeration is ordered by Term.compareTo(). Each term is greater
0681:             * than all that precede it in the enumeration. Note that after
0682:             * calling terms(), {@link TermEnum#next()} must be called
0683:             * on the resulting enumeration before calling other methods such as
0684:             * {@link TermEnum#term()}.
0685:             * @throws IOException if there is a low-level IO error
0686:             */
0687:            public abstract TermEnum terms() throws IOException;
0688:
0689:            /** Returns an enumeration of all terms starting at a given term. If
0690:             * the given term does not exist, the enumeration is positioned at the
0691:             * first term greater than the supplied therm. The enumeration is
0692:             * ordered by Term.compareTo(). Each term is greater than all that
0693:             * precede it in the enumeration.
0694:             * @throws IOException if there is a low-level IO error
0695:             */
0696:            public abstract TermEnum terms(Term t) throws IOException;
0697:
0698:            /** Returns the number of documents containing the term <code>t</code>.
0699:             * @throws IOException if there is a low-level IO error
0700:             */
0701:            public abstract int docFreq(Term t) throws IOException;
0702:
0703:            /** Returns an enumeration of all the documents which contain
0704:             * <code>term</code>. For each document, the document number, the frequency of
0705:             * the term in that document is also provided, for use in search scoring.
0706:             * Thus, this method implements the mapping:
0707:             * <p><ul>
0708:             * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
0709:             * </ul>
0710:             * <p>The enumeration is ordered by document number.  Each document number
0711:             * is greater than all that precede it in the enumeration.
0712:             * @throws IOException if there is a low-level IO error
0713:             */
0714:            public TermDocs termDocs(Term term) throws IOException {
0715:                ensureOpen();
0716:                TermDocs termDocs = termDocs();
0717:                termDocs.seek(term);
0718:                return termDocs;
0719:            }
0720:
0721:            /** Returns an unpositioned {@link TermDocs} enumerator.
0722:             * @throws IOException if there is a low-level IO error
0723:             */
0724:            public abstract TermDocs termDocs() throws IOException;
0725:
0726:            /** Returns an enumeration of all the documents which contain
0727:             * <code>term</code>.  For each document, in addition to the document number
0728:             * and frequency of the term in that document, a list of all of the ordinal
0729:             * positions of the term in the document is available.  Thus, this method
0730:             * implements the mapping:
0731:             *
0732:             * <p><ul>
0733:             * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
0734:             * &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
0735:             * pos<sub>freq-1</sub>&gt;
0736:             * &gt;<sup>*</sup>
0737:             * </ul>
0738:             * <p> This positional information facilitates phrase and proximity searching.
0739:             * <p>The enumeration is ordered by document number.  Each document number is
0740:             * greater than all that precede it in the enumeration.
0741:             * @throws IOException if there is a low-level IO error
0742:             */
0743:            public TermPositions termPositions(Term term) throws IOException {
0744:                ensureOpen();
0745:                TermPositions termPositions = termPositions();
0746:                termPositions.seek(term);
0747:                return termPositions;
0748:            }
0749:
0750:            /** Returns an unpositioned {@link TermPositions} enumerator.
0751:             * @throws IOException if there is a low-level IO error
0752:             */
0753:            public abstract TermPositions termPositions() throws IOException;
0754:
0755:            /** Deletes the document numbered <code>docNum</code>.  Once a document is
0756:             * deleted it will not appear in TermDocs or TermPostitions enumerations.
0757:             * Attempts to read its field with the {@link #document}
0758:             * method will result in an error.  The presence of this document may still be
0759:             * reflected in the {@link #docFreq} statistic, though
0760:             * this will be corrected eventually as the index is further modified.
0761:             *
0762:             * @throws StaleReaderException if the index has changed
0763:             * since this reader was opened
0764:             * @throws CorruptIndexException if the index is corrupt
0765:             * @throws LockObtainFailedException if another writer
0766:             *  has this index open (<code>write.lock</code> could not
0767:             *  be obtained)
0768:             * @throws IOException if there is a low-level IO error
0769:             */
0770:            public final synchronized void deleteDocument(int docNum)
0771:                    throws StaleReaderException, CorruptIndexException,
0772:                    LockObtainFailedException, IOException {
0773:                ensureOpen();
0774:                acquireWriteLock();
0775:                hasChanges = true;
0776:                doDelete(docNum);
0777:            }
0778:
0779:            /** Implements deletion of the document numbered <code>docNum</code>.
0780:             * Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}.
0781:             */
0782:            protected abstract void doDelete(int docNum)
0783:                    throws CorruptIndexException, IOException;
0784:
0785:            /** Deletes all documents that have a given <code>term</code> indexed.
0786:             * This is useful if one uses a document field to hold a unique ID string for
0787:             * the document.  Then to delete such a document, one merely constructs a
0788:             * term with the appropriate field and the unique ID string as its text and
0789:             * passes it to this method.
0790:             * See {@link #deleteDocument(int)} for information about when this deletion will 
0791:             * become effective.
0792:             *
0793:             * @return the number of documents deleted
0794:             * @throws StaleReaderException if the index has changed
0795:             *  since this reader was opened
0796:             * @throws CorruptIndexException if the index is corrupt
0797:             * @throws LockObtainFailedException if another writer
0798:             *  has this index open (<code>write.lock</code> could not
0799:             *  be obtained)
0800:             * @throws IOException if there is a low-level IO error
0801:             */
0802:            public final int deleteDocuments(Term term)
0803:                    throws StaleReaderException, CorruptIndexException,
0804:                    LockObtainFailedException, IOException {
0805:                ensureOpen();
0806:                TermDocs docs = termDocs(term);
0807:                if (docs == null)
0808:                    return 0;
0809:                int n = 0;
0810:                try {
0811:                    while (docs.next()) {
0812:                        deleteDocument(docs.doc());
0813:                        n++;
0814:                    }
0815:                } finally {
0816:                    docs.close();
0817:                }
0818:                return n;
0819:            }
0820:
0821:            /** Undeletes all documents currently marked as deleted in this index.
0822:             *
0823:             * @throws StaleReaderException if the index has changed
0824:             *  since this reader was opened
0825:             * @throws LockObtainFailedException if another writer
0826:             *  has this index open (<code>write.lock</code> could not
0827:             *  be obtained)
0828:             * @throws CorruptIndexException if the index is corrupt
0829:             * @throws IOException if there is a low-level IO error
0830:             */
0831:            public final synchronized void undeleteAll()
0832:                    throws StaleReaderException, CorruptIndexException,
0833:                    LockObtainFailedException, IOException {
0834:                ensureOpen();
0835:                acquireWriteLock();
0836:                hasChanges = true;
0837:                doUndeleteAll();
0838:            }
0839:
0840:            /** Implements actual undeleteAll() in subclass. */
0841:            protected abstract void doUndeleteAll()
0842:                    throws CorruptIndexException, IOException;
0843:
0844:            /** Does nothing by default. Subclasses that require a write lock for
0845:             *  index modifications must implement this method. */
0846:            protected synchronized void acquireWriteLock() throws IOException {
0847:                /* NOOP */
0848:            }
0849:
0850:            /**
0851:             * 
0852:             * @throws IOException
0853:             */
0854:            public final synchronized void flush() throws IOException {
0855:                ensureOpen();
0856:                commit();
0857:            }
0858:
0859:            /**
0860:             * Commit changes resulting from delete, undeleteAll, or
0861:             * setNorm operations
0862:             *
0863:             * If an exception is hit, then either no changes or all
0864:             * changes will have been committed to the index
0865:             * (transactional semantics).
0866:             * @throws IOException if there is a low-level IO error
0867:             */
0868:            protected final synchronized void commit() throws IOException {
0869:                if (hasChanges) {
0870:                    doCommit();
0871:                }
0872:                hasChanges = false;
0873:            }
0874:
0875:            /** Implements commit. */
0876:            protected abstract void doCommit() throws IOException;
0877:
0878:            /**
0879:             * Closes files associated with this index.
0880:             * Also saves any new deletions to disk.
0881:             * No other methods should be called after this has been called.
0882:             * @throws IOException if there is a low-level IO error
0883:             */
0884:            public final synchronized void close() throws IOException {
0885:                if (!closed) {
0886:                    decRef();
0887:                    closed = true;
0888:                }
0889:            }
0890:
0891:            /** Implements close. */
0892:            protected abstract void doClose() throws IOException;
0893:
0894:            /**
0895:             * Get a list of unique field names that exist in this index and have the specified
0896:             * field option information.
0897:             * @param fldOption specifies which field option should be available for the returned fields
0898:             * @return Collection of Strings indicating the names of the fields.
0899:             * @see IndexReader.FieldOption
0900:             */
0901:            public abstract Collection getFieldNames(FieldOption fldOption);
0902:
0903:            /**
0904:             * Returns <code>true</code> iff the index in the named directory is
0905:             * currently locked.
0906:             * @param directory the directory to check for a lock
0907:             * @throws IOException if there is a low-level IO error
0908:             */
0909:            public static boolean isLocked(Directory directory)
0910:                    throws IOException {
0911:                return directory.makeLock(IndexWriter.WRITE_LOCK_NAME)
0912:                        .isLocked();
0913:            }
0914:
0915:            /**
0916:             * Returns <code>true</code> iff the index in the named directory is
0917:             * currently locked.
0918:             * @param directory the directory to check for a lock
0919:             * @throws IOException if there is a low-level IO error
0920:             */
0921:            public static boolean isLocked(String directory) throws IOException {
0922:                Directory dir = FSDirectory.getDirectory(directory);
0923:                boolean result = isLocked(dir);
0924:                dir.close();
0925:                return result;
0926:            }
0927:
0928:            /**
0929:             * Forcibly unlocks the index in the named directory.
0930:             * <P>
0931:             * Caution: this should only be used by failure recovery code,
0932:             * when it is known that no other process nor thread is in fact
0933:             * currently accessing this index.
0934:             */
0935:            public static void unlock(Directory directory) throws IOException {
0936:                directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
0937:            }
0938:
0939:            /**
0940:             * Prints the filename and size of each file within a given compound file.
0941:             * Add the -extract flag to extract files to the current working directory.
0942:             * In order to make the extracted version of the index work, you have to copy
0943:             * the segments file from the compound index into the directory where the extracted files are stored.
0944:             * @param args Usage: org.apache.lucene.index.IndexReader [-extract] &lt;cfsfile&gt;
0945:             */
0946:            public static void main(String[] args) {
0947:                String filename = null;
0948:                boolean extract = false;
0949:
0950:                for (int i = 0; i < args.length; ++i) {
0951:                    if (args[i].equals("-extract")) {
0952:                        extract = true;
0953:                    } else if (filename == null) {
0954:                        filename = args[i];
0955:                    }
0956:                }
0957:
0958:                if (filename == null) {
0959:                    System.out
0960:                            .println("Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile>");
0961:                    return;
0962:                }
0963:
0964:                Directory dir = null;
0965:                CompoundFileReader cfr = null;
0966:
0967:                try {
0968:                    File file = new File(filename);
0969:                    String dirname = file.getAbsoluteFile().getParent();
0970:                    filename = file.getName();
0971:                    dir = FSDirectory.getDirectory(dirname);
0972:                    cfr = new CompoundFileReader(dir, filename);
0973:
0974:                    String[] files = cfr.list();
0975:                    Arrays.sort(files); // sort the array of filename so that the output is more readable
0976:
0977:                    for (int i = 0; i < files.length; ++i) {
0978:                        long len = cfr.fileLength(files[i]);
0979:
0980:                        if (extract) {
0981:                            System.out.println("extract " + files[i] + " with "
0982:                                    + len + " bytes to local directory...");
0983:                            IndexInput ii = cfr.openInput(files[i]);
0984:
0985:                            FileOutputStream f = new FileOutputStream(files[i]);
0986:
0987:                            // read and write with a small buffer, which is more effectiv than reading byte by byte
0988:                            byte[] buffer = new byte[1024];
0989:                            int chunk = buffer.length;
0990:                            while (len > 0) {
0991:                                final int bufLen = (int) Math.min(chunk, len);
0992:                                ii.readBytes(buffer, 0, bufLen);
0993:                                f.write(buffer, 0, bufLen);
0994:                                len -= bufLen;
0995:                            }
0996:
0997:                            f.close();
0998:                            ii.close();
0999:                        } else
1000:                            System.out
1001:                                    .println(files[i] + ": " + len + " bytes");
1002:                    }
1003:                } catch (IOException ioe) {
1004:                    ioe.printStackTrace();
1005:                } finally {
1006:                    try {
1007:                        if (dir != null)
1008:                            dir.close();
1009:                        if (cfr != null)
1010:                            cfr.close();
1011:                    } catch (IOException ioe) {
1012:                        ioe.printStackTrace();
1013:                    }
1014:                }
1015:            }
1016:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.