Source Code Cross Referenced for SegmentReader.java in Net » lucene-connector » org.apache.lucene.index

package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BitVector;

/**
 * @version $Id: SegmentReader.java 603061 2007-12-10 21:49:41Z gsingers $
 */
class SegmentReader extends DirectoryIndexReader {
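    // Overview (descriptive, from the code below): a SegmentReader gives read
    // access to one index segment -- its term dictionary (tis), postings
    // (freqStream/proxStream), stored fields (fieldsReader), norms,
    // deletions (deletedDocs) and, when present, term vectors.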
    private String segment;
    private SegmentInfo si;
    private int readBufferSize;

    FieldInfos fieldInfos;
    private FieldsReader fieldsReader;

    TermInfosReader tis;
    TermVectorsReader termVectorsReaderOrig = null;
    ThreadLocal termVectorsLocal = new ThreadLocal();

    BitVector deletedDocs = null;
    private boolean deletedDocsDirty = false;
    private boolean normsDirty = false;
    private boolean undeleteAll = false;

    private boolean rollbackDeletedDocsDirty = false;
    private boolean rollbackNormsDirty = false;
    private boolean rollbackUndeleteAll = false;

    IndexInput freqStream;
    IndexInput proxStream;

    // optionally used for the .nrm file shared by multiple norms
    private IndexInput singleNormStream;

    // Compound File Reader when based on a compound file segment
    CompoundFileReader cfsReader = null;
    CompoundFileReader storeCFSReader = null;

    // indicates the SegmentReader with which the resources are being shared,
    // in case this is a re-opened reader
    private SegmentReader referencedSegmentReader = null;

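    // Norms are reference-counted so that a reader produced by reopen() can
    // share unchanged per-field norms with the reader it was cloned from;
    // the underlying input is closed only when the last user decRefs it.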
    private class Norm {
        volatile int refCount;
        boolean useSingleNormStream;

        public synchronized void incRef() {
            assert refCount > 0;
            refCount++;
        }

        public synchronized void decRef() throws IOException {
            assert refCount > 0;
            if (refCount == 1) {
                close();
            }
            refCount--;
        }

        public Norm(IndexInput in, boolean useSingleNormStream, int number,
                long normSeek) {
            refCount = 1;
            this.in = in;
            this.number = number;
            this.normSeek = normSeek;
            this.useSingleNormStream = useSingleNormStream;
        }

        private IndexInput in;
        private byte[] bytes;
        private boolean dirty;
        private int number;
        private long normSeek;
        private boolean rollbackDirty;

        private void reWrite(SegmentInfo si) throws IOException {
            // NOTE: norms are re-written in regular directory, not cfs
            si.advanceNormGen(this.number);
            IndexOutput out = directory().createOutput(
                    si.getNormFileName(this.number));
            try {
                out.writeBytes(bytes, maxDoc());
            } finally {
                out.close();
            }
            this.dirty = false;
        }

        /** Closes the underlying IndexInput for this norm.
         * It is still valid to access all other norm properties after close is called.
         * @throws IOException
         */
        private synchronized void close() throws IOException {
            if (in != null && !useSingleNormStream) {
                in.close();
            }
            in = null;
        }
    }

    /**
     * Increments the refCount of this reader, as well as
     * of all norms this reader is using.
     */
    protected synchronized void incRef() {
        super.incRef();
        Iterator it = norms.values().iterator();
        while (it.hasNext()) {
            Norm norm = (Norm) it.next();
            norm.incRef();
        }
    }

    /**
     * Only increments the refCount of this reader, not of
     * the norms. This is important whenever a reopen()
     * creates a new SegmentReader that doesn't share
     * the norms with this one.
     */
    private synchronized void incRefReaderNotNorms() {
        super.incRef();
    }

    protected synchronized void decRef() throws IOException {
        super.decRef();
        Iterator it = norms.values().iterator();
        while (it.hasNext()) {
            Norm norm = (Norm) it.next();
            norm.decRef();
        }
    }

    private synchronized void decRefReaderNotNorms() throws IOException {
        super.decRef();
    }

    Map norms = new HashMap();

    /** The class which implements SegmentReader. */
    private static Class IMPL;
    static {
        try {
            String name = System.getProperty(
                    "org.apache.lucene.SegmentReader.class",
                    SegmentReader.class.getName());
            IMPL = Class.forName(name);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(
                    "cannot load SegmentReader class: " + e, e);
        } catch (SecurityException se) {
            try {
                IMPL = Class.forName(SegmentReader.class.getName());
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(
                        "cannot load default SegmentReader class: " + e, e);
            }
        }
    }
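
    // The system property read above makes the concrete implementation
    // pluggable: e.g. -Dorg.apache.lucene.SegmentReader.class=my.pkg.MySegmentReader
    // (class name illustrative) substitutes a SegmentReader subclass at runtime.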

    /**
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if there is a low-level IO error
     */
    public static SegmentReader get(SegmentInfo si)
            throws CorruptIndexException, IOException {
        return get(si.dir, si, null, false, false,
                BufferedIndexInput.BUFFER_SIZE, true);
    }

    /**
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if there is a low-level IO error
     */
    static SegmentReader get(SegmentInfo si, boolean doOpenStores)
            throws CorruptIndexException, IOException {
        return get(si.dir, si, null, false, false,
                BufferedIndexInput.BUFFER_SIZE, doOpenStores);
    }

    /**
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if there is a low-level IO error
     */
    public static SegmentReader get(SegmentInfo si, int readBufferSize)
            throws CorruptIndexException, IOException {
        return get(si.dir, si, null, false, false, readBufferSize, true);
    }

    /**
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if there is a low-level IO error
     */
    static SegmentReader get(SegmentInfo si, int readBufferSize,
            boolean doOpenStores) throws CorruptIndexException, IOException {
        return get(si.dir, si, null, false, false, readBufferSize,
                doOpenStores);
    }

    /**
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if there is a low-level IO error
     */
    public static SegmentReader get(SegmentInfos sis, SegmentInfo si,
            boolean closeDir) throws CorruptIndexException, IOException {
        return get(si.dir, si, sis, closeDir, true,
                BufferedIndexInput.BUFFER_SIZE, true);
    }

    /**
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if there is a low-level IO error
     */
    public static SegmentReader get(Directory dir, SegmentInfo si,
            SegmentInfos sis, boolean closeDir, boolean ownDir,
            int readBufferSize) throws CorruptIndexException, IOException {
        return get(dir, si, sis, closeDir, ownDir, readBufferSize, true);
    }

    /**
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if there is a low-level IO error
     */
    public static SegmentReader get(Directory dir, SegmentInfo si,
            SegmentInfos sis, boolean closeDir, boolean ownDir,
            int readBufferSize, boolean doOpenStores)
            throws CorruptIndexException, IOException {
        SegmentReader instance;
        try {
            instance = (SegmentReader) IMPL.newInstance();
        } catch (Exception e) {
            throw new RuntimeException(
                    "cannot load SegmentReader class: " + e, e);
        }
        instance.init(dir, sis, closeDir);
        instance.initialize(si, readBufferSize, doOpenStores);
        return instance;
    }

    private void initialize(SegmentInfo si, int readBufferSize,
            boolean doOpenStores) throws CorruptIndexException, IOException {
        segment = si.name;
        this.si = si;
        this.readBufferSize = readBufferSize;

        boolean success = false;

        try {
            // Use compound file directory for some files, if it exists
            Directory cfsDir = directory();
            if (si.getUseCompoundFile()) {
                cfsReader = new CompoundFileReader(directory(),
                        segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION,
                        readBufferSize);
                cfsDir = cfsReader;
            }

            final Directory storeDir;

            if (doOpenStores) {
                if (si.getDocStoreOffset() != -1) {
                    if (si.getDocStoreIsCompoundFile()) {
                        storeCFSReader = new CompoundFileReader(directory(),
                                si.getDocStoreSegment() + "."
                                        + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION,
                                readBufferSize);
                        storeDir = storeCFSReader;
                    } else {
                        storeDir = directory();
                    }
                } else {
                    storeDir = cfsDir;
                }
            } else {
                storeDir = null;
            }

            // No compound file exists - use the multi-file format
            fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

            final String fieldsSegment;

            if (si.getDocStoreOffset() != -1)
                fieldsSegment = si.getDocStoreSegment();
            else
                fieldsSegment = segment;

            if (doOpenStores) {
                fieldsReader = new FieldsReader(storeDir, fieldsSegment,
                        fieldInfos, readBufferSize, si.getDocStoreOffset(),
                        si.docCount);

                // Verify two sources of "maxDoc" agree:
                if (si.getDocStoreOffset() == -1
                        && fieldsReader.size() != si.docCount) {
                    throw new CorruptIndexException(
                            "doc counts differ for segment " + si.name
                                    + ": fieldsReader shows "
                                    + fieldsReader.size()
                                    + " but segmentInfo shows " + si.docCount);
                }
            }

            tis = new TermInfosReader(cfsDir, segment, fieldInfos,
                    readBufferSize);

            loadDeletedDocs();

            // make sure that all index files have been read or are kept open
            // so that if an index update removes them we'll still have them
            freqStream = cfsDir.openInput(segment + ".frq", readBufferSize);
            proxStream = cfsDir.openInput(segment + ".prx", readBufferSize);
            openNorms(cfsDir, readBufferSize);

            if (doOpenStores && fieldInfos.hasVectors()) { // open term vector files only as needed
                final String vectorsSegment;
                if (si.getDocStoreOffset() != -1)
                    vectorsSegment = si.getDocStoreSegment();
                else
                    vectorsSegment = segment;
                termVectorsReaderOrig = new TermVectorsReader(storeDir,
                        vectorsSegment, fieldInfos, readBufferSize,
                        si.getDocStoreOffset(), si.docCount);
            }
            success = true;
        } finally {

            // With lock-less commits, it's entirely possible (and
            // fine) to hit a FileNotFound exception above.  In
            // this case, we want to explicitly close any subset
            // of things that were opened so that we don't have to
            // wait for a GC to do so.
            if (!success) {
                doClose();
            }
        }
    }

    private void loadDeletedDocs() throws IOException {
        // NOTE: the bitvector is stored using the regular directory, not cfs
        if (hasDeletions(si)) {
            deletedDocs = new BitVector(directory(), si.getDelFileName());

            // Verify # deletes does not exceed maxDoc for this segment:
            if (deletedDocs.count() > maxDoc()) {
                throw new CorruptIndexException("number of deletes ("
                        + deletedDocs.count() + ") exceeds max doc ("
                        + maxDoc() + ") for segment " + si.name);
            }
        }
    }

    protected synchronized DirectoryIndexReader doReopen(SegmentInfos infos)
            throws CorruptIndexException, IOException {
        DirectoryIndexReader newReader;

        if (infos.size() == 1) {
            SegmentInfo si = infos.info(0);
            if (segment.equals(si.name)
                    && si.getUseCompoundFile() == SegmentReader.this.si
                            .getUseCompoundFile()) {
                newReader = reopenSegment(si);
            } else {
                // segment not referenced anymore, reopen not possible
                // or segment format changed
                newReader = SegmentReader.get(infos, infos.info(0), false);
            }
        } else {
            return new MultiSegmentReader(directory, infos, closeDirectory,
                    new SegmentReader[] { this }, null, null);
        }

        return newReader;
    }

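    // Re-opens just this segment: deletions and norms that are unchanged on
    // disk are shared with the current reader (with their refCounts bumped);
    // only what actually changed is re-loaded into the clone.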
    synchronized SegmentReader reopenSegment(SegmentInfo si)
            throws CorruptIndexException, IOException {
        boolean deletionsUpToDate = (this.si.hasDeletions() == si
                .hasDeletions())
                && (!si.hasDeletions() || this.si.getDelFileName().equals(
                        si.getDelFileName()));
        boolean normsUpToDate = true;

        boolean[] fieldNormsChanged = new boolean[fieldInfos.size()];
        if (normsUpToDate) {
            for (int i = 0; i < fieldInfos.size(); i++) {
                if (!this.si.getNormFileName(i).equals(si.getNormFileName(i))) {
                    normsUpToDate = false;
                    fieldNormsChanged[i] = true;
                }
            }
        }

        if (normsUpToDate && deletionsUpToDate) {
            return this;
        }

        // clone reader
        SegmentReader clone = new SegmentReader();
        boolean success = false;
        try {
            clone.directory = directory;
            clone.si = si;
            clone.segment = segment;
            clone.readBufferSize = readBufferSize;
            clone.cfsReader = cfsReader;
            clone.storeCFSReader = storeCFSReader;

            clone.fieldInfos = fieldInfos;
            clone.tis = tis;
            clone.freqStream = freqStream;
            clone.proxStream = proxStream;
            clone.termVectorsReaderOrig = termVectorsReaderOrig;

            // we have to open a new FieldsReader, because it is not thread-safe
            // and can thus not be shared among multiple SegmentReaders
            // TODO: Change this in case FieldsReader becomes thread-safe in the future
            final String fieldsSegment;

            Directory storeDir = directory();

            if (si.getDocStoreOffset() != -1) {
                fieldsSegment = si.getDocStoreSegment();
                if (storeCFSReader != null) {
                    storeDir = storeCFSReader;
                }
            } else {
                fieldsSegment = segment;
                if (cfsReader != null) {
                    storeDir = cfsReader;
                }
            }

            if (fieldsReader != null) {
                clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment,
                        fieldInfos, readBufferSize, si.getDocStoreOffset(),
                        si.docCount);
            }

            if (!deletionsUpToDate) {
                // load deleted docs
                clone.deletedDocs = null;
                clone.loadDeletedDocs();
            } else {
                clone.deletedDocs = this.deletedDocs;
            }

            clone.norms = new HashMap();
            if (!normsUpToDate) {
                // load norms
                for (int i = 0; i < fieldNormsChanged.length; i++) {
                    // copy unchanged norms to the cloned reader and incRef those norms
                    if (!fieldNormsChanged[i]) {
                        String curField = fieldInfos.fieldInfo(i).name;
                        Norm norm = (Norm) this.norms.get(curField);
                        norm.incRef();
                        clone.norms.put(curField, norm);
                    }
                }

                clone.openNorms(si.getUseCompoundFile() ? cfsReader
                        : directory(), readBufferSize);
            } else {
                Iterator it = norms.keySet().iterator();
                while (it.hasNext()) {
                    String field = (String) it.next();
                    Norm norm = (Norm) norms.get(field);
                    norm.incRef();
                    clone.norms.put(field, norm);
                }
            }

            if (clone.singleNormStream == null) {
                for (int i = 0; i < fieldInfos.size(); i++) {
                    FieldInfo fi = fieldInfos.fieldInfo(i);
                    if (fi.isIndexed && !fi.omitNorms) {
                        Directory d = si.getUseCompoundFile() ? cfsReader
                                : directory();
                        String fileName = si.getNormFileName(fi.number);
                        if (si.hasSeparateNorms(fi.number)) {
                            continue;
                        }

                        if (fileName.endsWith("."
                                + IndexFileNames.NORMS_EXTENSION)) {
                            clone.singleNormStream = d.openInput(fileName,
                                    readBufferSize);
                            break;
                        }
                    }
                }
            }

            success = true;
        } finally {
            if (this.referencedSegmentReader != null) {
                // this reader shares resources with another SegmentReader,
                // so we increment the other reader's refCount. We don't
                // increment the refCount of the norms because we did
                // that already for the shared norms
                clone.referencedSegmentReader = this.referencedSegmentReader;
                referencedSegmentReader.incRefReaderNotNorms();
            } else {
                // this reader wasn't reopened, so we increment this
                // reader's refCount
                clone.referencedSegmentReader = this;
                incRefReaderNotNorms();
            }

            if (!success) {
                // An exception occurred during reopen; we have to decRef the norms
                // that we incRef'ed already and close singleNormStream and FieldsReader
                clone.decRef();
            }
        }

        return clone;
    }

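    // Writes buffered deletions and modified norms back to the directory
    // under new generation file names; per the note below, the new files only
    // become live once the segments file itself is written.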
    protected void commitChanges() throws IOException {
        if (deletedDocsDirty) { // re-write deleted
            si.advanceDelGen();

            // We can write directly to the actual name (vs to a
            // .tmp & renaming it) because the file is not live
            // until segments file is written:
            deletedDocs.write(directory(), si.getDelFileName());
        }
        if (undeleteAll && si.hasDeletions()) {
            si.clearDelGen();
        }
        if (normsDirty) { // re-write norms
            si.setNumFields(fieldInfos.size());
            Iterator it = norms.values().iterator();
            while (it.hasNext()) {
                Norm norm = (Norm) it.next();
                if (norm.dirty) {
                    norm.reWrite(si);
                }
            }
        }
        deletedDocsDirty = false;
        normsDirty = false;
        undeleteAll = false;
    }

    FieldsReader getFieldsReader() {
        return fieldsReader;
    }

    protected void doClose() throws IOException {
        boolean hasReferencedReader = (referencedSegmentReader != null);

        if (hasReferencedReader) {
            referencedSegmentReader.decRefReaderNotNorms();
            referencedSegmentReader = null;
        }

        deletedDocs = null;

        // close the single norms stream
        if (singleNormStream != null) {
            // we can close this stream, even if the norms
            // are shared, because every reader has its own
            // singleNormStream
            singleNormStream.close();
            singleNormStream = null;
        }

        // re-opened SegmentReaders have their own instance of FieldsReader
        if (fieldsReader != null) {
            fieldsReader.close();
        }

        if (!hasReferencedReader) {
            // close everything, nothing is shared anymore with other readers
            if (tis != null) {
                tis.close();
            }

            if (freqStream != null)
                freqStream.close();
            if (proxStream != null)
                proxStream.close();

            if (termVectorsReaderOrig != null)
                termVectorsReaderOrig.close();

            if (cfsReader != null)
                cfsReader.close();

            if (storeCFSReader != null)
                storeCFSReader.close();

            // maybe close directory
            super.doClose();
        }
    }

    static boolean hasDeletions(SegmentInfo si) throws IOException {
        // Don't call ensureOpen() here (it could affect performance)
        return si.hasDeletions();
    }

    public boolean hasDeletions() {
        // Don't call ensureOpen() here (it could affect performance)
        return deletedDocs != null;
    }

    static boolean usesCompoundFile(SegmentInfo si) throws IOException {
        return si.getUseCompoundFile();
    }

    static boolean hasSeparateNorms(SegmentInfo si) throws IOException {
        return si.hasSeparateNorms();
    }

    protected void doDelete(int docNum) {
        if (deletedDocs == null)
            deletedDocs = new BitVector(maxDoc());
        deletedDocsDirty = true;
        undeleteAll = false;
        deletedDocs.set(docNum);
    }

    protected void doUndeleteAll() {
        deletedDocs = null;
        deletedDocsDirty = false;
        undeleteAll = true;
    }

    Vector files() throws IOException {
        return new Vector(si.files());
    }

    public TermEnum terms() {
        ensureOpen();
        return tis.terms();
    }

    public TermEnum terms(Term t) throws IOException {
        ensureOpen();
        return tis.terms(t);
    }

    FieldInfos getFieldInfos() {
        return fieldInfos;
    }

    /**
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException if there is a low-level IO error
     */
    public synchronized Document document(int n, FieldSelector fieldSelector)
            throws CorruptIndexException, IOException {
        ensureOpen();
        if (isDeleted(n))
            throw new IllegalArgumentException(
                    "attempt to access a deleted document");
        return fieldsReader.doc(n, fieldSelector);
    }

    public synchronized boolean isDeleted(int n) {
        return (deletedDocs != null && deletedDocs.get(n));
    }

    public TermDocs termDocs() throws IOException {
        ensureOpen();
        return new SegmentTermDocs(this);
    }

    public TermPositions termPositions() throws IOException {
        ensureOpen();
        return new SegmentTermPositions(this);
    }

    public int docFreq(Term t) throws IOException {
        ensureOpen();
        TermInfo ti = tis.get(t);
        if (ti != null)
            return ti.docFreq;
        else
            return 0;
    }

    public int numDocs() {
        // Don't call ensureOpen() here (it could affect performance)
        int n = maxDoc();
        if (deletedDocs != null)
            n -= deletedDocs.count();
        return n;
    }

    public int maxDoc() {
        // Don't call ensureOpen() here (it could affect performance)
        return si.docCount;
    }

    public void setTermInfosIndexDivisor(int indexDivisor)
            throws IllegalStateException {
        tis.setIndexDivisor(indexDivisor);
    }

    public int getTermInfosIndexDivisor() {
        return tis.getIndexDivisor();
    }

    /**
     * @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption)
     */
    public Collection getFieldNames(IndexReader.FieldOption fieldOption) {
        ensureOpen();

        Set fieldSet = new HashSet();
        for (int i = 0; i < fieldInfos.size(); i++) {
            FieldInfo fi = fieldInfos.fieldInfo(i);
            if (fieldOption == IndexReader.FieldOption.ALL) {
                fieldSet.add(fi.name);
            } else if (!fi.isIndexed
                    && fieldOption == IndexReader.FieldOption.UNINDEXED) {
                fieldSet.add(fi.name);
            } else if (fi.storePayloads
                    && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
                fieldSet.add(fi.name);
            } else if (fi.isIndexed
                    && fieldOption == IndexReader.FieldOption.INDEXED) {
                fieldSet.add(fi.name);
            } else if (fi.isIndexed
                    && fi.storeTermVector == false
                    && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
                fieldSet.add(fi.name);
            } else if (fi.storeTermVector == true
                    && fi.storePositionWithTermVector == false
                    && fi.storeOffsetWithTermVector == false
                    && fieldOption == IndexReader.FieldOption.TERMVECTOR) {
                fieldSet.add(fi.name);
            } else if (fi.isIndexed
                    && fi.storeTermVector
                    && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
                fieldSet.add(fi.name);
            } else if (fi.storePositionWithTermVector
                    && fi.storeOffsetWithTermVector == false
                    && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
                fieldSet.add(fi.name);
            } else if (fi.storeOffsetWithTermVector
                    && fi.storePositionWithTermVector == false
                    && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
                fieldSet.add(fi.name);
            } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector)
                    && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
                fieldSet.add(fi.name);
            }
        }
        return fieldSet;
    }

    public synchronized boolean hasNorms(String field) {
        ensureOpen();
        return norms.containsKey(field);
    }

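    // Fake norms give every document the byte that encodes a boost of 1.0,
    // keeping scoring neutral for fields whose norms are not stored.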
    static byte[] createFakeNorms(int size) {
        byte[] ones = new byte[size];
        Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
        return ones;
    }

    private byte[] ones;

    private byte[] fakeNorms() {
        if (ones == null)
            ones = createFakeNorms(maxDoc());
        return ones;
    }

    // can return null if norms aren't stored
    protected synchronized byte[] getNorms(String field) throws IOException {
        Norm norm = (Norm) norms.get(field);
        if (norm == null)
            return null; // not indexed, or norms not stored
        synchronized (norm) {
            if (norm.bytes == null) { // value not yet read
                byte[] bytes = new byte[maxDoc()];
                norms(field, bytes, 0);
                norm.bytes = bytes; // cache it
                // it's OK to close the underlying IndexInput as we have cached the
                // norms and will never read them again.
                norm.close();
            }
            return norm.bytes;
        }
    }

    // returns fake norms if norms aren't available
    public synchronized byte[] norms(String field) throws IOException {
        ensureOpen();
        byte[] bytes = getNorms(field);
        if (bytes == null)
            bytes = fakeNorms();
        return bytes;
    }

    protected void doSetNorm(int doc, String field, byte value)
            throws IOException {
        Norm norm = (Norm) norms.get(field);
        if (norm == null) // not an indexed field
            return;

        norm.dirty = true; // mark it dirty
        normsDirty = true;

        norms(field)[doc] = value; // set the value
    }

    /** Read norms into a pre-allocated array. */
    public synchronized void norms(String field, byte[] bytes, int offset)
            throws IOException {

        ensureOpen();
        Norm norm = (Norm) norms.get(field);
        if (norm == null) {
            System.arraycopy(fakeNorms(), 0, bytes, offset, maxDoc());
            return;
        }

        synchronized (norm) {
            if (norm.bytes != null) { // can copy from cache
                System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
                return;
            }

            // Read from disk. norm.in may be shared across multiple norms and
            // should only be used in a synchronized context.
            IndexInput normStream;
            if (norm.useSingleNormStream) {
                normStream = singleNormStream;
            } else {
                normStream = norm.in;
            }
            normStream.seek(norm.normSeek);
            normStream.readBytes(bytes, offset, maxDoc());
        }
    }

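    // In the shared .nrm file, each norm-bearing field's bytes sit at a fixed
    // offset: the header, plus maxDoc() bytes for every norm-bearing field
    // before it (nextNormSeek below advances even when a field's norms live
    // in a separate file, keeping offsets stable).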
    private void openNorms(Directory cfsDir, int readBufferSize)
            throws IOException {
        long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
        int maxDoc = maxDoc();
        for (int i = 0; i < fieldInfos.size(); i++) {
            FieldInfo fi = fieldInfos.fieldInfo(i);
            if (norms.containsKey(fi.name)) {
                // in case this SegmentReader is being re-opened, we might be able to
                // reuse some norm instances and skip loading them here
                continue;
            }
            if (fi.isIndexed && !fi.omitNorms) {
                Directory d = directory();
                String fileName = si.getNormFileName(fi.number);
                if (!si.hasSeparateNorms(fi.number)) {
                    d = cfsDir;
                }

                // singleNormFile means multiple norms share this file
                boolean singleNormFile = fileName.endsWith("."
                        + IndexFileNames.NORMS_EXTENSION);
                IndexInput normInput = null;
                long normSeek;

                if (singleNormFile) {
                    normSeek = nextNormSeek;
                    if (singleNormStream == null) {
                        singleNormStream = d.openInput(fileName,
                                readBufferSize);
                    }
                    // All norms in the .nrm file can share a single IndexInput since
                    // they are only used in a synchronized context.
                    // If this were to change in the future, a clone could be done here.
                    normInput = singleNormStream;
                } else {
                    normSeek = 0;
                    normInput = d.openInput(fileName);
                }

                norms.put(fi.name, new Norm(normInput, singleNormFile,
                        fi.number, normSeek));
                nextNormSeek += maxDoc; // increment also if some norms are separate
            }
        }
    }

    // for testing only
    boolean normsClosed() {
        if (singleNormStream != null) {
            return false;
        }
        Iterator it = norms.values().iterator();
        while (it.hasNext()) {
            Norm norm = (Norm) it.next();
            if (norm.refCount > 0) {
                return false;
            }
        }
        return true;
    }

    // for testing only
    boolean normsClosed(String field) {
        Norm norm = (Norm) norms.get(field);
        return norm.refCount == 0;
    }

    /**
     * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
     * @return TermVectorsReader
     */
    private TermVectorsReader getTermVectorsReader() {
        TermVectorsReader tvReader = (TermVectorsReader) termVectorsLocal
                .get();
        if (tvReader == null) {
            tvReader = (TermVectorsReader) termVectorsReaderOrig.clone();
            termVectorsLocal.set(tvReader);
        }
        return tvReader;
    }

    /** Return a term frequency vector for the specified document and field. The
     *  vector returned contains term numbers and frequencies for all terms in
     *  the specified field of this document, if the field had the storeTermVector
     *  flag set. If the flag was not set, the method returns null.
     * @throws IOException
     */
    public TermFreqVector getTermFreqVector(int docNumber, String field)
            throws IOException {
        // Check if this field is invalid or has no stored term vector
        ensureOpen();
        FieldInfo fi = fieldInfos.fieldInfo(field);
        if (fi == null || !fi.storeTermVector
                || termVectorsReaderOrig == null)
            return null;

        TermVectorsReader termVectorsReader = getTermVectorsReader();
        if (termVectorsReader == null)
            return null;

        return termVectorsReader.get(docNumber, field);
    }

    public void getTermFreqVector(int docNumber, String field,
            TermVectorMapper mapper) throws IOException {
        ensureOpen();
        FieldInfo fi = fieldInfos.fieldInfo(field);
        if (fi == null || !fi.storeTermVector
                || termVectorsReaderOrig == null)
            return;

        TermVectorsReader termVectorsReader = getTermVectorsReader();
        if (termVectorsReader == null) {
            return;
        }

        termVectorsReader.get(docNumber, field, mapper);
    }

    public void getTermFreqVector(int docNumber, TermVectorMapper mapper)
            throws IOException {
        ensureOpen();
        if (termVectorsReaderOrig == null)
            return;

        TermVectorsReader termVectorsReader = getTermVectorsReader();
        if (termVectorsReader == null)
            return;

        termVectorsReader.get(docNumber, mapper);
    }

    /** Return an array of term frequency vectors for the specified document.
     *  The array contains a vector for each vectorized field in the document.
     *  Each vector contains term numbers and frequencies for all terms
     *  in a given vectorized field.
     *  If no such fields existed, the method returns null.
     * @throws IOException
     */
    public TermFreqVector[] getTermFreqVectors(int docNumber)
            throws IOException {
        ensureOpen();
        if (termVectorsReaderOrig == null)
            return null;

        TermVectorsReader termVectorsReader = getTermVectorsReader();
        if (termVectorsReader == null)
            return null;

        return termVectorsReader.get(docNumber);
    }

    /** Returns the field infos of this segment */
    FieldInfos fieldInfos() {
        return fieldInfos;
    }

    /**
     * Return the name of the segment this reader is reading.
     */
    String getSegmentName() {
        return segment;
    }

    /**
     * Return the SegmentInfo of the segment this reader is reading.
     */
    SegmentInfo getSegmentInfo() {
        return si;
    }

    void setSegmentInfo(SegmentInfo info) {
        si = info;
    }

    void startCommit() {
        super.startCommit();
        rollbackDeletedDocsDirty = deletedDocsDirty;
        rollbackNormsDirty = normsDirty;
        rollbackUndeleteAll = undeleteAll;
        Iterator it = norms.values().iterator();
        while (it.hasNext()) {
            Norm norm = (Norm) it.next();
            norm.rollbackDirty = norm.dirty;
        }
    }

    void rollbackCommit() {
        super.rollbackCommit();
        deletedDocsDirty = rollbackDeletedDocsDirty;
        normsDirty = rollbackNormsDirty;
        undeleteAll = rollbackUndeleteAll;
        Iterator it = norms.values().iterator();
        while (it.hasNext()) {
            Norm norm = (Norm) it.next();
            norm.dirty = norm.rollbackDirty;
        }
    }
}
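
For orientation, here is a minimal sketch of how this package-private factory might be driven from other code in org.apache.lucene.index. The index path and the cleanup shown are illustrative assumptions, not part of this file:

    // Illustrative sketch (not part of SegmentReader.java): open the most
    // recent commit point and read one segment through the factory above.
    Directory dir = FSDirectory.getDirectory("/path/to/index"); // hypothetical path
    SegmentInfos infos = new SegmentInfos();
    infos.read(dir);                          // load the latest segments_N file
    SegmentReader reader = SegmentReader.get(infos.info(0));
    try {
        int live = reader.numDocs();          // maxDoc() minus deleted docs
        TermEnum terms = reader.terms();      // this segment's term dictionary
        terms.close();
    } finally {
        reader.close();                       // releases shared norms and files
    }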