package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BitVector;

/**
 * @version $Id: SegmentReader.java 603061 2007-12-10 21:49:41Z gsingers $
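 *
 * <p>IndexReader implementation that reads a single index segment.</p>
 *
 * <p>A minimal usage sketch (hedged: the directory variable <code>dir</code>
 * and the choice of the first segment are illustrative only):</p>
 * <pre>
 *   SegmentInfos infos = new SegmentInfos();
 *   infos.read(dir);                                // read the segments file
 *   SegmentReader reader = SegmentReader.get(infos.info(0));
 *   try {
 *     int live = reader.numDocs();                  // maxDoc() minus deletions
 *   } finally {
 *     reader.close();
 *   }
 * </pre>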
 */
class SegmentReader extends DirectoryIndexReader {
  private String segment;
  private SegmentInfo si;
  private int readBufferSize;

  FieldInfos fieldInfos;
  private FieldsReader fieldsReader;

  TermInfosReader tis;
  TermVectorsReader termVectorsReaderOrig = null;
  ThreadLocal termVectorsLocal = new ThreadLocal();

  BitVector deletedDocs = null;
  private boolean deletedDocsDirty = false;
  private boolean normsDirty = false;
  private boolean undeleteAll = false;

  private boolean rollbackDeletedDocsDirty = false;
  private boolean rollbackNormsDirty = false;
  private boolean rollbackUndeleteAll = false;

  IndexInput freqStream;
  IndexInput proxStream;

  // optionally used for the .nrm file shared by multiple norms
  private IndexInput singleNormStream;

  // Compound File Reader when based on a compound file segment
  CompoundFileReader cfsReader = null;
  CompoundFileReader storeCFSReader = null;

  // indicates the SegmentReader with which the resources are being shared,
  // in case this is a re-opened reader
  private SegmentReader referencedSegmentReader = null;

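  /**
   * Per-field norms, read lazily from the norm file.  Each Norm is
   * reference-counted so that a re-opened SegmentReader can share the
   * unchanged norms of the reader it was derived from; the underlying
   * IndexInput is closed once the last reference is released (or as soon
   * as the bytes have been cached, see getNorms).
   */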
  private class Norm {
    volatile int refCount;
    boolean useSingleNormStream;

    public synchronized void incRef() {
      assert refCount > 0;
      refCount++;
    }

    public synchronized void decRef() throws IOException {
      assert refCount > 0;
      if (refCount == 1) {
        close();
      }
      refCount--;
    }

    public Norm(IndexInput in, boolean useSingleNormStream, int number, long normSeek) {
      refCount = 1;
      this.in = in;
      this.number = number;
      this.normSeek = normSeek;
      this.useSingleNormStream = useSingleNormStream;
    }

    private IndexInput in;
    private byte[] bytes;
    private boolean dirty;
    private int number;
    private long normSeek;
    private boolean rollbackDirty;

    private void reWrite(SegmentInfo si) throws IOException {
      // NOTE: norms are re-written in regular directory, not cfs
      si.advanceNormGen(this.number);
      IndexOutput out = directory().createOutput(si.getNormFileName(this.number));
      try {
        out.writeBytes(bytes, maxDoc());
      } finally {
        out.close();
      }
      this.dirty = false;
    }

    /** Closes the underlying IndexInput for this norm.
     * It is still valid to access all other norm properties after close is called.
     * @throws IOException
     */
    private synchronized void close() throws IOException {
      if (in != null && !useSingleNormStream) {
        in.close();
      }
      in = null;
    }
  }

  /**
   * Increments the reference count of this reader, as well as
   * of all norms this reader is using.
   */
  protected synchronized void incRef() {
    super.incRef();
    Iterator it = norms.values().iterator();
    while (it.hasNext()) {
      Norm norm = (Norm) it.next();
      norm.incRef();
    }
  }

  /**
   * Only increments the reference count of this reader, not of
   * the norms.  This is important whenever a reopen() creates a
   * new SegmentReader that doesn't share the norms with this one.
   */
  private synchronized void incRefReaderNotNorms() {
    super.incRef();
  }

  protected synchronized void decRef() throws IOException {
    super.decRef();
    Iterator it = norms.values().iterator();
    while (it.hasNext()) {
      Norm norm = (Norm) it.next();
      norm.decRef();
    }
  }

  private synchronized void decRefReaderNotNorms() throws IOException {
    super.decRef();
  }

  Map norms = new HashMap();

  /** The class which implements SegmentReader. */
  private static Class IMPL;
  static {
    try {
      String name = System.getProperty("org.apache.lucene.SegmentReader.class",
                                       SegmentReader.class.getName());
      IMPL = Class.forName(name);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e, e);
    } catch (SecurityException se) {
      try {
        IMPL = Class.forName(SegmentReader.class.getName());
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("cannot load default SegmentReader class: " + e, e);
      }
    }
  }
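  // The concrete class instantiated by the get(...) factories below can be
  // overridden through the system property read above.  A hedged sketch (the
  // subclass name is purely hypothetical; it must extend SegmentReader and
  // have a public no-arg constructor, since IMPL.newInstance() is used):
  //
  //   System.setProperty("org.apache.lucene.SegmentReader.class",
  //                      "com.example.MySegmentReader");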

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
    return get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true);
  }

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  static SegmentReader get(SegmentInfo si, boolean doOpenStores)
      throws CorruptIndexException, IOException {
    return get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, doOpenStores);
  }

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(SegmentInfo si, int readBufferSize)
      throws CorruptIndexException, IOException {
    return get(si.dir, si, null, false, false, readBufferSize, true);
  }

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores)
      throws CorruptIndexException, IOException {
    return get(si.dir, si, null, false, false, readBufferSize, doOpenStores);
  }

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(SegmentInfos sis, SegmentInfo si, boolean closeDir)
      throws CorruptIndexException, IOException {
    return get(si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE, true);
  }

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(Directory dir, SegmentInfo si, SegmentInfos sis,
      boolean closeDir, boolean ownDir, int readBufferSize)
      throws CorruptIndexException, IOException {
    return get(dir, si, sis, closeDir, ownDir, readBufferSize, true);
  }

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(Directory dir, SegmentInfo si, SegmentInfos sis,
      boolean closeDir, boolean ownDir, int readBufferSize, boolean doOpenStores)
      throws CorruptIndexException, IOException {
    SegmentReader instance;
    try {
      instance = (SegmentReader) IMPL.newInstance();
    } catch (Exception e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e, e);
    }
    instance.init(dir, sis, closeDir);
    instance.initialize(si, readBufferSize, doOpenStores);
    return instance;
  }

  private void initialize(SegmentInfo si, int readBufferSize, boolean doOpenStores)
      throws CorruptIndexException, IOException {
    segment = si.name;
    this.si = si;
    this.readBufferSize = readBufferSize;

    boolean success = false;

    try {
      // Use compound file directory for some files, if it exists
      Directory cfsDir = directory();
      if (si.getUseCompoundFile()) {
        cfsReader = new CompoundFileReader(directory(),
            segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
        cfsDir = cfsReader;
      }

      final Directory storeDir;

      if (doOpenStores) {
        if (si.getDocStoreOffset() != -1) {
          if (si.getDocStoreIsCompoundFile()) {
            storeCFSReader = new CompoundFileReader(directory(),
                si.getDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION,
                readBufferSize);
            storeDir = storeCFSReader;
          } else {
            storeDir = directory();
          }
        } else {
          storeDir = cfsDir;
        }
      } else {
        storeDir = null;
      }

      fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");

      final String fieldsSegment;

      if (si.getDocStoreOffset() != -1)
        fieldsSegment = si.getDocStoreSegment();
      else
        fieldsSegment = segment;

      if (doOpenStores) {
        fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos,
            readBufferSize, si.getDocStoreOffset(), si.docCount);

        // Verify two sources of "maxDoc" agree:
        if (si.getDocStoreOffset() == -1 && fieldsReader.size() != si.docCount) {
          throw new CorruptIndexException("doc counts differ for segment " + si.name
              + ": fieldsReader shows " + fieldsReader.size()
              + " but segmentInfo shows " + si.docCount);
        }
      }

      tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

      loadDeletedDocs();

      // make sure that all index files have been read or are kept open
      // so that if an index update removes them we'll still have them
      freqStream = cfsDir.openInput(segment + ".frq", readBufferSize);
      proxStream = cfsDir.openInput(segment + ".prx", readBufferSize);
      openNorms(cfsDir, readBufferSize);

      if (doOpenStores && fieldInfos.hasVectors()) { // open term vector files only as needed
        final String vectorsSegment;
        if (si.getDocStoreOffset() != -1)
          vectorsSegment = si.getDocStoreSegment();
        else
          vectorsSegment = segment;
        termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment,
            fieldInfos, readBufferSize, si.getDocStoreOffset(), si.docCount);
      }
      success = true;
    } finally {

      // With lock-less commits, it's entirely possible (and
      // fine) to hit a FileNotFound exception above.  In
      // this case, we want to explicitly close any subset
      // of things that were opened so that we don't have to
      // wait for a GC to do so.
      if (!success) {
        doClose();
      }
    }
  }

  private void loadDeletedDocs() throws IOException {
    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (hasDeletions(si)) {
      deletedDocs = new BitVector(directory(), si.getDelFileName());

      // Verify # deletes does not exceed maxDoc for this segment:
      if (deletedDocs.count() > maxDoc()) {
        throw new CorruptIndexException("number of deletes (" + deletedDocs.count()
            + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name);
      }
    }
  }

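  // Called by reopen() when the index has changed.  Depending on what changed,
  // this returns either this reader itself, a new SegmentReader that shares
  // the unchanged resources of this one (see reopenSegment), or a
  // MultiSegmentReader when the index now consists of more than one segment.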
  protected synchronized DirectoryIndexReader doReopen(SegmentInfos infos)
      throws CorruptIndexException, IOException {
    DirectoryIndexReader newReader;

    if (infos.size() == 1) {
      SegmentInfo si = infos.info(0);
      if (segment.equals(si.name)
          && si.getUseCompoundFile() == SegmentReader.this.si.getUseCompoundFile()) {
        newReader = reopenSegment(si);
      } else {
        // segment not referenced anymore, reopen not possible
        // or segment format changed
        newReader = SegmentReader.get(infos, infos.info(0), false);
      }
    } else {
      return new MultiSegmentReader(directory, infos, closeDirectory,
          new SegmentReader[] { this }, null, null);
    }

    return newReader;
  }

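  /**
   * Re-opens this segment against a newer SegmentInfo.  If neither the
   * deletions nor the norms have changed, this reader itself is returned;
   * otherwise a clone is created that shares the unchanged resources
   * (term infos, freq/prox streams, unchanged norms) and loads only what
   * actually changed.  A new FieldsReader is always created because
   * FieldsReader is not thread-safe.
   */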
  synchronized SegmentReader reopenSegment(SegmentInfo si)
      throws CorruptIndexException, IOException {
    boolean deletionsUpToDate = (this.si.hasDeletions() == si.hasDeletions())
        && (!si.hasDeletions() || this.si.getDelFileName().equals(si.getDelFileName()));
    boolean normsUpToDate = true;

    boolean[] fieldNormsChanged = new boolean[fieldInfos.size()];
    if (normsUpToDate) {
      for (int i = 0; i < fieldInfos.size(); i++) {
        if (!this.si.getNormFileName(i).equals(si.getNormFileName(i))) {
          normsUpToDate = false;
          fieldNormsChanged[i] = true;
        }
      }
    }

    if (normsUpToDate && deletionsUpToDate) {
      return this;
    }

    // clone reader
    SegmentReader clone = new SegmentReader();
    boolean success = false;
    try {
      clone.directory = directory;
      clone.si = si;
      clone.segment = segment;
      clone.readBufferSize = readBufferSize;
      clone.cfsReader = cfsReader;
      clone.storeCFSReader = storeCFSReader;

      clone.fieldInfos = fieldInfos;
      clone.tis = tis;
      clone.freqStream = freqStream;
      clone.proxStream = proxStream;
      clone.termVectorsReaderOrig = termVectorsReaderOrig;

      // we have to open a new FieldsReader, because it is not thread-safe
      // and can thus not be shared among multiple SegmentReaders
      // TODO: Change this in case FieldsReader becomes thread-safe in the future
      final String fieldsSegment;

      Directory storeDir = directory();

      if (si.getDocStoreOffset() != -1) {
        fieldsSegment = si.getDocStoreSegment();
        if (storeCFSReader != null) {
          storeDir = storeCFSReader;
        }
      } else {
        fieldsSegment = segment;
        if (cfsReader != null) {
          storeDir = cfsReader;
        }
      }

      if (fieldsReader != null) {
        clone.fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos,
            readBufferSize, si.getDocStoreOffset(), si.docCount);
      }

      if (!deletionsUpToDate) {
        // load deleted docs
        clone.deletedDocs = null;
        clone.loadDeletedDocs();
      } else {
        clone.deletedDocs = this.deletedDocs;
      }

      clone.norms = new HashMap();
      if (!normsUpToDate) {
        // load norms
        for (int i = 0; i < fieldNormsChanged.length; i++) {
          // copy unchanged norms to the cloned reader and incRef those norms
          if (!fieldNormsChanged[i]) {
            String curField = fieldInfos.fieldInfo(i).name;
            Norm norm = (Norm) this.norms.get(curField);
            norm.incRef();
            clone.norms.put(curField, norm);
          }
        }

        clone.openNorms(si.getUseCompoundFile() ? cfsReader : directory(), readBufferSize);
      } else {
        Iterator it = norms.keySet().iterator();
        while (it.hasNext()) {
          String field = (String) it.next();
          Norm norm = (Norm) norms.get(field);
          norm.incRef();
          clone.norms.put(field, norm);
        }
      }

      if (clone.singleNormStream == null) {
        for (int i = 0; i < fieldInfos.size(); i++) {
          FieldInfo fi = fieldInfos.fieldInfo(i);
          if (fi.isIndexed && !fi.omitNorms) {
            Directory d = si.getUseCompoundFile() ? cfsReader : directory();
            String fileName = si.getNormFileName(fi.number);
            if (si.hasSeparateNorms(fi.number)) {
              continue;
            }

            if (fileName.endsWith("." + IndexFileNames.NORMS_EXTENSION)) {
              clone.singleNormStream = d.openInput(fileName, readBufferSize);
              break;
            }
          }
        }
      }

      success = true;
    } finally {
      if (this.referencedSegmentReader != null) {
        // this reader shares resources with another SegmentReader,
        // so we increment the other reader's refCount.  We don't
        // increment the refCount of the norms because we did
        // that already for the shared norms
        clone.referencedSegmentReader = this.referencedSegmentReader;
        referencedSegmentReader.incRefReaderNotNorms();
      } else {
        // this reader wasn't reopened, so we increment this
        // reader's refCount
        clone.referencedSegmentReader = this;
        incRefReaderNotNorms();
      }

      if (!success) {
        // An exception occurred during reopen; we have to decRef the norms
        // that we incRef'ed already and close singleNormStream and FieldsReader
        clone.decRef();
      }
    }

    return clone;
  }

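  // Writes any pending changes held by this reader back to the index: a new
  // deletions file if documents were deleted (or the deletions generation is
  // cleared after undeleteAll), and a new generation of every dirty norm
  // file.  The new files only become live once the segments file is written.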
  protected void commitChanges() throws IOException {
    if (deletedDocsDirty) {               // re-write deleted
      si.advanceDelGen();

      // We can write directly to the actual name (vs to a
      // .tmp & renaming it) because the file is not live
      // until segments file is written:
      deletedDocs.write(directory(), si.getDelFileName());
    }
    if (undeleteAll && si.hasDeletions()) {
      si.clearDelGen();
    }
    if (normsDirty) {                     // re-write norms
      si.setNumFields(fieldInfos.size());
      Iterator it = norms.values().iterator();
      while (it.hasNext()) {
        Norm norm = (Norm) it.next();
        if (norm.dirty) {
          norm.reWrite(si);
        }
      }
    }
    deletedDocsDirty = false;
    normsDirty = false;
    undeleteAll = false;
  }

  FieldsReader getFieldsReader() {
    return fieldsReader;
  }

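  // Closes this reader's private resources.  Resources that may be shared
  // with the reader this one was re-opened from (term infos, freq/prox
  // streams, term vectors, compound file readers) are only closed when no
  // other reader references them anymore.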
  protected void doClose() throws IOException {
    boolean hasReferencedReader = (referencedSegmentReader != null);

    if (hasReferencedReader) {
      referencedSegmentReader.decRefReaderNotNorms();
      referencedSegmentReader = null;
    }

    deletedDocs = null;

    // close the single norms stream
    if (singleNormStream != null) {
      // we can close this stream, even if the norms
      // are shared, because every reader has its own
      // singleNormStream
      singleNormStream.close();
      singleNormStream = null;
    }

    // re-opened SegmentReaders have their own instance of FieldsReader
    if (fieldsReader != null) {
      fieldsReader.close();
    }

    if (!hasReferencedReader) {
      // close everything, nothing is shared anymore with other readers
      if (tis != null) {
        tis.close();
      }

      if (freqStream != null)
        freqStream.close();
      if (proxStream != null)
        proxStream.close();

      if (termVectorsReaderOrig != null)
        termVectorsReaderOrig.close();

      if (cfsReader != null)
        cfsReader.close();

      if (storeCFSReader != null)
        storeCFSReader.close();

      // maybe close directory
      super.doClose();
    }
  }

  static boolean hasDeletions(SegmentInfo si) throws IOException {
    // Don't call ensureOpen() here (it could affect performance)
    return si.hasDeletions();
  }

  public boolean hasDeletions() {
    // Don't call ensureOpen() here (it could affect performance)
    return deletedDocs != null;
  }

  static boolean usesCompoundFile(SegmentInfo si) throws IOException {
    return si.getUseCompoundFile();
  }

  static boolean hasSeparateNorms(SegmentInfo si) throws IOException {
    return si.hasSeparateNorms();
  }

  protected void doDelete(int docNum) {
    if (deletedDocs == null)
      deletedDocs = new BitVector(maxDoc());
    deletedDocsDirty = true;
    undeleteAll = false;
    deletedDocs.set(docNum);
  }

  protected void doUndeleteAll() {
    deletedDocs = null;
    deletedDocsDirty = false;
    undeleteAll = true;
  }

  Vector files() throws IOException {
    return new Vector(si.files());
  }

  public TermEnum terms() {
    ensureOpen();
    return tis.terms();
  }

  public TermEnum terms(Term t) throws IOException {
    ensureOpen();
    return tis.terms(t);
  }

  FieldInfos getFieldInfos() {
    return fieldInfos;
  }

  /**
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public synchronized Document document(int n, FieldSelector fieldSelector)
      throws CorruptIndexException, IOException {
    ensureOpen();
    if (isDeleted(n))
      throw new IllegalArgumentException("attempt to access a deleted document");
    return fieldsReader.doc(n, fieldSelector);
  }

  public synchronized boolean isDeleted(int n) {
    return (deletedDocs != null && deletedDocs.get(n));
  }

  public TermDocs termDocs() throws IOException {
    ensureOpen();
    return new SegmentTermDocs(this);
  }

  public TermPositions termPositions() throws IOException {
    ensureOpen();
    return new SegmentTermPositions(this);
  }

  public int docFreq(Term t) throws IOException {
    ensureOpen();
    TermInfo ti = tis.get(t);
    if (ti != null)
      return ti.docFreq;
    else
      return 0;
  }

  public int numDocs() {
    // Don't call ensureOpen() here (it could affect performance)
    int n = maxDoc();
    if (deletedDocs != null)
      n -= deletedDocs.count();
    return n;
  }

  public int maxDoc() {
    // Don't call ensureOpen() here (it could affect performance)
    return si.docCount;
  }

  public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException {
    tis.setIndexDivisor(indexDivisor);
  }

  public int getTermInfosIndexDivisor() {
    return tis.getIndexDivisor();
  }

  /**
   * @see IndexReader#getFieldNames(IndexReader.FieldOption)
   */
  public Collection getFieldNames(IndexReader.FieldOption fieldOption) {
    ensureOpen();

    Set fieldSet = new HashSet();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fieldOption == IndexReader.FieldOption.ALL) {
        fieldSet.add(fi.name);
      } else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
        fieldSet.add(fi.name);
      } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
        fieldSet.add(fi.name);
      } else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) {
        fieldSet.add(fi.name);
      } else if (fi.isIndexed && fi.storeTermVector == false
          && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
        fieldSet.add(fi.name);
      } else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false
          && fi.storeOffsetWithTermVector == false
          && fieldOption == IndexReader.FieldOption.TERMVECTOR) {
        fieldSet.add(fi.name);
      } else if (fi.isIndexed && fi.storeTermVector
          && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
        fieldSet.add(fi.name);
      } else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false
          && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
        fieldSet.add(fi.name);
      } else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false
          && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
        fieldSet.add(fi.name);
      } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector)
          && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
        fieldSet.add(fi.name);
      }
    }
    return fieldSet;
  }

  public synchronized boolean hasNorms(String field) {
    ensureOpen();
    return norms.containsKey(field);
  }

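  /**
   * Returns a norms array of the given size in which every entry encodes a
   * boost of 1.0f; used in place of real norms when a field has no norms
   * stored (see fakeNorms() and norms(String)).
   */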
  static byte[] createFakeNorms(int size) {
    byte[] ones = new byte[size];
    Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
    return ones;
  }

  private byte[] ones;

  private byte[] fakeNorms() {
    if (ones == null)
      ones = createFakeNorms(maxDoc());
    return ones;
  }

  // can return null if norms aren't stored
  protected synchronized byte[] getNorms(String field) throws IOException {
    Norm norm = (Norm) norms.get(field);
    if (norm == null)
      return null;                        // not indexed, or norms not stored
    synchronized (norm) {
      if (norm.bytes == null) {           // value not yet read
        byte[] bytes = new byte[maxDoc()];
        norms(field, bytes, 0);
        norm.bytes = bytes;               // cache it
        // it's OK to close the underlying IndexInput as we have cached the
        // norms and will never read them again.
        norm.close();
      }
      return norm.bytes;
    }
  }

  // returns fake norms if norms aren't available
  public synchronized byte[] norms(String field) throws IOException {
    ensureOpen();
    byte[] bytes = getNorms(field);
    if (bytes == null)
      bytes = fakeNorms();
    return bytes;
  }

  protected void doSetNorm(int doc, String field, byte value) throws IOException {
    Norm norm = (Norm) norms.get(field);
    if (norm == null)                     // not an indexed field
      return;

    norm.dirty = true;                    // mark it dirty
    normsDirty = true;

    norms(field)[doc] = value;            // set the value
  }

  /** Read norms into a pre-allocated array. */
  public synchronized void norms(String field, byte[] bytes, int offset) throws IOException {

    ensureOpen();
    Norm norm = (Norm) norms.get(field);
    if (norm == null) {
      System.arraycopy(fakeNorms(), 0, bytes, offset, maxDoc());
      return;
    }

    synchronized (norm) {
      if (norm.bytes != null) {           // can copy from cache
        System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
        return;
      }

      // Read from disk.  norm.in may be shared across multiple norms and
      // should only be used in a synchronized context.
      IndexInput normStream;
      if (norm.useSingleNormStream) {
        normStream = singleNormStream;
      } else {
        normStream = norm.in;
      }
      normStream.seek(norm.normSeek);
      normStream.readBytes(bytes, offset, maxDoc());
    }
  }

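  // Opens the norms for every indexed field that has them.  In the shared
  // .nrm file each field's norms are a block of maxDoc() bytes written one
  // field after another (after a small header), which is why nextNormSeek
  // advances by maxDoc per field; a separate norm file (one per field, newer
  // generation) starts at offset 0 instead.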
  private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
    long nextNormSeek = SegmentMerger.NORMS_HEADER.length; // skip header (header unused for now)
    int maxDoc = maxDoc();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (norms.containsKey(fi.name)) {
        // in case this SegmentReader is being re-opened, we might be able to
        // reuse some norm instances and skip loading them here
        continue;
      }
      if (fi.isIndexed && !fi.omitNorms) {
        Directory d = directory();
        String fileName = si.getNormFileName(fi.number);
        if (!si.hasSeparateNorms(fi.number)) {
          d = cfsDir;
        }

        // singleNormFile means multiple norms share this file
        boolean singleNormFile = fileName.endsWith("." + IndexFileNames.NORMS_EXTENSION);
        IndexInput normInput = null;
        long normSeek;

        if (singleNormFile) {
          normSeek = nextNormSeek;
          if (singleNormStream == null) {
            singleNormStream = d.openInput(fileName, readBufferSize);
          }
          // All norms in the .nrm file can share a single IndexInput since
          // they are only used in a synchronized context.
          // If this were to change in the future, a clone could be done here.
          normInput = singleNormStream;
        } else {
          normSeek = 0;
          normInput = d.openInput(fileName);
        }

        norms.put(fi.name, new Norm(normInput, singleNormFile, fi.number, normSeek));
        nextNormSeek += maxDoc; // increment also if some norms are separate
      }
    }
  }

  // for testing only
  boolean normsClosed() {
    if (singleNormStream != null) {
      return false;
    }
    Iterator it = norms.values().iterator();
    while (it.hasNext()) {
      Norm norm = (Norm) it.next();
      if (norm.refCount > 0) {
        return false;
      }
    }
    return true;
  }

  // for testing only
  boolean normsClosed(String field) {
    Norm norm = (Norm) norms.get(field);
    return norm.refCount == 0;
  }

  /**
   * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
   * @return TermVectorsReader
   */
  private TermVectorsReader getTermVectorsReader() {
    TermVectorsReader tvReader = (TermVectorsReader) termVectorsLocal.get();
    if (tvReader == null) {
      tvReader = (TermVectorsReader) termVectorsReaderOrig.clone();
      termVectorsLocal.set(tvReader);
    }
    return tvReader;
  }

  /** Return a term frequency vector for the specified document and field. The
   * vector returned contains term numbers and frequencies for all terms in
   * the specified field of this document, if the field had the storeTermVector
   * flag set.  If the flag was not set, the method returns null.
   * @throws IOException
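   *
   * <p>A small sketch (assuming <code>reader</code> is an open SegmentReader
   * and the field name <code>"contents"</code>, which is illustrative only,
   * was indexed with term vectors):</p>
   * <pre>
   *   TermFreqVector v = reader.getTermFreqVector(0, "contents");
   *   if (v != null) {
   *     String[] terms = v.getTerms();            // terms in the field
   *     int[] freqs = v.getTermFrequencies();     // parallel frequencies
   *   }
   * </pre>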
   */
  public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
    // Check if this field is invalid or has no stored term vector
    ensureOpen();
    FieldInfo fi = fieldInfos.fieldInfo(field);
    if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
      return null;

    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return null;

    return termVectorsReader.get(docNumber, field);
  }

  public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper)
      throws IOException {
    ensureOpen();
    FieldInfo fi = fieldInfos.fieldInfo(field);
    if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
      return;

    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null) {
      return;
    }

    termVectorsReader.get(docNumber, field, mapper);
  }

  public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
    ensureOpen();
    if (termVectorsReaderOrig == null)
      return;

    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return;

    termVectorsReader.get(docNumber, mapper);
  }

  /** Return an array of term frequency vectors for the specified document.
   * The array contains a vector for each vectorized field in the document.
   * Each vector contains term numbers and frequencies for all terms
   * in a given vectorized field.
   * If no such fields existed, the method returns null.
   * @throws IOException
   */
  public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
    ensureOpen();
    if (termVectorsReaderOrig == null)
      return null;

    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return null;

    return termVectorsReader.get(docNumber);
  }

  /** Returns the field infos of this segment */
  FieldInfos fieldInfos() {
    return fieldInfos;
  }

  /**
   * Return the name of the segment this reader is reading.
   */
  String getSegmentName() {
    return segment;
  }

  /**
   * Return the SegmentInfo of the segment this reader is reading.
   */
  SegmentInfo getSegmentInfo() {
    return si;
  }

  void setSegmentInfo(SegmentInfo info) {
    si = info;
  }

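  // startCommit() snapshots the dirty state (deletions, norms) so that
  // rollbackCommit() can restore it if writing the new segments file fails.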
  void startCommit() {
    super.startCommit();
    rollbackDeletedDocsDirty = deletedDocsDirty;
    rollbackNormsDirty = normsDirty;
    rollbackUndeleteAll = undeleteAll;
    Iterator it = norms.values().iterator();
    while (it.hasNext()) {
      Norm norm = (Norm) it.next();
      norm.rollbackDirty = norm.dirty;
    }
  }

  void rollbackCommit() {
    super.rollbackCommit();
    deletedDocsDirty = rollbackDeletedDocsDirty;
    normsDirty = rollbackNormsDirty;
    undeleteAll = rollbackUndeleteAll;
    Iterator it = norms.values().iterator();
    while (it.hasNext()) {
      Norm norm = (Norm) it.next();
      norm.dirty = norm.rollbackDirty;
    }
  }
}