001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.IOException;
021: import org.apache.lucene.util.BitVector;
022: import org.apache.lucene.store.IndexInput;
023:
024: class SegmentTermDocs implements TermDocs {
025: protected SegmentReader parent;
026: protected IndexInput freqStream;
027: protected int count;
028: protected int df;
029: protected BitVector deletedDocs;
030: int doc = 0;
031: int freq;
032:
033: private int skipInterval;
034: private int maxSkipLevels;
035: private DefaultSkipListReader skipListReader;
036:
037: private long freqBasePointer;
038: private long proxBasePointer;
039:
040: private long skipPointer;
041: private boolean haveSkipped;
042:
043: protected boolean currentFieldStoresPayloads;
044:
045: protected SegmentTermDocs(SegmentReader parent) {
046: this .parent = parent;
047: this .freqStream = (IndexInput) parent.freqStream.clone();
048: this .deletedDocs = parent.deletedDocs;
049: this .skipInterval = parent.tis.getSkipInterval();
050: this .maxSkipLevels = parent.tis.getMaxSkipLevels();
051: }
052:
053: public void seek(Term term) throws IOException {
054: TermInfo ti = parent.tis.get(term);
055: seek(ti, term);
056: }
057:
058: public void seek(TermEnum termEnum) throws IOException {
059: TermInfo ti;
060: Term term;
061:
062: // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs
063: if (termEnum instanceof SegmentTermEnum
064: && ((SegmentTermEnum) termEnum).fieldInfos == parent.fieldInfos) { // optimized case
065: SegmentTermEnum segmentTermEnum = ((SegmentTermEnum) termEnum);
066: term = segmentTermEnum.term();
067: ti = segmentTermEnum.termInfo();
068: } else { // punt case
069: term = termEnum.term();
070: ti = parent.tis.get(term);
071: }
072:
073: seek(ti, term);
074: }
075:
076: void seek(TermInfo ti, Term term) throws IOException {
077: count = 0;
078: FieldInfo fi = parent.fieldInfos.fieldInfo(term.field);
079: currentFieldStoresPayloads = (fi != null) ? fi.storePayloads
080: : false;
081: if (ti == null) {
082: df = 0;
083: } else {
084: df = ti.docFreq;
085: doc = 0;
086: freqBasePointer = ti.freqPointer;
087: proxBasePointer = ti.proxPointer;
088: skipPointer = freqBasePointer + ti.skipOffset;
089: freqStream.seek(freqBasePointer);
090: haveSkipped = false;
091: }
092: }
093:
094: public void close() throws IOException {
095: freqStream.close();
096: if (skipListReader != null)
097: skipListReader.close();
098: }
099:
100: public final int doc() {
101: return doc;
102: }
103:
104: public final int freq() {
105: return freq;
106: }
107:
108: protected void skippingDoc() throws IOException {
109: }
110:
111: public boolean next() throws IOException {
112: while (true) {
113: if (count == df)
114: return false;
115:
116: int docCode = freqStream.readVInt();
117: doc += docCode >>> 1; // shift off low bit
118: if ((docCode & 1) != 0) // if low bit is set
119: freq = 1; // freq is one
120: else
121: freq = freqStream.readVInt(); // else read freq
122:
123: count++;
124:
125: if (deletedDocs == null || !deletedDocs.get(doc))
126: break;
127: skippingDoc();
128: }
129: return true;
130: }
131:
132: /** Optimized implementation. */
133: public int read(final int[] docs, final int[] freqs)
134: throws IOException {
135: final int length = docs.length;
136: int i = 0;
137: while (i < length && count < df) {
138:
139: // manually inlined call to next() for speed
140: final int docCode = freqStream.readVInt();
141: doc += docCode >>> 1; // shift off low bit
142: if ((docCode & 1) != 0) // if low bit is set
143: freq = 1; // freq is one
144: else
145: freq = freqStream.readVInt(); // else read freq
146: count++;
147:
148: if (deletedDocs == null || !deletedDocs.get(doc)) {
149: docs[i] = doc;
150: freqs[i] = freq;
151: ++i;
152: }
153: }
154: return i;
155: }
156:
157: /** Overridden by SegmentTermPositions to skip in prox stream. */
158: protected void skipProx(long proxPointer, int payloadLength)
159: throws IOException {
160: }
161:
162: /** Optimized implementation. */
163: public boolean skipTo(int target) throws IOException {
164: if (df >= skipInterval) { // optimized case
165: if (skipListReader == null)
166: skipListReader = new DefaultSkipListReader(
167: (IndexInput) freqStream.clone(), maxSkipLevels,
168: skipInterval); // lazily clone
169:
170: if (!haveSkipped) { // lazily initialize skip stream
171: skipListReader
172: .init(skipPointer, freqBasePointer,
173: proxBasePointer, df,
174: currentFieldStoresPayloads);
175: haveSkipped = true;
176: }
177:
178: int newCount = skipListReader.skipTo(target);
179: if (newCount > count) {
180: freqStream.seek(skipListReader.getFreqPointer());
181: skipProx(skipListReader.getProxPointer(),
182: skipListReader.getPayloadLength());
183:
184: doc = skipListReader.getDoc();
185: count = newCount;
186: }
187: }
188:
189: // done skipping, now just scan
190: do {
191: if (!next())
192: return false;
193: } while (target > doc);
194: return true;
195: }
196: }
|