001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.IOException;
021: import org.apache.lucene.store.IndexInput;
022:
023: final class SegmentTermEnum extends TermEnum implements Cloneable {
024: private IndexInput input;
025: FieldInfos fieldInfos;
026: long size;
027: long position = -1;
028:
029: private TermBuffer termBuffer = new TermBuffer();
030: private TermBuffer prevBuffer = new TermBuffer();
031: private TermBuffer scratch; // used for scanning
032:
033: private TermInfo termInfo = new TermInfo();
034:
035: private int format;
036: private boolean isIndex = false;
037: long indexPointer = 0;
038: int indexInterval;
039: int skipInterval;
040: int maxSkipLevels;
041: private int formatM1SkipInterval;
042:
043: SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi)
044: throws CorruptIndexException, IOException {
045: input = i;
046: fieldInfos = fis;
047: isIndex = isi;
048: maxSkipLevels = 1; // use single-level skip lists for formats > -3
049:
050: int firstInt = input.readInt();
051: if (firstInt >= 0) {
052: // original-format file, without explicit format version number
053: format = 0;
054: size = firstInt;
055:
056: // back-compatible settings
057: indexInterval = 128;
058: skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization
059: } else {
060: // we have a format version number
061: format = firstInt;
062:
063: // check that it is a format we can understand
064: if (format < TermInfosWriter.FORMAT)
065: throw new CorruptIndexException(
066: "Unknown format version:" + format);
067:
068: size = input.readLong(); // read the size
069:
070: if (format == -1) {
071: if (!isIndex) {
072: indexInterval = input.readInt();
073: formatM1SkipInterval = input.readInt();
074: }
075: // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
076: // skipTo implementation of these versions
077: skipInterval = Integer.MAX_VALUE;
078: } else {
079: indexInterval = input.readInt();
080: skipInterval = input.readInt();
081: if (format == -3) {
082: // this new format introduces multi-level skipping
083: maxSkipLevels = input.readInt();
084: }
085: }
086: }
087:
088: }
089:
090: protected Object clone() {
091: SegmentTermEnum clone = null;
092: try {
093: clone = (SegmentTermEnum) super .clone();
094: } catch (CloneNotSupportedException e) {
095: }
096:
097: clone.input = (IndexInput) input.clone();
098: clone.termInfo = new TermInfo(termInfo);
099:
100: clone.termBuffer = (TermBuffer) termBuffer.clone();
101: clone.prevBuffer = (TermBuffer) prevBuffer.clone();
102: clone.scratch = null;
103:
104: return clone;
105: }
106:
107: final void seek(long pointer, int p, Term t, TermInfo ti)
108: throws IOException {
109: input.seek(pointer);
110: position = p;
111: termBuffer.set(t);
112: prevBuffer.reset();
113: termInfo.set(ti);
114: }
115:
116: /** Increments the enumeration to the next element. True if one exists.*/
117: public final boolean next() throws IOException {
118: if (position++ >= size - 1) {
119: prevBuffer.set(termBuffer);
120: termBuffer.reset();
121: return false;
122: }
123:
124: prevBuffer.set(termBuffer);
125: termBuffer.read(input, fieldInfos);
126:
127: termInfo.docFreq = input.readVInt(); // read doc freq
128: termInfo.freqPointer += input.readVLong(); // read freq pointer
129: termInfo.proxPointer += input.readVLong(); // read prox pointer
130:
131: if (format == -1) {
132: // just read skipOffset in order to increment file pointer;
133: // value is never used since skipTo is switched off
134: if (!isIndex) {
135: if (termInfo.docFreq > formatM1SkipInterval) {
136: termInfo.skipOffset = input.readVInt();
137: }
138: }
139: } else {
140: if (termInfo.docFreq >= skipInterval)
141: termInfo.skipOffset = input.readVInt();
142: }
143:
144: if (isIndex)
145: indexPointer += input.readVLong(); // read index pointer
146:
147: return true;
148: }
149:
150: /** Optimized scan, without allocating new terms. */
151: final void scanTo(Term term) throws IOException {
152: if (scratch == null)
153: scratch = new TermBuffer();
154: scratch.set(term);
155: while (scratch.compareTo(termBuffer) > 0 && next()) {
156: }
157: }
158:
159: /** Returns the current Term in the enumeration.
160: Initially invalid, valid after next() called for the first time.*/
161: public final Term term() {
162: return termBuffer.toTerm();
163: }
164:
165: /** Returns the previous Term enumerated. Initially null.*/
166: final Term prev() {
167: return prevBuffer.toTerm();
168: }
169:
170: /** Returns the current TermInfo in the enumeration.
171: Initially invalid, valid after next() called for the first time.*/
172: final TermInfo termInfo() {
173: return new TermInfo(termInfo);
174: }
175:
176: /** Sets the argument to the current TermInfo in the enumeration.
177: Initially invalid, valid after next() called for the first time.*/
178: final void termInfo(TermInfo ti) {
179: ti.set(termInfo);
180: }
181:
182: /** Returns the docFreq from the current TermInfo in the enumeration.
183: Initially invalid, valid after next() called for the first time.*/
184: public final int docFreq() {
185: return termInfo.docFreq;
186: }
187:
188: /* Returns the freqPointer from the current TermInfo in the enumeration.
189: Initially invalid, valid after next() called for the first time.*/
190: final long freqPointer() {
191: return termInfo.freqPointer;
192: }
193:
194: /* Returns the proxPointer from the current TermInfo in the enumeration.
195: Initially invalid, valid after next() called for the first time.*/
196: final long proxPointer() {
197: return termInfo.proxPointer;
198: }
199:
200: /** Closes the enumeration to further activity, freeing resources. */
201: public final void close() throws IOException {
202: input.close();
203: }
204: }
|