001: package org.apache.lucene.index;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.IOException;
021: import java.util.Arrays;
022:
023: import org.apache.lucene.store.BufferedIndexInput;
024: import org.apache.lucene.store.IndexInput;
025:
026: /**
027: * This abstract class reads skip lists with multiple levels.
028: *
029: * See {@link MultiLevelSkipListWriter} for the information about the encoding
030: * of the multi level skip lists.
031: *
032: * Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)}
033: * which defines the actual format of the skip data.
034: */
035: abstract class MultiLevelSkipListReader {
036: // the maximum number of skip levels possible for this index
037: private int maxNumberOfSkipLevels;
038:
039: // number of levels in this skip list
040: private int numberOfSkipLevels;
041:
042: // Expert: defines the number of top skip levels to buffer in memory.
043: // Reducing this number results in less memory usage, but possibly
044: // slower performance due to more random I/Os.
045: // Please notice that the space each level occupies is limited by
046: // the skipInterval. The top level can not contain more than
047: // skipLevel entries, the second top level can not contain more
048: // than skipLevel^2 entries and so forth.
049: private int numberOfLevelsToBuffer = 1;
050:
051: private int docCount;
052: private boolean haveSkipped;
053:
054: private IndexInput[] skipStream; // skipStream for each level
055: private long skipPointer[]; // the start pointer of each skip level
056: private int skipInterval[]; // skipInterval of each level
057: private int[] numSkipped; // number of docs skipped per level
058:
059: private int[] skipDoc; // doc id of current skip entry per level
060: private int lastDoc; // doc id of last read skip entry with docId <= target
061: private long[] childPointer; // child pointer of current skip entry per level
062: private long lastChildPointer; // childPointer of last read skip entry with docId <= target
063:
064: private boolean inputIsBuffered;
065:
066: public MultiLevelSkipListReader(IndexInput skipStream,
067: int maxSkipLevels, int skipInterval) {
068: this .skipStream = new IndexInput[maxSkipLevels];
069: this .skipPointer = new long[maxSkipLevels];
070: this .childPointer = new long[maxSkipLevels];
071: this .numSkipped = new int[maxSkipLevels];
072: this .maxNumberOfSkipLevels = maxSkipLevels;
073: this .skipInterval = new int[maxSkipLevels];
074: this .skipStream[0] = skipStream;
075: this .inputIsBuffered = (skipStream instanceof BufferedIndexInput);
076: this .skipInterval[0] = skipInterval;
077: for (int i = 1; i < maxSkipLevels; i++) {
078: // cache skip intervals
079: this .skipInterval[i] = this .skipInterval[i - 1]
080: * skipInterval;
081: }
082: skipDoc = new int[maxSkipLevels];
083: }
084:
085: /** Returns the id of the doc to which the last call of {@link #skipTo(int)}
086: * has skipped. */
087: int getDoc() {
088: return lastDoc;
089: }
090:
091: /** Skips entries to the first beyond the current whose document number is
092: * greater than or equal to <i>target</i>. Returns the current doc count.
093: */
094: int skipTo(int target) throws IOException {
095: if (!haveSkipped) {
096: // first time, load skip levels
097: loadSkipLevels();
098: haveSkipped = true;
099: }
100:
101: // walk up the levels until highest level is found that has a skip
102: // for this target
103: int level = 0;
104: while (level < numberOfSkipLevels - 1
105: && target > skipDoc[level + 1]) {
106: level++;
107: }
108:
109: while (level >= 0) {
110: if (target > skipDoc[level]) {
111: if (!loadNextSkip(level)) {
112: continue;
113: }
114: } else {
115: // no more skips on this level, go down one level
116: if (level > 0
117: && lastChildPointer > skipStream[level - 1]
118: .getFilePointer()) {
119: seekChild(level - 1);
120: }
121: level--;
122: }
123: }
124:
125: return numSkipped[0] - skipInterval[0] - 1;
126: }
127:
128: private boolean loadNextSkip(int level) throws IOException {
129: // we have to skip, the target document is greater than the current
130: // skip list entry
131: setLastSkipData(level);
132:
133: numSkipped[level] += skipInterval[level];
134:
135: if (numSkipped[level] > docCount) {
136: // this skip list is exhausted
137: skipDoc[level] = Integer.MAX_VALUE;
138: if (numberOfSkipLevels > level)
139: numberOfSkipLevels = level;
140: return false;
141: }
142:
143: // read next skip entry
144: skipDoc[level] += readSkipData(level, skipStream[level]);
145:
146: if (level != 0) {
147: // read the child pointer if we are not on the leaf level
148: childPointer[level] = skipStream[level].readVLong()
149: + skipPointer[level - 1];
150: }
151:
152: return true;
153:
154: }
155:
156: /** Seeks the skip entry on the given level */
157: protected void seekChild(int level) throws IOException {
158: skipStream[level].seek(lastChildPointer);
159: numSkipped[level] = numSkipped[level + 1]
160: - skipInterval[level + 1];
161: skipDoc[level] = lastDoc;
162: if (level > 0) {
163: childPointer[level] = skipStream[level].readVLong()
164: + skipPointer[level - 1];
165: }
166: }
167:
168: void close() throws IOException {
169: for (int i = 1; i < skipStream.length; i++) {
170: if (skipStream[i] != null) {
171: skipStream[i].close();
172: }
173: }
174: }
175:
176: /** initializes the reader */
177: void init(long skipPointer, int df) {
178: this .skipPointer[0] = skipPointer;
179: this .docCount = df;
180: Arrays.fill(skipDoc, 0);
181: Arrays.fill(numSkipped, 0);
182: Arrays.fill(childPointer, 0);
183:
184: haveSkipped = false;
185: for (int i = 1; i < numberOfSkipLevels; i++) {
186: skipStream[i] = null;
187: }
188: }
189:
190: /** Loads the skip levels */
191: private void loadSkipLevels() throws IOException {
192: numberOfSkipLevels = docCount == 0 ? 0 : (int) Math.floor(Math
193: .log(docCount)
194: / Math.log(skipInterval[0]));
195: if (numberOfSkipLevels > maxNumberOfSkipLevels) {
196: numberOfSkipLevels = maxNumberOfSkipLevels;
197: }
198:
199: skipStream[0].seek(skipPointer[0]);
200:
201: int toBuffer = numberOfLevelsToBuffer;
202:
203: for (int i = numberOfSkipLevels - 1; i > 0; i--) {
204: // the length of the current level
205: long length = skipStream[0].readVLong();
206:
207: // the start pointer of the current level
208: skipPointer[i] = skipStream[0].getFilePointer();
209: if (toBuffer > 0) {
210: // buffer this level
211: skipStream[i] = new SkipBuffer(skipStream[0],
212: (int) length);
213: toBuffer--;
214: } else {
215: // clone this stream, it is already at the start of the current level
216: skipStream[i] = (IndexInput) skipStream[0].clone();
217: if (inputIsBuffered
218: && length < BufferedIndexInput.BUFFER_SIZE) {
219: ((BufferedIndexInput) skipStream[i])
220: .setBufferSize((int) length);
221: }
222:
223: // move base stream beyond the current level
224: skipStream[0].seek(skipStream[0].getFilePointer()
225: + length);
226: }
227: }
228:
229: // use base stream for the lowest level
230: skipPointer[0] = skipStream[0].getFilePointer();
231: }
232:
233: /**
234: * Subclasses must implement the actual skip data encoding in this method.
235: *
236: * @param level the level skip data shall be read from
237: * @param skipStream the skip stream to read from
238: */
239: protected abstract int readSkipData(int level, IndexInput skipStream)
240: throws IOException;
241:
242: /** Copies the values of the last read skip entry on this level */
243: protected void setLastSkipData(int level) {
244: lastDoc = skipDoc[level];
245: lastChildPointer = childPointer[level];
246: }
247:
248: /** used to buffer the top skip levels */
249: private final static class SkipBuffer extends IndexInput {
250: private byte[] data;
251: private long pointer;
252: private int pos;
253:
254: SkipBuffer(IndexInput input, int length) throws IOException {
255: data = new byte[length];
256: pointer = input.getFilePointer();
257: input.readBytes(data, 0, length);
258: }
259:
260: public void close() throws IOException {
261: data = null;
262: }
263:
264: public long getFilePointer() {
265: return pointer + pos;
266: }
267:
268: public long length() {
269: return data.length;
270: }
271:
272: public byte readByte() throws IOException {
273: return data[pos++];
274: }
275:
276: public void readBytes(byte[] b, int offset, int len)
277: throws IOException {
278: System.arraycopy(data, pos, b, offset, len);
279: pos += len;
280: }
281:
282: public void seek(long pos) throws IOException {
283: this .pos = (int) (pos - pointer);
284: }
285:
286: }
287: }
|