Source Code Cross Referenced for SkipGammaDeltaGammaDeltaBitStreamIndexReader.java in  » Search-Engine » mg4j » it » unimi » dsi » mg4j » index » wired » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Search Engine » mg4j » it.unimi.dsi.mg4j.index.wired 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        package it.unimi.dsi.mg4j.index.wired;
002:
003:        /*		 
004:         * MG4J: Managing Gigabytes for Java
005:         *
006:         * Copyright (C) 2003-2006 Paolo Boldi and Sebastiano Vigna 
007:         *
008:         *  This library is free software; you can redistribute it and/or modify it
009:         *  under the terms of the GNU Lesser General Public License as published by the Free
010:         *  Software Foundation; either version 2.1 of the License, or (at your option)
011:         *  any later version.
012:         *
013:         *  This library is distributed in the hope that it will be useful, but
014:         *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015:         *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
016:         *  for more details.
017:         *
018:         *  You should have received a copy of the GNU Lesser General Public License
019:         *  along with this program; if not, write to the Free Software
020:         *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021:         *
022:         */
023:        import it.unimi.dsi.fastutil.ints.IntIterator;
024:        import it.unimi.dsi.fastutil.ints.IntIterators;
025:        import it.unimi.dsi.fastutil.ints.IntSet;
026:        import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;
027:        import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
028:        import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
029:        import it.unimi.dsi.fastutil.objects.ReferenceSet;
030:        import it.unimi.dsi.mg4j.index.AbstractIndexIterator;
031:        import it.unimi.dsi.mg4j.index.AbstractIndexReader;
032:        import it.unimi.dsi.mg4j.index.BitStreamIndex;
033:        import it.unimi.dsi.mg4j.index.Index;
034:        import it.unimi.dsi.mg4j.index.IndexIterator;
035:        import it.unimi.dsi.mg4j.index.CompressionFlags.Coding;
036:        import it.unimi.dsi.mg4j.index.payload.Payload;
037:        import it.unimi.dsi.io.InputBitStream;
038:        import it.unimi.dsi.util.Interval;
039:        import it.unimi.dsi.mg4j.search.IntervalIterator;
040:        import it.unimi.dsi.mg4j.search.IntervalIterators;
041:        import it.unimi.dsi.bits.Fast;
042:        import it.unimi.dsi.Util;
043:        import java.io.IOException;
044:        import java.util.NoSuchElementException;
045:        import org.apache.log4j.Logger;
046:
047:        public class SkipGammaDeltaGammaDeltaBitStreamIndexReader extends
048:                AbstractIndexReader {
049:            @SuppressWarnings("unused")
050:            private static final Logger LOGGER = Util
051:                    .getLogger(SkipGammaDeltaGammaDeltaBitStreamIndexReader.class);
052:            /** The reference index. */
053:            protected final BitStreamIndex index;
054:            private final static boolean ASSERTS = false;
055:            private final static boolean DEBUG = false;
056:            /** The {@link IndexIterator} view of this reader (returned by {@link #documents(CharSequence)}). */
057:            protected final BitStreamIndexReaderIndexIterator indexIterator;
058:
059:            /** Creates a new skip index reader, with the specified underlying {@link Index} and input bit stream.
060:             *
061:             * @param index the index.
062:             * @param ibs the underlying bit stream.
063:             */
064:            public SkipGammaDeltaGammaDeltaBitStreamIndexReader(
065:                    final BitStreamIndex index, final InputBitStream ibs) {
066:                this .index = index;
067:                this .indexIterator = new BitStreamIndexReaderIndexIterator(
068:                        this , ibs);
069:            }
070:
071:            protected static final class BitStreamIndexReaderIndexIterator
072:                    extends AbstractIndexIterator implements  IndexIterator {
073:                /** The enclosing instance. */
074:                private final SkipGammaDeltaGammaDeltaBitStreamIndexReader parent;
075:                /** The reference index. */
076:                protected final BitStreamIndex index;
077:                /** The underlying input bit stream. */
078:                protected final InputBitStream ibs;
079:                /** The enclosed interval iterator. */
080:                private final IndexIntervalIterator intervalIterator;
081:                /** A singleton set containing the enclosed interval iterator. */
082:                private final Reference2ReferenceMap<Index, IntervalIterator> singletonIntervalIterator;
083:                /** The key index. */
084:                private final Index keyIndex;
085:                /** The cached copy of {@link #index index.pointerCoding}. */
086:                protected final Coding pointerCoding;
087:                /** The cached copy of {@link #index index.countCoding}. */
088:                protected final Coding countCoding;
089:                /** The cached copy of {@link #index index.positionCoding}. */
090:                protected final Coding positionCoding;
091:                /** The current term. */
092:                protected int currentTerm = -1;
093:                /** The current frequency. */
094:                protected int frequency;
095:                /** Whether the current terms has pointers at all (this happens when the {@link #frequency} is smaller than the number of documents). */
096:                protected boolean hasPointers;
097:                /** The current count (if this index contains counts). */
098:                protected int count;
099:                /** The last document pointer we read from current list, -1 if we just read the frequency,
100:                 * {@link Integer#MAX_VALUE} if we are beyond the end of list. */
101:                protected int currentDocument;
102:                /** The number of the document record we are going to read inside the current inverted list. */
103:                protected int numberOfDocumentRecord;
104:                /** This variable tracks the current state of the reader. */
105:                protected int state;
106:                /** The parameter <code>h</code> (the maximum height of a skip tower). */
107:                public final int height;
108:                /** The quantum. */
109:                public final int quantum;
110:                /** The bit mask giving the remainder of the division by {@link #quantum}. */
111:                public final int quantumModuloMask;
112:                /** The shift giving result of the division by {@link #quantum}. */
113:                public final int quantumDivisionShift;
114:                /** The maximum height of a skip tower in the current block. May be less than {@link #height} if the block is defective,
115:                 * and will be -1 on defective quanta (no tower at all). */
116:                private int maxh;
117:                /** The maximum valid index of the current skip tower, if any. */
118:                private int s;
119:                /** The minimum valid index of the current skip tower, or {@link Integer#MAX_VALUE}. If {@link #maxh} is negative, the value is undefined. */
120:                private int lowest;
121:                /** We have <var>w</var> = <var>Hq</var>. */
122:                private final int w;
123:                /** The bit mask giving the remainder of the division by {@link #w}. */
124:                private final int wModuloMask;
125:                /** The shift giving result of the division by {@link #w}. */
126:                private final int wDivisionShift;
127:                /** The Golomb modulus for a top pointer skip, for each level. */
128:                private int[] towerTopB;
129:                /** The most significant bit of the Golomb modulus for a top point[]er skip, for each level. */
130:                private int[] towerTopLog2B;
131:                /** The Golomb modulus for a lower pointer skip, for each level. */
132:                private int[] towerLowerB;
133:                /** The most significant bit of the Golomb modulus for a lower pointer skip, for each level. */
134:                private int[] towerLowerLog2B;
135:                /** The prediction for a pointer skip, for each level. */
136:                private int[] pointerPrediction;
137:                /** An array to decode bit skips. */
138:                private long[] bitSkip;
139:                /** An array to decode the pointer skips. */
140:                private int[] pointerSkip;
141:                /** The number of bits read just after reading the last skip tower. */
142:                private long readBitsAtLastSkipTower;
143:                /** The document pointer corresponding to the last skip tower. */
144:                private int pointerAtLastSkipTower;
145:                /** The current quantum bit length, as provided by the index. */
146:                private int quantumBitLength;
147:                /** The current entry bit length, as provided by the index. */
148:                private int entryBitLength;
149:                /** This value of {@link #state} means that we are positioned just before a tower. */
150:                private static final int BEFORE_TOWER = 0;
151:                /** This value of {@link #state} can be assumed only in indices that contain a payload; it
152:                 * means that we are positioned just before the payload for the current document record. */
153:                private static final int BEFORE_PAYLOAD = 1;
154:                /** This value of {@link #state} can be assumed only in indices that contain counts; it
155:                 * means that we are positioned just before the count for the current document record. */
156:                private static final int BEFORE_COUNT = 2;
157:                /** This value of {@link #state} can be assumed only in indices that contain document positions; 
158:                 * it means that we are positioned just before the position list of the current document record. */
159:                private static final int BEFORE_POSITIONS = 3;
160:                /** This value of {@link #state} means that we are at the start of a new document record, 
161:                 * unless we already read all documents (i.e., {@link #numberOfDocumentRecord} == {@link #frequency}),
162:                 * in which case we are at the end of the inverted list, and {@link #endOfList()} is true. */
163:                private static final int BEFORE_POINTER = 4;
164:                /** The cached position array. */
165:                protected int[] positionCache = new int[16];
166:
167:                public BitStreamIndexReaderIndexIterator(
168:                        final SkipGammaDeltaGammaDeltaBitStreamIndexReader parent,
169:                        final InputBitStream ibs) {
170:                    this .parent = parent;
171:                    this .ibs = ibs;
172:                    index = parent.index;
173:                    keyIndex = index.keyIndex;
174:                    pointerCoding = index.pointerCoding;
175:                    if (index.hasPayloads)
176:                        throw new IllegalStateException();
177:                    if (!index.hasCounts)
178:                        throw new IllegalStateException();
179:                    countCoding = index.countCoding;
180:                    if (!index.hasPositions)
181:                        throw new IllegalStateException();
182:                    positionCoding = index.positionCoding;
183:                    intervalIterator = index.hasPositions ? new IndexIntervalIterator()
184:                            : null;
185:                    singletonIntervalIterator = index.hasPositions ? Reference2ReferenceMaps
186:                            .singleton(keyIndex,
187:                                    (IntervalIterator) intervalIterator)
188:                            : null;
189:                    quantum = index.quantum;
190:                    height = index.height;
191:                    if ((quantum == -1) != (height == -1))
192:                        throw new IllegalArgumentException();
193:                    w = (1 << height) * quantum;
194:                    quantumModuloMask = quantum - 1;
195:                    wModuloMask = w - 1;
196:                    quantumDivisionShift = Fast.mostSignificantBit(quantum);
197:                    wDivisionShift = Fast.mostSignificantBit(w);
198:                    bitSkip = new long[height + 1];
199:                    pointerSkip = new int[height + 1];
200:                    towerTopB = new int[height + 1];
201:                    towerTopLog2B = new int[height + 1];
202:                    towerLowerB = new int[height + 1];
203:                    towerLowerLog2B = new int[height + 1];
204:                    pointerPrediction = new int[height + 1];
205:                }
206:
207:                /** Positions the index on the inverted list of a given term.
208:                 *
209:                 * <p>This method can be called at any time. Note that it is <em>always</em> possible
210:                 * to call this method with argument 0, even if offsets have not been loaded.
211:                 *
212:                 * @param term a term.
213:                 */
214:                protected void position(final int term) throws IOException {
215:                    if (term == 0) {
216:                        ibs.position(0);
217:                        ibs.readBits(0);
218:                    } else {
219:                        if (index.offsets == null)
220:                            throw new IllegalStateException(
221:                                    "You cannot position an index without offsets");
222:                        final long offset = index.offsets.getLong(term);
223:                        ibs.position(offset);
224:                        // TODO: Can't we set this to 0?
225:                        ibs.readBits(offset);
226:                    }
227:                    currentTerm = term;
228:                    readFrequency();
229:                }
230:
231:                public int termNumber() {
232:                    return currentTerm;
233:                }
234:
235:                protected IndexIterator advance() throws IOException {
236:                    if (currentTerm == index.numberOfTerms - 1)
237:                        return null;
238:                    if (currentTerm != -1) {
239:                        skipTo(Integer.MAX_VALUE);
240:                        nextDocument(); // This guarantees we have no garbage before the frequency
241:                    }
242:                    currentTerm++;
243:                    readFrequency();
244:                    return this ;
245:                }
246:
247:                private void readFrequency() throws IOException {
248:                    // Read the frequency
249:                    frequency = ibs.readGamma() + 1;
250:                    hasPointers = frequency < index.numberOfDocuments;
251:                    quantumBitLength = entryBitLength = -1;
252:                    lowest = Integer.MAX_VALUE;
253:                    if (ASSERTS)
254:                        for (int i = height; i > Math
255:                                .min(
256:                                        height,
257:                                        Fast
258:                                                .mostSignificantBit(frequency >> quantumDivisionShift)); i--)
259:                            towerTopB[i] = towerLowerB[i] = pointerPrediction[i] = -1;
260:                    final long pointerQuantumSigma = BitStreamIndex
261:                            .quantumSigma(frequency, index.numberOfDocuments,
262:                                    quantum);
263:                    for (int i = Math
264:                            .min(
265:                                    height,
266:                                    Fast
267:                                            .mostSignificantBit(frequency >> quantumDivisionShift)); i >= 0; i--) {
268:                        towerTopB[i] = BitStreamIndex.gaussianGolombModulus(
269:                                pointerQuantumSigma, i + 1);
270:                        towerTopLog2B[i] = Fast
271:                                .mostSignificantBit(towerTopB[i]);
272:                        towerLowerB[i] = BitStreamIndex.gaussianGolombModulus(
273:                                pointerQuantumSigma, i);
274:                        towerLowerLog2B[i] = Fast
275:                                .mostSignificantBit(towerLowerB[i]);
276:                        pointerPrediction[i] = (int) ((quantum * (1L << i)
277:                                * index.numberOfDocuments + frequency / 2) / frequency);
278:                    }
279:                    count = -1;
280:                    currentDocument = -1;
281:                    numberOfDocumentRecord = -1;
282:                    state = BEFORE_POINTER;
283:                }
284:
285:                public Index index() {
286:                    return keyIndex;
287:                }
288:
289:                public int frequency() {
290:                    return frequency;
291:                }
292:
293:                private void ensureCurrentDocument() {
294:                    if (currentDocument < 0)
295:                        throw new IllegalStateException(
296:                                "nextDocument() has never been called for (term="
297:                                        + currentTerm + ")");
298:                    if (currentDocument == Integer.MAX_VALUE)
299:                        throw new IllegalStateException(
300:                                "This reader is positioned beyond the end of list of (term="
301:                                        + currentTerm + ")");
302:                }
303:
304:                /** Returns whether there are no more document records in the current inverted list.
305:                 * 
306:                 * <p>This method returns true if the last document pointer of the current inverted
307:                 * list has been read. It makes no distinction as to where (inside the last document
308:                 * record) this reader is currently positioned. In particular, this method will
309:                 * return true independently of whether count and positions have been read or not (we
310:                 * note by passing that this is the only sensible behaviour, as you can build indices
311:                 * with or without counts/positions).
312:                 * 
313:                 * <p>This method will return true also when this reader is positioned <em>beyond</em>
314:                 * the last document pointer. In this case, {@link #currentDocumentPointer()} will
315:                 * return {@link Integer#MAX_VALUE}.
316:                 *
317:                 * @return true whether there are no more document records in the current inverted list.
318:                 */
319:                private boolean endOfList() {
320:                    if (ASSERTS)
321:                        assert numberOfDocumentRecord <= frequency;
322:                    return numberOfDocumentRecord >= frequency - 1;
323:                }
324:
325:                public int document() {
326:                    if (ASSERTS)
327:                        ensureCurrentDocument();
328:                    return currentDocument;
329:                }
330:
331:                public Payload payload() throws IOException {
332:                    if (DEBUG)
333:                        System.err.println(this  + ".payload()");
334:                    if (ASSERTS)
335:                        ensureCurrentDocument();
336:                    throw new UnsupportedOperationException("This index ("
337:                            + index + ") does not contain payloads");
338:                }
339:
340:                public int count() throws IOException {
341:                    if (DEBUG)
342:                        System.err.println(this  + ".count()");
343:                    if (count != -1)
344:                        return count;
345:                    if (ASSERTS)
346:                        ensureCurrentDocument();
347:                    if (state == BEFORE_TOWER)
348:                        readTower();
349:                    {
350:                        state = BEFORE_POSITIONS;
351:                        count = ibs.readGamma() + 1;
352:                    }
353:                    return count;
354:                }
355:
356:                /** We read positions, assuming state <= BEFORE_POSITIONS */
357:                @SuppressWarnings("unused")
358:                protected void updatePositionCache() throws IOException {
359:                    if (ASSERTS)
360:                        assert state <= BEFORE_POSITIONS;
361:                    if (state < BEFORE_POSITIONS) {
362:                        if (state == BEFORE_TOWER)
363:                            readTower();
364:                        if (state == BEFORE_COUNT) {
365:                            state = BEFORE_POSITIONS;
366:                            count = ibs.readGamma() + 1;
367:                        }
368:                    }
369:                    if (count > positionCache.length)
370:                        positionCache = new int[Math.max(
371:                                positionCache.length * 2, count)];
372:                    final int[] occ = positionCache;
373:                    state = BEFORE_POINTER;
374:                    ibs.readDeltas(occ, count);
375:                    for (int i = 1; i < count; i++)
376:                        occ[i] += occ[i - 1] + 1;
377:                }
378:
379:                public IntIterator positions() throws IOException {
380:                    if (ASSERTS)
381:                        ensureCurrentDocument();
382:                    if (state <= BEFORE_POSITIONS)
383:                        updatePositionCache();
384:                    return IntIterators.wrap(positionCache, 0, count);
385:                }
386:
387:                public int[] positionArray() throws IOException {
388:                    if (ASSERTS)
389:                        ensureCurrentDocument();
390:                    if (state <= BEFORE_POSITIONS)
391:                        updatePositionCache();
392:                    return positionCache;
393:                }
394:
395:                // TODO: check who's using this (positionArray() is actually faster now)
396:                public int positions(final int[] position) throws IOException {
397:                    if (ASSERTS)
398:                        ensureCurrentDocument();
399:                    if (state <= BEFORE_POSITIONS)
400:                        updatePositionCache(); // And also that positions have been read
401:                    if (position.length < count)
402:                        return -count;
403:                    for (int i = count; i-- != 0;)
404:                        position[i] = this .positionCache[i];
405:                    return count;
406:                }
407:
408:                public int nextDocument() throws IOException {
409:                    if (DEBUG)
410:                        System.err.println("{" + this  + "} nextDocument()");
411:                    if (state != BEFORE_POINTER) {
412:                        if (state == BEFORE_TOWER)
413:                            readTower();
414:                        if (state == BEFORE_COUNT) {
415:                            state = BEFORE_POSITIONS;
416:                            count = ibs.readGamma() + 1;
417:                        }
418:                        if (state == BEFORE_POSITIONS) {
419:                            // Here we just skip; note that the state change is necessary if endOfList() is true
420:                            state = BEFORE_POINTER;
421:                            ibs.skipDeltas(count);
422:                        }
423:                    }
424:                    if (endOfList())
425:                        return -1;
426:                    if (hasPointers) {// We do not write pointers for everywhere occurring terms.
427:                        currentDocument += ibs.readDelta() + 1;
428:                    } else
429:                        currentDocument++;
430:                    numberOfDocumentRecord++;
431:                    state = BEFORE_COUNT;
432:                    count = -1;
433:                    if ((numberOfDocumentRecord & quantumModuloMask) == 0)
434:                        state = BEFORE_TOWER;
435:                    return currentDocument;
436:                }
437:
438:                /** Reads the entire skip tower for the current position. 
439:                 */
440:                private void readTower() throws IOException {
441:                    readTower(-1);
442:                }
443:
444:                /** Reads the skip tower for the current position, possibly skipping part of the tower. 
445:                 * 
446:                 * <P>Note that this method will update {@link #state} only if it reads the entire tower,
447:                 * otherwise the state remains {@link #BEFORE_TOWER}.
448:                 * 
449:                 * @param pointer the tower will be read up to the first entry smaller than or equal to this pointer; use
450:                 * -1 to guarantee that the entire tower will be read.
451:                 */
452:                private void readTower(final int pointer) throws IOException {
453:                    int i, j, k, cacheOffset, cache, towerLength = 0;
454:                    long bitsAtTowerStart = 0;
455:                    boolean truncated = false;
456:                    if (ASSERTS)
457:                        assert numberOfDocumentRecord % quantum == 0;
458:                    if (ASSERTS && state != BEFORE_TOWER)
459:                        throw new IllegalStateException(
460:                                "readTower() called in state " + state);
461:                    cacheOffset = (numberOfDocumentRecord & wModuloMask);
462:                    k = cacheOffset >> quantumDivisionShift;
463:                    if (ASSERTS && k == 0) { // Invalidate current tower data
464:                        it.unimi.dsi.fastutil.ints.IntArrays.fill(pointerSkip,
465:                                Integer.MAX_VALUE);
466:                        it.unimi.dsi.fastutil.longs.LongArrays.fill(bitSkip,
467:                                Integer.MAX_VALUE);
468:                    }
469:                    // Compute the height of the current skip tower.
470:                    s = (k == 0) ? height : Fast.leastSignificantBit(k);
471:                    cache = frequency - w
472:                            * (numberOfDocumentRecord >> wDivisionShift);
473:                    if (cache < w) {
474:                        maxh = Fast
475:                                .mostSignificantBit((cache >> quantumDivisionShift)
476:                                        - k);
477:                        if (maxh < s) {
478:                            s = maxh;
479:                            truncated = true;
480:                        } else
481:                            truncated = false;
482:                    } else {
483:                        cache = w;
484:                        maxh = height;
485:                        truncated = k == 0;
486:                    }
487:                    //assert w == cache || k == 0 || lastMaxh == Fast.mostSignificantBit( k ^ ( cache/quantum ) )  : lastMaxh +","+ (Fast.mostSignificantBit( k ^ ( cache/quantum ) ));
488:                    i = s;
489:                    if (s >= 0) {
490:                        if (k == 0) {
491:                            if (quantumBitLength < 0) {
492:                                quantumBitLength = ibs.readDelta();
493:                                entryBitLength = ibs.readDelta();
494:                            } else {
495:                                quantumBitLength += Fast.nat2int(ibs
496:                                        .readDelta());
497:                                entryBitLength += Fast.nat2int(ibs.readDelta());
498:                            }
499:                            if (DEBUG)
500:                                System.err
501:                                        .println("{" + this 
502:                                                + "} quantum bit length="
503:                                                + quantumBitLength
504:                                                + " entry bit length="
505:                                                + entryBitLength);
506:                        }
507:                        if (DEBUG)
508:                            System.err.println("{" + this 
509:                                    + "} Reading tower; pointer=" + pointer
510:                                    + " maxh=" + maxh + " s=" + s);
511:                        if (s > 0) {
512:                            towerLength = entryBitLength * (s + 1)
513:                                    + Fast.nat2int(ibs.readDelta());
514:                            if (DEBUG)
515:                                System.err.println("{" + this 
516:                                        + "} Tower length=" + towerLength);
517:                        }
518:                        // We store the number of bits read at the start of the tower (just after the length).
519:                        bitsAtTowerStart = ibs.readBits();
520:                        if (truncated) {
521:                            if (DEBUG)
522:                                System.err.println("{" + this 
523:                                        + "} Truncated--reading tops");
524:                            // We read the tower top.
525:                            pointerSkip[s] = Fast.nat2int(ibs.readGolomb(
526:                                    towerTopB[s], towerTopLog2B[s]))
527:                                    + pointerPrediction[s];
528:                            bitSkip[s] = quantumBitLength * (1 << s)
529:                                    + entryBitLength * ((1 << s + 1) - s - 2)
530:                                    + Fast.nat2int(ibs.readLongDelta());
531:                        } else {
532:                            // We copy the tower top from the lowest inherited entry suitably updated.
533:                            pointerSkip[s] = pointerSkip[s + 1]
534:                                    - (currentDocument - pointerAtLastSkipTower);
535:                            bitSkip[s] = bitSkip[s + 1]
536:                                    - (bitsAtTowerStart - readBitsAtLastSkipTower)
537:                                    - towerLength;
538:                        }
539:                        // We read the remaining part of the tower, at least until we point after pointer.
540:                        if (currentDocument + pointerSkip[i] > pointer) {
541:                            for (i = s - 1; i >= 0; i--) {
542:                                pointerSkip[i] = Fast.nat2int(ibs.readGolomb(
543:                                        towerLowerB[i], towerLowerLog2B[i]))
544:                                        + pointerSkip[i + 1] / 2;
545:                                bitSkip[i] = (bitSkip[i + 1] - entryBitLength
546:                                        * (i + 1))
547:                                        / 2 - Fast.nat2int(ibs.readLongDelta());
548:                                if (DEBUG
549:                                        && currentDocument + pointerSkip[i] <= pointer)
550:                                    System.err
551:                                            .println("{"
552:                                                    + this 
553:                                                    + "} stopping reading at i="
554:                                                    + i
555:                                                    + " as currentDocument ("
556:                                                    + currentDocument
557:                                                    + ") plus pointer skip ("
558:                                                    + pointerSkip[i]
559:                                                    + ") is smaller than or equal target ("
560:                                                    + pointer + ")");
561:                                if (currentDocument + pointerSkip[i] <= pointer)
562:                                    break;
563:                            }
564:                        }
565:                    }
566:                    /* If we did not read the entire tower, we need to fix the skips we read (as they
567:                     * are offsets from the *end* of the tower) and moreover we must make unusable the
568:                     * rest of the tower (for asserts). */
569:                    if (i > 0) {
570:                        final long fix = ibs.readBits() - bitsAtTowerStart;
571:                        for (j = s; j >= i; j--)
572:                            bitSkip[j] += towerLength - fix;
573:                        if (ASSERTS)
574:                            for (; j >= 0; j--)
575:                                pointerSkip[j] = Integer.MAX_VALUE;
576:                    } else
577:                        state = BEFORE_COUNT;
578:                    // We update the inherited tower.
579:                    final long deltaBits = ibs.readBits()
580:                            - readBitsAtLastSkipTower;
581:                    final int deltaPointers = currentDocument
582:                            - pointerAtLastSkipTower;
583:                    for (j = Fast.mostSignificantBit(k
584:                            ^ (cache >> quantumDivisionShift)); j >= s + 1; j--) {
585:                        bitSkip[j] -= deltaBits;
586:                        pointerSkip[j] -= deltaPointers;
587:                    }
588:                    readBitsAtLastSkipTower = ibs.readBits();
589:                    pointerAtLastSkipTower = currentDocument;
590:                    lowest = i < 0 ? 0 : i;
591:                    if (DEBUG) {
592:                        System.err.println("{" + this  + "} "
593:                                + "Computed skip tower (lowest: " + lowest
594:                                + ") for document record number "
595:                                + numberOfDocumentRecord + " (pointer "
596:                                + currentDocument + ") from " + Math.max(i, 0)
597:                                + ": ");
598:                        System.err.print("% ");
599:                        for (j = 0; j <= s; j++)
600:                            System.err.print(pointerSkip[j] + ":" + bitSkip[j]
601:                                    + " ");
602:                        System.err.print(" [");
603:                        for (; j <= height; j++)
604:                            System.err.print(pointerSkip[j] + ":" + bitSkip[j]
605:                                    + " ");
606:                        System.err.print("]");
607:                        System.err.println();
608:                    }
609:                }
610:
611:                /*
612:                	public int skip( final int n ) throws IOException {
613:                		int i, k, cacheOffset, start = numberOfDocumentRecord, skip = 0;
614:
615:                		if ( DEBUG ) System.err.println( "{" + this + "} " + "Going to enter linear skip code with lastDoc=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", n=" + n + ", endOfList()=" + endOfList() );
616:                		if ( n < 0 ) throw new IllegalArgumentException();
617:                		if ( n == 0 ) return 0;
618:                		
619:                		// If we are just at the start of a list, let us read the first pointer.
620:                		if ( numberOfDocumentRecord == -1 ) readDocumentPointer();
621:                		if ( state == BEFORE_TOWER ) readTower( -1 ); 
622:
623:                		if ( DEBUG ) System.err.println( "{" + this + "} " + "Entering skip code with lastDoc=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", n=" + n + ", endOfList()=" + endOfList() );
624:
625:                		for(;;) {
626:                			if ( DEBUG ) System.err.println( "{" + this + "} " + "In for loop, lastDoc=" + currentDocument + ", maxh=" + maxh + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", n=" + n );
627:
628:                			cacheOffset = ( numberOfDocumentRecord & wModuloMask );
629:                			k = cacheOffset >> quantumDivisionShift;
630:
631:                			if ( maxh < 0 ) break; // Defective quantum--no tower.
632:
633:                			for( i = Fast.mostSignificantBit( k ^ ( Math.min( w, frequency - numberOfDocumentRecord + cacheOffset ) >> quantumDivisionShift ) ); i >= 0; i-- )
634:                				if ( ( skip = ( ( k & - ( 1 << i ) ) + ( 1 << i ) ) * quantum - cacheOffset ) <= n ) break;
635:                			
636:                			if ( i >= 0 ) {
637:                				ibs.skip( bitSkip[ i ] - ( ibs.readBits() - readBitsAtLastSkipTower ) );
638:                				state = BEFORE_TOWER;
639:                				currentDocument = pointerSkip[ i ] + pointerAtLastSkipTower;
640:                				numberOfDocumentRecord += skip;
641:                				// If we skipped beyond the end of the list, we invalidate the current document.
642:                				if ( numberOfDocumentRecord == frequency ) currentDocument = -1;
643:                				readTower( -1 );
644:                				count = -1; // We must invalidate count as readDocumentPointer() would do.
645:                				if ( endOfList() ) return numberOfDocumentRecord - start;
646:                			}
647:                			else break;
648:                		}
649:
650:                		if ( DEBUG ) System.err.println( "{" + this + "} " + "Completing sequentially, lastDoc=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", n=" + n );
651:
652:                		while( numberOfDocumentRecord - start < n ) {
653:                			if ( endOfList() ) break;
654:                			readDocumentPointer();
655:                		}
656:                		return numberOfDocumentRecord - start;
657:                	}
658:                 */
659:                public int skipTo(final int p) throws IOException {
660:                    if (DEBUG)
661:                        System.err.println(this  + ".skipTo(" + p
662:                                + ") [currentDocument=" + currentDocument
663:                                + ", numberOfDocumentRecord="
664:                                + numberOfDocumentRecord + ", endOfList()="
665:                                + endOfList());
666:                    // If we are just at the start of a list, let us read the first pointer.
667:                    if (numberOfDocumentRecord == -1)
668:                        nextDocument(); // TODO: shouldn't we just read the tower?
669:                    if (currentDocument >= p) {
670:                        if (DEBUG)
671:                            System.err.println(this 
672:                                    + ": No skip necessary, returning "
673:                                    + currentDocument);
674:                        return currentDocument;
675:                    }
676:                    if (state == BEFORE_TOWER)
677:                        readTower(p);
678:                    final int[] pointerSkip = this .pointerSkip;
679:                    for (;;) {
680:                        if (ASSERTS)
681:                            assert maxh < 0 || lowest > 0
682:                                    || pointerSkip[0] != Integer.MAX_VALUE;
683:                        // If on a defective quantum (no tower) or p is inside the current quantum (no need to scan the tower) we bail out. 
684:                        if (maxh < 0 || lowest == 0
685:                                && pointerAtLastSkipTower + pointerSkip[0] > p)
686:                            break;
687:                        if (DEBUG)
688:                            System.err.println(this 
689:                                    + ": In for loop, currentDocument="
690:                                    + currentDocument + ", maxh=" + maxh
691:                                    + ", numberOfDocumentRecord="
692:                                    + numberOfDocumentRecord + ", p=" + p);
693:                        final int cacheOffset = (numberOfDocumentRecord & wModuloMask);
694:                        final int k = cacheOffset >> quantumDivisionShift;
695:                        final int top = Fast
696:                                .mostSignificantBit(k
697:                                        ^ (Math.min(w, frequency
698:                                                - numberOfDocumentRecord
699:                                                + cacheOffset) >> quantumDivisionShift));
700:                        int i;
701:                        for (i = lowest; i <= top; i++) {
702:                            if (ASSERTS && (k & 1 << i) != 0)
703:                                assert pointerSkip[i] == pointerSkip[i + 1];
704:                            if (ASSERTS)
705:                                assert pointerSkip[i] != Integer.MAX_VALUE : "Invalid pointer skip "
706:                                        + i
707:                                        + " (lowest="
708:                                        + lowest
709:                                        + ", top="
710:                                        + top + ")";
711:                            if (pointerAtLastSkipTower + pointerSkip[i] > p)
712:                                break;
713:                        }
714:                        if (--i < 0)
715:                            break;
716:                        if (ASSERTS)
717:                            assert i >= lowest : i + " < " + lowest;
718:                        if (DEBUG)
719:                            System.err.println(this 
720:                                    + ": Safely after for with i=" + i
721:                                    + ", P[i]=" + pointerSkip[i] + ", A[i]="
722:                                    + bitSkip[i]);
723:                        if (DEBUG)
724:                            System.err
725:                                    .println(this 
726:                                            + ": ["
727:                                            + ibs.readBits()
728:                                            + "] Skipping "
729:                                            + (bitSkip[i] - (ibs.readBits() - readBitsAtLastSkipTower))
730:                                            + " bits ("
731:                                            + (((k & -(1 << i)) + (1 << i))
732:                                                    * quantum - cacheOffset)
733:                                            + " records) to get to document pointer "
734:                                            + (currentDocument + pointerSkip[i]));
735:                        ibs.skip(bitSkip[i]
736:                                - (ibs.readBits() - readBitsAtLastSkipTower));
737:                        state = BEFORE_TOWER;
738:                        currentDocument = pointerSkip[i]
739:                                + pointerAtLastSkipTower;
740:                        numberOfDocumentRecord += ((k & -(1 << i)) + (1 << i))
741:                                * quantum - cacheOffset;
742:                        // If we skipped beyond the end of the list, we invalidate the current document.
743:                        if (numberOfDocumentRecord == frequency) {
744:                            currentDocument = Integer.MAX_VALUE;
745:                            state = BEFORE_POINTER; // We are actually before a frequency, but we must avoid that calls to nextDocument() read anything
746:                        } else
747:                            readTower(p); // Note that if we are exactly on the destination pointer, we will read the entire tower.
748:                        count = -1; // We must invalidate count as readDocumentPointer() would do.
749:                        if (endOfList()) {
750:                            if (DEBUG)
751:                                System.err.println(this 
752:                                        + ".toSkip(): end-of-list, returning "
753:                                        + currentDocument);
754:                            // Note that if p == Integer.MAX_VALUE, we are certainly beyond end-of-list
755:                            return p == Integer.MAX_VALUE ? Integer.MAX_VALUE
756:                                    : currentDocument;
757:                        }
758:                    }
759:                    if (DEBUG)
760:                        System.err.println(this 
761:                                + ": Completing sequentially, currentDocument="
762:                                + currentDocument + ", numberOfDocumentRecord="
763:                                + numberOfDocumentRecord + ", p=" + p);
764:                    while (currentDocument < p) {
765:                        if (DEBUG)
766:                            System.err
767:                                    .println(this 
768:                                            + ": Skipping sequentially (second), currentDocument="
769:                                            + currentDocument
770:                                            + ", numberOfDocumentRecord="
771:                                            + numberOfDocumentRecord + ", p="
772:                                            + p);
773:                        if (nextDocument() == -1) {
774:                            if (DEBUG)
775:                                System.err.println(this 
776:                                        + ": end-of-list, returning MAX_VALUE");
777:                            return Integer.MAX_VALUE;
778:                        }
779:                    }
780:                    if (DEBUG)
781:                        System.err.println(this  + ".toSkip(): Returning "
782:                                + currentDocument);
783:                    return currentDocument;
784:                }
785:
786:                public void dispose() throws IOException {
787:                    parent.close();
788:                }
789:
790:                public boolean hasNext() {
791:                    return !endOfList();
792:                }
793:
794:                public int nextInt() {
795:                    if (!hasNext())
796:                        throw new NoSuchElementException();
797:                    try {
798:                        return nextDocument();
799:                    } catch (IOException e) {
800:                        throw new RuntimeException(e);
801:                    }
802:                }
803:
804:                public String toString() {
805:                    return index + " [" + currentTerm + "]";
806:                }
807:
808:                /** An interval iterator returning the positions of the current document as singleton intervals. */
809:                private final class IndexIntervalIterator extends
810:                        AbstractObjectIterator<Interval> implements 
811:                        IntervalIterator {
812:                    int pos = -1;
813:
814:                    public void reset() throws IOException {
815:                        pos = -1;
816:                        if (state <= BEFORE_POSITIONS)
817:                            updatePositionCache(); // This guarantees the position cache is ok 
818:                    }
819:
820:                    public void intervalTerms(final IntSet terms) {
821:                        terms
822:                                .add(BitStreamIndexReaderIndexIterator.this .currentTerm);
823:                    }
824:
825:                    public boolean hasNext() {
826:                        return pos < count - 1;
827:                    }
828:
829:                    public Interval next() {
830:                        if (!hasNext())
831:                            throw new NoSuchElementException();
832:                        return Interval.valueOf(positionCache[++pos]);
833:                    }
834:
835:                    public Interval nextInterval() {
836:                        return pos < count - 1 ? Interval
837:                                .valueOf(positionCache[++pos]) : null;
838:                    }
839:
840:                    public int extent() {
841:                        return 1;
842:                    }
843:
844:                    public String toString() {
845:                        return index + ": " + term + "[doc=" + currentDocument
846:                                + ", count=" + count + ", pos=" + pos + "]";
847:                    }
848:                };
849:
850:                public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators()
851:                        throws IOException {
852:                    intervalIterator();
853:                    return singletonIntervalIterator;
854:                }
855:
856:                public IntervalIterator intervalIterator() throws IOException {
857:                    return intervalIterator(keyIndex);
858:                }
859:
860:                public IntervalIterator intervalIterator(final Index index)
861:                        throws IOException {
862:                    if (ASSERTS)
863:                        ensureCurrentDocument();
864:                    // TODO: this was if ( index != keyIndex || hasPayloads ) 
865:                    if (index != keyIndex)
866:                        return IntervalIterators.TRUE;
867:                    if (ASSERTS)
868:                        assert intervalIterator != null;
869:                    intervalIterator.reset();
870:                    return intervalIterator;
871:                }
872:
873:                public ReferenceSet<Index> indices() {
874:                    return index.singletonSet;
875:                }
876:            }
877:
878:            private IndexIterator documents(final CharSequence term,
879:                    final int termNumber) throws IOException {
880:                indexIterator.term(term);
881:                indexIterator.position(termNumber);
882:                return indexIterator;
883:            }
884:
885:            public IndexIterator documents(final int term) throws IOException {
886:                return documents(null, term);
887:            }
888:
889:            public IndexIterator documents(final CharSequence term)
890:                    throws IOException {
891:                if (closed)
892:                    throw new IllegalStateException("This "
893:                            + getClass().getSimpleName() + " has been closed");
894:                if (index.termMap != null) {
895:                    final int termIndex = (int) index.termMap.getLong(term);
896:                    if (termIndex == -1)
897:                        return index.emptyIndexIterator;
898:                    return documents(term, termIndex);
899:                }
900:                throw new UnsupportedOperationException("Index " + index
901:                        + " has no term map");
902:            }
903:
904:            @Override
905:            public IndexIterator nextIterator() throws IOException {
906:                return indexIterator.advance();
907:            }
908:
909:            public String toString() {
910:                return getClass().getSimpleName() + "[" + index + "]";
911:            }
912:
913:            public void close() throws IOException {
914:                super.close();
915:                indexIterator.ibs.close();
916:            }
917:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.