Source Code Cross Referenced for GammaDeltaGammaDeltaBitStreamIndexReader.java in » Search-Engine » mg4j » it » unimi » dsi » mg4j » index » wired » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Sevlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Search Engine » mg4j » it.unimi.dsi.mg4j.index.wired
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        package it.unimi.dsi.mg4j.index.wired;
002:
003:        /*		 
004:         * MG4J: Managing Gigabytes for Java
005:         *
006:         * Copyright (C) 2003-2006 Paolo Boldi and Sebastiano Vigna 
007:         *
008:         *  This library is free software; you can redistribute it and/or modify it
009:         *  under the terms of the GNU Lesser General Public License as published by the Free
010:         *  Software Foundation; either version 2.1 of the License, or (at your option)
011:         *  any later version.
012:         *
013:         *  This library is distributed in the hope that it will be useful, but
014:         *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015:         *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
016:         *  for more details.
017:         *
018:         *  You should have received a copy of the GNU Lesser General Public License
019:         *  along with this program; if not, write to the Free Software
020:         *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021:         *
022:         */
023:        import it.unimi.dsi.fastutil.ints.IntIterator;
024:        import it.unimi.dsi.fastutil.ints.IntIterators;
025:        import it.unimi.dsi.fastutil.ints.IntSet;
026:        import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;
027:        import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
028:        import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
029:        import it.unimi.dsi.fastutil.objects.ReferenceSet;
030:        import it.unimi.dsi.mg4j.index.AbstractIndexIterator;
031:        import it.unimi.dsi.mg4j.index.AbstractIndexReader;
032:        import it.unimi.dsi.mg4j.index.BitStreamIndex;
033:        import it.unimi.dsi.mg4j.index.Index;
034:        import it.unimi.dsi.mg4j.index.IndexIterator;
035:        import it.unimi.dsi.mg4j.index.CompressionFlags.Coding;
036:        import it.unimi.dsi.mg4j.index.payload.Payload;
037:        import it.unimi.dsi.io.InputBitStream;
038:        import it.unimi.dsi.util.Interval;
039:        import it.unimi.dsi.mg4j.search.IntervalIterator;
040:        import it.unimi.dsi.mg4j.search.IntervalIterators;
041:        import it.unimi.dsi.bits.Fast;
042:        import it.unimi.dsi.Util;
043:        import java.io.IOException;
044:        import java.util.NoSuchElementException;
045:        import org.apache.log4j.Logger;
046:
047:        public class GammaDeltaGammaDeltaBitStreamIndexReader extends
048:                AbstractIndexReader {
049:            @SuppressWarnings("unused")
050:            private static final Logger LOGGER = Util
051:                    .getLogger(GammaDeltaGammaDeltaBitStreamIndexReader.class);
052:            /** The reference index. */
053:            protected final BitStreamIndex index;
054:            private final static boolean ASSERTS = false;
055:            private final static boolean DEBUG = false;
056:            /** The {@link IndexIterator} view of this reader (returned by {@link #documents(CharSequence)}). */
057:            protected final BitStreamIndexReaderIndexIterator indexIterator;
058:
059:            /** Creates a new skip index reader, with the specified underlying {@link Index} and input bit stream.
060:             *
061:             * @param index the index.
062:             * @param ibs the underlying bit stream.
063:             */
064:            public GammaDeltaGammaDeltaBitStreamIndexReader(
065:                    final BitStreamIndex index, final InputBitStream ibs) {
066:                this .index = index;
067:                this .indexIterator = new BitStreamIndexReaderIndexIterator(
068:                        this , ibs);
069:            }
070:
071:            protected static final class BitStreamIndexReaderIndexIterator
072:                    extends AbstractIndexIterator implements  IndexIterator {
073:                /** The enclosing instance. */
074:                private final GammaDeltaGammaDeltaBitStreamIndexReader parent;
075:                /** The reference index. */
076:                protected final BitStreamIndex index;
077:                /** The underlying input bit stream. */
078:                protected final InputBitStream ibs;
079:                /** The enclosed interval iterator. */
080:                private final IndexIntervalIterator intervalIterator;
081:                /** A singleton set containing the enclosed interval iterator. */
082:                private final Reference2ReferenceMap<Index, IntervalIterator> singletonIntervalIterator;
083:                /** The key index. */
084:                private final Index keyIndex;
085:                /** The cached copy of {@link #index index.pointerCoding}. */
086:                protected final Coding pointerCoding;
087:                /** The cached copy of {@link #index index.countCoding}. */
088:                protected final Coding countCoding;
089:                /** The cached copy of {@link #index index.positionCoding}. */
090:                protected final Coding positionCoding;
091:                /** The current term. */
092:                protected int currentTerm = -1;
093:                /** The current frequency. */
094:                protected int frequency;
095:                /** Whether the current terms has pointers at all (this happens when the {@link #frequency} is smaller than the number of documents). */
096:                protected boolean hasPointers;
097:                /** The current count (if this index contains counts). */
098:                protected int count;
099:                /** The last document pointer we read from current list, -1 if we just read the frequency,
100:                 * {@link Integer#MAX_VALUE} if we are beyond the end of list. */
101:                protected int currentDocument;
102:                /** The number of the document record we are going to read inside the current inverted list. */
103:                protected int numberOfDocumentRecord;
104:                /** This variable tracks the current state of the reader. */
105:                protected int state;
106:                /** This value of {@link #state} can be assumed only in indices that contain a payload; it
107:                 * means that we are positioned just before the payload for the current document record. */
108:                private static final int BEFORE_PAYLOAD = 1;
109:                /** This value of {@link #state} can be assumed only in indices that contain counts; it
110:                 * means that we are positioned just before the count for the current document record. */
111:                private static final int BEFORE_COUNT = 2;
112:                /** This value of {@link #state} can be assumed only in indices that contain document positions; 
113:                 * it means that we are positioned just before the position list of the current document record. */
114:                private static final int BEFORE_POSITIONS = 3;
115:                /** This value of {@link #state} means that we are at the start of a new document record, 
116:                 * unless we already read all documents (i.e., {@link #numberOfDocumentRecord} == {@link #frequency}),
117:                 * in which case we are at the end of the inverted list, and {@link #endOfList()} is true. */
118:                private static final int BEFORE_POINTER = 4;
119:                /** The cached position array. */
120:                protected int[] positionCache = new int[16];
121:
122:                public BitStreamIndexReaderIndexIterator(
123:                        final GammaDeltaGammaDeltaBitStreamIndexReader parent,
124:                        final InputBitStream ibs) {
125:                    this .parent = parent;
126:                    this .ibs = ibs;
127:                    index = parent.index;
128:                    keyIndex = index.keyIndex;
129:                    pointerCoding = index.pointerCoding;
130:                    if (index.hasPayloads)
131:                        throw new IllegalStateException();
132:                    if (!index.hasCounts)
133:                        throw new IllegalStateException();
134:                    countCoding = index.countCoding;
135:                    if (!index.hasPositions)
136:                        throw new IllegalStateException();
137:                    positionCoding = index.positionCoding;
138:                    intervalIterator = index.hasPositions ? new IndexIntervalIterator()
139:                            : null;
140:                    singletonIntervalIterator = index.hasPositions ? Reference2ReferenceMaps
141:                            .singleton(keyIndex,
142:                                    (IntervalIterator) intervalIterator)
143:                            : null;
144:                }
145:
146:                /** Positions the index on the inverted list of a given term.
147:                 *
148:                 * <p>This method can be called at any time. Note that it is <em>always</em> possible
149:                 * to call this method with argument 0, even if offsets have not been loaded.
150:                 *
151:                 * @param term a term.
152:                 */
153:                protected void position(final int term) throws IOException {
154:                    if (term == 0) {
155:                        ibs.position(0);
156:                        ibs.readBits(0);
157:                    } else {
158:                        if (index.offsets == null)
159:                            throw new IllegalStateException(
160:                                    "You cannot position an index without offsets");
161:                        final long offset = index.offsets.getLong(term);
162:                        ibs.position(offset);
163:                        // TODO: Can't we set this to 0?
164:                        ibs.readBits(offset);
165:                    }
166:                    currentTerm = term;
167:                    readFrequency();
168:                }
169:
170:                public int termNumber() {
171:                    return currentTerm;
172:                }
173:
174:                protected IndexIterator advance() throws IOException {
175:                    if (currentTerm == index.numberOfTerms - 1)
176:                        return null;
177:                    if (currentTerm != -1) {
178:                        skipTo(Integer.MAX_VALUE);
179:                        nextDocument(); // This guarantees we have no garbage before the frequency
180:                    }
181:                    currentTerm++;
182:                    readFrequency();
183:                    return this ;
184:                }
185:
186:                private void readFrequency() throws IOException {
187:                    // Read the frequency
188:                    frequency = ibs.readGamma() + 1;
189:                    hasPointers = frequency < index.numberOfDocuments;
190:                    count = -1;
191:                    currentDocument = -1;
192:                    numberOfDocumentRecord = -1;
193:                    state = BEFORE_POINTER;
194:                }
195:
196:                public Index index() {
197:                    return keyIndex;
198:                }
199:
200:                public int frequency() {
201:                    return frequency;
202:                }
203:
204:                private void ensureCurrentDocument() {
205:                    if (currentDocument < 0)
206:                        throw new IllegalStateException(
207:                                "nextDocument() has never been called for (term="
208:                                        + currentTerm + ")");
209:                    if (currentDocument == Integer.MAX_VALUE)
210:                        throw new IllegalStateException(
211:                                "This reader is positioned beyond the end of list of (term="
212:                                        + currentTerm + ")");
213:                }
214:
215:                /** Returns whether there are no more document records in the current inverted list.
216:                 * 
217:                 * <p>This method returns true if the last document pointer of the current inverted
218:                 * list has been read. It makes no distinction as to where (inside the last document
219:                 * record) this reader is currently positioned. In particular, this method will
220:                 * return true independently of whether count and positions have been read or not (we
221:                 * note by passing that this is the only sensible behaviour, as you can build indices
222:                 * with or without counts/positions).
223:                 * 
224:                 * <p>This method will return true also when this reader is positioned <em>beyond</em>
225:                 * the last document pointer. In this case, {@link #currentDocumentPointer()} will
226:                 * return {@link Integer#MAX_VALUE}.
227:                 *
228:                 * @return true whether there are no more document records in the current inverted list.
229:                 */
230:                private boolean endOfList() {
231:                    if (ASSERTS)
232:                        assert numberOfDocumentRecord <= frequency;
233:                    return numberOfDocumentRecord >= frequency - 1;
234:                }
235:
236:                public int document() {
237:                    if (ASSERTS)
238:                        ensureCurrentDocument();
239:                    return currentDocument;
240:                }
241:
242:                public Payload payload() throws IOException {
243:                    if (DEBUG)
244:                        System.err.println(this  + ".payload()");
245:                    if (ASSERTS)
246:                        ensureCurrentDocument();
247:                    throw new UnsupportedOperationException("This index ("
248:                            + index + ") does not contain payloads");
249:                }
250:
251:                public int count() throws IOException {
252:                    if (DEBUG)
253:                        System.err.println(this  + ".count()");
254:                    if (count != -1)
255:                        return count;
256:                    if (ASSERTS)
257:                        ensureCurrentDocument();
258:                    {
259:                        state = BEFORE_POSITIONS;
260:                        count = ibs.readGamma() + 1;
261:                    }
262:                    return count;
263:                }
264:
265:                /** We read positions, assuming state <= BEFORE_POSITIONS */
266:                @SuppressWarnings("unused")
267:                protected void updatePositionCache() throws IOException {
268:                    if (ASSERTS)
269:                        assert state <= BEFORE_POSITIONS;
270:                    if (state < BEFORE_POSITIONS) {
271:                        if (state == BEFORE_COUNT) {
272:                            state = BEFORE_POSITIONS;
273:                            count = ibs.readGamma() + 1;
274:                        }
275:                    }
276:                    if (count > positionCache.length)
277:                        positionCache = new int[Math.max(
278:                                positionCache.length * 2, count)];
279:                    final int[] occ = positionCache;
280:                    state = BEFORE_POINTER;
281:                    ibs.readDeltas(occ, count);
282:                    for (int i = 1; i < count; i++)
283:                        occ[i] += occ[i - 1] + 1;
284:                }
285:
286:                public IntIterator positions() throws IOException {
287:                    if (ASSERTS)
288:                        ensureCurrentDocument();
289:                    if (state <= BEFORE_POSITIONS)
290:                        updatePositionCache();
291:                    return IntIterators.wrap(positionCache, 0, count);
292:                }
293:
294:                public int[] positionArray() throws IOException {
295:                    if (ASSERTS)
296:                        ensureCurrentDocument();
297:                    if (state <= BEFORE_POSITIONS)
298:                        updatePositionCache();
299:                    return positionCache;
300:                }
301:
302:                // TODO: check who's using this (positionArray() is actually faster now)
303:                public int positions(final int[] position) throws IOException {
304:                    if (ASSERTS)
305:                        ensureCurrentDocument();
306:                    if (state <= BEFORE_POSITIONS)
307:                        updatePositionCache(); // And also that positions have been read
308:                    if (position.length < count)
309:                        return -count;
310:                    for (int i = count; i-- != 0;)
311:                        position[i] = this .positionCache[i];
312:                    return count;
313:                }
314:
315:                public int nextDocument() throws IOException {
316:                    if (DEBUG)
317:                        System.err.println("{" + this  + "} nextDocument()");
318:                    if (state != BEFORE_POINTER) {
319:                        if (state == BEFORE_COUNT) {
320:                            state = BEFORE_POSITIONS;
321:                            count = ibs.readGamma() + 1;
322:                        }
323:                        if (state == BEFORE_POSITIONS) {
324:                            // Here we just skip; note that the state change is necessary if endOfList() is true
325:                            state = BEFORE_POINTER;
326:                            ibs.skipDeltas(count);
327:                        }
328:                    }
329:                    if (endOfList())
330:                        return -1;
331:                    if (hasPointers) {// We do not write pointers for everywhere occurring terms.
332:                        currentDocument += ibs.readDelta() + 1;
333:                    } else
334:                        currentDocument++;
335:                    numberOfDocumentRecord++;
336:                    state = BEFORE_COUNT;
337:                    count = -1;
338:                    return currentDocument;
339:                }
340:
341:                public int skipTo(final int p) throws IOException {
342:                    if (DEBUG)
343:                        System.err.println(this  + ".skipTo(" + p
344:                                + ") [currentDocument=" + currentDocument
345:                                + ", numberOfDocumentRecord="
346:                                + numberOfDocumentRecord + ", endOfList()="
347:                                + endOfList());
348:                    // If we are just at the start of a list, let us read the first pointer.
349:                    if (numberOfDocumentRecord == -1)
350:                        nextDocument(); // TODO: shouldn't we just read the tower?
351:                    if (currentDocument >= p) {
352:                        if (DEBUG)
353:                            System.err.println(this 
354:                                    + ": No skip necessary, returning "
355:                                    + currentDocument);
356:                        return currentDocument;
357:                    }
358:                    while (currentDocument < p) {
359:                        if (DEBUG)
360:                            System.err
361:                                    .println(this 
362:                                            + ": Skipping sequentially (second), currentDocument="
363:                                            + currentDocument
364:                                            + ", numberOfDocumentRecord="
365:                                            + numberOfDocumentRecord + ", p="
366:                                            + p);
367:                        if (nextDocument() == -1) {
368:                            if (DEBUG)
369:                                System.err.println(this 
370:                                        + ": end-of-list, returning MAX_VALUE");
371:                            return Integer.MAX_VALUE;
372:                        }
373:                    }
374:                    if (DEBUG)
375:                        System.err.println(this  + ".toSkip(): Returning "
376:                                + currentDocument);
377:                    return currentDocument;
378:                }
379:
380:                public void dispose() throws IOException {
381:                    parent.close();
382:                }
383:
384:                public boolean hasNext() {
385:                    return !endOfList();
386:                }
387:
388:                public int nextInt() {
389:                    if (!hasNext())
390:                        throw new NoSuchElementException();
391:                    try {
392:                        return nextDocument();
393:                    } catch (IOException e) {
394:                        throw new RuntimeException(e);
395:                    }
396:                }
397:
398:                public String toString() {
399:                    return index + " [" + currentTerm + "]";
400:                }
401:
402:                /** An interval iterator returning the positions of the current document as singleton intervals. */
403:                private final class IndexIntervalIterator extends
404:                        AbstractObjectIterator<Interval> implements 
405:                        IntervalIterator {
406:                    int pos = -1;
407:
408:                    public void reset() throws IOException {
409:                        pos = -1;
410:                        if (state <= BEFORE_POSITIONS)
411:                            updatePositionCache(); // This guarantees the position cache is ok 
412:                    }
413:
414:                    public void intervalTerms(final IntSet terms) {
415:                        terms
416:                                .add(BitStreamIndexReaderIndexIterator.this .currentTerm);
417:                    }
418:
419:                    public boolean hasNext() {
420:                        return pos < count - 1;
421:                    }
422:
423:                    public Interval next() {
424:                        if (!hasNext())
425:                            throw new NoSuchElementException();
426:                        return Interval.valueOf(positionCache[++pos]);
427:                    }
428:
429:                    public Interval nextInterval() {
430:                        return pos < count - 1 ? Interval
431:                                .valueOf(positionCache[++pos]) : null;
432:                    }
433:
434:                    public int extent() {
435:                        return 1;
436:                    }
437:
438:                    public String toString() {
439:                        return index + ": " + term + "[doc=" + currentDocument
440:                                + ", count=" + count + ", pos=" + pos + "]";
441:                    }
442:                };
443:
444:                public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators()
445:                        throws IOException {
446:                    intervalIterator();
447:                    return singletonIntervalIterator;
448:                }
449:
450:                public IntervalIterator intervalIterator() throws IOException {
451:                    return intervalIterator(keyIndex);
452:                }
453:
454:                public IntervalIterator intervalIterator(final Index index)
455:                        throws IOException {
456:                    if (ASSERTS)
457:                        ensureCurrentDocument();
458:                    // TODO: this was if ( index != keyIndex || hasPayloads ) 
459:                    if (index != keyIndex)
460:                        return IntervalIterators.TRUE;
461:                    if (ASSERTS)
462:                        assert intervalIterator != null;
463:                    intervalIterator.reset();
464:                    return intervalIterator;
465:                }
466:
467:                public ReferenceSet<Index> indices() {
468:                    return index.singletonSet;
469:                }
470:            }
471:
472:            private IndexIterator documents(final CharSequence term,
473:                    final int termNumber) throws IOException {
474:                indexIterator.term(term);
475:                indexIterator.position(termNumber);
476:                return indexIterator;
477:            }
478:
479:            public IndexIterator documents(final int term) throws IOException {
480:                return documents(null, term);
481:            }
482:
483:            public IndexIterator documents(final CharSequence term)
484:                    throws IOException {
485:                if (closed)
486:                    throw new IllegalStateException("This "
487:                            + getClass().getSimpleName() + " has been closed");
488:                if (index.termMap != null) {
489:                    final int termIndex = (int) index.termMap.getLong(term);
490:                    if (termIndex == -1)
491:                        return index.emptyIndexIterator;
492:                    return documents(term, termIndex);
493:                }
494:                throw new UnsupportedOperationException("Index " + index
495:                        + " has no term map");
496:            }
497:
498:            @Override
499:            public IndexIterator nextIterator() throws IOException {
500:                return indexIterator.advance();
501:            }
502:
503:            public String toString() {
504:                return getClass().getSimpleName() + "[" + index + "]";
505:            }
506:
507:            public void close() throws IOException {
508:                super.close();
509:                indexIterator.ibs.close();
510:            }
511:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.