001: package it.unimi.dsi.mg4j.search;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2005-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITfNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.ints.AbstractIntIterator;
025: import it.unimi.dsi.fastutil.ints.IntSet;
026: import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;
027: import it.unimi.dsi.fastutil.objects.ObjectArrayList;
028: import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;
029: import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
030: import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
031: import it.unimi.dsi.fastutil.objects.ReferenceSet;
032: import it.unimi.dsi.mg4j.index.Index;
033: import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;
034: import it.unimi.dsi.util.Interval;
035:
036: import java.io.IOException;
037: import java.util.NoSuchElementException;
038:
039: /** A decorator that caches the intervals produced by the underlying document iterator.
040: *
041: * <P>Often, scores exhaust the intervals produced by a document iterator to compute their
042: * result. However, often you also need those intervals for other purposes (maybe just
043: * because you are aggregating several interval-based scorers). Decorating a document
044: * iterator with an instance of this class you get again a document iterator, but its intervals can
045: * be retrieved several times by calling {@link #intervalIterator(Index)}, {@link #intervalIterator()} and
046: * {@link #intervalIterators()}.
047: *
048: * <strong>Important</strong>: calls are <em>not nestable</em>: when you require again an iterator,
049: * the one previously returned is no longer valid, and when the current document changes (e.g.,
050: * because of a call to {@link #nextDocument()}) the previously returned interval iterators are invalidated.
051: *
052: * @author Sebastiano Vigna
053: * @since 0.9.1
054: */
055: public class CachingDocumentIterator extends AbstractIntIterator
056: implements DocumentIterator {
057:
058: /** The underlying document iterator. */
059: private final DocumentIterator documentIterator;
060:
061: /** If not <code>null</code>, the sole index involved in this iterator. */
062: final private Index soleIndex;
063:
064: /** A map from indices to caching iterators. We reuse iterators, and we create
065: * them on demand, so we must keep track of them separately from the set of current
066: * iterators (which will be returned to the user). */
067: final private Reference2ReferenceArrayMap<Index, CachingIntervalIterator> cachingIterators;
068:
069: /** A map from indices to the iterators already returned for the current document. The key set may
070: * not contain an index because the related iterator has never been requested. */
071: final private Reference2ReferenceArrayMap<Index, IntervalIterator> currentIterators;
072:
073: /** An unmodifiable wrapper around {@link #currentIterators}. */
074: final private Reference2ReferenceMap<Index, IntervalIterator> unmodifiableCurrentIterators;
075:
076: public CachingDocumentIterator(
077: final DocumentIterator documentIterator) {
078: this .documentIterator = documentIterator;
079: final int n = documentIterator.indices().size();
080: soleIndex = n == 1 ? indices().iterator().next() : null;
081:
082: this .currentIterators = new Reference2ReferenceArrayMap<Index, IntervalIterator>(
083: new Index[n], new IntervalIterator[n]);
084: this .cachingIterators = new Reference2ReferenceArrayMap<Index, CachingIntervalIterator>(
085: new Index[n], new CachingIntervalIterator[n]);
086: this .unmodifiableCurrentIterators = Reference2ReferenceMaps
087: .unmodifiable(currentIterators);
088: }
089:
090: public int document() {
091: return documentIterator.document();
092: }
093:
094: public boolean hasNext() {
095: return documentIterator.hasNext();
096: }
097:
098: public ReferenceSet<Index> indices() {
099: return documentIterator.indices();
100: }
101:
102: /** An instance of this class caches lazily the intervals returning by the underlying interval iterator, and can
103: * return them from the start after a call to {@link #restart()}. */
104:
105: private final static class CachingIntervalIterator extends
106: AbstractObjectIterator<Interval> implements
107: IntervalIterator {
108: /** The list of cached intervals (must be emitted before the ones returned by the iterator). */
109: private final ObjectArrayList<Interval> cachedIntervals = new ObjectArrayList<Interval>();
110:
111: /** The actual iterator (might be partially exhausted). */
112: private IntervalIterator intervalIterator;
113:
114: /** The current position in the cached intervals array. */
115: private int pos = 0;
116:
117: /** Sets the underlying iterator.
118: *
119: * <P>Most MG4J classes reuse interval iterators, but in principle a document
120: * iterator might return a different interval iterator at each call. This
121: * method is used to set it.
122: *
123: * @param intervalIterator the new underlying iterator.
124: */
125: public void wrap(final IntervalIterator intervalIterator) {
126: this .intervalIterator = intervalIterator;
127: }
128:
129: public int extent() {
130: return intervalIterator.extent();
131: }
132:
133: public boolean hasNext() {
134: return pos < cachedIntervals.size()
135: || intervalIterator.hasNext();
136: }
137:
138: @Deprecated
139: public Interval next() {
140: if (!hasNext())
141: throw new NoSuchElementException();
142:
143: if (pos < cachedIntervals.size())
144: return cachedIntervals.get(pos++);
145: else {
146: final Interval next = intervalIterator.next();
147: cachedIntervals.add(next);
148: pos++;
149: return next;
150: }
151: }
152:
153: public Interval nextInterval() throws IOException {
154: if (pos < cachedIntervals.size())
155: return cachedIntervals.get(pos++);
156: else {
157: final Interval next = intervalIterator.nextInterval();
158: if (next == null)
159: return null;
160: cachedIntervals.add(next);
161: pos++;
162: return next;
163: }
164: }
165:
166: public void reset() {
167: cachedIntervals.clear();
168: restart();
169: }
170:
171: public void restart() {
172: pos = 0;
173: }
174:
175: public void intervalTerms(IntSet terms) {
176: // ALERT: implement this
177: throw new UnsupportedOperationException(
178: "This should be implemented!");
179: }
180:
181: };
182:
183: public IntervalIterator intervalIterator(final Index index)
184: throws IOException {
185: if (!documentIterator.indices().contains(index))
186: return IntervalIterators.TRUE;
187: CachingIntervalIterator result = cachingIterators.get(index);
188:
189: if (currentIterators.containsKey(index)) {
190: // the interval iterator for index is result
191: result.restart();
192: return result;
193: }
194:
195: final IntervalIterator intervalIterator = documentIterator
196: .intervalIterator(index);
197: if (intervalIterator == IntervalIterators.TRUE
198: || !intervalIterator.hasNext())
199: return intervalIterator;
200:
201: // We instantiate caching iterators on demand
202: if (result == null)
203: cachingIterators.put(index,
204: result = new CachingIntervalIterator());
205: result.wrap(intervalIterator);
206: result.reset();
207: currentIterators.put(index, result);
208:
209: return result;
210: }
211:
212: public IntervalIterator intervalIterator() throws IOException {
213: if (soleIndex == null)
214: throw new IllegalStateException();
215: return intervalIterator(soleIndex);
216: }
217:
218: public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators()
219: throws IOException {
220: for (Index i : indices())
221: intervalIterator(i);
222: return unmodifiableCurrentIterators;
223: }
224:
225: public int nextDocument() throws IOException {
226: currentIterators.clear();
227: return documentIterator.nextDocument();
228: }
229:
230: public int nextInt() {
231: currentIterators.clear();
232: return nextInt();
233: }
234:
235: public int skipTo(final int n) throws IOException {
236: currentIterators.clear();
237: return documentIterator.skipTo(n);
238: }
239:
240: public void dispose() throws IOException {
241: documentIterator.dispose();
242: }
243:
244: public boolean accept(DocumentIteratorVisitor visitor)
245: throws IOException {
246: return documentIterator.accept(visitor);
247: }
248:
249: public boolean acceptOnTruePaths(DocumentIteratorVisitor visitor)
250: throws IOException {
251: return documentIterator.acceptOnTruePaths(visitor);
252: }
253:
254: public IntervalIterator iterator() {
255: try {
256: return intervalIterator();
257: } catch (IOException e) {
258: throw new RuntimeException(e);
259: }
260: }
261: }
|