001: package it.unimi.dsi.mg4j.search;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.ints.IntSet;
025: import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;
026: import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
027: import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
028: import it.unimi.dsi.fastutil.objects.ReferenceSet;
029: import it.unimi.dsi.mg4j.index.Index;
030: import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;
031: import it.unimi.dsi.util.Interval;
032: import it.unimi.dsi.util.Intervals;
033:
034: import java.io.IOException;
035:
036: /** A document iterator that computes the Brouwerian difference between two given document iterators.
037: *
038: * <p>In the lattice of interval antichains, the Brouwerian difference is obtained by deleting from
039: * the first operand all intervals that contain some interval of the second operand. Thus,
040: * Brouwerian difference can be fruitfully employed to kill intervals containing a term or, even
041: * more fruitfully, to change at query time the granularity of an index by subtracting from the
042: * results of a query those length-two intervals
043: * that cross the cutpoints between the desired parts of the index.
044: *
045: * <p>Additionally, this class provides <em>interval enlargment</em>—by using a suitable
046: * {@linkplain #getInstance(DocumentIterator, DocumentIterator, int, int) factory method} each interval
047: * returned by the subtrahend will be enlarged to the left and to the right by the given amount (e.g.,
048: * if the left margin is 1 and the right margin is 2 the interval [2..3] will turn into [1..5]).
049: *
050: * @author Sebastiano Vigna
051: * @since 1.2
052: */
053:
054: public class DifferenceDocumentIterator extends
055: AbstractDocumentIterator implements DocumentIterator {
056: private static final boolean DEBUG = false;
057: private final static boolean ASSERTS = false;
058:
059: /** The first operand. */
060: final private DocumentIterator minuendIterator;
061: /** The second operand. */
062: final private DocumentIterator subtrahendIterator;
063: /** If not <code>null</code>, the sole index involved in this iterator. */
064: final private Index soleIndex;
065: /** A map from indices to interval iterators. */
066: final private Reference2ReferenceArrayMap<Index, IntervalIterator> intervalIterators;
067: /** A map from indices to the iterators returned for the current document. The key set may
068: * not contain an index because the related iterator has never been requested. Moreover,
069: * the iterator in this map for a given index may differ from the one in {@link #intervalIterators}
070: * because it could be {@link IntervalIterators#TRUE} (in fact, in that case it may even
071: * happen that {@link #intervalIterators} does not contain the index). */
072: final private Reference2ReferenceArrayMap<Index, IntervalIterator> currentIterators;
073: /** An unmodifiable wrapper around {@link #currentIterators}. */
074: final private Reference2ReferenceMap<Index, IntervalIterator> unmodifiableCurrentIterators;
075:
076: /** A margin that will be added to the left of each interval. */
077: private final int leftMargin;
078: /** A margin that will be added to the right of each interval. */
079: private final int rightMargin;
080:
081: /** Creates a new difference document iterator given a minuend and a subtrahend iterator.
082: * @param minuendIterator the minuend.
083: * @param subtrahendIterator the subtrahend.
084: */
085: protected DifferenceDocumentIterator(
086: final DocumentIterator minuendIterator,
087: final DocumentIterator subtrahendIterator,
088: final int leftMargin, final int rightMargin) {
089: if (leftMargin < 0 || rightMargin < 0)
090: throw new IllegalArgumentException("Illegal margins: "
091: + leftMargin + ", " + rightMargin);
092: this .minuendIterator = minuendIterator;
093: this .subtrahendIterator = subtrahendIterator;
094: this .leftMargin = leftMargin;
095: this .rightMargin = rightMargin;
096: final int n = minuendIterator.indices().size();
097: soleIndex = n == 1 ? indices().iterator().next() : null;
098:
099: intervalIterators = new Reference2ReferenceArrayMap<Index, IntervalIterator>(
100: n);
101: currentIterators = new Reference2ReferenceArrayMap<Index, IntervalIterator>(
102: n);
103: unmodifiableCurrentIterators = Reference2ReferenceMaps
104: .unmodifiable(currentIterators);
105: }
106:
107: /** Returns new difference document iterator given a minuend and a subtrahend iterator.
108: * @param minuendIterator the minuend.
109: * @param subtrahendIterator the subtrahend.
110: */
111: public static DocumentIterator getInstance(
112: final DocumentIterator minuendIterator,
113: final DocumentIterator subtrahendIterator) {
114: return getInstance(minuendIterator, subtrahendIterator, 0, 0);
115: }
116:
117: /** Returns new difference document iterator given a minuend and a subtrahend iterator.
118: * @param minuendIterator the minuend.
119: * @param subtrahendIterator the subtrahend.
120: * @param leftMargin a margin that will be added to the left of each interval.
121: * @param rightMargin a margin that will be added to the right of each interval.
122: */
123: public static DocumentIterator getInstance(
124: final DocumentIterator minuendIterator,
125: final DocumentIterator subtrahendIterator,
126: final int leftMargin, final int rightMargin) {
127: // If the subtrahend is empty, the result is equal to the minuend.
128: if (!subtrahendIterator.hasNext())
129: return minuendIterator;
130: return new DifferenceDocumentIterator(minuendIterator,
131: subtrahendIterator, leftMargin, rightMargin);
132: }
133:
134: public ReferenceSet<Index> indices() {
135: return minuendIterator.indices();
136: }
137:
138: public int nextDocument() throws IOException {
139: if (next >= 0) {
140: last = next;
141: next = -1;
142: return last;
143: }
144:
145: do
146: currentIterators.clear();
147: while ((last = minuendIterator.nextDocument()) != -1
148: && !isValid());
149: return last;
150: }
151:
152: public int skipTo(final int n) throws IOException {
153: // The easy case.
154: if (last >= n)
155: return last;
156:
157: next = -1;
158: currentIterators.clear();
159: // We first try to get a candidate document.
160: last = minuendIterator.skipTo(n);
161: // If this doesn't work, be bail out.
162: if (last == Integer.MAX_VALUE) {
163: last = -1;
164: return Integer.MAX_VALUE;
165: }
166:
167: // Otherwise, we must manually check that we are on a valid document
168: if (isValid())
169: return last;
170: // If not, we invalidate and check whether there is another possible document.
171: return nextDocument() != -1 ? last : Integer.MAX_VALUE;
172: }
173:
174: private boolean isValid() throws IOException {
175: // An easy optimisation for the case in which the subtrahend does not include the current document.
176: if (subtrahendIterator.skipTo(last) != last)
177: return true;
178:
179: /* The policy here is that a difference iterator is valid is at least one of the underlying
180: * interval iterators would return at least one interval. */
181: if (soleIndex != null)
182: return intervalIterator(soleIndex).hasNext();
183:
184: for (Index index : indices())
185: if (intervalIterator(index).hasNext())
186: return true;
187: return false;
188: }
189:
190: public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators()
191: throws IOException {
192: if (last == -1)
193: throw new IllegalStateException();
194: for (Index index : indices())
195: intervalIterator(index);
196: return unmodifiableCurrentIterators;
197: }
198:
199: public IntervalIterator intervalIterator() throws IOException {
200: if (soleIndex == null)
201: throw new IllegalStateException();
202: return intervalIterator(soleIndex);
203: }
204:
205: public IntervalIterator intervalIterator(final Index index)
206: throws IOException {
207: if (last == -1)
208: throw new IllegalStateException();
209: if (DEBUG)
210: System.err.println(this + ".intervalIterator(" + index
211: + ")");
212: if (!minuendIterator.indices().contains(index))
213: return IntervalIterators.TRUE;
214:
215: IntervalIterator intervalIterator, subtrahendIntervalIterator;
216:
217: // If the iterator has been created and it's ready, we just return it.
218: if ((intervalIterator = currentIterators.get(index)) != null)
219: return intervalIterator;
220: intervalIterator = minuendIterator.intervalIterator(index);
221:
222: if (subtrahendIterator.document() == minuendIterator.document()
223: && intervalIterator.hasNext()
224: && (subtrahendIntervalIterator = subtrahendIterator
225: .intervalIterator(index)).hasNext()) {
226: if (subtrahendIntervalIterator == IntervalIterators.TRUE)
227: intervalIterator = IntervalIterators.FALSE;
228: else if (intervalIterator != IntervalIterators.TRUE) {
229: intervalIterator = intervalIterators.get(index);
230: if (intervalIterator == null)
231: intervalIterators
232: .put(
233: index,
234: intervalIterator = new DifferenceIntervalIterator(
235: index));
236: intervalIterator.reset();
237: }
238: }
239:
240: currentIterators.put(index, intervalIterator);
241:
242: if (DEBUG)
243: System.err.println("Returning interval iterator "
244: + intervalIterator);
245: return intervalIterator;
246: }
247:
248: public void dispose() throws IOException {
249: minuendIterator.dispose();
250: subtrahendIterator.dispose();
251: }
252:
253: public boolean accept(final DocumentIteratorVisitor visitor)
254: throws IOException {
255: return visitor.visitPre(this )
256: && minuendIterator.accept(visitor)
257: && subtrahendIterator.accept(visitor)
258: && visitor.visitPost(this );
259: }
260:
261: public boolean acceptOnTruePaths(
262: final DocumentIteratorVisitor visitor) throws IOException {
263: return visitor.visitPre(this )
264: && minuendIterator.acceptOnTruePaths(visitor)
265: && visitor.visitPost(this );
266: }
267:
268: public String toString() {
269: return getClass().getSimpleName()
270: + "("
271: + minuendIterator
272: + (leftMargin == 0 && rightMargin == 0 ? " - " : " -["
273: + leftMargin + "," + rightMargin + "] ")
274: + subtrahendIterator + ")";
275: }
276:
277: /** An interval iterator returning just the interval shorter than {@link #threshold}. */
278:
279: private class DifferenceIntervalIterator extends
280: AbstractIntervalIterator implements IntervalIterator {
281: /** The index of this iterator. */
282: final Index index;
283: /** The underlying minuend interal iterator. */
284: private IntervalIterator minuendIntervalIterator;
285: /** The underlying minuend interal iterator. */
286: private IntervalIterator subtrahendIntervalIterator;
287: /** The last interval returned by {@link #subtrahendIntervalIterator}. */
288: private Interval subtrahendInterval;
289:
290: public DifferenceIntervalIterator(final Index index) {
291: this .index = index;
292: }
293:
294: public void reset() throws IOException {
295: next = null;
296: subtrahendInterval = Intervals.MINUS_INFINITY;
297: minuendIntervalIterator = minuendIterator
298: .intervalIterator(index);
299: subtrahendIntervalIterator = subtrahendIterator
300: .intervalIterator(index);
301: if (ASSERTS)
302: assert minuendIntervalIterator != IntervalIterators.TRUE;
303: if (ASSERTS)
304: assert minuendIntervalIterator != IntervalIterators.FALSE;
305: if (ASSERTS)
306: assert minuendIntervalIterator.hasNext();
307: if (ASSERTS)
308: assert subtrahendIntervalIterator != IntervalIterators.TRUE;
309: if (ASSERTS)
310: assert subtrahendIntervalIterator != IntervalIterators.FALSE;
311: if (ASSERTS)
312: assert subtrahendIntervalIterator.hasNext();
313: }
314:
315: public void intervalTerms(final IntSet terms) {
316: // Just delegate to minuend
317: minuendIntervalIterator.intervalTerms(terms);
318: }
319:
320: public Interval nextInterval() throws IOException {
321: if (next != null) {
322: final Interval result = next;
323: next = null;
324: return result;
325: }
326:
327: if (subtrahendInterval == Intervals.MINUS_INFINITY)
328: subtrahendInterval = subtrahendIntervalIterator
329: .nextInterval();
330:
331: Interval minuendInterval;
332: while ((minuendInterval = minuendIntervalIterator
333: .nextInterval()) != null) {
334: while (subtrahendInterval != null
335: && subtrahendInterval.left - leftMargin < minuendInterval.left
336: && subtrahendInterval.right + rightMargin < minuendInterval.right)
337: subtrahendInterval = subtrahendIntervalIterator
338: .nextInterval();
339: if (subtrahendInterval == null
340: || subtrahendInterval.left - leftMargin < minuendInterval.left
341: || subtrahendInterval.right + rightMargin > minuendInterval.right)
342: return minuendInterval;
343: }
344:
345: return null;
346: }
347:
348: public int extent() {
349: return minuendIntervalIterator.extent(); // TODO: check this
350: }
351:
352: public String toString() {
353: return getClass().getSimpleName()
354: + "("
355: + minuendIntervalIterator
356: + (leftMargin == 0 && rightMargin == 0 ? " - "
357: : " -[" + leftMargin + "," + rightMargin
358: + "] ")
359: + subtrahendIntervalIterator + ")";
360: }
361: }
362: }
|