001: package it.unimi.dsi.mg4j.search;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2003-2007 Paolo Boldi and Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.ints.IntArrays;
025: import it.unimi.dsi.fastutil.ints.IntSet;
026: import it.unimi.dsi.mg4j.index.Index;
027: import it.unimi.dsi.util.Interval;
028: import it.unimi.dsi.util.Intervals;
029:
030: import java.io.IOException;
031:
032: /** An iterator returning documents containing nonoverlapping intervals in query order
033: * satisfying the underlying queries.
034: *
035: * <p>In practice, this iterator implements <em>strictly ordered AND</em>, which is
036: * satisfied when the subqueries are satisfied by nonoverlapping intervals in query order.
037: */
038:
039: public class OrderedAndDocumentIterator extends
040: AbstractOrderedIntervalDocumentIterator {
041:
042: @SuppressWarnings("hiding")
043: private final static boolean ASSERTS = false;
044:
045: /** Returns a document iterator that computes the ordered AND of the given array of iterators.
046: *
047: * <P>Note that the special case of the empty and of the singleton arrays
048: * are handled efficiently.
049: *
050: * @param numberOfDocuments the number of documents; relevant only if <code>it</code> has zero length.
051: * @param documentIterator the iterators to be joined.
052: * @return a document iterator that computes the ordered AND of <code>it</code>.
053: * @throws IOException
054: */
055: public static DocumentIterator getInstance(
056: final int numberOfDocuments,
057: final DocumentIterator... documentIterator)
058: throws IOException {
059: if (documentIterator.length == 0)
060: return NotDocumentIterator
061: .getInstance(DocumentIterators.EMPTY_ITERATOR,
062: numberOfDocuments);
063: if (documentIterator.length == 1)
064: return documentIterator[0];
065: return new OrderedAndDocumentIterator(documentIterator);
066: }
067:
068: /** Returns a document iterator that computes the ordered AND of the given nonzero-length array of iterators.
069: *
070: * <P>Note that the special case of the singleton array is handled efficiently.
071: *
072: * @param documentIterator the iterators to be joined (at least one).
073: * @return a document iterator that computes the ordered AND of <code>it</code>.
074: * @throws IOException
075: */
076: public static DocumentIterator getInstance(
077: final DocumentIterator... documentIterator)
078: throws IOException {
079: if (documentIterator.length == 0)
080: throw new IllegalArgumentException();
081: return getInstance(-1, documentIterator);
082: }
083:
084: protected OrderedAndDocumentIterator(
085: final DocumentIterator[] documentIterator)
086: throws IOException {
087: super (documentIterator);
088: }
089:
090: protected IntervalIterator getComposedIntervalIterator(
091: final Index unused) {
092: if (ASSERTS)
093: assert unused == soleIndex;
094: return indexIterator == null ? new OrderedAndIntervalIterator()
095: : new OrderedAndIndexIntervalIterator();
096: }
097:
098: /** An interval iterator returning the ordered AND of the component iterators
099: * (i.e., intervals made of sequences of intervals
100: * of the component iterator, in the given order).
101: *
102: * <p>In this implementation, {@link #advanced} can be true
103: * even when {@link AbstractOrderedIntervalIterator#endOfProcess} is true, as a candidate
104: * can be ready to be returned even if the do-while loop in {@link #hasNext()} has
105: * set {@link AbstractOrderedIntervalIterator#endOfProcess}.
106: */
107:
108: private class OrderedAndIntervalIterator extends
109: AbstractOrderedIntervalIterator {
110: @SuppressWarnings("hiding")
111: private final static boolean DEBUG = false;
112: /** Whether the scan is over. */
113: private boolean endOfProcess;
114: /** The index of the next list to be aligned (from 0 to {@link #m}). */
115: private int toBeAligned;
116: /** The number of non-{@link IntervalIterators#TRUE} interval iterator. Only
117: * elements with index smaller than this value are valid in {@link AbstractCompositeIntervalIterator#intervalIterator}. */
118: private int m;
119:
120: /** Loads {@link #curr} with the first interval from each non-{@link IntervalIterators#TRUE} iterator, leaving
121: * in {@link #m} the number of non-{@link IntervalIterators#TRUE} iterators.
122: */
123:
124: public void reset() throws IOException {
125: m = 0;
126: next = null;
127: toBeAligned = 1;
128: endOfProcess = false;
129:
130: for (int i = 0; i < n; i++) {
131: intervalIterator[m] = documentIterator[i]
132: .intervalIterator();
133: if (intervalIterator[m] != IntervalIterators.TRUE) {
134: if (ASSERTS)
135: assert intervalIterator[m].hasNext();
136: curr[m++] = Intervals.MINUS_INFINITY;
137: }
138: }
139:
140: if (m == 0)
141: throw new IllegalStateException();
142: endOfProcess = (curr[0] = intervalIterator[0]
143: .nextInterval()) == null;
144: }
145:
146: public void intervalTerms(final IntSet terms) {
147: for (int i = n; i-- != 0;)
148: intervalIterator[i].intervalTerms(terms);
149: }
150:
151: public Interval nextInterval() throws IOException {
152: if (next != null) {
153: final Interval result = next;
154: next = null;
155: return result;
156: }
157:
158: if (endOfProcess)
159: return null;
160:
161: final Interval[] curr = this .curr;
162: final IntervalIterator[] intervalIterator = this .intervalIterator;
163: final int m = this .m;
164: // We have to decrease leftOfLast to avoid overflows. Do not test it against Integer.MAX_VALUE.
165: int nextLeft = Integer.MAX_VALUE, nextRight = Integer.MAX_VALUE, leftOfLast = Integer.MAX_VALUE - 1;
166:
167: int i = toBeAligned;
168:
169: for (;;) {
170: if (DEBUG)
171: System.err.println("Current candidate: "
172: + Interval.valueOf(nextLeft, nextRight));
173:
174: for (;;) {
175:
176: if (curr[i - 1].right >= leftOfLast - (m - i - 1)) {
177: // If we're here the last interval we obtained is aligned, but it cannot completed to an alignment smaller than [nextLeft..nextRight]
178: toBeAligned = i;
179: if (ASSERTS)
180: assert nextLeft != Integer.MAX_VALUE;
181: return Interval.valueOf(nextLeft, nextRight);
182: }
183:
184: if (i == m || curr[i].left > curr[i - 1].right)
185: break;
186:
187: do {
188: if (curr[i].right >= leftOfLast - (m - i - 2)
189: || (curr[i] = intervalIterator[i]
190: .nextInterval()) == null) {
191: toBeAligned = i;
192: endOfProcess = curr[i] == null;
193: return nextLeft == Integer.MAX_VALUE ? null
194: : Interval.valueOf(nextLeft,
195: nextRight);
196: }
197: } while (curr[i].left <= curr[i - 1].right);
198:
199: i++;
200: }
201:
202: nextLeft = curr[0].left;
203: nextRight = curr[m - 1].right;
204: leftOfLast = curr[m - 1].left;
205: i = 1;
206:
207: if ((curr[0] = intervalIterator[0].nextInterval()) == null) {
208: endOfProcess = true;
209: toBeAligned = 1;
210: return Interval.valueOf(nextLeft, nextRight);
211: }
212: }
213: }
214:
215: public int extent() {
216: int s = 0;
217: for (int i = m; i-- != 0;)
218: s += intervalIterator[i].extent();
219: return s;
220: }
221: }
222:
223: /** An interval iterator returning the BLOCK of the component iterator
224: * (i.e., intervals made of sequences of consecutive intervals
225: * of the component iterator, in the given order).
226: *
227: * <p>In this implementation, {@link #advanced} is
228: * never true when {@link AbstractOrderedIntervalIterator#endOfProcess} is true.
229: */
230:
231: private class OrderedAndIndexIntervalIterator extends
232: AbstractOrderedIndexIntervalIterator {
233: /** Whether the scan is over. */
234: private boolean endOfProcess;
235: /** The index of the next list to be aligned. */
236: private int toBeAligned;
237:
238: public void reset() throws IOException {
239: final int[][] position = this .position;
240: final int[] curr = this .curr;
241: final int[] count = this .count;
242:
243: IntArrays.fill(currPos, -1);
244: for (int i = n; i-- != 0;) {
245: count[i] = indexIterator[i].count();
246: position[i] = indexIterator[i].positionArray();
247: curr[i] = Integer.MIN_VALUE;
248: }
249: next = null;
250: toBeAligned = 1;
251: endOfProcess = false;
252: curr[0] = position[0][currPos[0] = 0];
253: }
254:
255: public void intervalTerms(final IntSet terms) {
256: for (int i = n; i-- != 0;)
257: terms.add(indexIterator[i].termNumber());
258: }
259:
260: public Interval nextInterval() {
261: if (next != null) {
262: final Interval result = next;
263: next = null;
264: return result;
265: }
266:
267: if (endOfProcess)
268: return null;
269:
270: // We have to decrease nextRight to avoid overflows. Do not test it against Integer.MAX_VALUE.
271: int nextLeft = Integer.MAX_VALUE, nextRight = Integer.MAX_VALUE - 1;
272: final int[][] position = this .position;
273: final int[] currPos = this .currPos;
274: final int[] count = this .count;
275: final int[] curr = this .curr;
276: final int n = OrderedAndDocumentIterator.this .n;
277:
278: int i = toBeAligned;
279:
280: for (;;) {
281: if (DEBUG)
282: System.err.println("Current candidate: "
283: + Interval.valueOf(nextLeft, nextRight));
284: for (;;) {
285: if (curr[i - 1] >= nextRight - (n - i - 1)) {
286: // If we're here the last position we obtained is aligned, but it cannot completed to an alignment smaller than [nextLeft..nextRight]
287: toBeAligned = i;
288: if (ASSERTS)
289: assert nextLeft != Integer.MAX_VALUE;
290: return Interval.valueOf(nextLeft, nextRight);
291: }
292:
293: // Note that in this particular case we must check that this is not the first iteration of the external loop
294: if (i == n || curr[i] > curr[i - 1])
295: break;
296:
297: do {
298: // For singletons, curr[ i ] >= nextRight - ( n - i - 2 ) is always false here.
299: if (ASSERTS)
300: assert curr[i] < nextRight - (n - i - 2);
301: if (++currPos[i] == count[i]) {
302: endOfProcess = true;
303: return nextLeft == Integer.MAX_VALUE ? null
304: : Interval.valueOf(nextLeft,
305: nextRight);
306: } else
307: curr[i] = position[i][currPos[i]];
308: } while (curr[i] <= curr[i - 1]);
309:
310: i++;
311: }
312:
313: nextLeft = curr[0];
314: nextRight = curr[n - 1];
315: i = 1;
316:
317: if (++currPos[0] == count[0]) {
318: endOfProcess = true;
319: return Interval.valueOf(nextLeft, nextRight);
320: }
321:
322: curr[0] = position[0][currPos[0]];
323: }
324: }
325: }
326: }
|