001: package it.unimi.dsi.mg4j.search;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2003-2007 Paolo Boldi and Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;
025: import it.unimi.dsi.fastutil.ints.IntSet;
026: import it.unimi.dsi.fastutil.objects.ObjectHeapSemiIndirectPriorityQueue;
027: import it.unimi.dsi.mg4j.index.Index;
028: import it.unimi.dsi.mg4j.index.IndexIterator;
029: import it.unimi.dsi.util.Interval;
030: import it.unimi.dsi.util.Intervals;
031:
032: import java.io.IOException;
033:
034: /** An iterator on documents that returns the OR of a number of document iterators.
035: *
036: * <P>This class adds to {@link it.unimi.dsi.mg4j.search.AbstractUnionDocumentIterator}
037: * an interval iterator generating the OR of the intervals returned for each of the documents involved.
038: */
039:
040: public class OrDocumentIterator extends AbstractUnionDocumentIterator
041: implements DocumentIterator {
042: @SuppressWarnings("unused")
043: private final static boolean DEBUG = false;
044:
045: /** Returns a document iterator that computes the OR of the given array of iterators.
046: *
047: * <P>Note that the special case of the empty and of the singleton arrays
048: * are handled efficiently.
049: *
050: * @param documentIterator the iterators to be joined.
051: * @return a document iterator that computes the OR of <code>it</code>.
052: * @throws IOException
053: */
054: public static DocumentIterator getInstance(
055: final DocumentIterator... documentIterator)
056: throws IOException {
057: if (documentIterator.length == 0)
058: return DocumentIterators.EMPTY_ITERATOR;
059: if (documentIterator.length == 1)
060: return documentIterator[0];
061: return new OrDocumentIterator(documentIterator);
062: }
063:
064: /** Creates a new document iterator that computes the OR of the given array of iterators.
065: * @param documentIterator the iterators to be joined.
066: * @throws IOException
067: */
068: protected OrDocumentIterator(
069: final DocumentIterator... documentIterator)
070: throws IOException {
071: super (documentIterator);
072: }
073:
074: protected IntervalIterator getComposedIntervalIterator(
075: final Index index) {
076: return indexIterator == null ? new OrIntervalIterator(index)
077: : new OrIndexIntervalIterator(index);
078: }
079:
080: /** An interval iterator of nondecreasing disjoint intervals
081: * obtained ORing the output of a set of iterators with
082: * the same property.
083: */
084:
085: private class OrIntervalIterator extends
086: AbstractCompositeIntervalIterator {
087: /** The index of this iterator. */
088: final Index index;
089: /** A heap-based indirect priority queue used to keep track of the currently scanned intervals. */
090: private ObjectHeapSemiIndirectPriorityQueue<Interval> intervalQueue;
091: /** The left extreme of the last returned interval, or {@link Integer#MIN_VALUE} after a {@link #reset()}. */
092: private int lastLeft;
093: /** An array to hold the front of the interval queue. */
094: private final int[] intervalFront;
095:
096: /** Creates a new OR interval iterator.
097: *
098: * @param index the index of the iterator.
099: */
100: public OrIntervalIterator(final Index index) {
101: super (n);
102: // We just set up some internal data, but we perform no initialisation.
103: this .index = index;
104: intervalQueue = new ObjectHeapSemiIndirectPriorityQueue<Interval>(
105: curr, Intervals.ENDS_BEFORE_OR_IS_SUFFIX);
106: intervalFront = new int[n];
107: }
108:
109: public void reset() throws IOException {
110: lastLeft = Integer.MIN_VALUE;
111: next = null;
112: intervalQueue.clear();
113:
114: for (int i = computeFront(), k; i-- != 0;) {
115: k = front[i];
116: intervalIterator[k] = documentIterator[k]
117: .intervalIterator(index);
118: if (intervalIterator[k] != IntervalIterators.TRUE
119: && (curr[k] = intervalIterator[k]
120: .nextInterval()) != null)
121: intervalQueue.enqueue(k);
122: }
123: }
124:
125: public void intervalTerms(final IntSet terms) {
126: final int frontSize = intervalQueue.front(intervalFront);
127: final int[] intervalFront = this .intervalFront;
128: for (int i = frontSize; i-- != 0;)
129: intervalIterator[intervalFront[i]].intervalTerms(terms);
130: }
131:
132: public Interval nextInterval() throws IOException {
133: if (next != null) {
134: final Interval result = next;
135: next = null;
136: return result;
137: }
138:
139: if (intervalQueue.isEmpty())
140: return null;
141:
142: int first;
143:
144: while (curr[first = intervalQueue.first()].left <= lastLeft) {
145: if ((curr[first] = intervalIterator[first]
146: .nextInterval()) != null)
147: intervalQueue.changed();
148: else {
149: intervalQueue.dequeue();
150: if (intervalQueue.isEmpty())
151: return null;
152: }
153: }
154:
155: lastLeft = curr[first].left;
156: return curr[first];
157: }
158:
159: public int extent() {
160: int e, s;
161:
162: s = Integer.MAX_VALUE;
163: for (int i = computeFront(), k; i-- != 0;) {
164: k = front[i];
165: e = curr[k] != null ? intervalIterator[k].extent()
166: : Integer.MAX_VALUE;
167: if (e < s)
168: s = e;
169: }
170: return s;
171: }
172: }
173:
174: /** An optimised interval iterator with the same semantics as that implemented
175: * by {@link OrDocumentIterator}, but using just {@link IndexIterator#positionArray()}.
176: */
177: protected class OrIndexIntervalIterator extends
178: AbstractCompositeIndexIntervalIterator implements
179: IntervalIterator {
180: @SuppressWarnings({"unused","hiding"})
181: private final static boolean DEBUG = false;
182: @SuppressWarnings("hiding")
183: private final static boolean ASSERTS = false;
184:
185: /** The index of this iterator. */
186: final Index index;
187: /** A heap-based semi-indirect priority queue used to keep track of the currently scanned positions. */
188: private final IntHeapSemiIndirectPriorityQueue positionQueue;
189: /** An array to hold the front of the position queue. */
190: private final int[] positionFront;
191:
192: public OrIndexIntervalIterator(final Index index) {
193: super (n);
194: this .index = index;
195: positionQueue = new IntHeapSemiIndirectPriorityQueue(curr);
196: positionFront = new int[n];
197: }
198:
199: public void reset() throws IOException {
200: positionQueue.clear();
201:
202: for (int i = computeFront(), k; i-- != 0;) {
203: k = front[i];
204: if (indexIterator[k].index() == index) {
205: position[k] = indexIterator[k].positionArray();
206: count[k] = indexIterator[k].count();
207: curr[k] = position[k][currPos[k] = 0];
208: positionQueue.enqueue(k);
209: }
210: }
211:
212: if (ASSERTS)
213: assert !positionQueue.isEmpty();
214: next = Interval.valueOf(curr[positionQueue.first()]);
215: }
216:
217: public void intervalTerms(final IntSet terms) {
218: final int frontSize = positionQueue.front(positionFront);
219: final int[] positionFront = this .positionFront;
220: for (int i = frontSize; i-- != 0;)
221: terms.add(indexIterator[positionFront[i]].termNumber());
222: }
223:
224: public Interval nextInterval() {
225: if (next != null) {
226: final Interval result = next;
227: next = null;
228: return result;
229: }
230:
231: if (positionQueue.isEmpty())
232: return null;
233:
234: int first = positionQueue.first();
235: final int previous = curr[first];
236:
237: do
238: if (++currPos[first] == count[first]) {
239: positionQueue.dequeue();
240: if (positionQueue.isEmpty())
241: return null;
242: } else {
243: curr[first] = position[first][currPos[first]];
244: positionQueue.changed();
245: }
246: while (curr[first = positionQueue.first()] == previous);
247:
248: return Interval.valueOf(curr[first]);
249: }
250:
251: public int extent() {
252: return 1;
253: }
254: }
255: }
|