001: package it.unimi.dsi.mg4j.search;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2006-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.ints.IntArrayList;
025: import it.unimi.dsi.fastutil.objects.ReferenceArraySet;
026: import it.unimi.dsi.fastutil.objects.ReferenceSet;
027: import it.unimi.dsi.lang.MutableString;
028: import it.unimi.dsi.mg4j.index.Index;
029: import it.unimi.dsi.mg4j.index.IndexIterator;
030: import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;
031: import it.unimi.dsi.util.Interval;
032:
033: import java.io.IOException;
034:
035: /** An abstract iterator on documents, based on a list of component iterators.
036: *
037: * <p>The {@linkplain #AbstractCompositeDocumentIterator(DocumentIterator[]) constructor} caches
038: * into {@link #documentIterator} the component iterators, and sets up a number of protected
039: * fields that can be useful to implementors. It also provide abstract member classes that make it
040: * easier to implement interval iterators.
041: *
042: * <p>Note that this class implementa both {@link #accept(DocumentIteratorVisitor)}
043: * and {@link #acceptOnTruePaths(DocumentIteratorVisitor)} with a series of recursive
044: * calls on <em>all</em> component iterator. If you desire a different behaviour
045: * for {@link #acceptOnTruePaths(DocumentIteratorVisitor)} (see, e.g.,
046: * {@link it.unimi.dsi.mg4j.search.OrDocumentIterator}, please override it.
047: */
048:
049: public abstract class AbstractCompositeDocumentIterator extends
050: AbstractDocumentIterator implements DocumentIterator {
051: /** The component document iterators. */
052: protected final DocumentIterator[] documentIterator;
053: /** The number of component iterators. */
054: protected final int n;
055: /** A cached copy of {@link #documentIterator}, if all
056: * underlying iterators are {@linkplain IndexIterator index iterators}; <code>null</code>, otherwise. */
057: protected final IndexIterator[] indexIterator;
058: /** The set of indices involved in this iterator. */
059: protected final ReferenceArraySet<Index> indices = new ReferenceArraySet<Index>();
060: /** If not <code>null</code>, the sole index involved in this iterator. */
061: protected final Index soleIndex;
062:
063: /** Creates a new composite document iterator using a given list of component document iterators.
064: *
065: * @param documentIterator the component iterators.
066: */
067: public AbstractCompositeDocumentIterator(
068: final DocumentIterator... documentIterator) {
069: this .documentIterator = documentIterator;
070: this .n = documentIterator.length;
071:
072: /* Now, for each index involved we build a corresponding interval iterator.
073: * Note that the set indices() may contain indices from empty subqueries, too. */
074: for (int i = n; i-- != 0;)
075: indices.addAll(documentIterator[i].indices());
076: soleIndex = indices.size() == 1 ? indices.iterator().next()
077: : null;
078:
079: int i = n;
080: while (i-- != 0)
081: if (!(documentIterator[i] instanceof IndexIterator))
082: break;
083: if (i == -1) {
084: indexIterator = new IndexIterator[n];
085: System.arraycopy(documentIterator, 0, indexIterator, 0, n);
086: } else
087: indexIterator = null;
088: }
089:
090: public boolean accept(final DocumentIteratorVisitor visitor)
091: throws IOException {
092: if (!visitor.visitPre(this ))
093: return false;
094: for (int i = 0; i < n; i++)
095: if (documentIterator[i] != null
096: && !documentIterator[i].accept(visitor))
097: return false;
098: return visitor.visitPost(this );
099: }
100:
101: public boolean acceptOnTruePaths(
102: final DocumentIteratorVisitor visitor) throws IOException {
103: if (!visitor.visitPre(this ))
104: return false;
105: for (int i = 0; i < n; i++)
106: if (documentIterator[i] != null
107: && !documentIterator[i].acceptOnTruePaths(visitor))
108: return false;
109: return visitor.visitPost(this );
110: }
111:
112: public ReferenceSet<Index> indices() {
113: return indices;
114: }
115:
116: public IntervalIterator intervalIterator() throws IOException {
117: if (soleIndex == null)
118: throw new IllegalStateException();
119: return intervalIterator(soleIndex);
120: }
121:
122: public void dispose() throws IOException {
123: for (int i = n; i-- != 0;)
124: documentIterator[i].dispose();
125: }
126:
127: public String toString() {
128: StringBuilder res = new StringBuilder();
129: res.append(this .getClass().getSimpleName()).append("(");
130: for (int i = 0; i < n; i++)
131: res.append(i > 0 ? "," : "").append(documentIterator[i]);
132: return res.append(")").toString();
133: }
134:
135: /** An abstract interval iterator. Provide mainly storage for the {@linkplain #intervalIterator component interval iterators},
136: * place for {@linkplain #curr the last interval returned by each iterator} and {@link #toString()}. */
137:
138: protected abstract static class AbstractCompositeIntervalIterator
139: extends AbstractIntervalIterator {
140: /** The underlying iterators. */
141: protected IntervalIterator[] intervalIterator;
142: /** The last interval returned by each iterator. */
143: protected Interval[] curr;
144:
145: public AbstractCompositeIntervalIterator(final int n) {
146: // We just set up some internal data, but we perform no initialisation.
147: curr = new Interval[n];
148: intervalIterator = new IntervalIterator[n];
149: }
150:
151: public String toString() {
152: MutableString res = new MutableString();
153: res.append(this .getClass().getName()).append("(").delete(0,
154: res.lastIndexOf('.') + 1);
155: for (int i = 0; i < intervalIterator.length; i++)
156: res.append(i > 0 ? "," : "")
157: .append(intervalIterator[i]);
158: return res.append(")").toString();
159: }
160: }
161:
162: /** An abstract {@link IndexIterator}-based interval iterator. The difference with {@link AbstractCompositeIntervalIterator}
163: * is that this class assumes that all document iterators are actually index iterators.
164: * The algorithms in this (very common) case can be significantly simplified, obtaining
165: * a large gain in performance. */
166:
167: protected abstract static class AbstractCompositeIndexIntervalIterator
168: extends AbstractIntervalIterator {
169: /** The position arrays returned by each index iterator. */
170: protected int[][] position;
171: /** The index of current element of {@link #position} for each index iterator. */
172: protected int[] currPos;
173: /** At any time, <code>curr[ i ]</code> contains <code>position[ i ][ currPos[i ] ]</code>. */
174: protected int[] curr;
175: /** The number of elements of {@link #position} for each index iterator. */
176: protected int[] count;
177:
178: public AbstractCompositeIndexIntervalIterator(final int n) {
179: // We just set up some internal data, but we perform no initialisation.
180: position = new int[n][];
181: count = new int[n];
182: currPos = new int[n];
183: curr = new int[n];
184: }
185:
186: public String toString() {
187: MutableString res = new MutableString();
188: res.append(this .getClass().getName()).append("(").delete(0,
189: res.lastIndexOf('.') + 1);
190: for (int i = 0; i < position.length; i++)
191: res.append(i > 0 ? "," : "").append(
192: IntArrayList.wrap(position[i], count[i]));
193: return res.append(")").toString();
194: }
195: }
196:
197: }
|