001: package test.it.unimi.dsi.mg4j.search;
002:
003: import it.unimi.dsi.fastutil.ints.AbstractIntIterator;
004: import it.unimi.dsi.fastutil.ints.IntIterator;
005: import it.unimi.dsi.fastutil.ints.IntIterators;
006: import it.unimi.dsi.fastutil.ints.IntSet;
007: import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;
008: import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
009: import it.unimi.dsi.fastutil.objects.ReferenceSet;
010: import it.unimi.dsi.fastutil.objects.ReferenceSets;
011: import it.unimi.dsi.lang.MutableString;
012: import it.unimi.dsi.mg4j.index.Index;
013: import it.unimi.dsi.mg4j.index.IndexIterator;
014: import it.unimi.dsi.mg4j.index.IndexReader;
015: import it.unimi.dsi.mg4j.index.NullTermProcessor;
016: import it.unimi.dsi.mg4j.index.TooManyTermsException;
017: import it.unimi.dsi.mg4j.index.payload.Payload;
018: import it.unimi.dsi.mg4j.search.IntervalIterator;
019: import it.unimi.dsi.mg4j.search.IntervalIterators;
020: import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;
021: import it.unimi.dsi.util.Interval;
022:
023: import java.io.IOException;
024: import java.util.Arrays;
025: import java.util.NoSuchElementException;
026:
027: /** A partially implemented {@link IndexIterator index iterator} that returns
028: * a given list of documents and associated positions.
029: *
030: */
031:
032: public class IntArrayIndexIterator extends AbstractIntIterator
033: implements IndexIterator {
034: private final static Index index = new TestIndex();
035:
036: private static class TestIndex extends Index {
037: private static final long serialVersionUID = 1L;
038:
039: public TestIndex() {
040: super (Integer.MAX_VALUE, Integer.MAX_VALUE,
041: Integer.MAX_VALUE, Integer.MAX_VALUE,
042: Integer.MAX_VALUE, null, true, true,
043: NullTermProcessor.getInstance(), "text", null, null);
044: }
045:
046: public IndexIterator documents(CharSequence prefix, int limit)
047: throws IOException, TooManyTermsException {
048: throw new UnsupportedOperationException();
049: }
050:
051: public IndexReader getReader() throws IOException {
052: throw new UnsupportedOperationException();
053: }
054:
055: public IndexReader getReader(int bufferSize) throws IOException {
056: throw new UnsupportedOperationException();
057: }
058: }
059:
060: private final int[] document;
061: private final int[][] position;
062:
063: private int curr = -1;
064: private IntervalIterator currentIterator;
065: private String term;
066: private int id;
067: private final int termNumber;
068:
069: /** Creates a new array-based index iterator with term number 0.
070: *
071: * @param document an (increasing) array of documents that will be returned.
072: * @param position a parallel array of arrays of positions.
073: */
074:
075: public IntArrayIndexIterator(int[] document, int[][] position) {
076: this (0, document, position);
077: }
078:
079: /** Creates a new array-based index iterator.
080: *
081: * @param termNumber the term number of this iterator.
082: * @param document an (increasing) array of documents that will be returned.
083: * @param position a parallel array of arrays of positions.
084: */
085:
086: public IntArrayIndexIterator(final int termNumber, int[] document,
087: int[][] position) {
088: this .termNumber = termNumber;
089: this .document = document;
090: this .position = position;
091: if (document.length != position.length)
092: throw new IllegalArgumentException();
093: for (int i = 0; i < document.length - 1; i++)
094: if (document[i] >= document[i + 1])
095: throw new IllegalArgumentException(
096: "Document array is not increasing");
097: for (int i = 0; i < document.length; i++)
098: for (int j = position[i].length - 1; j-- != 0;)
099: if (position[i][j] >= position[i][j + 1])
100: throw new IllegalArgumentException(
101: "Non-increasing position list for document "
102: + i + ": "
103: + Arrays.toString(position[i]));
104: }
105:
106: public int termNumber() {
107: return termNumber;
108: }
109:
110: public boolean hasNext() {
111: return curr < document.length - 1;
112: }
113:
114: public int nextInt() {
115: if (!hasNext())
116: throw new NoSuchElementException();
117: curr++;
118: currentIterator = null;
119: return document[curr];
120: }
121:
122: public int nextDocument() {
123: if (!hasNext())
124: return -1;
125: return nextInt();
126: }
127:
128: public int skipTo(int n) {
129: if (curr != -1 && document[curr] >= n)
130: return document[curr];
131: int result;
132: while (hasNext())
133: if ((result = nextInt()) >= n)
134: return result;
135: return Integer.MAX_VALUE;
136: }
137:
138: public boolean accept(DocumentIteratorVisitor visitor)
139: throws IOException {
140: return visitor.visit(this );
141: }
142:
143: public boolean acceptOnTruePaths(DocumentIteratorVisitor visitor)
144: throws IOException {
145: return visitor.visit(this );
146: }
147:
148: public void dispose() {
149: }
150:
151: public int document() {
152: if (curr == -1)
153: throw new IllegalStateException();
154: return document[curr];
155: }
156:
157: public ReferenceSet<Index> indices() {
158: return ReferenceSets.singleton(index);
159: }
160:
161: public static class ArraySingletonIntervalIterator extends
162: AbstractObjectIterator<Interval> implements
163: IntervalIterator {
164: private int curr = -1;
165: private final int[] position;
166:
167: public ArraySingletonIntervalIterator(int[] position) {
168: this .position = position;
169: }
170:
171: public int extent() {
172: return 1;
173: }
174:
175: public void reset() {
176: curr = -1;
177: }
178:
179: public void intervalTerms(final IntSet terms) {
180: throw new UnsupportedOperationException();
181: }
182:
183: public boolean hasNext() {
184: return curr < position.length - 1;
185: }
186:
187: public Interval next() {
188: if (!hasNext())
189: throw new NoSuchElementException();
190: curr++;
191: return Interval.valueOf(position[curr]);
192: }
193:
194: public Interval nextInterval() {
195: if (!hasNext())
196: return null;
197: return next();
198: }
199:
200: public String toString() {
201: return Arrays.toString(position);
202: }
203: }
204:
205: public IntervalIterator intervalIterator() {
206: if (curr == -1)
207: throw new IllegalStateException();
208: if (currentIterator != null)
209: return currentIterator;
210: if (position[curr].length == 0)
211: return IntervalIterators.FALSE;
212: return currentIterator = new ArraySingletonIntervalIterator(
213: position[curr]);
214: }
215:
216: public IntervalIterator intervalIterator(Index index) {
217: return intervalIterator();
218: }
219:
220: public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators() {
221: throw new UnsupportedOperationException();
222: }
223:
224: public IntervalIterator iterator() {
225: return intervalIterator();
226: }
227:
228: public void reset() {
229: curr = -1;
230: }
231:
232: public String toString() {
233: MutableString result = new MutableString();
234: result.append('[');
235: for (int i = 0; i < document.length; i++) {
236: if (i != 0)
237: result.append(", ");
238: result.append('<').append(document[i]).append(':').append(
239: Arrays.toString(position[i])).append('>');
240: }
241: return result.append(']').toString();
242: }
243:
244: public int count() {
245: return position[curr].length;
246: }
247:
248: public int frequency() {
249: return document.length;
250: }
251:
252: public void id(int id) {
253: this .id = id;
254: }
255:
256: public int id() {
257: return id;
258: }
259:
260: public Index index() {
261: return index;
262: }
263:
264: public Payload payload() {
265: return null;
266: }
267:
268: public int[] positionArray() {
269: return position[curr];
270: }
271:
272: public IntIterator positions() {
273: return IntIterators.wrap(position[curr]);
274: }
275:
276: public int positions(int[] position) {
277: if (this .position[curr].length > position.length)
278: return -this .position[curr].length - 1;
279: System.arraycopy(this .position[curr], 0, position, 0,
280: this .position[curr].length);
281: return this .position[curr].length;
282: }
283:
284: public String term() {
285: return term;
286: }
287:
288: public void term(CharSequence term) {
289: this.term = term.toString();
290: }
291:
292: }
|