001: package it.unimi.dsi.mg4j.search;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2006-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.objects.Object2ReferenceMap;
025: import it.unimi.dsi.fastutil.objects.ObjectArrayList;
026: import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
027: import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
028: import it.unimi.dsi.mg4j.index.Index;
029: import it.unimi.dsi.mg4j.index.IndexIterator;
030: import it.unimi.dsi.mg4j.index.MultiTermIndexIterator;
031: import it.unimi.dsi.mg4j.index.TooManyTermsException;
032: import it.unimi.dsi.mg4j.index.payload.Payload;
033: import it.unimi.dsi.mg4j.query.nodes.AbstractQueryBuilderVisitor;
034: import it.unimi.dsi.mg4j.query.nodes.And;
035: import it.unimi.dsi.mg4j.query.nodes.Consecutive;
036: import it.unimi.dsi.mg4j.query.nodes.Difference;
037: import it.unimi.dsi.mg4j.query.nodes.LowPass;
038: import it.unimi.dsi.mg4j.query.nodes.MultiTerm;
039: import it.unimi.dsi.mg4j.query.nodes.Not;
040: import it.unimi.dsi.mg4j.query.nodes.Or;
041: import it.unimi.dsi.mg4j.query.nodes.OrderedAnd;
042: import it.unimi.dsi.mg4j.query.nodes.Prefix;
043: import it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitorException;
044: import it.unimi.dsi.mg4j.query.nodes.Range;
045: import it.unimi.dsi.mg4j.query.nodes.Select;
046: import it.unimi.dsi.mg4j.query.nodes.Term;
047:
048: import java.io.IOException;
049: import java.lang.reflect.InvocationTargetException;
050: import java.lang.reflect.Method;
051: import java.util.NoSuchElementException;
052:
053: /** A {@link it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitor}
054: * that builds a {@link it.unimi.dsi.mg4j.search.DocumentIterator}
055: * resolving the queries using the objects in {@link it.unimi.dsi.mg4j.search}.
056: *
057: * <p>This elementary builder visitor invokes {@link it.unimi.dsi.mg4j.index.Index#documents(CharSequence)}
058: * to build the leaf {@linkplain it.unimi.dsi.mg4j.index.IndexIterator index iterators}. Thus, the
059: * resulting {@link it.unimi.dsi.mg4j.search.DocumentIterator} should be carefully
060: * {@linkplain it.unimi.dsi.mg4j.search.DocumentIterator#dispose() disposed} after usage (every
061: * index iterator may open a file or a socket).
062: *
063: * <p>{@link Prefix} and {@link MultiTerm} nodes cause the creation of a {@link MultiTermIndexIterator},
064: * in the first case by callying {@link it.unimi.dsi.mg4j.index.Index#documents(CharSequence,int)} and
065: * in the second case by creating a {@link MultiTermIndexIterator} with the name and frequency equal to the
066: * maximum frequency over all terms. Other implementations might choose differently.
067: *
068: * <p>At construction time, you must provide a map from strings to indices that will be used to resolve
069: * {@link it.unimi.dsi.mg4j.query.nodes.Select} nodes. The map may be <code>null</code>, in which case
070: * such nodes will cause an {@link java.lang.IllegalArgumentException}.
071: * If a {@link it.unimi.dsi.mg4j.query.nodes.Select}
072: * node contains an index name that does not appear in the map a {@link NoSuchElementException}
073: * will be thrown instead.
074: *
075: * <p>A production site will likely substitute this builder visitor with one that reuses
076: * {@linkplain it.unimi.dsi.mg4j.index.IndexReader index readers} out of a pool.
077: *
078: * <p>Instances of this class may be safely reused by calling {@link #prepare()}.
079: */
080:
081: public class DocumentIteratorBuilderVisitor extends
082: AbstractQueryBuilderVisitor<DocumentIterator> {
083:
084: /** A map associating a textual key to indices. */
085: private final Object2ReferenceMap<String, Index> indexMap;
086: /** A map associating an object with a <code>parse(String)</code> method to each payload-based index. */
087: private final Reference2ReferenceMap<Index, Object> index2Parser;
088: /** The default index. */
089: private final Index defaultIndex;
090: /** The number of documents (fetched from the default index). */
091: private final int numberOfDocuments;
092: /** The limit on prefix queries provided in the constructor. */
093: private final int limit;
094: /** The stack of selected indices (changed by {@link Select} nodes). */
095: private ObjectArrayList<Index> curr;
096:
097: /** Creates a new builder visitor.
098: *
099: * @param indexMap a map from index names to indices, to be used in {@link Select} nodes, or <code>null</code>
100: * if the only used index is the default index.
101: * @param defaultIndex the default index.
102: * @param limit a limit that will be used with {@link Prefix} nodes.
103: */
104: @SuppressWarnings("unchecked")
105: public DocumentIteratorBuilderVisitor(
106: final Object2ReferenceMap<String, Index> indexMap,
107: final Index defaultIndex, final int limit) {
108: this (indexMap, Reference2ReferenceMaps.EMPTY_MAP, defaultIndex,
109: limit);
110: }
111:
112: /** Creates a new builder visitor with additional parsers for payload-based indices.
113: *
114: * @param indexMap a map from index names to indices, to be used in {@link Select} nodes, or <code>null</code>
115: * if the only used index is the default index.
116: * @param defaultIndex the default index.
117: * @param limit a limit that will be used with {@link Prefix} nodes.
118: */
119: public DocumentIteratorBuilderVisitor(
120: final Object2ReferenceMap<String, Index> indexMap,
121: final Reference2ReferenceMap<Index, Object> index2Parser,
122: final Index defaultIndex, final int limit) {
123: this .indexMap = indexMap;
124: this .defaultIndex = defaultIndex;
125: this .index2Parser = index2Parser;
126: this .limit = limit;
127: curr = new ObjectArrayList<Index>();
128: curr.push(defaultIndex);
129: this .numberOfDocuments = defaultIndex.numberOfDocuments;
130: }
131:
132: public DocumentIteratorBuilderVisitor copy() {
133: return new DocumentIteratorBuilderVisitor(indexMap,
134: defaultIndex, limit);
135: }
136:
137: public DocumentIteratorBuilderVisitor prepare() {
138: curr.size(1);
139: return this ;
140: }
141:
142: public DocumentIterator[] newArray(final int len) {
143: return new DocumentIterator[len];
144: }
145:
146: public DocumentIterator visit(final Term node)
147: throws QueryBuilderVisitorException {
148: try {
149: if (node.termNumber != -1)
150: return curr.top().documents(node.termNumber);
151: return curr.top().documents(node.term);
152: } catch (IOException e) {
153: throw new QueryBuilderVisitorException(e);
154: }
155: }
156:
157: public DocumentIterator visit(final Prefix node)
158: throws QueryBuilderVisitorException {
159: try {
160: return curr.top().documents(node.prefix, limit);
161: } catch (IOException e) {
162: throw new QueryBuilderVisitorException(e);
163: } catch (TooManyTermsException e) {
164: throw new QueryBuilderVisitorException(e);
165: }
166: }
167:
168: public DocumentIterator visit(Range node)
169: throws QueryBuilderVisitorException {
170: final Index index = curr.top();
171: if (!index.hasPayloads)
172: throw new IllegalStateException("Index " + index
173: + " does not have payloads");
174: try {
175: final Object parser = index2Parser.containsKey(index) ? index2Parser
176: .get(index)
177: : index.payload;
178: final Method method = parser.getClass().getMethod("parse",
179: String.class);
180: final Payload left = index.payload.copy(), right = index.payload
181: .copy();
182: if (node.left != null)
183: left.set(method.invoke(parser, node.left.toString()));
184: if (node.right != null)
185: right.set(method.invoke(parser, node.right.toString()));
186: return PayloadPredicateDocumentIterator.getInstance(index
187: .documents(0), index.payload.rangeFilter(
188: node.left == null ? null : left,
189: node.right == null ? null : right));
190: } catch (InvocationTargetException e) {
191: throw new QueryBuilderVisitorException(e.getCause());
192: } catch (Exception e) {
193: throw new QueryBuilderVisitorException(e);
194: }
195: }
196:
197: public DocumentIterator visitPost(final And node,
198: final DocumentIterator[] subNode)
199: throws QueryBuilderVisitorException {
200: try {
201: return AndDocumentIterator.getInstance(numberOfDocuments,
202: subNode);
203: } catch (IOException e) {
204: throw new QueryBuilderVisitorException(e);
205: }
206: }
207:
208: public DocumentIterator visitPost(final Consecutive node,
209: final DocumentIterator[] subNode)
210: throws QueryBuilderVisitorException {
211: try {
212: return ConsecutiveDocumentIterator.getInstance(subNode,
213: node.gap);
214: } catch (IOException e) {
215: throw new QueryBuilderVisitorException(e);
216: }
217: }
218:
219: public DocumentIterator visitPost(final LowPass node,
220: final DocumentIterator subNode) {
221: return LowPassDocumentIterator.getInstance(subNode, node.k);
222: }
223:
224: public DocumentIterator visitPost(final Not node,
225: final DocumentIterator subNode)
226: throws QueryBuilderVisitorException {
227: try {
228: return NotDocumentIterator.getInstance(subNode,
229: numberOfDocuments);
230: } catch (IOException e) {
231: throw new QueryBuilderVisitorException(e);
232: }
233: }
234:
235: public DocumentIterator visitPost(final Or node,
236: final DocumentIterator[] subNode)
237: throws QueryBuilderVisitorException {
238: try {
239: return OrDocumentIterator.getInstance(subNode);
240: } catch (IOException e) {
241: throw new QueryBuilderVisitorException(e);
242: }
243: }
244:
245: public DocumentIterator visitPost(final OrderedAnd node,
246: final DocumentIterator[] subNode)
247: throws QueryBuilderVisitorException {
248: try {
249: return OrderedAndDocumentIterator.getInstance(
250: numberOfDocuments, subNode);
251: } catch (IOException e) {
252: throw new QueryBuilderVisitorException(e);
253: }
254: }
255:
256: public DocumentIterator visitPost(final Difference node,
257: final DocumentIterator[] subNode) {
258: return DifferenceDocumentIterator.getInstance(subNode[0],
259: subNode[1], node.leftMargin, node.rightMargin);
260: }
261:
262: public DocumentIterator visitPost(final MultiTerm node,
263: final DocumentIterator subNode[])
264: throws QueryBuilderVisitorException {
265: final IndexIterator[] indexIterator = new IndexIterator[subNode.length];
266: System.arraycopy(subNode, 0, indexIterator, 0,
267: indexIterator.length);
268: IndexIterator result;
269: try {
270: result = MultiTermIndexIterator.getInstance(curr.top(),
271: indexIterator);
272: } catch (IOException e) {
273: throw new QueryBuilderVisitorException(e);
274: }
275: result.term(node.toString());
276: return result;
277: }
278:
279: public boolean visitPre(final Select node)
280: throws QueryBuilderVisitorException {
281: if (indexMap == null)
282: throw new IllegalArgumentException(
283: "You cannot use Select nodes without an index map");
284: final Index index = indexMap.get(node.index.toString());
285: if (index == null)
286: throw new NoSuchElementException("The selected index ("
287: + node.index + ")"
288: + " does not appear in the index map (" + indexMap
289: + ")");
290: curr.push(indexMap.get(node.index.toString()));
291: return true;
292: }
293:
294: public DocumentIterator visitPost(final Select node,
295: final DocumentIterator subNode) {
296: curr.pop();
297: return subNode;
298: }
299: }
|