01: package it.unimi.dsi.mg4j.index.cluster;
02:
03: /*
04: * MG4J: Managing Gigabytes for Java
05: *
06: * Copyright (C) 2006-2007 Sebastiano Vigna
07: *
08: * This library is free software; you can redistribute it and/or modify it
09: * under the terms of the GNU Lesser General Public License as published by the Free
10: * Software Foundation; either version 2.1 of the License, or (at your option)
11: * any later version.
12: *
13: * This library is distributed in the hope that it will be useful, but
14: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
16: * for more details.
17: *
18: * You should have received a copy of the GNU Lesser General Public License
19: * along with this program; if not, write to the Free Software
20: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21: *
22: */
23:
24: import it.unimi.dsi.fastutil.ints.IntList;
25: import it.unimi.dsi.mg4j.index.Index;
26: import it.unimi.dsi.mg4j.index.IndexIterator;
27: import it.unimi.dsi.mg4j.index.IndexIterators;
28: import it.unimi.dsi.mg4j.index.IndexReader;
29: import it.unimi.dsi.mg4j.index.MultiTermIndexIterator;
30: import it.unimi.dsi.mg4j.index.TermProcessor;
31: import it.unimi.dsi.mg4j.index.TooManyTermsException;
32: import it.unimi.dsi.mg4j.index.payload.Payload;
33: import it.unimi.dsi.mg4j.search.DocumentIterator;
34: import it.unimi.dsi.util.BloomFilter;
35: import it.unimi.dsi.util.Properties;
36:
37: import java.io.IOException;
38: import java.util.ArrayList;
39:
40: /** A cluster exhibiting local indices referring to the same collection, but
41: * containing different set of terms, as a single index.
42: *
43: * @author Alessandro Arrabito
44: * @author Sebastiano Vigna
45: */
46: public class LexicalCluster extends IndexCluster {
47:
48: private static final long serialVersionUID = 1L;
49:
50: /** The strategy to be used.*/
51: protected final LexicalClusteringStrategy strategy;
52: /** The strategy, cast to a partition strategy, or <code>null</code>. */
53: protected final LexicalPartitioningStrategy partitioningStrategy;
54:
55: /** Creates a new lexical index cluster. */
56:
57: public LexicalCluster(final Index[] localIndex,
58: final LexicalClusteringStrategy strategy,
59: final BloomFilter[] termFilter,
60: final int numberOfDocuments, final int numberOfTerms,
61: final long numberOfPostings,
62: final long numberOfOccurrences, final int maxCount,
63: final Payload payload, final boolean hasCounts,
64: final boolean hasPositions,
65: final TermProcessor termProcessor, final String field,
66: final IntList sizes, final Properties properties) {
67: super (localIndex, termFilter, numberOfDocuments, numberOfTerms,
68: numberOfPostings, numberOfOccurrences, maxCount,
69: payload, hasCounts, hasPositions, termProcessor, field,
70: sizes, properties);
71: this .strategy = strategy;
72: this .partitioningStrategy = strategy instanceof LexicalPartitioningStrategy ? ((LexicalPartitioningStrategy) strategy)
73: : null;
74: }
75:
76: public IndexReader getReader(final int bufferSize)
77: throws IOException {
78: return new LexicalClusterIndexReader(this , bufferSize);
79: }
80:
81: public IndexIterator documents(final CharSequence prefix,
82: final int limit) throws IOException, TooManyTermsException {
83: final ArrayList<DocumentIterator> iterators = new ArrayList<DocumentIterator>(
84: localIndex.length);
85:
86: DocumentIterator documentIterator;
87: for (int i = 0; i < localIndex.length; i++) {
88: // TODO: check for limit globally
89: documentIterator = localIndex[i].documents(prefix, limit);
90: if (documentIterator.hasNext())
91: iterators.add(documentIterator);
92: }
93: // TODO: test that this multiterm-of-multiterm actually works.
94: return MultiTermIndexIterator.getInstance(this, iterators
95: .toArray(IndexIterators.EMPTY_ARRAY));
96: }
97: }
|