01: package it.unimi.dsi.mg4j.index.cluster;
02:
03: /*
04: * MG4J: Managing Gigabytes for Java
05: *
06: * Copyright (C) 2006-2007 Sebastiano Vigna
07: *
08: * This library is free software; you can redistribute it and/or modify it
09: * under the terms of the GNU Lesser General Public License as published by the Free
10: * Software Foundation; either version 2.1 of the License, or (at your option)
11: * any later version.
12: *
13: * This library is distributed in the hope that it will be useful, but
14: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
16: * for more details.
17: *
18: * You should have received a copy of the GNU Lesser General Public License
19: * along with this program; if not, write to the Free Software
20: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21: *
22: */
23:
24: import it.unimi.dsi.mg4j.index.BitStreamIndex;
25: import it.unimi.dsi.mg4j.index.DiskBasedIndex;
26: import it.unimi.dsi.mg4j.index.Index;
27: import it.unimi.dsi.util.BloomFilter;
28: import it.unimi.dsi.util.StringMap;
29:
/** A lexical clustering strategy that uses a chain of responsibility to choose the local index:
31: * {@linkplain StringMap term maps} out of a given list are inquired
32: * until one contains the given term.
33: *
34: * <p>If the index cluster has Bloom filters, they will be used to reduce useless accesses to
35: * term maps.
36: *
37: * <p>The intended usage of this class is memory/disk lexical partitioning. Note that a serialised version
* of this class is <em>empty</em>. It acts just like a placeholder, so that loaders know that they
39: * must generate a new instance depending on the indices contained in the cluster.
40: *
41: * @author Sebastiano Vigna
42: */
43:
44: public class ChainedLexicalClusteringStrategy implements
45: LexicalClusteringStrategy {
46: static final long serialVersionUID = 0;
47: /** The array of indices to inquiry. */
48: private transient final StringMap<? extends CharSequence>[] termMap;
49: /** An array of optional Bloom filters to reduce term map access, or <code>null</code>. */
50: private transient final BloomFilter[] termFilter;
51:
52: /** Creates a new chained lexical clustering strategy using additional Bloom filters.
53: *
54: * <p>Note that the static type of the parameter <code>index</code> is
55: * an array of {@link Index}, but the elements of the array must be
56: * {@linkplain DiskBasedIndex disk-based indices}, or an exception will be thrown.
57: *
58: * @param index an array of disk-based indices, from which term maps will be extracted.
59: * @param termFilter an array, parallel to <code>index</code>, of Bloom filter representing the terms contained in each local index.
60: */
61: public ChainedLexicalClusteringStrategy(final Index[] index,
62: final BloomFilter[] termFilter) {
63: this .termMap = new StringMap<?>[index.length];
64: for (int i = index.length; i-- != 0;)
65: termMap[i] = ((BitStreamIndex) index[i]).termMap;
66: this .termFilter = termFilter;
67: }
68:
69: /** Creates a new chained lexical clustering strategy.
70: *
71: * <p>Note that the static type of the parameter <code>index</code> is
72: * an array of {@link Index}, but the elements of the array must be
73: * {@linkplain DiskBasedIndex disk-based indices}, or an exception will be thrown.
74: *
75: * @param index an array of disk-based indices, from which term maps will be extracted.
76: */
77: public ChainedLexicalClusteringStrategy(final Index[] index) {
78: this (index, null);
79: }
80:
81: public int numberOfLocalIndices() {
82: return termMap.length;
83: }
84:
85: public int localIndex(final CharSequence term) {
86: for (int i = 0; i < termMap.length; i++)
87: if ((termFilter == null || termFilter[i].contains(term))
88: && termMap[i].getLong(term) != -1)
89: return i;
90: return -1;
91: }
92:
93: public int globalNumber(int localIndex, int localNumber) {
94: throw new UnsupportedOperationException();
95: }
96: }
|