001: package it.unimi.dsi.mg4j.index.cluster;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2006-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.ints.IntArrayList;
025: import it.unimi.dsi.mg4j.index.IndexIterator;
026: import it.unimi.dsi.mg4j.index.IndexIterators;
027: import it.unimi.dsi.Util;
028:
029: import java.io.IOException;
030: import java.util.ArrayList;
031:
032: import org.apache.log4j.Logger;
033:
034: /** An index reader for a {@link it.unimi.dsi.mg4j.index.cluster.DocumentalCluster}. It dispatches
035: * the correct {@link it.unimi.dsi.mg4j.index.IndexReader} depending on the concrete subclass of
036: * {@link it.unimi.dsi.mg4j.index.cluster.DocumentalCluster}.
037: *
038: * @author Sebastiano Vigna
039: */
040:
041: public class DocumentalClusterIndexReader extends
042: AbstractIndexClusterIndexReader {
043: private static final Logger LOGGER = Util
044: .getLogger(DocumentalClusterIndexReader.class);
045: private static final boolean DEBUG = false;
046:
047: /** The index this reader refers to. */
048: protected final DocumentalCluster index;
049:
050: public DocumentalClusterIndexReader(DocumentalCluster index,
051: int bufferSize) throws IOException {
052: super (index, bufferSize);
053: this .index = index;
054: }
055:
056: public IndexIterator documents(int term) throws IOException {
057: if (!index.flat)
058: throw new UnsupportedOperationException(
059: "Only flat documental clusters allow access by term number");
060:
061: final IndexIterator[] iterator = new IndexIterator[indexReader.length];
062: for (int i = 0; i < indexReader.length; i++)
063: iterator[i] = indexReader[i].documents(term);
064:
065: final IndexIterator indexIterator = index.concatenated ? new DocumentalConcatenatedClusterIndexIterator(
066: this , iterator, index.allIndices)
067: : new DocumentalMergedClusterIndexIterator(this ,
068: iterator, index.allIndices);
069:
070: return indexIterator;
071: }
072:
073: public IndexIterator documents(final CharSequence term)
074: throws IOException {
075: final ArrayList<IndexIterator> iterators = new ArrayList<IndexIterator>(
076: indexReader.length);
077: final IntArrayList usedIndices = new IntArrayList();
078: for (int i = 0; i < indexReader.length; i++) {
079: if (index.termFilter == null
080: || index.termFilter[i].contains(term)) {
081: IndexIterator it = indexReader[i].documents(term);
082: if (it.hasNext()) {
083: iterators.add(it);
084: usedIndices.add(i);
085: }
086: }
087: }
088:
089: if (DEBUG)
090: LOGGER.debug("Indices used for " + term + ": "
091: + usedIndices);
092:
093: if (iterators.isEmpty())
094: return index.emptyIndexIterator;
095: final IndexIterator indexIterator = index.concatenated ? new DocumentalConcatenatedClusterIndexIterator(
096: this , iterators.toArray(IndexIterators.EMPTY_ARRAY),
097: usedIndices.toIntArray())
098: : new DocumentalMergedClusterIndexIterator(this,
099: iterators.toArray(IndexIterators.EMPTY_ARRAY),
100: usedIndices.toIntArray());
101:
102: indexIterator.term(term);
103: return indexIterator;
104: }
105: }
|