001: package it.unimi.dsi.mg4j.search.visitor;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2006-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.ints.IntArrays;
025: import it.unimi.dsi.mg4j.index.IndexIterator;
026:
027: import java.io.IOException;
028: import java.util.Arrays;
029:
030: /** A visitor using the information collected by a
031: * {@link it.unimi.dsi.mg4j.search.visitor.TermCollectionVisitor}
032: * to set up term frequencies and counters.
033: *
034: * <p>Term {@linkplain #frequency frequencies} and {@linkplain #count counts} are stored
035: * in publicly accessible parallel arrays of integers indexed by <em>offsets</em>,
036: * as defined by a {@link it.unimi.dsi.mg4j.search.visitor.TermCollectionVisitor} provided at construction time.
037: * Additionally, the {@linkplain #indexNumber index number} (a position into the array returned by
038: * {@link it.unimi.dsi.mg4j.search.visitor.TermCollectionVisitor#indices()}) and the
039: * {@linkplain #term term} for each offset are available.
040: *
041: * <p>When instances of this class perform a visit, they prepare the arrays and
042: * fill those contaning {@linkplain #frequency frequencies} and {@linkplain #indexNumber index numbers}.
043: * It is up to an instance of {@link it.unimi.dsi.mg4j.search.visitor.CounterCollectionVisitor}
044: * (which requires an instance of this class in its constructor) to fill
045: * the {@linkplain #count counts} with data related to
046: * the current document.
047: *
048: * <p>For a more complete picture, see {@link it.unimi.dsi.mg4j.search.visitor.CounterCollectionVisitor}.
049: */
050:
051: public class CounterSetupVisitor extends
052: AbstractDocumentIteratorVisitor {
053: /** For each offset, the corresponding index as a position in {@link TermCollectionVisitor#indices()}. */
054: public int[] indexNumber;
055: /** For each offset, the corresponding term. */
056: public CharSequence[] term;
057: /** For each offset, its count. */
058: public int[] count;
059: /** For each offset, its frequency. */
060: public int[] frequency;
061: /** The underlying term-collection visitor. */
062: private final TermCollectionVisitor termCollectionVisitor;
063:
064: /** Creates a new counter-setup visitor based on a given term-collection visitor.
065: *
066: * @param termCollectionVisitor a term-collection visitor.
067: */
068:
069: public CounterSetupVisitor(
070: TermCollectionVisitor termCollectionVisitor) {
071: this .termCollectionVisitor = termCollectionVisitor;
072: prepare();
073: }
074:
075: /** Prepares the internal state of this visitor using data from the associated
076: * {@link TermCollectionVisitor}.
077: *
078: * <p>Note that because of this dependency, it is essential that you
079: * first prepare and visit with the associated {@link TermCollectionVisitor},
080: * and then prepare and visit with this visitor.
081: */
082:
083: public CounterSetupVisitor prepare() {
084: count = new int[termCollectionVisitor.numberOfPairs()];
085: frequency = new int[termCollectionVisitor.numberOfPairs()];
086: indexNumber = new int[termCollectionVisitor.numberOfPairs()];
087: term = new CharSequence[termCollectionVisitor.numberOfPairs()];
088: return this ;
089: }
090:
091: public boolean visit(final IndexIterator indexIterator)
092: throws IOException {
093: if (indexIterator.frequency() > 0
094: && indexIterator.index().hasCounts) {
095: // We fill the frequency and index entries
096: final int id = indexIterator.id(); // offset into all arrays
097: this .frequency[id] = indexIterator.frequency();
098: this .indexNumber[id] = termCollectionVisitor.indexMap()
099: .getInt(indexIterator.index());
100: this .term[id] = indexIterator.term();
101: }
102: return true;
103: }
104:
105: /** Updates the {@link #count} using the provided index iterator.
106: *
107: * <p>This method is usually called back by a {@link CounterCollectionVisitor} built upon
108: * this counter-setup visitor. It simply retrieves the index iterator
109: * {@linkplain IndexIterator#id() id} and use it as an index into
110: * {@link #count} to store {@link IndexIterator#count()}.
111: *
112: * @param indexIterator an index iterator.
113: * @throws IOException
114: */
115:
116: public void update(final IndexIterator indexIterator)
117: throws IOException {
118: count[indexIterator.id()] = indexIterator.count();
119: }
120:
121: /** Zeroes all counters, but not frequencies. */
122: public void clear() {
123: IntArrays.fill(count, 0);
124: }
125:
126: public String toString() {
127: return "[" + Arrays.toString(frequency) + ", "
128: + Arrays.toString(count) + "]";
129: }
130: }
|