001: package it.unimi.dsi.mg4j.search.score;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2004-2007 Paolo Boldi and Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITfNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import java.io.IOException;
025:
026: import it.unimi.dsi.fastutil.ints.AbstractIntIterator;
027: import it.unimi.dsi.fastutil.objects.Reference2DoubleMap;
028: import it.unimi.dsi.mg4j.index.Index;
029: import it.unimi.dsi.mg4j.search.CachingDocumentIterator;
030: import it.unimi.dsi.mg4j.search.DocumentIterator;
031:
032: /** A {@link Scorer} that aggregates a number of underlying {@link it.unimi.dsi.mg4j.search.score.DelegatingScorer delegating scorers}, providing equalisation if required.
033: *
034: * <p>An aggregator combines the results of several scorers following some policy (see, e.g.,
035: * {@link it.unimi.dsi.mg4j.search.score.LinearAggregator}). In doing so, often the aggregator
036: * needs to explore the first scores returned by each scorer, and tune some internal parameters. This
037: * procedure, <em>equalisation</em>, is supported by this class: if {@link #equalize(int)} is provided with a
038: * positive number of samples, they will be fetched from the underlying document iterator, scored, and
039: * passed to the implementing subclass so that equalisation information can be properly set up.
040: *
041: * <p>Additionally, this class ensures that if several scorers need access to intervals,
042: * the document iterator to be scored is decorated with a {@link it.unimi.dsi.mg4j.search.CachingDocumentIterator},
043: * so that several scorer can access intervals.
044: *
045: * <p>Since this class uses the same document iterator for <em>all</em> aggregated scorers, they
046: * must be necessarily {@linkplain it.unimi.dsi.mg4j.search.score.DelegatingScorer delegating scorers}.
047: *
048: * <p>Implementing subclasses must provide the following methods:
049: * <ul>
050: * <li>{@link #setupEqualizationFactors()}, which is called in case equalisation is required and
051: * must examine {@link #actualSamples} elements from {@link #sampleScore} (each element is a tuple
052: * of scores, one for each scorer) and use that information to set the equalisation factors (if {@link #samples}
053: * is zero, default values must be applied);
054: * <li>{@link #score(double[])}, which must compute the equalised aggregated score using
055: * the given array of scores (each to be thought as a score coming from the respective scorer).
056: * </ul>
057: *
058: * <p>Additionally, implementing subclasses must remember to call {@link #equalize(int)}
059: * when generating a {@linkplain it.unimi.dsi.lang.FlyweightPrototype#copy() flyweight copy},
060: * so that the state of the aggregator is reproduced correctly.
061: */
062: public abstract class AbstractAggregator extends AbstractIntIterator
063: implements Scorer {
064:
065: /** The current document iterator. */
066: protected DocumentIterator documentIterator;
067: /** The number of underlying scorers. */
068: protected final int n;
069: /** The underlying scorers. */
070: protected final Scorer[] scorer;
071: /** The current score. */
072: protected final double[] currScore;
073: /** Whether we need caching the intervals. */
074: protected final boolean needsCaching;
075:
076: /** Cached sample of document pointers. */
077: protected int[] sampleDocument;
078: /** Cached sample of document scores. */
079: protected double[][] sampleScore;
080: /** The number of samples for equalisation (0 means no equalisation). */
081: protected int samples;
082: /** The next sample to be returned, if smaller than {@link #actualSamples}. */
083: protected int currSample;
084: /** The actual number of samples obtained (might be less than {@link #samples} if we exhausted the document iterator). */
085: protected int actualSamples;
086:
087: /** Creates an aggregator.
088: *
089: * @param scorer the scorers.
090: */
091: public AbstractAggregator(final Scorer[] scorer) {
092: this .n = scorer.length;
093: this .scorer = scorer;
094: this .currScore = new double[n];
095: int needsIntervals = 0;
096: for (int i = scorer.length; i-- != 0;) {
097: if (!(scorer[i] instanceof DelegatingScorer))
098: throw new IllegalArgumentException(
099: "An aggregator needs delegating scorers");
100: if (scorer[i].usesIntervals())
101: needsIntervals++;
102: }
103: needsCaching = needsIntervals > 1;
104: actualSamples = -1;
105: }
106:
107: public double score(final Index index) {
108: throw new UnsupportedOperationException();
109: }
110:
111: public double score() throws IOException {
112: // If we are still walking through the sample, return a score from there
113: if (currSample <= actualSamples)
114: return score(sampleScore[currSample - 1]);
115: // Otherwise, create new score array and pass it to the implementing subclass.
116: final double[] currScore = this .currScore;
117: for (int i = n; i-- != 0;)
118: currScore[i] = scorer[i].score();
119: return score(currScore);
120: }
121:
122: /** Set the number of samples for equalisation.
123: *
124: * @param samples the number of samples to be used to equalise scores; a value
125: * of zero disables equalisation.
126: */
127:
128: public synchronized void equalize(int samples) {
129: this .samples = samples;
130: if (samples == 0) {
131: sampleDocument = null;
132: sampleScore = null;
133: actualSamples = -1;
134: } else {
135: sampleDocument = new int[samples];
136: sampleScore = new double[samples][n];
137: }
138: }
139:
140: /** Delegates to the underlying scorers.
141: *
142: * @return true if at least one underlying scorer supports weights.
143: */
144: public synchronized boolean setWeights(
145: final Reference2DoubleMap<Index> index2weight) {
146: boolean atLeastOne = false;
147: for (int i = n; i-- != 0;)
148: atLeastOne |= scorer[i].setWeights(index2weight);
149: return atLeastOne;
150: }
151:
152: /** Delegates to the underlying scorers.
153: *
154: * @return true if at least one underlying scorer uses intervals.
155: */
156: public boolean usesIntervals() {
157: for (int i = n; i-- != 0;)
158: if (scorer[i].usesIntervals())
159: return true;
160: return false;
161: }
162:
163: /** Delegates to the underlying scorers, possibly wrapping the argument in a
164: * {@link CachingDocumentIterator}; then, if {@link #samples} is nonzero computes
165: * that many document scores and invokes {@link #setupEqualizationFactors()}.
166: */
167: public void wrap(DocumentIterator documentIterator)
168: throws IOException {
169: if (needsCaching)
170: documentIterator = new CachingDocumentIterator(
171: documentIterator);
172: for (int i = n; i-- != 0;)
173: scorer[i].wrap(documentIterator);
174: if (samples > 0) {
175: // Let us prepare a sample.
176: int i;
177: for (i = 0; i < samples
178: && (sampleDocument[i] = documentIterator
179: .nextDocument()) != -1; i++) {
180: ;
181: for (int j = n; j-- != 0;)
182: sampleScore[i][j] = scorer[j].score();
183: }
184: actualSamples = i;
185: currSample = 0;
186: }
187: // This must be *always* called--in the worst case, it will just set all factors to 1.
188: setupEqualizationFactors();
189:
190: this .documentIterator = documentIterator;
191: }
192:
193: /** Computes an aggregated score using the given array of basic scores.
194: * The array is parallel to {@link #scorer}.
195: *
196: * @param score an array of scores.
197: * @return the aggregated scorer.
198: */
199: protected abstract double score(double score[]);
200:
201: /** Sets up the equalisation factors.
202: *
203: * <p>Implementations should look into {@link #sampleScore} and set up the
204: * equalisation logic. Note that this method is responsible for setting
205: * up appropriate equalisation factors <em>even if no equalisation is required</em>
206: * (e.g., setting all factors to 1 ).
207: */
208: protected abstract void setupEqualizationFactors();
209:
210: public int nextDocument() throws IOException {
211: if (currSample < actualSamples)
212: return sampleDocument[currSample++];
213: currSample = Integer.MAX_VALUE;
214: return documentIterator.nextDocument();
215: }
216:
217: public boolean hasNext() {
218: return currSample < actualSamples || documentIterator.hasNext();
219: }
220:
221: public int nextInt() {
222: if (!hasNext())
223: throw new UnsupportedOperationException();
224: try {
225: return nextDocument();
226: } catch (IOException e) {
227: throw new RuntimeException(e);
228: }
229: }
230: }
|