01: package it.unimi.dsi.mg4j.search.score;
02:
03: /*
04: * MG4J: Managing Gigabytes for Java
05: *
06: * Copyright (C) 2006-2007 Sebastiano Vigna
07: *
08: * This library is free software; you can redistribute it and/or modify it
09: * under the terms of the GNU Lesser General Public License as published by the Free
10: * Software Foundation; either version 2.1 of the License, or (at your option)
11: * any later version.
12: *
13: * This library is distributed in the hope that it will be useful, but
14: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
16: * for more details.
17: *
18: * You should have received a copy of the GNU Lesser General Public License
19: * along with this program; if not, write to the Free Software
20: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21: *
22: */
23:
24: import java.io.IOException;
25:
26: import it.unimi.dsi.mg4j.index.Index;
27: import it.unimi.dsi.mg4j.search.DocumentIterator;
28: import it.unimi.dsi.mg4j.search.visitor.CounterCollectionVisitor;
29: import it.unimi.dsi.mg4j.search.visitor.CounterSetupVisitor;
30: import it.unimi.dsi.mg4j.search.visitor.TermCollectionVisitor;
31:
32: import org.apache.log4j.Logger;
33:
34: /** A trivial scorer that computes the score by adding the counts
35: * (the number of occurrences within the current document) of each term
36: * multiplied by the weight of the relative index.
37: * Mainly useful for debugging and testing purposes.
38: *
39: * <p>This class uses a {@link it.unimi.dsi.mg4j.search.visitor.CounterCollectionVisitor}
40: * and related classes to take into consideration only terms that are actually involved
41: * in the current document.
42: *
43: * @author Mauro Mereu
44: * @author Sebastiano Vigna
45: */
46: public class CountScorer extends AbstractWeightedScorer implements
47: DelegatingScorer {
48: static Logger LOGGER = Logger.getLogger(CountScorer.class);
49:
50: /** The counter collection visitor used to estimate counts. */
51: private final CounterCollectionVisitor counterCollectionVisitor;
52: /** The counter setup visitor used to estimate counts. */
53: private final CounterSetupVisitor counterSetupVisitor;
54: /** The term collection visitor used to estimate counts. */
55: private final TermCollectionVisitor termCollectionVisitor;
56:
57: public CountScorer() {
58: termCollectionVisitor = new TermCollectionVisitor();
59: counterSetupVisitor = new CounterSetupVisitor(
60: termCollectionVisitor);
61: counterCollectionVisitor = new CounterCollectionVisitor(
62: counterSetupVisitor);
63: }
64:
65: public double score() throws IOException {
66: counterSetupVisitor.clear();
67: documentIterator.acceptOnTruePaths(counterCollectionVisitor);
68:
69: double score = 0;
70: final int[] count = counterSetupVisitor.count;
71: final int[] indexNumber = counterSetupVisitor.indexNumber;
72: for (int i = count.length; i-- != 0;)
73: score += count[i] * currWeight[indexNumber[i]];
74: return score;
75: }
76:
77: public double score(final Index index) {
78: throw new UnsupportedOperationException();
79: }
80:
81: public void wrap(DocumentIterator d) throws IOException {
82: super .wrap(d);
83: termCollectionVisitor.prepare();
84: d.accept(termCollectionVisitor);
85: currIndex = termCollectionVisitor.indices();
86: counterSetupVisitor.prepare();
87: d.accept(counterSetupVisitor);
88: }
89:
90: public synchronized CountScorer copy() {
91: final CountScorer scorer = new CountScorer();
92: scorer.setWeights(index2Weight);
93: return scorer;
94: }
95:
96: public boolean usesIntervals() {
97: return false;
98: }
99: }
|