01: package it.unimi.dsi.mg4j.search.score;
02:
03: /*
04: * MG4J: Managing Gigabytes for Java
05: *
06: * Copyright (C) 2005-2007 Sebastiano Vigna
07: *
08: * This library is free software; you can redistribute it and/or modify it
09: * under the terms of the GNU Lesser General Public License as published by the Free
10: * Software Foundation; either version 2.1 of the License, or (at your option)
11: * any later version.
12: *
13: * This library is distributed in the hope that it will be useful, but
14: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
16: * for more details.
17: *
18: * You should have received a copy of the GNU Lesser General Public License
19: * along with this program; if not, write to the Free Software
20: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21: *
22: */
23:
24: import java.io.IOException;
25:
26: import it.unimi.dsi.mg4j.index.Index;
27: import it.unimi.dsi.util.Interval;
28: import it.unimi.dsi.mg4j.search.IntervalIterator;
29: import it.unimi.dsi.mg4j.search.IntervalIterators;
30:
31: /** Computes the Vigna score of all interval iterators of a document.
32: *
33: * <p>This scorer progressively moves score from a residual (initialised to 1)
34: * to the current score (initialised to 0). For each interval, we move a fraction of the residual
35: * equal to the ratio of the {@linkplain it.unimi.dsi.mg4j.search.IntervalIterator#extent() extent}
36: * over the interval length, minimised with 1 and divided by 2. For instance,
37: * on a two-term query meeting intervals of length 2 will increase the score from 0 to 1/2, 3/4 and so on.
38: * On the other hand, larger intervals take away less from the residual.
39: *
40: * <p>When the score exceeds .99, the computation is interrupted. In this way, we exploit
41: * the laziness of the algorithms for minimal-interval
42: * semantics implemented in {@link it.unimi.dsi.mg4j.search}, greatly improving performance for
43: * extremely frequent terms, with no perceivable effect on the score itself.
44: */
45: public class VignaScorer extends AbstractWeightedScorer implements
46: DelegatingScorer {
47:
48: public double score(final Index index) throws IOException {
49: final IntervalIterator it = documentIterator
50: .intervalIterator(index);
51: if (it == IntervalIterators.TRUE
52: || it == IntervalIterators.FALSE)
53: return 0;
54: double score = 0, residual = 1, t;
55: int extent = it.extent(), length;
56: Interval interval;
57: while ((interval = it.nextInterval()) != null) {
58: length = interval.length();
59: t = residual * Math.min((double) extent / length, 1) / 2;
60: residual -= t;
61: score += t;
62: if (score > .99)
63: return 1;
64: }
65: return score;
66: }
67:
68: public String toString() {
69: return "Vigna()";
70: }
71:
72: public synchronized VignaScorer copy() {
73: final VignaScorer scorer = new VignaScorer();
74: scorer.setWeights(index2Weight);
75: return scorer;
76: }
77:
78: /** Returns true.
79: * @return true.
80: */
81: public boolean usesIntervals() {
82: return true;
83: }
84: }
|