package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.TermDocs;

/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
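 * <p>A <code>TermScorer</code> is normally obtained from the
 * <code>Weight</code> of a {@link TermQuery} rather than constructed directly.
 * Matching documents and their term frequencies are read from the
 * {@link TermDocs} iterator in batches of 32, and scores for term frequencies
 * below <code>SCORE_CACHE_SIZE</code> are precomputed in a small cache.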
 */
final class TermScorer extends Scorer {
  private Weight weight;                     // the query weight
  private TermDocs termDocs;                 // iterator over documents matching the term
  private byte[] norms;                      // field norms for the term's field
  private float weightValue;                 // cached value of weight.getValue()
  private int doc;                           // current document number

  private final int[] docs = new int[32];    // buffered doc numbers
  private final int[] freqs = new int[32];   // buffered term freqs
  private int pointer;                       // current position in the buffers
  private int pointerMax;                    // number of valid entries in the buffers

  private static final int SCORE_CACHE_SIZE = 32;
  private float[] scoreCache = new float[SCORE_CACHE_SIZE]; // precomputed tf(f)*weightValue for small freqs

  /** Construct a <code>TermScorer</code>.
   * @param weight The weight of the <code>Term</code> in the query.
   * @param td An iterator over the documents matching the <code>Term</code>.
   * @param similarity The <code>Similarity</code> implementation to be used for score computations.
   * @param norms The field norms of the document fields for the <code>Term</code>.
   */
  TermScorer(Weight weight, TermDocs td, Similarity similarity,
             byte[] norms) {
    super(similarity);
    this.weight = weight;
    this.termDocs = td;
    this.norms = norms;
    this.weightValue = weight.getValue();

    for (int i = 0; i < SCORE_CACHE_SIZE; i++)
      scoreCache[i] = getSimilarity().tf(i) * weightValue;
  }

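  /** Scores all remaining matching documents, passing each to the given
   * collector.  Equivalent to advancing to the first match and then calling
   * {@link #score(HitCollector,int)} with <code>Integer.MAX_VALUE</code>.
   */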
  public void score(HitCollector hc) throws IOException {
    next();
    score(hc, Integer.MAX_VALUE);
  }

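  /** Scores matching documents whose numbers are less than <code>end</code>,
   * collecting each via <code>c.collect(doc, score)</code> and refilling the
   * document/frequency buffers as needed.
   * @return true if more matching documents may remain, false if the
   *         underlying {@link TermDocs} stream is exhausted.
   */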
  protected boolean score(HitCollector c, int end) throws IOException {
    Similarity similarity = getSimilarity();      // cache sim in local
    float[] normDecoder = Similarity.getNormDecoder();
    while (doc < end) {                           // for docs in window
      int f = freqs[pointer];
      float score =                               // compute tf(f)*weight
        f < SCORE_CACHE_SIZE                      // check cache
        ? scoreCache[f]                           // cache hit
        : similarity.tf(f) * weightValue;         // cache miss

      score *= normDecoder[norms[doc] & 0xFF];    // normalize for field

      c.collect(doc, score);                      // collect score

      if (++pointer >= pointerMax) {
        pointerMax = termDocs.read(docs, freqs);  // refill buffers
        if (pointerMax != 0) {
          pointer = 0;
        } else {
          termDocs.close();                       // close stream
          doc = Integer.MAX_VALUE;                // set to sentinel value
          return false;
        }
      }
      doc = docs[pointer];
    }
    return true;
  }

  /** Returns the current document number matching the query.
   * Initially invalid, until {@link #next()} is called the first time.
   */
  public int doc() {
    return doc;
  }

  /** Advances to the next document matching the query.
   * <br>The iterator over the matching documents is buffered using
   * {@link TermDocs#read(int[],int[])}.
   * @return true iff there is another document matching the query.
   */
  public boolean next() throws IOException {
    pointer++;
    if (pointer >= pointerMax) {
      pointerMax = termDocs.read(docs, freqs);    // refill buffer
      if (pointerMax != 0) {
        pointer = 0;
      } else {
        termDocs.close();                         // close stream
        doc = Integer.MAX_VALUE;                  // set to sentinel value
        return false;
      }
    }
    doc = docs[pointer];
    return true;
  }

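  /** Returns the score of the current document: <code>tf(freq)</code> times
   * the query weight, normalized by the decoded field norm.  Uses the
   * precomputed score cache for small term frequencies.
   */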
  public float score() {
    int f = freqs[pointer];
    float raw =                                   // compute tf(f)*weight
      f < SCORE_CACHE_SIZE                        // check cache
      ? scoreCache[f]                             // cache hit
      : getSimilarity().tf(f) * weightValue;      // cache miss

    return raw * Similarity.decodeNorm(norms[doc]); // normalize for field
  }

  /** Skips to the first match beyond the current whose document number is
   * greater than or equal to a given target.
   * <br>The implementation uses {@link TermDocs#skipTo(int)}.
   * @param target The target document number.
   * @return true iff there is such a match.
   */
  public boolean skipTo(int target) throws IOException {
    // first scan in cache
    for (pointer++; pointer < pointerMax; pointer++) {
      if (docs[pointer] >= target) {
        doc = docs[pointer];
        return true;
      }
    }

    // not found in cache, seek underlying stream
    boolean result = termDocs.skipTo(target);
    if (result) {
      pointerMax = 1;
      pointer = 0;
      docs[pointer] = doc = termDocs.doc();
      freqs[pointer] = termDocs.freq();
    } else {
      doc = Integer.MAX_VALUE;
    }
    return result;
  }

  /** Returns an explanation of the score for a document.
   * <br>When this method is used, the {@link #next()} method
   * and the {@link #score(HitCollector)} method should not be used.
   * @param doc The document number for the explanation.
   */
  public Explanation explain(int doc) throws IOException {
    TermQuery query = (TermQuery) weight.getQuery();
    Explanation tfExplanation = new Explanation();
    int tf = 0;
    while (pointer < pointerMax) {
      if (docs[pointer] == doc)
        tf = freqs[pointer];
      pointer++;
    }
    if (tf == 0) {
      if (termDocs.skipTo(doc)) {
        if (termDocs.doc() == doc) {
          tf = termDocs.freq();
        }
      }
    }
    termDocs.close();
    tfExplanation.setValue(getSimilarity().tf(tf));
    tfExplanation.setDescription("tf(termFreq(" + query.getTerm()
                                 + ")=" + tf + ")");

    return tfExplanation;
  }

  /** Returns a string representation of this <code>TermScorer</code>. */
  public String toString() {
    return "scorer(" + weight + ")";
  }
}