001: package org.apache.lucene.search;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.util.List;
021: import java.util.Iterator;
022: import java.io.IOException;
023:
024: import org.apache.lucene.util.ScorerDocQueue;
025:
026: /** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
027: * This Scorer implements {@link Scorer#skipTo(int)} and uses skipTo() on the given Scorers.
028: * @todo Implement score(HitCollector, int).
029: */
030: class DisjunctionSumScorer extends Scorer {
031: /** The number of subscorers. */
032: private final int nrScorers;
033:
034: /** The subscorers. */
035: protected final List subScorers;
036:
037: /** The minimum number of scorers that should match. */
038: private final int minimumNrMatchers;
039:
040: /** The scorerDocQueue contains all subscorers ordered by their current doc(),
041: * with the minimum at the top.
042: * <br>The scorerDocQueue is initialized the first time next() or skipTo() is called.
043: * <br>An exhausted scorer is immediately removed from the scorerDocQueue.
044: * <br>If less than the minimumNrMatchers scorers
045: * remain in the scorerDocQueue next() and skipTo() return false.
046: * <p>
047: * After each to call to next() or skipTo()
048: * <code>currentSumScore</code> is the total score of the current matching doc,
049: * <code>nrMatchers</code> is the number of matching scorers,
050: * and all scorers are after the matching doc, or are exhausted.
051: */
052: private ScorerDocQueue scorerDocQueue = null;
053: private int queueSize = -1; // used to avoid size() method calls on scorerDocQueue
054:
055: /** The document number of the current match. */
056: private int currentDoc = -1;
057:
058: /** The number of subscorers that provide the current match. */
059: protected int nrMatchers = -1;
060:
061: private float currentScore = Float.NaN;
062:
063: /** Construct a <code>DisjunctionScorer</code>.
064: * @param subScorers A collection of at least two subscorers.
065: * @param minimumNrMatchers The positive minimum number of subscorers that should
066: * match to match this query.
067: * <br>When <code>minimumNrMatchers</code> is bigger than
068: * the number of <code>subScorers</code>,
069: * no matches will be produced.
070: * <br>When minimumNrMatchers equals the number of subScorers,
071: * it more efficient to use <code>ConjunctionScorer</code>.
072: */
073: public DisjunctionSumScorer(List subScorers, int minimumNrMatchers) {
074: super (null);
075:
076: nrScorers = subScorers.size();
077:
078: if (minimumNrMatchers <= 0) {
079: throw new IllegalArgumentException(
080: "Minimum nr of matchers must be positive");
081: }
082: if (nrScorers <= 1) {
083: throw new IllegalArgumentException(
084: "There must be at least 2 subScorers");
085: }
086:
087: this .minimumNrMatchers = minimumNrMatchers;
088: this .subScorers = subScorers;
089: }
090:
091: /** Construct a <code>DisjunctionScorer</code>, using one as the minimum number
092: * of matching subscorers.
093: */
094: public DisjunctionSumScorer(List subScorers) {
095: this (subScorers, 1);
096: }
097:
098: /** Called the first time next() or skipTo() is called to
099: * initialize <code>scorerDocQueue</code>.
100: */
101: private void initScorerDocQueue() throws IOException {
102: Iterator si = subScorers.iterator();
103: scorerDocQueue = new ScorerDocQueue(nrScorers);
104: queueSize = 0;
105: while (si.hasNext()) {
106: Scorer se = (Scorer) si.next();
107: if (se.next()) { // doc() method will be used in scorerDocQueue.
108: if (scorerDocQueue.insert(se)) {
109: queueSize++;
110: }
111: }
112: }
113: }
114:
115: /** Scores and collects all matching documents.
116: * @param hc The collector to which all matching documents are passed through
117: * {@link HitCollector#collect(int, float)}.
118: * <br>When this method is used the {@link #explain(int)} method should not be used.
119: */
120: public void score(HitCollector hc) throws IOException {
121: while (next()) {
122: hc.collect(currentDoc, currentScore);
123: }
124: }
125:
126: /** Expert: Collects matching documents in a range. Hook for optimization.
127: * Note that {@link #next()} must be called once before this method is called
128: * for the first time.
129: * @param hc The collector to which all matching documents are passed through
130: * {@link HitCollector#collect(int, float)}.
131: * @param max Do not score documents past this.
132: * @return true if more matching documents may remain.
133: */
134: protected boolean score(HitCollector hc, int max)
135: throws IOException {
136: while (currentDoc < max) {
137: hc.collect(currentDoc, currentScore);
138: if (!next()) {
139: return false;
140: }
141: }
142: return true;
143: }
144:
145: public boolean next() throws IOException {
146: if (scorerDocQueue == null) {
147: initScorerDocQueue();
148: }
149: return (scorerDocQueue.size() >= minimumNrMatchers)
150: && advanceAfterCurrent();
151: }
152:
153: /** Advance all subscorers after the current document determined by the
154: * top of the <code>scorerDocQueue</code>.
155: * Repeat until at least the minimum number of subscorers match on the same
156: * document and all subscorers are after that document or are exhausted.
157: * <br>On entry the <code>scorerDocQueue</code> has at least <code>minimumNrMatchers</code>
158: * available. At least the scorer with the minimum document number will be advanced.
159: * @return true iff there is a match.
160: * <br>In case there is a match, </code>currentDoc</code>, </code>currentSumScore</code>,
161: * and </code>nrMatchers</code> describe the match.
162: *
163: * @todo Investigate whether it is possible to use skipTo() when
164: * the minimum number of matchers is bigger than one, ie. try and use the
165: * character of ConjunctionScorer for the minimum number of matchers.
166: * Also delay calling score() on the sub scorers until the minimum number of
167: * matchers is reached.
168: * <br>For this, a Scorer array with minimumNrMatchers elements might
169: * hold Scorers at currentDoc that are temporarily popped from scorerQueue.
170: */
171: protected boolean advanceAfterCurrent() throws IOException {
172: do { // repeat until minimum nr of matchers
173: currentDoc = scorerDocQueue.topDoc();
174: currentScore = scorerDocQueue.topScore();
175: nrMatchers = 1;
176: do { // Until all subscorers are after currentDoc
177: if (!scorerDocQueue.topNextAndAdjustElsePop()) {
178: if (--queueSize == 0) {
179: break; // nothing more to advance, check for last match.
180: }
181: }
182: if (scorerDocQueue.topDoc() != currentDoc) {
183: break; // All remaining subscorers are after currentDoc.
184: }
185: currentScore += scorerDocQueue.topScore();
186: nrMatchers++;
187: } while (true);
188:
189: if (nrMatchers >= minimumNrMatchers) {
190: return true;
191: } else if (queueSize < minimumNrMatchers) {
192: return false;
193: }
194: } while (true);
195: }
196:
197: /** Returns the score of the current document matching the query.
198: * Initially invalid, until {@link #next()} is called the first time.
199: */
200: public float score() throws IOException {
201: return currentScore;
202: }
203:
204: public int doc() {
205: return currentDoc;
206: }
207:
208: /** Returns the number of subscorers matching the current document.
209: * Initially invalid, until {@link #next()} is called the first time.
210: */
211: public int nrMatchers() {
212: return nrMatchers;
213: }
214:
215: /** Skips to the first match beyond the current whose document number is
216: * greater than or equal to a given target.
217: * <br>When this method is used the {@link #explain(int)} method should not be used.
218: * <br>The implementation uses the skipTo() method on the subscorers.
219: * @param target The target document number.
220: * @return true iff there is such a match.
221: */
222: public boolean skipTo(int target) throws IOException {
223: if (scorerDocQueue == null) {
224: initScorerDocQueue();
225: }
226: if (queueSize < minimumNrMatchers) {
227: return false;
228: }
229: if (target <= currentDoc) {
230: return true;
231: }
232: do {
233: if (scorerDocQueue.topDoc() >= target) {
234: return advanceAfterCurrent();
235: } else if (!scorerDocQueue
236: .topSkipToAndAdjustElsePop(target)) {
237: if (--queueSize < minimumNrMatchers) {
238: return false;
239: }
240: }
241: } while (true);
242: }
243:
244: /** @return An explanation for the score of a given document. */
245: public Explanation explain(int doc) throws IOException {
246: Explanation res = new Explanation();
247: Iterator ssi = subScorers.iterator();
248: float sumScore = 0.0f;
249: int nrMatches = 0;
250: while (ssi.hasNext()) {
251: Explanation es = ((Scorer) ssi.next()).explain(doc);
252: if (es.getValue() > 0.0f) { // indicates match
253: sumScore += es.getValue();
254: nrMatches++;
255: }
256: res.addDetail(es);
257: }
258: if (nrMatchers >= minimumNrMatchers) {
259: res.setValue(sumScore);
260: res.setDescription("sum over at least " + minimumNrMatchers
261: + " of " + subScorers.size() + ":");
262: } else {
263: res.setValue(0.0f);
264: res.setDescription(nrMatches + " match(es) but at least "
265: + minimumNrMatchers + " of " + subScorers.size()
266: + " needed");
267: }
268: return res;
269: }
270: }
|