001: package org.apache.lucene.search;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.IOException;
021: import java.util.ArrayList;
022: import java.util.List;
023: import java.util.Iterator;
024:
025: /** An alternative to BooleanScorer that also allows a minimum number
026: * of optional scorers that should match.
027: * <br>Implements skipTo(), and has no limitations on the numbers of added scorers.
028: * <br>Uses ConjunctionScorer, DisjunctionScorer, ReqOptScorer and ReqExclScorer.
029: */
030: class BooleanScorer2 extends Scorer {
031: private ArrayList requiredScorers = new ArrayList();
032: private ArrayList optionalScorers = new ArrayList();
033: private ArrayList prohibitedScorers = new ArrayList();
034:
035: private class Coordinator {
036: int maxCoord = 0; // to be increased for each non prohibited scorer
037:
038: private float[] coordFactors = null;
039:
040: void init() { // use after all scorers have been added.
041: coordFactors = new float[maxCoord + 1];
042: Similarity sim = getSimilarity();
043: for (int i = 0; i <= maxCoord; i++) {
044: coordFactors[i] = sim.coord(i, maxCoord);
045: }
046: }
047:
048: int nrMatchers; // to be increased by score() of match counting scorers.
049:
050: void initDoc() {
051: nrMatchers = 0;
052: }
053:
054: float coordFactor() {
055: return coordFactors[nrMatchers];
056: }
057: }
058:
059: private final Coordinator coordinator;
060:
061: /** The scorer to which all scoring will be delegated,
062: * except for computing and using the coordination factor.
063: */
064: private Scorer countingSumScorer = null;
065:
066: /** The number of optionalScorers that need to match (if there are any) */
067: private final int minNrShouldMatch;
068:
069: /** Whether it is allowed to return documents out of order.
070: * This can accelerate the scoring of disjunction queries.
071: */
072: private boolean allowDocsOutOfOrder;
073:
074: /** Create a BooleanScorer2.
075: * @param similarity The similarity to be used.
076: * @param minNrShouldMatch The minimum number of optional added scorers
077: * that should match during the search.
078: * In case no required scorers are added,
079: * at least one of the optional scorers will have to
080: * match during the search.
081: * @param allowDocsOutOfOrder Whether it is allowed to return documents out of order.
082: * This can accelerate the scoring of disjunction queries.
083: */
084: public BooleanScorer2(Similarity similarity, int minNrShouldMatch,
085: boolean allowDocsOutOfOrder) {
086: super (similarity);
087: if (minNrShouldMatch < 0) {
088: throw new IllegalArgumentException(
089: "Minimum number of optional scorers should not be negative");
090: }
091: coordinator = new Coordinator();
092: this .minNrShouldMatch = minNrShouldMatch;
093: this .allowDocsOutOfOrder = allowDocsOutOfOrder;
094: }
095:
096: /** Create a BooleanScorer2.
097: * In no required scorers are added,
098: * at least one of the optional scorers will have to match during the search.
099: * @param similarity The similarity to be used.
100: * @param minNrShouldMatch The minimum number of optional added scorers
101: * that should match during the search.
102: * In case no required scorers are added,
103: * at least one of the optional scorers will have to
104: * match during the search.
105: */
106: public BooleanScorer2(Similarity similarity, int minNrShouldMatch) {
107: this (similarity, minNrShouldMatch, false);
108: }
109:
110: /** Create a BooleanScorer2.
111: * In no required scorers are added,
112: * at least one of the optional scorers will have to match during the search.
113: * @param similarity The similarity to be used.
114: */
115: public BooleanScorer2(Similarity similarity) {
116: this (similarity, 0, false);
117: }
118:
119: public void add(final Scorer scorer, boolean required,
120: boolean prohibited) {
121: if (!prohibited) {
122: coordinator.maxCoord++;
123: }
124:
125: if (required) {
126: if (prohibited) {
127: throw new IllegalArgumentException(
128: "scorer cannot be required and prohibited");
129: }
130: requiredScorers.add(scorer);
131: } else if (prohibited) {
132: prohibitedScorers.add(scorer);
133: } else {
134: optionalScorers.add(scorer);
135: }
136: }
137:
138: /** Initialize the match counting scorer that sums all the
139: * scores. <p>
140: * When "counting" is used in a name it means counting the number
141: * of matching scorers.<br>
142: * When "sum" is used in a name it means score value summing
143: * over the matching scorers
144: */
145: private void initCountingSumScorer() throws IOException {
146: coordinator.init();
147: countingSumScorer = makeCountingSumScorer();
148: }
149:
150: /** Count a scorer as a single match. */
151: private class SingleMatchScorer extends Scorer {
152: private Scorer scorer;
153: private int lastScoredDoc = -1;
154:
155: SingleMatchScorer(Scorer scorer) {
156: super (scorer.getSimilarity());
157: this .scorer = scorer;
158: }
159:
160: public float score() throws IOException {
161: if (this .doc() >= lastScoredDoc) {
162: lastScoredDoc = this .doc();
163: coordinator.nrMatchers++;
164: }
165: return scorer.score();
166: }
167:
168: public int doc() {
169: return scorer.doc();
170: }
171:
172: public boolean next() throws IOException {
173: return scorer.next();
174: }
175:
176: public boolean skipTo(int docNr) throws IOException {
177: return scorer.skipTo(docNr);
178: }
179:
180: public Explanation explain(int docNr) throws IOException {
181: return scorer.explain(docNr);
182: }
183: }
184:
185: private Scorer countingDisjunctionSumScorer(final List scorers,
186: int minNrShouldMatch)
187: // each scorer from the list counted as a single matcher
188: {
189: return new DisjunctionSumScorer(scorers, minNrShouldMatch) {
190: private int lastScoredDoc = -1;
191:
192: public float score() throws IOException {
193: if (this .doc() >= lastScoredDoc) {
194: lastScoredDoc = this .doc();
195: coordinator.nrMatchers += super .nrMatchers;
196: }
197: return super .score();
198: }
199: };
200: }
201:
202: private static Similarity defaultSimilarity = new DefaultSimilarity();
203:
204: private Scorer countingConjunctionSumScorer(List requiredScorers)
205: throws IOException {
206: // each scorer from the list counted as a single matcher
207: final int requiredNrMatchers = requiredScorers.size();
208: return new ConjunctionScorer(defaultSimilarity, requiredScorers) {
209: private int lastScoredDoc = -1;
210:
211: public float score() throws IOException {
212: if (this .doc() >= lastScoredDoc) {
213: lastScoredDoc = this .doc();
214: coordinator.nrMatchers += requiredNrMatchers;
215: }
216: // All scorers match, so defaultSimilarity super.score() always has 1 as
217: // the coordination factor.
218: // Therefore the sum of the scores of the requiredScorers
219: // is used as score.
220: return super .score();
221: }
222: };
223: }
224:
225: private Scorer dualConjunctionSumScorer(Scorer req1, Scorer req2)
226: throws IOException { // non counting.
227: return new ConjunctionScorer(defaultSimilarity, new Scorer[] {
228: req1, req2 });
229: // All scorers match, so defaultSimilarity always has 1 as
230: // the coordination factor.
231: // Therefore the sum of the scores of two scorers
232: // is used as score.
233: }
234:
235: /** Returns the scorer to be used for match counting and score summing.
236: * Uses requiredScorers, optionalScorers and prohibitedScorers.
237: */
238: private Scorer makeCountingSumScorer() throws IOException { // each scorer counted as a single matcher
239: return (requiredScorers.size() == 0) ? makeCountingSumScorerNoReq()
240: : makeCountingSumScorerSomeReq();
241: }
242:
243: private Scorer makeCountingSumScorerNoReq() throws IOException { // No required scorers
244: if (optionalScorers.size() == 0) {
245: return new NonMatchingScorer(); // no clauses or only prohibited clauses
246: } else { // No required scorers. At least one optional scorer.
247: // minNrShouldMatch optional scorers are required, but at least 1
248: int nrOptRequired = (minNrShouldMatch < 1) ? 1
249: : minNrShouldMatch;
250: if (optionalScorers.size() < nrOptRequired) {
251: return new NonMatchingScorer(); // fewer optional clauses than minimum (at least 1) that should match
252: } else { // optionalScorers.size() >= nrOptRequired, no required scorers
253: Scorer requiredCountingSumScorer = (optionalScorers
254: .size() > nrOptRequired) ? countingDisjunctionSumScorer(
255: optionalScorers, nrOptRequired)
256: : // optionalScorers.size() == nrOptRequired (all optional scorers are required), no required scorers
257: (optionalScorers.size() == 1) ? new SingleMatchScorer(
258: (Scorer) optionalScorers.get(0))
259: : countingConjunctionSumScorer(optionalScorers);
260: return addProhibitedScorers(requiredCountingSumScorer);
261: }
262: }
263: }
264:
265: private Scorer makeCountingSumScorerSomeReq() throws IOException { // At least one required scorer.
266: if (optionalScorers.size() < minNrShouldMatch) {
267: return new NonMatchingScorer(); // fewer optional clauses than minimum that should match
268: } else if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required.
269: ArrayList allReq = new ArrayList(requiredScorers);
270: allReq.addAll(optionalScorers);
271: return addProhibitedScorers(countingConjunctionSumScorer(allReq));
272: } else { // optionalScorers.size() > minNrShouldMatch, and at least one required scorer
273: Scorer requiredCountingSumScorer = (requiredScorers.size() == 1) ? new SingleMatchScorer(
274: (Scorer) requiredScorers.get(0))
275: : countingConjunctionSumScorer(requiredScorers);
276: if (minNrShouldMatch > 0) { // use a required disjunction scorer over the optional scorers
277: return addProhibitedScorers(dualConjunctionSumScorer(
278: // non counting
279: requiredCountingSumScorer,
280: countingDisjunctionSumScorer(optionalScorers,
281: minNrShouldMatch)));
282: } else { // minNrShouldMatch == 0
283: return new ReqOptSumScorer(
284: addProhibitedScorers(requiredCountingSumScorer),
285: ((optionalScorers.size() == 1) ? new SingleMatchScorer(
286: (Scorer) optionalScorers.get(0))
287: : countingDisjunctionSumScorer(
288: optionalScorers, 1))); // require 1 in combined, optional scorer.
289: }
290: }
291: }
292:
293: /** Returns the scorer to be used for match counting and score summing.
294: * Uses the given required scorer and the prohibitedScorers.
295: * @param requiredCountingSumScorer A required scorer already built.
296: */
297: private Scorer addProhibitedScorers(Scorer requiredCountingSumScorer) {
298: return (prohibitedScorers.size() == 0) ? requiredCountingSumScorer // no prohibited
299: : new ReqExclScorer(
300: requiredCountingSumScorer,
301: ((prohibitedScorers.size() == 1) ? (Scorer) prohibitedScorers
302: .get(0)
303: : new DisjunctionSumScorer(
304: prohibitedScorers)));
305: }
306:
307: /** Scores and collects all matching documents.
308: * @param hc The collector to which all matching documents are passed through
309: * {@link HitCollector#collect(int, float)}.
310: * <br>When this method is used the {@link #explain(int)} method should not be used.
311: */
312: public void score(HitCollector hc) throws IOException {
313: if (allowDocsOutOfOrder && requiredScorers.size() == 0
314: && prohibitedScorers.size() < 32) {
315: // fall back to BooleanScorer, scores documents somewhat out of order
316: BooleanScorer bs = new BooleanScorer(getSimilarity(),
317: minNrShouldMatch);
318: Iterator si = optionalScorers.iterator();
319: while (si.hasNext()) {
320: bs
321: .add((Scorer) si.next(), false /* required */,
322: false /* prohibited */);
323: }
324: si = prohibitedScorers.iterator();
325: while (si.hasNext()) {
326: bs
327: .add((Scorer) si.next(), false /* required */,
328: true /* prohibited */);
329: }
330: bs.score(hc);
331: } else {
332: if (countingSumScorer == null) {
333: initCountingSumScorer();
334: }
335: while (countingSumScorer.next()) {
336: hc.collect(countingSumScorer.doc(), score());
337: }
338: }
339: }
340:
341: /** Expert: Collects matching documents in a range.
342: * <br>Note that {@link #next()} must be called once before this method is
343: * called for the first time.
344: * @param hc The collector to which all matching documents are passed through
345: * {@link HitCollector#collect(int, float)}.
346: * @param max Do not score documents past this.
347: * @return true if more matching documents may remain.
348: */
349: protected boolean score(HitCollector hc, int max)
350: throws IOException {
351: // null pointer exception when next() was not called before:
352: int docNr = countingSumScorer.doc();
353: while (docNr < max) {
354: hc.collect(docNr, score());
355: if (!countingSumScorer.next()) {
356: return false;
357: }
358: docNr = countingSumScorer.doc();
359: }
360: return true;
361: }
362:
363: public int doc() {
364: return countingSumScorer.doc();
365: }
366:
367: public boolean next() throws IOException {
368: if (countingSumScorer == null) {
369: initCountingSumScorer();
370: }
371: return countingSumScorer.next();
372: }
373:
374: public float score() throws IOException {
375: coordinator.initDoc();
376: float sum = countingSumScorer.score();
377: return sum * coordinator.coordFactor();
378: }
379:
380: /** Skips to the first match beyond the current whose document number is
381: * greater than or equal to a given target.
382: *
383: * <p>When this method is used the {@link #explain(int)} method should not be used.
384: *
385: * @param target The target document number.
386: * @return true iff there is such a match.
387: */
388: public boolean skipTo(int target) throws IOException {
389: if (countingSumScorer == null) {
390: initCountingSumScorer();
391: }
392: return countingSumScorer.skipTo(target);
393: }
394:
395: /** Throws an UnsupportedOperationException.
396: * TODO: Implement an explanation of the coordination factor.
397: * @param doc The document number for the explanation.
398: * @throws UnsupportedOperationException
399: */
400: public Explanation explain(int doc) {
401: throw new UnsupportedOperationException();
402: /* How to explain the coordination factor?
403: initCountingSumScorer();
404: return countingSumScorer.explain(doc); // misses coord factor.
405: */
406: }
407: }
|