001: package org.apache.lucene.search;
002:
003: /**
004: * Copyright 2004 The Apache Software Foundation
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018:
019: import java.io.IOException;
020: import java.util.ArrayList;
021:
022: /**
023: * The Scorer for DisjunctionMaxQuery's. The union of all documents generated by the the subquery scorers
024: * is generated in document number order. The score for each document is the maximum of the scores computed
025: * by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores
026: * for the other subqueries that generate the document.
027: * @author Chuck Williams
028: */
029: class DisjunctionMaxScorer extends Scorer {
030:
031: /* The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. */
032: private ArrayList subScorers = new ArrayList();
033:
034: /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
035: private float tieBreakerMultiplier;
036:
037: private boolean more = false; // True iff there is a next document
038: private boolean firstTime = true; // True iff next() has not yet been called
039:
040: /** Creates a new instance of DisjunctionMaxScorer
041: * @param tieBreakerMultiplier Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result.
042: * @param similarity -- not used since our definition involves neither coord nor terms directly */
043: public DisjunctionMaxScorer(float tieBreakerMultiplier,
044: Similarity similarity) {
045: super (similarity);
046: this .tieBreakerMultiplier = tieBreakerMultiplier;
047: }
048:
049: /** Add the scorer for a subquery
050: * @param scorer the scorer of a subquery of our associated DisjunctionMaxQuery
051: */
052: public void add(Scorer scorer) throws IOException {
053: if (scorer.next()) { // Initialize and retain only if it produces docs
054: subScorers.add(scorer);
055: more = true;
056: }
057: }
058:
059: /** Generate the next document matching our associated DisjunctionMaxQuery.
060: * @return true iff there is a next document
061: */
062: public boolean next() throws IOException {
063: if (!more)
064: return false;
065: if (firstTime) {
066: heapify();
067: firstTime = false;
068: return true; // more would have been false if no subScorers had any docs
069: }
070: // Increment all generators that generated the last doc and adjust the heap.
071: int lastdoc = ((Scorer) subScorers.get(0)).doc();
072: do {
073: if (((Scorer) subScorers.get(0)).next())
074: heapAdjust(0);
075: else {
076: heapRemoveRoot();
077: if (subScorers.isEmpty())
078: return (more = false);
079: }
080: } while (((Scorer) subScorers.get(0)).doc() == lastdoc);
081: return true;
082: }
083:
084: /** Determine the current document number. Initially invalid, until {@link #next()} is called the first time.
085: * @return the document number of the currently generated document
086: */
087: public int doc() {
088: return ((Scorer) subScorers.get(0)).doc();
089: }
090:
091: /** Determine the current document score. Initially invalid, until {@link #next()} is called the first time.
092: * @return the score of the current generated document
093: */
094: public float score() throws IOException {
095: int doc = ((Scorer) subScorers.get(0)).doc();
096: float[] sum = { ((Scorer) subScorers.get(0)).score() }, max = { sum[0] };
097: int size = subScorers.size();
098: scoreAll(1, size, doc, sum, max);
099: scoreAll(2, size, doc, sum, max);
100: return max[0] + (sum[0] - max[0]) * tieBreakerMultiplier;
101: }
102:
103: // Recursively iterate all subScorers that generated last doc computing sum and max
104: private void scoreAll(int root, int size, int doc, float[] sum,
105: float[] max) throws IOException {
106: if (root < size && ((Scorer) subScorers.get(root)).doc() == doc) {
107: float sub = ((Scorer) subScorers.get(root)).score();
108: sum[0] += sub;
109: max[0] = Math.max(max[0], sub);
110: scoreAll((root << 1) + 1, size, doc, sum, max);
111: scoreAll((root << 1) + 2, size, doc, sum, max);
112: }
113: }
114:
115: /** Advance to the first document beyond the current whose number is greater than or equal to target.
116: * @param target the minimum number of the next desired document
117: * @return true iff there is a document to be generated whose number is at least target
118: */
119: public boolean skipTo(int target) throws IOException {
120: if (firstTime) {
121: if (!more)
122: return false;
123: heapify();
124: firstTime = false;
125: }
126:
127: while (subScorers.size() > 0
128: && ((Scorer) subScorers.get(0)).doc() < target) {
129: if (((Scorer) subScorers.get(0)).skipTo(target))
130: heapAdjust(0);
131: else
132: heapRemoveRoot();
133: }
134: if ((subScorers.size() == 0))
135: return (more = false);
136: return true;
137: }
138:
139: /** Explain a score that we computed. UNSUPPORTED -- see explanation capability in DisjunctionMaxQuery.
140: * @param doc the number of a document we scored
141: * @return the Explanation for our score
142: */
143: public Explanation explain(int doc) throws IOException {
144: throw new UnsupportedOperationException();
145: }
146:
147: // Organize subScorers into a min heap with scorers generating the earlest document on top.
148: private void heapify() {
149: int size = subScorers.size();
150: for (int i = (size >> 1) - 1; i >= 0; i--)
151: heapAdjust(i);
152: }
153:
154: /* The subtree of subScorers at root is a min heap except possibly for its root element.
155: * Bubble the root down as required to make the subtree a heap.
156: */
157: private void heapAdjust(int root) {
158: Scorer scorer = (Scorer) subScorers.get(root);
159: int doc = scorer.doc();
160: int i = root, size = subScorers.size();
161: while (i <= (size >> 1) - 1) {
162: int lchild = (i << 1) + 1;
163: Scorer lscorer = (Scorer) subScorers.get(lchild);
164: int ldoc = lscorer.doc();
165: int rdoc = Integer.MAX_VALUE, rchild = (i << 1) + 2;
166: Scorer rscorer = null;
167: if (rchild < size) {
168: rscorer = (Scorer) subScorers.get(rchild);
169: rdoc = rscorer.doc();
170: }
171: if (ldoc < doc) {
172: if (rdoc < ldoc) {
173: subScorers.set(i, rscorer);
174: subScorers.set(rchild, scorer);
175: i = rchild;
176: } else {
177: subScorers.set(i, lscorer);
178: subScorers.set(lchild, scorer);
179: i = lchild;
180: }
181: } else if (rdoc < doc) {
182: subScorers.set(i, rscorer);
183: subScorers.set(rchild, scorer);
184: i = rchild;
185: } else
186: return;
187: }
188: }
189:
190: // Remove the root Scorer from subScorers and re-establish it as a heap
191: private void heapRemoveRoot() {
192: int size = subScorers.size();
193: if (size == 1)
194: subScorers.remove(0);
195: else {
196: subScorers.set(0, subScorers.get(size - 1));
197: subScorers.remove(size - 1);
198: heapAdjust(0);
199: }
200: }
201:
202: }
|