001: package org.apache.lucene.search;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.index.IndexReader;
021: import org.apache.lucene.util.ToStringUtils;
022: import org.apache.lucene.search.BooleanClause.Occur;
023:
024: import java.io.IOException;
025: import java.util.*;
026:
027: /** A Query that matches documents matching boolean combinations of other
028: * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
029: * BooleanQuerys.
030: */
031: public class BooleanQuery extends Query {
032:
033: /**
034:
035: */
036: private static int maxClauseCount = 1024;
037:
038: /** Thrown when an attempt is made to add more than {@link
039: * #getMaxClauseCount()} clauses. This typically happens if
040: * a PrefixQuery, FuzzyQuery, WildcardQuery, or RangeQuery
041: * is expanded to many terms during search.
042: */
043: public static class TooManyClauses extends RuntimeException {
044: public TooManyClauses() {
045: }
046:
047: public String getMessage() {
048: return "maxClauseCount is set to " + maxClauseCount;
049: }
050: }
051:
052: /** Return the maximum number of clauses permitted, 1024 by default.
053: * Attempts to add more than the permitted number of clauses cause {@link
054: * TooManyClauses} to be thrown.
055: * @see #setMaxClauseCount(int)
056: */
057: public static int getMaxClauseCount() {
058: return maxClauseCount;
059: }
060:
061: /** Set the maximum number of clauses permitted per BooleanQuery.
062: * Default value is 1024.
063: * <p>TermQuery clauses are generated from for example prefix queries and
064: * fuzzy queries. Each TermQuery needs some buffer space during search,
065: * so this parameter indirectly controls the maximum buffer requirements for
066: * query search.
067: * <p>When this parameter becomes a bottleneck for a Query one can use a
068: * Filter. For example instead of a {@link RangeQuery} one can use a
069: * {@link RangeFilter}.
070: * <p>Normally the buffers are allocated by the JVM. When using for example
071: * {@link org.apache.lucene.store.MMapDirectory} the buffering is left to
072: * the operating system.
073: */
074: public static void setMaxClauseCount(int maxClauseCount) {
075: if (maxClauseCount < 1)
076: throw new IllegalArgumentException(
077: "maxClauseCount must be >= 1");
078: BooleanQuery.maxClauseCount = maxClauseCount;
079: }
080:
081: private ArrayList clauses = new ArrayList();
082: private boolean disableCoord;
083:
084: /** Constructs an empty boolean query. */
085: public BooleanQuery() {
086: }
087:
088: /** Constructs an empty boolean query.
089: *
090: * {@link Similarity#coord(int,int)} may be disabled in scoring, as
091: * appropriate. For example, this score factor does not make sense for most
092: * automatically generated queries, like {@link WildcardQuery} and {@link
093: * FuzzyQuery}.
094: *
095: * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
096: */
097: public BooleanQuery(boolean disableCoord) {
098: this .disableCoord = disableCoord;
099: }
100:
101: /** Returns true iff {@link Similarity#coord(int,int)} is disabled in
102: * scoring for this query instance.
103: * @see #BooleanQuery(boolean)
104: */
105: public boolean isCoordDisabled() {
106: return disableCoord;
107: }
108:
109: // Implement coord disabling.
110: // Inherit javadoc.
111: public Similarity getSimilarity(Searcher searcher) {
112: Similarity result = super .getSimilarity(searcher);
113: if (disableCoord) { // disable coord as requested
114: result = new SimilarityDelegator(result) {
115: public float coord(int overlap, int maxOverlap) {
116: return 1.0f;
117: }
118: };
119: }
120: return result;
121: }
122:
123: /**
124: * Specifies a minimum number of the optional BooleanClauses
125: * which must be satisfied.
126: *
127: * <p>
128: * By default no optional clauses are necessary for a match
129: * (unless there are no required clauses). If this method is used,
130: * then the specified number of clauses is required.
131: * </p>
132: * <p>
133: * Use of this method is totally independent of specifying that
134: * any specific clauses are required (or prohibited). This number will
135: * only be compared against the number of matching optional clauses.
136: * </p>
137: * <p>
138: * EXPERT NOTE: Using this method may force collecting docs in order,
139: * regardless of whether setAllowDocsOutOfOrder(true) has been called.
140: * </p>
141: *
142: * @param min the number of optional clauses that must match
143: * @see #setAllowDocsOutOfOrder
144: */
145: public void setMinimumNumberShouldMatch(int min) {
146: this .minNrShouldMatch = min;
147: }
148:
149: protected int minNrShouldMatch = 0;
150:
151: /**
152: * Gets the minimum number of the optional BooleanClauses
153: * which must be satisifed.
154: */
155: public int getMinimumNumberShouldMatch() {
156: return minNrShouldMatch;
157: }
158:
159: /** Adds a clause to a boolean query.
160: *
161: * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
162: * @see #getMaxClauseCount()
163: */
164: public void add(Query query, BooleanClause.Occur occur) {
165: add(new BooleanClause(query, occur));
166: }
167:
168: /** Adds a clause to a boolean query.
169: * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
170: * @see #getMaxClauseCount()
171: */
172: public void add(BooleanClause clause) {
173: if (clauses.size() >= maxClauseCount)
174: throw new TooManyClauses();
175:
176: clauses.add(clause);
177: }
178:
179: /** Returns the set of clauses in this query. */
180: public BooleanClause[] getClauses() {
181: return (BooleanClause[]) clauses
182: .toArray(new BooleanClause[clauses.size()]);
183: }
184:
185: /** Returns the list of clauses in this query. */
186: public List clauses() {
187: return clauses;
188: }
189:
190: private class BooleanWeight implements Weight {
191: protected Similarity similarity;
192: protected Vector weights = new Vector();
193:
194: public BooleanWeight(Searcher searcher) throws IOException {
195: this .similarity = getSimilarity(searcher);
196: for (int i = 0; i < clauses.size(); i++) {
197: BooleanClause c = (BooleanClause) clauses.get(i);
198: weights.add(c.getQuery().createWeight(searcher));
199: }
200: }
201:
202: public Query getQuery() {
203: return BooleanQuery.this ;
204: }
205:
206: public float getValue() {
207: return getBoost();
208: }
209:
210: public float sumOfSquaredWeights() throws IOException {
211: float sum = 0.0f;
212: for (int i = 0; i < weights.size(); i++) {
213: BooleanClause c = (BooleanClause) clauses.get(i);
214: Weight w = (Weight) weights.elementAt(i);
215: // call sumOfSquaredWeights for all clauses in case of side effects
216: float s = w.sumOfSquaredWeights(); // sum sub weights
217: if (!c.isProhibited())
218: // only add to sum for non-prohibited clauses
219: sum += s;
220: }
221:
222: sum *= getBoost() * getBoost(); // boost each sub-weight
223:
224: return sum;
225: }
226:
227: public void normalize(float norm) {
228: norm *= getBoost(); // incorporate boost
229: for (int i = 0; i < weights.size(); i++) {
230: Weight w = (Weight) weights.elementAt(i);
231: // normalize all clauses, (even if prohibited in case of side affects)
232: w.normalize(norm);
233: }
234: }
235:
236: /** @return Returns BooleanScorer2 that uses and provides skipTo(),
237: * and scores documents in document number order.
238: */
239: public Scorer scorer(IndexReader reader) throws IOException {
240: BooleanScorer2 result = new BooleanScorer2(similarity,
241: minNrShouldMatch, allowDocsOutOfOrder);
242:
243: for (int i = 0; i < weights.size(); i++) {
244: BooleanClause c = (BooleanClause) clauses.get(i);
245: Weight w = (Weight) weights.elementAt(i);
246: Scorer subScorer = w.scorer(reader);
247: if (subScorer != null)
248: result.add(subScorer, c.isRequired(), c
249: .isProhibited());
250: else if (c.isRequired())
251: return null;
252: }
253:
254: return result;
255: }
256:
257: public Explanation explain(IndexReader reader, int doc)
258: throws IOException {
259: final int minShouldMatch = BooleanQuery.this
260: .getMinimumNumberShouldMatch();
261: ComplexExplanation sumExpl = new ComplexExplanation();
262: sumExpl.setDescription("sum of:");
263: int coord = 0;
264: int maxCoord = 0;
265: float sum = 0.0f;
266: boolean fail = false;
267: int shouldMatchCount = 0;
268: for (int i = 0; i < weights.size(); i++) {
269: BooleanClause c = (BooleanClause) clauses.get(i);
270: Weight w = (Weight) weights.elementAt(i);
271: Explanation e = w.explain(reader, doc);
272: if (!c.isProhibited())
273: maxCoord++;
274: if (e.isMatch()) {
275: if (!c.isProhibited()) {
276: sumExpl.addDetail(e);
277: sum += e.getValue();
278: coord++;
279: } else {
280: Explanation r = new Explanation(0.0f,
281: "match on prohibited clause ("
282: + c.getQuery().toString() + ")");
283: r.addDetail(e);
284: sumExpl.addDetail(r);
285: fail = true;
286: }
287: if (c.getOccur().equals(Occur.SHOULD))
288: shouldMatchCount++;
289: } else if (c.isRequired()) {
290: Explanation r = new Explanation(0.0f,
291: "no match on required clause ("
292: + c.getQuery().toString() + ")");
293: r.addDetail(e);
294: sumExpl.addDetail(r);
295: fail = true;
296: }
297: }
298: if (fail) {
299: sumExpl.setMatch(Boolean.FALSE);
300: sumExpl.setValue(0.0f);
301: sumExpl
302: .setDescription("Failure to meet condition(s) of required/prohibited clause(s)");
303: return sumExpl;
304: } else if (shouldMatchCount < minShouldMatch) {
305: sumExpl.setMatch(Boolean.FALSE);
306: sumExpl.setValue(0.0f);
307: sumExpl
308: .setDescription("Failure to match minimum number "
309: + "of optional clauses: "
310: + minShouldMatch);
311: return sumExpl;
312: }
313:
314: sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
315: sumExpl.setValue(sum);
316:
317: float coordFactor = similarity.coord(coord, maxCoord);
318: if (coordFactor == 1.0f) // coord is no-op
319: return sumExpl; // eliminate wrapper
320: else {
321: ComplexExplanation result = new ComplexExplanation(
322: sumExpl.isMatch(), sum * coordFactor,
323: "product of:");
324: result.addDetail(sumExpl);
325: result.addDetail(new Explanation(coordFactor, "coord("
326: + coord + "/" + maxCoord + ")"));
327: return result;
328: }
329: }
330: }
331:
332: /** Whether hit docs may be collected out of docid order. */
333: private static boolean allowDocsOutOfOrder = false;
334:
335: /**
336: * Expert: Indicates whether hit docs may be collected out of docid
337: * order.
338: *
339: * <p>
340: * Background: although the contract of the Scorer class requires that
341: * documents be iterated in order of doc id, this was not true in early
342: * versions of Lucene. Many pieces of functionality in the current
343: * Lucene code base have undefined behavior if this contract is not
344: * upheld, but in some specific simple cases may be faster. (For
345: * example: disjunction queries with less than 32 prohibited clauses;
346: * This setting has no effect for other queries.)
347: * </p>
348: *
349: * <p>
350: * Specifics: By setting this option to true, calls to
351: * {@link HitCollector#collect(int,float)} might be
352: * invoked first for docid N and only later for docid N-1.
353: * Being static, this setting is system wide.
354: * </p>
355: */
356: public static void setAllowDocsOutOfOrder(boolean allow) {
357: allowDocsOutOfOrder = allow;
358: }
359:
360: /**
361: * Whether hit docs may be collected out of docid order.
362: * @see #setAllowDocsOutOfOrder(boolean)
363: */
364: public static boolean getAllowDocsOutOfOrder() {
365: return allowDocsOutOfOrder;
366: }
367:
368: /**
369: * @deprecated Use {@link #setAllowDocsOutOfOrder(boolean)} instead.
370: */
371: public static void setUseScorer14(boolean use14) {
372: setAllowDocsOutOfOrder(use14);
373: }
374:
375: /**
376: * @deprecated Use {@link #getAllowDocsOutOfOrder()} instead.
377: */
378: public static boolean getUseScorer14() {
379: return getAllowDocsOutOfOrder();
380: }
381:
382: protected Weight createWeight(Searcher searcher) throws IOException {
383: return new BooleanWeight(searcher);
384: }
385:
386: public Query rewrite(IndexReader reader) throws IOException {
387: if (clauses.size() == 1) { // optimize 1-clause queries
388: BooleanClause c = (BooleanClause) clauses.get(0);
389: if (!c.isProhibited()) { // just return clause
390:
391: Query query = c.getQuery().rewrite(reader); // rewrite first
392:
393: if (getBoost() != 1.0f) { // incorporate boost
394: if (query == c.getQuery()) // if rewrite was no-op
395: query = (Query) query.clone(); // then clone before boost
396: query.setBoost(getBoost() * query.getBoost());
397: }
398:
399: return query;
400: }
401: }
402:
403: BooleanQuery clone = null; // recursively rewrite
404: for (int i = 0; i < clauses.size(); i++) {
405: BooleanClause c = (BooleanClause) clauses.get(i);
406: Query query = c.getQuery().rewrite(reader);
407: if (query != c.getQuery()) { // clause rewrote: must clone
408: if (clone == null)
409: clone = (BooleanQuery) this .clone();
410: clone.clauses.set(i, new BooleanClause(query, c
411: .getOccur()));
412: }
413: }
414: if (clone != null) {
415: return clone; // some clauses rewrote
416: } else
417: return this ; // no clauses rewrote
418: }
419:
420: // inherit javadoc
421: public void extractTerms(Set terms) {
422: for (Iterator i = clauses.iterator(); i.hasNext();) {
423: BooleanClause clause = (BooleanClause) i.next();
424: clause.getQuery().extractTerms(terms);
425: }
426: }
427:
428: public Object clone() {
429: BooleanQuery clone = (BooleanQuery) super .clone();
430: clone.clauses = (ArrayList) this .clauses.clone();
431: return clone;
432: }
433:
434: /** Prints a user-readable version of this query. */
435: public String toString(String field) {
436: StringBuffer buffer = new StringBuffer();
437: boolean needParens = (getBoost() != 1.0)
438: || (getMinimumNumberShouldMatch() > 0);
439: if (needParens) {
440: buffer.append("(");
441: }
442:
443: for (int i = 0; i < clauses.size(); i++) {
444: BooleanClause c = (BooleanClause) clauses.get(i);
445: if (c.isProhibited())
446: buffer.append("-");
447: else if (c.isRequired())
448: buffer.append("+");
449:
450: Query subQuery = c.getQuery();
451: if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens
452: buffer.append("(");
453: buffer.append(c.getQuery().toString(field));
454: buffer.append(")");
455: } else
456: buffer.append(c.getQuery().toString(field));
457:
458: if (i != clauses.size() - 1)
459: buffer.append(" ");
460: }
461:
462: if (needParens) {
463: buffer.append(")");
464: }
465:
466: if (getMinimumNumberShouldMatch() > 0) {
467: buffer.append('~');
468: buffer.append(getMinimumNumberShouldMatch());
469: }
470:
471: if (getBoost() != 1.0f) {
472: buffer.append(ToStringUtils.boost(getBoost()));
473: }
474:
475: return buffer.toString();
476: }
477:
478: /** Returns true iff <code>o</code> is equal to this. */
479: public boolean equals(Object o) {
480: if (!(o instanceof BooleanQuery))
481: return false;
482: BooleanQuery other = (BooleanQuery) o;
483: return (this .getBoost() == other.getBoost())
484: && this .clauses.equals(other.clauses)
485: && this .getMinimumNumberShouldMatch() == other
486: .getMinimumNumberShouldMatch();
487: }
488:
489: /** Returns a hash code value for this object.*/
490: public int hashCode() {
491: return Float.floatToIntBits(getBoost()) ^ clauses.hashCode()
492: + getMinimumNumberShouldMatch();
493: }
494:
495: }
|