001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.search;
017:
018: /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
019: /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
020:
021: import org.apache.lucene.search.*;
022:
023: import java.util.LinkedHashMap;
024: import java.util.List;
025: import java.util.Map;
026: import java.io.IOException;
027:
028: /** Utility which converts certain query clauses into {@link QueryFilter}s and
029: * caches these. Only required {@link TermQuery}s whose boost is zero and
030: * whose term occurs in at least a certain fraction of documents are converted
031: * to cached filters. This accellerates query constraints like language,
032: * document format, etc., which do not affect ranking but might otherwise slow
033: * search considerably. */
034: // Taken from Nutch and modified - YCS
035: class LuceneQueryOptimizer {
036: private LinkedHashMap cache; // an LRU cache of QueryFilter
037:
038: private float threshold;
039:
040: /** Construct an optimizer that caches and uses filters for required {@link
041: * TermQuery}s whose boost is zero.
042: * @param cacheSize the number of QueryFilters to cache
043: * @param threshold the fraction of documents which must contain term
044: */
045: public LuceneQueryOptimizer(final int cacheSize, float threshold) {
046: this .cache = new LinkedHashMap(cacheSize, 0.75f, true) {
047: protected boolean removeEldestEntry(Map.Entry eldest) {
048: return size() > cacheSize; // limit size of cache
049: }
050: };
051: this .threshold = threshold;
052: }
053:
054: public TopDocs optimize(BooleanQuery original, Searcher searcher,
055: int numHits, Query[] queryOut, Filter[] filterOut)
056: throws IOException {
057:
058: BooleanQuery query = new BooleanQuery();
059: BooleanQuery filterQuery = null;
060:
061: for (BooleanClause c : (List<BooleanClause>) original.clauses()) {
062:
063: /***
064: System.out.println("required="+c.required);
065: System.out.println("boost="+c.query.getBoost());
066: System.out.println("isTermQuery="+(c.query instanceof TermQuery));
067: if (c.query instanceof TermQuery) {
068: System.out.println("term="+((TermQuery)c.query).getTerm());
069: System.out.println("docFreq="+searcher.docFreq(((TermQuery)c.query).getTerm()));
070: }
071: ***/
072: Query q = c.getQuery();
073: if (c.isRequired() // required
074: && q.getBoost() == 0.0f // boost is zero
075: && q instanceof TermQuery // TermQuery
076: && (searcher.docFreq(((TermQuery) q).getTerm()) / (float) searcher
077: .maxDoc()) >= threshold) { // check threshold
078: if (filterQuery == null)
079: filterQuery = new BooleanQuery();
080: filterQuery.add(q, BooleanClause.Occur.MUST); // filter it
081: //System.out.println("WooHoo... qualified to be hoisted to a filter!");
082: } else {
083: query.add(c); // query it
084: }
085: }
086:
087: Filter filter = null;
088: if (filterQuery != null) {
089: synchronized (cache) { // check cache
090: filter = (Filter) cache.get(filterQuery);
091: }
092: if (filter == null) { // miss
093: filter = new QueryFilter(filterQuery); // construct new entry
094: synchronized (cache) {
095: cache.put(filterQuery, filter); // cache it
096: }
097: }
098: }
099:
100: // YCS: added code to pass out optimized query and filter
101: // so they can be used with Hits
102: if (queryOut != null && filterOut != null) {
103: queryOut[0] = query;
104: filterOut[0] = filter;
105: return null;
106: } else {
107: return searcher.search(query, filter, numHits);
108: }
109:
110: }
111: }
|