/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search;

import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.core.SolrInfoRegistry;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.NamedList;
import org.apache.solr.util.OpenBitSet;
import org.apache.solr.util.SimpleOrderedMap;

import java.io.IOException;
import java.net.URL;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * SolrIndexSearcher adds schema awareness and caching functionality
 * over the Lucene IndexSearcher.
 *
 * @author yonik
 * @version $Id: SolrIndexSearcher.java 541944 2007-05-26 22:29:12Z hossman $
 * @since solr 0.9
 */

// Since the internal reader in IndexSearcher is
// package protected, I can't get to it by inheritance.
// For now, I am using delegation and creating the
// IndexReader to pass to the searcher myself.
// NOTE: as of Lucene 1.9, this has changed!
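
// Example (illustrative sketch, not part of the original source): a typical
// lifecycle for a searcher over an index Directory, assuming a loaded schema.
// The (Query) cast disambiguates the null filter between the Query and DocSet
// overloads of getDocList:
//
//   SolrIndexSearcher s = new SolrIndexSearcher(schema, "main", dir, true);
//   s.register();                                      // expose caches as MBeans
//   DocList top10 = s.getDocList(query, (Query) null, sort, 0, 10);
//   s.close();                                         // frees reader and caches
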
public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {

  private static Logger log = Logger.getLogger(SolrIndexSearcher.class.getName());

  private final IndexSchema schema;

  private final String name;
  private long openTime = System.currentTimeMillis();
  private long registerTime = 0;
  private final IndexSearcher searcher;
  private final IndexReader reader;
  private final boolean closeReader;

  private final boolean cachingEnabled;
  private final SolrCache filterCache;
  private final SolrCache queryResultCache;
  private final SolrCache documentCache;

  // map of generic caches - not synchronized since it's read-only after the constructor.
  private final HashMap<String, SolrCache> cacheMap;
  private static final HashMap<String, SolrCache> noGenericCaches = new HashMap<String, SolrCache>(0);

  // list of all caches associated with this searcher.
  private final SolrCache[] cacheList;
  private static final SolrCache[] noCaches = new SolrCache[0];

  /** Creates a searcher searching the index in the named directory. */
  public SolrIndexSearcher(IndexSchema schema, String name, String path, boolean enableCache) throws IOException {
    this(schema, name, IndexReader.open(path), true, enableCache);
  }

  /** Creates a searcher searching the index in the provided directory. */
  public SolrIndexSearcher(IndexSchema schema, String name, Directory directory, boolean enableCache) throws IOException {
    this(schema, name, IndexReader.open(directory), true, enableCache);
  }

  /** Creates a searcher searching the provided index. */
  public SolrIndexSearcher(IndexSchema schema, String name, IndexReader r, boolean enableCache) {
    this(schema, name, r, false, enableCache);
  }

  private SolrIndexSearcher(IndexSchema schema, String name, IndexReader r, boolean closeReader, boolean enableCache) {
    this.schema = schema;
    this.name = "Searcher@" + Integer.toHexString(hashCode()) + (name != null ? " " + name : "");

    log.info("Opening " + this.name);

    reader = r;
    searcher = new IndexSearcher(r);
    this.closeReader = closeReader;
    searcher.setSimilarity(schema.getSimilarity());

    cachingEnabled = enableCache;
    if (cachingEnabled) {
      ArrayList<SolrCache> clist = new ArrayList<SolrCache>();
      filterCache = filterCacheConfig == null ? null : filterCacheConfig.newInstance();
      if (filterCache != null) clist.add(filterCache);
      queryResultCache = queryResultCacheConfig == null ? null : queryResultCacheConfig.newInstance();
      if (queryResultCache != null) clist.add(queryResultCache);
      documentCache = documentCacheConfig == null ? null : documentCacheConfig.newInstance();
      if (documentCache != null) clist.add(documentCache);

      if (userCacheConfigs == null) {
        cacheMap = noGenericCaches;
      } else {
        cacheMap = new HashMap<String, SolrCache>(userCacheConfigs.length);
        for (CacheConfig userCacheConfig : userCacheConfigs) {
          SolrCache cache = null;
          if (userCacheConfig != null) cache = userCacheConfig.newInstance();
          if (cache != null) {
            cacheMap.put(cache.name(), cache);
            clist.add(cache);
          }
        }
      }

      cacheList = clist.toArray(new SolrCache[clist.size()]);
    } else {
      filterCache = null;
      queryResultCache = null;
      documentCache = null;
      cacheMap = noGenericCaches;
      cacheList = noCaches;
    }

    // register self
    SolrInfoRegistry.getRegistry().put(this.name, this);
  }

  public String toString() {
    return name;
  }

  /** Register sub-objects such as caches. */
  public void register() {
    for (SolrCache cache : cacheList) {
      cache.setState(SolrCache.State.LIVE);
      SolrInfoRegistry.getRegistry().put(cache.name(), cache);
    }
    registerTime = System.currentTimeMillis();
  }

  /**
   * Frees resources associated with this searcher.
   *
   * In particular, the underlying reader and any caches in use are closed.
   */
  public void close() throws IOException {
    // unregister first, so no management actions are tried on a closing searcher.
    SolrInfoRegistry.getRegistry().remove(name);

    if (cachingEnabled) {
      StringBuilder sb = new StringBuilder();
      sb.append("Closing ").append(name);
      for (SolrCache cache : cacheList) {
        sb.append("\n\t");
        sb.append(cache);
      }
      log.info(sb.toString());
    } else {
      log.fine("Closing " + name);
    }
    try {
      searcher.close();
    } finally {
      if (closeReader) reader.close();
      for (SolrCache cache : cacheList) {
        cache.close();
      }
    }
  }

  /** Direct access to the IndexReader used by this searcher */
  public IndexReader getReader() {
    return reader;
  }

  /** Direct access to the IndexSchema for use with this searcher */
  public IndexSchema getSchema() {
    return schema;
  }

  // params for the "nutch" query optimizer
  private static boolean filtOptEnabled = SolrConfig.config.getBool("query/boolTofilterOptimizer/@enabled", false);
  private static int filtOptCacheSize = SolrConfig.config.getInt("query/boolTofilterOptimizer/@cacheSize", 32);
  private static float filtOptThreshold = SolrConfig.config.getFloat("query/boolTofilterOptimizer/@threshold", .05f);
  private LuceneQueryOptimizer optimizer = filtOptEnabled ? new LuceneQueryOptimizer(filtOptCacheSize, filtOptThreshold) : null;

  private static final CacheConfig filterCacheConfig = CacheConfig.getConfig("query/filterCache");
  private static final CacheConfig queryResultCacheConfig = CacheConfig.getConfig("query/queryResultCache");
  private static final CacheConfig documentCacheConfig = CacheConfig.getConfig("query/documentCache");
  private static final CacheConfig[] userCacheConfigs = CacheConfig.getMultipleConfigs("query/cache");

  //
  // Set default regenerators on filter and query caches if they don't have any
  //
  static {
    if (filterCacheConfig != null && filterCacheConfig.getRegenerator() == null) {
      filterCacheConfig.setRegenerator(new CacheRegenerator() {
        public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException {
          newSearcher.cacheDocSet((Query) oldKey, null, false);
          return true;
        }
      });
    }

    if (queryResultCacheConfig != null && queryResultCacheConfig.getRegenerator() == null) {
      queryResultCacheConfig.setRegenerator(new CacheRegenerator() {
        public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException {
          QueryResultKey key = (QueryResultKey) oldKey;
          int nDocs = 1;
          // request 1 doc and let caching round up to the next window size...
          // unless the window size is <=1, in which case we will pick
          // the minimum of the number of documents requested last time and
          // a reasonable number such as 40.
          // TODO: make more configurable later...

          if (queryResultWindowSize <= 1) {
            DocList oldList = (DocList) oldVal;
            int oldnDocs = oldList.offset() + oldList.size();
            // 40 has factors of 2,4,5,10,20
            nDocs = Math.min(oldnDocs, 40);
          }

          DocListAndSet ret = new DocListAndSet();
          int flags = NO_CHECK_QCACHE | key.nc_flags;

          newSearcher.getDocListC(ret, key.query, key.filters, null, key.sort, 0, nDocs, flags);
          return true;
        }
      });
    }
  }

  private static boolean useFilterForSortedQuery = SolrConfig.config.getBool("query/useFilterForSortedQuery", false);
  private static int queryResultWindowSize = SolrConfig.config.getInt("query/queryResultWindowSize", 1);

  public Hits search(Query query, Filter filter, Sort sort) throws IOException {
    // todo - when Solr starts accepting filters, need to
    // change this conditional check (filter!=null) and create a new filter
    // that ANDs them together if it already exists.

    if (optimizer == null || filter != null || !(query instanceof BooleanQuery)) {
      return searcher.search(query, filter, sort);
    } else {
      Query[] newQuery = new Query[1];
      Filter[] newFilter = new Filter[1];
      optimizer.optimize((BooleanQuery) query, searcher, 0, newQuery, newFilter);

      return searcher.search(newQuery[0], newFilter[0], sort);
    }
  }

  public Hits search(Query query, Filter filter) throws IOException {
    return searcher.search(query, filter);
  }

  public Hits search(Query query, Sort sort) throws IOException {
    return searcher.search(query, sort);
  }

  public void search(Query query, HitCollector results) throws IOException {
    searcher.search(query, results);
  }

  public void setSimilarity(Similarity similarity) {
    searcher.setSimilarity(similarity);
  }

  public Similarity getSimilarity() {
    return searcher.getSimilarity();
  }

  public int docFreq(Term term) throws IOException {
    return searcher.docFreq(term);
  }

  /* ********************** Document retrieval *************************/

  /* Future optimizations (yonik)
   *
   * If no cache is present:
   *   - use NO_LOAD instead of LAZY_LOAD
   *   - use LOAD_AND_BREAK if a single field is being retrieved
   */

  /**
   * FieldSelector which loads the specified fields, and loads all other
   * fields lazily.
   */
  class SetNonLazyFieldSelector implements FieldSelector {
    private Set<String> fieldsToLoad;

    SetNonLazyFieldSelector(Set<String> toLoad) {
      fieldsToLoad = toLoad;
    }

    public FieldSelectorResult accept(String fieldName) {
      if (fieldsToLoad.contains(fieldName)) return FieldSelectorResult.LOAD;
      else return FieldSelectorResult.LAZY_LOAD;
    }
  }
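
  // Example (illustrative, not part of the original source): loading only the
  // "id" field eagerly while all other stored fields stay lazy; the field name
  // and docid are hypothetical:
  //
  //   Set<String> toLoad = Collections.singleton("id");
  //   Document d = reader.document(docid, new SetNonLazyFieldSelector(toLoad));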

  /* solrconfig lazyfields setting */
  public static final boolean enableLazyFieldLoading = SolrConfig.config.getBool("query/enableLazyFieldLoading", false);

  /**
   * Retrieve the {@link Document} instance corresponding to the document id.
   */
  public Document doc(int i) throws IOException {
    return doc(i, (Set<String>) null);
  }

  /** Retrieve a {@link Document} using a {@link org.apache.lucene.document.FieldSelector}.
   * This method does not currently use the Solr document cache.
   *
   * @see IndexReader#document(int, FieldSelector) */
  public Document doc(int n, FieldSelector fieldSelector) throws IOException {
    return searcher.getIndexReader().document(n, fieldSelector);
  }

  /**
   * Retrieve the {@link Document} instance corresponding to the document id.
   *
   * Note: The document will have all fields accessible, but if a field
   * filter is provided, only the provided fields will be loaded (the
   * remainder will be available lazily).
   */
  public Document doc(int i, Set<String> fields) throws IOException {

    Document d;
    if (documentCache != null) {
      d = (Document) documentCache.get(i);
      if (d != null) return d;
    }

    if (!enableLazyFieldLoading || fields == null) {
      d = searcher.getIndexReader().document(i);
    } else {
      d = searcher.getIndexReader().document(i, new SetNonLazyFieldSelector(fields));
    }

    if (documentCache != null) {
      documentCache.put(i, d);
    }

    return d;
  }
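
  // Example (illustrative, not part of the original source): fetching a doc
  // through the documentCache with only "id" and "title" loaded eagerly; the
  // remaining stored fields load lazily when enableLazyFieldLoading is on.
  // Field names are hypothetical:
  //
  //   Set<String> fields = new HashSet<String>();
  //   fields.add("id");
  //   fields.add("title");
  //   Document d = searcher.doc(docid, fields);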

  /**
   * Takes a list of docs (the doc ids actually), and reads them into an array
   * of Documents.
   */
  public void readDocs(Document[] docs, DocList ids) throws IOException {
    readDocs(docs, ids, null);
  }

  /**
   * Takes a list of docs (the doc ids actually) and a set of fields to load,
   * and reads them into an array of Documents.
   */
  public void readDocs(Document[] docs, DocList ids, Set<String> fields) throws IOException {
    DocIterator iter = ids.iterator();
    for (int i = 0; i < docs.length; i++) {
      docs[i] = doc(iter.nextDoc(), fields);
    }
  }

  /* ********************** end document retrieval *************************/

  public int maxDoc() throws IOException {
    return searcher.maxDoc();
  }

  public TopDocs search(Weight weight, Filter filter, int i) throws IOException {
    return searcher.search(weight, filter, i);
  }

  public void search(Weight weight, Filter filter, HitCollector hitCollector) throws IOException {
    searcher.search(weight, filter, hitCollector);
  }

  public Query rewrite(Query original) throws IOException {
    return searcher.rewrite(original);
  }

  public Explanation explain(Weight weight, int i) throws IOException {
    return searcher.explain(weight, i);
  }

  public TopFieldDocs search(Weight weight, Filter filter, int i, Sort sort) throws IOException {
    return searcher.search(weight, filter, i, sort);
  }

  ////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////

  /**
   * Returns the first document number containing the term <code>t</code>.
   * Returns -1 if no document was found.
   * This method is primarily intended for clients that want to fetch
   * documents using a unique identifier.
   * @param t
   * @return the first document number containing the term
   */
  public int getFirstMatch(Term t) throws IOException {
    TermDocs tdocs = null;
    try {
      tdocs = reader.termDocs(t);
      if (!tdocs.next()) return -1;
      return tdocs.doc();
    } finally {
      if (tdocs != null) tdocs.close();
    }
  }
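
  // Example (illustrative, not part of the original source): resolving a unique
  // key to an internal Lucene doc id, assuming "id" is the schema's unique key
  // field and the value is hypothetical:
  //
  //   int docid = searcher.getFirstMatch(new Term("id", "9885A004"));
  //   if (docid != -1) {
  //     Document d = searcher.doc(docid);
  //   }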

  /**
   * Compute and cache the DocSet that matches a query.
   * The normal usage is expected to be cacheDocSet(myQuery, null, false)
   * meaning that Solr will determine if the Query warrants caching, and
   * if so, will compute the DocSet that matches the Query and cache it.
   * If the answer to the query is already cached, nothing further will be done.
   * <p>
   * If the optionalAnswer DocSet is provided, it should <b>not</b> be modified
   * after this call.
   *
   * @param query the lucene query that will act as the key
   * @param optionalAnswer the DocSet to be cached - if null, it will be computed.
   * @param mustCache if true, a best effort will be made to cache this entry.
   *                  if false, heuristics may be used to determine if it should be cached.
   */
  public void cacheDocSet(Query query, DocSet optionalAnswer, boolean mustCache) throws IOException {
    // Even if the cache is null, still compute the DocSet as it may serve to warm the Lucene
    // or OS disk cache.
    if (optionalAnswer != null) {
      if (filterCache != null) {
        filterCache.put(query, optionalAnswer);
      }
      return;
    }

    // Throw away the result, relying on the fact that getDocSet
    // will currently always cache what it found. If getDocSet() starts
    // using heuristics about what to cache, and mustCache==true, (or if we
    // want this method to start using heuristics too) then
    // this needs to change.
    getDocSet(query);
  }
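
  // Example (illustrative, not part of the original source): pre-warming the
  // filter cache with a commonly used filter; field and value are hypothetical:
  //
  //   searcher.cacheDocSet(new TermQuery(new Term("inStock", "true")), null, false);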

  /**
   * Returns the set of document ids matching a query.
   * This method is cache-aware and attempts to retrieve the answer from the cache if possible.
   * If the answer was not cached, it may have been inserted into the cache as a result of this call.
   * This method can handle negative queries.
   * <p>
   * The DocSet returned should <b>not</b> be modified.
   */
  public DocSet getDocSet(Query query) throws IOException {
    // Get the absolute value (positive version) of this query. If we
    // get back the same reference, we know it's positive.
    Query absQ = QueryUtils.getAbs(query);
    boolean positive = query == absQ;

    if (filterCache != null) {
      DocSet absAnswer = (DocSet) filterCache.get(absQ);
      if (absAnswer != null) {
        if (positive) return absAnswer;
        else return getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
      }
    }

    DocSet absAnswer = getDocSetNC(absQ, null);
    DocSet answer = positive ? absAnswer : getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);

    if (filterCache != null) {
      // cache negative queries as positive
      filterCache.put(absQ, absAnswer);
    }

    return answer;
  }
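
  // Example (illustrative, not part of the original source): a negative query
  // is answered by subtracting its cached positive form from the set of all
  // documents, so a query equivalent to -inStock:true (hypothetical) still
  // benefits from the filterCache entry for inStock:true:
  //
  //   DocSet excluded = searcher.getDocSet(negatedQuery);  // negatedQuery is hypothetical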

  // only handle positive (non negative) queries
  DocSet getPositiveDocSet(Query q) throws IOException {
    DocSet answer;
    if (filterCache != null) {
      answer = (DocSet) filterCache.get(q);
      if (answer != null) return answer;
    }
    answer = getDocSetNC(q, null);
    if (filterCache != null) filterCache.put(q, answer);
    return answer;
  }

  private static Query matchAllDocsQuery = new MatchAllDocsQuery();

  protected DocSet getDocSet(List<Query> queries) throws IOException {
    if (queries == null) return null;
    if (queries.size() == 1) return getDocSet(queries.get(0));
    DocSet answer = null;

    boolean[] neg = new boolean[queries.size()];
    DocSet[] sets = new DocSet[queries.size()];

    int smallestIndex = -1;
    int smallestCount = Integer.MAX_VALUE;
    for (int i = 0; i < sets.length; i++) {
      Query q = queries.get(i);
      Query posQuery = QueryUtils.getAbs(q);
      sets[i] = getPositiveDocSet(posQuery);
      // Negative query if absolute value different from original
      if (q == posQuery) {
        neg[i] = false;
        // keep track of the smallest positive set.
        // This optimization is only worth it if size() is cached, which it would
        // be if we don't do any set operations.
        int sz = sets[i].size();
        if (sz < smallestCount) {
          smallestCount = sz;
          smallestIndex = i;
          answer = sets[i];
        }
      } else {
        neg[i] = true;
      }
    }

    // if no positive queries, start off with all docs
    if (answer == null) answer = getPositiveDocSet(matchAllDocsQuery);

    // do negative queries first to shrink set size
    for (int i = 0; i < sets.length; i++) {
      if (neg[i]) answer = answer.andNot(sets[i]);
    }

    for (int i = 0; i < sets.length; i++) {
      if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]);
    }

    return answer;
  }
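
  // Example (illustrative, not part of the original source): intersecting
  // several filters, each resolved through the filter cache. The method is
  // protected, so the caller must be a subclass or in the same package;
  // fields and values are hypothetical:
  //
  //   List<Query> filters = new ArrayList<Query>();
  //   filters.add(new TermQuery(new Term("inStock", "true")));
  //   filters.add(new TermQuery(new Term("cat", "electronics")));
  //   DocSet both = searcher.getDocSet(filters);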

  // query must be positive
  protected DocSet getDocSetNC(Query query, DocSet filter) throws IOException {
    if (filter == null) {
      DocSetHitCollector hc = new DocSetHitCollector(maxDoc());
      if (query instanceof TermQuery) {
        Term t = ((TermQuery) query).getTerm();
        TermDocs tdocs = null;
        try {
          tdocs = reader.termDocs(t);
          while (tdocs.next()) hc.collect(tdocs.doc(), 0.0f);
        } finally {
          if (tdocs != null) tdocs.close();
        }
      } else {
        searcher.search(query, null, hc);
      }
      return hc.getDocSet();

    } else {
      // FUTURE: if the filter is sorted by docid, could use skipTo (SkipQueryFilter)
      final DocSetHitCollector hc = new DocSetHitCollector(maxDoc());
      final DocSet filt = filter;
      searcher.search(query, null, new HitCollector() {
        public void collect(int doc, float score) {
          if (filt.exists(doc)) hc.collect(doc, score);
        }
      });
      return hc.getDocSet();
    }
  }

  /**
   * Returns the set of document ids matching both the query and the filter.
   * This method is cache-aware and attempts to retrieve the answer from the cache if possible.
   * If the answer was not cached, it may have been inserted into the cache as a result of this call.
   * <p>
   *
   * @param query
   * @param filter may be null
   * @return DocSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   */
  public DocSet getDocSet(Query query, DocSet filter) throws IOException {
    if (filter == null) return getDocSet(query);

    // Negative query if absolute value different from original
    Query absQ = QueryUtils.getAbs(query);
    boolean positive = absQ == query;

    DocSet first;
    if (filterCache != null) {
      first = (DocSet) filterCache.get(absQ);
      if (first == null) {
        first = getDocSetNC(absQ, null);
        filterCache.put(absQ, first);
      }
      return positive ? first.intersection(filter) : filter.andNot(first);
    }

    // If there isn't a cache, then do a single filtered query if positive.
    return positive ? getDocSetNC(absQ, filter) : filter.andNot(getPositiveDocSet(absQ));
  }

  /**
   * Converts a filter into a DocSet.
   * This method is not cache-aware and no caches are checked.
   */
  public DocSet convertFilter(Filter lfilter) throws IOException {
    BitSet bs = lfilter.bits(this.reader);
    OpenBitSet obs = new OpenBitSet(bs.size());
    for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
      obs.fastSet(i);
    }
    return new BitDocSet(obs);
  }
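
  // Example (illustrative, not part of the original source): adapting a plain
  // Lucene filter, such as a RangeFilter, into an uncached Solr DocSet; the
  // field name and bounds are hypothetical:
  //
  //   Filter f = new RangeFilter("price", "0", "100", true, true);
  //   DocSet priced = searcher.convertFilter(f);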

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>.
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   *
   * @param query
   * @param filter may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocList getDocList(Query query, Query filter, Sort lsort, int offset, int len) throws IOException {
    List<Query> filterList = null;
    if (filter != null) {
      filterList = new ArrayList<Query>(1);
      filterList.add(filter);
    }
    return getDocList(query, filterList, lsort, offset, len, 0);
  }
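
  // Example (illustrative, not part of the original source): paging through
  // results ten at a time; rounding up to queryResultWindowSize internally
  // lets the second request usually hit the queryResultCache. The variables
  // q, filterQuery, and sort are hypothetical:
  //
  //   DocList page1 = searcher.getDocList(q, filterQuery, sort, 0, 10);
  //   DocList page2 = searcher.getDocList(q, filterQuery, sort, 10, 10);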

  /**
   * Returns documents matching both <code>query</code> and the
   * intersection of the <code>filterList</code>, sorted by <code>sort</code>.
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   *
   * @param query
   * @param filterList may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocList getDocList(Query query, List<Query> filterList, Sort lsort, int offset, int len, int flags) throws IOException {
    DocListAndSet answer = new DocListAndSet();
    getDocListC(answer, query, filterList, null, lsort, offset, len, flags);
    return answer.docList;
  }

  private static final int NO_CHECK_QCACHE      = 0x80000000;
  private static final int GET_DOCSET           = 0x40000000;
  private static final int NO_CHECK_FILTERCACHE = 0x20000000;

  public static final int GET_SCORES = 0x01;

  private void getDocListC(DocListAndSet out, Query query, List<Query> filterList, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
    QueryResultKey key = null;
    int maxDoc = offset + len;
    int supersetMaxDoc = maxDoc;
    DocList superset;

    // we can try and look up the complete query in the cache.
    // we can't do that if filter!=null though (we don't want to
    // do hashCode() and equals() for a big DocSet).
    if (queryResultCache != null && filter == null) {
      // all of the current flags can be reused during warming,
      // so set all of them on the cache key.
      key = new QueryResultKey(query, filterList, lsort, flags);
      if ((flags & NO_CHECK_QCACHE) == 0) {
        superset = (DocList) queryResultCache.get(key);

        if (superset != null) {
          // check that the cache entry has scores recorded if we need them
          if ((flags & GET_SCORES) == 0 || superset.hasScores()) {
            out.docList = superset.subset(offset, len);
          }
        }
        if (out.docList != null) {
          // found the docList in the cache... now check if we need the docset too.
          // OPT: possible future optimization - if the doclist contains all the matches,
          // use it to make the docset instead of rerunning the query.
          if (out.docSet == null && ((flags & GET_DOCSET) != 0)) {
            if (filterList == null) {
              out.docSet = getDocSet(query);
            } else {
              List<Query> newList = new ArrayList<Query>(filterList.size() + 1);
              newList.add(query);
              newList.addAll(filterList);
              out.docSet = getDocSet(newList);
            }
          }
          return;
        }
      }

      // If we are going to generate the result, bump up to the
      // next resultWindowSize for better caching.

      // handle 0 special case as well as avoid idiv in the common case.
      if (maxDoc < queryResultWindowSize) {
        supersetMaxDoc = queryResultWindowSize;
      } else {
        supersetMaxDoc = ((maxDoc - 1) / queryResultWindowSize + 1) * queryResultWindowSize;
      }
    }

    // OK, so now we need to generate an answer.
    // One way to do that would be to check if we have an unordered list
    // of results for the base query. If so, we can apply the filters and then
    // sort by the resulting set. This can only be used if:
    //   - the sort doesn't contain score
    //   - we don't want score returned.

    // check if we should try and use the filter cache
    boolean useFilterCache = false;
    if ((flags & (GET_SCORES | NO_CHECK_FILTERCACHE)) == 0 && useFilterForSortedQuery && lsort != null && filterCache != null) {
      useFilterCache = true;
      SortField[] sfields = lsort.getSort();
      for (SortField sf : sfields) {
        if (sf.getType() == SortField.SCORE) {
          useFilterCache = false;
          break;
        }
      }
    }

    if (useFilterCache) {
      // now actually use the filter cache.
      // for large filters that match few documents, this may be
      // slower than simply re-executing the query.
      if (out.docSet == null) {
        out.docSet = getDocSet(query, filter);
        DocSet bigFilt = getDocSet(filterList);
        if (bigFilt != null) out.docSet = out.docSet.intersection(bigFilt);
      }
      // todo: there could be a sortDocSet that could take a list of
      // the filters instead of anding them first...
      // perhaps there should be a multi-docset-iterator
      superset = sortDocSet(out.docSet, lsort, supersetMaxDoc);
      out.docList = superset.subset(offset, len);
    } else {
      // do it the normal way...
      DocSet theFilt = filter != null ? filter : getDocSet(filterList);

      if ((flags & GET_DOCSET) != 0) {
        DocSet qDocSet = getDocListAndSetNC(out, query, theFilt, lsort, 0, supersetMaxDoc, flags);
        // cache the docSet matching the query w/o filtering
        if (filterCache != null) filterCache.put(query, qDocSet);
      } else {
        out.docList = getDocListNC(query, theFilt, lsort, 0, supersetMaxDoc, flags);
      }
      superset = out.docList;
      out.docList = superset.subset(offset, len);
    }

    // lastly, put the superset in the cache
    if (key != null) {
      queryResultCache.put(key, superset);
    }
  }

  private DocList getDocListNC(Query query, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
    final int lastDocRequested = offset + len;
    int nDocsReturned;
    int totalHits;
    float maxScore;
    int[] ids;
    float[] scores;

    query = QueryUtils.makeQueryable(query);

    // handle zero case...
    if (lastDocRequested <= 0) {
      final DocSet filt = filter;
      final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };
      final int[] numHits = new int[1];

      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          if (filt != null && !filt.exists(doc)) return;
          numHits[0]++;
          if (score > topscore[0]) topscore[0] = score;
        }
      });

      nDocsReturned = 0;
      ids = new int[nDocsReturned];
      scores = new float[nDocsReturned];
      totalHits = numHits[0];
      maxScore = totalHits > 0 ? topscore[0] : 0.0f;
    } else if (lsort != null) {
      // can't use TopDocs if there is a sort since it
      // will do automatic score normalization.
      // NOTE: this changed late in Lucene 1.9

      final DocSet filt = filter;
      final int[] numHits = new int[1];
      final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, lsort.getSort(), offset + len);

      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          if (filt != null && !filt.exists(doc)) return;
          numHits[0]++;
          hq.insert(new FieldDoc(doc, score));
        }
      });

      totalHits = numHits[0];
      maxScore = totalHits > 0 ? hq.getMaxScore() : 0.0f;

      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (flags & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
      for (int i = nDocsReturned - 1; i >= 0; i--) {
        FieldDoc fieldDoc = (FieldDoc) hq.pop();
        // fillFields is the point where score normalization happens
        // hq.fillFields(fieldDoc)
        ids[i] = fieldDoc.doc;
        if (scores != null) scores[i] = fieldDoc.score;
      }
    } else {
      // No Sort specified (sort by score descending)
      // This case could be done with TopDocs, but would currently require
      // getting a BitSet filter from a DocSet which may be inefficient.

      final DocSet filt = filter;
      final ScorePriorityQueue hq = new ScorePriorityQueue(lastDocRequested);
      final int[] numHits = new int[1];
      searcher.search(query, new HitCollector() {
        float minScore = Float.NEGATIVE_INFINITY; // minimum score in the priority queue

        public void collect(int doc, float score) {
          if (filt != null && !filt.exists(doc)) return;
          if (numHits[0]++ < lastDocRequested || score >= minScore) {
            // TODO: if docs are always delivered in order, we could use "score>minScore"
            // instead of "score>=minScore" and avoid tiebreaking scores
            // in the priority queue.
            // but might BooleanScorer14 still be used and deliver docs out-of-order?
            hq.insert(new ScoreDoc(doc, score));
            minScore = ((ScoreDoc) hq.top()).score;
          }
        }
      });

      totalHits = numHits[0];
      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (flags & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
      ScoreDoc sdoc = null;
      for (int i = nDocsReturned - 1; i >= 0; i--) {
        sdoc = (ScoreDoc) hq.pop();
        ids[i] = sdoc.doc;
        if (scores != null) scores[i] = sdoc.score;
      }
      maxScore = sdoc == null ? 0.0f : sdoc.score;
    }

    int sliceLen = Math.min(lastDocRequested, nDocsReturned) - offset;
    if (sliceLen < 0) sliceLen = 0;
    return new DocSlice(offset, sliceLen, ids, scores, totalHits, maxScore);

    /**************** older implementation using TopDocs *******************

    Filter lfilter=null;
    if (filter != null) {
      final BitSet bits = filter.getBits();   // avoid if possible
      lfilter = new Filter() {
        public BitSet bits(IndexReader reader) {
          return bits;
        }
      };
    }

    int lastDocRequested=offset+len;

    // lucene doesn't allow 0 to be passed for nDocs
    if (lastDocRequested==0) lastDocRequested=1;

    // TopFieldDocs sortedDocs;  // use TopDocs so both versions can use it
    TopDocs sortedDocs;
    if (lsort!=null) {
      sortedDocs = searcher.search(query, lfilter, lastDocRequested, lsort);
    } else {
      sortedDocs = searcher.search(query, lfilter, lastDocRequested);
    }

    int nDocsReturned = sortedDocs.scoreDocs.length;
    int[] docs = new int[nDocsReturned];
    for (int i=0; i<nDocsReturned; i++) {
      docs[i] = sortedDocs.scoreDocs[i].doc;
    }
    float[] scores=null;
    float maxScore=0.0f;
    if ((flags & GET_SCORES) != 0) {
      scores = new float[nDocsReturned];
      for (int i=0; i<nDocsReturned; i++) {
        scores[i] = sortedDocs.scoreDocs[i].score;
      }
      if (nDocsReturned>0) {
        maxScore=sortedDocs.scoreDocs[0].score;
      }
    }
    int sliceLen = Math.min(offset+len,nDocsReturned) - offset;
    if (sliceLen < 0) sliceLen=0;
    return new DocSlice(offset,sliceLen,docs,scores,sortedDocs.totalHits, maxScore);

    **********************************************************************************/

  }

  // the DocSet returned is for the query only, without any filtering... that way it may
  // be cached if desired.
  private DocSet getDocListAndSetNC(DocListAndSet out, Query query, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
    final int lastDocRequested = offset + len;
    int nDocsReturned;
    int totalHits;
    float maxScore;
    int[] ids;
    float[] scores;
    final DocSetHitCollector setHC = new DocSetHitCollector(maxDoc());

    query = QueryUtils.makeQueryable(query);

    // TODO: perhaps unify getDocListAndSetNC and getDocListNC without imposing a significant performance hit

    // Comment: gathering the set before the filter is applied allows one to cache
    // the resulting DocSet under the query. The drawback is that it requires an
    // extra intersection with the filter at the end. This will be a net win
    // for expensive queries.

    // Q: what if the final intersection results in a small set from two large
    // sets... it won't be a HashDocSet or other small set. One way around
    // this would be to collect the resulting set as we go (the filter is
    // checked anyway).

    // handle zero case...
    if (lastDocRequested <= 0) {
      final DocSet filt = filter;
      final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };
      final int[] numHits = new int[1];

      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          setHC.collect(doc, score);
          if (filt != null && !filt.exists(doc)) return;
          numHits[0]++;
          if (score > topscore[0]) topscore[0] = score;
        }
      });

      nDocsReturned = 0;
      ids = new int[nDocsReturned];
      scores = new float[nDocsReturned];
      totalHits = numHits[0];
      maxScore = totalHits > 0 ? topscore[0] : 0.0f;
    } else if (lsort != null) {
      // can't use TopDocs if there is a sort since it
      // will do automatic score normalization.
      // NOTE: this changed late in Lucene 1.9

      final DocSet filt = filter;
      final int[] numHits = new int[1];
      final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, lsort.getSort(), offset + len);

      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          setHC.collect(doc, score);
          if (filt != null && !filt.exists(doc)) return;
          numHits[0]++;
          hq.insert(new FieldDoc(doc, score));
        }
      });

      totalHits = numHits[0];
      maxScore = totalHits > 0 ? hq.getMaxScore() : 0.0f;

      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (flags & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
      for (int i = nDocsReturned - 1; i >= 0; i--) {
        FieldDoc fieldDoc = (FieldDoc) hq.pop();
        // fillFields is the point where score normalization happens
        // hq.fillFields(fieldDoc)
        ids[i] = fieldDoc.doc;
        if (scores != null) scores[i] = fieldDoc.score;
      }
    } else {
      // No Sort specified (sort by score descending)
      // This case could be done with TopDocs, but would currently require
      // getting a BitSet filter from a DocSet which may be inefficient.

      final DocSet filt = filter;
      final ScorePriorityQueue hq = new ScorePriorityQueue(lastDocRequested);
      final int[] numHits = new int[1];
      searcher.search(query, new HitCollector() {
        float minScore = Float.NEGATIVE_INFINITY; // minimum score in the priority queue

        public void collect(int doc, float score) {
          setHC.collect(doc, score);
          if (filt != null && !filt.exists(doc)) return;
          if (numHits[0]++ < lastDocRequested || score >= minScore) {
            // if docs are always delivered in order, we could use "score>minScore",
            // but might BooleanScorer14 still be used and deliver docs out-of-order?
            hq.insert(new ScoreDoc(doc, score));
            minScore = ((ScoreDoc) hq.top()).score;
          }
        }
      });

      totalHits = numHits[0];
      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (flags & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
      ScoreDoc sdoc = null;
      for (int i = nDocsReturned - 1; i >= 0; i--) {
        sdoc = (ScoreDoc) hq.pop();
        ids[i] = sdoc.doc;
        if (scores != null) scores[i] = sdoc.score;
      }
      maxScore = sdoc == null ? 0.0f : sdoc.score;
    }

    int sliceLen = Math.min(lastDocRequested, nDocsReturned) - offset;
    if (sliceLen < 0) sliceLen = 0;
    out.docList = new DocSlice(offset, sliceLen, ids, scores, totalHits, maxScore);
    DocSet qDocSet = setHC.getDocSet();
    out.docSet = filter == null ? qDocSet : qDocSet.intersection(filter);
    return qDocSet;
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>.
   * FUTURE: The returned DocList may be retrieved from a cache.
   *
   * @param query
   * @param filter may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocList getDocList(Query query, DocSet filter, Sort lsort, int offset, int len) throws IOException {
    DocListAndSet answer = new DocListAndSet();
    getDocListC(answer, query, null, filter, lsort, offset, len, 0);
    return answer.docList;
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>. Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filter may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, Query filter, Sort lsort, int offset, int len) throws IOException {
    List<Query> filterList = buildQueryList(filter);
    return getDocListAndSet(query, filterList, lsort, offset, len);
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>. Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filter may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @param flags user supplied flags for the result set
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, Query filter, Sort lsort, int offset, int len, int flags) throws IOException {
    List<Query> filterList = buildQueryList(filter);
    return getDocListAndSet(query, filterList, lsort, offset, len, flags);
  }

  /**
   * A simple utility method to build a filterList from a query.
   * @param filter
   */
  private List<Query> buildQueryList(Query filter) {
    List<Query> filterList = null;
    if (filter != null) {
      filterList = new ArrayList<Query>(2);
      filterList.add(filter);
    }
    return filterList;
  }

  /**
   * Returns documents matching both <code>query</code> and the intersection
   * of <code>filterList</code>, sorted by <code>sort</code>.
   * Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code>
   * (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filterList may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, Sort lsort, int offset, int len) throws IOException {
    DocListAndSet ret = new DocListAndSet();
    getDocListC(ret, query, filterList, null, lsort, offset, len, GET_DOCSET);
    return ret;
  }

  /**
   * Returns documents matching both <code>query</code> and the intersection
   * of <code>filterList</code>, sorted by <code>sort</code>.
   * Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code>
   * (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filterList may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @param flags user supplied flags for the result set
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, Sort lsort, int offset, int len, int flags) throws IOException {
    DocListAndSet ret = new DocListAndSet();
    getDocListC(ret, query, filterList, null, lsort, offset, len, flags |= GET_DOCSET);
    return ret;
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>. Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   *
   * @param query
   * @param filter may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, DocSet filter, Sort lsort, int offset, int len) throws IOException {
    DocListAndSet ret = new DocListAndSet();
    getDocListC(ret, query, null, filter, lsort, offset, len, GET_DOCSET);
    return ret;
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>. Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may make an insertion into the cache
   * as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filter may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @param flags user supplied flags for the result set
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
    DocListAndSet ret = new DocListAndSet();
    getDocListC(ret, query, null, filter, lsort, offset, len, flags |= GET_DOCSET);
    return ret;
  }

  protected DocList sortDocSet(DocSet set, Sort sort, int nDocs) throws IOException {
    final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, sort.getSort(), nDocs);
    DocIterator iter = set.iterator();
    int hits = 0;
    while (iter.hasNext()) {
      int doc = iter.nextDoc();
      hits++;   // could just use set.size(), but that would be slower for a bitset
      hq.insert(new FieldDoc(doc, 1.0f));
    }

    int numCollected = hq.size();
    int[] ids = new int[numCollected];
    for (int i = numCollected - 1; i >= 0; i--) {
      FieldDoc fieldDoc = (FieldDoc) hq.pop();
      // hq.fillFields(fieldDoc)  // optional, if we need that info
      ids[i] = fieldDoc.doc;
    }

    return new DocSlice(0, numCollected, ids, null, hits, 0.0f);
  }

  /**
   * Returns the number of documents that match both <code>a</code> and <code>b</code>.
   * <p>
   * This method is cache-aware and may check as well as modify the cache.
   *
   * @param a
   * @param b
   * @return the number of documents in the intersection between <code>a</code> and <code>b</code>.
   * @throws IOException
   */
  public int numDocs(Query a, DocSet b) throws IOException {
    // Negative query if absolute value different from original
    Query absQ = QueryUtils.getAbs(a);
    DocSet positiveA = getPositiveDocSet(absQ);
    return a == absQ ? b.intersectionSize(positiveA) : b.andNotSize(positiveA);
  }

  /**
   * Returns the number of documents that match both <code>a</code> and <code>b</code>.
   * <p>
   * This method is cache-aware and may check as well as modify the cache.
   *
   * @param a
   * @param b
   * @return the number of documents in the intersection between <code>a</code> and <code>b</code>.
   * @throws IOException
   */
  public int numDocs(Query a, Query b) throws IOException {
    Query absA = QueryUtils.getAbs(a);
    Query absB = QueryUtils.getAbs(b);
    DocSet positiveA = getPositiveDocSet(absA);
    DocSet positiveB = getPositiveDocSet(absB);

    // Negative query if absolute value different from original
    if (a == absA) {
      if (b == absB) return positiveA.intersectionSize(positiveB);
      return positiveA.andNotSize(positiveB);
    }
    if (b == absB) return positiveB.andNotSize(positiveA);

    // if both negative, we need to create a temp DocSet since we
    // don't have a counting method that takes three.
    DocSet all = getPositiveDocSet(matchAllDocsQuery);

    // -a -b == *:*.andNot(a).andNotSize(b) == *:*.andNotSize(a.union(b))
    // we use the last form since the intermediate DocSet should normally be smaller.
    return all.andNotSize(positiveA.union(positiveB));
  }
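
  // Worked example (illustrative, not part of the original source): with 100
  // docs total, |a| = 30, |b| = 20, and |a OR b| = 40, the doubly negative case
  // gives numDocs(-a, -b) = all.andNotSize(a.union(b)) = 100 - 40 = 60.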

  /**
   * Takes a list of docs (the doc ids actually), and returns an array
   * of Documents containing all of the stored fields.
   */
  public Document[] readDocs(DocList ids) throws IOException {
    Document[] docs = new Document[ids.size()];
    readDocs(docs, ids);
    return docs;
  }
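
  // Example (illustrative, not part of the original source): materializing the
  // stored fields for one page of hits; q is a hypothetical query:
  //
  //   DocList hits = searcher.getDocList(q, (Query) null, null, 0, 10);
  //   Document[] page = searcher.readDocs(hits);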

  /**
   * Warm this searcher based on an old one (primarily for auto-cache warming).
   */
  public void warm(SolrIndexSearcher old) throws IOException {
    // Make sure this is first! filters can help queryResults execute!
    boolean logme = log.isLoggable(Level.INFO);

    // warm the caches in order...
    for (int i = 0; i < cacheList.length; i++) {
      if (logme) log.info("autowarming " + this + " from " + old + "\n\t" + old.cacheList[i]);
      this.cacheList[i].warm(this, old.cacheList[i]);
      if (logme) log.info("autowarming result for " + this + "\n\t" + this.cacheList[i]);
    }
  }

  /**
   * Return the named generic cache.
   */
  public SolrCache getCache(String cacheName) {
    return cacheMap.get(cacheName);
  }

  /**
   * Look up an entry in a generic cache.
   */
  public Object cacheLookup(String cacheName, Object key) {
    SolrCache cache = cacheMap.get(cacheName);
    return cache == null ? null : cache.get(key);
  }

  /**
   * Insert an entry in a generic cache.
   */
  public Object cacheInsert(String cacheName, Object key, Object val) {
    SolrCache cache = cacheMap.get(cacheName);
    return cache == null ? null : cache.put(key, val);
  }
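
  // Example (illustrative, not part of the original source): read-through use
  // of a user-defined cache. The cache name is hypothetical and would have to
  // be declared as a generic cache under <query> in solrconfig.xml:
  //
  //   Object val = searcher.cacheLookup("myUserCache", key);
  //   if (val == null) {
  //     val = computeExpensiveValue(key);              // hypothetical helper
  //     searcher.cacheInsert("myUserCache", key, val);
  //   }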

  /////////////////////////////////////////////////////////////////////
  // SolrInfoMBean stuff: Statistics and Module Info
  /////////////////////////////////////////////////////////////////////

  public String getName() {
    return SolrIndexSearcher.class.getName();
  }

  public String getVersion() {
    return SolrCore.version;
  }

  public String getDescription() {
    return "index searcher";
  }

  public Category getCategory() {
    return Category.CORE;
  }

  public String getSourceId() {
    return "$Id: SolrIndexSearcher.java 541944 2007-05-26 22:29:12Z hossman $";
  }

  public String getSource() {
    return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.2/src/java/org/apache/solr/search/SolrIndexSearcher.java $";
  }

  public URL[] getDocs() {
    return null;
  }

  public NamedList getStatistics() {
    NamedList lst = new SimpleOrderedMap();
    lst.add("caching", cachingEnabled);
    lst.add("numDocs", reader.numDocs());
    lst.add("maxDoc", reader.maxDoc());
    lst.add("readerImpl", reader.getClass().getSimpleName());
    lst.add("readerDir", reader.directory());
    lst.add("indexVersion", reader.getVersion());
    lst.add("openedAt", new Date(openTime));
    if (registerTime != 0) lst.add("registeredAt", new Date(registerTime));
    return lst;
  }
}

// Lucene's HitQueue isn't public, so here is our own.
final class ScorePriorityQueue extends PriorityQueue {
  ScorePriorityQueue(int size) {
    initialize(size);
  }

  protected final boolean lessThan(Object o1, Object o2) {
    ScoreDoc sd1 = (ScoreDoc) o1;
    ScoreDoc sd2 = (ScoreDoc) o2;
    // use index order as a tiebreaker to make sorts stable
    return sd1.score < sd2.score || (sd1.score == sd2.score && sd1.doc > sd2.doc);
  }
}