/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search;

import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.core.SolrInfoRegistry;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.NamedList;
import org.apache.solr.util.OpenBitSet;
import org.apache.solr.util.SimpleOrderedMap;

import java.io.IOException;
import java.net.URL;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * SolrIndexSearcher adds schema awareness and caching functionality
 * over the Lucene IndexSearcher.
 *
 * @author yonik
 * @version $Id: SolrIndexSearcher.java 541944 2007-05-26 22:29:12Z hossman $
 * @since solr 0.9
 */

// Since the internal reader in IndexSearcher is
// package protected, I can't get to it by inheritance.
// For now, I am using delegation and creating the
// IndexReader to pass to the searcher myself.
// NOTE: as of Lucene 1.9, this has changed!
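
// Example (illustrative sketch, not part of the original source): a typical
// lifecycle for a searcher over an index Directory, assuming a loaded schema.
// The (Query) cast disambiguates the null filter between the Query and DocSet
// overloads of getDocList:
//
//   SolrIndexSearcher s = new SolrIndexSearcher(schema, "main", dir, true);
//   s.register();                                      // expose caches as MBeans
//   DocList top10 = s.getDocList(query, (Query) null, sort, 0, 10);
//   s.close();                                         // frees reader and caches
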
public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {

  private static Logger log = Logger.getLogger(SolrIndexSearcher.class.getName());

  private final IndexSchema schema;

  private final String name;
  private long openTime = System.currentTimeMillis();
  private long registerTime = 0;
  private final IndexSearcher searcher;
  private final IndexReader reader;
  private final boolean closeReader;

  private final boolean cachingEnabled;
  private final SolrCache filterCache;
  private final SolrCache queryResultCache;
  private final SolrCache documentCache;

  // map of generic caches - not synchronized since it's read-only after the constructor.
  private final HashMap<String, SolrCache> cacheMap;
  private static final HashMap<String, SolrCache> noGenericCaches = new HashMap<String, SolrCache>(0);

  // list of all caches associated with this searcher.
  private final SolrCache[] cacheList;
  private static final SolrCache[] noCaches = new SolrCache[0];

  /** Creates a searcher searching the index in the named directory. */
  public SolrIndexSearcher(IndexSchema schema, String name, String path, boolean enableCache) throws IOException {
    this(schema, name, IndexReader.open(path), true, enableCache);
  }

  /** Creates a searcher searching the index in the provided directory. */
  public SolrIndexSearcher(IndexSchema schema, String name, Directory directory, boolean enableCache) throws IOException {
    this(schema, name, IndexReader.open(directory), true, enableCache);
  }

  /** Creates a searcher searching the provided index. */
  public SolrIndexSearcher(IndexSchema schema, String name, IndexReader r, boolean enableCache) {
    this(schema, name, r, false, enableCache);
  }

  private SolrIndexSearcher(IndexSchema schema, String name, IndexReader r, boolean closeReader, boolean enableCache) {
    this.schema = schema;
    this.name = "Searcher@" + Integer.toHexString(hashCode()) + (name != null ? " " + name : "");

    log.info("Opening " + this.name);

    reader = r;
    searcher = new IndexSearcher(r);
    this.closeReader = closeReader;
    searcher.setSimilarity(schema.getSimilarity());

    cachingEnabled = enableCache;
    if (cachingEnabled) {
      ArrayList<SolrCache> clist = new ArrayList<SolrCache>();
      filterCache = filterCacheConfig == null ? null : filterCacheConfig.newInstance();
      if (filterCache != null) clist.add(filterCache);
      queryResultCache = queryResultCacheConfig == null ? null : queryResultCacheConfig.newInstance();
      if (queryResultCache != null) clist.add(queryResultCache);
      documentCache = documentCacheConfig == null ? null : documentCacheConfig.newInstance();
      if (documentCache != null) clist.add(documentCache);

      if (userCacheConfigs == null) {
        cacheMap = noGenericCaches;
      } else {
        cacheMap = new HashMap<String, SolrCache>(userCacheConfigs.length);
        for (CacheConfig userCacheConfig : userCacheConfigs) {
          SolrCache cache = null;
          if (userCacheConfig != null) cache = userCacheConfig.newInstance();
          if (cache != null) {
            cacheMap.put(cache.name(), cache);
            clist.add(cache);
          }
        }
      }

      cacheList = clist.toArray(new SolrCache[clist.size()]);
    } else {
      filterCache = null;
      queryResultCache = null;
      documentCache = null;
      cacheMap = noGenericCaches;
      cacheList = noCaches;
    }

    // register self
    SolrInfoRegistry.getRegistry().put(this.name, this);
  }

  public String toString() {
    return name;
  }

  /** Register sub-objects such as caches. */
  public void register() {
    for (SolrCache cache : cacheList) {
      cache.setState(SolrCache.State.LIVE);
      SolrInfoRegistry.getRegistry().put(cache.name(), cache);
    }
    registerTime = System.currentTimeMillis();
  }

  /**
   * Frees resources associated with this searcher.
   *
   * In particular, the underlying reader and any caches in use are closed.
   */
  public void close() throws IOException {
    // unregister first, so no management actions are tried on a closing searcher.
    SolrInfoRegistry.getRegistry().remove(name);

    if (cachingEnabled) {
      StringBuilder sb = new StringBuilder();
      sb.append("Closing ").append(name);
      for (SolrCache cache : cacheList) {
        sb.append("\n\t");
        sb.append(cache);
      }
      log.info(sb.toString());
    } else {
      log.fine("Closing " + name);
    }
    try {
      searcher.close();
    } finally {
      if (closeReader) reader.close();
      for (SolrCache cache : cacheList) {
        cache.close();
      }
    }
  }

  /** Direct access to the IndexReader used by this searcher */
  public IndexReader getReader() {
    return reader;
  }

  /** Direct access to the IndexSchema for use with this searcher */
  public IndexSchema getSchema() {
    return schema;
  }

  // params for the "nutch" query optimizer
  private static boolean filtOptEnabled = SolrConfig.config.getBool("query/boolTofilterOptimizer/@enabled", false);
  private static int filtOptCacheSize = SolrConfig.config.getInt("query/boolTofilterOptimizer/@cacheSize", 32);
  private static float filtOptThreshold = SolrConfig.config.getFloat("query/boolTofilterOptimizer/@threshold", .05f);
  private LuceneQueryOptimizer optimizer = filtOptEnabled ? new LuceneQueryOptimizer(filtOptCacheSize, filtOptThreshold) : null;

  private static final CacheConfig filterCacheConfig = CacheConfig.getConfig("query/filterCache");
  private static final CacheConfig queryResultCacheConfig = CacheConfig.getConfig("query/queryResultCache");
  private static final CacheConfig documentCacheConfig = CacheConfig.getConfig("query/documentCache");
  private static final CacheConfig[] userCacheConfigs = CacheConfig.getMultipleConfigs("query/cache");

  //
  // Set default regenerators on filter and query caches if they don't have any
  //
  static {
    if (filterCacheConfig != null && filterCacheConfig.getRegenerator() == null) {
      filterCacheConfig.setRegenerator(new CacheRegenerator() {
        public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException {
          newSearcher.cacheDocSet((Query) oldKey, null, false);
          return true;
        }
      });
    }

    if (queryResultCacheConfig != null && queryResultCacheConfig.getRegenerator() == null) {
      queryResultCacheConfig.setRegenerator(new CacheRegenerator() {
        public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException {
          QueryResultKey key = (QueryResultKey) oldKey;
          int nDocs = 1;
          // request 1 doc and let caching round up to the next window size...
          // unless the window size is <=1, in which case we will pick
          // the minimum of the number of documents requested last time and
          // a reasonable number such as 40.
          // TODO: make more configurable later...

          if (queryResultWindowSize <= 1) {
            DocList oldList = (DocList) oldVal;
            int oldnDocs = oldList.offset() + oldList.size();
            // 40 has factors of 2,4,5,10,20
            nDocs = Math.min(oldnDocs, 40);
          }

          DocListAndSet ret = new DocListAndSet();
          int flags = NO_CHECK_QCACHE | key.nc_flags;

          newSearcher.getDocListC(ret, key.query, key.filters, null, key.sort, 0, nDocs, flags);
          return true;
        }
      });
    }
  }

  private static boolean useFilterForSortedQuery = SolrConfig.config.getBool("query/useFilterForSortedQuery", false);
  private static int queryResultWindowSize = SolrConfig.config.getInt("query/queryResultWindowSize", 1);

  public Hits search(Query query, Filter filter, Sort sort) throws IOException {
    // todo - when Solr starts accepting filters, need to
    // change this conditional check (filter!=null) and create a new filter
    // that ANDs them together if it already exists.

    if (optimizer == null || filter != null || !(query instanceof BooleanQuery)) {
      return searcher.search(query, filter, sort);
    } else {
      Query[] newQuery = new Query[1];
      Filter[] newFilter = new Filter[1];
      optimizer.optimize((BooleanQuery) query, searcher, 0, newQuery, newFilter);

      return searcher.search(newQuery[0], newFilter[0], sort);
    }
  }

  public Hits search(Query query, Filter filter) throws IOException {
    return searcher.search(query, filter);
  }

  public Hits search(Query query, Sort sort) throws IOException {
    return searcher.search(query, sort);
  }

  public void search(Query query, HitCollector results) throws IOException {
    searcher.search(query, results);
  }

  public void setSimilarity(Similarity similarity) {
    searcher.setSimilarity(similarity);
  }

  public Similarity getSimilarity() {
    return searcher.getSimilarity();
  }

  public int docFreq(Term term) throws IOException {
    return searcher.docFreq(term);
  }

  /* ********************** Document retrieval *************************/

  /* Future optimizations (yonik)
   *
   * If no cache is present:
   *   - use NO_LOAD instead of LAZY_LOAD
   *   - use LOAD_AND_BREAK if a single field is being retrieved
   */

  /**
   * FieldSelector which loads the specified fields, and loads all other
   * fields lazily.
   */
  class SetNonLazyFieldSelector implements FieldSelector {
    private Set<String> fieldsToLoad;

    SetNonLazyFieldSelector(Set<String> toLoad) {
      fieldsToLoad = toLoad;
    }

    public FieldSelectorResult accept(String fieldName) {
      if (fieldsToLoad.contains(fieldName)) return FieldSelectorResult.LOAD;
      else return FieldSelectorResult.LAZY_LOAD;
    }
  }
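
  // Example (illustrative, not part of the original source): loading only the
  // "id" field eagerly while all other stored fields stay lazy; the field name
  // and docid are hypothetical:
  //
  //   Set<String> toLoad = Collections.singleton("id");
  //   Document d = reader.document(docid, new SetNonLazyFieldSelector(toLoad));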

  /* solrconfig lazyfields setting */
  public static final boolean enableLazyFieldLoading = SolrConfig.config.getBool("query/enableLazyFieldLoading", false);

  /**
   * Retrieve the {@link Document} instance corresponding to the document id.
   */
  public Document doc(int i) throws IOException {
    return doc(i, (Set<String>) null);
  }

  /** Retrieve a {@link Document} using a {@link org.apache.lucene.document.FieldSelector}.
   * This method does not currently use the Solr document cache.
   *
   * @see IndexReader#document(int, FieldSelector) */
  public Document doc(int n, FieldSelector fieldSelector) throws IOException {
    return searcher.getIndexReader().document(n, fieldSelector);
  }

  /**
   * Retrieve the {@link Document} instance corresponding to the document id.
   *
   * Note: The document will have all fields accessible, but if a field
   * filter is provided, only the provided fields will be loaded (the
   * remainder will be available lazily).
   */
  public Document doc(int i, Set<String> fields) throws IOException {

    Document d;
    if (documentCache != null) {
      d = (Document) documentCache.get(i);
      if (d != null) return d;
    }

    if (!enableLazyFieldLoading || fields == null) {
      d = searcher.getIndexReader().document(i);
    } else {
      d = searcher.getIndexReader().document(i, new SetNonLazyFieldSelector(fields));
    }

    if (documentCache != null) {
      documentCache.put(i, d);
    }

    return d;
  }
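
  // Example (illustrative, not part of the original source): fetching a doc
  // through the documentCache with only "id" and "title" loaded eagerly; the
  // remaining stored fields load lazily when enableLazyFieldLoading is on.
  // Field names are hypothetical:
  //
  //   Set<String> fields = new HashSet<String>();
  //   fields.add("id");
  //   fields.add("title");
  //   Document d = searcher.doc(docid, fields);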

  /**
   * Takes a list of docs (the doc ids actually), and reads them into an array
   * of Documents.
   */
  public void readDocs(Document[] docs, DocList ids) throws IOException {
    readDocs(docs, ids, null);
  }

  /**
   * Takes a list of docs (the doc ids actually) and a set of fields to load,
   * and reads them into an array of Documents.
   */
  public void readDocs(Document[] docs, DocList ids, Set<String> fields) throws IOException {
    DocIterator iter = ids.iterator();
    for (int i = 0; i < docs.length; i++) {
      docs[i] = doc(iter.nextDoc(), fields);
    }
  }

  /* ********************** end document retrieval *************************/

  public int maxDoc() throws IOException {
    return searcher.maxDoc();
  }

  public TopDocs search(Weight weight, Filter filter, int i) throws IOException {
    return searcher.search(weight, filter, i);
  }

  public void search(Weight weight, Filter filter, HitCollector hitCollector) throws IOException {
    searcher.search(weight, filter, hitCollector);
  }

  public Query rewrite(Query original) throws IOException {
    return searcher.rewrite(original);
  }

  public Explanation explain(Weight weight, int i) throws IOException {
    return searcher.explain(weight, i);
  }

  public TopFieldDocs search(Weight weight, Filter filter, int i, Sort sort) throws IOException {
    return searcher.search(weight, filter, i, sort);
  }

  ////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////

  /**
   * Returns the first document number containing the term <code>t</code>.
   * Returns -1 if no document was found.
   * This method is primarily intended for clients that want to fetch
   * documents using a unique identifier.
   * @param t
   * @return the first document number containing the term
   */
  public int getFirstMatch(Term t) throws IOException {
    TermDocs tdocs = null;
    try {
      tdocs = reader.termDocs(t);
      if (!tdocs.next()) return -1;
      return tdocs.doc();
    } finally {
      if (tdocs != null) tdocs.close();
    }
  }
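
  // Example (illustrative, not part of the original source): resolving a unique
  // key to an internal Lucene doc id, assuming "id" is the schema's unique key
  // field and the value is hypothetical:
  //
  //   int docid = searcher.getFirstMatch(new Term("id", "9885A004"));
  //   if (docid != -1) {
  //     Document d = searcher.doc(docid);
  //   }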

  /**
   * Compute and cache the DocSet that matches a query.
   * The normal usage is expected to be cacheDocSet(myQuery, null, false)
   * meaning that Solr will determine if the Query warrants caching, and
   * if so, will compute the DocSet that matches the Query and cache it.
   * If the answer to the query is already cached, nothing further will be done.
   * <p>
   * If the optionalAnswer DocSet is provided, it should <b>not</b> be modified
   * after this call.
   *
   * @param query the lucene query that will act as the key
   * @param optionalAnswer the DocSet to be cached - if null, it will be computed.
   * @param mustCache if true, a best effort will be made to cache this entry.
   *                  if false, heuristics may be used to determine if it should be cached.
   */
  public void cacheDocSet(Query query, DocSet optionalAnswer, boolean mustCache) throws IOException {
    // Even if the cache is null, still compute the DocSet as it may serve to warm the Lucene
    // or OS disk cache.
    if (optionalAnswer != null) {
      if (filterCache != null) {
        filterCache.put(query, optionalAnswer);
      }
      return;
    }

    // Throw away the result, relying on the fact that getDocSet
    // will currently always cache what it found. If getDocSet() starts
    // using heuristics about what to cache, and mustCache==true, (or if we
    // want this method to start using heuristics too) then
    // this needs to change.
    getDocSet(query);
  }
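
  // Example (illustrative, not part of the original source): pre-warming the
  // filter cache with a commonly used filter; field and value are hypothetical:
  //
  //   searcher.cacheDocSet(new TermQuery(new Term("inStock", "true")), null, false);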

  /**
   * Returns the set of document ids matching a query.
   * This method is cache-aware and attempts to retrieve the answer from the cache if possible.
   * If the answer was not cached, it may have been inserted into the cache as a result of this call.
   * This method can handle negative queries.
   * <p>
   * The DocSet returned should <b>not</b> be modified.
   */
  public DocSet getDocSet(Query query) throws IOException {
    // Get the absolute value (positive version) of this query. If we
    // get back the same reference, we know it's positive.
    Query absQ = QueryUtils.getAbs(query);
    boolean positive = query == absQ;

    if (filterCache != null) {
      DocSet absAnswer = (DocSet) filterCache.get(absQ);
      if (absAnswer != null) {
        if (positive) return absAnswer;
        else return getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
      }
    }

    DocSet absAnswer = getDocSetNC(absQ, null);
    DocSet answer = positive ? absAnswer : getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);

    if (filterCache != null) {
      // cache negative queries as positive
      filterCache.put(absQ, absAnswer);
    }

    return answer;
  }
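
  // Example (illustrative, not part of the original source): a negative query
  // is answered by subtracting its cached positive form from the set of all
  // documents, so a query equivalent to -inStock:true (hypothetical) still
  // benefits from the filterCache entry for inStock:true:
  //
  //   DocSet excluded = searcher.getDocSet(negatedQuery);  // negatedQuery is hypothetical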

  // only handle positive (non negative) queries
  DocSet getPositiveDocSet(Query q) throws IOException {
    DocSet answer;
    if (filterCache != null) {
      answer = (DocSet) filterCache.get(q);
      if (answer != null) return answer;
    }
    answer = getDocSetNC(q, null);
    if (filterCache != null) filterCache.put(q, answer);
    return answer;
  }

  private static Query matchAllDocsQuery = new MatchAllDocsQuery();

  protected DocSet getDocSet(List<Query> queries) throws IOException {
    if (queries == null) return null;
    if (queries.size() == 1) return getDocSet(queries.get(0));
    DocSet answer = null;

    boolean[] neg = new boolean[queries.size()];
    DocSet[] sets = new DocSet[queries.size()];

    int smallestIndex = -1;
    int smallestCount = Integer.MAX_VALUE;
    for (int i = 0; i < sets.length; i++) {
      Query q = queries.get(i);
      Query posQuery = QueryUtils.getAbs(q);
      sets[i] = getPositiveDocSet(posQuery);
      // Negative query if absolute value different from original
      if (q == posQuery) {
        neg[i] = false;
        // keep track of the smallest positive set.
        // This optimization is only worth it if size() is cached, which it would
        // be if we don't do any set operations.
        int sz = sets[i].size();
        if (sz < smallestCount) {
          smallestCount = sz;
          smallestIndex = i;
          answer = sets[i];
        }
      } else {
        neg[i] = true;
      }
    }

    // if no positive queries, start off with all docs
    if (answer == null) answer = getPositiveDocSet(matchAllDocsQuery);

    // do negative queries first to shrink set size
    for (int i = 0; i < sets.length; i++) {
      if (neg[i]) answer = answer.andNot(sets[i]);
    }

    for (int i = 0; i < sets.length; i++) {
      if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]);
    }

    return answer;
  }
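
  // Example (illustrative, not part of the original source): intersecting
  // several filters, each resolved through the filter cache. The method is
  // protected, so the caller must be a subclass or in the same package;
  // fields and values are hypothetical:
  //
  //   List<Query> filters = new ArrayList<Query>();
  //   filters.add(new TermQuery(new Term("inStock", "true")));
  //   filters.add(new TermQuery(new Term("cat", "electronics")));
  //   DocSet both = searcher.getDocSet(filters);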

  // query must be positive
  protected DocSet getDocSetNC(Query query, DocSet filter) throws IOException {
    if (filter == null) {
      DocSetHitCollector hc = new DocSetHitCollector(maxDoc());
      if (query instanceof TermQuery) {
        Term t = ((TermQuery) query).getTerm();
        TermDocs tdocs = null;
        try {
          tdocs = reader.termDocs(t);
          while (tdocs.next()) hc.collect(tdocs.doc(), 0.0f);
        } finally {
          if (tdocs != null) tdocs.close();
        }
      } else {
        searcher.search(query, null, hc);
      }
      return hc.getDocSet();

    } else {
      // FUTURE: if the filter is sorted by docid, could use skipTo (SkipQueryFilter)
      final DocSetHitCollector hc = new DocSetHitCollector(maxDoc());
      final DocSet filt = filter;
      searcher.search(query, null, new HitCollector() {
        public void collect(int doc, float score) {
          if (filt.exists(doc)) hc.collect(doc, score);
        }
      });
      return hc.getDocSet();
    }
  }

  /**
   * Returns the set of document ids matching both the query and the filter.
   * This method is cache-aware and attempts to retrieve the answer from the cache if possible.
   * If the answer was not cached, it may have been inserted into the cache as a result of this call.
   * <p>
   *
   * @param query
   * @param filter may be null
   * @return DocSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   */
  public DocSet getDocSet(Query query, DocSet filter) throws IOException {
    if (filter == null) return getDocSet(query);

    // Negative query if absolute value different from original
    Query absQ = QueryUtils.getAbs(query);
    boolean positive = absQ == query;

    DocSet first;
    if (filterCache != null) {
      first = (DocSet) filterCache.get(absQ);
      if (first == null) {
        first = getDocSetNC(absQ, null);
        filterCache.put(absQ, first);
      }
      return positive ? first.intersection(filter) : filter.andNot(first);
    }

    // If there isn't a cache, then do a single filtered query if positive.
    return positive ? getDocSetNC(absQ, filter) : filter.andNot(getPositiveDocSet(absQ));
  }

  /**
   * Converts a filter into a DocSet.
   * This method is not cache-aware and no caches are checked.
   */
  public DocSet convertFilter(Filter lfilter) throws IOException {
    BitSet bs = lfilter.bits(this.reader);
    OpenBitSet obs = new OpenBitSet(bs.size());
    for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
      obs.fastSet(i);
    }
    return new BitDocSet(obs);
  }
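
  // Example (illustrative, not part of the original source): adapting a plain
  // Lucene filter, such as a RangeFilter, into an uncached Solr DocSet; the
  // field name and bounds are hypothetical:
  //
  //   Filter f = new RangeFilter("price", "0", "100", true, true);
  //   DocSet priced = searcher.convertFilter(f);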

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>.
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   *
   * @param query
   * @param filter may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocList getDocList(Query query, Query filter, Sort lsort, int offset, int len) throws IOException {
    List<Query> filterList = null;
    if (filter != null) {
      filterList = new ArrayList<Query>(1);
      filterList.add(filter);
    }
    return getDocList(query, filterList, lsort, offset, len, 0);
  }
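
  // Example (illustrative, not part of the original source): paging through
  // results ten at a time; rounding up to queryResultWindowSize internally
  // lets the second request usually hit the queryResultCache. The variables
  // q, filterQuery, and sort are hypothetical:
  //
  //   DocList page1 = searcher.getDocList(q, filterQuery, sort, 0, 10);
  //   DocList page2 = searcher.getDocList(q, filterQuery, sort, 10, 10);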

  /**
   * Returns documents matching both <code>query</code> and the
   * intersection of the <code>filterList</code>, sorted by <code>sort</code>.
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   *
   * @param query
   * @param filterList may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocList getDocList(Query query, List<Query> filterList, Sort lsort, int offset, int len, int flags) throws IOException {
    DocListAndSet answer = new DocListAndSet();
    getDocListC(answer, query, filterList, null, lsort, offset, len, flags);
    return answer.docList;
  }

  private static final int NO_CHECK_QCACHE      = 0x80000000;
  private static final int GET_DOCSET           = 0x40000000;
  private static final int NO_CHECK_FILTERCACHE = 0x20000000;

  public static final int GET_SCORES = 0x01;

  private void getDocListC(DocListAndSet out, Query query, List<Query> filterList, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
    QueryResultKey key = null;
    int maxDoc = offset + len;
    int supersetMaxDoc = maxDoc;
    DocList superset;

    // we can try and look up the complete query in the cache.
    // we can't do that if filter!=null though (we don't want to
    // do hashCode() and equals() for a big DocSet).
    if (queryResultCache != null && filter == null) {
      // all of the current flags can be reused during warming,
      // so set all of them on the cache key.
      key = new QueryResultKey(query, filterList, lsort, flags);
      if ((flags & NO_CHECK_QCACHE) == 0) {
        superset = (DocList) queryResultCache.get(key);

        if (superset != null) {
          // check that the cache entry has scores recorded if we need them
          if ((flags & GET_SCORES) == 0 || superset.hasScores()) {
            out.docList = superset.subset(offset, len);
          }
        }
        if (out.docList != null) {
          // found the docList in the cache... now check if we need the docset too.
          // OPT: possible future optimization - if the doclist contains all the matches,
          // use it to make the docset instead of rerunning the query.
          if (out.docSet == null && ((flags & GET_DOCSET) != 0)) {
            if (filterList == null) {
              out.docSet = getDocSet(query);
            } else {
              List<Query> newList = new ArrayList<Query>(filterList.size() + 1);
              newList.add(query);
              newList.addAll(filterList);
              out.docSet = getDocSet(newList);
            }
          }
          return;
        }
      }

      // If we are going to generate the result, bump up to the
      // next resultWindowSize for better caching.

      // handle 0 special case as well as avoid idiv in the common case.
      if (maxDoc < queryResultWindowSize) {
        supersetMaxDoc = queryResultWindowSize;
      } else {
        supersetMaxDoc = ((maxDoc - 1) / queryResultWindowSize + 1) * queryResultWindowSize;
      }
    }

    // OK, so now we need to generate an answer.
    // One way to do that would be to check if we have an unordered list
    // of results for the base query. If so, we can apply the filters and then
    // sort by the resulting set. This can only be used if:
    //   - the sort doesn't contain score
    //   - we don't want score returned.

    // check if we should try and use the filter cache
    boolean useFilterCache = false;
    if ((flags & (GET_SCORES | NO_CHECK_FILTERCACHE)) == 0 && useFilterForSortedQuery && lsort != null && filterCache != null) {
      useFilterCache = true;
      SortField[] sfields = lsort.getSort();
      for (SortField sf : sfields) {
        if (sf.getType() == SortField.SCORE) {
          useFilterCache = false;
          break;
        }
      }
    }

    if (useFilterCache) {
      // now actually use the filter cache.
      // for large filters that match few documents, this may be
      // slower than simply re-executing the query.
      if (out.docSet == null) {
        out.docSet = getDocSet(query, filter);
        DocSet bigFilt = getDocSet(filterList);
        if (bigFilt != null) out.docSet = out.docSet.intersection(bigFilt);
      }
      // todo: there could be a sortDocSet that could take a list of
      // the filters instead of anding them first...
      // perhaps there should be a multi-docset-iterator
      superset = sortDocSet(out.docSet, lsort, supersetMaxDoc);
      out.docList = superset.subset(offset, len);
    } else {
      // do it the normal way...
      DocSet theFilt = filter != null ? filter : getDocSet(filterList);

      if ((flags & GET_DOCSET) != 0) {
        DocSet qDocSet = getDocListAndSetNC(out, query, theFilt, lsort, 0, supersetMaxDoc, flags);
        // cache the docSet matching the query w/o filtering
        if (filterCache != null) filterCache.put(query, qDocSet);
      } else {
        out.docList = getDocListNC(query, theFilt, lsort, 0, supersetMaxDoc, flags);
      }
      superset = out.docList;
      out.docList = superset.subset(offset, len);
    }

    // lastly, put the superset in the cache
    if (key != null) {
      queryResultCache.put(key, superset);
    }
  }

  private DocList getDocListNC(Query query, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
    final int lastDocRequested = offset + len;
    int nDocsReturned;
    int totalHits;
    float maxScore;
    int[] ids;
    float[] scores;

    query = QueryUtils.makeQueryable(query);

    // handle zero case...
    if (lastDocRequested <= 0) {
      final DocSet filt = filter;
      final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };
      final int[] numHits = new int[1];

      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          if (filt != null && !filt.exists(doc)) return;
          numHits[0]++;
          if (score > topscore[0]) topscore[0] = score;
        }
      });

      nDocsReturned = 0;
      ids = new int[nDocsReturned];
      scores = new float[nDocsReturned];
      totalHits = numHits[0];
      maxScore = totalHits > 0 ? topscore[0] : 0.0f;
    } else if (lsort != null) {
      // can't use TopDocs if there is a sort since it
      // will do automatic score normalization.
      // NOTE: this changed late in Lucene 1.9

      final DocSet filt = filter;
      final int[] numHits = new int[1];
      final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, lsort.getSort(), offset + len);

      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          if (filt != null && !filt.exists(doc)) return;
          numHits[0]++;
          hq.insert(new FieldDoc(doc, score));
        }
      });

      totalHits = numHits[0];
      maxScore = totalHits > 0 ? hq.getMaxScore() : 0.0f;

      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (flags & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
      for (int i = nDocsReturned - 1; i >= 0; i--) {
        FieldDoc fieldDoc = (FieldDoc) hq.pop();
        // fillFields is the point where score normalization happens
        // hq.fillFields(fieldDoc)
        ids[i] = fieldDoc.doc;
        if (scores != null) scores[i] = fieldDoc.score;
      }
    } else {
      // No Sort specified (sort by score descending)
      // This case could be done with TopDocs, but would currently require
      // getting a BitSet filter from a DocSet which may be inefficient.

      final DocSet filt = filter;
      final ScorePriorityQueue hq = new ScorePriorityQueue(lastDocRequested);
      final int[] numHits = new int[1];
      searcher.search(query, new HitCollector() {
        float minScore = Float.NEGATIVE_INFINITY; // minimum score in the priority queue

        public void collect(int doc, float score) {
          if (filt != null && !filt.exists(doc)) return;
          if (numHits[0]++ < lastDocRequested || score >= minScore) {
            // TODO: if docs are always delivered in order, we could use "score>minScore"
            // instead of "score>=minScore" and avoid tiebreaking scores
            // in the priority queue.
            // but might BooleanScorer14 still be used and deliver docs out-of-order?
            hq.insert(new ScoreDoc(doc, score));
            minScore = ((ScoreDoc) hq.top()).score;
          }
        }
      });

      totalHits = numHits[0];
      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (flags & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
      ScoreDoc sdoc = null;
      for (int i = nDocsReturned - 1; i >= 0; i--) {
        sdoc = (ScoreDoc) hq.pop();
        ids[i] = sdoc.doc;
        if (scores != null) scores[i] = sdoc.score;
      }
      maxScore = sdoc == null ? 0.0f : sdoc.score;
    }

    int sliceLen = Math.min(lastDocRequested, nDocsReturned) - offset;
    if (sliceLen < 0) sliceLen = 0;
    return new DocSlice(offset, sliceLen, ids, scores, totalHits, maxScore);

    /**************** older implementation using TopDocs *******************

    Filter lfilter=null;
    if (filter != null) {
      final BitSet bits = filter.getBits();   // avoid if possible
      lfilter = new Filter() {
        public BitSet bits(IndexReader reader) {
          return bits;
        }
      };
    }

    int lastDocRequested=offset+len;

    // lucene doesn't allow 0 to be passed for nDocs
    if (lastDocRequested==0) lastDocRequested=1;

    // TopFieldDocs sortedDocs;  // use TopDocs so both versions can use it
    TopDocs sortedDocs;
    if (lsort!=null) {
      sortedDocs = searcher.search(query, lfilter, lastDocRequested, lsort);
    } else {
      sortedDocs = searcher.search(query, lfilter, lastDocRequested);
    }

    int nDocsReturned = sortedDocs.scoreDocs.length;
    int[] docs = new int[nDocsReturned];
    for (int i=0; i<nDocsReturned; i++) {
      docs[i] = sortedDocs.scoreDocs[i].doc;
    }
    float[] scores=null;
    float maxScore=0.0f;
    if ((flags & GET_SCORES) != 0) {
      scores = new float[nDocsReturned];
      for (int i=0; i<nDocsReturned; i++) {
        scores[i] = sortedDocs.scoreDocs[i].score;
      }
      if (nDocsReturned>0) {
        maxScore=sortedDocs.scoreDocs[0].score;
      }
    }
    int sliceLen = Math.min(offset+len,nDocsReturned) - offset;
    if (sliceLen < 0) sliceLen=0;
    return new DocSlice(offset,sliceLen,docs,scores,sortedDocs.totalHits, maxScore);

    **********************************************************************************/

  }

  // the DocSet returned is for the query only, without any filtering... that way it may
  // be cached if desired.
  private DocSet getDocListAndSetNC(DocListAndSet out, Query query, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
    final int lastDocRequested = offset + len;
    int nDocsReturned;
    int totalHits;
    float maxScore;
    int[] ids;
    float[] scores;
    final DocSetHitCollector setHC = new DocSetHitCollector(maxDoc());

    query = QueryUtils.makeQueryable(query);

    // TODO: perhaps unify getDocListAndSetNC and getDocListNC without imposing a significant performance hit

    // Comment: gathering the set before the filter is applied allows one to cache
    // the resulting DocSet under the query. The drawback is that it requires an
    // extra intersection with the filter at the end. This will be a net win
    // for expensive queries.

    // Q: what if the final intersection results in a small set from two large
    // sets... it won't be a HashDocSet or other small set. One way around
    // this would be to collect the resulting set as we go (the filter is
    // checked anyway).

    // handle zero case...
    if (lastDocRequested <= 0) {
      final DocSet filt = filter;
      final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };
      final int[] numHits = new int[1];

      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          setHC.collect(doc, score);
          if (filt != null && !filt.exists(doc)) return;
          numHits[0]++;
          if (score > topscore[0]) topscore[0] = score;
        }
      });

      nDocsReturned = 0;
      ids = new int[nDocsReturned];
      scores = new float[nDocsReturned];
      totalHits = numHits[0];
      maxScore = totalHits > 0 ? topscore[0] : 0.0f;
    } else if (lsort != null) {
      // can't use TopDocs if there is a sort since it
      // will do automatic score normalization.
      // NOTE: this changed late in Lucene 1.9

      final DocSet filt = filter;
      final int[] numHits = new int[1];
      final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, lsort.getSort(), offset + len);

      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          setHC.collect(doc, score);
          if (filt != null && !filt.exists(doc)) return;
          numHits[0]++;
          hq.insert(new FieldDoc(doc, score));
        }
      });

      totalHits = numHits[0];
      maxScore = totalHits > 0 ? hq.getMaxScore() : 0.0f;

      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (flags & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
      for (int i = nDocsReturned - 1; i >= 0; i--) {
        FieldDoc fieldDoc = (FieldDoc) hq.pop();
        // fillFields is the point where score normalization happens
        // hq.fillFields(fieldDoc)
        ids[i] = fieldDoc.doc;
        if (scores != null) scores[i] = fieldDoc.score;
      }
    } else {
      // No Sort specified (sort by score descending)
      // This case could be done with TopDocs, but would currently require
      // getting a BitSet filter from a DocSet which may be inefficient.

      final DocSet filt = filter;
      final ScorePriorityQueue hq = new ScorePriorityQueue(lastDocRequested);
      final int[] numHits = new int[1];
      searcher.search(query, new HitCollector() {
        float minScore = Float.NEGATIVE_INFINITY; // minimum score in the priority queue

        public void collect(int doc, float score) {
          setHC.collect(doc, score);
          if (filt != null && !filt.exists(doc)) return;
          if (numHits[0]++ < lastDocRequested || score >= minScore) {
            // if docs are always delivered in order, we could use "score>minScore",
            // but might BooleanScorer14 still be used and deliver docs out-of-order?
            hq.insert(new ScoreDoc(doc, score));
            minScore = ((ScoreDoc) hq.top()).score;
          }
        }
      });

      totalHits = numHits[0];
      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (flags & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
      ScoreDoc sdoc = null;
      for (int i = nDocsReturned - 1; i >= 0; i--) {
        sdoc = (ScoreDoc) hq.pop();
        ids[i] = sdoc.doc;
        if (scores != null) scores[i] = sdoc.score;
      }
      maxScore = sdoc == null ? 0.0f : sdoc.score;
    }

    int sliceLen = Math.min(lastDocRequested, nDocsReturned) - offset;
    if (sliceLen < 0) sliceLen = 0;
    out.docList = new DocSlice(offset, sliceLen, ids, scores, totalHits, maxScore);
    DocSet qDocSet = setHC.getDocSet();
    out.docSet = filter == null ? qDocSet : qDocSet.intersection(filter);
    return qDocSet;
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>.
   * FUTURE: The returned DocList may be retrieved from a cache.
   *
   * @param query
   * @param filter may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @return DocList meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocList getDocList(Query query, DocSet filter, Sort lsort, int offset, int len) throws IOException {
    DocListAndSet answer = new DocListAndSet();
    getDocListC(answer, query, null, filter, lsort, offset, len, 0);
    return answer.docList;
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>. Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filter may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, Query filter, Sort lsort, int offset, int len) throws IOException {
    List<Query> filterList = buildQueryList(filter);
    return getDocListAndSet(query, filterList, lsort, offset, len);
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>. Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filter may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @param flags user supplied flags for the result set
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, Query filter, Sort lsort, int offset, int len, int flags) throws IOException {
    List<Query> filterList = buildQueryList(filter);
    return getDocListAndSet(query, filterList, lsort, offset, len, flags);
  }

  /**
   * A simple utility method to build a filterList from a query.
   * @param filter
   */
  private List<Query> buildQueryList(Query filter) {
    List<Query> filterList = null;
    if (filter != null) {
      filterList = new ArrayList<Query>(2);
      filterList.add(filter);
    }
    return filterList;
  }

  /**
   * Returns documents matching both <code>query</code> and the intersection
   * of <code>filterList</code>, sorted by <code>sort</code>.
   * Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code>
   * (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filterList may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, Sort lsort, int offset, int len) throws IOException {
    DocListAndSet ret = new DocListAndSet();
    getDocListC(ret, query, filterList, null, lsort, offset, len, GET_DOCSET);
    return ret;
  }

  /**
   * Returns documents matching both <code>query</code> and the intersection
   * of <code>filterList</code>, sorted by <code>sort</code>.
   * Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code>
   * (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may retrieve <code>filter</code> from
   * the cache or make an insertion into the cache as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filterList may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @param flags user supplied flags for the result set
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, Sort lsort, int offset, int len, int flags) throws IOException {
    DocListAndSet ret = new DocListAndSet();
    getDocListC(ret, query, filterList, null, lsort, offset, len, flags |= GET_DOCSET);
    return ret;
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>. Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   *
   * @param query
   * @param filter may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, DocSet filter, Sort lsort, int offset, int len) throws IOException {
    DocListAndSet ret = new DocListAndSet();
    getDocListC(ret, query, null, filter, lsort, offset, len, GET_DOCSET);
    return ret;
  }

  /**
   * Returns documents matching both <code>query</code> and <code>filter</code>
   * and sorted by <code>sort</code>. Also returns the complete set of documents
   * matching <code>query</code> and <code>filter</code> (regardless of <code>offset</code> and <code>len</code>).
   * <p>
   * This method is cache aware and may make an insertion into the cache
   * as a result of this call.
   * <p>
   * FUTURE: The returned DocList may be retrieved from a cache.
   * <p>
   * The DocList and DocSet returned should <b>not</b> be modified.
   *
   * @param query
   * @param filter may be null
   * @param lsort criteria by which to sort (if null, query relevance is used)
   * @param offset offset into the list of documents to return
   * @param len maximum number of documents to return
   * @param flags user supplied flags for the result set
   * @return DocListAndSet meeting the specified criteria, should <b>not</b> be modified by the caller.
   * @throws IOException
   */
  public DocListAndSet getDocListAndSet(Query query, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
    DocListAndSet ret = new DocListAndSet();
    getDocListC(ret, query, null, filter, lsort, offset, len, flags |= GET_DOCSET);
    return ret;
  }

  protected DocList sortDocSet(DocSet set, Sort sort, int nDocs) throws IOException {
    final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, sort.getSort(), nDocs);
    DocIterator iter = set.iterator();
    int hits = 0;
    while (iter.hasNext()) {
      int doc = iter.nextDoc();
      hits++;   // could just use set.size(), but that would be slower for a bitset
      hq.insert(new FieldDoc(doc, 1.0f));
    }

    int numCollected = hq.size();
    int[] ids = new int[numCollected];
    for (int i = numCollected - 1; i >= 0; i--) {
      FieldDoc fieldDoc = (FieldDoc) hq.pop();
      // hq.fillFields(fieldDoc)  // optional, if we need that info
      ids[i] = fieldDoc.doc;
    }

    return new DocSlice(0, numCollected, ids, null, hits, 0.0f);
  }

  /**
   * Returns the number of documents that match both <code>a</code> and <code>b</code>.
   * <p>
   * This method is cache-aware and may check as well as modify the cache.
   *
   * @param a
   * @param b
   * @return the number of documents in the intersection between <code>a</code> and <code>b</code>.
   * @throws IOException
   */
  public int numDocs(Query a, DocSet b) throws IOException {
    // Negative query if absolute value different from original
    Query absQ = QueryUtils.getAbs(a);
    DocSet positiveA = getPositiveDocSet(absQ);
    return a == absQ ? b.intersectionSize(positiveA) : b.andNotSize(positiveA);
  }

  /**
   * Returns the number of documents that match both <code>a</code> and <code>b</code>.
   * <p>
   * This method is cache-aware and may check as well as modify the cache.
   *
   * @param a
   * @param b
   * @return the number of documents in the intersection between <code>a</code> and <code>b</code>.
   * @throws IOException
   */
  public int numDocs(Query a, Query b) throws IOException {
    Query absA = QueryUtils.getAbs(a);
    Query absB = QueryUtils.getAbs(b);
    DocSet positiveA = getPositiveDocSet(absA);
    DocSet positiveB = getPositiveDocSet(absB);

    // Negative query if absolute value different from original
    if (a == absA) {
      if (b == absB) return positiveA.intersectionSize(positiveB);
      return positiveA.andNotSize(positiveB);
    }
    if (b == absB) return positiveB.andNotSize(positiveA);

    // if both negative, we need to create a temp DocSet since we
    // don't have a counting method that takes three.
    DocSet all = getPositiveDocSet(matchAllDocsQuery);

    // -a -b == *:*.andNot(a).andNotSize(b) == *:*.andNotSize(a.union(b))
    // we use the last form since the intermediate DocSet should normally be smaller.
    return all.andNotSize(positiveA.union(positiveB));
  }
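
  // Worked example (illustrative, not part of the original source): with 100
  // docs total, |a| = 30, |b| = 20, and |a OR b| = 40, the doubly negative case
  // gives numDocs(-a, -b) = all.andNotSize(a.union(b)) = 100 - 40 = 60.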

  /**
   * Takes a list of docs (the doc ids actually), and returns an array
   * of Documents containing all of the stored fields.
   */
  public Document[] readDocs(DocList ids) throws IOException {
    Document[] docs = new Document[ids.size()];
    readDocs(docs, ids);
    return docs;
  }
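
  // Example (illustrative, not part of the original source): materializing the
  // stored fields for one page of hits; q is a hypothetical query:
  //
  //   DocList hits = searcher.getDocList(q, (Query) null, null, 0, 10);
  //   Document[] page = searcher.readDocs(hits);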

  /**
   * Warm this searcher based on an old one (primarily for auto-cache warming).
   */
  public void warm(SolrIndexSearcher old) throws IOException {
    // Make sure this is first! filters can help queryResults execute!
    boolean logme = log.isLoggable(Level.INFO);

    // warm the caches in order...
    for (int i = 0; i < cacheList.length; i++) {
      if (logme) log.info("autowarming " + this + " from " + old + "\n\t" + old.cacheList[i]);
      this.cacheList[i].warm(this, old.cacheList[i]);
      if (logme) log.info("autowarming result for " + this + "\n\t" + this.cacheList[i]);
    }
  }

  /**
   * Return the named generic cache.
   */
  public SolrCache getCache(String cacheName) {
    return cacheMap.get(cacheName);
  }

  /**
   * Look up an entry in a generic cache.
   */
  public Object cacheLookup(String cacheName, Object key) {
    SolrCache cache = cacheMap.get(cacheName);
    return cache == null ? null : cache.get(key);
  }

  /**
   * Insert an entry in a generic cache.
   */
  public Object cacheInsert(String cacheName, Object key, Object val) {
    SolrCache cache = cacheMap.get(cacheName);
    return cache == null ? null : cache.put(key, val);
  }
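
  // Example (illustrative, not part of the original source): read-through use
  // of a user-defined cache. The cache name is hypothetical and would have to
  // be declared as a generic cache under <query> in solrconfig.xml:
  //
  //   Object val = searcher.cacheLookup("myUserCache", key);
  //   if (val == null) {
  //     val = computeExpensiveValue(key);              // hypothetical helper
  //     searcher.cacheInsert("myUserCache", key, val);
  //   }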

  /////////////////////////////////////////////////////////////////////
  // SolrInfoMBean stuff: Statistics and Module Info
  /////////////////////////////////////////////////////////////////////

  public String getName() {
    return SolrIndexSearcher.class.getName();
  }

  public String getVersion() {
    return SolrCore.version;
  }

  public String getDescription() {
    return "index searcher";
  }

  public Category getCategory() {
    return Category.CORE;
  }

  public String getSourceId() {
    return "$Id: SolrIndexSearcher.java 541944 2007-05-26 22:29:12Z hossman $";
  }

  public String getSource() {
    return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.2/src/java/org/apache/solr/search/SolrIndexSearcher.java $";
  }

  public URL[] getDocs() {
    return null;
  }

  public NamedList getStatistics() {
    NamedList lst = new SimpleOrderedMap();
    lst.add("caching", cachingEnabled);
    lst.add("numDocs", reader.numDocs());
    lst.add("maxDoc", reader.maxDoc());
    lst.add("readerImpl", reader.getClass().getSimpleName());
    lst.add("readerDir", reader.directory());
    lst.add("indexVersion", reader.getVersion());
    lst.add("openedAt", new Date(openTime));
    if (registerTime != 0) lst.add("registeredAt", new Date(registerTime));
    return lst;
  }
}

// Lucene's HitQueue isn't public, so here is our own.
final class ScorePriorityQueue extends PriorityQueue {
  ScorePriorityQueue(int size) {
    initialize(size);
  }

  protected final boolean lessThan(Object o1, Object o2) {
    ScoreDoc sd1 = (ScoreDoc) o1;
    ScoreDoc sd2 = (ScoreDoc) o2;
    // use index order as a tiebreaker to make sorts stable
    return sd1.score < sd2.score || (sd1.score == sd2.score && sd1.doc > sd2.doc);
  }
}