Source Code Cross Referenced for Hits.java in » Net » lucene-connector » org » apache » lucene » search » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation

1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Sevlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI

Java

Java Tutorial

Illustrator Tutorials

GIMP Tutorials

C# / C Sharp

C# / CSharp Tutorial

C# / CSharp Open Source

SQL Server / T-SQL Tutorial

Oracle PL / SQL

Oracle PL/SQL Tutorial

Flash / Flex / ActionScript

VBA / Excel / Access / Word

XML

XML Tutorial

Microsoft Office PowerPoint 2007 Tutorial

Microsoft Office Excel 2007 Tutorial

Microsoft Office Word 2007 Tutorial

Java Source Code / Java Documentation » Net » lucene connector » org.apache.lucene.search

Source Cross Referenced Class Diagram Java Document (Java Doc)

001:        package org.apache.lucene.search;
002:
003:        /**
004:         * Licensed to the Apache Software Foundation (ASF) under one or more
005:         * contributor license agreements.  See the NOTICE file distributed with
006:         * this work for additional information regarding copyright ownership.
007:         * The ASF licenses this file to You under the Apache License, Version 2.0
008:         * (the "License"); you may not use this file except in compliance with
009:         * the License.  You may obtain a copy of the License at
010:         *
011:         *     http://www.apache.org/licenses/LICENSE-2.0
012:         *
013:         * Unless required by applicable law or agreed to in writing, software
014:         * distributed under the License is distributed on an "AS IS" BASIS,
015:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016:         * See the License for the specific language governing permissions and
017:         * limitations under the License.
018:         */
019:
020:        import java.io.IOException;
021:        import java.util.ConcurrentModificationException;
022:        import java.util.Vector;
023:        import java.util.Iterator;
024:
025:        import org.apache.lucene.document.Document;
026:        import org.apache.lucene.index.CorruptIndexException;
027:
028:        /** A ranked list of documents, used to hold search results.
029:         * <p>
030:         * <b>Caution:</b> Iterate only over the hits needed.  Iterating over all
031:         * hits is generally not desirable and may be the source of
032:         * performance issues. If you need to iterate over many or all hits, consider
033:         * using the search method that takes a {@link HitCollector}.
034:         * </p>
035:         * <p><b>Note:</b> Deleting matching documents concurrently with traversing 
036:         * the hits, might, when deleting hits that were not yet retrieved, decrease
037:         * {@link #length()}. In such case, 
038:         * {@link java.util.ConcurrentModificationException ConcurrentModificationException}
039:         * is thrown when accessing hit <code>n</code> &ge; current_{@link #length()} 
040:         * (but <code>n</code> &lt; {@link #length()}_at_start). 
041:         */
042:        public final class Hits {
043:            private Weight weight;
044:            private Searcher searcher;
045:            private Filter filter = null;
046:            private Sort sort = null;
047:
048:            private int length; // the total number of hits
049:            private Vector hitDocs = new Vector(); // cache of hits retrieved
050:
051:            private HitDoc first; // head of LRU cache
052:            private HitDoc last; // tail of LRU cache
053:            private int numDocs = 0; // number cached
054:            private int maxDocs = 200; // max to cache
055:
056:            private int nDeletions; // # deleted docs in the index.    
057:            private int lengthAtStart; // this is the number apps usually count on (although deletions can bring it down). 
058:            private int nDeletedHits = 0; // # of already collected hits that were meanwhile deleted.
059:
060:            boolean debugCheckedForDeletions = false; // for test purposes.
061:
062:            Hits(Searcher s, Query q, Filter f) throws IOException {
063:                weight = q.weight(s);
064:                searcher = s;
065:                filter = f;
066:                nDeletions = countDeletions(s);
067:                getMoreDocs(50); // retrieve 100 initially
068:                lengthAtStart = length;
069:            }
070:
071:            Hits(Searcher s, Query q, Filter f, Sort o) throws IOException {
072:                weight = q.weight(s);
073:                searcher = s;
074:                filter = f;
075:                sort = o;
076:                nDeletions = countDeletions(s);
077:                getMoreDocs(50); // retrieve 100 initially
078:                lengthAtStart = length;
079:            }
080:
081:            // count # deletions, return -1 if unknown.
082:            private int countDeletions(Searcher s) throws IOException {
083:                int cnt = -1;
084:                if (s instanceof  IndexSearcher) {
085:                    cnt = s.maxDoc()
086:                            - ((IndexSearcher) s).getIndexReader().numDocs();
087:                }
088:                return cnt;
089:            }
090:
091:            /**
092:             * Tries to add new documents to hitDocs.
093:             * Ensures that the hit numbered <code>min</code> has been retrieved.
094:             */
095:            private final void getMoreDocs(int min) throws IOException {
096:                if (hitDocs.size() > min) {
097:                    min = hitDocs.size();
098:                }
099:
100:                int n = min * 2; // double # retrieved
101:                TopDocs topDocs = (sort == null) ? searcher.search(weight,
102:                        filter, n) : searcher.search(weight, filter, n, sort);
103:
104:                length = topDocs.totalHits;
105:                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
106:
107:                float scoreNorm = 1.0f;
108:
109:                if (length > 0 && topDocs.getMaxScore() > 1.0f) {
110:                    scoreNorm = 1.0f / topDocs.getMaxScore();
111:                }
112:
113:                int start = hitDocs.size() - nDeletedHits;
114:
115:                // any new deletions?
116:                int nDels2 = countDeletions(searcher);
117:                debugCheckedForDeletions = false;
118:                if (nDeletions < 0 || nDels2 > nDeletions) {
119:                    // either we cannot count deletions, or some "previously valid hits" might have been deleted, so find exact start point
120:                    nDeletedHits = 0;
121:                    debugCheckedForDeletions = true;
122:                    int i2 = 0;
123:                    for (int i1 = 0; i1 < hitDocs.size()
124:                            && i2 < scoreDocs.length; i1++) {
125:                        int id1 = ((HitDoc) hitDocs.get(i1)).id;
126:                        int id2 = scoreDocs[i2].doc;
127:                        if (id1 == id2) {
128:                            i2++;
129:                        } else {
130:                            nDeletedHits++;
131:                        }
132:                    }
133:                    start = i2;
134:                }
135:
136:                int end = scoreDocs.length < length ? scoreDocs.length : length;
137:                length += nDeletedHits;
138:                for (int i = start; i < end; i++) {
139:                    hitDocs.addElement(new HitDoc(scoreDocs[i].score
140:                            * scoreNorm, scoreDocs[i].doc));
141:                }
142:
143:                nDeletions = nDels2;
144:            }
145:
146:            /** Returns the total number of hits available in this set. */
147:            public final int length() {
148:                return length;
149:            }
150:
151:            /** Returns the stored fields of the n<sup>th</sup> document in this set.
152:             * <p>Documents are cached, so that repeated requests for the same element may
153:             * return the same Document object.
154:             * @throws CorruptIndexException if the index is corrupt
155:             * @throws IOException if there is a low-level IO error
156:             */
157:            public final Document doc(int n) throws CorruptIndexException,
158:                    IOException {
159:                HitDoc hitDoc = hitDoc(n);
160:
161:                // Update LRU cache of documents
162:                remove(hitDoc); // remove from list, if there
163:                addToFront(hitDoc); // add to front of list
164:                if (numDocs > maxDocs) { // if cache is full
165:                    HitDoc oldLast = last;
166:                    remove(last); // flush last
167:                    oldLast.doc = null; // let doc get gc'd
168:                }
169:
170:                if (hitDoc.doc == null) {
171:                    hitDoc.doc = searcher.doc(hitDoc.id); // cache miss: read document
172:                }
173:
174:                return hitDoc.doc;
175:            }
176:
177:            /** Returns the score for the n<sup>th</sup> document in this set. */
178:            public final float score(int n) throws IOException {
179:                return hitDoc(n).score;
180:            }
181:
182:            /** Returns the id for the n<sup>th</sup> document in this set.
183:             * Note that ids may change when the index changes, so you cannot
184:             * rely on the id to be stable.
185:             */
186:            public final int id(int n) throws IOException {
187:                return hitDoc(n).id;
188:            }
189:
190:            /**
191:             * Returns a {@link HitIterator} to navigate the Hits.  Each item returned
192:             * from {@link Iterator#next()} is a {@link Hit}.
193:             * <p>
194:             * <b>Caution:</b> Iterate only over the hits needed.  Iterating over all
195:             * hits is generally not desirable and may be the source of
196:             * performance issues. If you need to iterate over many or all hits, consider
197:             * using a search method that takes a {@link HitCollector}.
198:             * </p>
199:             */
200:            public Iterator iterator() {
201:                return new HitIterator(this );
202:            }
203:
204:            private final HitDoc hitDoc(int n) throws IOException {
205:                if (n >= lengthAtStart) {
206:                    throw new IndexOutOfBoundsException(
207:                            "Not a valid hit number: " + n);
208:                }
209:
210:                if (n >= hitDocs.size()) {
211:                    getMoreDocs(n);
212:                }
213:
214:                if (n >= length) {
215:                    throw new ConcurrentModificationException(
216:                            "Not a valid hit number: " + n);
217:                }
218:
219:                return (HitDoc) hitDocs.elementAt(n);
220:            }
221:
222:            private final void addToFront(HitDoc hitDoc) { // insert at front of cache
223:                if (first == null) {
224:                    last = hitDoc;
225:                } else {
226:                    first.prev = hitDoc;
227:                }
228:
229:                hitDoc.next = first;
230:                first = hitDoc;
231:                hitDoc.prev = null;
232:
233:                numDocs++;
234:            }
235:
236:            private final void remove(HitDoc hitDoc) { // remove from cache
237:                if (hitDoc.doc == null) { // it's not in the list
238:                    return; // abort
239:                }
240:
241:                if (hitDoc.next == null) {
242:                    last = hitDoc.prev;
243:                } else {
244:                    hitDoc.next.prev = hitDoc.prev;
245:                }
246:
247:                if (hitDoc.prev == null) {
248:                    first = hitDoc.next;
249:                } else {
250:                    hitDoc.prev.next = hitDoc.next;
251:                }
252:
253:                numDocs--;
254:            }
255:        }
256:
257:        final class HitDoc {
258:            float score;
259:            int id;
260:            Document doc = null;
261:
262:            HitDoc next; // in doubly-linked cache
263:            HitDoc prev; // in doubly-linked cache
264:
265:            HitDoc(float s, int i) {
266:                score = s;
267:                id = i;
268:            }
269:        }

www.java2java.com | Contact Us

All other trademarks are property of their respective owners.