Source Code Cross Reference for LukeRequestHandler.java (Apache Solr 1.2.0, org.apache.solr.handler.admin)



001:        /**
002:         * Licensed to the Apache Software Foundation (ASF) under one or more
003:         * contributor license agreements.  See the NOTICE file distributed with
004:         * this work for additional information regarding copyright ownership.
005:         * The ASF licenses this file to You under the Apache License, Version 2.0
006:         * (the "License"); you may not use this file except in compliance with
007:         * the License.  You may obtain a copy of the License at
008:         *
009:         *     http://www.apache.org/licenses/LICENSE-2.0
010:         *
011:         * Unless required by applicable law or agreed to in writing, software
012:         * distributed under the License is distributed on an "AS IS" BASIS,
013:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014:         * See the License for the specific language governing permissions and
015:         * limitations under the License.
016:         */package org.apache.solr.handler.admin;
017:
018:        import java.io.IOException;
019:        import java.net.MalformedURLException;
020:        import java.net.URL;
021:        import java.util.Collection;
022:        import java.util.Date;
023:        import java.util.HashMap;
024:        import java.util.HashSet;
025:        import java.util.LinkedList;
026:        import java.util.List;
027:        import java.util.Map;
028:        import java.util.Set;
029:        import java.util.logging.Level;
030:        import java.util.logging.Logger;
031:
032:        import org.apache.lucene.document.Document;
033:        import org.apache.lucene.document.Fieldable;
034:        import org.apache.lucene.index.IndexReader;
035:        import org.apache.lucene.index.Term;
036:        import org.apache.lucene.index.TermEnum;
037:        import org.apache.lucene.index.TermFreqVector;
038:        import org.apache.lucene.search.MatchAllDocsQuery;
039:        import org.apache.lucene.search.Query;
040:        import org.apache.lucene.search.Sort;
041:        import org.apache.lucene.store.Directory;
042:        import org.apache.lucene.util.PriorityQueue;
043:        import org.apache.solr.core.SolrException;
044:        import org.apache.solr.handler.RequestHandlerBase;
045:        import org.apache.solr.handler.RequestHandlerUtils;
046:        import org.apache.solr.request.SolrParams;
047:        import org.apache.solr.request.SolrQueryRequest;
048:        import org.apache.solr.request.SolrQueryResponse;
049:        import org.apache.solr.schema.FieldType;
050:        import org.apache.solr.schema.IndexSchema;
051:        import org.apache.solr.schema.SchemaField;
052:        import org.apache.solr.search.DocList;
053:        import org.apache.solr.search.SolrIndexSearcher;
054:        import org.apache.solr.search.SolrQueryParser;
055:        import org.apache.solr.util.NamedList;
056:        import org.apache.solr.util.SimpleOrderedMap;
057:
058:        /**
059:         * This handler exposes the internal lucene index.  It is inspired by and 
060:         * modeled on Luke, the Lucene Index Browser by Andrzej Bialecki.
061:         *   http://www.getopt.org/luke/
062:         * <p>
063:         * NOTE: the response format is still likely to change.  It should be designed so
064:         * that it works nicely with an XSLT transformation.  Until we have a nice
065:         * XSLT front end for /admin, the format is still open to change.
066:         * </p>
067:         * 
068:         * For more documentation see:
069:         *  http://wiki.apache.org/solr/LukeRequestHandler
070:         * 
071:         * @author ryan
072:         * @version $Id: LukeRequestHandler.java 542679 2007-05-29 22:28:21Z ryan $
073:         * @since solr 1.2
074:         */
075:        public class LukeRequestHandler extends RequestHandlerBase {
076:            private static Logger log = Logger
077:                    .getLogger(LukeRequestHandler.class.getName());
078:
079:            public static final String NUMTERMS = "numTerms";
080:            public static final String DOC_ID = "docId";
081:            public static final String ID = "id";
082:            public static final int DEFAULT_COUNT = 10;
083:
084:            @Override
085:            public void handleRequestBody(SolrQueryRequest req,
086:                    SolrQueryResponse rsp) throws Exception {
087:                RequestHandlerUtils.addExperimentalFormatWarning(rsp);
088:
089:                IndexSchema schema = req.getSchema();
090:                SolrIndexSearcher searcher = req.getSearcher();
091:                IndexReader reader = searcher.getReader();
092:                SolrParams params = req.getParams();
093:                int numTerms = params.getInt(NUMTERMS, DEFAULT_COUNT);
094:
095:                // Always show the core lucene info
096:                rsp.add("index", getIndexInfo(reader, numTerms > 0));
097:
098:                Integer docId = params.getInt(DOC_ID);
099:                if (docId == null && params.get(ID) != null) {
100:                    // Look for something with a given solr ID
101:                    SchemaField uniqueKey = schema.getUniqueKeyField();
102:                    String v = uniqueKey.getType().toInternal(params.get(ID));
103:                    Term t = new Term(uniqueKey.getName(), v);
104:                    docId = searcher.getFirstMatch(t);
105:                    if (docId < 0) {
106:                        throw new SolrException(
107:                                SolrException.ErrorCode.NOT_FOUND,
108:                                "Can't find document: " + params.get(ID));
109:                    }
110:                }
111:
112:                // Read the document from the index
113:                if (docId != null) {
114:                    Document doc = null;
115:                    try {
116:                        doc = reader.document(docId);
117:                    } catch (Exception ex) { // ignored; a missing or unreadable doc is reported via the NOT_FOUND check below
118:                    }
119:                    if (doc == null) {
120:                        throw new SolrException(
121:                                SolrException.ErrorCode.NOT_FOUND,
122:                                "Can't find document: " + docId);
123:                    }
124:
125:                    SimpleOrderedMap<Object> info = getDocumentFieldsInfo(doc,
126:                            docId, reader, schema);
127:
128:                    SimpleOrderedMap<Object> docinfo = new SimpleOrderedMap<Object>();
129:                    docinfo.add("docId", docId);
130:                    docinfo.add("lucene", info);
131:                    docinfo.add("solr", doc);
132:                    rsp.add("doc", docinfo);
133:                } else {
134:                    // If no doc is given, show all fields and top terms
135:                    Set<String> fields = null;
136:                    if (params.get(SolrParams.FL) != null) {
137:                        fields = new HashSet<String>();
138:                        for (String f : params.getParams(SolrParams.FL)) {
139:                            fields.add(f);
140:                        }
141:                    }
142:                    rsp.add("fields", getIndexedFieldsInfo(searcher, fields,
143:                            numTerms));
144:                }
145:
146:                // Add some generally helpful information
147:                NamedList<Object> info = new SimpleOrderedMap<Object>();
148:                info.add("key", getFieldFlagsKey());
149:                info
150:                        .add(
151:                                "NOTE",
152:                                "Document Frequency (df) is not updated when a document is marked for deletion.  df values include deleted documents.");
153:                rsp.add("info", info);
154:            }
155:
156:            /**
157:             * @return a string representing a Fieldable's flags.  
158:             */
159:            private static String getFieldFlags(Fieldable f) {
160:                StringBuilder flags = new StringBuilder();
161:                flags.append((f != null && f.isIndexed()) ? 'I' : '-');
162:                flags.append((f != null && f.isTokenized()) ? 'T' : '-');
163:                flags.append((f != null && f.isStored()) ? 'S' : '-');
164:                flags.append((false) ? 'M' : '-'); // SchemaField Specific
165:                flags.append((f != null && f.isTermVectorStored()) ? 'V' : '-');
166:                flags
167:                        .append((f != null && f.isStoreOffsetWithTermVector()) ? 'o'
168:                                : '-');
169:                flags
170:                        .append((f != null && f.isStorePositionWithTermVector()) ? 'p'
171:                                : '-');
172:                flags.append((f != null && f.getOmitNorms()) ? 'O' : '-');
173:                flags.append((f != null && f.isLazy()) ? 'L' : '-');
174:                flags.append((f != null && f.isBinary()) ? 'B' : '-');
175:                flags.append((f != null && f.isCompressed()) ? 'C' : '-');
176:                flags.append((false) ? 'f' : '-'); // SchemaField Specific
177:                flags.append((false) ? 'l' : '-'); // SchemaField Specific
178:                return flags.toString();
179:            }
180:
181:            /**
182:             * @return a string representing a SchemaField's flags.  
183:             */
184:            private static String getFieldFlags(SchemaField f) {
185:                FieldType t = (f == null) ? null : f.getType();
186:
187:                // see: http://www.nabble.com/schema-field-properties-tf3437753.html#a9585549
188:                boolean lazy = false; // "lazy" is purely a property of reading fields
189:                boolean binary = false; // Currently not possible
190:
191:                StringBuilder flags = new StringBuilder();
192:                flags.append((f != null && f.indexed()) ? 'I' : '-');
193:                flags.append((t != null && t.isTokenized()) ? 'T' : '-');
194:                flags.append((f != null && f.stored()) ? 'S' : '-');
195:                flags.append((f != null && f.multiValued()) ? 'M' : '-');
196:                flags.append((f != null && f.storeTermVector()) ? 'V' : '-');
197:                flags.append((f != null && f.storeTermOffsets()) ? 'o' : '-');
198:                flags.append((f != null && f.storeTermPositions()) ? 'p' : '-');
199:                flags.append((f != null && f.omitNorms()) ? 'O' : '-');
200:                flags.append((lazy) ? 'L' : '-');
201:                flags.append((binary) ? 'B' : '-');
202:                flags.append((f != null && f.isCompressed()) ? 'C' : '-');
203:                flags.append((f != null && f.sortMissingFirst()) ? 'f' : '-');
204:                flags.append((f != null && f.sortMissingLast()) ? 'l' : '-');
205:                return flags.toString();
206:            }
207:
208:            /**
209:             * @return a key to what each character means
210:             */
211:            private static SimpleOrderedMap<String> getFieldFlagsKey() {
212:                SimpleOrderedMap<String> key = new SimpleOrderedMap<String>();
213:                key.add("I", "Indexed");
214:                key.add("T", "Tokenized");
215:                key.add("S", "Stored");
216:                key.add("M", "Multivalued");
217:                key.add("V", "TermVector Stored");
218:                key.add("o", "Store Offset With TermVector");
219:                key.add("p", "Store Position With TermVector");
220:                key.add("O", "Omit Norms");
221:                key.add("L", "Lazy");
222:                key.add("B", "Binary");
223:                key.add("C", "Compressed");
224:                key.add("f", "Sort Missing First");
225:                key.add("l", "Sort Missing Last");
226:                return key;
227:            }
228:
229:            private static SimpleOrderedMap<Object> getDocumentFieldsInfo(
230:                    Document doc, int docId, IndexReader reader,
231:                    IndexSchema schema) throws IOException {
232:                SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
233:                for (Object o : doc.getFields()) {
234:                    Fieldable fieldable = (Fieldable) o;
235:                    SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
236:
237:                    SchemaField sfield = schema
238:                            .getFieldOrNull(fieldable.name());
239:                    FieldType ftype = (sfield == null) ? null : sfield
240:                            .getType();
241:
242:                    f.add("type", (ftype == null) ? null : ftype.getTypeName());
243:                    f.add("schema", getFieldFlags(sfield));
244:                    f.add("flags", getFieldFlags(fieldable));
245:
246:                    Term t = new Term(fieldable.name(), fieldable.stringValue());
247:                    f.add("value", (ftype == null) ? null : ftype
248:                            .toExternal(fieldable));
249:                    f.add("internal", fieldable.stringValue()); // may be a binary number
250:                    f.add("boost", fieldable.getBoost());
251:                    f.add("docFreq", reader.docFreq(t)); // this can be 0 for non-indexed fields
252:
253:                    // If we have a term vector, return that
254:                    if (fieldable.isTermVectorStored()) {
255:                        try {
256:                            TermFreqVector v = reader.getTermFreqVector(docId,
257:                                    fieldable.name());
258:                            if (v != null) {
259:                                SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
260:                                for (int i = 0; i < v.size(); i++) {
261:                                    tfv.add(v.getTerms()[i], v
262:                                            .getTermFrequencies()[i]);
263:                                }
264:                                f.add("termVector", tfv);
265:                            }
266:                        } catch (Exception ex) {
267:                            log.log(Level.WARNING, "error writing term vector",
268:                                    ex);
269:                        }
270:                    }
271:
272:                    finfo.add(fieldable.name(), f);
273:                }
274:                return finfo;
275:            }
276:
277:            @SuppressWarnings("unchecked")
278:            private static SimpleOrderedMap<Object> getIndexedFieldsInfo(
279:                    final SolrIndexSearcher searcher, final Set<String> fields,
280:                    final int numTerms) throws Exception {
281:                Query matchAllDocs = new MatchAllDocsQuery();
282:                SolrQueryParser qp = searcher.getSchema().getSolrQueryParser(
283:                        null);
284:
285:                IndexReader reader = searcher.getReader();
286:                IndexSchema schema = searcher.getSchema();
287:
288:                // Walk the term enum and keep a priority queue for each map in our set
289:                Map<String, TopTermQueue> ttinfo = null;
290:                if (numTerms > 0) {
291:                    ttinfo = getTopTerms(reader, fields, numTerms, null);
292:                }
293:                SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
294:                Collection<String> fieldNames = reader
295:                        .getFieldNames(IndexReader.FieldOption.ALL);
296:                for (String fieldName : fieldNames) {
297:                    if (fields != null && !fields.contains(fieldName)) {
298:                        continue; // if specific fields were requested, only include those
299:                    }
300:
301:                    SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
302:
303:                    SchemaField sfield = schema.getFieldOrNull(fieldName);
304:                    FieldType ftype = (sfield == null) ? null : sfield
305:                            .getType();
306:
307:                    f.add("type", (ftype == null) ? null : ftype.getTypeName());
308:                    f.add("schema", getFieldFlags(sfield));
309:
310:                    // If numTerms==0, the call is just asking for a quick field list
311:                    if (ttinfo != null && sfield != null && sfield.indexed()) {
312:                        Query q = qp.parse(fieldName + ":[* TO *]");
313:                        int docCount = searcher.numDocs(q, matchAllDocs);
314:                        if (docCount > 0) {
315:                            // Find a document with this field
316:                            DocList ds = searcher.getDocList(q, (Query) null,
317:                                    (Sort) null, 0, 1);
318:                            try {
319:                                Document doc = searcher.doc(ds.iterator()
320:                                        .next());
321:                                Fieldable fld = doc.getFieldable(fieldName);
322:                                if (fld != null) {
323:                                    f.add("index", getFieldFlags(fld));
324:                                } else {
325:                                    // it is a non-stored field...
326:                                    f.add("index", "(unstored field)");
327:                                }
328:                            } catch (Exception ex) {
329:                                log
330:                                        .warning("error reading field: "
331:                                                + fieldName);
332:                            }
333:                            // Find one document so we can get the fieldable
334:                        }
335:                        f.add("docs", docCount);
336:
337:                        TopTermQueue topTerms = ttinfo.get(fieldName);
338:                        if (topTerms != null) {
339:                            f.add("distinct", topTerms.distinctTerms);
340:
341:                            // Include top terms
342:                            f.add("topTerms", topTerms.toNamedList(searcher
343:                                    .getSchema()));
344:
345:                            // Add a histogram
346:                            f
347:                                    .add("histogram", topTerms.histogram
348:                                            .toNamedList());
349:                        }
350:                    }
351:
352:                    // Add the field
353:                    finfo.add(fieldName, f);
354:                }
355:                return finfo;
356:            }
357:
358:            private static SimpleOrderedMap<Object> getIndexInfo(
359:                    IndexReader reader, boolean countTerms) throws IOException {
360:                Directory dir = reader.directory();
361:                SimpleOrderedMap<Object> indexInfo = new SimpleOrderedMap<Object>();
362:                indexInfo.add("numDocs", reader.numDocs());
363:                indexInfo.add("maxDoc", reader.maxDoc());
364:
365:                if (countTerms) {
366:                    TermEnum te = reader.terms();
367:                    int numTerms = 0;
368:                    while (te.next()) {
369:                        numTerms++;
370:                    }
371:                    indexInfo.add("numTerms", numTerms);
372:                }
373:
374:                indexInfo.add("version", reader.getVersion()); // TODO? Is this different from IndexReader.getCurrentVersion( dir )?
375:                indexInfo.add("optimized", reader.isOptimized());
376:                indexInfo.add("current", reader.isCurrent());
377:                indexInfo.add("hasDeletions", reader.hasDeletions());
378:                indexInfo.add("directory", dir);
379:                indexInfo.add("lastModified", new Date(IndexReader
380:                        .lastModified(dir)));
381:                return indexInfo;
382:            }
383:
384:            //////////////////////// SolrInfoMBeans methods //////////////////////
385:
386:            @Override
387:            public String getDescription() {
388:                return "Lucene Index Browser.  Inspired and modeled after Luke: http://www.getopt.org/luke/";
389:            }
390:
391:            @Override
392:            public String getVersion() {
393:                return "$Revision: 542679 $";
394:            }
395:
396:            @Override
397:            public String getSourceId() {
398:                return "$Id: LukeRequestHandler.java 542679 2007-05-29 22:28:21Z ryan $";
399:            }
400:
401:            @Override
402:            public String getSource() {
403:                return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.2/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java $";
404:            }
405:
406:            @Override
407:            public URL[] getDocs() {
408:                try {
409:                    return new URL[] { new URL(
410:                            "http://wiki.apache.org/solr/LukeRequestHandler") };
411:                } catch (MalformedURLException ex) {
412:                    return null;
413:                }
414:            }
415:
416:            ///////////////////////////////////////////////////////////////////////////////////////
417:
418:            private static class TermHistogram {
419:                int maxBucket = -1;
420:                public Map<Integer, Integer> hist = new HashMap<Integer, Integer>();
421:
422:                private static final double LOG2 = Math.log(2);
423:
424:                public static int getPowerOfTwoBucket(int num) {
425:                    int exp = (int) Math.ceil((Math.log(num) / LOG2));
426:                    return (int) Math.pow(2, exp);
427:                }
428:
429:                public void add(int df) {
430:                    Integer bucket = getPowerOfTwoBucket(df);
431:                    if (bucket > maxBucket) {
432:                        maxBucket = bucket;
433:                    }
434:                    Integer old = hist.get(bucket);
435:                    if (old == null) {
436:                        hist.put(bucket, 1);
437:                    } else {
438:                        hist.put(bucket, old + 1);
439:                    }
440:                }
441:
442:                // TODO? should this be a list or a map?
443:                public NamedList<Integer> toNamedList() {
444:                    NamedList<Integer> nl = new NamedList<Integer>();
445:                    for (int bucket = 2; bucket <= maxBucket; bucket *= 2) {
446:                        Integer val = hist.get(bucket);
447:                        if (val == null) {
448:                            val = 0;
449:                        }
450:                        nl.add("" + bucket, val);
451:                    }
452:                    return nl;
453:                }
454:            }
455:
456:            /**
457:             * Private internal class that counts up frequent terms
458:             */
459:            private static class TopTermQueue extends PriorityQueue {
460:                static class TermInfo {
461:                    TermInfo(Term t, int df) {
462:                        term = t;
463:                        docFreq = df;
464:                    }
465:
466:                    int docFreq;
467:                    Term term;
468:                }
469:
470:                public int minFreq = 0;
471:                public int distinctTerms = 0;
472:                public TermHistogram histogram;
473:
474:                TopTermQueue(int size) {
475:                    initialize(size);
476:                    histogram = new TermHistogram();
477:                }
478:
479:                @Override
480:                protected final boolean lessThan(Object a, Object b) {
481:                    TermInfo termInfoA = (TermInfo) a;
482:                    TermInfo termInfoB = (TermInfo) b;
483:                    return termInfoA.docFreq < termInfoB.docFreq;
484:                }
485:
486:                /**
487:                 * This is a destructive call... the queue is empty at the end
488:                 */
489:                public NamedList<Integer> toNamedList(IndexSchema schema) {
490:                    // reverse the list..
491:                    List<TermInfo> aslist = new LinkedList<TermInfo>();
492:                    while (size() > 0) {
493:                        aslist.add(0, (TermInfo) pop());
494:                    }
495:
496:                    NamedList<Integer> list = new NamedList<Integer>();
497:                    for (TermInfo i : aslist) {
498:                        String txt = i.term.text();
499:                        SchemaField ft = schema.getFieldOrNull(i.term.field());
500:                        if (ft != null) {
501:                            txt = ft.getType().indexedToReadable(txt);
502:                        }
503:                        list.add(txt, i.docFreq);
504:                    }
505:                    return list;
506:                }
507:            }
508:
509:            private static Map<String, TopTermQueue> getTopTerms(
510:                    IndexReader reader, Set<String> fields, int numTerms,
511:                    Set<String> junkWords) throws Exception {
512:                Map<String, TopTermQueue> info = new HashMap<String, TopTermQueue>();
513:                TermEnum terms = reader.terms();
514:
515:                while (terms.next()) {
516:                    String field = terms.term().field();
517:                    String t = terms.term().text();
518:
519:                    // Compute distinct terms for every field
520:                    TopTermQueue tiq = info.get(field);
521:                    if (tiq == null) {
522:                        tiq = new TopTermQueue(numTerms);
523:                        info.put(field, tiq);
524:                    }
525:                    tiq.distinctTerms++;
526:                    tiq.histogram.add(terms.docFreq()); // add the term to the histogram
527:
528:                    // Only save the distinct terms for fields we worry about
529:                    if (fields != null && fields.size() > 0) {
530:                        if (!fields.contains(field)) {
531:                            continue;
532:                        }
533:                    }
534:                    if (junkWords != null && junkWords.contains(t)) {
535:                        continue;
536:                    }
537:
538:                    if (terms.docFreq() > tiq.minFreq) {
539:                        tiq.put(new TopTermQueue.TermInfo(terms.term(), terms
540:                                .docFreq()));
541:                        if (tiq.size() >= numTerms) { // if tiq full
542:                            tiq.pop(); // remove lowest in tiq
543:                            tiq.minFreq = ((TopTermQueue.TermInfo) tiq.top()).docFreq; // reset minFreq
544:                        }
545:                    }
546:                }
547:                return info;
548:            }
549:        }
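
Usage note (not part of the source file above): the class javadoc points to http://wiki.apache.org/solr/LukeRequestHandler for documentation. As a minimal sketch, assuming the handler has been registered in solrconfig.xml under the name "/admin/luke" and that Solr is listening on localhost:8983 (both assumptions, not shown in the source), the report built by handleRequestBody can be fetched with plain java.net classes. The request parameters used below (numTerms, id, docId, fl) are the ones the handler reads above.

        import java.io.BufferedReader;
        import java.io.InputStreamReader;
        import java.net.HttpURLConnection;
        import java.net.URL;

        // Minimal sketch, not part of LukeRequestHandler.java: fetch the handler's
        // report over HTTP.  The "/admin/luke" registration name, host and port are
        // assumptions; adjust them to match your solrconfig.xml and container.
        public class LukeHandlerClient {
            public static void main(String[] args) throws Exception {
                // numTerms=0 skips the term walk and returns only the quick field list;
                // add &id=<uniqueKey value> or &docId=<lucene doc id> to inspect one document.
                URL url = new URL("http://localhost:8983/solr/admin/luke?numTerms=10");
                HttpURLConnection conn = (HttpURLConnection) url.openConnection();
                conn.setRequestMethod("GET");

                BufferedReader in = new BufferedReader(
                        new InputStreamReader(conn.getInputStream(), "UTF-8"));
                String line;
                while ((line = in.readLine()) != null) {
                    System.out.println(line); // index stats, per-field flags, top terms, histogram
                }
                in.close();
                conn.disconnect();
            }
        }

Against the key returned by getFieldFlagsKey, an indexed, tokenized, stored, single-valued field with norms and no term vectors would, for example, show the flag string "ITS----------".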