001: package org.apache.lucene.demo;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.analysis.Analyzer;
021: import org.apache.lucene.analysis.standard.StandardAnalyzer;
022: import org.apache.lucene.document.Document;
023: import org.apache.lucene.index.FilterIndexReader;
024: import org.apache.lucene.index.IndexReader;
025: import org.apache.lucene.queryParser.QueryParser;
026: import org.apache.lucene.search.Hits;
027: import org.apache.lucene.search.IndexSearcher;
028: import org.apache.lucene.search.Query;
029: import org.apache.lucene.search.Searcher;
030:
031: import java.io.BufferedReader;
032: import java.io.FileReader;
033: import java.io.IOException;
034: import java.io.InputStreamReader;
035: import java.util.Date;
036:
037: /** Simple command-line based search demo. */
038: public class SearchFiles {
039:
040: /** Use the norms from one field for all fields. Norms are read into memory,
041: * using a byte of memory per document per searched field. This can cause
042: * search of large collections with a large number of fields to run out of
043: * memory. If all of the fields contain only a single token, then the norms
044: * are all identical, then single norm vector may be shared. */
045: private static class OneNormsReader extends FilterIndexReader {
046: private String field;
047:
048: public OneNormsReader(IndexReader in, String field) {
049: super (in);
050: this .field = field;
051: }
052:
053: public byte[] norms(String field) throws IOException {
054: return in.norms(this .field);
055: }
056: }
057:
058: private SearchFiles() {
059: }
060:
061: /** Simple command-line based search demo. */
062: public static void main(String[] args) throws Exception {
063: String usage = "Usage: java org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field]";
064: if (args.length > 0
065: && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
066: System.out.println(usage);
067: System.exit(0);
068: }
069:
070: String index = "index";
071: String field = "contents";
072: String queries = null;
073: int repeat = 0;
074: boolean raw = false;
075: String normsField = null;
076:
077: for (int i = 0; i < args.length; i++) {
078: if ("-index".equals(args[i])) {
079: index = args[i + 1];
080: i++;
081: } else if ("-field".equals(args[i])) {
082: field = args[i + 1];
083: i++;
084: } else if ("-queries".equals(args[i])) {
085: queries = args[i + 1];
086: i++;
087: } else if ("-repeat".equals(args[i])) {
088: repeat = Integer.parseInt(args[i + 1]);
089: i++;
090: } else if ("-raw".equals(args[i])) {
091: raw = true;
092: } else if ("-norms".equals(args[i])) {
093: normsField = args[i + 1];
094: i++;
095: }
096: }
097:
098: IndexReader reader = IndexReader.open(index);
099:
100: if (normsField != null)
101: reader = new OneNormsReader(reader, normsField);
102:
103: Searcher searcher = new IndexSearcher(reader);
104: Analyzer analyzer = new StandardAnalyzer();
105:
106: BufferedReader in = null;
107: if (queries != null) {
108: in = new BufferedReader(new FileReader(queries));
109: } else {
110: in = new BufferedReader(new InputStreamReader(System.in,
111: "UTF-8"));
112: }
113: QueryParser parser = new QueryParser(field, analyzer);
114: while (true) {
115: if (queries == null) // prompt the user
116: System.out.println("Enter query: ");
117:
118: String line = in.readLine();
119:
120: if (line == null || line.length() == -1)
121: break;
122:
123: line = line.trim();
124: if (line.length() == 0)
125: break;
126:
127: Query query = parser.parse(line);
128: System.out.println("Searching for: "
129: + query.toString(field));
130:
131: Hits hits = searcher.search(query);
132:
133: if (repeat > 0) { // repeat & time as benchmark
134: Date start = new Date();
135: for (int i = 0; i < repeat; i++) {
136: hits = searcher.search(query);
137: }
138: Date end = new Date();
139: System.out.println("Time: "
140: + (end.getTime() - start.getTime()) + "ms");
141: }
142:
143: System.out.println(hits.length()
144: + " total matching documents");
145:
146: final int HITS_PER_PAGE = 10;
147: for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
148: int end = Math
149: .min(hits.length(), start + HITS_PER_PAGE);
150: for (int i = start; i < end; i++) {
151:
152: if (raw) { // output raw format
153: System.out.println("doc=" + hits.id(i)
154: + " score=" + hits.score(i));
155: continue;
156: }
157:
158: Document doc = hits.doc(i);
159: String path = doc.get("path");
160: if (path != null) {
161: System.out.println((i + 1) + ". " + path);
162: String title = doc.get("title");
163: if (title != null) {
164: System.out.println(" Title: "
165: + doc.get("title"));
166: }
167: } else {
168: System.out.println((i + 1) + ". "
169: + "No path for this document");
170: }
171: }
172:
173: if (queries != null) // non-interactive
174: break;
175:
176: if (hits.length() > end) {
177: System.out.println("more (y/n) ? ");
178: line = in.readLine();
179: if (line.length() == 0 || line.charAt(0) == 'n')
180: break;
181: }
182: }
183: }
184: reader.close();
185: }
186: }
|