001: package org.apache.lucene.benchmark.byTask.feeds;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.analysis.Analyzer;
021: import org.apache.lucene.index.Term;
022: import org.apache.lucene.queryParser.QueryParser;
023: import org.apache.lucene.search.Query;
024: import org.apache.lucene.search.WildcardQuery;
025: import org.apache.lucene.search.spans.SpanFirstQuery;
026: import org.apache.lucene.search.spans.SpanNearQuery;
027: import org.apache.lucene.search.spans.SpanQuery;
028: import org.apache.lucene.search.spans.SpanTermQuery;
029:
030: import java.util.ArrayList;
031: import java.util.Arrays;
032: import java.util.List;
033:
034: /**
035: * A QueryMaker that makes queries devised manually (by Grant Ingersoll) for
036: * searching in the Reuters collection.
037: */
038: public class ReutersQueryMaker extends AbstractQueryMaker implements
039: QueryMaker {
040:
041: private static String[] STANDARD_QUERIES = {
042: //Start with some short queries
043: "Salomon",
044: "Comex",
045: "night trading",
046: "Japan Sony",
047: //Try some Phrase Queries
048: "\"Sony Japan\"",
049: "\"food needs\"~3",
050: "\"World Bank\"^2 AND Nigeria",
051: "\"World Bank\" -Nigeria",
052: "\"Ford Credit\"~5",
053: //Try some longer queries
054: "airline Europe Canada destination",
055: "Long term pressure by trade "
056: + "ministers is necessary if the current Uruguay round of talks on "
057: + "the General Agreement on Trade and Tariffs (GATT) is to "
058: + "succeed" };
059:
060: private static Query[] getPrebuiltQueries(String field) {
061: // be wary of unanalyzed text
062: return new Query[] {
063: new SpanFirstQuery(new SpanTermQuery(new Term(field,
064: "ford")), 5),
065: new SpanNearQuery(
066: new SpanQuery[] {
067: new SpanTermQuery(new Term(field,
068: "night")),
069: new SpanTermQuery(new Term(field,
070: "trading")) }, 4, false),
071: new SpanNearQuery(new SpanQuery[] {
072: new SpanFirstQuery(new SpanTermQuery(new Term(
073: field, "ford")), 10),
074: new SpanTermQuery(new Term(field, "credit")) },
075: 10, false),
076: new WildcardQuery(new Term(field, "fo*")), };
077: }
078:
079: /**
080: * Parse the strings containing Lucene queries.
081: *
082: * @param qs array of strings containing query expressions
083: * @param a analyzer to use when parsing queries
084: * @return array of Lucene queries
085: */
086: private static Query[] createQueries(List qs, Analyzer a) {
087: QueryParser qp = new QueryParser(BasicDocMaker.BODY_FIELD, a);
088: List queries = new ArrayList();
089: for (int i = 0; i < qs.size(); i++) {
090: try {
091:
092: Object query = qs.get(i);
093: Query q = null;
094: if (query instanceof String) {
095: q = qp.parse((String) query);
096:
097: } else if (query instanceof Query) {
098: q = (Query) query;
099:
100: } else {
101: System.err.println("Unsupported Query Type: "
102: + query);
103: }
104:
105: if (q != null) {
106: queries.add(q);
107: }
108:
109: } catch (Exception e) {
110: e.printStackTrace();
111: }
112: }
113:
114: return (Query[]) queries.toArray(new Query[0]);
115: }
116:
117: protected Query[] prepareQueries() throws Exception {
118: // analyzer (default is standard analyzer)
119: Analyzer anlzr = (Analyzer) Class
120: .forName(
121: config
122: .get("analyzer",
123: "org.apache.lucene.analysis.standard.StandardAnalyzer"))
124: .newInstance();
125:
126: List queryList = new ArrayList(20);
127: queryList.addAll(Arrays.asList(STANDARD_QUERIES));
128: queryList.addAll(Arrays
129: .asList(getPrebuiltQueries(BasicDocMaker.BODY_FIELD)));
130: return createQueries(queryList, anlzr);
131: }
132:
133: }
|