01: package org.apache.lucene.benchmark.byTask.feeds;
02:
03: /**
04: * Licensed to the Apache Software Foundation (ASF) under one or more
05: * contributor license agreements. See the NOTICE file distributed with
06: * this work for additional information regarding copyright ownership.
07: * The ASF licenses this file to You under the Apache License, Version 2.0
08: * (the "License"); you may not use this file except in compliance with
09: * the License. You may obtain a copy of the License at
10: *
11: * http://www.apache.org/licenses/LICENSE-2.0
12: *
13: * Unless required by applicable law or agreed to in writing, software
14: * distributed under the License is distributed on an "AS IS" BASIS,
15: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16: * See the License for the specific language governing permissions and
17: * limitations under the License.
18: */
19:
20: import java.util.ArrayList;
21: import java.util.StringTokenizer;
22:
23: import org.apache.lucene.index.Term;
24: import org.apache.lucene.search.PhraseQuery;
25: import org.apache.lucene.search.Query;
26:
27: /**
28: * Create sloppy phrase queries for performance test, in an index created using simple doc maker.
29: */
30: public class SimpleSloppyPhraseQueryMaker extends SimpleQueryMaker {
31:
32: /* (non-Javadoc)
33: * @see org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker#prepareQueries()
34: */
35: protected Query[] prepareQueries() throws Exception {
36: // exatract some 100 words from doc text to an array
37: String words[];
38: ArrayList w = new ArrayList();
39: StringTokenizer st = new StringTokenizer(
40: SimpleDocMaker.DOC_TEXT);
41: while (st.hasMoreTokens() && w.size() < 100) {
42: w.add(st.nextToken());
43: }
44: words = (String[]) w.toArray(new String[0]);
45:
46: // create queries (that would find stuff) with varying slops
47: ArrayList queries = new ArrayList();
48: for (int slop = 0; slop < 8; slop++) {
49: for (int qlen = 2; qlen < 6; qlen++) {
50: for (int wd = 0; wd < words.length - qlen - slop; wd++) {
51: // ordered
52: int remainedSlop = slop;
53: PhraseQuery q = new PhraseQuery();
54: q.setSlop(slop);
55: int wind = wd;
56: for (int i = 0; i < qlen; i++) {
57: q.add(new Term(BasicDocMaker.BODY_FIELD,
58: words[wind++]));
59: if (remainedSlop > 0) {
60: remainedSlop--;
61: wind++;
62: }
63: }
64: queries.add(q);
65: // reveresed
66: remainedSlop = slop;
67: q = new PhraseQuery();
68: q.setSlop(slop + 2 * qlen);
69: wind = wd + qlen + remainedSlop - 1;
70: for (int i = 0; i < qlen; i++) {
71: q.add(new Term(BasicDocMaker.BODY_FIELD,
72: words[wind--]));
73: if (remainedSlop > 0) {
74: remainedSlop--;
75: wind--;
76: }
77: }
78: queries.add(q);
79: }
80: }
81: }
82: return (Query[]) queries.toArray(new Query[0]);
83: }
84:
85: }
|