001: package org.apache.lucene.search.spans;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.util.LuceneTestCase;
021:
022: import java.io.IOException;
023:
024: import org.apache.lucene.util.English;
025: import org.apache.lucene.analysis.SimpleAnalyzer;
026: import org.apache.lucene.document.Document;
027: import org.apache.lucene.document.Field;
028: import org.apache.lucene.index.IndexWriter;
029: import org.apache.lucene.index.Term;
030: import org.apache.lucene.store.RAMDirectory;
031:
032: import org.apache.lucene.search.*;
033:
034: /**
035: * Tests basic search capabilities.
036: *
037: * <p>Uses a collection of 1000 documents, each the english rendition of their
038: * document number. For example, the document numbered 333 has text "three
039: * hundred thirty three".
040: *
041: * <p>Tests are each a single query, and its hits are checked to ensure that
042: * all and only the correct documents are returned, thus providing end-to-end
043: * testing of the indexing and search code.
044: *
045: * @author Doug Cutting
046: */
047: public class TestBasics extends LuceneTestCase {
048: private IndexSearcher searcher;
049:
050: public void setUp() throws Exception {
051: super .setUp();
052: RAMDirectory directory = new RAMDirectory();
053: IndexWriter writer = new IndexWriter(directory,
054: new SimpleAnalyzer(), true);
055: //writer.infoStream = System.out;
056: for (int i = 0; i < 1000; i++) {
057: Document doc = new Document();
058: doc.add(new Field("field", English.intToEnglish(i),
059: Field.Store.YES, Field.Index.TOKENIZED));
060: writer.addDocument(doc);
061: }
062:
063: writer.close();
064:
065: searcher = new IndexSearcher(directory);
066: }
067:
068: public void testTerm() throws Exception {
069: Query query = new TermQuery(new Term("field", "seventy"));
070: checkHits(query, new int[] { 70, 71, 72, 73, 74, 75, 76, 77,
071: 78, 79, 170, 171, 172, 173, 174, 175, 176, 177, 178,
072: 179, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279,
073: 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 470,
074: 471, 472, 473, 474, 475, 476, 477, 478, 479, 570, 571,
075: 572, 573, 574, 575, 576, 577, 578, 579, 670, 671, 672,
076: 673, 674, 675, 676, 677, 678, 679, 770, 771, 772, 773,
077: 774, 775, 776, 777, 778, 779, 870, 871, 872, 873, 874,
078: 875, 876, 877, 878, 879, 970, 971, 972, 973, 974, 975,
079: 976, 977, 978, 979 });
080: }
081:
082: public void testTerm2() throws Exception {
083: Query query = new TermQuery(new Term("field", "seventish"));
084: checkHits(query, new int[] {});
085: }
086:
087: public void testPhrase() throws Exception {
088: PhraseQuery query = new PhraseQuery();
089: query.add(new Term("field", "seventy"));
090: query.add(new Term("field", "seven"));
091: checkHits(query, new int[] { 77, 177, 277, 377, 477, 577, 677,
092: 777, 877, 977 });
093: }
094:
095: public void testPhrase2() throws Exception {
096: PhraseQuery query = new PhraseQuery();
097: query.add(new Term("field", "seventish"));
098: query.add(new Term("field", "sevenon"));
099: checkHits(query, new int[] {});
100: }
101:
102: public void testBoolean() throws Exception {
103: BooleanQuery query = new BooleanQuery();
104: query.add(new TermQuery(new Term("field", "seventy")),
105: BooleanClause.Occur.MUST);
106: query.add(new TermQuery(new Term("field", "seven")),
107: BooleanClause.Occur.MUST);
108: checkHits(query, new int[] { 77, 777, 177, 277, 377, 477, 577,
109: 677, 770, 771, 772, 773, 774, 775, 776, 778, 779, 877,
110: 977 });
111: }
112:
113: public void testBoolean2() throws Exception {
114: BooleanQuery query = new BooleanQuery();
115: query.add(new TermQuery(new Term("field", "sevento")),
116: BooleanClause.Occur.MUST);
117: query.add(new TermQuery(new Term("field", "sevenly")),
118: BooleanClause.Occur.MUST);
119: checkHits(query, new int[] {});
120: }
121:
122: public void testSpanNearExact() throws Exception {
123: SpanTermQuery term1 = new SpanTermQuery(new Term("field",
124: "seventy"));
125: SpanTermQuery term2 = new SpanTermQuery(new Term("field",
126: "seven"));
127: SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {
128: term1, term2 }, 0, true);
129: checkHits(query, new int[] { 77, 177, 277, 377, 477, 577, 677,
130: 777, 877, 977 });
131:
132: assertTrue(searcher.explain(query, 77).getValue() > 0.0f);
133: assertTrue(searcher.explain(query, 977).getValue() > 0.0f);
134:
135: QueryUtils.check(term1);
136: QueryUtils.check(term2);
137: QueryUtils.checkUnequal(term1, term2);
138: }
139:
140: public void testSpanNearUnordered() throws Exception {
141: SpanTermQuery term1 = new SpanTermQuery(new Term("field",
142: "nine"));
143: SpanTermQuery term2 = new SpanTermQuery(
144: new Term("field", "six"));
145: SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {
146: term1, term2 }, 4, false);
147:
148: checkHits(query, new int[] { 609, 629, 639, 649, 659, 669, 679,
149: 689, 699, 906, 926, 936, 946, 956, 966, 976, 986, 996 });
150: }
151:
152: public void testSpanNearOrdered() throws Exception {
153: SpanTermQuery term1 = new SpanTermQuery(new Term("field",
154: "nine"));
155: SpanTermQuery term2 = new SpanTermQuery(
156: new Term("field", "six"));
157: SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {
158: term1, term2 }, 4, true);
159: checkHits(query, new int[] { 906, 926, 936, 946, 956, 966, 976,
160: 986, 996 });
161: }
162:
163: public void testSpanNot() throws Exception {
164: SpanTermQuery term1 = new SpanTermQuery(new Term("field",
165: "eight"));
166: SpanTermQuery term2 = new SpanTermQuery(
167: new Term("field", "one"));
168: SpanNearQuery near = new SpanNearQuery(new SpanQuery[] { term1,
169: term2 }, 4, true);
170: SpanTermQuery term3 = new SpanTermQuery(new Term("field",
171: "forty"));
172: SpanNotQuery query = new SpanNotQuery(near, term3);
173:
174: checkHits(query, new int[] { 801, 821, 831, 851, 861, 871, 881,
175: 891 });
176:
177: assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
178: assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
179: }
180:
181: public void testSpanWithMultipleNotSingle() throws Exception {
182: SpanTermQuery term1 = new SpanTermQuery(new Term("field",
183: "eight"));
184: SpanTermQuery term2 = new SpanTermQuery(
185: new Term("field", "one"));
186: SpanNearQuery near = new SpanNearQuery(new SpanQuery[] { term1,
187: term2 }, 4, true);
188: SpanTermQuery term3 = new SpanTermQuery(new Term("field",
189: "forty"));
190:
191: SpanOrQuery or = new SpanOrQuery(new SpanQuery[] { term3 });
192:
193: SpanNotQuery query = new SpanNotQuery(near, or);
194:
195: checkHits(query, new int[] { 801, 821, 831, 851, 861, 871, 881,
196: 891 });
197:
198: assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
199: assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
200: }
201:
202: public void testSpanWithMultipleNotMany() throws Exception {
203: SpanTermQuery term1 = new SpanTermQuery(new Term("field",
204: "eight"));
205: SpanTermQuery term2 = new SpanTermQuery(
206: new Term("field", "one"));
207: SpanNearQuery near = new SpanNearQuery(new SpanQuery[] { term1,
208: term2 }, 4, true);
209: SpanTermQuery term3 = new SpanTermQuery(new Term("field",
210: "forty"));
211: SpanTermQuery term4 = new SpanTermQuery(new Term("field",
212: "sixty"));
213: SpanTermQuery term5 = new SpanTermQuery(new Term("field",
214: "eighty"));
215:
216: SpanOrQuery or = new SpanOrQuery(new SpanQuery[] { term3,
217: term4, term5 });
218:
219: SpanNotQuery query = new SpanNotQuery(near, or);
220:
221: checkHits(query, new int[] { 801, 821, 831, 851, 871, 891 });
222:
223: assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
224: assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
225: }
226:
227: public void testNpeInSpanNearWithSpanNot() throws Exception {
228: SpanTermQuery term1 = new SpanTermQuery(new Term("field",
229: "eight"));
230: SpanTermQuery term2 = new SpanTermQuery(
231: new Term("field", "one"));
232: SpanNearQuery near = new SpanNearQuery(new SpanQuery[] { term1,
233: term2 }, 4, true);
234: SpanTermQuery hun = new SpanTermQuery(new Term("field",
235: "hundred"));
236: SpanTermQuery term3 = new SpanTermQuery(new Term("field",
237: "forty"));
238: SpanNearQuery exclude = new SpanNearQuery(new SpanQuery[] {
239: hun, term3 }, 1, true);
240:
241: SpanNotQuery query = new SpanNotQuery(near, exclude);
242:
243: checkHits(query, new int[] { 801, 821, 831, 851, 861, 871, 881,
244: 891 });
245:
246: assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
247: assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
248: }
249:
250: public void testNpeInSpanNearInSpanFirstInSpanNot()
251: throws Exception {
252: int n = 5;
253: SpanTermQuery hun = new SpanTermQuery(new Term("field",
254: "hundred"));
255: SpanTermQuery term40 = new SpanTermQuery(new Term("field",
256: "forty"));
257: SpanTermQuery term40c = (SpanTermQuery) term40.clone();
258:
259: SpanFirstQuery include = new SpanFirstQuery(term40, n);
260: SpanNearQuery near = new SpanNearQuery(new SpanQuery[] { hun,
261: term40c }, n - 1, true);
262: SpanFirstQuery exclude = new SpanFirstQuery(near, n - 1);
263: SpanNotQuery q = new SpanNotQuery(include, exclude);
264:
265: checkHits(q,
266: new int[] { 40, 41, 42, 43, 44, 45, 46, 47, 48, 49 });
267:
268: }
269:
270: public void testSpanFirst() throws Exception {
271: SpanTermQuery term1 = new SpanTermQuery(new Term("field",
272: "five"));
273: SpanFirstQuery query = new SpanFirstQuery(term1, 1);
274:
275: checkHits(query, new int[] { 5, 500, 501, 502, 503, 504, 505,
276: 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516,
277: 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527,
278: 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538,
279: 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549,
280: 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560,
281: 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571,
282: 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582,
283: 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593,
284: 594, 595, 596, 597, 598, 599 });
285:
286: assertTrue(searcher.explain(query, 5).getValue() > 0.0f);
287: assertTrue(searcher.explain(query, 599).getValue() > 0.0f);
288:
289: }
290:
291: public void testSpanOr() throws Exception {
292: SpanTermQuery term1 = new SpanTermQuery(new Term("field",
293: "thirty"));
294: SpanTermQuery term2 = new SpanTermQuery(new Term("field",
295: "three"));
296: SpanNearQuery near1 = new SpanNearQuery(new SpanQuery[] {
297: term1, term2 }, 0, true);
298: SpanTermQuery term3 = new SpanTermQuery(new Term("field",
299: "forty"));
300: SpanTermQuery term4 = new SpanTermQuery(new Term("field",
301: "seven"));
302: SpanNearQuery near2 = new SpanNearQuery(new SpanQuery[] {
303: term3, term4 }, 0, true);
304:
305: SpanOrQuery query = new SpanOrQuery(new SpanQuery[] { near1,
306: near2 });
307:
308: checkHits(query, new int[] { 33, 47, 133, 147, 233, 247, 333,
309: 347, 433, 447, 533, 547, 633, 647, 733, 747, 833, 847,
310: 933, 947 });
311:
312: assertTrue(searcher.explain(query, 33).getValue() > 0.0f);
313: assertTrue(searcher.explain(query, 947).getValue() > 0.0f);
314: }
315:
316: public void testSpanExactNested() throws Exception {
317: SpanTermQuery term1 = new SpanTermQuery(new Term("field",
318: "three"));
319: SpanTermQuery term2 = new SpanTermQuery(new Term("field",
320: "hundred"));
321: SpanNearQuery near1 = new SpanNearQuery(new SpanQuery[] {
322: term1, term2 }, 0, true);
323: SpanTermQuery term3 = new SpanTermQuery(new Term("field",
324: "thirty"));
325: SpanTermQuery term4 = new SpanTermQuery(new Term("field",
326: "three"));
327: SpanNearQuery near2 = new SpanNearQuery(new SpanQuery[] {
328: term3, term4 }, 0, true);
329:
330: SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {
331: near1, near2 }, 0, true);
332:
333: checkHits(query, new int[] { 333 });
334:
335: assertTrue(searcher.explain(query, 333).getValue() > 0.0f);
336: }
337:
338: public void testSpanNearOr() throws Exception {
339:
340: SpanTermQuery t1 = new SpanTermQuery(new Term("field", "six"));
341: SpanTermQuery t3 = new SpanTermQuery(new Term("field", "seven"));
342:
343: SpanTermQuery t5 = new SpanTermQuery(new Term("field", "seven"));
344: SpanTermQuery t6 = new SpanTermQuery(new Term("field", "six"));
345:
346: SpanOrQuery to1 = new SpanOrQuery(new SpanQuery[] { t1, t3 });
347: SpanOrQuery to2 = new SpanOrQuery(new SpanQuery[] { t5, t6 });
348:
349: SpanNearQuery query = new SpanNearQuery(new SpanQuery[] { to1,
350: to2 }, 10, true);
351:
352: checkHits(query, new int[] { 606, 607, 626, 627, 636, 637, 646,
353: 647, 656, 657, 666, 667, 676, 677, 686, 687, 696, 697,
354: 706, 707, 726, 727, 736, 737, 746, 747, 756, 757, 766,
355: 767, 776, 777, 786, 787, 796, 797 });
356: }
357:
358: public void testSpanComplex1() throws Exception {
359:
360: SpanTermQuery t1 = new SpanTermQuery(new Term("field", "six"));
361: SpanTermQuery t2 = new SpanTermQuery(new Term("field",
362: "hundred"));
363: SpanNearQuery tt1 = new SpanNearQuery(
364: new SpanQuery[] { t1, t2 }, 0, true);
365:
366: SpanTermQuery t3 = new SpanTermQuery(new Term("field", "seven"));
367: SpanTermQuery t4 = new SpanTermQuery(new Term("field",
368: "hundred"));
369: SpanNearQuery tt2 = new SpanNearQuery(
370: new SpanQuery[] { t3, t4 }, 0, true);
371:
372: SpanTermQuery t5 = new SpanTermQuery(new Term("field", "seven"));
373: SpanTermQuery t6 = new SpanTermQuery(new Term("field", "six"));
374:
375: SpanOrQuery to1 = new SpanOrQuery(new SpanQuery[] { tt1, tt2 });
376: SpanOrQuery to2 = new SpanOrQuery(new SpanQuery[] { t5, t6 });
377:
378: SpanNearQuery query = new SpanNearQuery(new SpanQuery[] { to1,
379: to2 }, 100, true);
380:
381: checkHits(query, new int[] { 606, 607, 626, 627, 636, 637, 646,
382: 647, 656, 657, 666, 667, 676, 677, 686, 687, 696, 697,
383: 706, 707, 726, 727, 736, 737, 746, 747, 756, 757, 766,
384: 767, 776, 777, 786, 787, 796, 797 });
385: }
386:
387: private void checkHits(Query query, int[] results)
388: throws IOException {
389: CheckHits.checkHits(query, "field", searcher, results);
390: }
391: }
|