001: package org.apache.lucene.search;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.search.IndexSearcher;
021: import org.apache.lucene.index.Term;
022: import org.apache.lucene.index.TermEnum;
023: import org.apache.lucene.index.IndexReader;
024: import org.apache.lucene.index.IndexWriter;
025: import org.apache.lucene.store.RAMDirectory;
026: import org.apache.lucene.analysis.SimpleAnalyzer;
027: import org.apache.lucene.analysis.standard.StandardAnalyzer;
028: import org.apache.lucene.document.Document;
029: import org.apache.lucene.document.Field;
030:
031: import org.apache.lucene.util.LuceneTestCase;
032:
033: import java.io.IOException;
034: import java.util.LinkedList;
035:
036: /**
037: * This class tests the MultiPhraseQuery class.
038: *
039: *
040: * @version $Id: TestMultiPhraseQuery.java 583534 2007-10-10 16:46:35Z mikemccand $
041: */
042: public class TestMultiPhraseQuery extends LuceneTestCase {
043: public TestMultiPhraseQuery(String name) {
044: super (name);
045: }
046:
047: public void testPhrasePrefix() throws IOException {
048: RAMDirectory indexStore = new RAMDirectory();
049: IndexWriter writer = new IndexWriter(indexStore,
050: new SimpleAnalyzer(), true);
051: add("blueberry pie", writer);
052: add("blueberry strudel", writer);
053: add("blueberry pizza", writer);
054: add("blueberry chewing gum", writer);
055: add("bluebird pizza", writer);
056: add("bluebird foobar pizza", writer);
057: add("piccadilly circus", writer);
058: writer.optimize();
059: writer.close();
060:
061: IndexSearcher searcher = new IndexSearcher(indexStore);
062:
063: // search for "blueberry pi*":
064: MultiPhraseQuery query1 = new MultiPhraseQuery();
065: // search for "strawberry pi*":
066: MultiPhraseQuery query2 = new MultiPhraseQuery();
067: query1.add(new Term("body", "blueberry"));
068: query2.add(new Term("body", "strawberry"));
069:
070: LinkedList termsWithPrefix = new LinkedList();
071: IndexReader ir = IndexReader.open(indexStore);
072:
073: // this TermEnum gives "piccadilly", "pie" and "pizza".
074: String prefix = "pi";
075: TermEnum te = ir.terms(new Term("body", prefix));
076: do {
077: if (te.term().text().startsWith(prefix)) {
078: termsWithPrefix.add(te.term());
079: }
080: } while (te.next());
081:
082: query1.add((Term[]) termsWithPrefix.toArray(new Term[0]));
083: assertEquals("body:\"blueberry (piccadilly pie pizza)\"",
084: query1.toString());
085: query2.add((Term[]) termsWithPrefix.toArray(new Term[0]));
086: assertEquals("body:\"strawberry (piccadilly pie pizza)\"",
087: query2.toString());
088:
089: Hits result;
090: result = searcher.search(query1);
091: assertEquals(2, result.length());
092: result = searcher.search(query2);
093: assertEquals(0, result.length());
094:
095: // search for "blue* pizza":
096: MultiPhraseQuery query3 = new MultiPhraseQuery();
097: termsWithPrefix.clear();
098: prefix = "blue";
099: te = ir.terms(new Term("body", prefix));
100: do {
101: if (te.term().text().startsWith(prefix)) {
102: termsWithPrefix.add(te.term());
103: }
104: } while (te.next());
105: query3.add((Term[]) termsWithPrefix.toArray(new Term[0]));
106: query3.add(new Term("body", "pizza"));
107:
108: result = searcher.search(query3);
109: assertEquals(2, result.length()); // blueberry pizza, bluebird pizza
110: assertEquals("body:\"(blueberry bluebird) pizza\"", query3
111: .toString());
112:
113: // test slop:
114: query3.setSlop(1);
115: result = searcher.search(query3);
116: assertEquals(3, result.length()); // blueberry pizza, bluebird pizza, bluebird foobar pizza
117:
118: MultiPhraseQuery query4 = new MultiPhraseQuery();
119: try {
120: query4.add(new Term("field1", "foo"));
121: query4.add(new Term("field2", "foobar"));
122: fail();
123: } catch (IllegalArgumentException e) {
124: // okay, all terms must belong to the same field
125: }
126:
127: searcher.close();
128: indexStore.close();
129:
130: }
131:
132: private void add(String s, IndexWriter writer) throws IOException {
133: Document doc = new Document();
134: doc.add(new Field("body", s, Field.Store.YES,
135: Field.Index.TOKENIZED));
136: writer.addDocument(doc);
137: }
138:
139: public void testBooleanQueryContainingSingleTermPrefixQuery()
140: throws IOException {
141: // this tests against bug 33161 (now fixed)
142: // In order to cause the bug, the outer query must have more than one term
143: // and all terms required.
144: // The contained PhraseMultiQuery must contain exactly one term array.
145:
146: RAMDirectory indexStore = new RAMDirectory();
147: IndexWriter writer = new IndexWriter(indexStore,
148: new SimpleAnalyzer(), true);
149: add("blueberry pie", writer);
150: add("blueberry chewing gum", writer);
151: add("blue raspberry pie", writer);
152: writer.optimize();
153: writer.close();
154:
155: IndexSearcher searcher = new IndexSearcher(indexStore);
156: // This query will be equivalent to +body:pie +body:"blue*"
157: BooleanQuery q = new BooleanQuery();
158: q.add(new TermQuery(new Term("body", "pie")),
159: BooleanClause.Occur.MUST);
160:
161: MultiPhraseQuery trouble = new MultiPhraseQuery();
162: trouble.add(new Term[] { new Term("body", "blueberry"),
163: new Term("body", "blue") });
164: q.add(trouble, BooleanClause.Occur.MUST);
165:
166: // exception will be thrown here without fix
167: Hits hits = searcher.search(q);
168:
169: assertEquals("Wrong number of hits", 2, hits.length());
170: searcher.close();
171: }
172:
173: public void testPhrasePrefixWithBooleanQuery() throws IOException {
174: RAMDirectory indexStore = new RAMDirectory();
175: IndexWriter writer = new IndexWriter(indexStore,
176: new StandardAnalyzer(new String[] {}), true);
177: add("This is a test", "object", writer);
178: add("a note", "note", writer);
179: writer.close();
180:
181: IndexSearcher searcher = new IndexSearcher(indexStore);
182:
183: // This query will be equivalent to +type:note +body:"a t*"
184: BooleanQuery q = new BooleanQuery();
185: q.add(new TermQuery(new Term("type", "note")),
186: BooleanClause.Occur.MUST);
187:
188: MultiPhraseQuery trouble = new MultiPhraseQuery();
189: trouble.add(new Term("body", "a"));
190: trouble.add(new Term[] { new Term("body", "test"),
191: new Term("body", "this") });
192: q.add(trouble, BooleanClause.Occur.MUST);
193:
194: // exception will be thrown here without fix for #35626:
195: Hits hits = searcher.search(q);
196: assertEquals("Wrong number of hits", 0, hits.length());
197: searcher.close();
198: }
199:
200: private void add(String s, String type, IndexWriter writer)
201: throws IOException {
202: Document doc = new Document();
203: doc.add(new Field("body", s, Field.Store.YES,
204: Field.Index.TOKENIZED));
205: doc.add(new Field("type", type, Field.Store.YES,
206: Field.Index.UN_TOKENIZED));
207: writer.addDocument(doc);
208: }
209:
210: }
|