001: package org.apache.lucene.search;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.util.LuceneTestCase;
021: import org.apache.lucene.analysis.SimpleAnalyzer;
022: import org.apache.lucene.analysis.WhitespaceAnalyzer;
023: import org.apache.lucene.document.Document;
024: import org.apache.lucene.document.Field;
025: import org.apache.lucene.document.Field.Store;
026: import org.apache.lucene.document.Field.Index;
027: import org.apache.lucene.index.IndexWriter;
028: import org.apache.lucene.index.Term;
029: import org.apache.lucene.queryParser.QueryParser;
030: import org.apache.lucene.store.RAMDirectory;
031:
032: import java.io.IOException;
033:
034: /**
035: * TestWildcard tests the '*' and '?' wildcard characters.
036: *
037: * @version $Id: TestWildcard.java 583534 2007-10-10 16:46:35Z mikemccand $
038: *
039: */
040: public class TestWildcard extends LuceneTestCase {
041: public void testEquals() {
042: WildcardQuery wq1 = new WildcardQuery(new Term("field", "b*a"));
043: WildcardQuery wq2 = new WildcardQuery(new Term("field", "b*a"));
044: WildcardQuery wq3 = new WildcardQuery(new Term("field", "b*a"));
045:
046: // reflexive?
047: assertEquals(wq1, wq2);
048: assertEquals(wq2, wq1);
049:
050: // transitive?
051: assertEquals(wq2, wq3);
052: assertEquals(wq1, wq3);
053:
054: assertFalse(wq1.equals(null));
055:
056: FuzzyQuery fq = new FuzzyQuery(new Term("field", "b*a"));
057: assertFalse(wq1.equals(fq));
058: assertFalse(fq.equals(wq1));
059: }
060:
061: /**
062: * Tests if a WildcardQuery that has no wildcard in the term is rewritten to a single
063: * TermQuery.
064: */
065: public void testTermWithoutWildcard() throws IOException {
066: RAMDirectory indexStore = getIndexStore("field", new String[] {
067: "nowildcard", "nowildcardx" });
068: IndexSearcher searcher = new IndexSearcher(indexStore);
069:
070: Query wq = new WildcardQuery(new Term("field", "nowildcard"));
071: assertMatches(searcher, wq, 1);
072:
073: wq = searcher.rewrite(wq);
074: assertTrue(wq instanceof TermQuery);
075: }
076:
077: /**
078: * Tests Wildcard queries with an asterisk.
079: */
080: public void testAsterisk() throws IOException {
081: RAMDirectory indexStore = getIndexStore("body", new String[] {
082: "metal", "metals" });
083: IndexSearcher searcher = new IndexSearcher(indexStore);
084: Query query1 = new TermQuery(new Term("body", "metal"));
085: Query query2 = new WildcardQuery(new Term("body", "metal*"));
086: Query query3 = new WildcardQuery(new Term("body", "m*tal"));
087: Query query4 = new WildcardQuery(new Term("body", "m*tal*"));
088: Query query5 = new WildcardQuery(new Term("body", "m*tals"));
089:
090: BooleanQuery query6 = new BooleanQuery();
091: query6.add(query5, BooleanClause.Occur.SHOULD);
092:
093: BooleanQuery query7 = new BooleanQuery();
094: query7.add(query3, BooleanClause.Occur.SHOULD);
095: query7.add(query5, BooleanClause.Occur.SHOULD);
096:
097: // Queries do not automatically lower-case search terms:
098: Query query8 = new WildcardQuery(new Term("body", "M*tal*"));
099:
100: assertMatches(searcher, query1, 1);
101: assertMatches(searcher, query2, 2);
102: assertMatches(searcher, query3, 1);
103: assertMatches(searcher, query4, 2);
104: assertMatches(searcher, query5, 1);
105: assertMatches(searcher, query6, 1);
106: assertMatches(searcher, query7, 2);
107: assertMatches(searcher, query8, 0);
108: assertMatches(searcher, new WildcardQuery(new Term("body",
109: "*tall")), 0);
110: assertMatches(searcher, new WildcardQuery(new Term("body",
111: "*tal")), 1);
112: assertMatches(searcher, new WildcardQuery(new Term("body",
113: "*tal*")), 2);
114: }
115:
116: /**
117: * Tests Wildcard queries with a question mark.
118: *
119: * @throws IOException if an error occurs
120: */
121: public void testQuestionmark() throws IOException {
122: RAMDirectory indexStore = getIndexStore("body", new String[] {
123: "metal", "metals", "mXtals", "mXtXls" });
124: IndexSearcher searcher = new IndexSearcher(indexStore);
125: Query query1 = new WildcardQuery(new Term("body", "m?tal"));
126: Query query2 = new WildcardQuery(new Term("body", "metal?"));
127: Query query3 = new WildcardQuery(new Term("body", "metals?"));
128: Query query4 = new WildcardQuery(new Term("body", "m?t?ls"));
129: Query query5 = new WildcardQuery(new Term("body", "M?t?ls"));
130: Query query6 = new WildcardQuery(new Term("body", "meta??"));
131:
132: assertMatches(searcher, query1, 1);
133: assertMatches(searcher, query2, 1);
134: assertMatches(searcher, query3, 0);
135: assertMatches(searcher, query4, 3);
136: assertMatches(searcher, query5, 0);
137: assertMatches(searcher, query6, 1); // Query: 'meta??' matches 'metals' not 'metal'
138: }
139:
140: private RAMDirectory getIndexStore(String field, String[] contents)
141: throws IOException {
142: RAMDirectory indexStore = new RAMDirectory();
143: IndexWriter writer = new IndexWriter(indexStore,
144: new SimpleAnalyzer(), true);
145: for (int i = 0; i < contents.length; ++i) {
146: Document doc = new Document();
147: doc.add(new Field(field, contents[i], Field.Store.YES,
148: Field.Index.TOKENIZED));
149: writer.addDocument(doc);
150: }
151: writer.optimize();
152: writer.close();
153:
154: return indexStore;
155: }
156:
157: private void assertMatches(IndexSearcher searcher, Query q,
158: int expectedMatches) throws IOException {
159: Hits result = searcher.search(q);
160: assertEquals(expectedMatches, result.length());
161: }
162:
163: /**
164: * Test that wild card queries are parsed to the correct type and are searched correctly.
165: * This test looks at both parsing and execution of wildcard queries.
166: * Although placed here, it also tests prefix queries, verifying that
167: * prefix queries are not parsed into wild card queries, and viceversa.
168: * @throws Exception
169: */
170: public void testParsingAndSearching() throws Exception {
171: String field = "content";
172: boolean dbg = false;
173: QueryParser qp = new QueryParser(field,
174: new WhitespaceAnalyzer());
175: qp.setAllowLeadingWildcard(true);
176: String docs[] = { "\\ abcdefg1", "\\79 hijklmn1",
177: "\\\\ opqrstu1", };
178: // queries that should find all docs
179: String matchAll[] = { "*", "*1", "**1", "*?", "*?1", "?*1",
180: "**", "***", "\\\\*" };
181: // queries that should find no docs
182: String matchNone[] = { "a*h", "a?h", "*a*h", "?a", "a?", };
183: // queries that should be parsed to prefix queries
184: String matchOneDocPrefix[][] = { { "a*", "ab*", "abc*", }, // these should find only doc 0
185: { "h*", "hi*", "hij*", "\\\\7*" }, // these should find only doc 1
186: { "o*", "op*", "opq*", "\\\\\\\\*" }, // these should find only doc 2
187: };
188: // queries that should be parsed to wildcard queries
189: String matchOneDocWild[][] = {
190: { "*a*", "*ab*", "*abc**", "ab*e*", "*g?", "*f?1",
191: "abc**" }, // these should find only doc 0
192: { "*h*", "*hi*", "*hij**", "hi*k*", "*n?", "*m?1",
193: "hij**" }, // these should find only doc 1
194: { "*o*", "*op*", "*opq**", "op*q*", "*u?", "*t?1",
195: "opq**" }, // these should find only doc 2
196: };
197:
198: // prepare the index
199: RAMDirectory dir = new RAMDirectory();
200: IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer());
201: for (int i = 0; i < docs.length; i++) {
202: Document doc = new Document();
203: doc
204: .add(new Field(field, docs[i], Store.NO,
205: Index.TOKENIZED));
206: iw.addDocument(doc);
207: }
208: iw.close();
209:
210: IndexSearcher searcher = new IndexSearcher(dir);
211:
212: // test queries that must find all
213: for (int i = 0; i < matchAll.length; i++) {
214: String qtxt = matchAll[i];
215: Query q = qp.parse(qtxt);
216: if (dbg)
217: System.out.println("matchAll: qtxt=" + qtxt + " q=" + q
218: + " " + q.getClass().getName());
219: Hits hits = searcher.search(q);
220: assertEquals(docs.length, hits.length());
221: }
222:
223: // test queries that must find none
224: for (int i = 0; i < matchNone.length; i++) {
225: String qtxt = matchNone[i];
226: Query q = qp.parse(qtxt);
227: if (dbg)
228: System.out.println("matchNone: qtxt=" + qtxt + " q="
229: + q + " " + q.getClass().getName());
230: Hits hits = searcher.search(q);
231: assertEquals(0, hits.length());
232: }
233:
234: // test queries that must be prefix queries and must find only one doc
235: for (int i = 0; i < matchOneDocPrefix.length; i++) {
236: for (int j = 0; j < matchOneDocPrefix[i].length; j++) {
237: String qtxt = matchOneDocPrefix[i][j];
238: Query q = qp.parse(qtxt);
239: if (dbg)
240: System.out.println("match 1 prefix: doc=" + docs[i]
241: + " qtxt=" + qtxt + " q=" + q + " "
242: + q.getClass().getName());
243: assertEquals(PrefixQuery.class, q.getClass());
244: Hits hits = searcher.search(q);
245: assertEquals(1, hits.length());
246: assertEquals(i, hits.id(0));
247: }
248: }
249:
250: // test queries that must be wildcard queries and must find only one doc
251: for (int i = 0; i < matchOneDocPrefix.length; i++) {
252: for (int j = 0; j < matchOneDocWild[i].length; j++) {
253: String qtxt = matchOneDocWild[i][j];
254: Query q = qp.parse(qtxt);
255: if (dbg)
256: System.out.println("match 1 wild: doc=" + docs[i]
257: + " qtxt=" + qtxt + " q=" + q + " "
258: + q.getClass().getName());
259: assertEquals(WildcardQuery.class, q.getClass());
260: Hits hits = searcher.search(q);
261: assertEquals(1, hits.length());
262: assertEquals(i, hits.id(0));
263: }
264: }
265:
266: searcher.close();
267: }
268:
269: }
|