001: package org.apache.lucene.search;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.util.LuceneTestCase;
021: import org.apache.lucene.analysis.WhitespaceAnalyzer;
022: import org.apache.lucene.document.Document;
023: import org.apache.lucene.document.Field;
024: import org.apache.lucene.index.IndexReader;
025: import org.apache.lucene.index.IndexWriter;
026: import org.apache.lucene.index.Term;
027: import org.apache.lucene.store.Directory;
028: import org.apache.lucene.store.RAMDirectory;
029:
030: import java.text.DecimalFormat;
031: import java.io.IOException;
032:
033: /**
034: * Test of the DisjunctionMaxQuery.
035: *
036: */
037: public class TestDisjunctionMaxQuery extends LuceneTestCase {
038:
039: /** threshold for comparing floats */
040: public static final float SCORE_COMP_THRESH = 0.0000f;
041:
042: /**
043: * Similarity to eliminate tf, idf and lengthNorm effects to
044: * isolate test case.
045: *
046: * <p>
047: * same as TestRankingSimilarity in TestRanking.zip from
048: * http://issues.apache.org/jira/browse/LUCENE-323
049: * </p>
050: * @author Williams
051: */
052: private static class TestSimilarity extends DefaultSimilarity {
053:
054: public TestSimilarity() {
055: }
056:
057: public float tf(float freq) {
058: if (freq > 0.0f)
059: return 1.0f;
060: else
061: return 0.0f;
062: }
063:
064: public float lengthNorm(String fieldName, int numTerms) {
065: return 1.0f;
066: }
067:
068: public float idf(int docFreq, int numDocs) {
069: return 1.0f;
070: }
071: }
072:
073: public Similarity sim = new TestSimilarity();
074: public Directory index;
075: public IndexReader r;
076: public IndexSearcher s;
077:
078: public void setUp() throws Exception {
079:
080: super .setUp();
081:
082: index = new RAMDirectory();
083: IndexWriter writer = new IndexWriter(index,
084: new WhitespaceAnalyzer(), true);
085: writer.setSimilarity(sim);
086:
087: // hed is the most important field, dek is secondary
088:
089: // d1 is an "ok" match for: albino elephant
090: {
091: Document d1 = new Document();
092: d1.add(new Field("id", "d1", Field.Store.YES,
093: Field.Index.UN_TOKENIZED));//Field.Keyword("id", "d1"));
094: d1.add(new Field("hed", "elephant", Field.Store.YES,
095: Field.Index.TOKENIZED));//Field.Text("hed", "elephant"));
096: d1.add(new Field("dek", "elephant", Field.Store.YES,
097: Field.Index.TOKENIZED));//Field.Text("dek", "elephant"));
098: writer.addDocument(d1);
099: }
100:
101: // d2 is a "good" match for: albino elephant
102: {
103: Document d2 = new Document();
104: d2.add(new Field("id", "d2", Field.Store.YES,
105: Field.Index.UN_TOKENIZED));//Field.Keyword("id", "d2"));
106: d2.add(new Field("hed", "elephant", Field.Store.YES,
107: Field.Index.TOKENIZED));//Field.Text("hed", "elephant"));
108: d2.add(new Field("dek", "albino", Field.Store.YES,
109: Field.Index.TOKENIZED));//Field.Text("dek", "albino"));
110: d2.add(new Field("dek", "elephant", Field.Store.YES,
111: Field.Index.TOKENIZED));//Field.Text("dek", "elephant"));
112: writer.addDocument(d2);
113: }
114:
115: // d3 is a "better" match for: albino elephant
116: {
117: Document d3 = new Document();
118: d3.add(new Field("id", "d3", Field.Store.YES,
119: Field.Index.UN_TOKENIZED));//Field.Keyword("id", "d3"));
120: d3.add(new Field("hed", "albino", Field.Store.YES,
121: Field.Index.TOKENIZED));//Field.Text("hed", "albino"));
122: d3.add(new Field("hed", "elephant", Field.Store.YES,
123: Field.Index.TOKENIZED));//Field.Text("hed", "elephant"));
124: writer.addDocument(d3);
125: }
126:
127: // d4 is the "best" match for: albino elephant
128: {
129: Document d4 = new Document();
130: d4.add(new Field("id", "d4", Field.Store.YES,
131: Field.Index.UN_TOKENIZED));//Field.Keyword("id", "d4"));
132: d4.add(new Field("hed", "albino", Field.Store.YES,
133: Field.Index.TOKENIZED));//Field.Text("hed", "albino"));
134: d4.add(new Field("hed", "elephant", Field.Store.YES,
135: Field.Index.TOKENIZED));//Field.Text("hed", "elephant"));
136: d4.add(new Field("dek", "albino", Field.Store.YES,
137: Field.Index.TOKENIZED));//Field.Text("dek", "albino"));
138: writer.addDocument(d4);
139: }
140:
141: writer.close();
142:
143: r = IndexReader.open(index);
144: s = new IndexSearcher(r);
145: s.setSimilarity(sim);
146: }
147:
148: public void testSkipToFirsttimeMiss() throws IOException {
149: final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);
150: dq.add(tq("id", "d1"));
151: dq.add(tq("dek", "DOES_NOT_EXIST"));
152:
153: QueryUtils.check(dq, s);
154:
155: final Weight dw = dq.weight(s);
156: final Scorer ds = dw.scorer(r);
157: final boolean skipOk = ds.skipTo(3);
158: if (skipOk) {
159: fail("firsttime skipTo found a match? ... "
160: + r.document(ds.doc()).get("id"));
161: }
162: }
163:
164: public void testSkipToFirsttimeHit() throws IOException {
165: final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);
166: dq.add(tq("dek", "albino"));
167: dq.add(tq("dek", "DOES_NOT_EXIST"));
168:
169: QueryUtils.check(dq, s);
170:
171: final Weight dw = dq.weight(s);
172: final Scorer ds = dw.scorer(r);
173: assertTrue("firsttime skipTo found no match", ds.skipTo(3));
174: assertEquals("found wrong docid", "d4", r.document(ds.doc())
175: .get("id"));
176: }
177:
178: public void testSimpleEqualScores1() throws Exception {
179:
180: DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
181: q.add(tq("hed", "albino"));
182: q.add(tq("hed", "elephant"));
183: QueryUtils.check(q, s);
184:
185: Hits h = s.search(q);
186:
187: try {
188: assertEquals("all docs should match " + q.toString(), 4, h
189: .length());
190:
191: float score = h.score(0);
192: for (int i = 1; i < h.length(); i++) {
193: assertEquals("score #" + i + " is not the same", score,
194: h.score(i), SCORE_COMP_THRESH);
195: }
196: } catch (Error e) {
197: printHits("testSimpleEqualScores1", h);
198: throw e;
199: }
200:
201: }
202:
203: public void testSimpleEqualScores2() throws Exception {
204:
205: DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
206: q.add(tq("dek", "albino"));
207: q.add(tq("dek", "elephant"));
208: QueryUtils.check(q, s);
209:
210: Hits h = s.search(q);
211:
212: try {
213: assertEquals("3 docs should match " + q.toString(), 3, h
214: .length());
215: float score = h.score(0);
216: for (int i = 1; i < h.length(); i++) {
217: assertEquals("score #" + i + " is not the same", score,
218: h.score(i), SCORE_COMP_THRESH);
219: }
220: } catch (Error e) {
221: printHits("testSimpleEqualScores2", h);
222: throw e;
223: }
224:
225: }
226:
227: public void testSimpleEqualScores3() throws Exception {
228:
229: DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
230: q.add(tq("hed", "albino"));
231: q.add(tq("hed", "elephant"));
232: q.add(tq("dek", "albino"));
233: q.add(tq("dek", "elephant"));
234: QueryUtils.check(q, s);
235:
236: Hits h = s.search(q);
237:
238: try {
239: assertEquals("all docs should match " + q.toString(), 4, h
240: .length());
241: float score = h.score(0);
242: for (int i = 1; i < h.length(); i++) {
243: assertEquals("score #" + i + " is not the same", score,
244: h.score(i), SCORE_COMP_THRESH);
245: }
246: } catch (Error e) {
247: printHits("testSimpleEqualScores3", h);
248: throw e;
249: }
250:
251: }
252:
253: public void testSimpleTiebreaker() throws Exception {
254:
255: DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.01f);
256: q.add(tq("dek", "albino"));
257: q.add(tq("dek", "elephant"));
258: QueryUtils.check(q, s);
259:
260: Hits h = s.search(q);
261:
262: try {
263: assertEquals("3 docs should match " + q.toString(), 3, h
264: .length());
265: assertEquals("wrong first", "d2", h.doc(0).get("id"));
266: float score0 = h.score(0);
267: float score1 = h.score(1);
268: float score2 = h.score(2);
269: assertTrue("d2 does not have better score then others: "
270: + score0 + " >? " + score1, score0 > score1);
271: assertEquals("d4 and d1 don't have equal scores", score1,
272: score2, SCORE_COMP_THRESH);
273: } catch (Error e) {
274: printHits("testSimpleTiebreaker", h);
275: throw e;
276: }
277: }
278:
279: public void testBooleanRequiredEqualScores() throws Exception {
280:
281: BooleanQuery q = new BooleanQuery();
282: {
283: DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f);
284: q1.add(tq("hed", "albino"));
285: q1.add(tq("dek", "albino"));
286: q.add(q1, BooleanClause.Occur.MUST);//true,false);
287: QueryUtils.check(q1, s);
288:
289: }
290: {
291: DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f);
292: q2.add(tq("hed", "elephant"));
293: q2.add(tq("dek", "elephant"));
294: q.add(q2, BooleanClause.Occur.MUST);//true,false);
295: QueryUtils.check(q2, s);
296: }
297:
298: QueryUtils.check(q, s);
299:
300: Hits h = s.search(q);
301:
302: try {
303: assertEquals("3 docs should match " + q.toString(), 3, h
304: .length());
305: float score = h.score(0);
306: for (int i = 1; i < h.length(); i++) {
307: assertEquals("score #" + i + " is not the same", score,
308: h.score(i), SCORE_COMP_THRESH);
309: }
310: } catch (Error e) {
311: printHits("testBooleanRequiredEqualScores1", h);
312: throw e;
313: }
314: }
315:
316: public void testBooleanOptionalNoTiebreaker() throws Exception {
317:
318: BooleanQuery q = new BooleanQuery();
319: {
320: DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f);
321: q1.add(tq("hed", "albino"));
322: q1.add(tq("dek", "albino"));
323: q.add(q1, BooleanClause.Occur.SHOULD);//false,false);
324: }
325: {
326: DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f);
327: q2.add(tq("hed", "elephant"));
328: q2.add(tq("dek", "elephant"));
329: q.add(q2, BooleanClause.Occur.SHOULD);//false,false);
330: }
331: QueryUtils.check(q, s);
332:
333: Hits h = s.search(q);
334:
335: try {
336: assertEquals("4 docs should match " + q.toString(), 4, h
337: .length());
338: float score = h.score(0);
339: for (int i = 1; i < h.length() - 1; i++) { /* note: -1 */
340: assertEquals("score #" + i + " is not the same", score,
341: h.score(i), SCORE_COMP_THRESH);
342: }
343: assertEquals("wrong last", "d1", h.doc(h.length() - 1).get(
344: "id"));
345: float score1 = h.score(h.length() - 1);
346: assertTrue("d1 does not have worse score then others: "
347: + score + " >? " + score1, score > score1);
348: } catch (Error e) {
349: printHits("testBooleanOptionalNoTiebreaker", h);
350: throw e;
351: }
352: }
353:
354: public void testBooleanOptionalWithTiebreaker() throws Exception {
355:
356: BooleanQuery q = new BooleanQuery();
357: {
358: DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f);
359: q1.add(tq("hed", "albino"));
360: q1.add(tq("dek", "albino"));
361: q.add(q1, BooleanClause.Occur.SHOULD);//false,false);
362: }
363: {
364: DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f);
365: q2.add(tq("hed", "elephant"));
366: q2.add(tq("dek", "elephant"));
367: q.add(q2, BooleanClause.Occur.SHOULD);//false,false);
368: }
369: QueryUtils.check(q, s);
370:
371: Hits h = s.search(q);
372:
373: try {
374:
375: assertEquals("4 docs should match " + q.toString(), 4, h
376: .length());
377:
378: float score0 = h.score(0);
379: float score1 = h.score(1);
380: float score2 = h.score(2);
381: float score3 = h.score(3);
382:
383: String doc0 = h.doc(0).get("id");
384: String doc1 = h.doc(1).get("id");
385: String doc2 = h.doc(2).get("id");
386: String doc3 = h.doc(3).get("id");
387:
388: assertTrue("doc0 should be d2 or d4: " + doc0, doc0
389: .equals("d2")
390: || doc0.equals("d4"));
391: assertTrue("doc1 should be d2 or d4: " + doc0, doc1
392: .equals("d2")
393: || doc1.equals("d4"));
394: assertEquals("score0 and score1 should match", score0,
395: score1, SCORE_COMP_THRESH);
396: assertEquals("wrong third", "d3", doc2);
397: assertTrue("d3 does not have worse score then d2 and d4: "
398: + score1 + " >? " + score2, score1 > score2);
399:
400: assertEquals("wrong fourth", "d1", doc3);
401: assertTrue("d1 does not have worse score then d3: "
402: + score2 + " >? " + score3, score2 > score3);
403:
404: } catch (Error e) {
405: printHits("testBooleanOptionalWithTiebreaker", h);
406: throw e;
407: }
408:
409: }
410:
411: public void testBooleanOptionalWithTiebreakerAndBoost()
412: throws Exception {
413:
414: BooleanQuery q = new BooleanQuery();
415: {
416: DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f);
417: q1.add(tq("hed", "albino", 1.5f));
418: q1.add(tq("dek", "albino"));
419: q.add(q1, BooleanClause.Occur.SHOULD);//false,false);
420: }
421: {
422: DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f);
423: q2.add(tq("hed", "elephant", 1.5f));
424: q2.add(tq("dek", "elephant"));
425: q.add(q2, BooleanClause.Occur.SHOULD);//false,false);
426: }
427: QueryUtils.check(q, s);
428:
429: Hits h = s.search(q);
430:
431: try {
432:
433: assertEquals("4 docs should match " + q.toString(), 4, h
434: .length());
435:
436: float score0 = h.score(0);
437: float score1 = h.score(1);
438: float score2 = h.score(2);
439: float score3 = h.score(3);
440:
441: String doc0 = h.doc(0).get("id");
442: String doc1 = h.doc(1).get("id");
443: String doc2 = h.doc(2).get("id");
444: String doc3 = h.doc(3).get("id");
445:
446: assertEquals("doc0 should be d4: ", "d4", doc0);
447: assertEquals("doc1 should be d3: ", "d3", doc1);
448: assertEquals("doc2 should be d2: ", "d2", doc2);
449: assertEquals("doc3 should be d1: ", "d1", doc3);
450:
451: assertTrue("d4 does not have a better score then d3: "
452: + score0 + " >? " + score1, score0 > score1);
453: assertTrue("d3 does not have a better score then d2: "
454: + score1 + " >? " + score2, score1 > score2);
455: assertTrue("d3 does not have a better score then d1: "
456: + score2 + " >? " + score3, score2 > score3);
457:
458: } catch (Error e) {
459: printHits("testBooleanOptionalWithTiebreakerAndBoost", h);
460: throw e;
461: }
462: }
463:
464: /** macro */
465: protected Query tq(String f, String t) {
466: return new TermQuery(new Term(f, t));
467: }
468:
469: /** macro */
470: protected Query tq(String f, String t, float b) {
471: Query q = tq(f, t);
472: q.setBoost(b);
473: return q;
474: }
475:
476: protected void printHits(String test, Hits h) throws Exception {
477:
478: System.err.println("------- " + test + " -------");
479:
480: DecimalFormat f = new DecimalFormat("0.000000000");
481:
482: for (int i = 0; i < h.length(); i++) {
483: Document d = h.doc(i);
484: float score = h.score(i);
485: System.err.println("#" + i + ": " + f.format(score) + " - "
486: + d.get("id"));
487: }
488: }
489:
490: }
|