001: package org.apache.lucene.search;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.store.Directory;
021: import org.apache.lucene.index.IndexReader;
022:
023: import junit.framework.TestCase;
024:
025: import java.io.IOException;
026: import java.util.Set;
027: import java.util.TreeSet;
028:
029: public class CheckHits {
030:
031: /**
032: * Some explain methods calculate their values through a slightly
033: * different order of operations from the actual scoring method ...
034: * this allows for a small amount of variation
035: */
036: public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.00005f;
037:
038: /**
039: * Tests that all documents up to maxDoc which are *not* in the
040: * expected result set, have an explanation which indicates no match
041: * (ie: Explanation value of 0.0f)
042: */
043: public static void checkNoMatchExplanations(Query q,
044: String defaultFieldName, Searcher searcher, int[] results)
045: throws IOException {
046:
047: String d = q.toString(defaultFieldName);
048: Set ignore = new TreeSet();
049: for (int i = 0; i < results.length; i++) {
050: ignore.add(new Integer(results[i]));
051: }
052:
053: int maxDoc = searcher.maxDoc();
054: for (int doc = 0; doc < maxDoc; doc++) {
055: if (ignore.contains(new Integer(doc)))
056: continue;
057:
058: Explanation exp = searcher.explain(q, doc);
059: TestCase.assertNotNull("Explanation of [[" + d + "]] for #"
060: + doc + " is null", exp);
061: TestCase.assertEquals("Explanation of [[" + d + "]] for #"
062: + doc + " doesn't indicate non-match: "
063: + exp.toString(), 0.0f, exp.getValue(), 0.0f);
064: }
065:
066: }
067:
068: /**
069: * Tests that a query matches the an expected set of documents using a
070: * HitCollector.
071: *
072: * <p>
073: * Note that when using the HitCollector API, documents will be collected
074: * if they "match" regardless of what their score is.
075: * </p>
076: * @param query the query to test
077: * @param searcher the searcher to test the query against
078: * @param defaultFieldName used for displaing the query in assertion messages
079: * @param results a list of documentIds that must match the query
080: * @see Searcher#search(Query,HitCollector)
081: * @see #checkHits
082: */
083: public static void checkHitCollector(Query query,
084: String defaultFieldName, Searcher searcher, int[] results)
085: throws IOException {
086:
087: Set correct = new TreeSet();
088: for (int i = 0; i < results.length; i++) {
089: correct.add(new Integer(results[i]));
090: }
091:
092: final Set actual = new TreeSet();
093: searcher.search(query, new HitCollector() {
094: public void collect(int doc, float score) {
095: actual.add(new Integer(doc));
096: }
097: });
098: TestCase.assertEquals(query.toString(defaultFieldName),
099: correct, actual);
100:
101: QueryUtils.check(query, searcher);
102: }
103:
104: /**
105: * Tests that a query matches the an expected set of documents using Hits.
106: *
107: * <p>
108: * Note that when using the Hits API, documents will only be returned
109: * if they have a positive normalized score.
110: * </p>
111: * @param query the query to test
112: * @param searcher the searcher to test the query against
113: * @param defaultFieldName used for displaing the query in assertion messages
114: * @param results a list of documentIds that must match the query
115: * @see Searcher#search(Query)
116: * @see #checkHitCollector
117: */
118: public static void checkHits(Query query, String defaultFieldName,
119: Searcher searcher, int[] results) throws IOException {
120: if (searcher instanceof IndexSearcher) {
121: QueryUtils.check(query, (IndexSearcher) searcher);
122: }
123:
124: Hits hits = searcher.search(query);
125:
126: Set correct = new TreeSet();
127: for (int i = 0; i < results.length; i++) {
128: correct.add(new Integer(results[i]));
129: }
130:
131: Set actual = new TreeSet();
132: for (int i = 0; i < hits.length(); i++) {
133: actual.add(new Integer(hits.id(i)));
134: }
135:
136: TestCase.assertEquals(query.toString(defaultFieldName),
137: correct, actual);
138:
139: QueryUtils.check(query, searcher);
140: }
141:
142: /** Tests that a Hits has an expected order of documents */
143: public static void checkDocIds(String mes, int[] results, Hits hits)
144: throws IOException {
145: TestCase.assertEquals(mes + " nr of hits", results.length, hits
146: .length());
147: for (int i = 0; i < results.length; i++) {
148: TestCase.assertEquals(mes + " doc nrs for hit " + i,
149: results[i], hits.id(i));
150: }
151: }
152:
153: /** Tests that two queries have an expected order of documents,
154: * and that the two queries have the same score values.
155: */
156: public static void checkHitsQuery(Query query, Hits hits1,
157: Hits hits2, int[] results) throws IOException {
158:
159: checkDocIds("hits1", results, hits1);
160: checkDocIds("hits2", results, hits2);
161: checkEqual(query, hits1, hits2);
162: }
163:
164: public static void checkEqual(Query query, Hits hits1, Hits hits2)
165: throws IOException {
166: final float scoreTolerance = 1.0e-6f;
167: if (hits1.length() != hits2.length()) {
168: TestCase.fail("Unequal lengths: hits1=" + hits1.length()
169: + ",hits2=" + hits2.length());
170: }
171: for (int i = 0; i < hits1.length(); i++) {
172: if (hits1.id(i) != hits2.id(i)) {
173: TestCase.fail("Hit " + i + " docnumbers don't match\n"
174: + hits2str(hits1, hits2, 0, 0) + "for query:"
175: + query.toString());
176: }
177:
178: if ((hits1.id(i) != hits2.id(i))
179: || Math.abs(hits1.score(i) - hits2.score(i)) > scoreTolerance) {
180: TestCase.fail("Hit " + i + ", doc nrs " + hits1.id(i)
181: + " and " + hits2.id(i) + "\nunequal : "
182: + hits1.score(i) + "\n and: "
183: + hits2.score(i) + "\nfor query:"
184: + query.toString());
185: }
186: }
187: }
188:
189: public static String hits2str(Hits hits1, Hits hits2, int start,
190: int end) throws IOException {
191: StringBuffer sb = new StringBuffer();
192: int len1 = hits1 == null ? 0 : hits1.length();
193: int len2 = hits2 == null ? 0 : hits2.length();
194: if (end <= 0) {
195: end = Math.max(len1, len2);
196: }
197:
198: sb.append("Hits length1=").append(len1).append("\tlength2=")
199: .append(len2);
200:
201: sb.append('\n');
202: for (int i = start; i < end; i++) {
203: sb.append("hit=").append(i).append(':');
204: if (i < len1) {
205: sb.append(" doc").append(hits1.id(i)).append('=')
206: .append(hits1.score(i));
207: } else {
208: sb.append(" ");
209: }
210: sb.append(",\t");
211: if (i < len2) {
212: sb.append(" doc").append(hits2.id(i)).append('=')
213: .append(hits2.score(i));
214: }
215: sb.append('\n');
216: }
217: return sb.toString();
218: }
219:
220: public static String topdocsString(TopDocs docs, int start, int end) {
221: StringBuffer sb = new StringBuffer();
222: sb.append("TopDocs totalHits=").append(docs.totalHits).append(
223: " top=").append(docs.scoreDocs.length).append('\n');
224: if (end <= 0)
225: end = docs.scoreDocs.length;
226: else
227: end = Math.min(end, docs.scoreDocs.length);
228: for (int i = start; i < end; i++) {
229: sb.append('\t');
230: sb.append(i);
231: sb.append(") doc=");
232: sb.append(docs.scoreDocs[i].doc);
233: sb.append("\tscore=");
234: sb.append(docs.scoreDocs[i].score);
235: sb.append('\n');
236: }
237: return sb.toString();
238: }
239:
240: /**
241: * Asserts that the explanation value for every document matching a
242: * query corresponds with the true score.
243: *
244: * @see ExplanationAsserter
245: * @see #checkExplanations(Query, String, Searcher, boolean) for a
246: * "deep" testing of the explanation details.
247: *
248: * @param query the query to test
249: * @param searcher the searcher to test the query against
250: * @param defaultFieldName used for displaing the query in assertion messages
251: */
252: public static void checkExplanations(Query query,
253: String defaultFieldName, Searcher searcher)
254: throws IOException {
255: checkExplanations(query, defaultFieldName, searcher, false);
256: }
257:
258: /**
259: * Asserts that the explanation value for every document matching a
260: * query corresponds with the true score. Optionally does "deep"
261: * testing of the explanation details.
262: *
263: * @see ExplanationAsserter
264: * @param query the query to test
265: * @param searcher the searcher to test the query against
266: * @param defaultFieldName used for displaing the query in assertion messages
267: * @param deep indicates whether a deep comparison of sub-Explanation details should be executed
268: */
269: public static void checkExplanations(Query query,
270: String defaultFieldName, Searcher searcher, boolean deep)
271: throws IOException {
272:
273: searcher.search(query, new ExplanationAsserter(query,
274: defaultFieldName, searcher, deep));
275:
276: }
277:
278: /**
279: * Assert that an explanation has the expected score, and optionally that its
280: * sub-details max/sum/factor match to that score.
281: *
282: * @param q String representation of the query for assertion messages
283: * @param doc Document ID for assertion messages
284: * @param score Real score value of doc with query q
285: * @param deep indicates whether a deep comparison of sub-Explanation details should be executed
286: * @param expl The Explanation to match against score
287: */
288: public static void verifyExplanation(String q, int doc,
289: float score, boolean deep, Explanation expl) {
290: float value = expl.getValue();
291: TestCase.assertEquals(q + ": score(doc=" + doc + ")=" + score
292: + " != explanationScore=" + value + " Explanation: "
293: + expl, score, value, EXPLAIN_SCORE_TOLERANCE_DELTA);
294:
295: if (!deep)
296: return;
297:
298: Explanation detail[] = expl.getDetails();
299: if (detail != null) {
300: if (detail.length == 1) {
301: // simple containment, no matter what the description says,
302: // just verify contained expl has same score
303: verifyExplanation(q, doc, score, deep, detail[0]);
304: } else {
305: // explanation must either:
306: // - end with one of: "product of:", "sum of:", "max of:", or
307: // - have "max plus <x> times others" (where <x> is float).
308: float x = 0;
309: String descr = expl.getDescription().toLowerCase();
310: boolean productOf = descr.endsWith("product of:");
311: boolean sumOf = descr.endsWith("sum of:");
312: boolean maxOf = descr.endsWith("max of:");
313: boolean maxTimesOthers = false;
314: if (!(productOf || sumOf || maxOf)) {
315: // maybe 'max plus x times others'
316: int k1 = descr.indexOf("max plus ");
317: if (k1 >= 0) {
318: k1 += "max plus ".length();
319: int k2 = descr.indexOf(" ", k1);
320: try {
321: x = Float.parseFloat(descr
322: .substring(k1, k2).trim());
323: if (descr.substring(k2).trim().equals(
324: "times others of:")) {
325: maxTimesOthers = true;
326: }
327: } catch (NumberFormatException e) {
328: }
329: }
330: }
331: TestCase
332: .assertTrue(
333: q
334: + ": multi valued explanation description=\""
335: + descr
336: + "\" must be 'max of plus x times others' or end with 'product of'"
337: + " or 'sum of:' or 'max of:' - "
338: + expl, productOf || sumOf
339: || maxOf || maxTimesOthers);
340: float sum = 0;
341: float product = 1;
342: float max = 0;
343: for (int i = 0; i < detail.length; i++) {
344: float dval = detail[i].getValue();
345: verifyExplanation(q, doc, dval, deep, detail[i]);
346: product *= dval;
347: sum += dval;
348: max = Math.max(max, dval);
349: }
350: float combined = 0;
351: if (productOf) {
352: combined = product;
353: } else if (sumOf) {
354: combined = sum;
355: } else if (maxOf) {
356: combined = max;
357: } else if (maxTimesOthers) {
358: combined = max + x * (sum - max);
359: } else {
360: TestCase
361: .assertTrue("should never get here!", false);
362: }
363: TestCase.assertEquals(q
364: + ": actual subDetails combined==" + combined
365: + " != value=" + value + " Explanation: "
366: + expl, combined, value,
367: EXPLAIN_SCORE_TOLERANCE_DELTA);
368: }
369: }
370: }
371:
372: /**
373: * an IndexSearcher that implicitly checks hte explanation of every match
374: * whenever it executes a search.
375: *
376: * @see ExplanationAsserter
377: */
378: public static class ExplanationAssertingSearcher extends
379: IndexSearcher {
380: public ExplanationAssertingSearcher(Directory d)
381: throws IOException {
382: super (d);
383: }
384:
385: public ExplanationAssertingSearcher(IndexReader r)
386: throws IOException {
387: super (r);
388: }
389:
390: protected void checkExplanations(Query q) throws IOException {
391: super .search(q, null,
392: new ExplanationAsserter(q, null, this ));
393: }
394:
395: public Hits search(Query query, Filter filter)
396: throws IOException {
397: checkExplanations(query);
398: return super .search(query, filter);
399: }
400:
401: public Hits search(Query query, Sort sort) throws IOException {
402: checkExplanations(query);
403: return super .search(query, sort);
404: }
405:
406: public Hits search(Query query, Filter filter, Sort sort)
407: throws IOException {
408: checkExplanations(query);
409: return super .search(query, filter, sort);
410: }
411:
412: public TopFieldDocs search(Query query, Filter filter, int n,
413: Sort sort) throws IOException {
414:
415: checkExplanations(query);
416: return super .search(query, filter, n, sort);
417: }
418:
419: public void search(Query query, HitCollector results)
420: throws IOException {
421: checkExplanations(query);
422: super .search(query, results);
423: }
424:
425: public void search(Query query, Filter filter,
426: HitCollector results) throws IOException {
427: checkExplanations(query);
428: super .search(query, filter, results);
429: }
430:
431: public TopDocs search(Query query, Filter filter, int n)
432: throws IOException {
433:
434: checkExplanations(query);
435: return super .search(query, filter, n);
436: }
437: }
438:
439: /**
440: * Asserts that the score explanation for every document matching a
441: * query corresponds with the true score.
442: *
443: * NOTE: this HitCollector should only be used with the Query and Searcher
444: * specified at when it is constructed.
445: *
446: * @see CheckHits#verifyExplanation
447: */
448: public static class ExplanationAsserter extends HitCollector {
449:
450: /**
451: * @deprecated
452: * @see CheckHits#EXPLAIN_SCORE_TOLERANCE_DELTA
453: */
454: public static float SCORE_TOLERANCE_DELTA = 0.00005f;
455:
456: Query q;
457: Searcher s;
458: String d;
459: boolean deep;
460:
461: /** Constructs an instance which does shallow tests on the Explanation */
462: public ExplanationAsserter(Query q, String defaultFieldName,
463: Searcher s) {
464: this (q, defaultFieldName, s, false);
465: }
466:
467: public ExplanationAsserter(Query q, String defaultFieldName,
468: Searcher s, boolean deep) {
469: this .q = q;
470: this .s = s;
471: this .d = q.toString(defaultFieldName);
472: this .deep = deep;
473: }
474:
475: public void collect(int doc, float score) {
476: Explanation exp = null;
477:
478: try {
479: exp = s.explain(q, doc);
480: } catch (IOException e) {
481: throw new RuntimeException(
482: "exception in hitcollector of [[" + d
483: + "]] for #" + doc, e);
484: }
485:
486: TestCase.assertNotNull("Explanation of [[" + d + "]] for #"
487: + doc + " is null", exp);
488: verifyExplanation(d, doc, score, deep, exp);
489: }
490:
491: }
492:
493: }
|