001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.lucene.benchmark.quality;
017:
018: import java.io.IOException;
019: import java.io.PrintWriter;
020:
021: import org.apache.lucene.benchmark.quality.utils.DocNameExtractor;
022: import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
023: import org.apache.lucene.search.Query;
024: import org.apache.lucene.search.ScoreDoc;
025: import org.apache.lucene.search.Searcher;
026: import org.apache.lucene.search.TopDocs;
027:
028: /**
029: * Main entry point for running a quality benchmark.
030: * <p>
031: * There are two main configurations for running a quality benchmark: <ul>
032: * <li>Against existing judgements.</li>
033: * <li>For submission (e.g. for a contest).</li>
034: * </ul>
035: * The first configuration requires a non null
036: * {@link org.apache.lucene.benchmark.quality.Judge Judge}.
037: * The second configuration requires a non null
038: * {@link org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionReport}.
039: */
040: public class QualityBenchmark {
041:
042: /** Quality Queries that this quality benchmark would execute. */
043: protected QualityQuery qualityQueries[];
044:
045: /** Parser for turning QualityQueries into Lucene Queries. */
046: protected QualityQueryParser qqParser;
047:
048: /** Index to be searched. */
049: protected Searcher searcher;
050:
051: /** index field to extract doc name for each search result; used for judging the results. */
052: protected String docNameField;
053:
054: /** maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging. */
055: private int maxQueries = Integer.MAX_VALUE;
056:
057: /** maximal number of results to collect for each query. Default: 1000. */
058: private int maxResults = 1000;
059:
060: /**
061: * Create a QualityBenchmark.
062: * @param qqs quality queries to run.
063: * @param qqParser parser for turning QualityQueries into Lucene Queries.
064: * @param searcher index to be searched.
065: * @param docNameField name of field containg the document name.
066: * This allows to extract the doc name for search results,
067: * and is important for judging the results.
068: */
069: public QualityBenchmark(QualityQuery qqs[],
070: QualityQueryParser qqParser, Searcher searcher,
071: String docNameField) {
072: this .qualityQueries = qqs;
073: this .qqParser = qqParser;
074: this .searcher = searcher;
075: this .docNameField = docNameField;
076: }
077:
078: /**
079: * Run the quality benchmark.
080: * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query.
081: * If null, no judgements would be made. Usually null for a submission run.
082: * @param submitRep submission report is created if non null.
083: * @param qualityLog If not null, quality run data would be printed for each query.
084: * @return QualityStats of each quality query that was executed.
085: * @throws Exception if quality benchmark failed to run.
086: */
087: public QualityStats[] execute(Judge judge,
088: SubmissionReport submitRep, PrintWriter qualityLog)
089: throws Exception {
090: int nQueries = Math.min(maxQueries, qualityQueries.length);
091: QualityStats stats[] = new QualityStats[nQueries];
092: for (int i = 0; i < nQueries; i++) {
093: QualityQuery qq = qualityQueries[i];
094: // generate query
095: Query q = qqParser.parse(qq);
096: // search with this query
097: long t1 = System.currentTimeMillis();
098: TopDocs td = searcher.search(q, null, maxResults);
099: long searchTime = System.currentTimeMillis() - t1;
100: //most likely we either submit or judge, but check both
101: if (judge != null) {
102: stats[i] = analyzeQueryResults(qq, q, td, judge,
103: qualityLog, searchTime);
104: }
105: if (submitRep != null) {
106: submitRep.report(qq, td, docNameField, searcher);
107: }
108: }
109: if (submitRep != null) {
110: submitRep.flush();
111: }
112: return stats;
113: }
114:
115: /* Analyze/judge results for a single quality query; optionally log them. */
116: private QualityStats analyzeQueryResults(QualityQuery qq, Query q,
117: TopDocs td, Judge judge, PrintWriter logger, long searchTime)
118: throws IOException {
119: QualityStats stts = new QualityStats(judge.maxRecall(qq),
120: searchTime);
121: ScoreDoc sd[] = td.scoreDocs;
122: long t1 = System.currentTimeMillis(); // extraction of first doc name we meassure also construction of doc name extractor, just in case.
123: DocNameExtractor xt = new DocNameExtractor(docNameField);
124: for (int i = 0; i < sd.length; i++) {
125: String docName = xt.docName(searcher, sd[i].doc);
126: long docNameExtractTime = System.currentTimeMillis() - t1;
127: t1 = System.currentTimeMillis();
128: boolean isRelevant = judge.isRelevant(docName, qq);
129: stts.addResult(i + 1, isRelevant, docNameExtractTime);
130: }
131: if (logger != null) {
132: logger.println(qq.getQueryID() + " - " + q);
133: stts.log(qq.getQueryID() + " Stats:", 1, logger, " ");
134: }
135: return stts;
136: }
137:
138: /**
139: * @return the maximum number of quality queries to run. Useful at debugging.
140: */
141: public int getMaxQueries() {
142: return maxQueries;
143: }
144:
145: /**
146: * Set the maximum number of quality queries to run. Useful at debugging.
147: */
148: public void setMaxQueries(int maxQueries) {
149: this .maxQueries = maxQueries;
150: }
151:
152: /**
153: * @return the maximum number of results to collect for each quality query.
154: */
155: public int getMaxResults() {
156: return maxResults;
157: }
158:
159: /**
160: * set the maximum number of results to collect for each quality query.
161: */
162: public void setMaxResults(int maxResults) {
163: this.maxResults = maxResults;
164: }
165:
166: }
|