01: /**
02: * Licensed to the Apache Software Foundation (ASF) under one or more
03: * contributor license agreements. See the NOTICE file distributed with
04: * this work for additional information regarding copyright ownership.
05: * The ASF licenses this file to You under the Apache License, Version 2.0
06: * (the "License"); you may not use this file except in compliance with
07: * the License. You may obtain a copy of the License at
08: *
09: * http://www.apache.org/licenses/LICENSE-2.0
10: *
11: * Unless required by applicable law or agreed to in writing, software
12: * distributed under the License is distributed on an "AS IS" BASIS,
13: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14: * See the License for the specific language governing permissions and
15: * limitations under the License.
16: */package org.apache.lucene.benchmark.quality.trec;
17:
18: import java.io.BufferedReader;
19: import java.io.IOException;
20: import java.util.ArrayList;
21: import java.util.Arrays;
22: import java.util.HashMap;
23:
24: import org.apache.lucene.benchmark.quality.QualityQuery;
25:
26: /**
27: * Read topics of TREC 1MQ track.
28: * <p>
29: * Expects this topic format -
30: * <pre>
31: * qnum:qtext
32: * </pre>
33: * Comment lines starting with '#' are ignored.
34: * <p>
35: * All topics will have a single name value pair.
36: */
37: public class Trec1MQReader {
38:
39: private String name;
40:
41: /**
42: * Constructor for Trec's 1MQ TopicsReader
43: * @param name name of name-value pair to set for all queries.
44: */
45: public Trec1MQReader(String name) {
46: super ();
47: this .name = name;
48: }
49:
50: /**
51: * Read quality queries from trec 1MQ format topics file.
52: * @param reader where queries are read from.
53: * @return the result quality queries.
54: * @throws IOException if cannot read the queries.
55: */
56: public QualityQuery[] readQueries(BufferedReader reader)
57: throws IOException {
58: ArrayList res = new ArrayList();
59: String line;
60: try {
61: while (null != (line = reader.readLine())) {
62: line = line.trim();
63: if (line.startsWith("#")) {
64: continue;
65: }
66: // id
67: int k = line.indexOf(":");
68: String id = line.substring(0, k).trim();
69: // qtext
70: String qtext = line.substring(k + 1).trim();
71: // we got a topic!
72: HashMap fields = new HashMap();
73: fields.put(name, qtext);
74: //System.out.println("id: "+id+" qtext: "+qtext+" line: "+line);
75: QualityQuery topic = new QualityQuery(id, fields);
76: res.add(topic);
77: }
78: } finally {
79: reader.close();
80: }
81: // sort result array (by ID)
82: QualityQuery qq[] = (QualityQuery[]) res
83: .toArray(new QualityQuery[0]);
84: Arrays.sort(qq);
85: return qq;
86: }
87:
88: }
|