001: package org.apache.lucene.search;
002:
003: /**
004: * Copyright 2005 The Apache Software Foundation
005: *
006: * Licensed under the Apache License, Version 2.0 (the "License");
007: * you may not use this file except in compliance with the License.
008: * You may obtain a copy of the License at
009: *
010: * http://www.apache.org/licenses/LICENSE-2.0
011: *
012: * Unless required by applicable law or agreed to in writing, software
013: * distributed under the License is distributed on an "AS IS" BASIS,
014: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: * See the License for the specific language governing permissions and
016: * limitations under the License.
017: */
018:
019: import java.io.IOException;
020: import java.io.Serializable;
021: import java.util.Calendar;
022: import java.util.GregorianCalendar;
023: import java.util.Map;
024: import java.util.Random;
025: import java.util.TreeMap;
026:
027: import junit.framework.Test;
028: import junit.framework.TestCase;
029: import junit.framework.TestSuite;
030: import junit.textui.TestRunner;
031:
032: import org.apache.lucene.analysis.standard.StandardAnalyzer;
033: import org.apache.lucene.document.DateTools;
034: import org.apache.lucene.document.Document;
035: import org.apache.lucene.document.Field;
036: import org.apache.lucene.index.IndexReader;
037: import org.apache.lucene.index.IndexWriter;
038: import org.apache.lucene.index.Term;
039: import org.apache.lucene.store.Directory;
040: import org.apache.lucene.store.RAMDirectory;
041:
042: /**
043: * Unit test for sorting code.
044: *
045: * @author Martin Seitz (T-Systems)
046: */
047:
048: public class TestCustomSearcherSort extends TestCase implements
049: Serializable {
050:
051: private Directory index = null;
052: private Query query = null;
053: // reduced from 20000 to 2000 to speed up test...
054: private final static int INDEX_SIZE = 2000;
055:
056: public TestCustomSearcherSort(String name) {
057: super (name);
058: }
059:
060: public static void main(String[] argv) {
061: TestRunner.run(suite());
062: }
063:
064: public static Test suite() {
065: return new TestSuite(TestCustomSearcherSort.class);
066: }
067:
068: // create an index for testing
069: private Directory getIndex() throws IOException {
070: RAMDirectory indexStore = new RAMDirectory();
071: IndexWriter writer = new IndexWriter(indexStore,
072: new StandardAnalyzer(), true);
073: RandomGen random = new RandomGen();
074: for (int i = 0; i < INDEX_SIZE; ++i) { // don't decrease; if to low the problem doesn't show up
075: Document doc = new Document();
076: if ((i % 5) != 0) { // some documents must not have an entry in the first sort field
077: doc.add(new Field("publicationDate_", random
078: .getLuceneDate(), Field.Store.YES,
079: Field.Index.UN_TOKENIZED));
080: }
081: if ((i % 7) == 0) { // some documents to match the query (see below)
082: doc.add(new Field("content", "test", Field.Store.YES,
083: Field.Index.TOKENIZED));
084: }
085: // every document has a defined 'mandant' field
086: doc.add(new Field("mandant", Integer.toString(i % 3),
087: Field.Store.YES, Field.Index.UN_TOKENIZED));
088: writer.addDocument(doc);
089: }
090: writer.optimize();
091: writer.close();
092: return indexStore;
093: }
094:
095: /**
096: * Create index and query for test cases.
097: */
098: public void setUp() throws Exception {
099: index = getIndex();
100: query = new TermQuery(new Term("content", "test"));
101: }
102:
103: /**
104: * Run the test using two CustomSearcher instances.
105: */
106: public void testFieldSortCustomSearcher() throws Exception {
107: // log("Run testFieldSortCustomSearcher");
108: // define the sort criteria
109: Sort custSort = new Sort(new SortField[] {
110: new SortField("publicationDate_"),
111: SortField.FIELD_SCORE });
112: Searcher searcher = new CustomSearcher(index, 2);
113: // search and check hits
114: matchHits(searcher, custSort);
115: }
116:
117: /**
118: * Run the test using one CustomSearcher wrapped by a MultiSearcher.
119: */
120: public void testFieldSortSingleSearcher() throws Exception {
121: // log("Run testFieldSortSingleSearcher");
122: // define the sort criteria
123: Sort custSort = new Sort(new SortField[] {
124: new SortField("publicationDate_"),
125: SortField.FIELD_SCORE });
126: Searcher searcher = new MultiSearcher(
127: new Searchable[] { new CustomSearcher(index, 2) });
128: // search and check hits
129: matchHits(searcher, custSort);
130: }
131:
132: /**
133: * Run the test using two CustomSearcher instances.
134: */
135: public void testFieldSortMultiCustomSearcher() throws Exception {
136: // log("Run testFieldSortMultiCustomSearcher");
137: // define the sort criteria
138: Sort custSort = new Sort(new SortField[] {
139: new SortField("publicationDate_"),
140: SortField.FIELD_SCORE });
141: Searcher searcher = new MultiSearcher(new Searchable[] {
142: new CustomSearcher(index, 0),
143: new CustomSearcher(index, 2) });
144: // search and check hits
145: matchHits(searcher, custSort);
146: }
147:
148: // make sure the documents returned by the search match the expected list
149: private void matchHits(Searcher searcher, Sort sort)
150: throws IOException {
151: // make a query without sorting first
152: Hits hitsByRank = searcher.search(query);
153: checkHits(hitsByRank, "Sort by rank: "); // check for duplicates
154: Map resultMap = new TreeMap();
155: // store hits in TreeMap - TreeMap does not allow duplicates; existing entries are silently overwritten
156: for (int hitid = 0; hitid < hitsByRank.length(); ++hitid) {
157: resultMap.put(new Integer(hitsByRank.id(hitid)), // Key: Lucene Document ID
158: new Integer(hitid)); // Value: Hits-Objekt Index
159: }
160:
161: // now make a query using the sort criteria
162: Hits resultSort = searcher.search(query, sort);
163: checkHits(resultSort, "Sort by custom criteria: "); // check for duplicates
164:
165: String lf = System.getProperty("line.separator", "\n");
166: // besides the sorting both sets of hits must be identical
167: for (int hitid = 0; hitid < resultSort.length(); ++hitid) {
168: Integer idHitDate = new Integer(resultSort.id(hitid)); // document ID from sorted search
169: if (!resultMap.containsKey(idHitDate)) {
170: log("ID " + idHitDate
171: + " not found. Possibliy a duplicate.");
172: }
173: assertTrue(resultMap.containsKey(idHitDate)); // same ID must be in the Map from the rank-sorted search
174: // every hit must appear once in both result sets --> remove it from the Map.
175: // At the end the Map must be empty!
176: resultMap.remove(idHitDate);
177: }
178: if (resultMap.size() == 0) {
179: // log("All hits matched");
180: } else {
181: log("Couldn't match " + resultMap.size() + " hits.");
182: }
183: assertEquals(resultMap.size(), 0);
184: }
185:
186: /**
187: * Check the hits for duplicates.
188: * @param hits
189: */
190: private void checkHits(Hits hits, String prefix) {
191: if (hits != null) {
192: Map idMap = new TreeMap();
193: for (int docnum = 0; docnum < hits.length(); ++docnum) {
194: Integer luceneId = null;
195: try {
196: luceneId = new Integer(hits.id(docnum));
197: if (idMap.containsKey(luceneId)) {
198: StringBuffer message = new StringBuffer(prefix);
199: message
200: .append("Duplicate key for hit index = ");
201: message.append(docnum);
202: message.append(", previous index = ");
203: message.append(((Integer) idMap.get(luceneId))
204: .toString());
205: message.append(", Lucene ID = ");
206: message.append(luceneId);
207: log(message.toString());
208: } else {
209: idMap.put(luceneId, new Integer(docnum));
210: }
211: } catch (IOException ioe) {
212: StringBuffer message = new StringBuffer(prefix);
213: message.append("Error occurred for hit index = ");
214: message.append(docnum);
215: message.append(" (");
216: message.append(ioe.getMessage());
217: message.append(")");
218: log(message.toString());
219: }
220: }
221: }
222: }
223:
224: // Simply write to console - choosen to be independant of log4j etc
225: private void log(String message) {
226: System.out.println(message);
227: }
228:
229: public class CustomSearcher extends IndexSearcher {
230: private int switcher;
231:
232: /**
233: * @param directory
234: * @throws IOException
235: */
236: public CustomSearcher(Directory directory, int switcher)
237: throws IOException {
238: super (directory);
239: this .switcher = switcher;
240: }
241:
242: /**
243: * @param r
244: */
245: public CustomSearcher(IndexReader r, int switcher) {
246: super (r);
247: this .switcher = switcher;
248: }
249:
250: /**
251: * @param path
252: * @throws IOException
253: */
254: public CustomSearcher(String path, int switcher)
255: throws IOException {
256: super (path);
257: this .switcher = switcher;
258: }
259:
260: /* (non-Javadoc)
261: * @see org.apache.lucene.search.Searchable#search(org.apache.lucene.search.Query, org.apache.lucene.search.Filter, int, org.apache.lucene.search.Sort)
262: */
263: public TopFieldDocs search(Query query, Filter filter,
264: int nDocs, Sort sort) throws IOException {
265: BooleanQuery bq = new BooleanQuery();
266: bq.add(query, BooleanClause.Occur.MUST);
267: bq.add(new TermQuery(new Term("mandant", Integer
268: .toString(switcher))), BooleanClause.Occur.MUST);
269: return super .search(bq, filter, nDocs, sort);
270: }
271:
272: /* (non-Javadoc)
273: * @see org.apache.lucene.search.Searchable#search(org.apache.lucene.search.Query, org.apache.lucene.search.Filter, int)
274: */
275: public TopDocs search(Query query, Filter filter, int nDocs)
276: throws IOException {
277: BooleanQuery bq = new BooleanQuery();
278: bq.add(query, BooleanClause.Occur.MUST);
279: bq.add(new TermQuery(new Term("mandant", Integer
280: .toString(switcher))), BooleanClause.Occur.MUST);
281: return super .search(bq, filter, nDocs);
282: }
283: }
284:
285: private class RandomGen {
286: private Random random = new Random(0); // to generate some arbitrary contents
287: private Calendar base = new GregorianCalendar(1980, 1, 1);
288:
289: // Just to generate some different Lucene Date strings
290: private String getLuceneDate() {
291: return DateTools.timeToString(base.getTimeInMillis()
292: + random.nextInt() - Integer.MIN_VALUE,
293: DateTools.Resolution.DAY);
294: }
295: }
296: }
|