package org.apache.lucene.search.payloads;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;

import java.io.IOException;
import java.io.Reader;

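/**
 * Tests {@link BoostingTermQuery}: every term is indexed with a single-byte
 * payload, and a Similarity that fixes all other scoring factors at 1
 * verifies that the payload value alone determines the score.
 */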
public class TestBoostingTermQuery extends LuceneTestCase {
  private IndexSearcher searcher;
  private BoostingSimilarity similarity = new BoostingSimilarity();
  private byte[] payloadField = new byte[]{1};
  private byte[] payloadMultiField1 = new byte[]{2};
  private byte[] payloadMultiField2 = new byte[]{4};

  public TestBoostingTermQuery(String s) {
    super(s);
  }

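  /**
   * Lowercases the input and runs it through {@link PayloadFilter}.
   */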
  private class PayloadAnalyzer extends Analyzer {

    public TokenStream tokenStream(String fieldName, Reader reader) {
      TokenStream result = new LowerCaseTokenizer(reader);
      result = new PayloadFilter(result, fieldName);
      return result;
    }
  }

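  /**
   * Attaches a payload to every token: payload 1 for "field", and
   * alternating payloads 2 and 4 for "multiField". Other fields get none.
   */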
  private class PayloadFilter extends TokenFilter {
    String fieldName;
    int numSeen = 0;

    public PayloadFilter(TokenStream input, String fieldName) {
      super(input);
      this.fieldName = fieldName;
    }

    public Token next() throws IOException {
      Token result = input.next();
      if (result != null) {
        if (fieldName.equals("field")) {
          result.setPayload(new Payload(payloadField));
        } else if (fieldName.equals("multiField")) {
          if (numSeen % 2 == 0) {
            result.setPayload(new Payload(payloadMultiField1));
          } else {
            result.setPayload(new Payload(payloadMultiField2));
          }
          numSeen++;
        }
      }
      return result;
    }
  }

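  /**
   * Indexes 1000 documents, each holding the English spelling of its doc id
   * in three fields: "field" (payload 1 on every token), "multiField" (the
   * text repeated twice, with payloads alternating 2 and 4), and "noPayLoad"
   * (no payloads at all).
   */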
  protected void setUp() throws Exception {
    super.setUp();
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true);
    writer.setSimilarity(similarity);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++) {
      Document doc = new Document();
      Field noPayloadField = new Field("noPayLoad", English.intToEnglish(i),
          Field.Store.YES, Field.Index.TOKENIZED);
      noPayloadField.setBoost(0);
      doc.add(noPayloadField);
      doc.add(new Field("field", English.intToEnglish(i), Field.Store.YES,
          Field.Index.TOKENIZED));
      doc.add(new Field("multiField", English.intToEnglish(i) + " "
          + English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
      writer.addDocument(doc);
    }
    writer.optimize();
    writer.close();

    searcher = new IndexSearcher(directory);
    searcher.setSimilarity(similarity);
  }

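  /**
   * Every "field" token carries payload 1, so all 100 docs matching
   * "seventy" must score exactly 1.
   */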
  public void test() throws IOException {
    BoostingTermQuery query = new BoostingTermQuery(new Term("field",
        "seventy"));
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 100,
        hits.totalHits == 100);

    //they should all have the exact same score, because they all contain
    //seventy once, and we set all the other similarity factors to be 1
    assertTrue(hits.getMaxScore() + " does not equal: " + 1,
        hits.getMaxScore() == 1);
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      ScoreDoc doc = hits.scoreDocs[i];
      assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
    }
    CheckHits.checkExplanations(query, "field", searcher, true);
    Spans spans = query.getSpans(searcher.getIndexReader());
    assertTrue("spans is null and it shouldn't be", spans != null);
    assertTrue("spans is not an instanceof " + TermSpans.class,
        spans instanceof TermSpans);
  }

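  /**
   * "multiField" contains the text twice, so every matching doc has two
   * occurrences of "seventy". Where the two occurrences pick up payloads
   * 2 and 4 the average is 3; where both pick up payload 2 it is 2.
   */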
  public void testMultipleMatchesPerDoc() throws Exception {
    BoostingTermQuery query = new BoostingTermQuery(new Term("multiField",
        "seventy"));
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 100,
        hits.totalHits == 100);

    //each doc contains seventy twice; with all other similarity factors
    //set to 1, the score is the average payload of the two occurrences
    assertTrue(hits.getMaxScore() + " does not equal: " + 3,
        hits.getMaxScore() == 3);
    //there should be exactly 10 items that score a 3, all the rest should score a 2
    //The 10 items are: 70 + i*100 where i in [0-9]
    int numTens = 0;
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      ScoreDoc doc = hits.scoreDocs[i];
      if (doc.doc % 10 == 0) {
        numTens++;
        assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
      } else {
        assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
      }
    }
    assertTrue(numTens + " does not equal: " + 10, numTens == 10);
    CheckHits.checkExplanations(query, "multiField", searcher, true);
    Spans spans = query.getSpans(searcher.getIndexReader());
    assertTrue("spans is null and it shouldn't be", spans != null);
    assertTrue("spans is not an instanceof " + TermSpans.class,
        spans instanceof TermSpans);
    //should be two matches per document
    int count = 0;
    //100 hits times 2 matches per hit, we should have 200 in count
    while (spans.next()) {
      count++;
    }
    assertTrue(count + " does not equal: " + 200, count == 200);
  }

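  /** A term that occurs nowhere in the index must produce zero hits. */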
  public void testNoMatch() throws Exception {
    BoostingTermQuery query = new BoostingTermQuery(new Term("field",
        "junk"));
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 0,
        hits.totalHits == 0);
  }

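  /**
   * Matching on "noPayLoad", a field indexed without payloads, must still
   * work; doc 0 ("zero") is the only expected hit.
   */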
  public void testNoPayload() throws Exception {
    BoostingTermQuery q1 = new BoostingTermQuery(new Term("noPayLoad",
        "zero"));
    BoostingTermQuery q2 = new BoostingTermQuery(new Term("noPayLoad",
        "foo"));
    BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST);
    BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT);
    BooleanQuery query = new BooleanQuery();
    query.add(c1);
    query.add(c2);
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    //assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1);
    int[] results = new int[1];
    results[0] = 0;//hits.scoreDocs[0].doc;
    CheckHits.checkHitCollector(query, "noPayLoad", searcher, results);
  }

  // must be static for weight serialization tests
  static class BoostingSimilarity extends DefaultSimilarity {

    // TODO: Remove warning after API has been finalized
    public float scorePayload(String fieldName, byte[] payload, int offset,
        int length) {
      //the payloads in this test are a single byte, so ignore offset/length
      return payload[0];
    }

    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    //Make everything else 1 so we see the effect of the payload
    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    public float lengthNorm(String fieldName, int numTerms) {
      return 1;
    }

    public float queryNorm(float sumOfSquaredWeights) {
      return 1;
    }

    public float sloppyFreq(int distance) {
      return 1;
    }

    public float coord(int overlap, int maxOverlap) {
      return 1;
    }

    public float idf(int docFreq, int numDocs) {
      return 1;
    }

    public float tf(float freq) {
      return freq == 0 ? 0 : 1;
    }
  }
}