001: package org.apache.lucene.search.payloads;
002:
003: import org.apache.lucene.index.IndexReader;
004: import org.apache.lucene.index.Term;
005: import org.apache.lucene.index.TermPositions;
006: import org.apache.lucene.search.*;
007: import org.apache.lucene.search.spans.SpanScorer;
008: import org.apache.lucene.search.spans.SpanTermQuery;
009: import org.apache.lucene.search.spans.SpanWeight;
010: import org.apache.lucene.search.spans.TermSpans;
011:
012: import java.io.IOException;
013:
014: /**
015: * Copyright 2004 The Apache Software Foundation
016: * <p/>
017: * Licensed under the Apache License, Version 2.0 (the "License");
018: * you may not use this file except in compliance with the License.
019: * You may obtain a copy of the License at
020: * <p/>
021: * http://www.apache.org/licenses/LICENSE-2.0
022: * <p/>
023: * Unless required by applicable law or agreed to in writing, software
024: * distributed under the License is distributed on an "AS IS" BASIS,
025: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
026: * See the License for the specific language governing permissions and
027: * limitations under the License.
028: */
029:
030: /**
031: * The BoostingTermQuery is very similar to the {@link org.apache.lucene.search.spans.SpanTermQuery} except
032: * that it factors in the value of the payload located at each of the positions where the
033: * {@link org.apache.lucene.index.Term} occurs.
034: * <p>
035: * In order to take advantage of this, you must override {@link org.apache.lucene.search.Similarity#scorePayload(String, byte[],int,int)}
036: * which returns 1 by default.
037: * <p>
038: * Payload scores are averaged across term occurrences in the document.
039: *
040: * @see org.apache.lucene.search.Similarity#scorePayload(String, byte[], int, int)
041: */
042: public class BoostingTermQuery extends SpanTermQuery {
043:
044: public BoostingTermQuery(Term term) {
045: super (term);
046: }
047:
048: protected Weight createWeight(Searcher searcher) throws IOException {
049: return new BoostingTermWeight(this , searcher);
050: }
051:
052: protected class BoostingTermWeight extends SpanWeight implements
053: Weight {
054:
055: public BoostingTermWeight(BoostingTermQuery query,
056: Searcher searcher) throws IOException {
057: super (query, searcher);
058: }
059:
060: public Scorer scorer(IndexReader reader) throws IOException {
061: return new BoostingSpanScorer((TermSpans) query
062: .getSpans(reader), this , similarity, reader
063: .norms(query.getField()));
064: }
065:
066: class BoostingSpanScorer extends SpanScorer {
067:
068: //TODO: is this the best way to allocate this?
069: byte[] payload = new byte[256];
070: private TermPositions positions;
071: protected float payloadScore;
072: private int payloadsSeen;
073:
074: public BoostingSpanScorer(TermSpans spans, Weight weight,
075: Similarity similarity, byte[] norms)
076: throws IOException {
077: super (spans, weight, similarity, norms);
078: positions = spans.getPositions();
079:
080: }
081:
082: protected boolean setFreqCurrentDoc() throws IOException {
083: if (!more) {
084: return false;
085: }
086: doc = spans.doc();
087: freq = 0.0f;
088: payloadScore = 0;
089: payloadsSeen = 0;
090: Similarity similarity1 = getSimilarity();
091: while (more && doc == spans.doc()) {
092: int matchLength = spans.end() - spans.start();
093:
094: freq += similarity1.sloppyFreq(matchLength);
095: processPayload(similarity1);
096:
097: more = spans.next();//this moves positions to the next match in this document
098: }
099: return more || (freq != 0);
100: }
101:
102: protected void processPayload(Similarity similarity)
103: throws IOException {
104: if (positions.isPayloadAvailable()) {
105: payload = positions.getPayload(payload, 0);
106: payloadScore += similarity.scorePayload(term
107: .field(), payload, 0, positions
108: .getPayloadLength());
109: payloadsSeen++;
110:
111: } else {
112: //zero out the payload?
113: }
114:
115: }
116:
117: public float score() throws IOException {
118:
119: return super .score()
120: * (payloadsSeen > 0 ? (payloadScore / payloadsSeen)
121: : 1);
122: }
123:
124: public Explanation explain(final int doc)
125: throws IOException {
126: Explanation result = new Explanation();
127: Explanation nonPayloadExpl = super .explain(doc);
128: result.addDetail(nonPayloadExpl);
129: //QUESTION: Is there a wau to avoid this skipTo call? We need to know whether to load the payload or not
130:
131: Explanation payloadBoost = new Explanation();
132: result.addDetail(payloadBoost);
133: /*
134: if (skipTo(doc) == true) {
135: processPayload();
136: }
137: */
138:
139: float avgPayloadScore = (payloadsSeen > 0 ? (payloadScore / payloadsSeen)
140: : 1);
141: payloadBoost.setValue(avgPayloadScore);
142: //GSI: I suppose we could toString the payload, but I don't think that would be a good idea
143: payloadBoost.setDescription("scorePayload(...)");
144: result.setValue(nonPayloadExpl.getValue()
145: * avgPayloadScore);
146: result.setDescription("btq, product of:");
147: return result;
148: }
149: }
150:
151: }
152:
153: public boolean equals(Object o) {
154: if (!(o instanceof BoostingTermQuery))
155: return false;
156: BoostingTermQuery other = (BoostingTermQuery) o;
157: return (this.getBoost() == other.getBoost())
158: && this.term.equals(other.term);
159: }
160: }
|