001: package org.apache.lucene.search;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.IOException;
021: import java.util.*;
022:
023: import org.apache.lucene.index.IndexReader;
024: import org.apache.lucene.index.MultipleTermPositions;
025: import org.apache.lucene.index.Term;
026: import org.apache.lucene.index.TermPositions;
027: import org.apache.lucene.search.Query;
028: import org.apache.lucene.util.ToStringUtils;
029:
030: /**
031: * MultiPhraseQuery is a generalized version of PhraseQuery, with an added
032: * method {@link #add(Term[])}.
033: * To use this class, to search for the phrase "Microsoft app*" first use
034: * add(Term) on the term "Microsoft", then find all terms that have "app" as
035: * prefix using IndexReader.terms(Term), and use MultiPhraseQuery.add(Term[]
036: * terms) to add them to the query.
037: *
038: * @author Anders Nielsen
039: * @version 1.0
040: */
041: public class MultiPhraseQuery extends Query {
042: private String field;
043: private ArrayList termArrays = new ArrayList();
044: private Vector positions = new Vector();
045:
046: private int slop = 0;
047:
048: /** Sets the phrase slop for this query.
049: * @see PhraseQuery#setSlop(int)
050: */
051: public void setSlop(int s) {
052: slop = s;
053: }
054:
055: /** Sets the phrase slop for this query.
056: * @see PhraseQuery#getSlop()
057: */
058: public int getSlop() {
059: return slop;
060: }
061:
062: /** Add a single term at the next position in the phrase.
063: * @see PhraseQuery#add(Term)
064: */
065: public void add(Term term) {
066: add(new Term[] { term });
067: }
068:
069: /** Add multiple terms at the next position in the phrase. Any of the terms
070: * may match.
071: *
072: * @see PhraseQuery#add(Term)
073: */
074: public void add(Term[] terms) {
075: int position = 0;
076: if (positions.size() > 0)
077: position = ((Integer) positions.lastElement()).intValue() + 1;
078:
079: add(terms, position);
080: }
081:
082: /**
083: * Allows to specify the relative position of terms within the phrase.
084: *
085: * @see PhraseQuery#add(Term, int)
086: * @param terms
087: * @param position
088: */
089: public void add(Term[] terms, int position) {
090: if (termArrays.size() == 0)
091: field = terms[0].field();
092:
093: for (int i = 0; i < terms.length; i++) {
094: if (terms[i].field() != field) {
095: throw new IllegalArgumentException(
096: "All phrase terms must be in the same field ("
097: + field + "): " + terms[i]);
098: }
099: }
100:
101: termArrays.add(terms);
102: positions.addElement(new Integer(position));
103: }
104:
105: /**
106: * Returns a List<Term[]> of the terms in the multiphrase.
107: * Do not modify the List or its contents.
108: */
109: public List getTermArrays() {
110: return Collections.unmodifiableList(termArrays);
111: }
112:
113: /**
114: * Returns the relative positions of terms in this phrase.
115: */
116: public int[] getPositions() {
117: int[] result = new int[positions.size()];
118: for (int i = 0; i < positions.size(); i++)
119: result[i] = ((Integer) positions.elementAt(i)).intValue();
120: return result;
121: }
122:
123: // inherit javadoc
124: public void extractTerms(Set terms) {
125: for (Iterator iter = termArrays.iterator(); iter.hasNext();) {
126: Term[] arr = (Term[]) iter.next();
127: for (int i = 0; i < arr.length; i++) {
128: terms.add(arr[i]);
129: }
130: }
131: }
132:
133: private class MultiPhraseWeight implements Weight {
134: private Similarity similarity;
135: private float value;
136: private float idf;
137: private float queryNorm;
138: private float queryWeight;
139:
140: public MultiPhraseWeight(Searcher searcher) throws IOException {
141: this .similarity = getSimilarity(searcher);
142:
143: // compute idf
144: Iterator i = termArrays.iterator();
145: while (i.hasNext()) {
146: Term[] terms = (Term[]) i.next();
147: for (int j = 0; j < terms.length; j++) {
148: idf += getSimilarity(searcher).idf(terms[j],
149: searcher);
150: }
151: }
152: }
153:
154: public Query getQuery() {
155: return MultiPhraseQuery.this ;
156: }
157:
158: public float getValue() {
159: return value;
160: }
161:
162: public float sumOfSquaredWeights() {
163: queryWeight = idf * getBoost(); // compute query weight
164: return queryWeight * queryWeight; // square it
165: }
166:
167: public void normalize(float queryNorm) {
168: this .queryNorm = queryNorm;
169: queryWeight *= queryNorm; // normalize query weight
170: value = queryWeight * idf; // idf for document
171: }
172:
173: public Scorer scorer(IndexReader reader) throws IOException {
174: if (termArrays.size() == 0) // optimize zero-term case
175: return null;
176:
177: TermPositions[] tps = new TermPositions[termArrays.size()];
178: for (int i = 0; i < tps.length; i++) {
179: Term[] terms = (Term[]) termArrays.get(i);
180:
181: TermPositions p;
182: if (terms.length > 1)
183: p = new MultipleTermPositions(reader, terms);
184: else
185: p = reader.termPositions(terms[0]);
186:
187: if (p == null)
188: return null;
189:
190: tps[i] = p;
191: }
192:
193: if (slop == 0)
194: return new ExactPhraseScorer(this , tps, getPositions(),
195: similarity, reader.norms(field));
196: else
197: return new SloppyPhraseScorer(this , tps,
198: getPositions(), similarity, slop, reader
199: .norms(field));
200: }
201:
202: public Explanation explain(IndexReader reader, int doc)
203: throws IOException {
204: ComplexExplanation result = new ComplexExplanation();
205: result.setDescription("weight(" + getQuery() + " in " + doc
206: + "), product of:");
207:
208: Explanation idfExpl = new Explanation(idf, "idf("
209: + getQuery() + ")");
210:
211: // explain query weight
212: Explanation queryExpl = new Explanation();
213: queryExpl.setDescription("queryWeight(" + getQuery()
214: + "), product of:");
215:
216: Explanation boostExpl = new Explanation(getBoost(), "boost");
217: if (getBoost() != 1.0f)
218: queryExpl.addDetail(boostExpl);
219:
220: queryExpl.addDetail(idfExpl);
221:
222: Explanation queryNormExpl = new Explanation(queryNorm,
223: "queryNorm");
224: queryExpl.addDetail(queryNormExpl);
225:
226: queryExpl.setValue(boostExpl.getValue()
227: * idfExpl.getValue() * queryNormExpl.getValue());
228:
229: result.addDetail(queryExpl);
230:
231: // explain field weight
232: ComplexExplanation fieldExpl = new ComplexExplanation();
233: fieldExpl.setDescription("fieldWeight(" + getQuery()
234: + " in " + doc + "), product of:");
235:
236: Explanation tfExpl = scorer(reader).explain(doc);
237: fieldExpl.addDetail(tfExpl);
238: fieldExpl.addDetail(idfExpl);
239:
240: Explanation fieldNormExpl = new Explanation();
241: byte[] fieldNorms = reader.norms(field);
242: float fieldNorm = fieldNorms != null ? Similarity
243: .decodeNorm(fieldNorms[doc]) : 0.0f;
244: fieldNormExpl.setValue(fieldNorm);
245: fieldNormExpl.setDescription("fieldNorm(field=" + field
246: + ", doc=" + doc + ")");
247: fieldExpl.addDetail(fieldNormExpl);
248:
249: fieldExpl.setMatch(Boolean.valueOf(tfExpl.isMatch()));
250: fieldExpl.setValue(tfExpl.getValue() * idfExpl.getValue()
251: * fieldNormExpl.getValue());
252:
253: result.addDetail(fieldExpl);
254: result.setMatch(fieldExpl.getMatch());
255:
256: // combine them
257: result
258: .setValue(queryExpl.getValue()
259: * fieldExpl.getValue());
260:
261: if (queryExpl.getValue() == 1.0f)
262: return fieldExpl;
263:
264: return result;
265: }
266: }
267:
268: public Query rewrite(IndexReader reader) {
269: if (termArrays.size() == 1) { // optimize one-term case
270: Term[] terms = (Term[]) termArrays.get(0);
271: BooleanQuery boq = new BooleanQuery(true);
272: for (int i = 0; i < terms.length; i++) {
273: boq.add(new TermQuery(terms[i]),
274: BooleanClause.Occur.SHOULD);
275: }
276: boq.setBoost(getBoost());
277: return boq;
278: } else {
279: return this ;
280: }
281: }
282:
283: protected Weight createWeight(Searcher searcher) throws IOException {
284: return new MultiPhraseWeight(searcher);
285: }
286:
287: /** Prints a user-readable version of this query. */
288: public final String toString(String f) {
289: StringBuffer buffer = new StringBuffer();
290: if (!field.equals(f)) {
291: buffer.append(field);
292: buffer.append(":");
293: }
294:
295: buffer.append("\"");
296: Iterator i = termArrays.iterator();
297: while (i.hasNext()) {
298: Term[] terms = (Term[]) i.next();
299: if (terms.length > 1) {
300: buffer.append("(");
301: for (int j = 0; j < terms.length; j++) {
302: buffer.append(terms[j].text());
303: if (j < terms.length - 1)
304: buffer.append(" ");
305: }
306: buffer.append(")");
307: } else {
308: buffer.append(terms[0].text());
309: }
310: if (i.hasNext())
311: buffer.append(" ");
312: }
313: buffer.append("\"");
314:
315: if (slop != 0) {
316: buffer.append("~");
317: buffer.append(slop);
318: }
319:
320: buffer.append(ToStringUtils.boost(getBoost()));
321:
322: return buffer.toString();
323: }
324:
325: /** Returns true if <code>o</code> is equal to this. */
326: public boolean equals(Object o) {
327: if (!(o instanceof MultiPhraseQuery))
328: return false;
329: MultiPhraseQuery other = (MultiPhraseQuery) o;
330: return this .getBoost() == other.getBoost()
331: && this .slop == other.slop
332: && this .termArrays.equals(other.termArrays)
333: && this .positions.equals(other.positions);
334: }
335:
336: /** Returns a hash code value for this object.*/
337: public int hashCode() {
338: return Float.floatToIntBits(getBoost()) ^ slop
339: ^ termArrays.hashCode() ^ positions.hashCode()
340: ^ 0x4AC65113;
341: }
342: }
|