package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Set;
import java.util.Vector;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.ToStringUtils;

/** A Query that matches documents containing a particular sequence of terms.
 * A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
 *
 * <p>This query may be combined with other terms or queries with a {@link BooleanQuery}.
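 *
 * <p>For example, a query for the phrase <code>"new york"</code> in a field
 * named <code>contents</code> could be built programmatically as in the
 * sketch below (the field name and term values are purely illustrative):
 *
 * <pre>
 *   PhraseQuery query = new PhraseQuery();
 *   query.add(new Term("contents", "new"));
 *   query.add(new Term("contents", "york"));
 * </pre>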
 */
public class PhraseQuery extends Query {
  private String field;
  private Vector terms = new Vector();
  private Vector positions = new Vector();
  private int slop = 0;

  /** Constructs an empty phrase query. */
  public PhraseQuery() {
  }

  /** Sets the number of other words permitted between words in the query phrase.
   * If zero, then this is an exact phrase search. For larger values this works
   * like a <code>WITHIN</code> or <code>NEAR</code> operator.
   *
   * <p>The slop is in fact an edit-distance, where the units correspond to
   * moves of terms in the query phrase out of position. For example, to switch
   * the order of two words requires two moves (the first move places the words
   * atop one another), so to permit re-orderings of phrases, the slop must be
   * at least two.
   *
   * <p>More exact matches are scored higher than sloppier matches, thus search
   * results are sorted by exactness.
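   *
   * <p>For instance, an exact phrase query for <code>"new york"</code> does not
   * match text containing <code>"york new"</code>, but it does once the slop is
   * raised to two, which covers the transposition described above (a minimal
   * sketch; the field name and terms are illustrative):
   *
   * <pre>
   *   PhraseQuery query = new PhraseQuery();
   *   query.add(new Term("contents", "new"));
   *   query.add(new Term("contents", "york"));
   *   query.setSlop(2); // permit the two terms to appear in either order
   * </pre>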
   *
   * <p>The slop is zero by default, requiring exact matches. */
  public void setSlop(int s) {
    slop = s;
  }

  /** Returns the slop. See setSlop(). */
  public int getSlop() {
    return slop;
  }

  /**
   * Adds a term to the end of the query phrase.
   * The relative position of the term is the one immediately after the last term added.
   */
  public void add(Term term) {
    int position = 0;
    if (positions.size() > 0)
      position = ((Integer) positions.lastElement()).intValue() + 1;

    add(term, position);
  }

  /**
   * Adds a term to the end of the query phrase.
   * The relative position of the term within the phrase is specified explicitly.
   * This allows e.g. phrases with more than one term at the same position
   * or phrases with gaps (e.g. in connection with stopwords).
   *
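   * <p>For example, a gap can be left where a stopword was removed during
   * analysis (a minimal sketch; the field name, terms, and positions are
   * illustrative):
   *
   * <pre>
   *   // "city of york" with the stopword "of" dropped: "city" at position 0,
   *   // "york" at position 2, leaving position 1 empty.
   *   PhraseQuery query = new PhraseQuery();
   *   query.add(new Term("contents", "city"), 0);
   *   query.add(new Term("contents", "york"), 2);
   * </pre>
   *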
   * @param term the term to add
   * @param position the relative position of the term within the phrase
   */
  public void add(Term term, int position) {
    if (terms.size() == 0)
      field = term.field();
    else if (!term.field().equals(field))
      throw new IllegalArgumentException(
          "All phrase terms must be in the same field: " + term);

    terms.addElement(term);
    positions.addElement(new Integer(position));
  }

  /** Returns the set of terms in this phrase. */
  public Term[] getTerms() {
    return (Term[]) terms.toArray(new Term[0]);
  }

  /**
   * Returns the relative positions of terms in this phrase.
   */
  public int[] getPositions() {
    int[] result = new int[positions.size()];
    for (int i = 0; i < positions.size(); i++)
      result[i] = ((Integer) positions.elementAt(i)).intValue();
    return result;
  }

  private class PhraseWeight implements Weight {
    private Similarity similarity;
    private float value;
    private float idf;
    private float queryNorm;
    private float queryWeight;

    public PhraseWeight(Searcher searcher) throws IOException {
      this.similarity = getSimilarity(searcher);

      idf = similarity.idf(terms, searcher);
    }

    public String toString() {
      return "weight(" + PhraseQuery.this + ")";
    }

    public Query getQuery() {
      return PhraseQuery.this;
    }

    public float getValue() {
      return value;
    }

    public float sumOfSquaredWeights() {
      queryWeight = idf * getBoost();           // compute query weight
      return queryWeight * queryWeight;         // square it
    }

    public void normalize(float queryNorm) {
      this.queryNorm = queryNorm;
      queryWeight *= queryNorm;                 // normalize query weight
      value = queryWeight * idf;                // idf for document
    }

    public Scorer scorer(IndexReader reader) throws IOException {
      if (terms.size() == 0)                    // optimize zero-term case
        return null;

      TermPositions[] tps = new TermPositions[terms.size()];
      for (int i = 0; i < terms.size(); i++) {
        TermPositions p = reader.termPositions((Term) terms.elementAt(i));
        if (p == null)
          return null;
        tps[i] = p;
      }

      if (slop == 0)                            // optimize exact case
        return new ExactPhraseScorer(this, tps, getPositions(),
                                     similarity, reader.norms(field));
      else
        return new SloppyPhraseScorer(this, tps, getPositions(),
                                      similarity, slop, reader.norms(field));
    }

    public Explanation explain(IndexReader reader, int doc)
        throws IOException {

      Explanation result = new Explanation();
      result.setDescription("weight(" + getQuery() + " in " + doc
          + "), product of:");

      StringBuffer docFreqs = new StringBuffer();
      StringBuffer query = new StringBuffer();
      query.append('\"');
      for (int i = 0; i < terms.size(); i++) {
        if (i != 0) {
          docFreqs.append(" ");
          query.append(" ");
        }

        Term term = (Term) terms.elementAt(i);

        docFreqs.append(term.text());
        docFreqs.append("=");
        docFreqs.append(reader.docFreq(term));

        query.append(term.text());
      }
      query.append('\"');

      Explanation idfExpl =
        new Explanation(idf, "idf(" + field + ": " + docFreqs + ")");

      // explain query weight
      Explanation queryExpl = new Explanation();
      queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");

      Explanation boostExpl = new Explanation(getBoost(), "boost");
      if (getBoost() != 1.0f)
        queryExpl.addDetail(boostExpl);
      queryExpl.addDetail(idfExpl);

      Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
      queryExpl.addDetail(queryNormExpl);

      queryExpl.setValue(boostExpl.getValue() *
                         idfExpl.getValue() *
                         queryNormExpl.getValue());

      result.addDetail(queryExpl);

      // explain field weight
      Explanation fieldExpl = new Explanation();
      fieldExpl.setDescription("fieldWeight(" + field + ":" + query + " in "
          + doc + "), product of:");

      Explanation tfExpl = scorer(reader).explain(doc);
      fieldExpl.addDetail(tfExpl);
      fieldExpl.addDetail(idfExpl);

      Explanation fieldNormExpl = new Explanation();
      byte[] fieldNorms = reader.norms(field);
      float fieldNorm =
        fieldNorms != null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
      fieldNormExpl.setValue(fieldNorm);
      fieldNormExpl.setDescription("fieldNorm(field=" + field
          + ", doc=" + doc + ")");
      fieldExpl.addDetail(fieldNormExpl);

      fieldExpl.setValue(tfExpl.getValue() *
                         idfExpl.getValue() *
                         fieldNormExpl.getValue());

      result.addDetail(fieldExpl);

      // combine them
      result.setValue(queryExpl.getValue() * fieldExpl.getValue());

      if (queryExpl.getValue() == 1.0f)
        return fieldExpl;

      return result;
    }
  }

  protected Weight createWeight(Searcher searcher) throws IOException {
    if (terms.size() == 1) {                    // optimize one-term case
      Term term = (Term) terms.elementAt(0);
      Query termQuery = new TermQuery(term);
      termQuery.setBoost(getBoost());
      return termQuery.createWeight(searcher);
    }
    return new PhraseWeight(searcher);
  }

  /**
   * @see org.apache.lucene.search.Query#extractTerms(java.util.Set)
   */
  public void extractTerms(Set queryTerms) {
    queryTerms.addAll(terms);
  }

  /** Prints a user-readable version of this query. */
  public String toString(String f) {
    StringBuffer buffer = new StringBuffer();
    if (!field.equals(f)) {
      buffer.append(field);
      buffer.append(":");
    }

    buffer.append("\"");
    for (int i = 0; i < terms.size(); i++) {
      buffer.append(((Term) terms.elementAt(i)).text());
      if (i != terms.size() - 1)
        buffer.append(" ");
    }
    buffer.append("\"");

    if (slop != 0) {
      buffer.append("~");
      buffer.append(slop);
    }

    buffer.append(ToStringUtils.boost(getBoost()));

    return buffer.toString();
  }

  /** Returns true iff <code>o</code> is equal to this. */
  public boolean equals(Object o) {
    if (!(o instanceof PhraseQuery))
      return false;
    PhraseQuery other = (PhraseQuery) o;
    return (this.getBoost() == other.getBoost())
      && (this.slop == other.slop)
      && this.terms.equals(other.terms)
      && this.positions.equals(other.positions);
  }

  /** Returns a hash code value for this object. */
  public int hashCode() {
    return Float.floatToIntBits(getBoost())
      ^ slop
      ^ terms.hashCode()
      ^ positions.hashCode();
  }

}
|