001: /*
002: * Created on 25-Jan-2006
003: */
004: package org.apache.lucene.search.similar;
005:
006: /**
007: * Licensed to the Apache Software Foundation (ASF) under one or more
008: * contributor license agreements. See the NOTICE file distributed with
009: * this work for additional information regarding copyright ownership.
010: * The ASF licenses this file to You under the Apache License, Version 2.0
011: * (the "License"); you may not use this file except in compliance with
012: * the License. You may obtain a copy of the License at
013: *
014: * http://www.apache.org/licenses/LICENSE-2.0
015: *
016: * Unless required by applicable law or agreed to in writing, software
017: * distributed under the License is distributed on an "AS IS" BASIS,
018: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
019: * See the License for the specific language governing permissions and
020: * limitations under the License.
021: */
022:
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similar.MoreLikeThis;
033:
034: /**
035: * A simple wrapper for MoreLikeThis for use in scenarios where a Query object is required eg
036: * in custom QueryParser extensions. At query.rewrite() time the reader is used to construct the
037: * actual MoreLikeThis object and obtain the real Query object.
038: * @author maharwood
039: */
040: public class MoreLikeThisQuery extends Query {
041:
042: private String likeText;
043: private String[] moreLikeFields;
044: private Analyzer analyzer;
045: float percentTermsToMatch = 0.3f;
046: int minTermFrequency = 1;
047: int maxQueryTerms = 5;
048: Set stopWords = null;
049: int minDocFreq = -1;
050:
051: /**
052: * @param moreLikeFields
053: */
054: public MoreLikeThisQuery(String likeText, String[] moreLikeFields,
055: Analyzer analyzer) {
056: this .likeText = likeText;
057: this .moreLikeFields = moreLikeFields;
058: this .analyzer = analyzer;
059: }
060:
061: public Query rewrite(IndexReader reader) throws IOException {
062: MoreLikeThis mlt = new MoreLikeThis(reader);
063:
064: mlt.setFieldNames(moreLikeFields);
065: mlt.setAnalyzer(analyzer);
066: mlt.setMinTermFreq(minTermFrequency);
067: if (minDocFreq >= 0) {
068: mlt.setMinDocFreq(minDocFreq);
069: }
070: mlt.setMaxQueryTerms(maxQueryTerms);
071: mlt.setStopWords(stopWords);
072: BooleanQuery bq = (BooleanQuery) mlt
073: .like(new ByteArrayInputStream(likeText.getBytes()));
074: BooleanClause[] clauses = bq.getClauses();
075: //make at least half the terms match
076: bq
077: .setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch));
078: return bq;
079: }
080:
081: /* (non-Javadoc)
082: * @see org.apache.lucene.search.Query#toString(java.lang.String)
083: */
084: public String toString(String field) {
085: return "like:" + likeText;
086: }
087:
088: public float getPercentTermsToMatch() {
089: return percentTermsToMatch;
090: }
091:
092: public void setPercentTermsToMatch(float percentTermsToMatch) {
093: this .percentTermsToMatch = percentTermsToMatch;
094: }
095:
096: public Analyzer getAnalyzer() {
097: return analyzer;
098: }
099:
100: public void setAnalyzer(Analyzer analyzer) {
101: this .analyzer = analyzer;
102: }
103:
104: public String getLikeText() {
105: return likeText;
106: }
107:
108: public void setLikeText(String likeText) {
109: this .likeText = likeText;
110: }
111:
112: public int getMaxQueryTerms() {
113: return maxQueryTerms;
114: }
115:
116: public void setMaxQueryTerms(int maxQueryTerms) {
117: this .maxQueryTerms = maxQueryTerms;
118: }
119:
120: public int getMinTermFrequency() {
121: return minTermFrequency;
122: }
123:
124: public void setMinTermFrequency(int minTermFrequency) {
125: this .minTermFrequency = minTermFrequency;
126: }
127:
128: public String[] getMoreLikeFields() {
129: return moreLikeFields;
130: }
131:
132: public void setMoreLikeFields(String[] moreLikeFields) {
133: this .moreLikeFields = moreLikeFields;
134: }
135:
136: public Set getStopWords() {
137: return stopWords;
138: }
139:
140: public void setStopWords(Set stopWords) {
141: this .stopWords = stopWords;
142: }
143:
144: public int getMinDocFreq() {
145: return minDocFreq;
146: }
147:
148: public void setMinDocFreq(int minDocFreq) {
149: this.minDocFreq = minDocFreq;
150: }
151: }
|