001: /*
002: * Copyright 2004-2006 the original author or authors.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.lucene.queryParser;
018:
019: import java.io.IOException;
020: import java.io.StringReader;
021: import java.util.ArrayList;
022: import java.util.List;
023: import java.util.Vector;
024:
025: import org.apache.lucene.analysis.Analyzer;
026: import org.apache.lucene.analysis.KeywordAnalyzer;
027: import org.apache.lucene.analysis.TokenStream;
028: import org.apache.lucene.index.Term;
029: import org.apache.lucene.search.BooleanClause;
030: import org.apache.lucene.search.BooleanQuery;
031: import org.apache.lucene.search.ConstantScoreRangeQuery;
032: import org.apache.lucene.search.MultiPhraseQuery;
033: import org.apache.lucene.search.PhraseQuery;
034: import org.apache.lucene.search.Query;
035: import org.apache.lucene.search.TermQuery;
036: import org.compass.core.Property;
037: import org.compass.core.engine.SearchEngineFactory;
038: import org.compass.core.lucene.engine.LuceneSearchEngineFactory;
039: import org.compass.core.lucene.engine.all.AllBoostingTermQuery;
040: import org.compass.core.lucene.engine.queryparser.QueryParserUtils;
041: import org.compass.core.lucene.search.ConstantScorePrefixQuery;
042: import org.compass.core.mapping.CompassMapping;
043: import org.compass.core.mapping.ResourcePropertyLookup;
044:
045: /**
046: * Extends Lucene {@link org.apache.lucene.queryParser.MultiFieldQueryParser} and overrides {@link #getRangeQuery(String,String,String,boolean)}
047: * since lucene performs data parsing which is a performance killer. Anyhow, handling dates in Compass
048: * is different and simpler than Lucene.
049: *
050: * @author kimchy
051: */
052: public class CompassMultiFieldQueryParser extends MultiFieldQueryParser {
053:
054: private static final KeywordAnalyzer KEYWORD_ANALYZER = new KeywordAnalyzer();
055:
056: protected final LuceneSearchEngineFactory searchEngineFactory;
057:
058: protected final CompassMapping mapping;
059:
060: private boolean allowConstantScorePrefixQuery;
061:
062: private boolean addAliasQueryWithDotPath = true;
063:
064: private boolean forceAnalyzer;
065:
066: protected boolean suggestedQuery = false;
067:
068: public CompassMultiFieldQueryParser(String[] fields,
069: Analyzer analyzer, CompassMapping mapping,
070: SearchEngineFactory searchEngineFactory,
071: boolean forceAnalyzer) {
072: super (fields, analyzer);
073: this .mapping = mapping;
074: this .searchEngineFactory = (LuceneSearchEngineFactory) searchEngineFactory;
075: this .forceAnalyzer = forceAnalyzer;
076: }
077:
078: public void setAllowConstantScorePrefixQuery(
079: boolean allowConstantScorePrefixQuery) {
080: this .allowConstantScorePrefixQuery = allowConstantScorePrefixQuery;
081: }
082:
083: public void setAddAliasQueryWithDotPath(
084: boolean addAliasQueryWithDotPath) {
085: this .addAliasQueryWithDotPath = addAliasQueryWithDotPath;
086: }
087:
088: protected Query getWildcardQuery(String field, String termStr)
089: throws ParseException {
090: ResourcePropertyLookup lookup = null;
091: if (field != null) {
092: lookup = mapping.getResourcePropertyLookup(field);
093: lookup.setConvertOnlyWithDotPath(false);
094: field = lookup.getPath();
095: }
096: return QueryParserUtils.andAliasQueryIfNeeded(super
097: .getWildcardQuery(field, termStr), lookup,
098: addAliasQueryWithDotPath, searchEngineFactory);
099: }
100:
101: protected Query getFuzzyQuery(String field, String termStr,
102: float minSimilarity) throws ParseException {
103: ResourcePropertyLookup lookup = null;
104: if (field != null) {
105: lookup = mapping.getResourcePropertyLookup(field);
106: lookup.setConvertOnlyWithDotPath(false);
107: field = lookup.getPath();
108: }
109: return QueryParserUtils.andAliasQueryIfNeeded(super
110: .getFuzzyQuery(field, termStr, minSimilarity), lookup,
111: addAliasQueryWithDotPath, searchEngineFactory);
112: }
113:
114: protected Query getFieldQuery(String field, String queryText)
115: throws ParseException {
116: if (field == null) {
117: return super .getFieldQuery(field, queryText);
118: }
119: ResourcePropertyLookup lookup = mapping
120: .getResourcePropertyLookup(field);
121: lookup.setConvertOnlyWithDotPath(false);
122: if (lookup.hasSpecificConverter()) {
123: queryText = lookup.normalizeString(queryText);
124: }
125: Analyzer origAnalyzer = analyzer;
126: if (!forceAnalyzer) {
127: String analyzerName = lookup.getAnalyzer();
128: if (analyzerName != null) {
129: analyzer = searchEngineFactory.getAnalyzerManager()
130: .getAnalyzerMustExist(analyzerName);
131: } else {
132: if (lookup.getResourcePropertyMapping() != null
133: && lookup.getResourcePropertyMapping()
134: .getIndex() == Property.Index.UN_TOKENIZED) {
135: analyzer = KEYWORD_ANALYZER;
136: }
137: }
138: }
139: try {
140: return QueryParserUtils.andAliasQueryIfNeeded(
141: getInternalFieldQuery(lookup.getPath(), queryText),
142: lookup, addAliasQueryWithDotPath,
143: searchEngineFactory);
144: } finally {
145: if (origAnalyzer != null) {
146: analyzer = origAnalyzer;
147: }
148: }
149: }
150:
151: /**
152: * Override it so we won't use the date format to try and parse dates
153: */
154: protected Query getRangeQuery(String field, String part1,
155: String part2, boolean inclusive) throws ParseException {
156: if (field == null) {
157: Vector clauses = new Vector();
158: for (int i = 0; i < fields.length; i++) {
159: clauses.add(new BooleanClause(getRangeQuery(fields[i],
160: part1, part2, inclusive),
161: BooleanClause.Occur.SHOULD));
162: }
163: return getBooleanQuery(clauses, true);
164: }
165:
166: if (getLowercaseExpandedTerms()) {
167: part1 = part1.toLowerCase();
168: part2 = part2.toLowerCase();
169: }
170:
171: ResourcePropertyLookup lookup = mapping
172: .getResourcePropertyLookup(field);
173: lookup.setConvertOnlyWithDotPath(false);
174: if (lookup.hasSpecificConverter()) {
175: if ("*".equals(part1)) {
176: part1 = null;
177: } else {
178: part1 = lookup.normalizeString(part1);
179: }
180: if ("*".equals(part2)) {
181: part2 = null;
182: } else {
183: part2 = lookup.normalizeString(part2);
184: }
185: } else {
186: if ("*".equals(part1)) {
187: part1 = null;
188: }
189: if ("*".equals(part2)) {
190: part2 = null;
191: }
192: }
193:
194: return QueryParserUtils.andAliasQueryIfNeeded(
195: new ConstantScoreRangeQuery(lookup.getPath(), part1,
196: part2, inclusive, inclusive), lookup,
197: addAliasQueryWithDotPath, searchEngineFactory);
198: }
199:
200: protected Query getPrefixQuery(String field, String termStr)
201: throws ParseException {
202: ResourcePropertyLookup lookup = mapping
203: .getResourcePropertyLookup(field);
204: lookup.setConvertOnlyWithDotPath(false);
205:
206: if (!allowConstantScorePrefixQuery) {
207: return super .getPrefixQuery(lookup.getPath(), termStr);
208: }
209:
210: if (field == null) {
211: Vector clauses = new Vector();
212: for (int i = 0; i < fields.length; i++) {
213: clauses.add(new BooleanClause(getPrefixQuery(fields[i],
214: termStr), BooleanClause.Occur.SHOULD));
215: }
216: return getBooleanQuery(clauses, true);
217: }
218:
219: if (getLowercaseExpandedTerms()) {
220: termStr = termStr.toLowerCase();
221: }
222:
223: Term t = new Term(lookup.getPath(), termStr);
224: return QueryParserUtils.andAliasQueryIfNeeded(
225: new ConstantScorePrefixQuery(t), lookup,
226: addAliasQueryWithDotPath, searchEngineFactory);
227: }
228:
229: /**
230: * @throws ParseException throw in overridden method to disallow
231: */
232: // MONITOR AGAINST LUCENE
233: // Changed: Added boostAll flag
234: // Extracted the creation of Terms to allow for overrides
235: protected Query getInternalFieldQuery(String field, String queryText)
236: throws ParseException {
237: boolean boostAll = false;
238: if (searchEngineFactory.getLuceneSettings()
239: .isAllPropertyBoostSupport()
240: && field.equals(searchEngineFactory.getLuceneSettings()
241: .getAllProperty())) {
242: boostAll = true;
243: }
244: // Use the analyzer to get all the tokens, and then build a TermQuery,
245: // PhraseQuery, or nothing based on the term count
246:
247: TokenStream source = analyzer.tokenStream(field,
248: new StringReader(queryText));
249: Vector v = new Vector();
250: org.apache.lucene.analysis.Token t;
251: int positionCount = 0;
252: boolean severalTokensAtSamePosition = false;
253:
254: while (true) {
255: try {
256: t = source.next();
257: } catch (IOException e) {
258: t = null;
259: }
260: if (t == null)
261: break;
262: v.addElement(t);
263: if (t.getPositionIncrement() != 0)
264: positionCount += t.getPositionIncrement();
265: else
266: severalTokensAtSamePosition = true;
267: }
268: try {
269: source.close();
270: } catch (IOException e) {
271: // ignore
272: }
273:
274: if (v.size() == 0)
275: return null;
276: else if (v.size() == 1) {
277: t = (org.apache.lucene.analysis.Token) v.elementAt(0);
278: if (boostAll) {
279: return new AllBoostingTermQuery(getTerm(field, t
280: .termText()));
281: } else {
282: return new TermQuery(getTerm(field, t.termText()));
283: }
284: } else {
285: if (severalTokensAtSamePosition) {
286: if (positionCount == 1) {
287: // no phrase query:
288: BooleanQuery q = new BooleanQuery(true);
289: for (int i = 0; i < v.size(); i++) {
290: t = (org.apache.lucene.analysis.Token) v
291: .elementAt(i);
292: if (boostAll) {
293: AllBoostingTermQuery currentQuery = new AllBoostingTermQuery(
294: getTerm(field, t.termText()));
295: q.add(currentQuery,
296: BooleanClause.Occur.SHOULD);
297: } else {
298: TermQuery currentQuery = new TermQuery(
299: getTerm(field, t.termText()));
300: q.add(currentQuery,
301: BooleanClause.Occur.SHOULD);
302: }
303: }
304: return q;
305: } else {
306: // phrase query:
307: MultiPhraseQuery mpq = new MultiPhraseQuery();
308: mpq.setSlop(phraseSlop);
309: List multiTerms = new ArrayList();
310: int position = -1;
311: for (int i = 0; i < v.size(); i++) {
312: t = (org.apache.lucene.analysis.Token) v
313: .elementAt(i);
314: if (t.getPositionIncrement() > 0
315: && multiTerms.size() > 0) {
316: if (enablePositionIncrements) {
317: mpq
318: .add((Term[]) multiTerms
319: .toArray(new Term[0]),
320: position);
321: } else {
322: mpq.add((Term[]) multiTerms
323: .toArray(new Term[0]));
324: }
325: multiTerms.clear();
326: }
327: position += t.getPositionIncrement();
328: multiTerms.add(getTerm(field, t.termText()));
329: }
330: if (enablePositionIncrements) {
331: mpq.add((Term[]) multiTerms
332: .toArray(new Term[0]), position);
333: } else {
334: mpq.add((Term[]) multiTerms
335: .toArray(new Term[0]));
336: }
337: return mpq;
338: }
339: } else {
340: PhraseQuery pq = new PhraseQuery();
341: pq.setSlop(phraseSlop);
342: int position = -1;
343: for (int i = 0; i < v.size(); i++) {
344: t = (org.apache.lucene.analysis.Token) v
345: .elementAt(i);
346: if (enablePositionIncrements) {
347: position += t.getPositionIncrement();
348: pq.add(getTerm(field, t.termText()), position);
349: } else {
350: pq.add(getTerm(field, t.termText()));
351: }
352: }
353: return pq;
354: }
355: }
356: }
357:
358: public void close() {
359:
360: }
361:
362: protected Term getTerm(String field, String text)
363: throws ParseException {
364: return new Term(field, text);
365: }
366:
367: public boolean isSuggestedQuery() {
368: return suggestedQuery;
369: }
370: }
|