001: /*
002: * Copyright 2004-2006 the original author or authors.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.lucene.queryParser;
018:
019: import java.io.IOException;
020: import java.io.StringReader;
021: import java.util.ArrayList;
022: import java.util.List;
023: import java.util.Vector;
024:
025: import org.apache.lucene.analysis.Analyzer;
026: import org.apache.lucene.analysis.KeywordAnalyzer;
027: import org.apache.lucene.analysis.TokenStream;
028: import org.apache.lucene.index.Term;
029: import org.apache.lucene.search.BooleanClause;
030: import org.apache.lucene.search.BooleanQuery;
031: import org.apache.lucene.search.ConstantScoreRangeQuery;
032: import org.apache.lucene.search.MultiPhraseQuery;
033: import org.apache.lucene.search.PhraseQuery;
034: import org.apache.lucene.search.Query;
035: import org.apache.lucene.search.TermQuery;
036: import org.compass.core.Property;
037: import org.compass.core.engine.SearchEngineFactory;
038: import org.compass.core.lucene.engine.LuceneSearchEngineFactory;
039: import org.compass.core.lucene.engine.all.AllBoostingTermQuery;
040: import org.compass.core.lucene.engine.queryparser.QueryParserUtils;
041: import org.compass.core.lucene.search.ConstantScorePrefixQuery;
042: import org.compass.core.mapping.CompassMapping;
043: import org.compass.core.mapping.ResourcePropertyLookup;
044:
045: /**
046: * Extends Lucene {@link org.apache.lucene.queryParser.QueryParser} and overrides {@link #getRangeQuery(String,String,String,boolean)}
047: * since Lucene performs date parsing, which is a performance killer. Anyhow, handling dates in Compass
048: * is different and simpler than Lucene.
049: *
050: * @author kimchy
051: */
052: public class CompassQueryParser extends QueryParser {
053:
054: private static final KeywordAnalyzer KEYWORD_ANALYZER = new KeywordAnalyzer();
055:
056: protected final CompassMapping mapping;
057:
058: protected final LuceneSearchEngineFactory searchEngineFactory;
059:
060: private boolean allowConstantScorePrefixQuery;
061:
062: private boolean addAliasQueryWithDotPath = true;
063:
064: private boolean forceAnalyzer;
065:
066: protected boolean suggestedQuery = false;
067:
/**
 * Creates a parser bound to a Compass mapping and a Lucene search engine factory.
 *
 * @param f                   field name passed straight to the Lucene parser constructor
 *                            (presumably the default search field)
 * @param a                   analyzer passed straight to the Lucene parser constructor
 * @param mapping             mapping used to resolve resource property lookups
 * @param searchEngineFactory factory that is cast to {@link LuceneSearchEngineFactory};
 *                            any other implementation fails with a ClassCastException
 * @param forceAnalyzer       when true, {@link #getFieldQuery(String, String)} never
 *                            swaps in a property specific analyzer
 */
public CompassQueryParser(String f, Analyzer a, CompassMapping mapping,
        SearchEngineFactory searchEngineFactory, boolean forceAnalyzer) {
    super(f, a);
    this.forceAnalyzer = forceAnalyzer;
    this.searchEngineFactory = (LuceneSearchEngineFactory) searchEngineFactory;
    this.mapping = mapping;
}
077:
078: public void setAllowConstantScorePrefixQuery(
079: boolean allowConstantScorePrefixQuery) {
080: this .allowConstantScorePrefixQuery = allowConstantScorePrefixQuery;
081: }
082:
083: public void setAddAliasQueryWithDotPath(
084: boolean addAliasQueryWithDotPath) {
085: this .addAliasQueryWithDotPath = addAliasQueryWithDotPath;
086: }
087:
088: protected Query getWildcardQuery(String field, String termStr)
089: throws ParseException {
090: ResourcePropertyLookup lookup = null;
091: if (field != null) {
092: lookup = mapping.getResourcePropertyLookup(field);
093: lookup.setConvertOnlyWithDotPath(false);
094: field = lookup.getPath();
095: }
096: return QueryParserUtils.andAliasQueryIfNeeded(super
097: .getWildcardQuery(field, termStr), lookup,
098: addAliasQueryWithDotPath, searchEngineFactory);
099: }
100:
101: protected Query getFuzzyQuery(String field, String termStr,
102: float minSimilarity) throws ParseException {
103: ResourcePropertyLookup lookup = null;
104: if (field != null) {
105: lookup = mapping.getResourcePropertyLookup(field);
106: lookup.setConvertOnlyWithDotPath(false);
107: field = lookup.getPath();
108: }
109: return QueryParserUtils.andAliasQueryIfNeeded(super
110: .getFuzzyQuery(field, termStr, minSimilarity), lookup,
111: addAliasQueryWithDotPath, searchEngineFactory);
112: }
113:
114: protected Query getFieldQuery(String field, String queryText)
115: throws ParseException {
116: if (field == null) {
117: return super .getFieldQuery(field, queryText);
118: }
119: ResourcePropertyLookup lookup = mapping
120: .getResourcePropertyLookup(field);
121: lookup.setConvertOnlyWithDotPath(false);
122: if (lookup.hasSpecificConverter()) {
123: queryText = lookup.normalizeString(queryText);
124:
125: }
126: Analyzer origAnalyzer = analyzer;
127: if (!forceAnalyzer) {
128: String analyzerName = lookup.getAnalyzer();
129: if (analyzerName != null) {
130: analyzer = searchEngineFactory.getAnalyzerManager()
131: .getAnalyzerMustExist(analyzerName);
132: } else {
133: if (lookup.getResourcePropertyMapping() != null
134: && lookup.getResourcePropertyMapping()
135: .getIndex() == Property.Index.UN_TOKENIZED) {
136: analyzer = KEYWORD_ANALYZER;
137: }
138: }
139: }
140: try {
141: return QueryParserUtils.andAliasQueryIfNeeded(
142: getInternalFieldQuery(lookup.getPath(), queryText),
143: lookup, addAliasQueryWithDotPath,
144: searchEngineFactory);
145: } finally {
146: if (origAnalyzer != null) {
147: analyzer = origAnalyzer;
148: }
149: }
150: }
151:
152: /**
153: * Override it so we won't use the date format to try and parse dates
154: */
155: protected Query getRangeQuery(String field, String part1,
156: String part2, boolean inclusive) throws ParseException {
157: if (getLowercaseExpandedTerms()) {
158: part1 = part1.toLowerCase();
159: part2 = part2.toLowerCase();
160: }
161:
162: ResourcePropertyLookup lookup = mapping
163: .getResourcePropertyLookup(field);
164: lookup.setConvertOnlyWithDotPath(false);
165: if (lookup.hasSpecificConverter()) {
166: if ("*".equals(part1)) {
167: part1 = null;
168: } else {
169: part1 = lookup.normalizeString(part1);
170: }
171: if ("*".equals(part2)) {
172: part2 = null;
173: } else {
174: part2 = lookup.normalizeString(part2);
175: }
176: } else {
177: if ("*".equals(part1)) {
178: part1 = null;
179: }
180: if ("*".equals(part2)) {
181: part2 = null;
182: }
183: }
184:
185: return QueryParserUtils.andAliasQueryIfNeeded(
186: new ConstantScoreRangeQuery(lookup.getPath(), part1,
187: part2, inclusive, inclusive), lookup,
188: addAliasQueryWithDotPath, searchEngineFactory);
189: }
190:
191: protected Query getPrefixQuery(String field, String termStr)
192: throws ParseException {
193: ResourcePropertyLookup lookup = mapping
194: .getResourcePropertyLookup(field);
195: lookup.setConvertOnlyWithDotPath(false);
196:
197: if (!allowConstantScorePrefixQuery) {
198: return super .getPrefixQuery(lookup.getPath(), termStr);
199: }
200:
201: if (getLowercaseExpandedTerms()) {
202: termStr = termStr.toLowerCase();
203: }
204:
205: Term t = new Term(lookup.getPath(), termStr);
206: return QueryParserUtils.andAliasQueryIfNeeded(
207: new ConstantScorePrefixQuery(t), lookup,
208: addAliasQueryWithDotPath, searchEngineFactory);
209: }
210:
211: /**
212: * @throws ParseException throw in overridden method to disallow
213: */
214: // MONITOR AGAINST LUCENE
215: // Changed: Added boostAll flag
216: // Extracted the creation of Terms to allow for overrides
217: protected Query getInternalFieldQuery(String field, String queryText)
218: throws ParseException {
219: boolean boostAll = false;
220: if (searchEngineFactory.getLuceneSettings()
221: .isAllPropertyBoostSupport()
222: && field.equals(searchEngineFactory.getLuceneSettings()
223: .getAllProperty())) {
224: boostAll = true;
225: }
226: // Use the analyzer to get all the tokens, and then build a TermQuery,
227: // PhraseQuery, or nothing based on the term count
228:
229: TokenStream source = analyzer.tokenStream(field,
230: new StringReader(queryText));
231: Vector v = new Vector();
232: org.apache.lucene.analysis.Token t;
233: int positionCount = 0;
234: boolean severalTokensAtSamePosition = false;
235:
236: while (true) {
237: try {
238: t = source.next();
239: } catch (IOException e) {
240: t = null;
241: }
242: if (t == null)
243: break;
244: v.addElement(t);
245: if (t.getPositionIncrement() != 0)
246: positionCount += t.getPositionIncrement();
247: else
248: severalTokensAtSamePosition = true;
249: }
250: try {
251: source.close();
252: } catch (IOException e) {
253: // ignore
254: }
255:
256: if (v.size() == 0)
257: return null;
258: else if (v.size() == 1) {
259: t = (org.apache.lucene.analysis.Token) v.elementAt(0);
260: if (boostAll) {
261: return new AllBoostingTermQuery(getTerm(field, t
262: .termText()));
263: } else {
264: return new TermQuery(getTerm(field, t.termText()));
265: }
266: } else {
267: if (severalTokensAtSamePosition) {
268: if (positionCount == 1) {
269: // no phrase query:
270: BooleanQuery q = new BooleanQuery(true);
271: for (int i = 0; i < v.size(); i++) {
272: t = (org.apache.lucene.analysis.Token) v
273: .elementAt(i);
274: if (boostAll) {
275: AllBoostingTermQuery currentQuery = new AllBoostingTermQuery(
276: getTerm(field, t.termText()));
277: q.add(currentQuery,
278: BooleanClause.Occur.SHOULD);
279: } else {
280: TermQuery currentQuery = new TermQuery(
281: getTerm(field, t.termText()));
282: q.add(currentQuery,
283: BooleanClause.Occur.SHOULD);
284: }
285: }
286: return q;
287: } else {
288: // phrase query:
289: MultiPhraseQuery mpq = new MultiPhraseQuery();
290: mpq.setSlop(phraseSlop);
291: List multiTerms = new ArrayList();
292: int position = -1;
293: for (int i = 0; i < v.size(); i++) {
294: t = (org.apache.lucene.analysis.Token) v
295: .elementAt(i);
296: if (t.getPositionIncrement() > 0
297: && multiTerms.size() > 0) {
298: if (enablePositionIncrements) {
299: mpq
300: .add((Term[]) multiTerms
301: .toArray(new Term[0]),
302: position);
303: } else {
304: mpq.add((Term[]) multiTerms
305: .toArray(new Term[0]));
306: }
307: multiTerms.clear();
308: }
309: position += t.getPositionIncrement();
310: multiTerms.add(getTerm(field, t.termText()));
311: }
312: if (enablePositionIncrements) {
313: mpq.add((Term[]) multiTerms
314: .toArray(new Term[0]), position);
315: } else {
316: mpq.add((Term[]) multiTerms
317: .toArray(new Term[0]));
318: }
319: return mpq;
320: }
321: } else {
322: PhraseQuery pq = new PhraseQuery();
323: pq.setSlop(phraseSlop);
324: int position = -1;
325: for (int i = 0; i < v.size(); i++) {
326: t = (org.apache.lucene.analysis.Token) v
327: .elementAt(i);
328: if (enablePositionIncrements) {
329: position += t.getPositionIncrement();
330: pq.add(getTerm(field, t.termText()), position);
331: } else {
332: pq.add(getTerm(field, t.termText()));
333: }
334: }
335: return pq;
336: }
337: }
338: }
339:
340: protected Term getTerm(String field, String text)
341: throws ParseException {
342: return new Term(field, text);
343: }
344:
345: public void close() {
346:
347: }
348:
349: public boolean isSuggestedQuery() {
350: return suggestedQuery;
351: }
352: }
|