001: /*
002: * $Header$
003: * $Revision: 7067 $
004: * $Date: 2007-07-09 02:45:41 -0700 $
005: *
006: * ====================================================================
007: *
008: * Copyright 1999-2004 The Apache Software Foundation
009: *
010: * Licensed under the Apache License, Version 2.0 (the "License");
011: * you may not use this file except in compliance with the License.
012: * You may obtain a copy of the License at
013: *
014: * http://www.apache.org/licenses/LICENSE-2.0
015: *
016: * Unless required by applicable law or agreed to in writing, software
017: * distributed under the License is distributed on an "AS IS" BASIS,
018: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
019: * See the License for the specific language governing permissions and
020: * limitations under the License.
021: *
022: */
023: package org.apache.slide.index.lucene.expressions;
024:
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Locale;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.slide.index.lucene.Index;
import org.apache.slide.index.lucene.IndexConfiguration;
import org.apache.slide.search.BadQueryException;
import org.jdom.Element;
044:
045: /**
046: * Implements the <code>property-contains</code> expression, that works
047: * exacly like <code>contains</code> but on properties.
048: */
049: public class PropertyContainsExpression extends
050: AbstractLuceneExpression {
051:
052: public PropertyContainsExpression(Index index, Element element,
053: boolean negated) throws BadQueryException {
054: super (index);
055:
056: IndexConfiguration config = index.getConfiguration();
057: Element prop = getPropertyElement(element);
058: String field = IndexConfiguration.generateFieldName(prop
059: .getNamespaceURI(), prop.getName());
060: String literal = getLiteralElement(element).getText();
061:
062: try {
063: // use queryparser to make the query here
064: parseQuery(config, field, literal);
065: } catch (Exception e) {
066:
067: index
068: .getLogger()
069: .debug(
070: "PropertyContainsExpression: Couldn't parse query! Falling back to default. query: "
071: + literal, e);
072:
073: if (!index.getConfiguration().isCaseSensitive())
074: literal = literal.toLowerCase();
075:
076: int starPos = literal.indexOf('*');
077: int qmPos = literal.indexOf('?');
078: if (starPos != -1 || qmPos != -1) {
079: if (starPos == literal.length() - 1 && qmPos == -1) {
080: // some thing like "word*"
081: // TODO the .toLowerCase() should depend from the Analyzer
082: setQuery(new PrefixQuery(new Term(field, literal
083: .substring(0, literal.length() - 1))));
084: } else {
085: // TODO dito
086: setQuery(new WildcardQuery(new Term(field, literal)));
087: }
088: } else {
089: termOrPhraseQuery(config, field, literal);
090: }
091: }
092:
093: if (negated) {
094: BooleanQuery booleanQuery = new BooleanQuery();
095: booleanQuery.add(new TermQuery(new Term(
096: Index.IS_DEFINED_FIELD_NAME, field)),
097: BooleanClause.Occur.MUST);
098: booleanQuery.add(getQuery(), BooleanClause.Occur.MUST_NOT); // prohibited
099: setQuery(booleanQuery);
100: }
101: }
102:
103: protected void parseQuery(IndexConfiguration config, String field,
104: String text) throws Exception {
105:
106: QueryParser parser = new QueryParser(field, config
107: .getAnalyzer());
108: Query finalQuery = parser.parse(text);
109:
110: setQuery(finalQuery);
111: }
112:
113: private void termOrPhraseQuery(IndexConfiguration config,
114: String field, String text) {
115: TokenStream ts = config.getAnalyzer().tokenStream(field,
116: new StringReader(text));
117:
118: ArrayList tokens = new ArrayList(20);
119: try {
120: for (Token t = ts.next(); t != null; t = ts.next()) {
121: tokens.add(t.termText());
122: }
123: } catch (IOException e) {
124: // should not happen, because we are reading from StringReader
125: }
126:
127: if (tokens.size() > 1) {
128: PhraseQuery phraseQuery = new PhraseQuery();
129: for (int i = 0, l = tokens.size(); i < l; i++) {
130: phraseQuery
131: .add(new Term(field, (String) tokens.get(i)));
132: }
133: setQuery(phraseQuery);
134: } else if (tokens.size() == 1) {
135: setQuery(new TermQuery(new Term(field, (String) tokens
136: .get(0))));
137: } else {
138: // TODO NOP query???
139: setQuery(new BooleanQuery());
140: }
141: }
142: }
|