01: package org.apache.lucene.xmlparser.builders;
02:
03: import java.io.IOException;
04: import java.io.StringReader;
05:
06: import org.apache.lucene.analysis.Analyzer;
07: import org.apache.lucene.analysis.Token;
08: import org.apache.lucene.analysis.TokenStream;
09: import org.apache.lucene.index.Term;
10: import org.apache.lucene.search.BooleanClause;
11: import org.apache.lucene.search.BooleanQuery;
12: import org.apache.lucene.search.Query;
13: import org.apache.lucene.search.TermQuery;
14: import org.apache.lucene.xmlparser.DOMUtils;
15: import org.apache.lucene.xmlparser.ParserException;
16: import org.apache.lucene.xmlparser.QueryBuilder;
17: import org.w3c.dom.Element;
18:
19: /**
20: * Licensed to the Apache Software Foundation (ASF) under one or more
21: * contributor license agreements. See the NOTICE file distributed with
22: * this work for additional information regarding copyright ownership.
23: * The ASF licenses this file to You under the Apache License, Version 2.0
24: * (the "License"); you may not use this file except in compliance with
25: * the License. You may obtain a copy of the License at
26: *
27: * http://www.apache.org/licenses/LICENSE-2.0
28: *
29: * Unless required by applicable law or agreed to in writing, software
30: * distributed under the License is distributed on an "AS IS" BASIS,
31: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
32: * See the License for the specific language governing permissions and
33: * limitations under the License.
34: */
35: /**
36: * Builds a BooleanQuery from all of the terms found in the XML element using the choice of analyzer
37: * @author maharwood
38: */
39: public class TermsQueryBuilder implements QueryBuilder {
40:
41: Analyzer analyzer;
42:
43: public TermsQueryBuilder(Analyzer analyzer) {
44: this .analyzer = analyzer;
45: }
46:
47: public Query getQuery(Element e) throws ParserException {
48:
49: String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(
50: e, "fieldName");
51: String text = DOMUtils.getNonBlankTextOrFail(e);
52:
53: BooleanQuery bq = new BooleanQuery(DOMUtils.getAttribute(e,
54: "disableCoord", false));
55: bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e,
56: "minimumNumberShouldMatch", 0));
57: TokenStream ts = analyzer.tokenStream(fieldName,
58: new StringReader(text));
59: try {
60: Token token = ts.next();
61: Term term = null;
62: while (token != null) {
63: if (term == null) {
64: term = new Term(fieldName, token.termText());
65: } else {
66: // create from previous to save fieldName.intern overhead
67: term = term.createTerm(token.termText());
68: }
69: bq.add(new BooleanClause(new TermQuery(term),
70: BooleanClause.Occur.SHOULD));
71: token = ts.next();
72: }
73: } catch (IOException ioe) {
74: throw new RuntimeException(
75: "Error constructing terms from index:" + ioe);
76: }
77: bq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
78:
79: return bq;
80:
81: }
82:
83: }
|