001: /*
002: Licensed to the Apache Software Foundation (ASF) under one or more
003: contributor license agreements. See the NOTICE file distributed with
004: this work for additional information regarding copyright ownership.
005: The ASF licenses this file to You under the Apache License, Version 2.0
006: (the "License"); you may not use this file except in compliance with
007: the License. You may obtain a copy of the License at
008:
009: http://www.apache.org/licenses/LICENSE-2.0
010:
011: Unless required by applicable law or agreed to in writing, software
012: distributed under the License is distributed on an "AS IS" BASIS,
013: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: See the License for the specific language governing permissions and
015: limitations under the License.
016: */
017:
018: package org.apache.cocoon.bean.query;
019:
020: import java.io.IOException;
021: import java.io.Serializable;
022: import java.io.StringReader;
023: import java.util.Vector;
024: import org.apache.cocoon.components.search.LuceneXMLIndexer;
025: import org.apache.lucene.analysis.Analyzer;
026: import org.apache.lucene.analysis.Token;
027: import org.apache.lucene.analysis.TokenStream;
028: import org.apache.lucene.index.Term;
029: import org.apache.lucene.search.BooleanQuery;
030: import org.apache.lucene.search.FuzzyQuery;
031: import org.apache.lucene.search.PhraseQuery;
032: import org.apache.lucene.search.Query;
033: import org.apache.lucene.search.TermQuery;
034:
035: /**
036: * The criterion bean.
037: * <p>
038: * This object defines a <code>Bean</code> for holding a query criterion.<br/>
039: * The idea is to abstract the process of searching into a Bean to be manipulated by CForms.<br/>
040: * This Bean is designed to be persistable.
041: * </p>
042: *
043: */
044: public class SimpleLuceneCriterionBean implements
045: SimpleLuceneCriterion, Cloneable, Serializable {
046:
047: /**
048: * The Bean's ID.
049: */
050: protected Long id;
051:
052: /**
053: * The Bean's index field to seach in.
054: */
055: protected String field;
056:
057: /**
058: * The Bean's match value.
059: */
060: protected String match;
061:
062: /**
063: * The Bean's search term.
064: */
065: protected String term;
066:
067: /**
068: * Default constructor.
069: */
070: public SimpleLuceneCriterionBean() {
071: }
072:
073: /**
074: * Utility constructor.
075: *
076: * @param match the kind of match to use
077: * @param field the field to search
078: * @param term the terms to search for
079: */
080: public SimpleLuceneCriterionBean(String field, String match,
081: String term) {
082: this .field = field;
083: this .match = match;
084: this .term = term;
085: }
086:
087: public Object clone() throws CloneNotSupportedException {
088: SimpleLuceneCriterionBean criterion = (SimpleLuceneCriterionBean) super
089: .clone();
090: return criterion;
091: }
092:
093: /**
094: * Gets the <code>org.apache.lucene.search.Query</code> from the Criterion
095: * <p>
096: * The analyzer specifies which <code>org.apache.lucene.analysis.Analyzer</code> to use for this search.
097: * </p>
098: *
099: * @param analyzer The <code>org.apache.lucene.analysis.Analyzer</code> to use to extract the Terms from this Criterion
100: */
101: public Query getQuery(Analyzer analyzer) {
102: String f = this .field;
103: Query query = null;
104: if (ANY_FIELD.equals(this .field))
105: f = LuceneXMLIndexer.BODY_FIELD;
106: // extract Terms from the query string
107: TokenStream tokens = analyzer.tokenStream(f, new StringReader(
108: this .term));
109: Vector words = new Vector();
110: Token token;
111: while (true) {
112: try {
113: token = tokens.next();
114: } catch (IOException e) {
115: token = null;
116: }
117: if (token == null)
118: break;
119: words.addElement(token.termText());
120: }
121: try {
122: tokens.close();
123: } catch (IOException e) {
124: } // ignore
125:
126: // assemble the different matches
127:
128: if (ANY_MATCH.equals(this .match)) {
129: if (words.size() > 1) {
130: query = new BooleanQuery();
131: for (int i = 0; i < words.size(); i++) {
132: ((BooleanQuery) query).add(new TermQuery(new Term(
133: f, (String) words.elementAt(i))), false,
134: false);
135: }
136: } else if (words.size() == 1) {
137: query = new TermQuery(new Term(f, (String) words
138: .elementAt(0)));
139: }
140: }
141:
142: if (ALL_MATCH.equals(this .match)) {
143: if (words.size() > 1) {
144: query = new BooleanQuery();
145: for (int i = 0; i < words.size(); i++) {
146: ((BooleanQuery) query).add(new TermQuery(new Term(
147: f, (String) words.elementAt(i))), true,
148: false);
149: }
150: } else if (words.size() == 1) {
151: query = new TermQuery(new Term(f, (String) words
152: .elementAt(0)));
153: }
154: }
155:
156: if (NOT_MATCH.equals(this .match)) {
157: if (words.size() > 1) {
158: query = new BooleanQuery();
159: for (int i = 0; i < words.size(); i++) {
160: ((BooleanQuery) query).add(new TermQuery(new Term(
161: f, (String) words.elementAt(i))), true,
162: true);
163: }
164: } else if (words.size() == 1) {
165: query = new TermQuery(new Term(f, (String) words
166: .elementAt(0)));
167: }
168: }
169:
170: if (LIKE_MATCH.equals(this .match)) {
171: if (words.size() > 1) {
172: query = new BooleanQuery();
173: for (int i = 0; i < words.size(); i++) {
174: ((BooleanQuery) query).add(new FuzzyQuery(new Term(
175: f, (String) words.elementAt(i))), false,
176: false);
177: }
178: } else if (words.size() == 1) {
179: query = new FuzzyQuery(new Term(f, (String) words
180: .elementAt(0)));
181: }
182: }
183:
184: if (PHRASE_MATCH.equals(this .match)) {
185: if (words.size() > 1) {
186: query = new PhraseQuery();
187: ((PhraseQuery) query).setSlop(0);
188: for (int i = 0; i < words.size(); i++) {
189: ((PhraseQuery) query).add(new Term(f,
190: (String) words.elementAt(i)));
191: }
192: } else if (words.size() == 1) {
193: query = new TermQuery(new Term(f, (String) words
194: .elementAt(0)));
195: }
196: }
197: return query;
198: }
199:
200: /**
201: * Gets the prohibited status from the Criterion
202: */
203: public boolean isProhibited() {
204: if (NOT_MATCH.equals(this .match))
205: return true;
206: return false;
207: }
208:
209: // Bean
210:
211: /**
212: * Gets the Bean's ID
213: *
214: * @return the <code>Long</code> ID of the Bean.
215: */
216: public Long getId() {
217: return this .id;
218: }
219:
220: /**
221: * Sets the Bean's ID
222: *
223: * @param id the <code>Long</code> ID of the Bean.
224: */
225: public void setId(Long id) {
226: this .id = id;
227: }
228:
229: /**
230: * Gets the Bean's field
231: *
232: * @return the <code>String</code> field of the Bean.
233: */
234: public String getField() {
235: return this .field;
236: }
237:
238: /**
239: * Sets the Bean's field.<br/>
240: * ie. which field would you like this Criterion to search in.
241: *
242: * @param field the <code>String</code> field of the Bean.
243: */
244: public void setField(String field) {
245: this .field = field;
246: }
247:
248: /**
249: * Gets the Bean's match
250: *
251: * @return the <code>String</code> match of the Bean.
252: */
253: public String getMatch() {
254: return this .match;
255: }
256:
257: /**
258: * Sets the Bean's match.<br/>
259: * ie. what kind of match do you want performed by this Criterion.
260: *
261: * @param match the <code>String</code> match of the Bean.
262: */
263: public void setMatch(String match) {
264: this .match = match;
265: }
266:
267: /**
268: * Gets the Bean's term
269: *
270: * @return the <code>String</code> term of the Bean.
271: */
272: public String getTerm() {
273: return this .term;
274: }
275:
276: /**
277: * Sets the Bean's term.<br/>
278: * ie. the string of search terms for this <code>Criterion</code>.
279: *
280: * @param term the <code>String</code> term of the Bean.
281: */
282: public void setTerm(String term) {
283: this.term = term;
284: }
285:
286: }
|