001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.cocoon.components.search;
018:
019: import org.apache.avalon.excalibur.pool.Recyclable;
020: import org.apache.avalon.framework.activity.Disposable;
021: import org.apache.avalon.framework.configuration.Configurable;
022: import org.apache.avalon.framework.configuration.Configuration;
023: import org.apache.avalon.framework.configuration.ConfigurationException;
024: import org.apache.avalon.framework.logger.AbstractLogEnabled;
025: import org.apache.avalon.framework.service.ServiceException;
026: import org.apache.avalon.framework.service.ServiceManager;
027: import org.apache.avalon.framework.service.Serviceable;
028: import org.apache.cocoon.ProcessingException;
029: import org.apache.cocoon.util.ClassUtils;
030: import org.apache.lucene.analysis.Analyzer;
031: import org.apache.lucene.index.IndexReader;
032: import org.apache.lucene.queryParser.ParseException;
033: import org.apache.lucene.queryParser.QueryParser;
034: import org.apache.lucene.search.Hits;
035: import org.apache.lucene.search.IndexSearcher;
036: import org.apache.lucene.search.Query;
037: import org.apache.lucene.store.Directory;
038: import org.apache.lucene.store.FSDirectory;
039:
040: import java.io.File;
041: import java.io.IOException;
042:
043: /**
044: * This class provides searching via lucene.
045: *
046: * <p>
047: * In order to do searching you need a lucene Directory where the lucene generated
048: * index resides.
049: * Moreover you must know the lucene Analyzer which has been used for
050: * indexing, and which will be used for searching.
051: * </p>
052: * <p>
053: * Knowing this you can may start searching having a query which is parsable
054: * by an QueryParser, and having the name of the default field to use in
055: * searching.
056: * </p>
057: * <p>
058: * This class returns an Hit object as its search result.
059: * </p>
060: *
061: * @author <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
062: * @version CVS $Id: SimpleLuceneCocoonSearcherImpl.java 433543 2006-08-22 06:22:54Z crossley $
063: */
064: public class SimpleLuceneCocoonSearcherImpl extends AbstractLogEnabled
065: implements LuceneCocoonSearcher, Configurable, Serviceable,
066: Disposable, Recyclable {
067:
068: /**
069: * Configuration element name of lucene's Analyzer class.
070: * <p>
071: * Its value is
072: * <code>analyzer-classname</code>.
073: * </p>
074: *
075: */
076: protected final static String ANALYZER_CLASSNAME_CONFIG = "analyzer-classname";
077: /**
078: * Configuration element default value of lucene's Analyzer class.
079: * <p>
080: * Its value is,
081: * <code>org.apache.lucene.analysis.standard.StandardAnalyzer</code>.
082: * </p>
083: *
084: */
085: protected final static String ANALYZER_CLASSNAME_DEFAULT = "org.apache.lucene.analysis.standard.StandardAnalyzer";
086:
087: /**
088: * Configuration element name of default search field.
089: * <p>
090: * Its value is
091: * <code>default-seach-field</code>.
092: * </p>
093: *
094: */
095: protected final static String DEFAULT_SEARCH_FIELD_CONFIG = "default-search-field";
096: /**
097: * Configuration element default value of lucene's default search field.
098: * <p>
099: * Its value is <code>body</code>.
100: * </p>
101: *
102: */
103: protected final static String DEFAULT_SEARCH_FIELD_DEFAULT = "body";
104:
105: /**
106: * Configuration element name of default-query.
107: * <p>
108: * Its value is
109: * <code>default-query</code>.
110: * </p>
111: *
112: */
113: protected final static String DEFAULT_QUERY_CONFIG = "default-query";
114: /**
115: * Configuration element default value of default-query.
116: * <p>
117: * Its value is <code>null</code>.
118: * </p>
119: *
120: */
121: protected final static String DEFAULT_QUERY_DEFAULT = null;
122:
123: /**
124: * Configuration element name of query parser class name.
125: * <p>
126: * Its value is
127: * <code>queryparser-classname</code>.
128: * </p>
129: *
130: */
131: protected final static String QUERYPARSER_CLASSNAME_CONFIG = "queryparser-classname";
132: /**
133: * Configuration element default value of queryparser-classname.
134: * <p>
135: * Its value is
136: * <code>org.apache.lucene.queryParser.QueryParser</code>.
137: * </p>
138: *
139: */
140: protected final static String QUERYPARSER_CLASSNAME_DEFAULT = "org.apache.lucene.queryParser.QueryParser";
141:
142: /**
143: * Configuration element name of lucene's default filesystem default
144: * directory.
145: * <p>
146: * Its value is <code>directory</code>.
147: * </p>
148: *
149: */
150: protected final static String DIRECTORY_CONFIG = "directory";
151: /**
152: * Configuration element default value of filesystem default directory.
153: * <p>
154: * Its value is <code>null</code>.
155: * </p>
156: *
157: */
158: protected final static String DIRECTORY_DEFAULT = null;
159:
160: /**
161: * The service manager instance
162: *
163: */
164: protected ServiceManager manager = null;
165:
166: private String analyzerClassnameDefault = ANALYZER_CLASSNAME_DEFAULT;
167: private String defaultSearchFieldDefault = DEFAULT_SEARCH_FIELD_DEFAULT;
168: private String defaultQueryDefault = DEFAULT_QUERY_DEFAULT;
169: // private String queryparserClassnameDefault = QUERYPARSER_CLASSNAME_DEFAULT;
170: private String directoryDefault = DIRECTORY_DEFAULT;
171:
172: /**
173: * The lucene analyzer used for searching
174: */
175: private Analyzer analyzer;
176: /**
177: * The lucene directory used for searching
178: */
179: private Directory directory;
180: /**
181: * The lucene index searcher used for searching
182: */
183: private IndexSearcher indexSearcher;
184:
185: /**
186: * A lucene index reader cache to maximize sharing of
187: * lucene index readers
188: */
189: private IndexReaderCache indexReaderCache;
190:
191: /**
192: * set an analyzer, overriding the analyzerClassnameDefault.
193: *
194: * @param analyzer The new analyzer value
195: */
196: public void setAnalyzer(Analyzer analyzer) {
197: this .analyzer = analyzer;
198: }
199:
200: /**
201: * get the analyzer.
202: *
203: */
204: public Analyzer getAnalyzer() {
205: return this .analyzer;
206: }
207:
208: /**
209: *Sets the directory attribute of the SimpleLuceneCocoonSearcherImpl object
210: *
211: * @param directory The new directory value
212: */
213: public void setDirectory(Directory directory) {
214: this .directory = directory;
215: if (indexReaderCache != null) {
216: indexReaderCache.close();
217: indexReaderCache = null;
218: }
219: }
220:
221: /**
222: * Get an IndexReader.
223: * <p>
224: * As an IndexReader might be cached, it is check if the indexReader is
225: * still valid.
226: * </p>
227: *
228: * @return IndexReader an up to date indexReader
229: * @exception IOException is thrown iff it's impossible to create
230: * an IndexReader
231: */
232: public IndexReader getReader() throws IOException {
233: if (indexReaderCache == null) {
234: indexReaderCache = new IndexReaderCache();
235: }
236: return indexReaderCache.getIndexReader(directory);
237: }
238:
239: /**
240: * configure this component
241: *
242: * @param conf of this component
243: * @exception ConfigurationException is thrown iff configuration of
244: * this component fails
245: */
246: public void configure(Configuration conf)
247: throws ConfigurationException {
248: Configuration child;
249: String value;
250:
251: child = conf.getChild(ANALYZER_CLASSNAME_CONFIG, false);
252: if (child != null) {
253: // fix Bugzilla Bug 25277, use child.getValue
254: // and in all following blocks
255: value = child.getValue(ANALYZER_CLASSNAME_DEFAULT);
256: if (value != null) {
257: analyzerClassnameDefault = value;
258: try {
259: analyzer = (Analyzer) ClassUtils
260: .newInstance(analyzerClassnameDefault);
261: } catch (Exception e) {
262: throw new ConfigurationException(
263: "Cannot create analyzer of class "
264: + analyzerClassnameDefault, e);
265: }
266: }
267: }
268:
269: child = conf.getChild(DEFAULT_SEARCH_FIELD_CONFIG, false);
270: if (child != null) {
271: value = child.getValue(DEFAULT_SEARCH_FIELD_DEFAULT);
272: if (value != null) {
273: defaultSearchFieldDefault = value;
274: }
275: }
276:
277: child = conf.getChild(DEFAULT_QUERY_CONFIG, false);
278: if (child != null) {
279: value = child.getValue(DEFAULT_QUERY_DEFAULT);
280: if (value != null) {
281: defaultQueryDefault = value;
282: }
283: }
284: /*
285: child = conf.getChild(QUERYPARSER_CLASSNAME_CONFIG, false);
286: if (child != null) {
287: value = child.getValue(QUERYPARSER_CLASSNAME_DEFAULT);
288: if (value != null) {
289: queryparserClassnameDefault = value;
290: }
291: }
292: */
293: child = conf.getChild(DIRECTORY_CONFIG, false);
294: if (child != null) {
295: value = child.getValue(DIRECTORY_DEFAULT);
296: if (value != null) {
297: directoryDefault = value;
298: try {
299: setDirectory(FSDirectory.getDirectory(new File(
300: directoryDefault), false));
301: } catch (IOException ioe) {
302: throw new ConfigurationException(
303: "Cannot set index directory "
304: + directoryDefault, ioe);
305: }
306: }
307: }
308: }
309:
310: /**
311: * Set the current <code>ServiceManager</code> instance used by this
312: * <code>Serviceable</code>.
313: *
314: * @param manager manager of this component
315: * @exception ServiceException is never thrown
316: */
317: public void service(ServiceManager manager) throws ServiceException {
318: this .manager = manager;
319: }
320:
321: /**
322: * Dispose this component, releasing IndexSearcher, and IndexReaderCache.
323: */
324: public void dispose() {
325: releaseIndexSearcher();
326: releaseIndexReaderCache();
327: }
328:
329: /**
330: * Recycle this component, releasing IndexSearcher, and IndexReaderCache.
331: */
332: public void recycle() {
333: releaseIndexSearcher();
334: releaseIndexReaderCache();
335: }
336:
337: /**
338: * Search lucene index.
339: *
340: * @param query_string is lucene's query string
341: * @param default_field the lucene field to run the query
342: * @return lucene Hits
343: * @exception ProcessingException iff its not possible do run the query
344: */
345: public Hits search(String query_string, String default_field)
346: throws ProcessingException {
347: Hits hits = null;
348:
349: if (query_string == null) {
350: query_string = defaultQueryDefault;
351: }
352: if (default_field == null) {
353: default_field = defaultSearchFieldDefault;
354: }
355:
356: try {
357: Query query = QueryParser.parse(query_string,
358: default_field, analyzer);
359:
360: // release index searcher for each new search
361: releaseIndexSearcher();
362:
363: IndexSearcher indexSearcher = new IndexSearcher(getReader());
364: hits = indexSearcher.search(query);
365: // do not close indexSearcher now, as using hits needs an
366: // opened indexSearcher indexSearcher.close();
367: } catch (ParseException pe) {
368: throw new ProcessingException("Cannot parse query "
369: + query_string, pe);
370: } catch (IOException ioe) {
371: throw new ProcessingException("Cannot access hits", ioe);
372: }
373: return hits;
374: }
375:
376: /**
377: * Search lucene index.
378: * This method is designed to be used by other components, or Flowscripts
379: *
380: * @param query the lucene Query
381: * @return lucene Hits
382: * @exception ProcessingException if its not possible do run the query
383: */
384: public Hits search(Query query) throws ProcessingException {
385: Hits hits = null;
386: try {
387: // release index searcher for each new search
388: releaseIndexSearcher();
389:
390: IndexSearcher indexSearcher = new IndexSearcher(getReader());
391: hits = indexSearcher.search(query);
392: // do not close indexSearcher now, as using hits needs an
393: // opened indexSearcher indexSearcher.close();
394: } catch (IOException ioe) {
395: throw new ProcessingException("Cannot access hits", ioe);
396: }
397: return hits;
398: }
399:
400: /**
401: * Release the index searcher.
402: *
403: */
404: private void releaseIndexSearcher() {
405: if (indexSearcher != null) {
406: try {
407: indexSearcher.close();
408: } catch (IOException ioe) {
409: // ignore it
410: }
411: indexSearcher = null;
412: }
413: }
414:
415: /**
416: * Release the IndexReaderCache
417: *
418: */
419: private void releaseIndexReaderCache() {
420: if (indexReaderCache != null) {
421: indexReaderCache = null;
422: }
423: }
424:
425: /**
426: * This class should help to minimise usage of IndexReaders.
427: *
428: */
429: static class IndexReaderCache {
430: private IndexReader indexReader;
431: private long lastModified;
432:
433: /**
434: * Create an IndexReaderCache.
435: *
436: */
437: IndexReaderCache() {
438: }
439:
440: /**
441: * return cached IndexReader object.
442: *
443: * @param directory lucene index directory
444: * @return The indexReader value
445: */
446: public IndexReader getIndexReader(Directory directory)
447: throws IOException {
448: if (indexReader == null) {
449: createIndexReader(directory);
450: } else {
451: if (!indexReaderIsValid(directory)) {
452: createIndexReader(directory);
453: }
454: }
455: return indexReader;
456: }
457:
458: /**
459: * Close an opened lucene IndexReader
460: *
461: */
462: public void close() {
463: if (indexReader != null) {
464: try {
465: indexReader.close();
466: } catch (IOException ioe) {
467: // ignore it
468: }
469: indexReader = null;
470: }
471: }
472:
473: /**
474: * Check if cached IndexReader is up to date.
475: *
476: * @param directory lucene index directory
477: * @return boolean return true if there is a cached IndexReader object,
478: * and its lastModified date is greater equal than the lastModified date
479: * of its lucene Directory.
480: * @exception IOException Description of Exception
481: */
482: public boolean indexReaderIsValid(Directory directory)
483: throws IOException {
484: return indexReader != null
485: && IndexReader.getCurrentVersion(directory) == lastModified;
486: }
487:
488: /**
489: * Release all resources, most notably the lucene IndexReader.
490: *
491: * @exception Throwable Description of Exception
492: */
493: protected void finalize() throws Throwable {
494: close();
495: }
496:
497: /**
498: * Create unconditionally a lucene IndexReader.
499: *
500: * @param directory lucene index directory
501: * @exception IOException Description of Exception
502: */
503: private void createIndexReader(Directory directory)
504: throws IOException {
505: close();
506: indexReader = IndexReader.open(directory);
507: lastModified = IndexReader.getCurrentVersion(directory);
508: }
509: }
510: }
|