001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.jetspeed.search.lucene;
018:
019: import java.io.File;
020: import java.io.IOException;
021: import java.net.URL;
022: import java.util.ArrayList;
023: import java.util.Collection;
024: import java.util.HashMap;
025: import java.util.Iterator;
026: import java.util.Map;
027: import java.util.Set;
028:
029: import org.apache.commons.collections.MultiHashMap;
030: import org.apache.commons.collections.MultiMap;
031: import org.apache.commons.logging.Log;
032: import org.apache.commons.logging.LogFactory;
033: import org.apache.jetspeed.search.BaseParsedObject;
034: import org.apache.jetspeed.search.HandlerFactory;
035: import org.apache.jetspeed.search.ObjectHandler;
036: import org.apache.jetspeed.search.ParsedObject;
037: import org.apache.jetspeed.search.SearchEngine;
038: import org.apache.jetspeed.search.SearchResults;
039: import org.apache.lucene.analysis.Analyzer;
040: import org.apache.lucene.analysis.standard.StandardAnalyzer;
041: import org.apache.lucene.document.Document;
042: import org.apache.lucene.document.Field;
043: import org.apache.lucene.index.IndexReader;
044: import org.apache.lucene.index.IndexWriter;
045: import org.apache.lucene.index.Term;
046: import org.apache.lucene.queryParser.MultiFieldQueryParser;
047: import org.apache.lucene.queryParser.ParseException;
048: import org.apache.lucene.search.Hits;
049: import org.apache.lucene.search.IndexSearcher;
050: import org.apache.lucene.search.Query;
051: import org.apache.lucene.search.Searcher;
052:
053: /**
054: * @author <a href="mailto: jford@apache.org">Jeremy Ford</a>
055: *
056: */
057: public class SearchEngineImpl implements SearchEngine {
058: protected final static Log log = LogFactory
059: .getLog(SearchEngineImpl.class);
060: private File rootIndexDir = null;
061: private String analyzerClassName = null;
062: private boolean optimizeAfterUpdate = true;
063: private HandlerFactory handlerFactory;
064:
065: private static final int KEYWORD = 0;
066: private static final int TEXT = 1;
067:
068: public SearchEngineImpl(String indexRoot, String analyzerClassName,
069: boolean optimzeAfterUpdate, HandlerFactory handlerFactory)
070: throws Exception {
071: //assume it's full path for now
072: rootIndexDir = new File(indexRoot);
073: this .analyzerClassName = analyzerClassName;
074: this .optimizeAfterUpdate = optimzeAfterUpdate;
075: this .handlerFactory = handlerFactory;
076:
077: try {
078: Searcher searcher = null;
079: searcher = new IndexSearcher(rootIndexDir.getPath());
080: searcher.close();
081: } catch (Exception e) {
082: if (rootIndexDir.exists()) {
083: log.error("Failed to open Portal Registry indexes in "
084: + rootIndexDir.getPath(), e);
085: }
086: try {
087: rootIndexDir.delete();
088: rootIndexDir.mkdirs();
089:
090: IndexWriter indexWriter = new IndexWriter(rootIndexDir,
091: newAnalyzer(), true);
092: indexWriter.close();
093: indexWriter = null;
094: log.warn("Re-created Lucene Index in "
095: + rootIndexDir.getPath());
096: } catch (Exception e1) {
097: String message = "Cannot RECREATE Portlet Registry indexes in "
098: + rootIndexDir.getPath();
099: log.error(message, e1);
100: throw new Exception(message);
101: }
102: }
103: }
104:
105: /* (non-Javadoc)
106: * @see org.apache.jetspeed.search.SearchEnging#add(java.lang.Object)
107: */
108: public boolean add(Object o) {
109: Collection c = new ArrayList(1);
110: c.add(o);
111:
112: return add(c);
113: }
114:
115: /* (non-Javadoc)
116: * @see org.apache.jetspeed.search.SearchEnging#add(java.util.Collection)
117: */
118: public synchronized boolean add(Collection objects) {
119: boolean result = false;
120:
121: IndexWriter indexWriter;
122: try {
123: indexWriter = new IndexWriter(rootIndexDir, newAnalyzer(),
124: false);
125: } catch (IOException e) {
126: //logger.error("Error while creating index writer. Skipping add...", e);
127: return result;
128: }
129:
130: Iterator it = objects.iterator();
131: while (it.hasNext()) {
132: Object o = it.next();
133: // Look up appropriate handler
134: ObjectHandler handler = null;
135: try {
136: handler = handlerFactory.getHandler(o);
137: } catch (Exception e) {
138: //logger.error("Failed to create hanlder for object " + o.getClass().getName());
139: continue;
140: }
141:
142: // Parse the object
143: ParsedObject parsedObject = handler.parseObject(o);
144:
145: // Create document
146: Document doc = new Document();
147:
148: // Populate document from the parsed object
149: if (parsedObject.getKey() != null) {
150: doc.add(new Field(ParsedObject.FIELDNAME_KEY,
151: parsedObject.getKey(), Field.Store.YES,
152: Field.Index.UN_TOKENIZED));
153: }
154: if (parsedObject.getType() != null) {
155: doc.add(new Field(ParsedObject.FIELDNAME_TYPE,
156: parsedObject.getType(), Field.Store.YES,
157: Field.Index.TOKENIZED));
158: }
159: if (parsedObject.getTitle() != null) {
160: doc.add(new Field(ParsedObject.FIELDNAME_TITLE,
161: parsedObject.getTitle(), Field.Store.YES,
162: Field.Index.TOKENIZED));
163: }
164: if (parsedObject.getDescription() != null) {
165: doc.add(new Field(ParsedObject.FIELDNAME_DESCRIPTION,
166: parsedObject.getDescription(), Field.Store.YES,
167: Field.Index.TOKENIZED));
168: }
169: if (parsedObject.getContent() != null) {
170: doc.add(new Field(ParsedObject.FIELDNAME_CONTENT,
171: parsedObject.getContent(), Field.Store.YES,
172: Field.Index.TOKENIZED));
173: }
174: if (parsedObject.getLanguage() != null) {
175: doc.add(new Field(ParsedObject.FIELDNAME_LANGUAGE,
176: parsedObject.getLanguage(), Field.Store.YES,
177: Field.Index.TOKENIZED));
178: }
179: if (parsedObject.getURL() != null) {
180: doc.add(new Field(ParsedObject.FIELDNAME_URL,
181: parsedObject.getURL().toString(),
182: Field.Store.YES, Field.Index.TOKENIZED));
183: }
184: if (parsedObject.getClassName() != null) {
185: doc.add(new Field(ParsedObject.FIELDNAME_CLASSNAME,
186: parsedObject.getClassName(), Field.Store.YES,
187: Field.Index.TOKENIZED));
188: }
189:
190: String[] keywordArray = parsedObject.getKeywords();
191: if (keywordArray != null) {
192: for (int i = 0; i < keywordArray.length; ++i) {
193: String keyword = keywordArray[i];
194: doc.add(new Field(ParsedObject.FIELDNAME_KEYWORDS,
195: keyword, Field.Store.YES,
196: Field.Index.UN_TOKENIZED));
197: }
198: }
199:
200: Map keywords = parsedObject.getKeywordsMap();
201: addFieldsToDocument(doc, keywords, KEYWORD);
202:
203: Map fields = parsedObject.getFields();
204: addFieldsToDocument(doc, fields, TEXT);
205:
206: // Add the document to search index
207: try {
208: indexWriter.addDocument(doc);
209: } catch (IOException e) {
210: //logger.error("Error adding document to index.", e);
211: }
212: //logger.debug("Index Document Count = " + indexWriter.docCount());
213: //logger.info("Added '" + parsedObject.getTitle() + "' to index");
214: result = true;
215: }
216:
217: try {
218: if (optimizeAfterUpdate) {
219: indexWriter.optimize();
220: }
221: } catch (IOException e) {
222: //logger.error("Error while trying to optimize index.");
223: } finally {
224: try {
225: indexWriter.close();
226: } catch (IOException e) {
227: //logger.error("Error while closing index writer.", e);
228: }
229: }
230:
231: return result;
232: }
233:
234: /* (non-Javadoc)
235: * @see org.apache.jetspeed.search.SearchEnging#remove(java.lang.Object)
236: */
237: public boolean remove(Object o) {
238: Collection c = new ArrayList(1);
239: c.add(o);
240:
241: return remove(c);
242: }
243:
244: /* (non-Javadoc)
245: * @see org.apache.jetspeed.search.SearchEnging#remove(java.util.Collection)
246: */
247: public synchronized boolean remove(Collection objects) {
248: boolean result = false;
249:
250: try {
251: IndexReader indexReader = IndexReader
252: .open(this .rootIndexDir);
253:
254: Iterator it = objects.iterator();
255: while (it.hasNext()) {
256: Object o = it.next();
257: // Look up appropriate handler
258: ObjectHandler handler = handlerFactory.getHandler(o);
259:
260: // Parse the object
261: ParsedObject parsedObject = handler.parseObject(o);
262:
263: // Create term
264: Term term = null;
265:
266: if (parsedObject.getKey() != null) {
267: term = new Term(ParsedObject.FIELDNAME_KEY,
268: parsedObject.getKey());
269: // Remove the document from search index
270: int rc = indexReader.deleteDocuments(term);
271: //logger.info("Attempted to delete '" + term.toString() + "' from index, documents deleted = " + rc);
272: //System.out.println("Attempted to delete '" + term.toString() + "' from index, documents deleted = " + rc);
273: result = rc > 0;
274: }
275: }
276:
277: indexReader.close();
278:
279: if (optimizeAfterUpdate) {
280: optimize();
281: }
282:
283: } catch (Exception e) {
284: //logger.error("Exception", e);
285: result = false;
286: }
287:
288: return result;
289: }
290:
291: /* (non-Javadoc)
292: * @see org.apache.jetspeed.search.SearchEnging#update(java.lang.Object)
293: */
294: public boolean update(Object o) {
295: Collection c = new ArrayList(1);
296: c.add(o);
297:
298: return update(c);
299: }
300:
301: /* (non-Javadoc)
302: * @see org.apache.jetspeed.search.SearchEnging#update(java.util.Collection)
303: */
304: public synchronized boolean update(Collection objects) {
305: boolean result = false;
306:
307: try {
308: // Delete entries from index
309: remove(objects);
310: result = true;
311: } catch (Throwable e) {
312: //logger.error("Exception", e);
313: }
314:
315: try {
316: // Add entries to index
317: if (result) {
318: add(objects);
319: result = true;
320: }
321: } catch (Throwable e) {
322: //logger.error("Exception", e);
323: }
324:
325: return result;
326: }
327:
328: /* (non-Javadoc)
329: * @see org.apache.jetspeed.search.SearchEnging#optimize()
330: */
331: public synchronized boolean optimize() {
332: boolean result = false;
333:
334: try {
335: IndexWriter indexWriter = new IndexWriter(rootIndexDir,
336: newAnalyzer(), false);
337: indexWriter.optimize();
338: indexWriter.close();
339: result = true;
340: } catch (IOException e) {
341: //logger.error("Error while trying to optimize index.");
342: }
343: return result;
344: }
345:
346: /* (non-Javadoc)
347: * @see org.apache.jetspeed.search.SearchEngine#search(java.lang.String)
348: */
349: public SearchResults search(String queryString) {
350: Searcher searcher = null;
351: Hits hits = null;
352:
353: try {
354: searcher = new IndexSearcher(rootIndexDir.getPath());
355: } catch (IOException e) {
356: //logger.error("Failed to create index search using path " + rootDir.getPath());
357: return null;
358: }
359:
360: Analyzer analyzer = newAnalyzer();
361:
362: String[] searchFields = { ParsedObject.FIELDNAME_CONTENT,
363: ParsedObject.FIELDNAME_DESCRIPTION,
364: ParsedObject.FIELDNAME_FIELDS,
365: ParsedObject.FIELDNAME_KEY,
366: ParsedObject.FIELDNAME_KEYWORDS,
367: ParsedObject.FIELDNAME_LANGUAGE,
368: ParsedObject.FIELDNAME_SCORE,
369: ParsedObject.FIELDNAME_TITLE,
370: ParsedObject.FIELDNAME_TYPE,
371: ParsedObject.FIELDNAME_URL,
372: ParsedObject.FIELDNAME_CLASSNAME };
373:
374: Query query = null;
375: try {
376: String s[] = new String[searchFields.length];
377: for (int i = 0; i < s.length; i++)
378: s[i] = queryString;
379: query = MultiFieldQueryParser.parse(s, searchFields,
380: analyzer);
381: // Query query = QueryParser.parse(searchString, ParsedObject.FIELDNAME_CONTENT, analyzer);
382: } catch (ParseException e) {
383: //logger.info("Failed to parse query " + query);
384: return null;
385: }
386:
387: try {
388: hits = searcher.search(query);
389: } catch (IOException e) {
390: //logger.error("Error while peforming search.", e);
391: return null;
392: }
393:
394: int hitNum = hits.length();
395: ArrayList resultList = new ArrayList(hitNum);
396: for (int i = 0; i < hitNum; i++) {
397: ParsedObject result = new BaseParsedObject();
398: try {
399: Document doc = hits.doc(i);
400:
401: addFieldsToParsedObject(doc, result);
402:
403: result.setScore(hits.score(i));
404: Field type = doc.getField(ParsedObject.FIELDNAME_TYPE);
405: if (type != null) {
406: result.setType(type.stringValue());
407: }
408:
409: Field key = doc.getField(ParsedObject.FIELDNAME_KEY);
410: if (key != null) {
411: result.setKey(key.stringValue());
412: }
413:
414: Field description = doc
415: .getField(ParsedObject.FIELDNAME_DESCRIPTION);
416: if (description != null) {
417: result.setDescription(description.stringValue());
418: }
419:
420: Field title = doc
421: .getField(ParsedObject.FIELDNAME_TITLE);
422: if (title != null) {
423: result.setTitle(title.stringValue());
424: }
425:
426: Field content = doc
427: .getField(ParsedObject.FIELDNAME_CONTENT);
428: if (content != null) {
429: result.setContent(content.stringValue());
430: }
431:
432: Field language = doc
433: .getField(ParsedObject.FIELDNAME_LANGUAGE);
434: if (language != null) {
435: result.setLanguage(language.stringValue());
436: }
437:
438: Field classname = doc
439: .getField(ParsedObject.FIELDNAME_CLASSNAME);
440: if (classname != null) {
441: result.setClassName(classname.stringValue());
442: }
443:
444: Field url = doc.getField(ParsedObject.FIELDNAME_URL);
445: if (url != null) {
446: result.setURL(new URL(url.stringValue()));
447: }
448:
449: Field[] keywords = doc
450: .getFields(ParsedObject.FIELDNAME_KEYWORDS);
451: if (keywords != null) {
452: String[] keywordArray = new String[keywords.length];
453:
454: for (int j = 0; j < keywords.length; j++) {
455: Field keyword = keywords[j];
456: keywordArray[j] = keyword.stringValue();
457: }
458:
459: result.setKeywords(keywordArray);
460: }
461:
462: resultList.add(i, result);
463: } catch (IOException e) {
464: //logger
465: }
466: }
467:
468: if (searcher != null) {
469: try {
470: searcher.close();
471: } catch (IOException ioe) {
472: //logger.error("Closing Searcher", ioe);
473: }
474: }
475:
476: SearchResults results = new SearchResultsImpl(resultList);
477: return results;
478: }
479:
480: private Analyzer newAnalyzer() {
481: Analyzer rval = null;
482:
483: if (analyzerClassName != null) {
484: try {
485: Class analyzerClass = Class.forName(analyzerClassName);
486: rval = (Analyzer) analyzerClass.newInstance();
487: } catch (InstantiationException e) {
488: //logger.error("InstantiationException", e);
489: } catch (ClassNotFoundException e) {
490: //logger.error("ClassNotFoundException", e);
491: } catch (IllegalAccessException e) {
492: //logger.error("IllegalAccessException", e);
493: }
494: }
495:
496: if (rval == null) {
497: rval = new StandardAnalyzer();
498: }
499:
500: return rval;
501: }
502:
503: private void addFieldsToDocument(Document doc, Map fields, int type) {
504: if (fields != null) {
505: Iterator keyIter = fields.keySet().iterator();
506: while (keyIter.hasNext()) {
507: Object key = keyIter.next();
508: if (key != null) {
509: Object values = fields.get(key);
510: if (values != null) {
511: if (values instanceof Collection) {
512: Iterator valueIter = ((Collection) values)
513: .iterator();
514: while (valueIter.hasNext()) {
515: Object value = valueIter.next();
516: if (value != null) {
517: if (type == TEXT) {
518: doc
519: .add(new Field(
520: key.toString(),
521: value
522: .toString(),
523: Field.Store.YES,
524: Field.Index.UN_TOKENIZED));
525: } else {
526: doc
527: .add(new Field(
528: key.toString(),
529: value
530: .toString(),
531: Field.Store.YES,
532: Field.Index.UN_TOKENIZED));
533: }
534: }
535: }
536: } else {
537: if (type == TEXT) {
538: doc.add(new Field(key.toString(),
539: values.toString(),
540: Field.Store.YES,
541: Field.Index.UN_TOKENIZED));
542: } else {
543: doc.add(new Field(key.toString(),
544: values.toString(),
545: Field.Store.YES,
546: Field.Index.UN_TOKENIZED));
547: }
548: }
549: }
550: }
551: }
552: }
553: }
554:
555: private void addFieldsToParsedObject(Document doc, ParsedObject o) {
556: try {
557: MultiMap multiKeywords = new MultiHashMap();
558: MultiMap multiFields = new MultiHashMap();
559: HashMap fieldMap = new HashMap();
560:
561: Field classNameField = doc
562: .getField(ParsedObject.FIELDNAME_CLASSNAME);
563: if (classNameField != null) {
564: String className = classNameField.stringValue();
565: o.setClassName(className);
566: ObjectHandler handler = handlerFactory
567: .getHandler(className);
568:
569: Set fields = handler.getFields();
570: addFieldsToMap(doc, fields, multiFields);
571: addFieldsToMap(doc, fields, fieldMap);
572:
573: Set keywords = handler.getKeywords();
574: addFieldsToMap(doc, keywords, multiKeywords);
575: }
576:
577: o.setKeywordsMap(multiKeywords);
578: o.setFields(multiFields);
579: o.setFields(fieldMap);
580: } catch (Exception e) {
581: //logger.error("Error trying to add fields to parsed object.", e);
582: }
583: }
584:
585: private void addFieldsToMap(Document doc, Set fieldNames, Map fields) {
586: Iterator fieldIter = fieldNames.iterator();
587: while (fieldIter.hasNext()) {
588: String fieldName = (String) fieldIter.next();
589: Field[] docFields = doc.getFields(fieldName);
590: if (docFields != null) {
591: for (int i = 0; i < docFields.length; i++) {
592: Field field = docFields[i];
593: if (field != null) {
594: String value = field.stringValue();
595: fields.put(fieldName, value);
596: }
597: }
598: }
599: }
600: }
601: }
|