001: //The contents of this file are subject to the Mozilla Public License Version 1.1
002: //(the "License"); you may not use this file except in compliance with the
003: //License. You may obtain a copy of the License at http://www.mozilla.org/MPL/
004: //
005: //Software distributed under the License is distributed on an "AS IS" basis,
006: //WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
007: //for the specific language governing rights and
008: //limitations under the License.
009: //
010: //The Original Code is "The Columba Project"
011: //
012: //The Initial Developers of the Original Code are Frederik Dietz and Timo Stich.
013: //Portions created by Frederik Dietz and Timo Stich are Copyright (C) 2003.
014: //
015: //All Rights Reserved.
016: package org.columba.mail.folder.search;
017:
018: import java.io.File;
019: import java.io.IOException;
020: import java.io.StringReader;
021: import java.util.Arrays;
022: import java.util.LinkedList;
023: import java.util.List;
024: import java.util.ListIterator;
025: import java.util.logging.Logger;
026:
027: import javax.swing.JOptionPane;
028:
029: import org.apache.lucene.analysis.Analyzer;
030: import org.apache.lucene.analysis.Token;
031: import org.apache.lucene.analysis.TokenStream;
032: import org.apache.lucene.analysis.standard.StandardAnalyzer;
033: import org.apache.lucene.document.Document;
034: import org.apache.lucene.document.Field;
035: import org.apache.lucene.index.IndexReader;
036: import org.apache.lucene.index.IndexWriter;
037: import org.apache.lucene.index.Term;
038: import org.apache.lucene.search.BooleanQuery;
039: import org.apache.lucene.search.Hits;
040: import org.apache.lucene.search.IndexSearcher;
041: import org.apache.lucene.search.Query;
042: import org.apache.lucene.search.WildcardQuery;
043: import org.apache.lucene.store.Directory;
044: import org.apache.lucene.store.FSDirectory;
045: import org.apache.lucene.store.RAMDirectory;
046: import org.columba.api.command.IStatusObservable;
047: import org.columba.core.base.ListTools;
048: import org.columba.core.filter.FilterCriteria;
049: import org.columba.core.filter.FilterRule;
050: import org.columba.core.filter.IFilterCriteria;
051: import org.columba.core.filter.IFilterRule;
052: import org.columba.core.io.DiskIO;
053: import org.columba.core.io.StreamUtils;
054: import org.columba.mail.folder.IMailbox;
055: import org.columba.mail.folder.event.IFolderEvent;
056: import org.columba.mail.message.ICloseableIterator;
057: import org.columba.mail.message.IHeaderList;
058: import org.columba.mail.util.MailResourceLoader;
059: import org.columba.ristretto.message.MimePart;
060: import org.columba.ristretto.message.MimeTree;
061:
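/**
 * Query engine that searches message bodies of a mailbox using Lucene.
 * <p>
 * Newly added messages are first indexed in an in-memory (RAM) index, which
 * is merged into an on-disk index stored in the <code>.index</code>
 * subdirectory of the mailbox directory. Removed messages are remembered in
 * a list and filtered out of search results until the next merge.
 *
 * @author timo
 */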
065: public class LuceneQueryEngine implements QueryEngine {
066:
067: /** JDK 1.4+ logging framework logger, used for logging. */
068: private static final Logger LOG = Logger
069: .getLogger("org.columba.mail.folder.search");
070:
071: private static final int OPTIMIZE_AFTER_N_OPERATIONS = 30;
072:
073: private static final String[] CAPS = { "Body" };
074:
075: private File indexDir;
076:
077: private IndexReader fileIndexReader;
078:
079: private IndexReader ramIndexReader;
080:
081: private Directory luceneIndexDir;
082:
083: private Directory ramIndexDir;
084:
085: private long ramLastModified;
086:
087: private long luceneLastModified;
088:
089: private LinkedList deleted;
090:
091: private int operationCounter;
092:
093: private Analyzer analyzer;
094:
095: private IMailbox folder;
096:
097: /**
098: * Constructor for LuceneQueryEngine.
099: */
100: public LuceneQueryEngine(IMailbox folder) {
101: this .folder = folder;
102:
103: analyzer = new StandardAnalyzer();
104:
105: try {
106: initRAMDir();
107: } catch (IOException e) {
108: e.printStackTrace();
109: }
110:
111: luceneLastModified = -1;
112: ramLastModified = -1;
113:
114: deleted = new LinkedList();
115: operationCounter = 0;
116:
117: File folderInDir = folder.getDirectoryFile();
118: indexDir = new File(folderInDir, ".index");
119:
120: try {
121: if (!indexDir.exists()) {
122: createIndex();
123: }
124:
125: luceneIndexDir = FSDirectory.getDirectory(indexDir, false);
126: } catch (IOException e) {
127: JOptionPane.showMessageDialog(null,
128: e.getLocalizedMessage(),
129: "Error while creating Lucene Index",
130: JOptionPane.ERROR_MESSAGE);
131: }
132:
133: try {
134: // If there is an existing lock then it must be from a
135: // previous crash -> remove it!
136: if (IndexReader.isLocked(luceneIndexDir)) {
137: IndexReader.unlock(luceneIndexDir);
138: }
139: } catch (IOException e) {
140: // Remove of lock didn't work -> delete by hand
141: File commitLock = new File(indexDir, "commit.lock");
142:
143: if (commitLock.exists()) {
144: commitLock.delete();
145: }
146:
147: File writeLock = new File(indexDir, "write.lock");
148:
149: if (writeLock.exists()) {
150: writeLock.delete();
151: }
152: }
153:
154: // Check if index is consitent with mailbox
155: try {
156: if (getFileReader().numDocs() != folder.getHeaderList()
157: .count()) {
158: LOG.warning("Lucene Index includes "
159: + getFileReader().numDocs()
160: + " messages, but mailbox has "
161: + folder.getHeaderList().count());
162: sync();
163: }
164: } catch (Exception e) {
165: LOG.severe(e.getMessage());
166: e.printStackTrace();
167: }
168: }
169:
170: protected void createIndex() throws IOException {
171: DiskIO.ensureDirectory(indexDir);
172:
173: IndexWriter indexWriter = new IndexWriter(indexDir, null, true);
174: indexWriter.close();
175: }
176:
177: protected IndexReader getFileReader() {
178: try {
179: // @TODO dont use deprecated method
180: if (IndexReader.getCurrentVersion(luceneIndexDir) != luceneLastModified) {
181: fileIndexReader = IndexReader.open(luceneIndexDir);
182: // @TODO dont use deprecated method
183: luceneLastModified = IndexReader
184: .getCurrentVersion(luceneIndexDir);
185: }
186: } catch (IOException e) {
187: LOG.severe(e.getLocalizedMessage());
188: try {
189: reset();
190: } catch (Exception e1) {
191: LOG.severe(e.getLocalizedMessage());
192: }
193: }
194:
195: return fileIndexReader;
196: }
197:
198: protected IndexReader getRAMReader() {
199: try {
200: if (IndexReader.getCurrentVersion(ramIndexDir) != ramLastModified) {
201: ramIndexReader = IndexReader.open(ramIndexDir);
202: ramLastModified = IndexReader
203: .getCurrentVersion(ramIndexDir);
204: }
205: } catch (IOException e) {
206: e.printStackTrace();
207: }
208:
209: return ramIndexReader;
210: }
211:
212: private Query getLuceneQuery(IFilterRule filterRule,
213: Analyzer analyzer) {
214: IFilterCriteria criteria;
215: String field;
216: int mode;
217:
218: Query result = new BooleanQuery();
219: Query subresult = null;
220:
221: int condition = filterRule.getConditionInt();
222: boolean prohibited;
223: boolean required;
224:
225: if (condition == FilterRule.MATCH_ALL) {
226: prohibited = false;
227: required = true;
228: } else {
229: prohibited = false;
230: required = false;
231: }
232:
233: BooleanQuery termQuery = null;
234:
235: for (int i = 0; i < filterRule.count(); i++) {
236: criteria = filterRule.get(i);
237: mode = criteria.getCriteria();
238:
239: field = "Body";
240:
241: TokenStream tokenStream = analyzer.tokenStream(field,
242: new StringReader(criteria.getPatternString()));
243:
244: termQuery = new BooleanQuery();
245:
246: try {
247: Token token = tokenStream.next();
248:
249: while (token != null) {
250: String pattern = "*" + token.termText() + "*";
251: LOG.info("Field = \"" + field + "\" Text = \""
252: + pattern + "\"");
253: termQuery.add(new WildcardQuery(new Term(field,
254: pattern)), true, false);
255:
256: token = tokenStream.next();
257: }
258: } catch (IOException e) {
259: e.printStackTrace();
260: }
261:
262: switch (mode) {
263: case FilterCriteria.CONTAINS: {
264: subresult = new BooleanQuery();
265: ((BooleanQuery) subresult).add(termQuery, true, false);
266:
267: break;
268: }
269:
270: case FilterCriteria.CONTAINS_NOT: {
271: subresult = new BooleanQuery();
272: ((BooleanQuery) subresult).add(new WildcardQuery(
273: new Term("uid", "*")), true, false);
274: ((BooleanQuery) subresult).add(termQuery, false, true);
275:
276: break;
277: }
278: }
279:
280: ((BooleanQuery) result)
281: .add(subresult, required, prohibited);
282: }
283:
284: return result;
285: }
286:
287: public List queryEngine(IFilterRule filter) throws Exception {
288: Query query = getLuceneQuery(filter, analyzer);
289:
290: List result = search(query);
291:
292: ListTools.substract(result, deleted);
293:
294: if (!checkResult(result)) {
295: // Search again
296: result = search(query);
297: ListTools.substract(result, deleted);
298: }
299:
300: return result;
301: }
302:
303: protected List search(Query query) throws IOException {
304: LinkedList result = new LinkedList();
305:
306: if (getFileReader().numDocs() > 0) {
307: Hits hitsFile = new IndexSearcher(getFileReader())
308: .search(query);
309:
310: for (int i = 0; i < hitsFile.length(); i++) {
311: result.add(new Integer(hitsFile.doc(i).getField("uid")
312: .stringValue()));
313: }
314: }
315:
316: if (getRAMReader().numDocs() > 0) {
317: Hits hitsRAM = new IndexSearcher(getRAMReader())
318: .search(query);
319:
320: for (int i = 0; i < hitsRAM.length(); i++) {
321: result.add(new Integer(hitsRAM.doc(i).getField("uid")
322: .stringValue()));
323: }
324: }
325:
326: return result;
327: }
328:
329: public List queryEngine(IFilterRule filter, Object[] uids)
330: throws Exception {
331: List result = queryEngine(filter);
332:
333: ListTools.intersect(result, Arrays.asList(uids));
334:
335: return result;
336: }
337:
338: /**
339: * @see org.columba.mail.folder.SearchEngineInterface#messageAdded(IFolderEvent)
340: */
341: public void messageAdded(Object uid) throws Exception {
342: Document messageDoc = getDocument(uid);
343:
344: IndexWriter writer = new IndexWriter(ramIndexDir, analyzer,
345: false);
346: writer.addDocument(messageDoc);
347: writer.close();
348: incOperationCounter();
349: }
350:
351: private Document getDocument(Object uid) {
352: Document messageDoc = new Document();
353:
354: messageDoc.add(Field.Keyword("uid", uid.toString()));
355:
356: // Find the body text part
357: try {
358: MimeTree mimeTree = folder.getMimePartTree(uid);
359: MimePart bodyPart = mimeTree.getFirstTextPart("plain");
360:
361: if (bodyPart != null) {
362: messageDoc.add(Field.UnStored("Body", StreamUtils
363: .readCharacterStream(
364: folder.getMimePartBodyStream(uid,
365: bodyPart.getAddress()))
366: .toString()));
367: }
368: } catch (IOException e) {
369: e.printStackTrace();
370: LOG.severe(e.getMessage());
371: } catch (Exception e) {
372: e.printStackTrace();
373: LOG.severe(e.getMessage());
374: }
375:
376: return messageDoc;
377: }
378:
379: /**
380: * @see org.columba.mail.folder.SearchEngineInterface#messageRemoved(IFolderEvent)
381: */
382: public void messageRemoved(Object uid) throws Exception {
383: deleted.add(uid);
384:
385: /*
386: * try { indexLock.tryToGetLock(null); getReader().delete(new
387: * Term("uid", uid.toString())); indexLock.release(); } catch
388: * (IOException e) { JOptionPane.showMessageDialog( null,
389: * e.getMessage(), "Error while removing Message from Lucene Index",
390: * JOptionPane.ERROR_MESSAGE); }
391: */
392: }
393:
394: protected void mergeRAMtoIndex() throws IOException {
395: IndexReader ramReader = getRAMReader();
396: IndexReader fileReader = getFileReader();
397:
398: LOG.fine("Lucene: Merging RAMIndex to FileIndex");
399:
400: /*
401: * Document doc; for( int i=0; i<ramReader.numDocs(); i++) { doc =
402: * ramReader.document(i); if( !deleted.contains(new
403: * Integer(ramReader.document(i).getField("uid").stringValue())) ) {
404: * fileIndex.addDocument(doc); } }
405: */
406: ListIterator it = deleted.listIterator();
407:
408: while (it.hasNext()) {
409: String uid = it.next().toString();
410:
411: if (ramReader.delete(new Term("uid", uid)) == 0) {
412: fileReader.delete(new Term("uid", uid));
413: }
414: }
415:
416: fileReader.close();
417: ramReader.close();
418:
419: IndexWriter fileIndex = new IndexWriter(luceneIndexDir,
420: analyzer, false);
421:
422: fileIndex.addIndexes(new Directory[] { ramIndexDir });
423:
424: fileIndex.optimize();
425: fileIndex.close();
426:
427: initRAMDir();
428:
429: deleted.clear();
430: }
431:
432: private void initRAMDir() throws IOException {
433: ramIndexDir = new RAMDirectory();
434:
435: IndexWriter writer = new IndexWriter(ramIndexDir, analyzer,
436: true);
437: writer.close();
438: ramLastModified = -1;
439: }
440:
441: private void incOperationCounter() throws IOException {
442: operationCounter++;
443:
444: if (operationCounter > OPTIMIZE_AFTER_N_OPERATIONS) {
445: mergeRAMtoIndex();
446: operationCounter = 0;
447: }
448: }
449:
450: /**
451: * Returns the caps.
452: *
453: * @return String[]
454: */
455: public String[] getCaps() {
456: return CAPS;
457: }
458:
459: private boolean checkResult(List result) {
460: ListIterator it = result.listIterator();
461:
462: try {
463: while (it.hasNext()) {
464: if (!folder.exists(it.next())) {
465: result.clear();
466: sync();
467:
468: return false;
469: }
470: }
471: } catch (Exception e) {
472: e.printStackTrace();
473: }
474:
475: return true;
476: }
477:
478: /**
479: * @see org.columba.mail.folder.DefaultSearchEngine#reset()
480: */
481: public void reset() throws Exception {
482: createIndex();
483: }
484:
485: /** {@inheritDoc} */
486: public void sync() throws Exception {
487: LOG.severe("Lucene Index inconsistent - recreation forced");
488: IHeaderList hl = folder.getHeaderList();
489:
490: if (getObservable() != null) {
491: getObservable().setMessage(
492: MailResourceLoader.getString("statusbar",
493: "message", "lucene_sync"));
494: }
495:
496: getObservable().setCurrent(0);
497:
498: try {
499: createIndex();
500:
501: IndexWriter writer = new IndexWriter(luceneIndexDir,
502: analyzer, false);
503:
504: int count = hl.count();
505: getObservable().setCurrent(count);
506:
507: Object uid;
508: int i = 0;
509: ICloseableIterator it;
510: for (it = hl.keyIterator(); it.hasNext();) {
511: uid = it.next();
512:
513: writer.addDocument(getDocument(uid));
514:
515: getObservable().setCurrent(i);
516: }
517: it.close();
518:
519: getObservable().setCurrent(count);
520:
521: writer.optimize();
522: writer.close();
523: } catch (Exception e) {
524: LOG.severe("Creation of Lucene Index failed :"
525: + e.getLocalizedMessage());
526:
527: // show neat error dialog here
528: }
529: }
530:
531: public IStatusObservable getObservable() {
532: return folder.getObservable();
533: }
534:
535: public void save() {
536: try {
537: mergeRAMtoIndex();
538: } catch (IOException e) {
539: LOG.severe(e.getMessage());
540: }
541:
542: }
543:
544: }
|