001: /*
002: * $Header: /cvsroot/mvnforum/mvnforum/src/com/mvnforum/search/attachment/AttachmentIndexer.java,v 1.9 2008/01/15 11:17:57 minhnn Exp $
003: * $Author: minhnn $
004: * $Revision: 1.9 $
005: * $Date: 2008/01/15 11:17:57 $
006: *
007: * ====================================================================
008: *
009: * Copyright (C) 2002-2007 by MyVietnam.net
010: *
011: * All copyright notices regarding mvnForum MUST remain
012: * intact in the scripts and in the outputted HTML.
013: * The "powered by" text/logo with a link back to
014: * http://www.mvnForum.com and http://www.MyVietnam.net in
015: * the footer of the pages MUST remain visible when the pages
016: * are viewed on the internet or intranet.
017: *
018: * This program is free software; you can redistribute it and/or modify
019: * it under the terms of the GNU General Public License as published by
020: * the Free Software Foundation; either version 2 of the License, or
021: * any later version.
022: *
023: * This program is distributed in the hope that it will be useful,
024: * but WITHOUT ANY WARRANTY; without even the implied warranty of
025: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
026: * GNU General Public License for more details.
027: *
028: * You should have received a copy of the GNU General Public License
029: * along with this program; if not, write to the Free Software
030: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
031: *
032: * Support can be obtained from support forums at:
033: * http://www.mvnForum.com/mvnforum/index
034: *
035: * Correspondence and Marketing Questions can be sent to:
036: * info at MyVietnam net
037: *
038: * @author: Minh Nguyen
039: * @author: Dejan Krsmanovic dejan_krsmanovic@yahoo.com
040: */
041: package com.mvnforum.search.attachment;
042:
043: import java.io.IOException;
044:
045: import net.myvietnam.mvncore.exception.SearchException;
046: import net.myvietnam.mvncore.util.DateUtil;
047: import net.myvietnam.mvncore.util.TimerUtil;
048:
049: import org.apache.commons.logging.Log;
050: import org.apache.commons.logging.LogFactory;
051: import org.apache.lucene.analysis.Analyzer;
052: import org.apache.lucene.analysis.standard.StandardAnalyzer;
053: import org.apache.lucene.document.DateTools;
054: import org.apache.lucene.document.Document;
055: import org.apache.lucene.document.Field;
056: import org.apache.lucene.document.DateTools.Resolution;
057: import org.apache.lucene.index.IndexReader;
058: import org.apache.lucene.index.IndexWriter;
059: import org.apache.lucene.index.Term;
060: import org.apache.lucene.store.Directory;
061:
062: import com.mvnforum.MVNForumFactoryConfig;
063: import com.mvnforum.db.AttachmentBean;
064: import com.mvnforum.service.MvnForumServiceFactory;
065: import com.mvnforum.service.SearchService;
066:
067: public class AttachmentIndexer {
068:
069: private static Log log = LogFactory.getLog(AttachmentIndexer.class);
070:
071: //Field names (used for indexing)
072: public static final String FIELD_ATTACHMENT_ID = "AttachmentID";
073: public static final String FIELD_ATTACHMENT_DESCRIPTION = "AttachmentDescription";
074: public static final String FIELD_ATTACHMENT_NAME = "AttachmentName";
075: public static final String FIELD_POST_ID = "PostID";
076: public static final String FIELD_FORUM_ID = "ForumID";
077: public static final String FIELD_ATTACHMENT_DATE = "AttachmentDate";
078:
079: private static Analyzer analyzer;
080:
081: private static long lastOptimizeTime = 0;
082:
083: static {
084: initializeAnalyzer();
085: }
086:
087: public static void scheduleAddAttachmentTask(
088: AttachmentBean atachmentBean) {
089: AddUpdateAttachmentIndexTask task = new AddUpdateAttachmentIndexTask(
090: atachmentBean,
091: AddUpdateAttachmentIndexTask.OPERATION_ADD);
092: TimerUtil.getInstance().schedule(task, 0);
093: }
094:
095: public static void scheduleUpdateAttachmentTask(
096: AttachmentBean attachmentBean) {
097: AddUpdateAttachmentIndexTask task = new AddUpdateAttachmentIndexTask(
098: attachmentBean,
099: AddUpdateAttachmentIndexTask.OPERATION_UPDATE);
100: TimerUtil.getInstance().schedule(task, 0);
101: }
102:
103: public static void scheduleDeleteAttachmentTask(int objectID) {
104: DeleteAttachmentIndexTask task = new DeleteAttachmentIndexTask(
105: objectID);
106: TimerUtil.getInstance().schedule(task, 0);
107: }
108:
109: public static void scheduleRebuildIndexTask() {
110: int maxAttachmentID = 0;
111: RebuildAttachmentIndexTask task = new RebuildAttachmentIndexTask(
112: maxAttachmentID);
113: TimerUtil.getInstance().schedule(task, 0);
114: }
115:
116: static Analyzer getAnalyzer() {
117: return analyzer;
118: }
119:
120: /**
121: * This class will load analyzer when starting. If specified analyzer class
122: * cannot be loaded then default analyzer will be used.
123: */
124: private static void initializeAnalyzer() {
125: String analyzerClassName = MVNForumFactoryConfig
126: .getLuceneAnalyzerClassName();
127: if ((analyzerClassName == null)
128: || (analyzerClassName.equals(""))) {
129: //create standard analyzer
130: //String[] stopWords = this.loadStopWords();
131: analyzer = new StandardAnalyzer();
132: log.debug("Using StandardAnalyzer for indexing");
133: } else {
134: //try to create specified analyzer
135: try {
136: log.debug("About to load Analyzer ["
137: + analyzerClassName + "] for indexing");
138: analyzer = (Analyzer) Class.forName(analyzerClassName)
139: .newInstance();
140: } catch (Exception e) {
141: log.warn("Cannot load " + analyzerClassName
142: + ". Loading StandardAnalyzer");
143: analyzer = new StandardAnalyzer();
144: }
145: }
146: }
147:
148: /**
149: * This method is used for getting new IndexWriter. It can create new index
150: * or add Attachment to existing index. Creating new index will delete previous so it
151: * should be used for rebuilding index.
152: * @param create - true if new index should be created.
153: * - false for adding attachments to existing index
154: * @return IndexWriter object that is used for adding attachments to index
155: */
156: static IndexWriter getIndexWriter(Directory directory,
157: boolean create) throws SearchException {
158:
159: IndexWriter writer = null;
160:
161: SearchService service = MvnForumServiceFactory
162: .getMvnForumService().getSearchService();
163: //If create = false, we will create IndexWriter with false argument
164: if (create == false) {
165: try {
166: writer = new IndexWriter(directory, analyzer, false);
167: if (service.saveAttachmentOnDisk()) {
168: writer.setUseCompoundFile(true);
169: }
170: return writer;
171: } catch (IOException e) {
172: log
173: .warn(
174: "Cannot open existed index. New index will be created.",
175: e);
176: //Ignore Exception. We will try to create index with true parameter
177: }
178: }
179: // We are here in two cases: We wanted to create new index or because
180: // index doesn't existed
181: try {
182: //This will create new index and delete existing
183: service.deleteContent(directory);
184: writer = new IndexWriter(directory, analyzer, true);// actually the directory should be 'create' = true
185: if (service.saveAttachmentOnDisk()) {
186: writer.setUseCompoundFile(true);
187: }
188: return writer;
189: } catch (IOException e) {
190: //@todo : localize me
191: log.error("IOException during get index writer", e);
192: throw new SearchException(
193: "Error while creating index writer");
194: }
195: }
196:
197: /**
198: * This method is used for adding single Attachment to index
199: * Note: this method does not close the writer
200: * @param AttachmentBean A Attachment that should be indexed
201: * @param writer IndexWriter that is used for storing
202: * @throws SearchException
203: */
204: static void doIndexAttachment(AttachmentBean attachmentBean,
205: IndexWriter writer) throws SearchException {
206:
207: if (attachmentBean == null)
208: return;
209: if ((attachmentBean.getAttachFilename() == null)
210: || attachmentBean.getAttachFilename().equals("")
211: || (attachmentBean.getAttachCreationDate() == null)) {
212: return;
213: }
214:
215: //Each Atachment will be represented as a document
216: Document attachmentDocument = new Document();
217: //Document has following fields that could be queried on
218: attachmentDocument.add(new Field(FIELD_ATTACHMENT_ID, Integer
219: .toString(attachmentBean.getAttachID()),
220: Field.Store.YES, Field.Index.UN_TOKENIZED));
221: attachmentDocument.add(new Field(FIELD_ATTACHMENT_NAME,
222: attachmentBean.getAttachFilename(), Field.Store.YES,
223: Field.Index.TOKENIZED));
224: attachmentDocument.add(new Field(FIELD_ATTACHMENT_DESCRIPTION,
225: attachmentBean.getAttachDesc(), Field.Store.YES,
226: Field.Index.TOKENIZED));
227: attachmentDocument.add(new Field(FIELD_POST_ID, Integer
228: .toString(attachmentBean.getPostID()), Field.Store.YES,
229: Field.Index.UN_TOKENIZED));
230: attachmentDocument.add(new Field(FIELD_FORUM_ID, Integer
231: .toString(attachmentBean.getForumID()),
232: Field.Store.YES, Field.Index.UN_TOKENIZED));
233: attachmentDocument.add(new Field(FIELD_ATTACHMENT_DATE,
234: DateTools.dateToString(attachmentBean
235: .getAttachCreationDate(),
236: Resolution.MILLISECOND), Field.Store.YES,
237: Field.Index.UN_TOKENIZED));
238:
239: //now we have created document with fields so we can store it
240: try {
241: writer.addDocument(attachmentDocument);
242: } catch (IOException e) {
243: log.error("AtachmentIndexer.doIndexAtachment failed", e);
244: //@todo : localize me
245: throw new SearchException(
246: "Error writing new attachment to index");
247: } catch (Throwable e) {
248: log.error("AtachmentIndexer.doIndexAtachment failed", e);
249: //@todo : localize me
250: throw new SearchException(
251: "Error writing new Atachment to index");
252: }
253: }
254:
255: /**
256: * Add single Attachment to index
257: * @param AttachmentBean
258: * @throws SearchException
259: */
260: static void addAttachmentToIndex(AttachmentBean attachmentBean)
261: throws SearchException, IOException {
262:
263: Directory directory = null;
264: IndexWriter writer = null;
265: SearchService service = MvnForumServiceFactory
266: .getMvnForumService().getSearchService();
267: try {
268: directory = service.getSearchAttachmentIndexDir();
269: writer = getIndexWriter(directory, false);
270: if (writer == null) {
271: log.warn("Cannot get the IndexWriter");
272: return;
273: }
274: doIndexAttachment(attachmentBean, writer);
275:
276: // now check if we should optimize index (each hour)
277: long now = System.currentTimeMillis();
278: long timeFromLastOptimize = now - lastOptimizeTime;
279:
280: if (service.saveAttachmentOnDisk()
281: && (timeFromLastOptimize > DateUtil.HOUR)) {
282: log
283: .debug("writer.optimize() called in addAttachmentToIndex");
284: writer.optimize();
285: lastOptimizeTime = now;
286: }
287: } catch (SearchException ex) {
288: throw ex;
289: } finally {
290: if (writer != null) {
291: try {
292: writer.close();
293: } catch (IOException e) {
294: log.debug("Error closing Lucene IndexWriter", e);
295: }
296: }
297: if (directory != null) {
298: try {
299: directory.close();
300: } catch (IOException e) {
301: log.debug("Cannot close directory.", e);
302: }
303: }
304: }
305: }
306:
307: /**
308: * This method is used for deleting Attachment from index.
309: * @param AttachmentID is id of the Attachment that should be deleted
310: * @throws SearchException
311: */
312: static void deleteAttachmentFromIndex(int attachmentID)
313: throws SearchException {
314:
315: Directory directory = null;
316: IndexReader reader = null;
317: try {
318: SearchService service = MvnForumServiceFactory
319: .getMvnForumService().getSearchService();
320: directory = service.getSearchAttachmentIndexDir();
321: reader = IndexReader.open(directory);
322: if (reader == null) {
323: log.warn("Cannot get the IndexReader");
324: return;
325: }
326:
327: Term term = new Term(FIELD_ATTACHMENT_ID, String
328: .valueOf(attachmentID));
329: int deletedCount = reader.deleteDocuments(term);
330: log
331: .debug("deleteAttachmentFromIndex: deleted attachment = "
332: + deletedCount);
333: } catch (IOException e) {
334: //@todo : localize me
335: throw new SearchException(
336: "Error trying to delete Attachment with attachmentID = "
337: + attachmentID);
338: } finally {
339: if (reader != null) {
340: try {
341: reader.close();
342: } catch (IOException e) {
343: log.debug("Error closing Lucene IndexReader", e);
344: }
345: }
346: if (directory != null) {
347: try {
348: directory.close();
349: } catch (IOException e) {
350: log.debug("Cannot close directory.", e);
351: }
352: }
353: }
354: }
355:
356: public static int getNumDocs() {
357:
358: int numDocs = -1;
359: Directory directory = null;
360: IndexReader reader = null;
361: try {
362: SearchService service = MvnForumServiceFactory
363: .getMvnForumService().getSearchService();
364: directory = service.getSearchAttachmentIndexDir();
365: reader = IndexReader.open(directory);
366: if (reader == null) {
367: log.warn("Cannot get the IndexReader");
368: return -1;
369: }
370: numDocs = reader.numDocs();
371: } catch (IOException ioe) {
372: //ignore
373: ioe.printStackTrace();
374: } finally {
375: if (reader != null) {
376: try {
377: reader.close();
378: } catch (IOException e) {
379: log.debug("Error closing Lucene IndexReader", e);
380: }
381: }
382: if (directory != null) {
383: try {
384: directory.close();
385: } catch (IOException e) {
386: log.debug("Cannot close directory.", e);
387: }
388: }
389: }
390: return numDocs;
391: }
392:
393: }
|