001: /*
002: * $Header: /cvsroot/mvnforum/mvnforum/src/com/mvnforum/search/post/PostIndexer.java,v 1.23 2008/01/15 11:17:57 minhnn Exp $
003: * $Author: minhnn $
004: * $Revision: 1.23 $
005: * $Date: 2008/01/15 11:17:57 $
006: *
007: * ====================================================================
008: *
009: * Copyright (C) 2002-2007 by MyVietnam.net
010: *
011: * All copyright notices regarding mvnForum MUST remain
012: * intact in the scripts and in the outputted HTML.
013: * The "powered by" text/logo with a link back to
014: * http://www.mvnForum.com and http://www.MyVietnam.net in
015: * the footer of the pages MUST remain visible when the pages
016: * are viewed on the internet or intranet.
017: *
018: * This program is free software; you can redistribute it and/or modify
019: * it under the terms of the GNU General Public License as published by
020: * the Free Software Foundation; either version 2 of the License, or
021: * any later version.
022: *
023: * This program is distributed in the hope that it will be useful,
024: * but WITHOUT ANY WARRANTY; without even the implied warranty of
025: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
026: * GNU General Public License for more details.
027: *
028: * You should have received a copy of the GNU General Public License
029: * along with this program; if not, write to the Free Software
030: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
031: *
032: * Support can be obtained from support forums at:
033: * http://www.mvnForum.com/mvnforum/index
034: *
035: * Correspondence and Marketing Questions can be sent to:
036: * info at MyVietnam net
037: *
038: * @author: Minh Nguyen
039: * @author: Dejan Krsmanovic dejan_krsmanovic@yahoo.com
040: */
041: package com.mvnforum.search.post;
042:
043: import java.io.IOException;
044:
045: import net.myvietnam.mvncore.exception.SearchException;
046: import net.myvietnam.mvncore.util.DateUtil;
047: import net.myvietnam.mvncore.util.TimerUtil;
048:
049: import org.apache.commons.logging.Log;
050: import org.apache.commons.logging.LogFactory;
051: import org.apache.lucene.analysis.Analyzer;
052: import org.apache.lucene.analysis.standard.StandardAnalyzer;
053: import org.apache.lucene.document.*;
054: import org.apache.lucene.document.DateTools.Resolution;
055: import org.apache.lucene.index.*;
056: import org.apache.lucene.store.Directory;
057:
058: import com.mvnforum.MVNForumFactoryConfig;
059: import com.mvnforum.db.PostBean;
060: import com.mvnforum.search.IntegerFilter;
061: import com.mvnforum.service.MvnForumServiceFactory;
062: import com.mvnforum.service.SearchService;
063:
064: public class PostIndexer {
065:
066: private static Log log = LogFactory.getLog(PostIndexer.class);
067:
068: //Field names (used for indexing)
069: public static final String FIELD_POST_ID = "postID";
070: public static final String FIELD_THREAD_ID = "threadID";
071: public static final String FIELD_FORUM_ID = "forumID";
072: public static final String FIELD_MEMBER_ID = "memberID";
073: public static final String FIELD_POST_TOPIC = "postTopic";
074: public static final String FIELD_POST_BODY = "postBody";
075: public static final String FIELD_POST_DATE = "postDate";
076:
077: public static final String FIELD_WITH_ATTACHMENT = "withAttachment";
078:
079: public static final String FIELD_ATTACHMENT_COUNT = "attachmentCount";
080:
081: //public static final String PROPERTY_SEARCH_PATH = "search.path";
082: //public static final String PROPERTY_SEARCH_AUTOINDEX = "search.autoindex";
083:
084: //Timer is used for scheduling jobs
085: private static Analyzer analyzer;
086:
087: private static long lastOptimizeTime = 0;
088:
089: static {
090: initializeAnalyzer();
091: }
092:
093: public static void scheduleAddPostTask(PostBean postBean) {
094: AddUpdatePostIndexTask task = new AddUpdatePostIndexTask(
095: postBean, AddUpdatePostIndexTask.OPERATION_ADD);
096: TimerUtil.getInstance().schedule(task, 0);
097: }
098:
099: public static void scheduleUpdatePostTask(PostBean postBean) {
100: AddUpdatePostIndexTask task = new AddUpdatePostIndexTask(
101: postBean, AddUpdatePostIndexTask.OPERATION_UPDATE);
102: TimerUtil.getInstance().schedule(task, 0);
103: }
104:
105: public static void scheduleDeletePostTask(int objectID,
106: int objectType) {
107: DeletePostIndexTask task = new DeletePostIndexTask(objectID,
108: objectType);
109: TimerUtil.getInstance().schedule(task, 0);
110: }
111:
112: public static void scheduleUpdateThreadTask(int threadID) {
113: UpdateThreadIndexTask task = new UpdateThreadIndexTask(threadID);
114: TimerUtil.getInstance().schedule(task, 0);
115: }
116:
117: public static void scheduleRebuildIndexTask() {
118: int maxPostID = 0;
119: RebuildPostIndexTask task = new RebuildPostIndexTask(maxPostID);
120: TimerUtil.getInstance().schedule(task, 0);
121: }
122:
123: static Analyzer getAnalyzer() {
124: return analyzer;
125: }
126:
127: /**
128: * This class will load analyzer when starting. If specified analyzer class
129: * cannot be loaded then default analyzer will be used.
130: */
131: private static void initializeAnalyzer() {
132: String analyzerClassName = MVNForumFactoryConfig
133: .getLuceneAnalyzerClassName();
134: if ((analyzerClassName == null)
135: || (analyzerClassName.equals(""))) {
136: //create standard analyzer
137: //String[] stopWords = this.loadStopWords();
138: analyzer = new StandardAnalyzer();
139: log.debug("Using StandardAnalyzer for indexing");
140: } else {
141: //try to create specified analyzer
142: try {
143: log.debug("About to load Analyzer ["
144: + analyzerClassName + "] for indexing");
145: analyzer = (Analyzer) Class.forName(analyzerClassName)
146: .newInstance();
147: } catch (Exception e) {
148: log.warn("Cannot load " + analyzerClassName
149: + ". Loading StandardAnalyzer");
150: analyzer = new StandardAnalyzer();
151: }
152: }
153: }
154:
155: /**
156: * This method is used for getting new IndexWriter. It can create new index
157: * or add post to existing index. Creating new index will delete previous so it
158: * should be used for rebuilding index.
159: * @param create - true if new index should be created.
160: * - false for adding posts to existing index
161: * @return IndexWriter object that is used for adding posts to index
162: */
163: static IndexWriter getIndexWriter(Directory directory,
164: boolean create) throws SearchException {
165:
166: IndexWriter writer = null;
167:
168: SearchService service = MvnForumServiceFactory
169: .getMvnForumService().getSearchService();
170: //If create = false, we will create IndexWriter with false argument
171: if (create == false) {
172: try {
173: writer = new IndexWriter(directory, analyzer, false);
174: //writer.setWriteLockTimeout(100000);
175: if (service.savePostOnDisk()) {
176: writer.setUseCompoundFile(true);
177: }
178: return writer;
179: } catch (IOException e) {
180: log
181: .warn(
182: "Cannot open existed index. New index will be created.",
183: e);
184: //Ignore Exception. We will try to create index with true parameter
185: }
186: }
187: // We are here in two cases: We wanted to create new index or because
188: // index doesn't existed
189: try {
190: //This will create new index and delete existing
191: service.deleteContent(directory);
192: writer = new IndexWriter(directory, analyzer, true);// actually the directory should be 'create' = true
193: //writer.setWriteLockTimeout(100000);
194: if (service.savePostOnDisk()) {
195: writer.setUseCompoundFile(true);
196: }
197: return writer;
198: } catch (IOException e) {
199: //@todo : localize me
200: log.error("IOException during get index writer", e);
201: throw new SearchException(
202: "Error while creating index writer");
203: }
204: }
205:
206: /**
207: * This method is used for adding single post to index
208: * Note: this method does not close the writer
209: * @param post A post that should be indexed
210: * @param writer IndexWriter that is used for storing
211: * @throws SearchException
212: */
213: static void doIndexPost(PostBean post, IndexWriter writer)
214: throws SearchException {
215:
216: if (post == null)
217: return;
218: //Post must include topic and body. If not then we have nothing to index.
219: if ((post.getPostTopic() == null || post.getPostTopic().equals(
220: ""))
221: || (post.getPostBody() == null || post.getPostBody()
222: .equals(""))) {
223: return;
224: }
225:
226: //Each post will be represented as a document
227: Document postDocument = new Document();
228: //Document has following fields that could be queried on
229: postDocument.add(new Field(FIELD_POST_ID, Integer.toString(post
230: .getPostID()), Field.Store.YES,
231: Field.Index.UN_TOKENIZED));
232: postDocument.add(new Field(FIELD_THREAD_ID, Integer
233: .toString(post.getThreadID()), Field.Store.YES,
234: Field.Index.UN_TOKENIZED));
235: postDocument.add(new Field(FIELD_FORUM_ID, Integer
236: .toString(post.getForumID()), Field.Store.YES,
237: Field.Index.UN_TOKENIZED));
238: postDocument.add(new Field(FIELD_MEMBER_ID, Integer
239: .toString(post.getMemberID()), Field.Store.YES,
240: Field.Index.UN_TOKENIZED));
241: postDocument.add(new Field(FIELD_WITH_ATTACHMENT, new Boolean(
242: post.getPostAttachCount() > 0).toString(),
243: Field.Store.YES, Field.Index.UN_TOKENIZED));// make it compilable on JDK 1.3
244: postDocument.add(new Field(FIELD_ATTACHMENT_COUNT,
245: IntegerFilter.intToString(post.getPostAttachCount()),
246: Field.Store.YES, Field.Index.UN_TOKENIZED));
247: //postDocument.add(new Field(FIELD_ATTACHMENT_COUNT, Integer.toString(post.getPostAttachCount()), Field.Store.YES, Field.Index.UN_TOKENIZED));
248:
249: //document body and title is not stored since we can retrieve them from database
250: postDocument
251: .add(new Field(FIELD_POST_TOPIC, post.getPostTopic(),
252: Field.Store.NO, Field.Index.TOKENIZED));
253: postDocument.add(new Field(FIELD_POST_BODY, post.getPostBody(),
254: Field.Store.NO, Field.Index.TOKENIZED));
255: //add date field
256: postDocument.add(new Field(FIELD_POST_DATE, DateTools
257: .dateToString(post.getPostCreationDate(),
258: Resolution.MILLISECOND), Field.Store.YES,
259: Field.Index.UN_TOKENIZED));
260:
261: //now we have created document with fields so we can store it
262: try {
263: writer.addDocument(postDocument);
264: } catch (IOException e) {
265: log.error("PostIndexer.doIndexPost failed", e);
266: //@todo : localize me
267: throw new SearchException("Error writing new post to index");
268: } catch (Throwable e) {
269: log.error("PostIndexer.doIndexPost failed", e);
270: //@todo : localize me
271: throw new SearchException("Error writing new post to index");
272: }
273: }
274:
275: /**
276: * Add single post to index
277: * @param post
278: * @throws SearchException
279: */
280: static void addPostToIndex(PostBean post) throws SearchException,
281: IOException {
282:
283: Directory directory = null;
284: IndexWriter writer = null;
285: SearchService service = MvnForumServiceFactory
286: .getMvnForumService().getSearchService();
287: try {
288: directory = service.getSearchPostIndexDir();
289: writer = getIndexWriter(directory, false);
290: if (writer == null) {
291: log.warn("Cannot get the IndexWriter");
292: return;
293: }
294: doIndexPost(post, writer);
295:
296: // now check if we should optimize index (each hour)
297: long now = System.currentTimeMillis();
298: long timeFromLastOptimize = now - lastOptimizeTime;
299: if (service.savePostOnDisk()
300: && (timeFromLastOptimize > DateUtil.HOUR)) {
301: log.debug("writer.optimize() called in addPostToIndex");
302: writer.optimize();
303: lastOptimizeTime = now;
304: }
305: } catch (SearchException ex) {
306: throw ex;
307: } finally {
308: if (writer != null) {
309: try {
310: writer.close();
311: } catch (IOException e) {
312: log.debug("Error closing Lucene IndexWriter", e);
313: }
314: }
315: if (directory != null) {
316: try {
317: directory.close();
318: } catch (IOException e) {
319: log.debug("Cannot close directory.", e);
320: }
321: }
322: }
323: }
324:
325: /**
326: * This method is used for deleting post from index.
327: * @param postID id of the post that should be deleted
328: * @throws SearchException
329: */
330: static void deletePostFromIndex(int postID) throws SearchException {
331:
332: Directory directory = null;
333: IndexReader reader = null;
334: try {
335: SearchService service = MvnForumServiceFactory
336: .getMvnForumService().getSearchService();
337: directory = service.getSearchPostIndexDir();
338: reader = IndexReader.open(directory);
339: if (reader == null) {
340: log.warn("Cannot get the IndexReader");
341: return;
342: }
343:
344: Term term = new Term(FIELD_POST_ID, String.valueOf(postID));
345: int deletedCount = reader.deleteDocuments(term);
346: log.debug("deletePostFromIndex: deleted posts = "
347: + deletedCount);
348: } catch (IOException e) {
349: //@todo : localize me
350: throw new SearchException(
351: "Error trying to delete post with postID = "
352: + postID);
353: } finally {
354: if (reader != null) {
355: try {
356: reader.close();
357: } catch (IOException e) {
358: log.debug("Error closing Lucene IndexReader", e);
359: }
360: }
361: if (directory != null) {
362: try {
363: directory.close();
364: } catch (IOException e) {
365: log.debug("Cannot close directory.", e);
366: }
367: }
368: }
369: }
370:
371: /**
372: * This method is used for deleting all posts in a thread from index.
373: * @param threadID id of the thread that should be deleted
374: * @throws SearchException
375: */
376: static void deleteThreadFromIndex(int threadID)
377: throws SearchException {
378:
379: Directory directory = null;
380: IndexReader reader = null;
381: try {
382: SearchService service = MvnForumServiceFactory
383: .getMvnForumService().getSearchService();
384: directory = service.getSearchPostIndexDir();
385: reader = IndexReader.open(directory);
386: if (reader == null) {
387: log.warn("Cannot get the IndexReader");
388: return;
389: }
390:
391: Term term = new Term(FIELD_THREAD_ID, String
392: .valueOf(threadID));
393: int deletedCount = reader.deleteDocuments(term);
394: log.debug("deleteThreadFromIndex: deleted posts = "
395: + deletedCount);
396: } catch (IOException e) {
397: //@todo : localize me
398: throw new SearchException(
399: "Error trying to delete posts in index with threadID = "
400: + threadID);
401: } finally {
402: if (reader != null) {
403: try {
404: reader.close();
405: } catch (IOException e) {
406: log.debug("Error closing Lucene IndexReader", e);
407: }
408: }
409: if (directory != null) {
410: try {
411: directory.close();
412: } catch (IOException e) {
413: log.debug("Cannot close directory.", e);
414: }
415: }
416: }
417: }
418:
419: /**
420: * This method is used for deleting all posts in a forum from index.
421: * @param forumID id of the forum that should be deleted
422: * @throws SearchException
423: */
424: static void deleteForumFromIndex(int forumID)
425: throws SearchException {
426:
427: Directory directory = null;
428: IndexReader reader = null;
429: try {
430: SearchService service = MvnForumServiceFactory
431: .getMvnForumService().getSearchService();
432: directory = service.getSearchPostIndexDir();
433: reader = IndexReader.open(directory);
434: if (reader == null) {
435: log.warn("Cannot get the IndexReader");
436: return;
437: }
438:
439: Term term = new Term(FIELD_FORUM_ID, String
440: .valueOf(forumID));
441: int deletedCount = reader.deleteDocuments(term);
442: log.debug("deleteForumFromIndex: deleted posts = "
443: + deletedCount);
444: } catch (IOException e) {
445: //@todo : localize me
446: throw new SearchException(
447: "Error trying to delete posts in index with forumID = "
448: + forumID);
449: } finally {
450: if (reader != null) {
451: try {
452: reader.close();
453: } catch (IOException e) {
454: log.debug("Error closing Lucene IndexReader", e);
455: }
456: }
457: if (directory != null) {
458: try {
459: directory.close();
460: } catch (IOException e) {
461: log.debug("Cannot close directory.", e);
462: }
463: }
464: }
465: }
466:
467: public static int getNumDocs() {
468:
469: int numDocs = -1;
470: Directory directory = null;
471: IndexReader reader = null;
472: try {
473: SearchService service = MvnForumServiceFactory
474: .getMvnForumService().getSearchService();
475: directory = service.getSearchPostIndexDir();
476: reader = IndexReader.open(directory);
477: if (reader == null) {
478: log.warn("Cannot get the IndexReader");
479: return -1;
480: }
481: numDocs = reader.numDocs();
482: } catch (IOException ioe) {
483: //ignore
484: ioe.printStackTrace();
485: } finally {
486: if (reader != null) {
487: try {
488: reader.close();
489: } catch (IOException e) {
490: log.debug("Error closing Lucene IndexReader", e);
491: }
492: }
493: if (directory != null) {
494: try {
495: directory.close();
496: } catch (IOException e) {
497: log.debug("Cannot close directory.", e);
498: }
499: }
500: }
501: return numDocs;
502: }
503:
504: }
|