001: /*
002: * Copyright (c) JForum Team
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms,
006: * with or without modification, are permitted provided
007: * that the following conditions are met:
008: *
009: * 1) Redistributions of source code must retain the above
010: * copyright notice, this list of conditions and the
011: * following disclaimer.
012: * 2) Redistributions in binary form must reproduce the
013: * above copyright notice, this list of conditions and
014: * the following disclaimer in the documentation and/or
015: * other materials provided with the distribution.
016: * 3) Neither the name of "Rafael Steil" nor
017: * the names of its contributors may be used to endorse
018: * or promote products derived from this software without
019: * specific prior written permission.
020: *
021: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
022: * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
023: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
024: * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
025: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR
026: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
027: * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
028: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
029: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES
030: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
031: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
032: * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
033: * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
034: * IN CONTRACT, STRICT LIABILITY, OR TORT
035: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
036: * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
037: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
038: *
039: * Created on 18/07/2007 17:18:41
040: *
041: * The JForum Project
042: * http://www.jforum.net
043: */
044: package net.jforum.search;
045:
046: import java.io.IOException;
047: import java.util.ArrayList;
048: import java.util.Iterator;
049: import java.util.List;
050:
051: import net.jforum.entities.Post;
052: import net.jforum.exceptions.SearchException;
053: import net.jforum.util.preferences.ConfigKeys;
054: import net.jforum.util.preferences.SystemGlobals;
055:
056: import org.apache.log4j.Logger;
057: import org.apache.lucene.document.Document;
058: import org.apache.lucene.document.Field;
059: import org.apache.lucene.document.Field.Index;
060: import org.apache.lucene.document.Field.Store;
061: import org.apache.lucene.index.IndexReader;
062: import org.apache.lucene.index.IndexWriter;
063: import org.apache.lucene.index.Term;
064: import org.apache.lucene.store.Directory;
065: import org.apache.lucene.store.RAMDirectory;
066:
067: /**
068: * @author Rafael Steil
069: * @version $Id: LuceneIndexer.java,v 1.11 2007/09/01 05:46:53 rafaelsteil Exp $
070: */
071: public class LuceneIndexer {
072: private static final Logger logger = Logger
073: .getLogger(LuceneIndexer.class);
074: private static final Object MUTEX = new Object();
075:
076: private LuceneSettings settings;
077: private Directory ramDirectory;
078: private IndexWriter ramWriter;
079: private int ramNumDocs;
080: private List newDocumentAddedList = new ArrayList();
081:
082: public LuceneIndexer(LuceneSettings settings) {
083: this .settings = settings;
084: this .createRAMWriter();
085: }
086:
087: public void watchNewDocuDocumentAdded(NewDocumentAdded newDoc) {
088: this .newDocumentAddedList.add(newDoc);
089: }
090:
091: public void batchCreate(Post post) {
092: synchronized (MUTEX) {
093: try {
094: Document document = this .createDocument(post);
095: this .ramWriter.addDocument(document);
096: this .flushRAMDirectoryIfNecessary();
097: } catch (IOException e) {
098: throw new SearchException(e);
099: }
100: }
101: }
102:
103: private void createRAMWriter() {
104: try {
105: if (this .ramWriter != null) {
106: this .ramWriter.close();
107: }
108:
109: this .ramDirectory = new RAMDirectory();
110: this .ramWriter = new IndexWriter(this .ramDirectory,
111: this .settings.analyzer(), true);
112: this .ramNumDocs = SystemGlobals
113: .getIntValue(ConfigKeys.LUCENE_INDEXER_RAM_NUMDOCS);
114: } catch (IOException e) {
115: throw new SearchException(e);
116: }
117: }
118:
119: private void flushRAMDirectoryIfNecessary() {
120: if (this .ramWriter.docCount() >= this .ramNumDocs) {
121: this .flushRAMDirectory();
122: }
123: }
124:
125: public void flushRAMDirectory() {
126: synchronized (MUTEX) {
127: IndexWriter writer = null;
128:
129: try {
130: writer = new IndexWriter(this .settings.directory(),
131: this .settings.analyzer());
132: writer
133: .addIndexes(new Directory[] { this .ramDirectory });
134: writer.optimize();
135:
136: this .createRAMWriter();
137: } catch (IOException e) {
138: throw new SearchException(e);
139: } finally {
140: if (writer != null) {
141: try {
142: writer.flush();
143: writer.close();
144:
145: this .notifyNewDocumentAdded();
146: } catch (Exception e) {
147: }
148: }
149: }
150: }
151: }
152:
153: public void create(Post post) {
154: synchronized (MUTEX) {
155: IndexWriter writer = null;
156:
157: try {
158: writer = new IndexWriter(this .settings.directory(),
159: this .settings.analyzer());
160:
161: Document document = this .createDocument(post);
162: writer.addDocument(document);
163:
164: this .optimize(writer);
165:
166: if (logger.isDebugEnabled()) {
167: logger.debug("Indexed " + document);
168: }
169: } catch (Exception e) {
170: logger.error(e.toString(), e);
171: } finally {
172: if (writer != null) {
173: try {
174: writer.flush();
175: writer.close();
176:
177: this .notifyNewDocumentAdded();
178: } catch (Exception e) {
179: }
180: }
181: }
182: }
183: }
184:
185: public void update(Post post) {
186: if (this .performDelete(post)) {
187: this .create(post);
188: }
189: }
190:
191: private void optimize(IndexWriter writer) throws Exception {
192: if (writer.docCount() % 100 == 0) {
193: if (logger.isInfoEnabled()) {
194: logger
195: .info("Optimizing indexes. Current number of documents is "
196: + writer.docCount());
197: }
198:
199: writer.optimize();
200:
201: if (logger.isDebugEnabled()) {
202: logger.debug("Indexes optimized");
203: }
204: }
205: }
206:
207: private Document createDocument(Post p) {
208: Document d = new Document();
209:
210: d.add(new Field(SearchFields.Keyword.POST_ID, String.valueOf(p
211: .getId()), Store.YES, Index.UN_TOKENIZED));
212: d.add(new Field(SearchFields.Keyword.FORUM_ID, String.valueOf(p
213: .getForumId()), Store.YES, Index.UN_TOKENIZED));
214: d.add(new Field(SearchFields.Keyword.TOPIC_ID, String.valueOf(p
215: .getTopicId()), Store.YES, Index.UN_TOKENIZED));
216: d.add(new Field(SearchFields.Keyword.USER_ID, String.valueOf(p
217: .getUserId()), Store.YES, Index.UN_TOKENIZED));
218: d.add(new Field(SearchFields.Keyword.DATE, this .settings
219: .formatDateTime(p.getTime()), Store.YES,
220: Index.UN_TOKENIZED));
221:
222: // We add the subject and message text together because, when searching, we only care about the
223: // matches, not where it was performed. The real subject and contents will be fetched from the database
224: d.add(new Field(SearchFields.Indexed.CONTENTS, p.getSubject()
225: + " " + p.getText(), Store.NO, Index.TOKENIZED));
226:
227: return d;
228: }
229:
230: private void notifyNewDocumentAdded() {
231: for (Iterator iter = this .newDocumentAddedList.iterator(); iter
232: .hasNext();) {
233: ((NewDocumentAdded) iter.next()).newDocumentAdded();
234: }
235: }
236:
237: public void delete(Post p) {
238: this .performDelete(p);
239: }
240:
241: private boolean performDelete(Post p) {
242: synchronized (MUTEX) {
243: IndexReader reader = null;
244: boolean status = false;
245:
246: try {
247: reader = IndexReader.open(this .settings.directory());
248: reader.deleteDocuments(new Term(
249: SearchFields.Keyword.POST_ID, String.valueOf(p
250: .getId())));
251: status = true;
252: } catch (IOException e) {
253: logger.error(e.toString(), e);
254: } finally {
255: if (reader != null) {
256: try {
257: reader.close();
258: } catch (Exception e) {
259: }
260: }
261: }
262:
263: return status;
264: }
265: }
266: }
|