001: /*
002: * Copyright (c) JForum Team
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms,
006: * with or without modification, are permitted provided
007: * that the following conditions are met:
008: *
009: * 1) Redistributions of source code must retain the above
010: * copyright notice, this list of conditions and the
011: * following disclaimer.
012: * 2) Redistributions in binary form must reproduce the
013: * above copyright notice, this list of conditions and
014: * the following disclaimer in the documentation and/or
015: * other materials provided with the distribution.
016: * 3) Neither the name of "Rafael Steil" nor
017: * the names of its contributors may be used to endorse
018: * or promote products derived from this software without
019: * specific prior written permission.
020: *
021: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
022: * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
023: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
024: * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
025: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR
026: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
027: * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
028: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
029: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES
030: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
031: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
032: * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
033: * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
034: * IN CONTRACT, STRICT LIABILITY, OR TORT
035: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
036: * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
037: * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
038: *
039: * Created on 06/08/2007 15:20:23
040: *
041: * The JForum Project
042: * http://www.jforum.net
043: */
044: package net.jforum.search;
045:
046: import java.io.IOException;
047: import java.util.Iterator;
048: import java.util.List;
049:
050: import net.jforum.JForumExecutionContext;
051: import net.jforum.dao.DataAccessDriver;
052: import net.jforum.dao.LuceneDAO;
053: import net.jforum.entities.Post;
054: import net.jforum.exceptions.ForumException;
055: import net.jforum.util.preferences.ConfigKeys;
056: import net.jforum.util.preferences.SystemGlobals;
057:
058: import org.apache.lucene.search.IndexSearcher;
059:
060: /**
061: * @author Rafael Steil
062: * @version $Id: LuceneReindexer.java,v 1.5 2007/09/09 16:43:55 rafaelsteil Exp $
063: */
064: public class LuceneReindexer {
065: private LuceneSettings settings;
066: private LuceneReindexArgs args;
067: private boolean recreate;
068:
069: public LuceneReindexer(LuceneSettings settings,
070: LuceneReindexArgs args, boolean recreate) {
071: this .args = args;
072: this .recreate = recreate;
073: this .settings = settings;
074: }
075:
076: public void startProcess() {
077: this .reindex();
078: }
079:
080: public void startBackgroundProcess() {
081: Runnable indexingJob = new Runnable() {
082: public void run() {
083: reindex();
084: }
085: };
086:
087: SystemGlobals.setValue(ConfigKeys.LUCENE_CURRENTLY_INDEXING,
088: "1");
089:
090: Thread thread = new Thread(indexingJob);
091: thread.start();
092: }
093:
094: private void reindex() {
095: try {
096: if (recreate) {
097: this .settings.createIndexDirectory(SystemGlobals
098: .getValue(ConfigKeys.LUCENE_INDEX_WRITE_PATH));
099: }
100: } catch (IOException e) {
101: throw new ForumException(e);
102: }
103:
104: LuceneDAO dao = DataAccessDriver.getInstance().newLuceneDAO();
105:
106: IndexSearcher searcher = null;
107: LuceneSearch luceneSearch = ((LuceneManager) SearchFacade
108: .manager()).luceneSearch();
109: LuceneIndexer luceneIndexer = ((LuceneManager) SearchFacade
110: .manager()).luceneIndexer();
111:
112: int fetchCount = SystemGlobals
113: .getIntValue(ConfigKeys.LUCENE_INDEXER_DB_FETCH_COUNT);
114:
115: try {
116: if (!recreate) {
117: searcher = new IndexSearcher(this .settings.directory());
118: }
119:
120: boolean hasMorePosts = true;
121: long processStart = System.currentTimeMillis();
122:
123: int firstPostId = args.filterByMessage() ? args
124: .getFirstPostId() : dao.firstPostIdByDate(args
125: .getFromDate());
126:
127: int lastPostId = args.filterByMessage() ? args
128: .getLastPostId() : dao.lastPostIdByDate(args
129: .getToDate());
130:
131: int counter = 1;
132: int indexTotal = 0;
133: long indexRangeStart = System.currentTimeMillis();
134:
135: while (hasMorePosts) {
136: boolean contextFinished = false;
137:
138: int toPostId = firstPostId + fetchCount < lastPostId ? firstPostId
139: + fetchCount
140: : lastPostId;
141:
142: try {
143: JForumExecutionContext ex = JForumExecutionContext
144: .get();
145: JForumExecutionContext.set(ex);
146:
147: List l = dao.getPostsToIndex(firstPostId, toPostId);
148:
149: if (counter >= 5000) {
150: long end = System.currentTimeMillis();
151: System.out
152: .println("Indexed ~5000 documents in "
153: + (end - indexRangeStart)
154: + " ms (" + indexTotal
155: + " so far)");
156: indexRangeStart = end;
157: counter = 0;
158: }
159:
160: JForumExecutionContext.finish();
161: contextFinished = true;
162:
163: for (Iterator iter = l.iterator(); iter.hasNext();) {
164: if ("0"
165: .equals(SystemGlobals
166: .getValue(ConfigKeys.LUCENE_CURRENTLY_INDEXING))) {
167: hasMorePosts = false;
168: break;
169: }
170:
171: Post post = (Post) iter.next();
172:
173: if (!recreate && args.avoidDuplicatedRecords()) {
174: if (luceneSearch.findDocumentByPostId(post
175: .getId()) != null) {
176: continue;
177: }
178: }
179:
180: luceneIndexer.batchCreate(post);
181:
182: counter++;
183: indexTotal++;
184: }
185:
186: firstPostId += fetchCount;
187: hasMorePosts = hasMorePosts && l.size() > 0;
188: } finally {
189: if (!contextFinished) {
190: JForumExecutionContext.finish();
191: }
192: }
193: }
194:
195: long end = System.currentTimeMillis();
196:
197: System.out.println("**** Total: " + (end - processStart)
198: + " ms");
199: } catch (IOException e) {
200: throw new ForumException(e);
201: } finally {
202: SystemGlobals.setValue(
203: ConfigKeys.LUCENE_CURRENTLY_INDEXING, "0");
204:
205: luceneIndexer.flushRAMDirectory();
206:
207: if (searcher != null) {
208: try {
209: searcher.close();
210: } catch (Exception e) {
211: }
212: }
213: }
214: }
215: }
|