001: /**
002: * Copyright (C) 2001 Yasna.com. All rights reserved.
003: *
004: * ===================================================================
005: * The Apache Software License, Version 1.1
006: *
007: * Redistribution and use in source and binary forms, with or without
008: * modification, are permitted provided that the following conditions
009: * are met:
010: *
011: * 1. Redistributions of source code must retain the above copyright
012: * notice, this list of conditions and the following disclaimer.
013: *
014: * 2. Redistributions in binary form must reproduce the above copyright
015: * notice, this list of conditions and the following disclaimer in
016: * the documentation and/or other materials provided with the
017: * distribution.
018: *
019: * 3. The end-user documentation included with the redistribution,
020: * if any, must include the following acknowledgment:
021: * "This product includes software developed by
022: * Yasna.com (http://www.yasna.com)."
023: * Alternately, this acknowledgment may appear in the software itself,
024: * if and wherever such third-party acknowledgments normally appear.
025: *
026: * 4. The names "Yazd" and "Yasna.com" must not be used to
027: * endorse or promote products derived from this software without
028: * prior written permission. For written permission, please
029: * contact yazd@yasna.com.
030: *
031: * 5. Products derived from this software may not be called "Yazd",
032: * nor may "Yazd" appear in their name, without prior written
033: * permission of Yasna.com.
034: *
035: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
036: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
037: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
038: * DISCLAIMED. IN NO EVENT SHALL YASNA.COM OR
039: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
040: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
041: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
042: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
043: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
044: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
045: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
046: * SUCH DAMAGE.
047: * ====================================================================
048: *
049: * This software consists of voluntary contributions made by many
050: * individuals on behalf of Yasna.com. For more information
051: * on Yasna.com, please see <http://www.yasna.com>.
052: */
053:
054: /**
055: * Copyright (C) 2000 CoolServlets.com. All rights reserved.
056: *
057: * ===================================================================
058: * The Apache Software License, Version 1.1
059: *
060: * Redistribution and use in source and binary forms, with or without
061: * modification, are permitted provided that the following conditions
062: * are met:
063: *
064: * 1. Redistributions of source code must retain the above copyright
065: * notice, this list of conditions and the following disclaimer.
066: *
067: * 2. Redistributions in binary form must reproduce the above copyright
068: * notice, this list of conditions and the following disclaimer in
069: * the documentation and/or other materials provided with the
070: * distribution.
071: *
072: * 3. The end-user documentation included with the redistribution,
073: * if any, must include the following acknowledgment:
074: * "This product includes software developed by
075: * CoolServlets.com (http://www.coolservlets.com)."
076: * Alternately, this acknowledgment may appear in the software itself,
077: * if and wherever such third-party acknowledgments normally appear.
078: *
079: * 4. The names "Jive" and "CoolServlets.com" must not be used to
080: * endorse or promote products derived from this software without
081: * prior written permission. For written permission, please
082: * contact webmaster@coolservlets.com.
083: *
084: * 5. Products derived from this software may not be called "Jive",
085: * nor may "Jive" appear in their name, without prior written
086: * permission of CoolServlets.com.
087: *
088: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
089: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
090: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
091: * DISCLAIMED. IN NO EVENT SHALL COOLSERVLETS.COM OR
092: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
093: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
094: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
095: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
096: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
097: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
098: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
099: * SUCH DAMAGE.
100: * ====================================================================
101: *
102: * This software consists of voluntary contributions made by many
103: * individuals on behalf of CoolServlets.com. For more information
104: * on CoolServlets.com, please see <http://www.coolservlets.com>.
105: */package com.Yasna.forum.database;
106:
107: import java.sql.*;
108: import java.util.*;
109: import java.io.*;
110:
111: import org.apache.lucene.document.*;
112: import org.apache.lucene.analysis.*;
113: import org.apache.lucene.analysis.standard.*;
114: import org.apache.lucene.index.*;
115: import org.apache.lucene.store.*;
116:
117: import com.Yasna.forum.*;
118: import com.Yasna.util.*;
119:
120: /**
121: * Database implementation of SearchIndexer using the Lucene search package.
122: *
123: * Search indexes are stored in the "search" subdirectory of directory pointed to
124: * by the Yazd property "yazdHome".
125: */
126: public class DbSearchIndexer extends Thread implements SearchIndexer {
127:
128: /** DATABASE QUERIES **/
129: //private static final String MESSAGES_BEFORE_DATE =
130: // "SELECT messageID FROM yazdMessage WHERE modifiedDate < ?";
131: private static final String MESSAGES_BEFORE_DATE = "SELECT messageID, userID, yazdMessage.threadID, forumID, "
132: + "subject, body, yazdMessage.creationDate "
133: + "FROM yazdMessage, yazdThread WHERE yazdMessage.threadID=yazdThread.threadID "
134: + "AND yazdMessage.modifiedDate < ?";
135: private static final String MESSAGES_BEFORE_DATE_COUNT = "SELECT count(messageID) FROM yazdMessage WHERE modifiedDate < ?";
136: private static final String MESSAGES_SINCE_DATE = "SELECT messageID FROM yazdMessage WHERE modifiedDate > ? "
137: + "AND modifiedDate < ?";
138: private static final String MESSAGES_SINCE_DATE_COUNT = "SELECT count(messageID) FROM yazdMessage WHERE modifiedDate > ? "
139: + "AND modifiedDate < ?";
140: private static final String LOAD_MESSAGE = "SELECT subject, body, userID, yazdMessage.threadID, forumID, "
141: + "yazdMessage.creationDate FROM yazdMessage, yazdThread WHERE "
142: + "yazdMessage.threadID=yazdThread.threadID AND yazdMessage.messageID=?";
143:
144: /**
145: * Path to where index is stored.
146: */
147: private static String indexPath = null;
148:
149: /**
150: * Time constants (in milleseconds)
151: */
152: private static final long MINUTE = 1000 * 60;
153: private static final long HOUR = MINUTE * 60;
154:
155: /**
156: * Maintains the amount of time that should elapse until the next index.
157: */
158: private long updateInterval;
159:
160: /**
161: * Maintains the time that the last index took place.
162: */
163: private long lastIndexed;
164:
165: /**
166: * Indicates whether auto-indexing should be on or off. When on, an update
167: * will be run at the "updateInterval".
168: */
169: private boolean autoIndex = true;
170:
171: /**
172: * ForumFactory so that we can load message objects based on their ID.
173: */
174: private DbForumFactory factory;
175:
176: /**
177: * Lock so that only one indexing function can be executed at once. Not
178: * locking could impact the database integrity. Therefore, in a cluster of
179: * Yazd servers all pointed at the same db, only one indexer should be
180: * running once.
181: */
182: private Object indexLock = new Object();
183:
184: private static Analyzer analyzer = new StopAnalyzer();
185:
186: /**
187: * Creates a new DbSearchIndexer. It attempts to load properties for
188: * the update interval and when the last index occured from the Yazd
189: * properties then starts the indexing thread.
190: */
191: public DbSearchIndexer(DbForumFactory factory) {
192: this .factory = factory;
193:
194: //Default to performing updates ever 10 minutes.
195: updateInterval = 80 * MINUTE;
196: //If the update interval property exists, use that
197: String updInterval = PropertyManager
198: .getProperty("DbSearchIndexer.updateInterval");
199: try {
200: updateInterval = Long.parseLong(updInterval);
201: } catch (Exception e) { /* ignore */
202: }
203:
204: //Attempt to get the last updated time from the Yazd properties
205: String lastInd = PropertyManager
206: .getProperty("DbSearchIndexer.lastIndexed");
207: try {
208: lastIndexed = Long.parseLong(lastInd);
209: } catch (Exception e) {
210: //Something went wrong. Therefore, set lastIndexed far into the past
211: //so that we'll do a full index.
212: lastIndexed = 0;
213: }
214: //Make this a daemon thread.
215: this .setDaemon(true);
216: //Start the indexing thread.
217: start();
218: }
219:
220: public int getHoursUpdateInterval() {
221: return (int) (updateInterval / HOUR);
222: }
223:
224: public int getMinutesUpdateInterval() {
225: return (int) ((updateInterval - getHoursUpdateInterval() * HOUR) / MINUTE);
226: }
227:
228: public void setUpdateInterval(int minutes, int hours) {
229: updateInterval = (minutes * MINUTE) + (hours * HOUR);
230: //Save it to the properties
231: PropertyManager.setProperty("DbSearchIndexer.updateInterval",
232: "" + updateInterval);
233: }
234:
235: public java.util.Date getLastIndexedDate() {
236: return new java.util.Date(lastIndexed);
237: }
238:
239: public boolean isAutoIndexEnabled() {
240: return autoIndex;
241: }
242:
243: public void setAutoIndexEnabled(boolean value) {
244: autoIndex = value;
245: }
246:
247: public void addToIndex(ForumMessage message) {
248: //acquire the index lock so that no other indexing operations
249: //are performed.
250: synchronized (indexLock) {
251: IndexWriter writer = null;
252: try {
253: writer = getWriter(false);
254: addMessageToIndex(writer, message.getID(), message
255: .getUnfilteredSubject(), message
256: .getUnfilteredBody(),
257: message.getUser().getID(), message
258: .getForumThread().getID(), message
259: .getForumThread().getForum().getID(),
260: message.getCreationDate());
261: } catch (IOException ioe) {
262: ioe.printStackTrace();
263: } finally {
264: try {
265: writer.close();
266: } catch (Exception e) {
267: }
268: }
269: }
270: }
271:
272: public void removeFromIndex(ForumMessage message) {
273: //acquire the index lock so that no other indexing operations
274: //are performed.
275: synchronized (indexLock) {
276: try {
277: int[] toDelete = new int[] { message.getID() };
278: deleteMessagesFromIndex(toDelete);
279: } catch (IOException ioe) {
280: ioe.printStackTrace();
281: }
282: }
283: }
284:
285: public void updateIndex() {
286: //acquire the index lock so that no other indexing operations
287: //are performed.
288: synchronized (indexLock) {
289: long now = System.currentTimeMillis();
290: updateIndex(lastIndexed, now);
291: lastIndexed = now;
292: //Save the time as a Yazd property.
293: PropertyManager.setProperty("DbSearchIndexer.lastIndexed",
294: "" + lastIndexed);
295: }
296: }
297:
298: public void rebuildIndex() {
299: //acquire the index lock so that no other indexing operations
300: //are performed.
301: synchronized (indexLock) {
302: long now = System.currentTimeMillis();
303: rebuildIndex(now);
304: lastIndexed = now;
305: //Save the time as a Yazd property.
306: PropertyManager.setProperty("DbSearchIndexer.lastIndexed",
307: "" + lastIndexed);
308: }
309: }
310:
311: /**
312: * Indexing thread logic. It wakes up once a minute to see if any threaded
313: * action should take place.
314: */
315: public void run() {
316: while (true) {
317: //If auto indexing is on
318: if (autoIndex) {
319: long now = System.currentTimeMillis();
320: //If we want to re-index everything.
321: if (lastIndexed == 0) {
322: synchronized (indexLock) {
323: rebuildIndex(now);
324: lastIndexed = now;
325: //Save the time as a Yazd property.
326: PropertyManager.setProperty(
327: "DbSearchIndexer.lastIndexed", ""
328: + lastIndexed);
329: }
330: }
331: //We only want to do an update.
332: else {
333: long nextIndex = lastIndexed + updateInterval;
334: if (now > nextIndex) {
335: synchronized (indexLock) {
336: updateIndex(lastIndexed, now);
337: lastIndexed = now;
338: //Save the time as a Yazd property.
339: PropertyManager.setProperty(
340: "DbSearchIndexer.lastIndexed", ""
341: + lastIndexed);
342: }
343: }
344: }
345: }
346: //sleep for 1 minute and then check again.
347: try {
348: this .sleep(60000);
349: } catch (Exception e) {
350: e.printStackTrace();
351: }
352: }
353: }
354:
355: /**
356: * Indexes an indivual message. The connection is assumed to be open when
357: * passed in and will remain open after the method is done executing.
358: */
359: protected final void addMessageToIndex(IndexWriter writer,
360: int messageID, String subject, String body, int userID,
361: int threadID, int forumID, java.util.Date creationDate)
362: throws IOException {
363: if (writer == null) {
364: return;
365: }
366: //Ignore messages with a null subject or body.
367: if (subject == null || body == null) {
368: return;
369: }
370:
371: Document doc = new Document();
372: doc
373: .add(Field.Keyword("messageID", Integer
374: .toString(messageID)));
375: doc.add(new Field("userID", Integer.toString(userID), false,
376: true, false));
377: doc.add(new Field("threadID", Integer.toString(threadID),
378: false, true, false));
379: doc.add(new Field("forumID", Integer.toString(forumID), false,
380: true, false));
381: doc.add(new Field("Indexer", "FORUMS", false, true, false));
382: doc.add(Field.UnStored("subject", subject));
383: doc.add(Field.UnStored("body", body));
384: doc.add(new Field("creationDate", DateField
385: .dateToString(creationDate), false, true, false));
386:
387: writer.addDocument(doc);
388: }
389:
390: /**
391: * Deletes a message from the index.
392: */
393: protected final void deleteMessagesFromIndex(int[] messages)
394: throws IOException {
395: if (messages == null) {
396: return;
397: }
398: IndexReader reader = getReader();
399: if (reader == null) {
400: //Reader will be null if the search index doesn't exist.
401: return;
402: }
403: Term messageIDTerm;
404: for (int i = 0; i < messages.length; i++) {
405: messageIDTerm = new Term("messageID", Integer
406: .toString(messages[i]));
407: try {
408: reader.delete(messageIDTerm);
409: } catch (Exception e) {
410: }
411: }
412: try {
413: reader.close();
414: } catch (Exception e) {
415: }
416: }
417:
418: /**
419: * Rebuilds the search index from scratch. It deletes the entire index
420: * and word tables and then indexes every message up to the end time.
421: */
422: protected final void rebuildIndex(long end) {
423: System.err.println("Rebuilding index...");
424:
425: IndexWriter writer = null;
426: Connection con = null;
427: try {
428: writer = getWriter(true);
429: con = DbConnectionManager.getConnection();
430: PreparedStatement pstmt = con
431: .prepareStatement(MESSAGES_BEFORE_DATE);
432: pstmt.setString(1, Long.toString(end));
433: ResultSet rs = pstmt.executeQuery();
434: while (rs.next()) {
435: int messageID = rs.getInt(1);
436: int userID = rs.getInt(2);
437: int threadID = rs.getInt(3);
438: int forumID = rs.getInt(4);
439: String subject = rs.getString(5);
440: String body = rs.getString(6);
441: java.util.Date creationDate = new java.util.Date(Long
442: .parseLong(rs.getString(7).trim()));
443: //ForumMessage message = new DbForumMessage(messageID, factory);// factory.getMessage(messageID);
444: addMessageToIndex(writer, messageID, subject, body,
445: userID, threadID, forumID, creationDate);
446: }
447: pstmt.close();
448: } catch (Exception sqle) {
449: sqle.printStackTrace();
450: } finally {
451: try {
452: con.close();
453: } catch (Exception e) {
454: e.printStackTrace();
455: }
456: try {
457: //A rebuild of the index warrants calling optimize.
458: writer.optimize();
459: } catch (Exception e) {
460: }
461: try {
462: writer.close();
463: } catch (Exception e) {
464: e.printStackTrace();
465: }
466: }
467: System.err.println("Done rebuilding index.");
468: }
469:
470: /**
471: * Updates the index. It first deletes any messages in the index between
472: * the start and end times, and then adds all messages to the index that
473: * are between the start and end times.
474: */
475: protected final void updateIndex(long start, long end) {
476: Connection con = null;
477: PreparedStatement pstmt = null;
478: IndexWriter writer = null;
479: int[] messages = null;
480:
481: try {
482: con = DbConnectionManager.getConnection();
483: //For a clean update, we need to make sure that we first delete
484: //any index entries that were made since we last updated. This
485: //might happen if a process was calling indexMessage() between runs
486: //of this method. For this reason, the two types of indexing (manual
487: //and automatic) should not be intermixed. However, we still perform
488: //this deletion to be safe.
489: pstmt = con.prepareStatement(MESSAGES_SINCE_DATE_COUNT);
490: pstmt.setString(1, Long.toString(start));
491: pstmt.setString(2, Long.toString(end));
492: ResultSet rs = pstmt.executeQuery();
493: rs.next();
494: int messageCount = rs.getInt(1);
495: messages = new int[messageCount];
496: pstmt.close();
497: pstmt = con.prepareStatement(MESSAGES_SINCE_DATE);
498: pstmt.setString(1, Long.toString(start));
499: pstmt.setString(2, Long.toString(end));
500: rs = pstmt.executeQuery();
501: for (int i = 0; i < messages.length; i++) {
502: rs.next();
503: messages[i] = rs.getInt("messageID");
504: }
505: } catch (Exception e) {
506: e.printStackTrace();
507: } finally {
508: try {
509: pstmt.close();
510: } catch (Exception e) {
511: e.printStackTrace();
512: }
513: try {
514: con.close();
515: } catch (Exception e) {
516: e.printStackTrace();
517: }
518: }
519:
520: try {
521: deleteMessagesFromIndex(messages);
522:
523: //Finally, index all new messages;
524: writer = getWriter(false);
525: for (int i = 0; i < messages.length; i++) {
526: ForumMessage message = factory.getMessage(messages[i]);
527: addMessageToIndex(writer, message.getID(), message
528: .getUnfilteredSubject(), message
529: .getUnfilteredBody(),
530: message.getUser().getID(), message
531: .getForumThread().getID(), message
532: .getForumThread().getForum().getID(),
533: message.getCreationDate());
534: }
535: } catch (Exception e) {
536: e.printStackTrace();
537: } finally {
538: try {
539: writer.close();
540: } catch (Exception e) {
541: e.printStackTrace();
542: }
543: }
544: }
545:
546: /**
547: * Returns a Lucene IndexWriter.
548: */
549: private static IndexWriter getWriter(boolean create)
550: throws IOException {
551: if (indexPath == null) {
552: //Get path of where search index should be. It should be
553: //the search subdirectory of [yazdHome].
554: String yazdHome = PropertyManager.getProperty("yazdHome");
555: if (yazdHome == null) {
556: System.err
557: .println("ERROR: the yazdHome property is not set.");
558: throw new IOException(
559: "Unable to open index for searching "
560: + "because yazdHome was not set.");
561: }
562: indexPath = yazdHome + File.separator + "search";
563: }
564:
565: IndexWriter writer = null;
566:
567: //If create is true, we always want to create a new index writer.
568: if (create) {
569: try {
570: writer = new IndexWriter(indexPath, analyzer, true);
571: } catch (Exception e) {
572: System.err
573: .println("ERROR: Failed to create a new index writer.");
574: e.printStackTrace();
575: }
576: }
577: //Otherwise, use an existing index if it exists.
578: else {
579: if (indexExists(indexPath)) {
580: try {
581: writer = new IndexWriter(indexPath, analyzer, false);
582: } catch (Exception e) {
583: System.err
584: .println("ERROR: Failed to open an index writer.");
585: e.printStackTrace();
586: }
587: } else {
588: try {
589: writer = new IndexWriter(indexPath, analyzer, true);
590: } catch (Exception e) {
591: System.err
592: .println("ERROR: Failed to create a new index writer.");
593: e.printStackTrace();
594: }
595: }
596: }
597:
598: return writer;
599: }
600:
601: /**
602: * Returns a Lucene IndexReader.
603: */
604: private static IndexReader getReader() throws IOException {
605: if (indexPath == null) {
606: //Get path of where search index should be. It should be
607: //the search subdirectory of [yazdHome].
608: String yazdHome = PropertyManager.getProperty("yazdHome");
609: if (yazdHome == null) {
610: System.err
611: .println("ERROR: the yazdHome property is not set.");
612: throw new IOException(
613: "Unable to open index for searching "
614: + "because yazdHome was not set.");
615: }
616: indexPath = yazdHome + File.separator + "search";
617: }
618:
619: if (indexExists(indexPath)) {
620: IndexReader reader = IndexReader.open(indexPath);
621: return reader;
622: } else {
623: return null;
624: }
625: }
626:
627: /**
628: * Returns true if the search index exists at the specified path.
629: *
630: * @param indexPath the path to check for the search index at.
631: */
632: private static boolean indexExists(String indexPath) {
633: //Lucene always creates a file called "segments" -- if it exists, we
634: //assume that the search index exists.
635: File segments = new File(indexPath + File.separator
636: + "segments");
637: return segments.exists();
638: }
639: }
|