001: /*
002: * This program is free software; you can redistribute it and/or modify
003: * it under the terms of the GNU General Public License as published by
004: * the Free Software Foundation; either version 2 of the License, or
005: * (at your option) any later version.
006: *
007: * This program is distributed in the hope that it will be useful,
008: * but WITHOUT ANY WARRANTY; without even the implied warranty of
009: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
010: * GNU Library General Public License for more details.
011: *
012: * You should have received a copy of the GNU General Public License
013: * along with this program; if not, write to the Free Software
014: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
015: */
016: package dlog4j.search;
017:
018: import java.io.File;
019: import java.io.FileInputStream;
020: import java.io.FileOutputStream;
021: import java.io.IOException;
022: import java.io.InputStream;
023: import java.io.OutputStream;
024: import java.util.Date;
025: import java.util.Iterator;
026: import java.util.Properties;
027:
028: import javax.servlet.ServletException;
029:
030: import net.sf.hibernate.Query;
031: import net.sf.hibernate.Session;
032:
033: import org.apache.commons.lang.StringUtils;
034: import org.apache.commons.logging.Log;
035: import org.apache.commons.logging.LogFactory;
036: import org.apache.lucene.analysis.Analyzer;
037: import org.apache.lucene.analysis.standard.StandardAnalyzer;
038: import org.apache.lucene.document.DateField;
039: import org.apache.lucene.document.Document;
040: import org.apache.lucene.document.Field;
041: import org.apache.lucene.index.IndexWriter;
042: import org.apache.struts.action.PlugIn;
043: import org.apache.struts.action.ActionServlet;
044: import org.apache.struts.config.ModuleConfig;
045:
046: import dlog4j.ManagerBase;
047: import dlog4j.formbean.LogForm;
048: import dlog4j.formbean.ReplyForm;
049:
050: /**
051: * A search engine daemon using lucene implements struts's plugin
052: * 日记和评论的索引分开两个目录存放
053: * @author Liudong
054: */
055: public class SearchEnginePlugIn implements PlugIn, Runnable {
056:
057: /**
058: * analyzer提供了切词的方法。可替换。 建索引和搜索时,应该使用同一个analyzer。
059: */
060: private static Analyzer analyzer;
061:
062: protected static ActionServlet servlet = null;
063:
064: /* properties from plugin's struts-config.xml */
065: protected int activeInterval = 600;
066: protected static String logIndexPath = "/WEB-INF/log_index";
067: protected static String replyIndexPath = "/WEB-INF/reply_index";
068: protected String statusFile = "/WEB-INF/lastActiveTime.sav";
069:
070: protected String analyzerClass;
071: private Thread daemon;
072:
073: /**
074: * The webapp started.
075: * @see org.apache.struts.action.PlugIn#init(ActionServlet servlet, ModuleConfig config)
076: */
077: public void init(ActionServlet servlet, ModuleConfig config)
078: throws ServletException {
079: SearchEnginePlugIn.servlet = servlet;
080: if (analyzerClass == null)
081: analyzerClass = StandardAnalyzer.class.getName();
082: try {
083: analyzer = (Analyzer) Class.forName(analyzerClass)
084: .newInstance();
085: } catch (Exception e) {
086: servlet.log("Initialize Analyzer Failed.", e);
087: }
088: daemon = new Thread(this );
089: daemon.setDaemon(true);
090: daemon.start();
091: }
092:
093: /**
094: * The webapp is to be stop.
095: * @see org.apache.struts.action.PlugIn#destroy()
096: */
097: public void destroy() {
098: stop = true;
099: /* wait the daemon to terminal */
100: try {
101: Thread.sleep(1000);
102: } catch (InterruptedException e) {
103: }
104: }
105:
106: /**
107: * 得到日记索引的Writter
108: * @return
109: * @throws IOException
110: */
111: protected IndexWriter getLogIndexWriter() throws IOException {
112: String logPath = getLogIndexPath();
113: File rp = new File(logPath);
114: if (!rp.exists())
115: rp.mkdirs();
116: File segments = new File(logPath + File.separator + "segments");
117: boolean bCreate = !segments.exists();
118: return new IndexWriter(logPath, analyzer, bCreate);
119: }
120:
121: /**
122: * 得到评论索引的Writter
123: * @return
124: * @throws IOException
125: */
126: protected IndexWriter getReplyIndexWriter() throws IOException {
127: String replyPath = getReplyIndexPath();
128: File rp = new File(replyPath);
129: if (!rp.exists())
130: rp.mkdirs();
131: File segments = new File(replyPath + File.separator
132: + "segments");
133: boolean bCreate = !segments.exists();
134: return new IndexWriter(replyPath, analyzer, bCreate);
135: }
136:
137: /**
138: * 构建某个时间戳后的所有日记信息的索引
139: * @param lastLogTime
140: * @return 返回新增日记索引的数目
141: * @throws SQLException
142: */
143: protected int buildLogIndex(IndexWriter writer, LastInfo lastInfo)
144: throws Exception {
145: Session ssn = ManagerBase.getSession();
146: int logCount = 0;
147: try {
148: Date begin = new Date(lastInfo.lastLogTime);
149: String hql = "FROM "
150: + LogForm.class.getName()
151: + " AS log WHERE log.logTime>? ORDER BY log.logTime ASC";
152: Query query = ssn.createQuery(hql);
153: query.setDate(0, new Date(lastInfo.lastLogTime));
154: Iterator logs = query.list().iterator();
155: while (logs.hasNext()) {
156: LogForm log = (LogForm) logs.next();
157: if (!log.getLogTime().after(begin)
158: || log.getContent() == null)
159: continue;
160: Document doc = new Document();
161: doc.add(Field.Keyword("logId", Integer.toString(log
162: .getId())));
163: doc.add(new Field("author", log.getOwnerName(), false,
164: true, false));
165: doc.add(new Field("siteId", Integer.toString(log
166: .getSite().getId()), false, true, false));
167: doc.add(new Field("categoryId", Integer.toString(log
168: .getCategoryId()), false, true, false));
169: doc.add(Field.UnStored("title", StringUtils
170: .deleteWhitespace(log.getTitle())));
171: doc.add(Field.UnStored("content", log.getContent()));
172: doc.add(new Field("logDate", DateField.dateToString(log
173: .getLogTime()), false, true, false));
174: writer.addDocument(doc);
175: logCount++;
176: //保存该日记的时间做为时间戳
177: lastInfo.lastLogTime = log.getLogTime().getTime();
178:
179: }
180: } finally {
181: ManagerBase.closeSession(ssn);
182: }
183: return logCount;
184: }
185:
186: /**
187: * 构建某个时间戳后的所有评论信息的索引
188: * @param lastLogTime
189: * @return 返回新增评论索引的数目
190: */
191: protected int buildReplyIndex(IndexWriter writer, LastInfo lastInfo)
192: throws Exception {
193: Session ssn = ManagerBase.getSession();
194: int replyCount = 0;
195: try {
196: Date begin = new Date(lastInfo.lastReplyTime);
197: String hql = "FROM "
198: + ReplyForm.class.getName()
199: + " AS r WHERE r.writeTime>? ORDER BY r.writeTime ASC";
200: Query query = ssn.createQuery(hql);
201: query.setDate(0, new Date(lastInfo.lastReplyTime));
202: Iterator replies = query.list().iterator();
203: while (replies.hasNext()) {
204: ReplyForm reply = (ReplyForm) replies.next();
205: if (!reply.getWriteTime().after(begin))
206: continue;
207: Document doc = new Document();
208: doc.add(Field.Keyword("replyId", Integer.toString(reply
209: .getId())));
210: doc.add(Field.Keyword("logId", Integer.toString(reply
211: .getLogId())));
212: doc
213: .add(new Field("categoryId", Integer
214: .toString(reply.getLog()
215: .getCategoryId()), false, true,
216: false));
217: doc.add(new Field("author", reply.getAuthorName(),
218: false, true, false));
219: doc.add(new Field("siteId", Integer.toString(reply
220: .getSite().getId()), false, true, false));
221: doc.add(Field.UnStored("content", reply.getContent()));
222: doc.add(new Field("replyDate", DateField
223: .dateToString(reply.getWriteTime()), false,
224: true, false));
225: writer.addDocument(doc);
226: replyCount++;
227: //保存该日记的时间做为时间戳
228: lastInfo.lastReplyTime = System.currentTimeMillis();
229:
230: }
231: } finally {
232: ManagerBase.closeSession(ssn);
233: }
234: return replyCount;
235: }
236:
237: private boolean stop = false;
238:
239: /* 自动创建索引的线程入口
240: * @see java.lang.Runnable#run()
241: */
242: public void run() {
243: Log log = LogFactory.getLog(SearchEnginePlugIn.class);
244: while (!stop) {
245: IndexWriter logWriter = null;
246: IndexWriter replyWriter = null;
247: try {
248: //Check if need to rebuild index.
249: LastInfo lastInfo = getLastInfo();
250: //更新日记索引
251: logWriter = getLogIndexWriter();
252: int lc = buildLogIndex(logWriter, lastInfo);
253: logWriter.optimize();
254: log.info("Build " + lc + " log's index success.");
255: //更新评论索引
256: replyWriter = getReplyIndexWriter();
257: int rc = buildReplyIndex(replyWriter, lastInfo);
258: replyWriter.optimize();
259: log.info("Build " + rc + " reply's index success.");
260:
261: saveLastInfo(lastInfo);
262: } catch (Exception e) {
263: log.error("SearchEnginePlugIn.AutoIndexBuild", e);
264: } catch (Throwable t) {
265: log.fatal("SearchEnginePlugIn.AutoIndexBuild", t);
266: } finally {
267: if (logWriter != null)
268: try {
269: logWriter.close();
270: } catch (Exception e) {
271: }
272: if (replyWriter != null)
273: try {
274: replyWriter.close();
275: } catch (Exception e) {
276: }
277: //wait for next active.
278: try {
279: int i;
280: for (i = 0; !stop && i < 2400; i++)
281: Thread.sleep(activeInterval * 1000 / 2400);
282: if (i < 2400)
283: break;
284: } catch (Exception e) {
285: break;
286: }
287: }
288: }
289: log.info("SearchEnginePlugIn terminal.");
290: }
291:
292: /**
293: * 读取配置信息(最近一次更新日记、评论的时间)
294: * @return
295: * @throws Exception
296: */
297: private LastInfo getLastInfo() throws Exception {
298: File f_status = new File(getStatusFile());
299: Properties p = null;
300: if (f_status.exists()) {
301: InputStream is = null;
302: try {
303: is = new FileInputStream(f_status);
304: p = new Properties();
305: p.load(is);
306: } finally {
307: if (is != null)
308: is.close();
309: }
310: }
311: return new LastInfo(p);
312: }
313:
314: /**
315: * 保存配置信息(最近一次更新日记、评论的时间)
316: * @param props
317: * @throws IOException
318: */
319: private void saveLastInfo(LastInfo props) throws IOException {
320: File f_status = new File(getStatusFile());
321: OutputStream out = null;
322: try {
323: out = new FileOutputStream(f_status);
324: props.getProperties()
325: .store(out, "SearchEngine Data Saved.");
326: } finally {
327: if (out != null)
328: out.close();
329: }
330: }
331:
332: /**
333: * 配置信息类
334: * @author Liudong
335: */
336: private class LastInfo {
337: public final static String REPLY_KEY = "LAST_REPLY_TIME";
338: public final static String LOG_KEY = "LAST_LOG_TIME";
339:
340: public LastInfo(Properties p) {
341: if (p != null) {
342: try {
343: lastReplyTime = Long.parseLong(p.getProperty(
344: REPLY_KEY, "0"));
345: } catch (Exception e) {
346: }
347: try {
348: lastLogTime = Long.parseLong(p.getProperty(LOG_KEY,
349: "0"));
350: } catch (Exception e) {
351: }
352: }
353: }
354:
355: public Properties getProperties() {
356: Properties ps = new Properties();
357: ps.setProperty(LOG_KEY, String.valueOf(lastLogTime));
358: ps.setProperty(REPLY_KEY, String.valueOf(lastReplyTime));
359: return ps;
360: }
361:
362: public long lastReplyTime;
363: public long lastLogTime;
364: }
365:
366: /* 以下几个getter/setter方法用于对应PlugIn的配置信息 */
367: public int getActiveInterval() {
368: return activeInterval;
369: }
370:
371: public void setActiveInterval(int activeInterval) {
372: this .activeInterval = activeInterval;
373: }
374:
375: /**
376: * 得到日记索引目录所在的绝对路径
377: * @return
378: */
379: public static String getLogIndexPath() {
380: if (logIndexPath.toUpperCase().startsWith("/WEB-INF"))
381: return servlet.getServletContext()
382: .getRealPath(logIndexPath);
383: return logIndexPath;
384: }
385:
386: public void setLogIndexPath(String indexPath) {
387: logIndexPath = indexPath;
388: }
389:
390: /**
391: * 得到评论索引目录所在的绝对路径
392: * @return
393: */
394: public static String getReplyIndexPath() {
395: if (replyIndexPath.toUpperCase().startsWith("/WEB-INF"))
396: return servlet.getServletContext().getRealPath(
397: replyIndexPath);
398: return replyIndexPath;
399: }
400:
401: public void setReplyIndexPath(String indexPath) {
402: replyIndexPath = indexPath;
403: }
404:
405: /**
406: * 得到保存状态信息的文件所在的绝对路径
407: * @return
408: */
409: public String getStatusFile() {
410: if (statusFile.startsWith("/"))
411: return servlet.getServletContext().getRealPath(statusFile);
412: return statusFile;
413: }
414:
415: public void setStatusFile(String statusFile) {
416: this .statusFile = statusFile;
417: }
418:
419: public static Analyzer getAnalyzer() {
420: return analyzer;
421: }
422:
423: public String getAnalyzerClass() {
424: return analyzerClass;
425: }
426:
427: public void setAnalyzerClass(String analyzerClass) {
428: this.analyzerClass = analyzerClass;
429: }
430: }
|