001: package com.knowgate.lucene;
002:
003: import java.io.IOException;
004: import java.io.InputStream;
005: import java.io.File;
006:
007: import java.sql.SQLException;
008: import java.sql.Connection;
009: import java.sql.DriverManager;
010: import java.sql.Statement;
011: import java.sql.ResultSet;
012: import java.sql.PreparedStatement;
013:
014: import java.util.Properties;
015: import java.util.Date;
016: import java.util.LinkedList;
017: import java.util.ListIterator;
018:
019: import java.math.BigDecimal;
020:
021: import java.text.SimpleDateFormat;
022:
023: import javax.mail.MessagingException;
024: import javax.mail.internet.MimeBodyPart;
025:
026: import org.htmlparser.beans.StringBean;
027: import org.apache.lucene.document.Document;
028: import org.apache.lucene.document.Field;
029: import org.apache.lucene.index.IndexWriter;
030: import org.apache.lucene.index.IndexReader;
031: import org.apache.lucene.index.Term;
032: import org.apache.lucene.analysis.Analyzer;
033:
034: import com.knowgate.debug.DebugFile;
035: import com.knowgate.misc.Gadgets;
036: import com.knowgate.hipermail.DBMimePart;
037: import com.knowgate.dfs.FileSystem;
038: import org.htmlparser.Parser;
039: import org.htmlparser.util.ParserException;
040:
041: /**
042: * Indexer subclass for e-mail messages
043: * @author Sergio Montoro Ten
044: * @version 3.0
045: */
046: public class MailIndexer extends Indexer {
047:
048: private static SimpleDateFormat oFmt = new SimpleDateFormat(
049: "yyyy-MM-dd HH:mm:ss");
050:
051: public MailIndexer() {
052: }
053:
054: /**
055: * Add a single mail message to the index
056: * @param oIWrt IndexWriter
057: * @param sGuid String GUID of mime message to be indexed (from gu_mimemsg field of table k_mime_msgs)
058: * @param dNumber BigDecimal mime message number (from pg_message field of table k_mime_msgs)
059: * @param sWorkArea String GUID of WorkArea (from gu_workarea field of table k_mime_msgs)
060: * @param sContainer String Name of Category (Folder) where message is stored.
061: * This is nm_category field at k_categories table record corresponding to gu_category from k_mime_msgs
062: * @param sSubject String Subject
063: * @param sAuthor String Display name of message sender
064: * @param sRecipients String Recipients list (both display name and e-mails)
065: * @param dtSent Date
066: * @param sComments String
067: * @param oStrm InputStream Full mime message body as an InputStream (from by_content field of table k_mime_msgs)
068: * @throws ClassNotFoundException
069: * @throws IOException
070: * @throws IllegalArgumentException
071: * @throws NoSuchFieldException
072: * @throws IllegalAccessException
073: * @throws InstantiationException
074: * @throws NullPointerException
075: */
076: public static void addMail(IndexWriter oIWrt, String sGuid,
077: BigDecimal dNumber, String sWorkArea, String sContainer,
078: String sSubject, String sAuthor, String sRecipients,
079: Date dtSent, String sComments, InputStream oStrm, int iSize)
080: throws ClassNotFoundException, IOException,
081: IllegalArgumentException, NoSuchFieldException,
082: IllegalAccessException, InstantiationException,
083: NullPointerException {
084:
085: String sText;
086: String sAbstract = sGuid + "¨" + sSubject + "¨" + sAuthor + "¨"
087: + oFmt.format(dtSent) + "¨" + String.valueOf(iSize)
088: + "¨" + dNumber.toString();
089: sSubject = Gadgets.ASCIIEncode(sSubject);
090: sAuthor = Gadgets.ASCIIEncode(sAuthor);
091:
092: if (null != oStrm) {
093: StringBuffer oStrBuff = new StringBuffer();
094: try {
095: MimeBodyPart oMsgText = new MimeBodyPart(oStrm);
096: DBMimePart.parseMimePart(oStrBuff, null, sContainer,
097: "", oMsgText, 0);
098: } catch (MessagingException xcpt) {
099: if (DebugFile.trace)
100: DebugFile.writeln(xcpt.getClass().getName() + " "
101: + xcpt.getMessage() + " indexing message "
102: + sGuid + " - " + sSubject);
103: }
104: if (oStrBuff.length() > 0) {
105: if (Gadgets.indexOfIgnoreCase(oStrBuff.toString(),
106: "<html>") >= 0) {
107: Parser oPrsr = Parser.createParser(oStrBuff
108: .toString(), null);
109: StringBean oStrs = new StringBean();
110: try {
111: oPrsr.visitAllNodesWith(oStrs);
112: } catch (ParserException pe) {
113: throw new IOException(pe.getMessage());
114: }
115:
116: if (DebugFile.trace)
117: DebugFile
118: .writeln("Gadgets.ASCIIEncode(StringBean.getStrings())");
119: sText = Gadgets.ASCIIEncode(oStrs.getStrings());
120: if (DebugFile.trace)
121: DebugFile
122: .writeln("StringBean.getStrings() done");
123: } // fi (oStrBuff contains <html>)
124: else {
125: if (DebugFile.trace)
126: DebugFile
127: .writeln("Gadgets.ASCIIEncode(StringBuffer.toString())");
128: sText = Gadgets.ASCIIEncode(oStrBuff.toString());
129: if (DebugFile.trace)
130: DebugFile
131: .writeln("StringBuffer.toString() done");
132: }
133: } else {
134: sText = "";
135: }
136: } // fi (oStrm)
137: else {
138: sText = "";
139: }
140:
141: Document oDoc = new Document();
142: oDoc.add(Field.Keyword("workarea", sWorkArea));
143: oDoc.add(Field.Keyword("container", sContainer));
144: oDoc.add(Field.Keyword("guid", sGuid));
145: oDoc.add(Field.Keyword("number", dNumber.toString()));
146: oDoc.add(Field.Keyword("created", dtSent));
147: oDoc.add(Field.Keyword("size", Gadgets.leftPad(String
148: .valueOf(iSize), '0', 10)));
149: oDoc.add(Field.Text("title", sSubject));
150: oDoc.add(Field.Text("author", sAuthor));
151: oDoc.add(Field.Text("abstract", sAbstract));
152: oDoc.add(Field.Text("recipients", sRecipients.trim()));
153: oDoc.add(Field.UnStored("comments", sComments));
154: oDoc.add(Field.UnStored("text", sText));
155:
156: if (DebugFile.trace)
157: DebugFile.writeln("IndexWriter.addDocument([Document])");
158:
159: oIWrt.addDocument(oDoc);
160: } // addMail
161:
162: /**
163: * <p>Re-build full text index for a given mail folder</p>
164: * All previously indexed messages for given folder are removed from index and written back
165: * @param oProps Properties containing: luceneindex, driver, dburl, dbuser, dbpassword
166: * @param sWorkArea String GUID of WorkArea to which folder belongs
167: * @param sFolder String Folder name as in field nm_category of table k_categories
168: * @throws SQLException
169: * @throws IOException
170: * @throws ClassNotFoundException
171: * @throws IllegalArgumentException
172: * @throws NoSuchFieldException
173: * @throws IllegalAccessException
174: * @throws InstantiationException
175: */
176: public static void rebuildFolder(Properties oProps,
177: String sWorkArea, String sFolder) throws SQLException,
178: IOException, ClassNotFoundException,
179: IllegalArgumentException, NoSuchFieldException,
180: IllegalAccessException, InstantiationException {
181:
182: String sGuid, sContainer, sTitle, sAuthor, sComments;
183: Date dtCreated;
184: BigDecimal dNumber;
185: int iSize;
186:
187: final BigDecimal dZero = new BigDecimal(0);
188:
189: if (DebugFile.trace) {
190: DebugFile
191: .writeln("Begin MailIndexer.rebuildFolder([Properties]"
192: + sWorkArea + "," + sFolder + ")");
193: DebugFile.incIdent();
194: }
195:
196: // Get physical base path to index files from luceneindex property
197: String sDirectory = oProps.getProperty("luceneindex");
198:
199: if (null == sDirectory)
200: throw new NoSuchFieldException(
201: "Cannot find luceneindex property");
202:
203: // Append WorkArea and table name to luceneindex base path
204: sDirectory = Gadgets.chomp(sDirectory, File.separator)
205: + "k_mime_msgs";
206: if (null != sWorkArea)
207: sDirectory += File.separator + sWorkArea;
208:
209: if (DebugFile.trace)
210: DebugFile.writeln("index directory is " + sDirectory);
211:
212: if (null == oProps.getProperty("driver"))
213: throw new NoSuchFieldException(
214: "Cannot find driver property");
215:
216: if (null == oProps.getProperty("dburl"))
217: throw new NoSuchFieldException("Cannot find dburl property");
218:
219: if (DebugFile.trace)
220: DebugFile.writeln("Class.forName("
221: + oProps.getProperty("analyzer", DEFAULT_ANALYZER)
222: + ")");
223:
224: Class oAnalyzer = Class.forName(oProps.getProperty("analyzer",
225: DEFAULT_ANALYZER));
226:
227: if (DebugFile.trace)
228: DebugFile.writeln("Class.forName("
229: + oProps.getProperty("driver") + ")");
230:
231: Class oDriver = Class.forName(oProps.getProperty("driver"));
232:
233: if (DebugFile.trace)
234: DebugFile.writeln("IndexReader.open(" + sDirectory + ")");
235:
236: // *********************************************************************
237: // Delete every document from this folder before re-indexing
238: File oDir = new File(sDirectory);
239: if (oDir.exists()) {
240: IndexReader oReader = IndexReader.open(sDirectory);
241: int iDeleted = oReader
242: .delete(new Term("container", sFolder));
243: oReader.close();
244: } else {
245: FileSystem oFS = new FileSystem();
246: try {
247: oFS.mkdirs(sDirectory);
248: } catch (Exception e) {
249: throw new IOException(e.getClass().getName() + " "
250: + e.getMessage());
251: }
252: }
253: // *********************************************************************
254:
255: if (DebugFile.trace)
256: DebugFile.writeln("new IndexWriter(" + sDirectory
257: + ",[Analyzer], true)");
258:
259: IndexWriter oIWrt = new IndexWriter(sDirectory,
260: (Analyzer) oAnalyzer.newInstance(), true);
261:
262: if (DebugFile.trace)
263: DebugFile.writeln("DriverManager.getConnection("
264: + oProps.getProperty("dburl") + ", ...)");
265:
266: Connection oConn = DriverManager.getConnection(oProps
267: .getProperty("dburl"), oProps.getProperty("dbuser"),
268: oProps.getProperty("dbpassword"));
269:
270: Statement oStmt = oConn
271: .createStatement(ResultSet.TYPE_FORWARD_ONLY,
272: ResultSet.CONCUR_READ_ONLY);
273: ResultSet oRSet;
274:
275: PreparedStatement oRecp = oConn
276: .prepareStatement(
277: "SELECT tx_personal,tx_email FROM k_inet_addrs WHERE tp_recipient<>'to' AND gu_mimemsg=?",
278: ResultSet.TYPE_FORWARD_ONLY,
279: ResultSet.CONCUR_READ_ONLY);
280:
281: if (DebugFile.trace)
282: DebugFile
283: .writeln("Statement.executeQuery(SELECT g.gu_workarea,c.nm_category,m.gu_mimemsg,m.tx_subject,m.nm_from,m.tx_mail_from,m.pg_mimemsg,m.de_mimemsg,m.dt_sent,m.len_mimemsg,m.by_content FROM k_mime_msgs m, k_categories c WHERE m.bo_deleted<>0 AND m.bo_draft<>0 AND m.gu_category=c.gu_category AND m.gu_workarea='"
284: + sWorkArea
285: + "' AND c.nm_category='"
286: + sFolder + "')");
287:
288: oRSet = oStmt
289: .executeQuery("SELECT g.gu_workarea,c.nm_category,m.gu_mimemsg,m.tx_subject,m.nm_from,m.tx_mail_from,m.pg_mimemsg,m.de_mimemsg,m.dt_sent,m.len_mimemsg,m.by_content FROM k_mime_msgs m, k_categories c WHERE m.bo_deleted<>0 AND m.bo_draft<>0 AND m.gu_category=c.gu_category AND m.gu_workarea='"
290: + sWorkArea
291: + "' AND c.nm_category='"
292: + sFolder
293: + "'");
294:
295: while (oRSet.next()) {
296:
297: sWorkArea = oRSet.getString(1);
298: sContainer = oRSet.getString(2);
299: sGuid = oRSet.getString(3);
300: sTitle = oRSet.getString(4);
301: sAuthor = oRSet.getString(5);
302: if (oRSet.wasNull())
303: sAuthor = "";
304: sAuthor += " " + oRSet.getString(6);
305: dNumber = oRSet.getBigDecimal(7);
306: if (oRSet.wasNull())
307: dNumber = dZero;
308: sComments = oRSet.getString(8);
309: dtCreated = oRSet.getDate(9);
310: iSize = oRSet.getInt(10);
311:
312: if (DebugFile.trace)
313: DebugFile.writeln("Indexing message " + sGuid + " - "
314: + sTitle);
315:
316: InputStream oStrm = oRSet.getBinaryStream(11);
317:
318: String sRecipients = "";
319: oRecp.setString(1, sGuid);
320: ResultSet oRecs = oRecp.executeQuery();
321: while (oRecs.next()) {
322: sRecipients += oRecs.getString(1) + " "
323: + oRecs.getString(2) + " ";
324: } // wend
325: oRecs.close();
326:
327: MailIndexer.addMail(oIWrt, sGuid, dNumber, sWorkArea,
328: sContainer, sTitle, sAuthor, sRecipients,
329: dtCreated, sComments, oStrm, iSize);
330:
331: } // wend
332: oRSet.close();
333: oRecp.close();
334:
335: oStmt
336: .executeUpdate("UPDATE k_mime_msgs SET bo_indexed=1 WHERE gu_workarea='"
337: + sWorkArea
338: + "' AND gu_category IN (SELECT gu_category FROM k_categories WHERE nm_category='"
339: + sFolder + "')");
340:
341: oStmt.close();
342: oConn.close();
343:
344: if (DebugFile.trace)
345: DebugFile.writeln("IndexWriter.optimize()");
346:
347: oIWrt.optimize();
348:
349: if (DebugFile.trace)
350: DebugFile.writeln("IndexWriter.close()");
351:
352: oIWrt.close();
353:
354: if (DebugFile.trace) {
355: DebugFile.decIdent();
356: DebugFile.writeln("End Indexer.rebuild()");
357: }
358: } // rebuildFolder
359:
360: }
|