001: /*
002: Copyright (C) 2003 Know Gate S.L. All rights reserved.
003: C/Oņa, 107 1š2 28050 Madrid (Spain)
004:
005: Redistribution and use in source and binary forms, with or without
006: modification, are permitted provided that the following conditions
007: are met:
008:
009: 1. Redistributions of source code must retain the above copyright
010: notice, this list of conditions and the following disclaimer.
011:
012: 2. The end-user documentation included with the redistribution,
013: if any, must include the following acknowledgment:
014: "This product includes software parts from hipergate
015: (http://www.hipergate.org/)."
016: Alternately, this acknowledgment may appear in the software itself,
017: if and wherever such third-party acknowledgments normally appear.
018:
019: 3. The name hipergate must not be used to endorse or promote products
020: derived from this software without prior written permission.
021: Products derived from this software may not be called hipergate,
022: nor may hipergate appear in their name, without prior written
023: permission.
024:
025: This library is distributed in the hope that it will be useful,
026: but WITHOUT ANY WARRANTY; without even the implied warranty of
027: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
028:
029: You should have received a copy of hipergate License with this code;
030: if not, visit http://www.hipergate.org or mail to info@hipergate.org
031: */
032:
033: package com.knowgate.lucene;
034:
035: import java.math.BigDecimal;
036:
037: import java.util.Date;
038: import java.util.Map;
039: import java.util.HashMap;
040: import java.util.Iterator;
041: import java.util.LinkedList;
042: import java.util.ListIterator;
043: import java.text.SimpleDateFormat;
044:
045: import java.sql.DriverManager;
046: import java.sql.Connection;
047: import java.sql.Statement;
048: import java.sql.PreparedStatement;
049: import java.sql.ResultSet;
050: import java.sql.SQLException;
051:
052: import java.util.Properties;
053: import java.io.IOException;
054: import java.io.FileNotFoundException;
055: import java.io.InputStream;
056: import java.io.FileInputStream;
057: import java.io.File;
058:
059: import javax.mail.MessagingException;
060: import javax.mail.internet.MimeBodyPart;
061:
062: import org.apache.lucene.analysis.Analyzer;
063: import org.apache.lucene.index.*;
064: import org.apache.lucene.document.*;
065:
066: import org.htmlparser.beans.StringBean;
067:
068: import com.knowgate.debug.DebugFile;
069: import com.knowgate.misc.Gadgets;
070: import com.knowgate.dfs.FileSystem;
071:
072: import com.knowgate.hipermail.DBMimePart;
073:
074: /**
075: * <p>Data Feeder from hipergate tables for Lucene</p>
076: * @author Sergio Montoro Ten
077: * @version 3.0
* @see <a href="http://jakarta.apache.org/lucene/docs/index.html">Apache Lucene documentation</a>
079: */
080:
081: public class Indexer {
082:
083: public final static String DEFAULT_ANALYZER = "org.apache.lucene.analysis.StopAnalyzer";
084:
085: // ---------------------------------------------------------------------------
086:
087: private static String IfNull(Connection oConn) throws SQLException {
088: String sDBMS = oConn.getMetaData().getDatabaseProductName();
089:
090: if (sDBMS.equals("PostgreSQL"))
091: return "COALESCE";
092: else if (sDBMS.equals("Microsoft SQL Server"))
093: return "ISNULL";
094: else if (sDBMS.equals("Oracle"))
095: return "NVL";
096: else if (sDBMS.equals("MySQL"))
097: return "COALESCE";
098: else
099: return null;
100: }
101:
102: // ---------------------------------------------------------------------------
103:
104: private static boolean allowedTable(String sTableName) {
105: return sTableName.equalsIgnoreCase("k_bugs")
106: || sTableName.equalsIgnoreCase("k_newsmsgs")
107: || sTableName.equalsIgnoreCase("k_mime_msgs");
108: }
109:
110: // ---------------------------------------------------------------------------
111:
112: /**
113: * Optimize a given index
114: * @param oProps Properties Collection (typically loaded from hipergate.cnf)
115: * containing luceneindex property and (optionally) analyzer
116: * @param sTableName String Name of table to be indexed (currently only k_bugs, k_newsmsgs or k_mime_msgs are permitted)
117: * @param sWorkArea GUID of WorkArea to be optimized
118: * @throws NoSuchFieldException
119: * @throws IllegalArgumentException
120: * @throws ClassNotFoundException
121: * @throws IOException
122: * @throws InstantiationException
123: * @throws IllegalAccessException
124: */
125: public static void optimize(Properties oProps, String sTableName,
126: String sWorkArea) throws NoSuchFieldException,
127: IllegalArgumentException, ClassNotFoundException,
128: FileNotFoundException, IOException, InstantiationException,
129: IllegalAccessException {
130:
131: if (!allowedTable(sTableName))
132: throw new IllegalArgumentException(
133: "Table name must be k_bugs or k_newsmsgs or k_mime_msgs");
134:
135: if (DebugFile.trace) {
136: DebugFile.writeln("Begin Indexer.rebuild([Properties]"
137: + sTableName);
138: DebugFile.incIdent();
139: }
140:
141: String sDirectory = oProps.getProperty("luceneindex");
142:
143: if (null == sDirectory) {
144: if (DebugFile.trace)
145: DebugFile.decIdent();
146: throw new NoSuchFieldException(
147: "Cannot find luceneindex property");
148: }
149:
150: sDirectory = Gadgets.chomp(sDirectory, File.separator)
151: + sTableName.toLowerCase();
152: if (null != sWorkArea)
153: sDirectory += File.separator + sWorkArea;
154:
155: if (DebugFile.trace)
156: DebugFile.writeln("index directory is " + sDirectory);
157:
158: File oDir = new File(sDirectory);
159: if (!oDir.exists()) {
160: if (DebugFile.trace)
161: DebugFile.decIdent();
162: throw new FileNotFoundException("Directory " + sDirectory
163: + " does not exist");
164: }
165:
166: if (DebugFile.trace)
167: DebugFile.writeln("Class.forName("
168: + oProps.getProperty("analyzer", DEFAULT_ANALYZER)
169: + ")");
170:
171: Class oAnalyzer = Class.forName(oProps.getProperty("analyzer",
172: DEFAULT_ANALYZER));
173:
174: if (DebugFile.trace)
175: DebugFile.writeln("new IndexWriter(...)");
176:
177: IndexWriter oIWrt = new IndexWriter(sDirectory,
178: (Analyzer) oAnalyzer.newInstance(), true);
179:
180: if (DebugFile.trace)
181: DebugFile.writeln("IndexWriter.optimize()");
182:
183: oIWrt.optimize();
184:
185: if (DebugFile.trace)
186: DebugFile.writeln("IndexWriter.close()");
187:
188: oIWrt.close();
189:
190: if (DebugFile.trace) {
191: DebugFile.decIdent();
192: DebugFile.writeln("End Indexer.optimize()");
193: }
194: } // optimize
195:
196: // ---------------------------------------------------------------------------
197:
198: /**
199: * <p>Rebuild Full Text Index for a table</p>
200: * Indexed documents have the following fields:<br>
201: * <table border=1 cellpadding=4>
202: * <tr><td><b>Field Name</b></td><td><b>Description</b></td><td><b>Indexed</b></td><td><b>Stored</b></td></tr>
203: * <tr><td>workarea</td><td>GUID of WorkArea</td><td align=middle>Yes</td><td align=middle>Yes</td></tr>
204: * <tr><td>container</td><td>Name of Container (NewsGroup, Project, etc)</td><td align=middle>Yes</td><td align=middle>Yes</td></tr>
205: * <tr><td>guid</td><td>GUID for Retrieved Object</td><td align=middle>Yes</td><td align=middle>Yes</td></tr>
206: * <tr><td>number</td><td>Object Ordinal Identifier</td><td align=middle>Yes</td><td align=middle>Yes</td></tr>
207: * <tr><td>title</td><td>Title or Subject</td><td align=middle>Yes</td><td align=middle>Yes</td></tr>
208: * <tr><td>author</td><td>Author</td><td align=middle>Yes</td><td align=middle>Yes</td></tr>
209: * <tr><td>text</td><td>Document Text</td><td align=middle>Yes</td><td align=middle>No</td></tr>
210: * <tr><td>abstract</td><td>First 80 characters of text</td><td align=middle>No</td><td align=middle>Yes</td></tr>
211: * </table>
212: * @param oProps Properties Collection (typically loaded from hipergate.cnf) containing:<br>
213: * <b>driver</b> : Class name for JDBC driver<br>
214: * <b>dburl</b> : Database Connection URL<br>
215: * <b>dbuser</b> : Database User<br>
216: * <b>dbpassword</b> : Database User Password<br>
217: * <b>luceneindex</b> : Base path for Lucene index directories,
218: * the rebuilded index will be stored at a subdirectory called as the table name.<br>
219: * @param sTableName Name of table to be indexed (currently only k_bugs, k_newsmsgs or k_mime_msgs are permitted)
220: * <b>analyzer</b> : org.apache.lucene.analysis.Analyzer subclass name
221: * @param sWorkArea GUID of WorkArea to be rebuilt
222: * @throws NoSuchFieldException If any of the requiered properties of oProps is not found
223: * @throws ClassNotFoundException If JDBC driver or analyzer classes are not found
224: * @throws SQLException
225: * @throws IOException
226: * @throws IllegalArgumentException
227: * @throws IllegalAccessException
228: * @throws InstantiationException
229: */
230: public static void rebuild(Properties oProps, String sTableName,
231: String sWorkArea) throws SQLException, IOException,
232: ClassNotFoundException, IllegalArgumentException,
233: NoSuchFieldException, IllegalAccessException,
234: InstantiationException {
235:
236: String sGuid, sContainer, sTitle, sAuthor, sComments, sText;
237: Date dtCreated;
238: BigDecimal dNumber;
239: int iNumber, iSize;
240:
241: final BigDecimal dZero = new BigDecimal(0);
242:
243: // Check whether table name is any of the allowed ones
244: if (!allowedTable(sTableName))
245: throw new IllegalArgumentException(
246: "Table name must be k_bugs or k_newsmsgs or k_mime_msgs");
247:
248: if (DebugFile.trace) {
249: DebugFile.writeln("Begin Indexer.rebuild([Properties],"
250: + sTableName + "," + sWorkArea + ")");
251: DebugFile.incIdent();
252: }
253:
254: // Get physical base path to index files from luceneindex property
255: String sDirectory = oProps.getProperty("luceneindex");
256:
257: if (null == sDirectory) {
258: if (DebugFile.trace)
259: DebugFile.decIdent();
260: throw new NoSuchFieldException(
261: "Cannot find luceneindex property");
262: }
263:
264: // Append WorkArea and table name to luceneindex base path
265: sDirectory = Gadgets.chomp(sDirectory, File.separator)
266: + sTableName.toLowerCase();
267: if (null != sWorkArea)
268: sDirectory += File.separator + sWorkArea;
269:
270: if (DebugFile.trace)
271: DebugFile.writeln("index directory is " + sDirectory);
272:
273: if (null == oProps.getProperty("driver")) {
274: if (DebugFile.trace)
275: DebugFile.decIdent();
276: throw new NoSuchFieldException(
277: "Cannot find driver property");
278: }
279:
280: if (null == oProps.getProperty("dburl")) {
281: if (DebugFile.trace)
282: DebugFile.decIdent();
283: throw new NoSuchFieldException("Cannot find dburl property");
284: }
285:
286: if (DebugFile.trace)
287: DebugFile.writeln("Class.forName("
288: + oProps.getProperty("analyzer", DEFAULT_ANALYZER)
289: + ")");
290:
291: Class oAnalyzer = Class.forName(oProps.getProperty("analyzer",
292: DEFAULT_ANALYZER));
293:
294: if (DebugFile.trace)
295: DebugFile.writeln("Class.forName("
296: + oProps.getProperty("driver") + ")");
297:
298: Class oDriver = Class.forName(oProps.getProperty("driver"));
299:
300: if (DebugFile.trace)
301: DebugFile.writeln("IndexReader.open(" + sDirectory + ")");
302:
303: // *********************************************************************
304: // Delete every document from this table and WorkArea before re-indexing
305: File oDir = new File(sDirectory);
306: if (oDir.exists()) {
307: IndexReader oReader = IndexReader.open(sDirectory);
308: int iDeleted = oReader.delete(new Term("workarea",
309: sWorkArea));
310: oReader.close();
311: } else {
312: FileSystem oFS = new FileSystem();
313: try {
314: oFS.mkdirs(sDirectory);
315: } catch (Exception e) {
316: throw new IOException(e.getClass().getName() + " "
317: + e.getMessage());
318: }
319: }
320: // *********************************************************************
321:
322: if (DebugFile.trace)
323: DebugFile.writeln("new IndexWriter(" + sDirectory
324: + ",[Analyzer], true)");
325:
326: IndexWriter oIWrt = new IndexWriter(sDirectory,
327: (Analyzer) oAnalyzer.newInstance(), true);
328:
329: if (DebugFile.trace)
330: DebugFile.writeln("DriverManager.getConnection("
331: + oProps.getProperty("dburl") + ", ...)");
332:
333: Connection oConn = DriverManager.getConnection(oProps
334: .getProperty("dburl"), oProps.getProperty("dbuser"),
335: oProps.getProperty("dbpassword"));
336: oConn.setAutoCommit(true);
337:
338: Statement oStmt = oConn
339: .createStatement(ResultSet.TYPE_FORWARD_ONLY,
340: ResultSet.CONCUR_READ_ONLY);
341: ResultSet oRSet;
342:
343: if (sTableName.equalsIgnoreCase("k_bugs")) {
344:
345: if (DebugFile.trace)
346: DebugFile
347: .writeln("Statement.executeQuery(SELECT p.gu_workarea,p.nm_project,b.gu_bug,b.tl_bug,b.dt_modified,"
348: + IfNull(oConn)
349: + "(b.nm_reporter,''),"
350: + IfNull(oConn)
351: + "(b.tx_bug_brief,''),"
352: + IfNull(oConn)
353: + "(b.tx_comments,'') FROM k_bugs b, k_projects p WHERE b.gu_project=p.gu_project AND p.gu_owner='"
354: + sWorkArea + "')");
355:
356: oRSet = oStmt
357: .executeQuery("SELECT p.gu_owner,p.nm_project,b.gu_bug,b.pg_bug,b.tl_bug,b.dt_modified,"
358: + IfNull(oConn)
359: + "(b.nm_reporter,''),"
360: + IfNull(oConn)
361: + "(b.tx_comments,''),"
362: + IfNull(oConn)
363: + "(b.tx_bug_brief,'') FROM k_bugs b, k_projects p WHERE b.gu_project=p.gu_project AND p.gu_owner='"
364: + sWorkArea + "'");
365:
366: while (oRSet.next()) {
367: sWorkArea = oRSet.getString(1);
368: sContainer = oRSet.getString(2);
369: sGuid = oRSet.getString(3);
370: iNumber = oRSet.getInt(4);
371: sTitle = oRSet.getString(5);
372: dtCreated = oRSet.getDate(6);
373: sAuthor = oRSet.getString(7);
374: sComments = oRSet.getString(8);
375: if (null == sComments)
376: sComments = "";
377: sText = oRSet.getString(9);
378: if (null == sText)
379: sText = "";
380: BugIndexer.addBug(oIWrt, sGuid, iNumber, sWorkArea,
381: sContainer, sTitle, sAuthor, dtCreated,
382: sComments, sText);
383: } // wend
384: oRSet.close();
385: }
386:
387: else if (sTableName.equalsIgnoreCase("k_newsmsgs")) {
388:
389: if (DebugFile.trace)
390: DebugFile
391: .writeln("Statement.executeQuery(SELECT g.gu_workarea,c.nm_category,m.gu_msg,m.tx_subject,m.dt_published,"
392: + IfNull(oConn)
393: + "(b.nm_author,''),"
394: + IfNull(oConn)
395: + "(b.tx_msg,'') FROM k_newsmsgs m, k_categories c, k_newsgroups g, k_x_cat_objs x WHERE m.id_status=0 AND m.gu_msg=x.gu_object AND x.gu_category=g.gu_newsgrp AND c.gu_category=g.gu_newsgrp AND g.gu_workarea='"
396: + sWorkArea + "')");
397:
398: oRSet = oStmt
399: .executeQuery("SELECT g.gu_workarea,c.nm_category,m.gu_msg,m.tx_subject,m.dt_published,"
400: + IfNull(oConn)
401: + "(m.nm_author,''),"
402: + IfNull(oConn)
403: + "(m.tx_msg,'') FROM k_newsmsgs m, k_categories c, k_newsgroups g, k_x_cat_objs x WHERE m.id_status=0 AND m.gu_msg=x.gu_object AND x.gu_category=g.gu_newsgrp AND c.gu_category=g.gu_newsgrp AND g.gu_workarea='"
404: + sWorkArea + "'");
405:
406: while (oRSet.next()) {
407: sWorkArea = oRSet.getString(1);
408: sContainer = oRSet.getString(2);
409: sGuid = oRSet.getString(3);
410: sTitle = oRSet.getString(4);
411: dtCreated = oRSet.getDate(5);
412: sAuthor = oRSet.getString(6);
413: sText = oRSet.getString(7);
414: NewsMessageIndexer.addNewsMessage(oIWrt, sGuid,
415: sWorkArea, sContainer, sTitle, sAuthor,
416: dtCreated, sText);
417: } // wend
418: oRSet.close();
419: } else if (sTableName.equalsIgnoreCase("k_mime_msgs")) {
420:
421: LinkedList oIndexedGuids = new LinkedList();
422:
423: PreparedStatement oRecp = oConn
424: .prepareStatement(
425: "SELECT tx_personal,tx_email FROM k_inet_addrs WHERE tp_recipient<>'to' AND gu_mimemsg=?",
426: ResultSet.TYPE_FORWARD_ONLY,
427: ResultSet.CONCUR_READ_ONLY);
428:
429: if (DebugFile.trace)
430: DebugFile
431: .writeln("Statement.executeQuery(SELECT g.gu_workarea,c.nm_category,m.gu_mimemsg,m.tx_subject,m.nm_from,m.tx_mail_from,m.pg_mimemsg,m.de_mimemsg,m.dt_sent,m.len_mimemsg,m.by_content FROM k_mime_msgs m, k_categories c WHERE m.bo_deleted<>0 AND m.bo_draft<>0 AND m.gu_category=c.gu_category AND m.gu_workarea='"
432: + sWorkArea + "')");
433:
434: oRSet = oStmt
435: .executeQuery("SELECT g.gu_workarea,c.nm_category,m.gu_mimemsg,m.tx_subject,m.nm_from,m.tx_mail_from,m.pg_mimemsg,m.de_mimemsg,m.dt_sent,m.len_mimemsg,m.by_content FROM k_mime_msgs m, k_categories c WHERE m.bo_deleted<>0 AND m.bo_draft<>0 AND m.gu_category=c.gu_category AND m.gu_workarea='"
436: + sWorkArea + "'");
437:
438: while (oRSet.next()) {
439:
440: sWorkArea = oRSet.getString(1);
441: sContainer = oRSet.getString(2);
442: sGuid = oRSet.getString(3);
443: sTitle = oRSet.getString(4);
444: sAuthor = oRSet.getString(5);
445: if (oRSet.wasNull())
446: sAuthor = "";
447: sAuthor += " " + oRSet.getString(6);
448: dNumber = oRSet.getBigDecimal(7);
449: if (oRSet.wasNull())
450: dNumber = dZero;
451: sComments = oRSet.getString(8);
452: dtCreated = oRSet.getDate(9);
453: iSize = oRSet.getInt(10);
454:
455: if (DebugFile.trace)
456: DebugFile.writeln("Indexing message " + sGuid
457: + " - " + sTitle);
458:
459: InputStream oStrm = oRSet.getBinaryStream(11);
460:
461: String sRecipients = "";
462: oRecp.setString(1, sGuid);
463: ResultSet oRecs = oRecp.executeQuery();
464: while (oRecs.next()) {
465: sRecipients += oRecs.getString(1) + " "
466: + oRecs.getString(2) + " ";
467: } // wend
468: oRecs.close();
469:
470: MailIndexer.addMail(oIWrt, sGuid, dNumber, sWorkArea,
471: sContainer, sTitle, sAuthor, sRecipients,
472: dtCreated, sComments, oStrm, iSize);
473:
474: oIndexedGuids.add(sGuid);
475: } // wend
476: oRSet.close();
477: oRecp.close();
478:
479: PreparedStatement oUpdt = oConn
480: .prepareStatement("UPDATE k_mime_msgs SET bo_indexed=1 WHERE gu_mimemsg=?");
481: ListIterator oIter = oIndexedGuids.listIterator();
482: while (oIter.hasNext()) {
483: oUpdt.setObject(1, oIter.next(), java.sql.Types.CHAR);
484: oUpdt.executeUpdate();
485: } // wend
486: oUpdt.close();
487: } // fi
488:
489: oStmt.close();
490: oConn.close();
491:
492: if (DebugFile.trace)
493: DebugFile.writeln("IndexWriter.optimize()");
494:
495: oIWrt.optimize();
496:
497: if (DebugFile.trace)
498: DebugFile.writeln("IndexWriter.close()");
499:
500: oIWrt.close();
501:
502: if (DebugFile.trace) {
503: DebugFile.decIdent();
504: DebugFile.writeln("End Indexer.rebuild()");
505: }
506: } // rebuild
507:
508: /**
509: *
510: * @param oProps
511: * @param sTableName
512: * @throws SQLException
513: * @throws IOException
514: * @throws ClassNotFoundException
515: * @throws IllegalArgumentException
516: * @throws NoSuchFieldException
517: * @throws IllegalAccessException
518: * @throws InstantiationException
519: */
520: public static void rebuild(Properties oProps, String sTableName)
521: throws SQLException, IOException, ClassNotFoundException,
522: IllegalArgumentException, NoSuchFieldException,
523: IllegalAccessException, InstantiationException {
524: }
525:
526: public static void add(IndexWriter oIWrt, Map oKeywords,
527: Map oTexts, Map oUnStored) throws ClassNotFoundException,
528: IOException, IllegalArgumentException,
529: NoSuchFieldException, IllegalAccessException,
530: InstantiationException, NullPointerException {
531:
532: String sFieldName;
533: Object oFieldValue;
534: Document oDoc = new Document();
535: Iterator oKeys = oKeywords.keySet().iterator();
536: while (oKeys.hasNext()) {
537: sFieldName = (String) oKeys.next();
538: oFieldValue = oKeywords.get(sFieldName);
539: if (null == oFieldValue)
540: oFieldValue = "";
541: if (oFieldValue.getClass().getName().equals(
542: "java.util.Date"))
543: oDoc.add(Field.Keyword(sFieldName, (Date) oFieldValue));
544: else
545: oDoc.add(Field
546: .Keyword(sFieldName, (String) oFieldValue));
547: } // wend
548: Iterator oTxts = oTexts.keySet().iterator();
549: while (oTxts.hasNext()) {
550: sFieldName = (String) oTxts.next();
551: oFieldValue = oTexts.get(sFieldName);
552: if (null == oFieldValue)
553: oFieldValue = "";
554: oDoc.add(Field.Text(sFieldName, (String) oFieldValue));
555: } // wend
556: Iterator oUnStor = oUnStored.keySet().iterator();
557: while (oUnStor.hasNext()) {
558: sFieldName = (String) oUnStor.next();
559: oFieldValue = oUnStored.get(sFieldName);
560: if (null == oFieldValue)
561: oFieldValue = "";
562: oDoc.add(Field.Text(sFieldName, (String) oFieldValue));
563: } // wend
564: oIWrt.addDocument(oDoc);
565: } // add
566:
567: public static void add(String sTableName, String sDirectory,
568: String sAnalyzer, Map oKeywords, Map oTexts, Map oUnStored)
569: throws ClassNotFoundException, IOException,
570: IllegalArgumentException, NoSuchFieldException,
571: IllegalAccessException, InstantiationException,
572: NullPointerException {
573:
574: if (!allowedTable(sTableName))
575: throw new IllegalArgumentException(
576: "Table name must be k_bugs or k_newsmsgs or k_mime_msgs");
577:
578: if (null == sDirectory)
579: throw new NoSuchFieldException(
580: "Cannot find luceneindex property");
581:
582: File oDir = new File(sDirectory);
583: if (!oDir.exists()) {
584: FileSystem oFS = new FileSystem();
585: try {
586: oFS.mkdirs(sDirectory);
587: } catch (Exception e) {
588: throw new IOException(e.getClass().getName() + " "
589: + e.getMessage());
590: }
591: }
592:
593: Class oAnalyzer = Class
594: .forName((sAnalyzer == null) ? DEFAULT_ANALYZER
595: : sAnalyzer);
596:
597: IndexWriter oIWrt = new IndexWriter(sDirectory,
598: (Analyzer) oAnalyzer.newInstance(), true);
599:
600: add(oIWrt, oKeywords, oTexts, oUnStored);
601:
602: oIWrt.close();
603: } // add
604:
605: /**
606: * Add a document to the index
607: * @param sTableName k_bugs, k_newsmsgs or k_mime_msgs
608: * @param oProps Properties Collection containing luceneindex directory
609: * @param sWorkArea WorkArea for document
610: * @param sContainer GUID of Category or NewsGroup to which documento belongs
611: * @param sGUID Document GUID
612: * @param iNumber Document number (optional, may be zero)
613: * @param sTitle Document Title (optional, may be <b>null</b>)
614: * @param sText Document text (optional, may be <b>null</b>)
615: * @param sAuthor Document author (optional, may be <b>null</b>)
616: * @param sAbstract Document abstract (optional, may be <b>null</b>)
617: * @param sComments Document comments (optional, may be <b>null</b>)
618: * @throws ClassNotFoundException
619: * @throws IOException
620: * @throws IllegalArgumentException If sTableName is not one of { k_bugs, k_newsmsgs, k_mime_msgs }
621: * @throws NoSuchFieldException If luceneindex property is not found at oProps
622: * @throws IllegalAccessException
623: * @throws InstantiationException
624: * @throws NullPointerException
625: * @deprecated Use add method from Indexer subclasses instead
626: */
627:
628: public static void add(String sTableName, Properties oProps,
629: String sGUID, int iNumber, String sWorkArea,
630: String sContainer, String sTitle, String sText,
631: String sAuthor, String sAbstract, String sComments)
632: throws ClassNotFoundException, IOException,
633: IllegalArgumentException, NoSuchFieldException,
634: IllegalAccessException, InstantiationException,
635: NullPointerException {
636:
637: if (null == sGUID)
638: throw new NullPointerException(
639: "Document GUID may not be null");
640:
641: if (!sTableName.equalsIgnoreCase("k_bugs")
642: && !sTableName.equalsIgnoreCase("k_newsmsgs")
643: && !sTableName.equalsIgnoreCase("k_mime_msgs"))
644: throw new IllegalArgumentException(
645: "Table name must be k_bugs or k_newsmsgs or k_mime_msgs");
646:
647: String sDirectory = oProps.getProperty("luceneindex");
648:
649: if (null == sDirectory)
650: throw new NoSuchFieldException(
651: "Cannot find luceneindex property");
652:
653: sDirectory = Gadgets.chomp(sDirectory, File.separator)
654: + sTableName.toLowerCase() + File.separator + sWorkArea;
655: File oDir = new File(sDirectory);
656: if (!oDir.exists()) {
657: FileSystem oFS = new FileSystem();
658: try {
659: oFS.mkdirs(sDirectory);
660: } catch (Exception e) {
661: throw new IOException(e.getClass().getName() + " "
662: + e.getMessage());
663: }
664: }
665:
666: Class oAnalyzer = Class.forName(oProps.getProperty("analyzer",
667: DEFAULT_ANALYZER));
668:
669: HashMap oKeys = new HashMap(11);
670: oKeys.put("workarea", sWorkArea == null ? "" : sWorkArea);
671: oKeys.put("container", sContainer == null ? "" : sContainer);
672: oKeys.put("guid", sGUID);
673: oKeys.put("number", String.valueOf(iNumber));
674: HashMap oTexts = new HashMap(11);
675: oTexts.put("title", sTitle == null ? "" : sTitle);
676: oTexts.put("author", sAuthor == null ? "" : sAuthor);
677: oTexts.put("abstract", sAbstract == null ? "" : Gadgets.left(
678: sAbstract, 80));
679: HashMap oUnstor = new HashMap(11);
680: oUnstor.put("comments", sComments == null ? "" : sComments);
681: oUnstor.put("text", sText == null ? "" : sText);
682:
683: IndexWriter oIWrt = new IndexWriter(sDirectory,
684: (Analyzer) oAnalyzer.newInstance(), true);
685: add(oIWrt, oKeys, oTexts, oUnstor);
686: oIWrt.close();
687: } // add
688:
689: // ---------------------------------------------------------------------------
690:
691: /**
692: * Delete a document with a given GUID
693: * @param sTableName k_bugs, k_newsmsgs or k_mime_msgs
694: * @param oProps Properties Collection containing luceneindex directory
695: * @param sGuid Document GUID
696: * @return Number of documents deleted
697: * @throws IllegalArgumentException If sTableName is not one of { k_bugs, k_newsmsgs, k_mime_msgs }
698: * @throws NoSuchFieldException If luceneindex property is not found at oProps
699: * @throws IllegalAccessException
700: * @throws IOException
701: * @throws NullPointerException If sGuid is <b>null</b>
702: */
703: public static int delete(String sTableName, String sWorkArea,
704: Properties oProps, String sGuid)
705: throws IllegalArgumentException, NoSuchFieldException,
706: IllegalAccessException, IOException, NullPointerException {
707:
708: if (null == sGuid)
709: throw new NullPointerException(
710: "Document GUID may not be null");
711:
712: if (!allowedTable(sTableName))
713: throw new IllegalArgumentException(
714: "Table name must be k_bugs or k_newsmsgs or k_mime_msgs");
715:
716: String sDirectory = oProps.getProperty("luceneindex");
717:
718: if (null == sDirectory)
719: throw new NoSuchFieldException(
720: "Cannot find luceneindex property");
721:
722: sDirectory = Gadgets.chomp(sDirectory, File.separator)
723: + sTableName.toLowerCase() + File.separator + sWorkArea;
724: File oDir = new File(sDirectory);
725: if (!oDir.exists()) {
726: FileSystem oFS = new FileSystem();
727: try {
728: oFS.mkdirs(sDirectory);
729: } catch (Exception e) {
730: throw new IOException(e.getClass().getName() + " "
731: + e.getMessage());
732: }
733: }
734:
735: IndexReader oReader = IndexReader.open(sDirectory);
736:
737: int iDeleted = oReader.delete(new Term("guid", sGuid));
738:
739: oReader.close();
740:
741: return iDeleted;
742: } // delete
743:
744: // ---------------------------------------------------------------------------
745:
746: private static void printUsage() {
747: System.out.println("");
748: System.out.println("Usage:");
749: System.out
750: .println("Indexer cnf_path rebuild {k_bugs|k_newsmsgs|k_mime_msgs}");
751: System.out
752: .println("cnf_path : Full path to hipergate.cnf file");
753: }
754:
755: // ---------------------------------------------------------------------------
756:
757: /**
758: * <p>Static method for calling indexer from the command line</p>
759: * @param argv String[] Must have two arguments, the first one is the full path
760: * to hipergate.cnf or other properties file containing database connection parameters.<br>
761: * The second argument must be "rebuild".<br>
762: * The third argument is one of {k_bugs|k_newsmsgs|k_mime_msgs} indicating which table index is to be rebuilt.<br>
763: * Command line example: java -cp ... com.knowgate.lucene.Indexer /etc/hipergate.cnf rebuild k_mime_msgs
764: * @throws SQLException
765: * @throws IOException
766: * @throws ClassNotFoundException
767: * @throws IllegalArgumentException
768: * @throws NoSuchFieldException
769: * @throws IllegalAccessException
770: * @throws InstantiationException
771: */
772: public static void main(String[] argv) throws SQLException,
773: IOException, ClassNotFoundException,
774: IllegalArgumentException, NoSuchFieldException,
775: IllegalAccessException, InstantiationException {
776:
777: if (argv.length != 3)
778: printUsage();
779: else if (!argv[1].equals("rebuild")) {
780: printUsage();
781: } else if (!allowedTable(argv[2])) {
782: printUsage();
783: } else {
784: Properties oProps = new Properties();
785: FileInputStream oCNF = new FileInputStream(argv[0]);
786: oProps.load(oCNF);
787: oCNF.close();
788: rebuild(oProps, argv[2]);
789: }
790: } // main
791: }
|