001: package org.enhydra.snapperAdmin.business;
002:
003: import java.io.File;
004: import java.io.FileInputStream;
005:
006: import java.sql.Timestamp;
007: import java.util.HashMap;
008: import java.util.Iterator;
009: import java.util.Map;
010: import java.util.Properties;
011: import java.util.Vector;
012:
013: import org.enhydra.snapper.SnapperManager;
014: import org.enhydra.snapper.api.Reader;
015: import org.enhydra.snapperAdmin.SnapperAdmin;
016: import org.enhydra.snapperAdmin.spec.FileType;
017: import org.enhydra.snapperAdmin.spec.Index;
018: import org.enhydra.snapperAdmin.spec.Indexed;
019: import org.enhydra.snapperAdmin.spec.IndexedFactory;
020: import org.enhydra.snapperAdmin.spec.IndexerBaseSpec;
021: import org.enhydra.snapperAdmin.spec.IndexerBaseSpecFactory;
022: import org.enhydra.snapperAdmin.spec.Path;
023: import org.enhydra.snapperAdmin.spec.ReaderFactory;
024: import org.enhydra.snapperAdmin.spec.Site;
025: import org.enhydra.snapperAdmin.spec.SiteList;
026: import org.enhydra.snapperAdmin.spec.SiteListFactory;
027:
028: import com.lutris.logging.Logger;
029: import com.lutris.util.Config;
030: import com.lutris.util.ConfigFile;
031:
032: /**
033: * @author Igor Smirnov IndexImpl is _the_ thread that does all the indexing
034: */
035:
036: public class IndexImpl implements Index, Runnable {
037:
038: String siteOID;
039: Site site = null;
040:
041: Path[] pathArray = null;
042:
043: IndexerBaseSpec indexerBase = null;
044: String indexdir = "";
045: long startTime = 0;
046:
047: private boolean doStop, indexed;
048:
049: public void setDoStop(boolean n) throws Exception {
050: doStop = n;
051: indexerBase.setDoStop(n);
052: SnapperManager.getInstance().removeThread(siteOID);
053: indexerBase = null;
054:
055: }
056:
057: public void run() {
058: try {
059: Timestamp sts = new Timestamp(System.currentTimeMillis());
060: Properties prop = new Properties();
061: Indexed in = null;
062: // Site to index
063: try {
064: in = IndexedFactory
065: .getIndexed("org.enhydra.snapperAdmin.business.IndexedImpl");
066: SiteList sl = SiteListFactory
067: .getSiteList("org.enhydra.snapperAdmin.business.SiteListImpl");
068: site = sl.findSiteByName(siteOID);
069:
070: if (site == null) {
071: SnapperAdmin.logWarrning("Error getting site : "
072: + siteOID + " aborting index process !!!");
073: return;
074: }
075:
076: SnapperAdmin
077: .logInfo("Setting indexer for indexing site "
078: + siteOID);
079: } catch (Exception ex) {
080: SnapperAdmin
081: .logError("IndexImpl - Could not findSiteByID = "
082: + siteOID);
083: throw ex;
084: }
085:
086: // Paths for siteOID, pathTypes
087: try {
088: pathArray = site.getPathList();
089: } catch (Exception ex) {
090: SnapperAdmin
091: .logError("Error getting paths or pathType for site "
092: + siteOID);
093: pathArray = null;
094: throw ex;
095: }
096:
097: try {
098: boolean locked = false;
099: int age = 0;
100: int size = 0;
101: boolean indexContent = true;
102: boolean indexUnknownFileTypes = false;
103: boolean indexDirectory = false;
104:
105: try {
106: if (site.getDocumentGroup() != null) {
107: Vector listDocGrop = (Vector) site
108: .getDocumentGroup()
109: .getListOfDocumentGroup();
110:
111: if (listDocGrop != null) {
112: for (int j = 0; j < listDocGrop.size(); j++) {
113: FileType[] listPT = site
114: .getDocumentGroup()
115: .getListOfFILETYPES(
116: (String) listDocGrop
117: .elementAt(j));
118: if (listPT != null) {
119: for (int k = 0; k < listPT.length; k++) {
120: if (!prop.containsKey(listPT[k]
121: .getEXTENSION()))
122: prop
123: .setProperty(
124: listPT[k]
125: .getEXTENSION(),
126: listPT[k]
127: .getPARSER());
128: }
129: }
130:
131: }
132: }
133:
134: }
135:
136: } catch (Exception e) {
137: SnapperAdmin
138: .logError("Error getting site parameters! message :"
139: + e.getMessage());
140: }
141:
142: try {
143:
144: Map threads = new HashMap();
145: threads = SnapperManager.getInstance().getThreads();
146:
147: for (Iterator iterator = threads.entrySet()
148: .iterator(); iterator.hasNext();) {
149: Map.Entry entry = (Map.Entry) iterator.next();
150: String key = (String) entry.getKey();
151:
152: if (siteOID.equals(key)) {
153: locked = true;
154: }
155: }
156:
157: age = site.getMAXAGE();
158: size = site.getMAXSIZE();
159: indexContent = site.getINDEXCONTENT();
160: indexUnknownFileTypes = site
161: .getINDEXUNKNOWNFILETYPES();
162: indexDirectory = site.getINDEXDIRECTORY();
163: } catch (Exception e) {
164: SnapperAdmin
165: .logError("Error getting site parameters! message :"
166: + e.getMessage());
167: throw e;
168: }
169:
170: if (!SnapperAdmin.getIndexDiference()) {
171: if (locked) {
172: indexerBase = null;
173: SnapperAdmin
174: .logInfo("Site locked! Indexing Aborted.");
175: return;
176: }
177: }
178:
179: String indexer = SnapperAdmin.getIdexerClassName();
180: indexerBase = IndexerBaseSpecFactory
181: .getIndexerBase("org.enhydra.snapperAdmin.business.IndexerBase");
182: indexerBase.initChecker(age, size, site);
183: indexerBase.setIndexContents(indexContent);
184: indexerBase
185: .setIndexUnknownFileTypes(indexUnknownFileTypes);
186: indexerBase.setIndexFileTypes(prop);
187: indexerBase.setIndexDirectory(indexDirectory);
188: indexerBase.setIndexDiference(SnapperAdmin
189: .getIndexDiference());
190: indexerBase.setLogFileName(site.getNAME() + "_INDEX_"
191: + sts.getTime());
192: boolean ifIndexDiferenceFromBegining = false;
193:
194: //ako fajl nepostoji krecemo index u re-index modu
195:
196: if (site.getINCLUDEDB() != null
197: && site.getINCLUDEDB().length() != 0
198: && site.getINCLUDETABLE() != null
199: && site.getINCLUDETABLE().length() != 0
200: && site.getINCLUDECOLUMN() != null
201: && site.getINCLUDECOLUMN().length() != 0
202: && site.getINCLUDECOLUMNMODIFIED() != null
203: && site.getINCLUDECOLUMNMODIFIED().length() != 0) {
204:
205: if (site.getINDEXDIR().length() == 0) {
206: indexdir = SnapperAdmin.getIndexDir()
207: + File.separator + site.getNAME();
208: } else {
209: indexdir = site.getINDEXDIR();
210: }
211:
212: File includeFile = new File(indexdir, "include.txt");
213:
214: if (!includeFile.exists()) {
215: ifIndexDiferenceFromBegining = true;
216: SnapperAdmin
217: .logInfo("Starting Index in Re-Index mode");
218:
219: }
220: }
221:
222: if (!ifIndexDiferenceFromBegining) {
223: if (site.getINDEXDIR().length() == 0) {
224: indexdir = SnapperAdmin.getIndexDir()
225: + File.separator + site.getNAME();
226: } else {
227: indexdir = site.getINDEXDIR();
228: }
229:
230: File pathFile = new File(indexdir, "path.txt");
231:
232: if (!pathFile.exists()) {
233: ifIndexDiferenceFromBegining = true;
234: SnapperAdmin
235: .logInfo("Starting Index in Re-Index mode");
236: }
237: }
238:
239: try {
240: if (site.getINDEXDIR().length() == 0) {
241: indexdir = SnapperAdmin.getIndexDir()
242: + File.separator + site.getNAME();
243: SnapperAdmin
244: .logWarrning("Index directory isn't set! -> Using default");
245: } else {
246: indexdir = site.getINDEXDIR();
247: }
248: SnapperAdmin.logInfo("Index directory - "
249: + indexdir);
250:
251: if (SnapperAdmin.getIndexDiference()
252: && !ifIndexDiferenceFromBegining) {
253: SnapperAdmin
254: .logInfo("Starting Re-Index in Re-Index mode");
255: indexerBase.setUpIndexer(true, indexdir, site
256: .getLANGUAGE(), false, indexer);
257: } else {
258: long temp1 = in.readTimeFile(indexdir);
259: Timestamp temp = new Timestamp(temp1);
260: if (temp1 == 0)
261: temp = sts;
262: site.setStartTime(temp);
263: in.createTimeFile(site.getINDEXDIR(), sts);
264: indexerBase.setUpIndexer(true, indexdir, site
265: .getLANGUAGE(), true, indexer);
266: }
267:
268: } catch (Exception e) {
269: SnapperAdmin
270: .logError("Error set up index , message : "
271: + e.getMessage());
272: SnapperAdmin.logError("Cause: " + e.toString());
273: throw e;
274: }
275:
276: SnapperAdmin.logInfo(site.getNAME()
277: + " parameters: age=" + age + ", size=" + size);
278: SnapperAdmin.logInfo("\t indexContent=" + indexContent);
279: SnapperAdmin.logInfo("Indexing site: " + site.getNAME()
280: + " started");
281:
282: try {
283: if (site.getFILTERDB() != null
284: && site.getFILTERTABLE() != null
285: && site.getFILTERCOLUMN() != null
286: && site.getFILTERDB().length() != 0
287: && site.getFILTERTABLE().length() != 0
288: && site.getFILTERCOLUMN().length() != 0) {
289: indexerBase.setExclude(site.getFILTERDB(), site
290: .getFILTERTABLE(), site
291: .getFILTERCOLUMN());
292:
293: SnapperAdmin.logInfo("Using\n\tfilterDB->"
294: + site.getFILTERDB()
295: + "\n\tfilterTable->"
296: + site.getFILTERTABLE()
297: + "\n\t filterColumn->"
298: + site.getFILTERCOLUMN());
299: }
300: } catch (Exception e) {
301: SnapperAdmin
302: .logError("Error setting site filter list");
303: SnapperAdmin.logError("Cause: " + e.toString());
304: }
305:
306: try {
307: if (site.getMETADB() != null
308: && site.getMETADB().length() != 0
309: && site.getMETATABLE() != null
310: && site.getMETATABLE().length() != 0
311: && site.getMETAFILE() != null
312: && site.getMETAFILE().length() != 0
313: && site.getMETAKEY() != null
314: && site.getMETAKEY().length() != 0
315: && site.getMETAVALUE() != null
316: && site.getMETAVALUE().length() != 0) {
317: indexerBase.setMeta(site.getMETADB(), site
318: .getMETATABLE(), site.getMETAFILE(),
319: site.getMETAKEY(), site.getMETAVALUE());
320:
321: SnapperAdmin.logInfo("Using\n\tmetaDB->"
322: + site.getMETADB() + "\n\tmetaTable->"
323: + site.getMETATABLE()
324: + "\n\tmetaFile->" + site.getMETAFILE()
325: + "\n\tmetaKey->" + site.getMETAKEY()
326: + "\n\tmetaValue->"
327: + site.getMETAVALUE());
328: }
329: } catch (Exception e) {
330: SnapperAdmin.logError("Error setting meta data");
331: SnapperAdmin.logError("Cause: " + e.toString());
332: }
333:
334: try {
335: if (site.getINCLUDEDB() != null
336: && site.getINCLUDEDB().length() != 0
337: && site.getINCLUDETABLE() != null
338: && site.getINCLUDETABLE().length() != 0
339: && site.getINCLUDECOLUMN() != null
340: && site.getINCLUDECOLUMN().length() != 0
341: && site.getINCLUDECOLUMNMODIFIED() != null
342: && site.getINCLUDECOLUMNMODIFIED().length() != 0) {
343:
344: indexerBase.setInclude(site.getINCLUDEDB(),
345: site.getINCLUDETABLE(), site
346: .getINCLUDECOLUMN(), site
347: .getINCLUDECOLUMNMODIFIED());
348:
349: SnapperAdmin.logInfo("Using\n\tincludeDB->"
350: + site.getINCLUDEDB()
351: + "\n\tincludeTable->"
352: + site.getINCLUDETABLE()
353: + "\n\tincludeColumn->"
354: + site.getINCLUDECOLUMN()
355: + "\n\tincludeColumnModified->"
356: + site.getINCLUDECOLUMNMODIFIED());
357:
358: indexerBase.indexInclude();
359: // indexerBase.close();
360: // indexerBase = null;
361: indexed = true;
362: // return;
363:
364: }
365: } catch (Exception e) {
366: SnapperAdmin
367: .logError("Error indexing include list");
368: SnapperAdmin.logError("Cause: " + e.toString());
369: throw e;
370: }
371: } catch (Exception ex) {
372: }
373:
374: // Index
375: if (!indexed && pathArray != null) {
376:
377: for (int i = 0; i < pathArray.length; i++) {
378:
379: if (doStop) {
380: SnapperAdmin.logWarrning("Indexing stopped");
381: return;
382: }
383: if (pathArray[i] == null)
384: continue;
385:
386: try {
387: if (pathArray[i].getType().equals("FileSystem")) {
388: indexerBase.indexDocs(site.getNAME(), "",
389: "", pathArray[i].getRoot(),
390: "FileSystem", "", "", true);
391: if (doStop) {
392: SnapperAdmin
393: .logWarrning("Indexing stopped");
394: return;
395: }
396: }
397: if (pathArray[i].getType().equals("FTP")) {
398: indexerBase.indexDocs(site.getNAME(),
399: pathArray[i].getHost(), "",
400: pathArray[i].getRoot(), "FTP",
401: pathArray[i].getUser(),
402: pathArray[i].getPass(), true);
403: if (doStop) {
404: SnapperAdmin
405: .logWarrning("Indexing stopped");
406: return;
407: }
408: }
409: if (pathArray[i].getType().equals("UNC")) {
410: indexerBase.indexDocs(site.getNAME(),
411: pathArray[i].getHost(), "",
412: pathArray[i].getRoot(), "UNC",
413: pathArray[i].getUser(),
414: pathArray[i].getPass(), true);
415: if (doStop) {
416: SnapperAdmin
417: .logWarrning("Indexing stopped");
418: return;
419: }
420: }
421:
422: if (pathArray[i].getType().equals("WebDAV")) {
423: indexerBase.indexDocs(site.getNAME(),
424: pathArray[i].getHost(), "",
425: pathArray[i].getRoot(), "WebDAV",
426: pathArray[i].getUser(),
427: pathArray[i].getPass(), true);
428: if (doStop) {
429: SnapperAdmin
430: .logWarrning("Indexing stopped");
431: return;
432: }
433: }
434:
435: } catch (Exception e) {
436: SnapperAdmin.logError("Error indexing path "
437: + pathArray[i].getRoot());
438: SnapperAdmin.logError("Cause: " + e.toString());
439: }
440:
441: }
442: if (!(indexerBase.getIndexSizeReached())) {
443: File include = new File(indexdir, "path.txt");
444:
445: if (include.exists())
446: include.delete();
447: }
448: indexed = true;
449:
450: }
451:
452: if (prop.containsKey("NULL") && site.getMETADB() != null
453: && site.getMETADB().length() != 0
454: && site.getMETATABLE() != null
455: && site.getMETATABLE().length() != 0
456: && site.getMETAFILE() != null
457: && site.getMETAFILE().length() != 0
458: && site.getMETAKEY() != null
459: && site.getMETAKEY().length() != 0
460: && site.getMETAVALUE() != null
461: && site.getMETAVALUE().length() != 0) {
462: try {
463: indexerBase.indexMetaData();
464: } catch (Exception e) {
465: SnapperAdmin.logError("Error indexing Meta Data ");
466: SnapperAdmin.logError("Cause: " + e.toString());
467: }
468: }
469:
470: if (SnapperAdmin.getOptimizeOnIndex()) {
471: SnapperAdmin.logInfo("Optimizing index: ");
472: indexerBase.optimize();
473: SnapperAdmin.logInfo("Index optimized ");
474: } else
475: SnapperAdmin.logInfo("Not performing optimization!!!");
476:
477: Timestamp ts = new Timestamp(System.currentTimeMillis());
478: Vector vni = new Vector();
479: //Indexed in = null;
480: Reader reader = null;
481:
482: try {
483: reader = ReaderFactory
484: .createReader("org.enhydra.snapper.wrapper.lucene.LuceneReader");
485: reader.setUpReader(indexdir);
486: } catch (Exception e) {
487: SnapperAdmin
488: .logError("Error setting up reader! Document number may be incorrect!, message : "
489: + e.getMessage());
490: }
491:
492: SnapperAdmin.logInfo("Indexing site: " + site.getNAME()
493: + " finished");
494:
495: try {
496: in = IndexedFactory
497: .getIndexed("org.enhydra.snapperAdmin.business.IndexedImpl");
498: in.setName(site.getNAME());
499: in.setLASTSTART(sts.getTime());
500: in.setLASTTYPE("INDEX");
501: in.setTime(ts.getTime());
502: in.setDoc(String.valueOf(reader.getSize()));
503: in.save();
504:
505: indexerBase.close();
506: indexerBase = null;
507:
508: vni.removeAllElements();
509: vni = null;
510: } catch (Exception e) {
511: SnapperAdmin
512: .logError("Error writing index log! , message : "
513: + e.getMessage());
514: } finally {
515: if (vni != null) {
516: vni.removeAllElements();
517: vni = null;
518: }
519: if (in != null) {
520: in = null;
521: }
522: }
523:
524: if (indexerBase != null) {
525: indexerBase = null;
526: }
527:
528: if (site != null) {
529: site = null;
530: }
531:
532: try {
533: if (reader != null) {
534: reader.closeReader();
535: reader = null;
536: }
537: } catch (Exception e) {
538: e.printStackTrace();
539: } finally {
540: if (reader != null) {
541: reader = null;
542: }
543: }
544: } catch (Exception e) {
545: SnapperAdmin.logError("Error indexing site");
546: e.printStackTrace();
547: } finally {
548:
549: try {
550: String path = SnapperManager.getInstance().getTempDir()
551: + File.separator
552: + Thread.currentThread().getName();
553: File siteTempDir = new File(path);
554: FinalDeleter.deleteDir(siteTempDir);
555: } catch (Exception e) {
556: SnapperAdmin
557: .logError("Problem occured while delete Snapper Temp directory");
558: }
559:
560: try {
561: SnapperManager.getInstance().removeThread(siteOID);
562: SnapperAdmin.logInfo("Thread unregistered");
563: } catch (Exception e) {
564: SnapperAdmin.logError("Threads waren't unregistered");
565: SnapperAdmin.logError("Cause: " + e.toString());
566: }
567: }
568: }
569:
570: public void stop() {
571: }
572:
573: public IndexImpl() {
574: }
575:
576: public void index(String siteOID) {
577: this .siteOID = siteOID;
578: }
579:
580: public static void main(String[] args) {
581:
582: String confFilePath = args[0];
583: String catalianBase = args[1];
584: String siteName = args[2];
585:
586: File confFile = new File(confFilePath);
587: if (confFile.exists()) {
588: FileInputStream fis = null;
589:
590: try {
591: fis = new FileInputStream(confFile);
592: ConfigFile config = new ConfigFile(fis);
593: config.setFile(confFile);
594:
595: Config appConfig = config.getConfig();
596: appConfig.setConfigFile(config);
597: SnapperAdmin sa = new SnapperAdmin();
598:
599: String logerClassName = appConfig
600: .getString("LogClassName");
601: Logger logger = null;
602:
603: try {
604: logger = (Logger) Class.forName(logerClassName)
605: .getConstructor(
606: new Class[] { Boolean.TYPE })
607: .newInstance(
608: new Object[] { new Boolean(true) });
609: } catch (Exception e) {
610: e.printStackTrace();
611: }
612:
613: System.setProperty("catalina.base", catalianBase);
614: logger.configure(appConfig);
615:
616: String appClass = "org.enhydra.snapperAdmin.SnapperAdmin";
617:
618: sa.setLogChannel(logger.getChannel(appClass));
619: sa.startup(appConfig);
620: } catch (Exception e) {
621: e.printStackTrace();
622: } finally {
623: if (fis != null) {
624: try {
625: fis.close();
626: } catch (Exception e) {
627: }
628: fis = null;
629: }
630: }
631: } else
632: System.exit(0);
633:
634: try {
635: IndexImpl indexer = new IndexImpl();
636: indexer.index(siteName);
637: new Thread(indexer).start();
638: } catch (Exception e) {
639: e.printStackTrace();
640: }
641: }
642:
643: public void setStartTime(long time) {
644:
645: }
646:
647: }
|