0001: package org.enhydra.snapperAdmin.business;
0002:
0003: import java.io.BufferedOutputStream;
0004: import java.io.BufferedReader;
0005: import java.io.BufferedWriter;
0006: import java.io.File;
0007: import java.io.FileInputStream;
0008: import java.io.FileOutputStream;
0009: import java.io.IOException;
0010: import java.io.InputStreamReader;
0011: import java.io.OutputStreamWriter;
0012:
0013: import java.sql.Connection;
0014: import java.sql.PreparedStatement;
0015: import java.sql.ResultSet;
0016: import java.sql.SQLException;
0017: import java.text.DateFormat;
0018: import java.text.SimpleDateFormat;
0019: import java.util.ArrayList;
0020: import java.util.Date;
0021: import java.util.Iterator;
0022: import java.util.List;
0023: import java.util.Properties;
0024: import java.util.TimeZone;
0025: import java.util.TreeSet;
0026: import java.util.Vector;
0027:
0028: import javax.naming.Context;
0029: import javax.naming.InitialContext;
0030: import javax.sql.DataSource;
0031:
0032: import org.enhydra.snapper.SnapperManager;
0033: import org.enhydra.snapper.api.Indexer;
0034: import org.enhydra.snapper.api.Reader;
0035: import org.enhydra.snapper.linux.LinuxFile;
0036: import org.enhydra.snapper.parsers.fileparsers.FileParserException;
0037: import org.enhydra.snapper.parsers.fileparsers.FileParserMaster;
0038: import org.enhydra.snapper.parsers.fileparsers.utils.ParserDataConstants;
0039: import org.enhydra.snapper.win.Win32File;
0040: import org.enhydra.snapperAdmin.SnapperAdmin;
0041: import org.enhydra.snapperAdmin.business.BusinessUtil;
0042: import org.enhydra.snapperAdmin.business.DocumentStore;
0043: import org.enhydra.snapperAdmin.business.FileChecker;
0044: import org.enhydra.snapperAdmin.spec.IndexerBaseSpec;
0045: import org.enhydra.snapperAdmin.spec.ReaderFactory;
0046: import org.enhydra.snapperAdmin.spec.Site;
0047:
0048: import com.lutris.logging.Logger;
0049:
0050: public class IndexerBase implements IndexerBaseSpec {
0051:
0052: FileChecker fc;
0053:
0054: FileParserMaster parser;
0055:
0056: Indexer indexer;
0057:
0058: File indexdir;
0059:
0060: long age, size;
0061:
0062: String[] types;
0063:
0064: String storageType, sitename;
0065:
0066: Vector filtered = new Vector();
0067:
0068: TreeSet included = new TreeSet();
0069:
0070: boolean meta = false, filter = false, include = false;
0071:
0072: String metaDB = "", metaTable = "", metaFileColumn = "", metaKeyColumn = "", metaValueColumn = ""; //$NON-NLS-5$
0073:
0074: String includeDB = "", includeTable = "", includeFileColumn = "",
0075: includeModifiedColumn = "";
0076:
0077: String excludeDB = "", excludeTable = "", excludeFileColumn = "";
0078:
0079: List allfiles = new ArrayList();
0080:
0081: private boolean doStop;
0082:
0083: private String title = "";
0084:
0085: private String originalHost = "";
0086:
0087: private boolean indexContent = true;
0088:
0089: private boolean indexUnknownFileTypes = false;
0090:
0091: private boolean indexDirectory = false;
0092:
0093: private Properties prop = new Properties();
0094:
0095: private String siteName = "";
0096:
0097: private boolean printStackTrace = SnapperAdmin.printStackTrace();
0098: private boolean indexSizeReached = false;
0099:
0100: private long currentFile = 1;
0101:
0102: public IndexerBase() {
0103: }
0104:
0105: public void setUpIndexer(boolean parserAfterMetadata,
0106: String siteName, String language, boolean create,
0107: String classname) {
0108:
0109: try {
0110: this .siteName = siteName;
0111: if (filter) {
0112: filtered = getFiltered();
0113: }
0114:
0115: if (include) {
0116: included = getIncluded();
0117: }
0118:
0119: indexer = SnapperManager.getInstance().getIndexerFactory()
0120: .newIndexer();
0121: indexdir = new File(siteName);
0122: indexer.setUpIndexer(siteName, language, create,
0123: SnapperAdmin.getmaxFieldLength(), SnapperAdmin
0124: .getIndexOSspecific());
0125: sitename = siteName;
0126:
0127: } catch (Exception e) {
0128: SnapperAdmin.logError("Could not setup Indexer, message : "
0129: + e.getMessage());
0130: if (printStackTrace) {
0131: e.printStackTrace();
0132: }
0133: }
0134: }
0135:
0136: public void initChecker(int age, int size, Site site) {
0137: try {
0138: fc = new FileChecker(age, size, site);
0139: } catch (Exception e) {
0140: SnapperAdmin
0141: .logError("Could not setup FileChecker, message : "
0142: + e.getMessage());
0143: if (printStackTrace) {
0144: e.printStackTrace();
0145: }
0146: }
0147: }
0148:
0149: public void optimize() {
0150: try {
0151: this .indexer.optimize();
0152: } catch (Exception e) {
0153: SnapperAdmin
0154: .logError("Could not optimize Indexer, message : "
0155: + e.getMessage());
0156: if (printStackTrace) {
0157: e.printStackTrace();
0158: }
0159: }
0160: }
0161:
0162: public void close() {
0163: try {
0164: this .indexer.close();
0165: this .indexer = null;
0166: SnapperManager.getInstance().getIndexerFactory()
0167: .removeIndexer();
0168: } catch (Exception e) {
0169: SnapperAdmin.logError("Could not close Indexer, message : "
0170: + e.getMessage());
0171: if (printStackTrace) {
0172: e.printStackTrace();
0173: }
0174: }
0175: }
0176:
0177: public boolean getIndexSizeReached() {
0178: return indexSizeReached;
0179: }
0180:
0181: public void indexDocs(File originalFile) {
0182: if (!indexSizeReached) {
0183: String metadata = "";
0184: title = "";
0185:
0186: try {
0187: originalFile = new File(originalFile.getCanonicalPath());
0188: } catch (IOException e2) {
0189: // TODO Auto-generated catch block
0190: if (printStackTrace) {
0191: e2.printStackTrace();
0192: }
0193: }
0194: if (isFiltered(new String(originalFile.getPath())))
0195: return;
0196:
0197: if (this .meta) {
0198: try {
0199: BusinessUtil bu = new BusinessUtil();
0200: metadata = new String(bu.getMetadata(originalFile
0201: .getPath(), metaDB, metaTable,
0202: metaFileColumn, metaKeyColumn,
0203: metaValueColumn));
0204: String ln = SnapperManager.getInstance()
0205: .getLogicalNameFromDatabase();
0206: if (ln.equals("1"))
0207: title = new String(bu.title);
0208: bu.gc();
0209: bu = null;
0210: } catch (Exception ex) {
0211: SnapperAdmin.logDebug("Not metadata for file: "
0212: + originalFile.getPath() + ", message : "
0213: + ex.getMessage());
0214: if (printStackTrace) {
0215: ex.printStackTrace();
0216: }
0217: }
0218: }
0219:
0220: if (currentFile > SnapperAdmin.getmaxIndexLength()) {
0221: doStop = true;
0222: indexSizeReached = true;
0223: SnapperAdmin
0224: .logInfo("Document number limit reached!!!Terminating index process!!!");
0225: return;
0226: }
0227:
0228: parser = new FileParserMaster();
0229: parser.setIndexDirectory(indexDirectory);
0230: parser.setIndexUnknownFileTypes(indexUnknownFileTypes);
0231: parser.setLimit(SnapperManager.getInstance()
0232: .getParserLimitProperties());
0233: parser.setProp(prop);
0234: parser.setConversionTypes(SnapperAdmin
0235: .getConverterClassNames());
0236: parser.setIndexContent(indexContent);
0237: parser.setFileSizeLimit(SnapperAdmin.getMaxFileSize());
0238: parser.setTimeLimit(SnapperAdmin.getTimeLimit());
0239:
0240: try {
0241: parser.setWord2007HTMLTransformer(SnapperAdmin
0242: .getWord2007HTMLTemplates().newTransformer());
0243: parser.setWord2007TextTransformer(SnapperAdmin
0244: .getWord2007TextTemplates().newTransformer());
0245: parser.setExcel2007HTMLTransformer(SnapperAdmin
0246: .getExcel2007HTMLTemplates().newTransformer());
0247: parser.setExcel2007TextTransformer(SnapperAdmin
0248: .getExcel2007TextTemplates().newTransformer());
0249: parser.setPowerpoint2007HTMLTransformer(SnapperAdmin
0250: .getPowerpoint2007HTMLTemplates()
0251: .newTransformer());
0252: parser.setPowerpoint2007TextTransformer(SnapperAdmin
0253: .getPowerpoint2007TextTemplates()
0254: .newTransformer());
0255: } catch (Exception e) {
0256: SnapperAdmin
0257: .logError("Unable to create Office 2007 transformers!!!");
0258: }
0259:
0260: if (SnapperAdmin.toSaveConvertedFile()) {
0261: parser.setConversionPath(SnapperAdmin
0262: .pathOfConvertedFiles());
0263: }
0264:
0265: Vector parsedData;
0266:
0267: if (SnapperAdmin.log.isEnabled(Logger.DEBUG)) {
0268: SnapperAdmin.logDebug("==================== adding : "
0269: + originalFile.getPath() + " free memory = "
0270: + (Runtime.getRuntime().freeMemory() / 1024)
0271: / 1024 + " MB" + " total memory = "
0272: + (Runtime.getRuntime().totalMemory() / 1024)
0273: / 1024 + " MB");
0274: }
0275:
0276: String owner = "uknown";
0277:
0278: Long lastModified = new Long(originalFile.lastModified());
0279: Long creationDataLong = lastModified;
0280: Long accessDataLong = lastModified;
0281:
0282: if (SnapperAdmin.getIndexOSspecific()) {
0283: String osName = System.getProperty("os.name");
0284: if (osName.startsWith("Windows")) {
0285: if (Win32File.ready) {
0286: try {
0287: long creationDateTime = Win32File
0288: .getCreationDate(originalFile
0289: .getAbsolutePath());
0290: long accessDateTime = Win32File
0291: .getAccessDate(originalFile
0292: .getAbsolutePath());
0293: owner = Win32File.getFileOwner(originalFile
0294: .getAbsolutePath());
0295: if (creationDateTime > 0)
0296: creationDataLong = new Long(
0297: creationDateTime);
0298:
0299: if (accessDateTime > 0) {
0300: if (accessDateTime < creationDateTime)
0301: accessDataLong = new Long(
0302: creationDateTime);
0303: else
0304: accessDataLong = new Long(
0305: accessDateTime);
0306: }
0307:
0308: } catch (Exception e) {
0309: SnapperAdmin
0310: .logError(" OS Specific problem, message : "
0311: + e.getMessage());
0312: if (printStackTrace) {
0313: e.printStackTrace();
0314: }
0315: } catch (Throwable e) {
0316: SnapperAdmin
0317: .logError(" OS Specific problem, message : "
0318: + e.getMessage());
0319: if (printStackTrace) {
0320: e.printStackTrace();
0321: }
0322: }
0323: }
0324: } else {
0325: if (LinuxFile.ready) {
0326: try {
0327: long creationDateTime = LinuxFile
0328: .getCreationDate(originalFile
0329: .getAbsolutePath());
0330: long accessDateTime = LinuxFile
0331: .getAccessDate(originalFile
0332: .getAbsolutePath());
0333: owner = LinuxFile.getFileOwner(originalFile
0334: .getAbsolutePath());
0335: if (creationDateTime > 0)
0336: creationDataLong = new Long(
0337: creationDateTime);
0338:
0339: if (accessDateTime > 0)
0340: accessDataLong = new Long(
0341: accessDateTime);
0342:
0343: } catch (Exception e) {
0344: SnapperAdmin
0345: .logError(" OS Specific problem, message : "
0346: + e.getMessage());
0347: if (printStackTrace) {
0348: e.printStackTrace();
0349: }
0350: } catch (Throwable e) {
0351: SnapperAdmin
0352: .logError(" OS Specific problem, message : "
0353: + e.getMessage());
0354: if (printStackTrace) {
0355: e.printStackTrace();
0356: }
0357: }
0358: }
0359: }
0360: }
0361: SnapperAdmin.log.write(Logger.INFO, "Indexing file: "
0362: + originalFile.getName());
0363: if (!indexContent) {
0364: parsedData = new Vector();
0365: parsedData.add(ParserDataConstants.FILE_PATH_POSITION,
0366: originalFile.getPath());
0367: parsedData.add(ParserDataConstants.FILE_TYPE_POSITION,
0368: "");
0369: parsedData.add(ParserDataConstants.TITLE_POSITION,
0370: originalFile.getName());
0371: parsedData
0372: .add(
0373: ParserDataConstants.LAST_MODIFIED_DATE_POSITION,
0374: new Long(originalFile.lastModified()));
0375: parsedData.add(ParserDataConstants.PROPERTIES_POSITION,
0376: "");
0377: parsedData
0378: .add(ParserDataConstants.CONTENT_POSITION, "");
0379: parsedData.add(ParserDataConstants.SIZE_POSITION,
0380: new Long(originalFile.length()));
0381:
0382: parsedData.add(
0383: ParserDataConstants.CONVERTED_CONTENT_POSITION,
0384: null);
0385: parsedData
0386: .add(
0387: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION,
0388: null);
0389:
0390: parsedData.add(
0391: ParserDataConstants.MAIL_MESSAGE_SENT_DATE,
0392: null);
0393: parsedData.add(
0394: ParserDataConstants.MAIL_MESSAGE_RECEIVED_DATE,
0395: null);
0396: parsedData.add(
0397: ParserDataConstants.MAIL_MESSAGE_SUBJECT, null);
0398: parsedData.add(ParserDataConstants.MAIL_MESSAGE_FROM,
0399: null);
0400: parsedData.add(ParserDataConstants.MAIL_MESSAGE_TO,
0401: null);
0402: parsedData.add(ParserDataConstants.MAIL_MESSAGE_CC,
0403: null);
0404: parsedData.add(ParserDataConstants.MAIL_MESSAGE_BCC,
0405: null);
0406:
0407: parsedData.add(ParserDataConstants.AUTHOR, null);
0408: parsedData.add(ParserDataConstants.LAST_SAVED_BY, null);
0409: } else {
0410:
0411: try {
0412: parser.setSource(0);
0413: if (SnapperAdmin.toSaveConvertedFile())
0414: parser.setToSaveConvertedFile(true);
0415: if (SnapperAdmin.toSaveConvertedExcel())
0416: parser.setToSaveConvertedExcel(true);
0417: if (SnapperAdmin.toSaveConvertedWord())
0418: parser.setToSaveConvertedWord(true);
0419: if (SnapperAdmin.toSaveConvertedPowerPoint())
0420: parser.setToSaveConvertedPowerPoint(true);
0421: if (SnapperAdmin.toSaveConvertedExcel2007())
0422: parser.setToSaveConvertedExcel2007(true);
0423: if (SnapperAdmin.toSaveConvertedWord2007())
0424: parser.setToSaveConvertedWord2007(true);
0425: if (SnapperAdmin.toSaveConvertedPowerPoint2007())
0426: parser.setToSaveConvertedPowerPoint2007(true);
0427: parsedData = parser.parse(originalFile,
0428: SnapperManager.getInstance().getTempDir());
0429: } catch (FileParserException e) {
0430: SnapperAdmin.logDebug("File : "
0431: + originalFile.getPath()
0432: + " could not be parsed , message : "
0433: + e.getMessage());
0434: if (printStackTrace) {
0435: e.printStackTrace();
0436: }
0437: writeNotIndexedDocumentToFile(originalFile
0438: .getPath());
0439: parser.close();
0440: return;
0441: } catch (Throwable e) {
0442: SnapperAdmin.logDebug("File : "
0443: + originalFile.getName()
0444: + " could not be parsed , message : "
0445: + e.getMessage());
0446: if (printStackTrace) {
0447: e.printStackTrace();
0448: }
0449: writeNotIndexedDocumentToFile(originalFile
0450: .getPath());
0451: parser.close();
0452: return;
0453: }
0454: }
0455: try {
0456: if (title.length() == 0) {
0457: try {
0458: title = new String(parsedData.elementAt(
0459: ParserDataConstants.TITLE_POSITION)
0460: .toString());
0461: } catch (Exception e) {
0462: title = "";
0463: }
0464: }
0465:
0466: String consPath = "";
0467:
0468: try {
0469: consPath = parsedData.elementAt(
0470: ParserDataConstants.FILE_PATH_POSITION)
0471: .toString();
0472: } catch (Exception e) {
0473: consPath = originalFile.getPath();
0474: }
0475:
0476: if (SnapperManager.getInstance()
0477: .getFileSeparatorConvention() != null
0478: && SnapperManager.getInstance()
0479: .getFileSeparatorConvention()
0480: .equalsIgnoreCase("unix")) {
0481: consPath = consPath.replaceAll("\\\\", "/");
0482: originalHost = originalHost.replaceAll("\\\\", "/");
0483: }
0484:
0485: if (SnapperManager.getInstance()
0486: .getRelativeIndexPaths() != null
0487: && SnapperManager.getInstance()
0488: .getRelativeIndexPaths()
0489: .equalsIgnoreCase("true")) {
0490:
0491: if (consPath.startsWith(originalHost)) {
0492: consPath = consPath.substring(originalHost
0493: .length());
0494: if (consPath.startsWith("/"))
0495: consPath = consPath.substring(1);
0496: }
0497: }
0498:
0499: String content = "";
0500: String properties = null;
0501: try {
0502: properties = (String) parsedData.elementAt(
0503: ParserDataConstants.PROPERTIES_POSITION)
0504: .toString();
0505: } catch (Exception e) {
0506: }
0507:
0508: int propertiesLength = SnapperAdmin
0509: .getMaxPropertiesLength();
0510:
0511: if (properties != null) {
0512: if (properties.length() > propertiesLength)
0513: properties = properties.substring(0,
0514: propertiesLength);
0515: } else {
0516: properties = "";
0517: }
0518:
0519: if (SnapperAdmin.getMountTitleInContent())
0520: content = title;
0521: if (SnapperAdmin.getMountFilePathInContent()) {
0522: String formated = consPath;
0523: if (formated.indexOf("|") != -1)
0524: formated = formated.substring(0, formated
0525: .indexOf("|"));
0526: content += " " + formated;
0527: }
0528: if (SnapperAdmin.getMountPropertiesInContent())
0529: content += " " + properties;
0530: if (SnapperAdmin.getMountMetaDataInContent())
0531: content += " " + metadata;
0532:
0533: try {
0534: content += " "
0535: + parsedData
0536: .elementAt(
0537: ParserDataConstants.CONTENT_POSITION)
0538: .toString();
0539: } catch (Exception e) {
0540: }
0541:
0542: long mailSentDate = -1;
0543: long mailReceivedDate = -1;
0544: try {
0545: mailSentDate = ((Date) parsedData
0546: .elementAt(ParserDataConstants.MAIL_MESSAGE_SENT_DATE))
0547: .getTime();
0548: } catch (Exception e) {
0549: mailSentDate = -1;
0550: }
0551:
0552: try {
0553: mailReceivedDate = ((Date) parsedData
0554: .elementAt(ParserDataConstants.MAIL_MESSAGE_RECEIVED_DATE))
0555: .getTime();
0556: } catch (Exception e) {
0557: mailReceivedDate = -1;
0558: }
0559:
0560: String subject = null;
0561: String from = null;
0562: String to = null;
0563: String cc = null;
0564: String bcc = null;
0565:
0566: try {
0567: subject = (String) parsedData
0568: .elementAt(ParserDataConstants.MAIL_MESSAGE_SUBJECT);
0569: } catch (Exception e) {
0570: }
0571:
0572: try {
0573: from = (String) parsedData
0574: .elementAt(ParserDataConstants.MAIL_MESSAGE_FROM);
0575: } catch (Exception e) {
0576: }
0577:
0578: try {
0579: to = (String) parsedData
0580: .elementAt(ParserDataConstants.MAIL_MESSAGE_TO);
0581: } catch (Exception e) {
0582: }
0583:
0584: try {
0585: cc = (String) parsedData
0586: .elementAt(ParserDataConstants.MAIL_MESSAGE_CC);
0587: } catch (Exception e) {
0588: }
0589:
0590: try {
0591: bcc = (String) parsedData
0592: .elementAt(ParserDataConstants.MAIL_MESSAGE_BCC);
0593: } catch (Exception e) {
0594: }
0595: String author = null;
0596: String lastSaved = null;
0597:
0598: try {
0599: author = (String) parsedData
0600: .elementAt(ParserDataConstants.AUTHOR);
0601: } catch (Exception e) {
0602: }
0603:
0604: try {
0605: lastSaved = (String) parsedData
0606: .elementAt(ParserDataConstants.LAST_SAVED_BY);
0607: } catch (Exception e) {
0608: }
0609:
0610: String fileType = "File";
0611:
0612: try {
0613: fileType = parsedData.elementAt(
0614: ParserDataConstants.FILE_TYPE_POSITION)
0615: .toString();
0616: } catch (Exception e) {
0617: }
0618:
0619: indexer
0620: .indexDoc(
0621: ((Long) parsedData
0622: .elementAt(ParserDataConstants.LAST_MODIFIED_DATE_POSITION))
0623: .longValue(), creationDataLong
0624: .longValue(), accessDataLong
0625: .longValue(), consPath,
0626: content, fileType, title, properties,
0627: metadata, originalFile.getName(),
0628: mailSentDate, mailReceivedDate,
0629: subject, from, to, cc, bcc,
0630: originalFile.length(), owner, author,
0631: lastSaved);
0632: currentFile++;
0633: try {
0634: if (fileType.equals("xls")
0635: && SnapperAdmin.toSaveConvertedExcel()
0636: && !(SnapperAdmin.toSaveConvertedFile())) {
0637: String convertedData = parsedData
0638: .elementAt(
0639: ParserDataConstants.CONVERTED_CONTENT_POSITION)
0640: .toString();
0641: String convertedExtension = parsedData
0642: .elementAt(
0643: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
0644: .toString();
0645:
0646: if (convertedExtension.equals("html")) {
0647: String orig = originalFile
0648: .getAbsolutePath();
0649: orig = orig.replaceAll(":", "_");
0650: orig = orig + ".html";
0651:
0652: String root = SnapperAdmin
0653: .pathOfConvertedFiles();
0654:
0655: if (!root.endsWith("/")
0656: && !root.endsWith("\\"))
0657: root = root + File.separator;
0658:
0659: File convertedFile = new File(root + orig);
0660:
0661: FileOutputStream fi = null;
0662:
0663: try {
0664: File destDir = convertedFile
0665: .getParentFile();
0666: if (!destDir.exists()
0667: && !destDir.mkdirs()) {
0668: SnapperAdmin
0669: .logWarrning("Could not save converted file, could not create path "
0670: + destDir
0671: .getAbsolutePath());
0672: } else {
0673: convertedFile.createNewFile();
0674: fi = new FileOutputStream(
0675: convertedFile);
0676: fi.write(convertedData.getBytes());
0677: fi.flush();
0678: }
0679: } catch (Exception e) {
0680: SnapperAdmin
0681: .logWarrning("Could not save converted file : "
0682: + originalFile
0683: .getAbsolutePath()
0684: + " , message : "
0685: + e.getMessage());
0686: if (printStackTrace) {
0687: e.printStackTrace();
0688: }
0689:
0690: } finally {
0691: if (fi != null) {
0692: try {
0693: fi.close();
0694: } catch (Exception e1) {
0695: }
0696: fi = null;
0697: }
0698: }
0699: }
0700: }
0701:
0702: if (fileType.equals("doc")
0703: && SnapperAdmin.toSaveConvertedWord()
0704: && !(SnapperAdmin.toSaveConvertedFile())) {
0705: String convertedData = parsedData
0706: .elementAt(
0707: ParserDataConstants.CONVERTED_CONTENT_POSITION)
0708: .toString();
0709: String convertedExtension = parsedData
0710: .elementAt(
0711: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
0712: .toString();
0713:
0714: if (convertedExtension.equals("html")) {
0715: String orig = originalFile
0716: .getAbsolutePath();
0717: orig = orig.replaceAll(":", "_");
0718: orig = orig + ".html";
0719:
0720: String root = SnapperAdmin
0721: .pathOfConvertedFiles();
0722:
0723: if (!root.endsWith("/")
0724: && !root.endsWith("\\"))
0725: root = root + File.separator;
0726:
0727: File convertedFile = new File(root + orig);
0728:
0729: FileOutputStream fi = null;
0730:
0731: try {
0732: File destDir = convertedFile
0733: .getParentFile();
0734: if (!destDir.exists()
0735: && !destDir.mkdirs()) {
0736: SnapperAdmin
0737: .logWarrning("Could not save converted file, could not create path "
0738: + destDir
0739: .getAbsolutePath());
0740: } else {
0741: convertedFile.createNewFile();
0742: fi = new FileOutputStream(
0743: convertedFile);
0744: fi.write(convertedData.getBytes());
0745: fi.flush();
0746: }
0747: } catch (Exception e) {
0748: SnapperAdmin
0749: .logWarrning("Could not save converted file : "
0750: + originalFile
0751: .getAbsolutePath()
0752: + " , message : "
0753: + e.getMessage());
0754: if (printStackTrace) {
0755: e.printStackTrace();
0756: }
0757:
0758: } finally {
0759: if (fi != null) {
0760: try {
0761: fi.close();
0762: } catch (Exception e1) {
0763: }
0764: fi = null;
0765: }
0766: }
0767: }
0768: }
0769:
0770: if (fileType.equals("ppt")
0771: && SnapperAdmin.toSaveConvertedPowerPoint()
0772: && !(SnapperAdmin.toSaveConvertedFile())) {
0773: String convertedData = parsedData
0774: .elementAt(
0775: ParserDataConstants.CONVERTED_CONTENT_POSITION)
0776: .toString();
0777: String convertedExtension = parsedData
0778: .elementAt(
0779: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
0780: .toString();
0781:
0782: if (convertedExtension.equals("html")) {
0783: String orig = originalFile
0784: .getAbsolutePath();
0785: orig = orig.replaceAll(":", "_");
0786: orig = orig + ".html";
0787:
0788: String root = SnapperAdmin
0789: .pathOfConvertedFiles();
0790:
0791: if (!root.endsWith("/")
0792: && !root.endsWith("\\"))
0793: root = root + File.separator;
0794:
0795: File convertedFile = new File(root + orig);
0796:
0797: FileOutputStream fi = null;
0798:
0799: try {
0800: File destDir = convertedFile
0801: .getParentFile();
0802: if (!destDir.exists()
0803: && !destDir.mkdirs()) {
0804: SnapperAdmin
0805: .logWarrning("Could not save converted file, could not create path "
0806: + destDir
0807: .getAbsolutePath());
0808: } else {
0809: convertedFile.createNewFile();
0810: fi = new FileOutputStream(
0811: convertedFile);
0812: fi.write(convertedData.getBytes());
0813: fi.flush();
0814: }
0815: } catch (Exception e) {
0816: SnapperAdmin
0817: .logWarrning("Could not save converted file : "
0818: + originalFile
0819: .getAbsolutePath()
0820: + " , message : "
0821: + e.getMessage());
0822: if (printStackTrace) {
0823: e.printStackTrace();
0824: }
0825:
0826: } finally {
0827: if (fi != null) {
0828: try {
0829: fi.close();
0830: } catch (Exception e1) {
0831: }
0832: fi = null;
0833: }
0834: }
0835: }
0836: }
0837:
0838: if (fileType.equals("docx")
0839: && SnapperAdmin.toSaveConvertedWord2007()
0840: && !(SnapperAdmin.toSaveConvertedFile())) {
0841: String convertedData = parsedData
0842: .elementAt(
0843: ParserDataConstants.CONVERTED_CONTENT_POSITION)
0844: .toString();
0845: String convertedExtension = parsedData
0846: .elementAt(
0847: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
0848: .toString();
0849:
0850: if (convertedExtension.equals("html")) {
0851: String orig = originalFile
0852: .getAbsolutePath();
0853: orig = orig.replaceAll(":", "_");
0854: orig = orig + ".html";
0855:
0856: String root = SnapperAdmin
0857: .pathOfConvertedFiles();
0858:
0859: if (!root.endsWith("/")
0860: && !root.endsWith("\\"))
0861: root = root + File.separator;
0862:
0863: File convertedFile = new File(root + orig);
0864:
0865: FileOutputStream fi = null;
0866:
0867: try {
0868: File destDir = convertedFile
0869: .getParentFile();
0870: if (!destDir.exists()
0871: && !destDir.mkdirs()) {
0872: SnapperAdmin
0873: .logWarrning("Could not save converted file, could not create path "
0874: + destDir
0875: .getAbsolutePath());
0876: } else {
0877: convertedFile.createNewFile();
0878: fi = new FileOutputStream(
0879: convertedFile);
0880: fi.write(convertedData.getBytes());
0881: fi.flush();
0882: }
0883: } catch (Exception e) {
0884: SnapperAdmin
0885: .logWarrning("Could not save converted file : "
0886: + originalFile
0887: .getAbsolutePath()
0888: + " , message : "
0889: + e.getMessage());
0890: if (printStackTrace) {
0891: e.printStackTrace();
0892: }
0893:
0894: } finally {
0895: if (fi != null) {
0896: try {
0897: fi.close();
0898: } catch (Exception e1) {
0899: }
0900: fi = null;
0901: }
0902: }
0903: }
0904: }
0905:
0906: if (fileType.equals("xlsx")
0907: && SnapperAdmin.toSaveConvertedExcel2007()
0908: && !(SnapperAdmin.toSaveConvertedFile())) {
0909: String convertedData = parsedData
0910: .elementAt(
0911: ParserDataConstants.CONVERTED_CONTENT_POSITION)
0912: .toString();
0913: String convertedExtension = parsedData
0914: .elementAt(
0915: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
0916: .toString();
0917:
0918: if (convertedExtension.equals("html")) {
0919: String orig = originalFile
0920: .getAbsolutePath();
0921: orig = orig.replaceAll(":", "_");
0922: orig = orig + ".html";
0923:
0924: String root = SnapperAdmin
0925: .pathOfConvertedFiles();
0926:
0927: if (!root.endsWith("/")
0928: && !root.endsWith("\\"))
0929: root = root + File.separator;
0930:
0931: File convertedFile = new File(root + orig);
0932:
0933: FileOutputStream fi = null;
0934:
0935: try {
0936: File destDir = convertedFile
0937: .getParentFile();
0938: if (!destDir.exists()
0939: && !destDir.mkdirs()) {
0940: SnapperAdmin
0941: .logWarrning("Could not save converted file, could not create path "
0942: + destDir
0943: .getAbsolutePath());
0944: } else {
0945: convertedFile.createNewFile();
0946: fi = new FileOutputStream(
0947: convertedFile);
0948: fi.write(convertedData.getBytes());
0949: fi.flush();
0950: }
0951: } catch (Exception e) {
0952: SnapperAdmin
0953: .logWarrning("Could not save converted file : "
0954: + originalFile
0955: .getAbsolutePath()
0956: + " , message : "
0957: + e.getMessage());
0958: if (printStackTrace) {
0959: e.printStackTrace();
0960: }
0961:
0962: } finally {
0963: if (fi != null) {
0964: try {
0965: fi.close();
0966: } catch (Exception e1) {
0967: }
0968: fi = null;
0969: }
0970: }
0971: }
0972: }
0973:
0974: if (fileType.equals("pptx")
0975: && SnapperAdmin
0976: .toSaveConvertedPowerPoint2007()
0977: && !(SnapperAdmin.toSaveConvertedFile())) {
0978: String convertedData = parsedData
0979: .elementAt(
0980: ParserDataConstants.CONVERTED_CONTENT_POSITION)
0981: .toString();
0982: String convertedExtension = parsedData
0983: .elementAt(
0984: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
0985: .toString();
0986:
0987: if (convertedExtension.equals("html")) {
0988: String orig = originalFile
0989: .getAbsolutePath();
0990: orig = orig.replaceAll(":", "_");
0991: orig = orig + ".html";
0992:
0993: String root = SnapperAdmin
0994: .pathOfConvertedFiles();
0995:
0996: if (!root.endsWith("/")
0997: && !root.endsWith("\\"))
0998: root = root + File.separator;
0999:
1000: File convertedFile = new File(root + orig);
1001:
1002: FileOutputStream fi = null;
1003:
1004: try {
1005: File destDir = convertedFile
1006: .getParentFile();
1007: if (!destDir.exists()
1008: && !destDir.mkdirs()) {
1009: SnapperAdmin
1010: .logWarrning("Could not save converted file, could not create path "
1011: + destDir
1012: .getAbsolutePath());
1013: } else {
1014: convertedFile.createNewFile();
1015: fi = new FileOutputStream(
1016: convertedFile);
1017: fi.write(convertedData.getBytes());
1018: fi.flush();
1019: }
1020: } catch (Exception e) {
1021: SnapperAdmin
1022: .logWarrning("Could not save converted file : "
1023: + originalFile
1024: .getAbsolutePath()
1025: + " , message : "
1026: + e.getMessage());
1027: if (printStackTrace) {
1028: e.printStackTrace();
1029: }
1030:
1031: } finally {
1032: if (fi != null) {
1033: try {
1034: fi.close();
1035: } catch (Exception e1) {
1036: }
1037: fi = null;
1038: }
1039: }
1040: }
1041: }
1042:
1043: if (SnapperAdmin.toSaveConvertedFile()
1044: && parsedData
1045: .elementAt(ParserDataConstants.CONVERTED_CONTENT_POSITION) != null
1046: && parsedData
1047: .elementAt(ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION) != null) {
1048: String convertedData = parsedData
1049: .elementAt(
1050: ParserDataConstants.CONVERTED_CONTENT_POSITION)
1051: .toString();
1052: String convertedExtension = parsedData
1053: .elementAt(
1054: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
1055: .toString();
1056:
1057: if (convertedExtension.equals("html")) {
1058: String orig = originalFile
1059: .getAbsolutePath();
1060: orig = orig.replaceAll(":", "_");
1061: orig = orig + ".html";
1062:
1063: String root = SnapperAdmin
1064: .pathOfConvertedFiles();
1065:
1066: if (!root.endsWith("/")
1067: && !root.endsWith("\\"))
1068: root = root + File.separator;
1069:
1070: File convertedFile = new File(root + orig);
1071:
1072: FileOutputStream fi = null;
1073:
1074: try {
1075: File destDir = convertedFile
1076: .getParentFile();
1077: if (!destDir.exists()
1078: && !destDir.mkdirs()) {
1079: SnapperAdmin
1080: .logWarrning("Could not save converted file, could not create path "
1081: + destDir
1082: .getAbsolutePath());
1083: } else {
1084: convertedFile.createNewFile();
1085: fi = new FileOutputStream(
1086: convertedFile);
1087: fi.write(convertedData.getBytes());
1088: fi.flush();
1089: }
1090: } catch (Exception e) {
1091: SnapperAdmin
1092: .logWarrning("Could not save converted file : "
1093: + originalFile
1094: .getAbsolutePath()
1095: + " , message : "
1096: + e.getMessage());
1097: if (printStackTrace) {
1098: e.printStackTrace();
1099: }
1100:
1101: } finally {
1102: if (fi != null) {
1103: try {
1104: fi.close();
1105: } catch (Exception e1) {
1106: }
1107: fi = null;
1108: }
1109: }
1110: }
1111: }
1112: } catch (Exception ex) {
1113: SnapperAdmin
1114: .logWarrning("Could not save converted file : "
1115: + originalFile.getAbsolutePath()
1116: + " , message : " + ex.getMessage());
1117: if (printStackTrace) {
1118: ex.printStackTrace();
1119: }
1120: }
1121:
1122: } catch (Exception ex) {
1123: SnapperAdmin.logError("Could not index document : "
1124: + originalFile.getName() + " message : "
1125: + ex.getMessage());
1126: if (printStackTrace) {
1127: ex.printStackTrace();
1128: }
1129:
1130: } finally {
1131: if (parsedData != null)
1132: parsedData.removeAllElements();
1133: parsedData = null;
1134: }
1135: while (parser.hasNext()) {
1136: SnapperAdmin
1137: .logDebug("Parse container file curent time : "
1138: + (new SimpleDateFormat("HH:mm:ss "))
1139: .format(new Date(System
1140: .currentTimeMillis())));
1141:
1142: try {
1143: parsedData = parser.getNext();
1144: } catch (FileParserException e) {
1145: //SnapperAdmin.log.write(Logger.INFO, "File inside container : " + originalFile.getName()
1146: // + " could not be parsed , message : " + e.getMessage());
1147: SnapperAdmin.logDebug("File : " + e.getFileName()
1148: + " could not be parsed , message : "
1149: + e.getMessage());
1150: if (printStackTrace) {
1151: e.printStackTrace();
1152: }
1153:
1154: String fn = e.getFileName();
1155: if (fn != null)
1156: writeNotIndexedDocumentToFile(fn);
1157: continue;
1158: } catch (Throwable e) {
1159: SnapperAdmin.logDebug("File inside container : "
1160: + originalFile.getName()
1161: + " could not be parsed , message : "
1162: + e.getMessage());
1163: if (printStackTrace) {
1164: e.printStackTrace();
1165: }
1166: parser.close();
1167: return;
1168: }
1169:
1170: try {
1171:
1172: try {
1173: title = new String(parsedData.elementAt(
1174: ParserDataConstants.TITLE_POSITION)
1175: .toString());
1176: } catch (Exception e) {
1177: title = "";
1178: }
1179:
1180: String consPath = "";
1181:
1182: try {
1183: consPath = parsedData.elementAt(
1184: ParserDataConstants.FILE_PATH_POSITION)
1185: .toString();
1186: } catch (Exception e) {
1187: }
1188:
1189: if (SnapperManager.getInstance()
1190: .getFileSeparatorConvention() != null
1191: && SnapperManager.getInstance()
1192: .getFileSeparatorConvention()
1193: .equalsIgnoreCase("unix")) {
1194: consPath = consPath.replaceAll("\\\\", "/");
1195: originalHost = originalHost.replaceAll("\\\\",
1196: "/");
1197: }
1198:
1199: if (SnapperManager.getInstance()
1200: .getRelativeIndexPaths() != null
1201: && SnapperManager.getInstance()
1202: .getRelativeIndexPaths()
1203: .equalsIgnoreCase("true")) {
1204:
1205: if (consPath.startsWith(originalHost)) {
1206: consPath = consPath.substring(originalHost
1207: .length());
1208: if (consPath.startsWith("/"))
1209: consPath = consPath.substring(1);
1210: }
1211: }
1212:
1213: String content = "";
1214: String properties = "";
1215:
1216: try {
1217: properties = (String) parsedData
1218: .elementAt(
1219: ParserDataConstants.PROPERTIES_POSITION)
1220: .toString();
1221: } catch (Exception e) {
1222: }
1223:
1224: int propertiesLength = SnapperAdmin
1225: .getMaxPropertiesLength();
1226:
1227: if (properties != null) {
1228: if (properties.length() > propertiesLength)
1229: properties = properties.substring(0,
1230: propertiesLength);
1231: } else {
1232: properties = "";
1233: }
1234:
1235: if (SnapperAdmin.getMountTitleInContent())
1236: content = title;
1237: if (SnapperAdmin.getMountFilePathInContent()) {
1238: String formated = consPath;
1239: if (formated.indexOf("|") != -1)
1240: formated = formated.substring(0, formated
1241: .indexOf("|"));
1242: content += " " + formated;
1243: }
1244: if (SnapperAdmin.getMountPropertiesInContent())
1245: content += " " + properties;
1246:
1247: try {
1248: content += " "
1249: + parsedData
1250: .elementAt(
1251: ParserDataConstants.CONTENT_POSITION)
1252: .toString();
1253: } catch (Exception e) {
1254: }
1255:
1256: SnapperAdmin.logDebug("adding container file : "
1257: + consPath
1258: + " into index , curent time : "
1259: + (new SimpleDateFormat("HH:mm:ss "))
1260: .format(new Date(System
1261: .currentTimeMillis())));
1262:
1263: long mailSentDate = -1;
1264: long mailReceivedDate = -1;
1265:
1266: try {
1267: mailSentDate = ((Date) parsedData
1268: .elementAt(ParserDataConstants.MAIL_MESSAGE_SENT_DATE))
1269: .getTime();
1270: } catch (Exception e) {
1271: mailSentDate = -1;
1272: }
1273:
1274: try {
1275: mailReceivedDate = ((Date) parsedData
1276: .elementAt(ParserDataConstants.MAIL_MESSAGE_RECEIVED_DATE))
1277: .getTime();
1278: } catch (Exception e) {
1279: mailReceivedDate = -1;
1280: }
1281:
1282: String subject = null;
1283: String from = null;
1284: String to = null;
1285: String cc = null;
1286: String bcc = null;
1287:
1288: try {
1289: subject = (String) parsedData
1290: .elementAt(ParserDataConstants.MAIL_MESSAGE_SUBJECT);
1291: } catch (Exception e) {
1292: }
1293:
1294: try {
1295: from = (String) parsedData
1296: .elementAt(ParserDataConstants.MAIL_MESSAGE_FROM);
1297: } catch (Exception e) {
1298: }
1299:
1300: try {
1301: to = (String) parsedData
1302: .elementAt(ParserDataConstants.MAIL_MESSAGE_TO);
1303: } catch (Exception e) {
1304: }
1305: try {
1306: cc = (String) parsedData
1307: .elementAt(ParserDataConstants.MAIL_MESSAGE_CC);
1308: } catch (Exception e) {
1309: }
1310:
1311: try {
1312: bcc = (String) parsedData
1313: .elementAt(ParserDataConstants.MAIL_MESSAGE_BCC);
1314: } catch (Exception e) {
1315: }
1316:
1317: String author = null;
1318: String lastSaved = null;
1319:
1320: try {
1321: author = (String) parsedData
1322: .elementAt(ParserDataConstants.AUTHOR);
1323: } catch (Exception e) {
1324: }
1325:
1326: try {
1327: lastSaved = (String) parsedData
1328: .elementAt(ParserDataConstants.LAST_SAVED_BY);
1329: } catch (Exception e) {
1330: }
1331:
1332: String fileType = "File";
1333:
1334: try {
1335: fileType = parsedData.elementAt(
1336: ParserDataConstants.FILE_TYPE_POSITION)
1337: .toString();
1338: } catch (Exception e) {
1339: }
1340:
1341: indexer
1342: .indexDoc(
1343: ((Long) parsedData
1344: .elementAt(ParserDataConstants.LAST_MODIFIED_DATE_POSITION))
1345: .longValue(),
1346: creationDataLong.longValue(),
1347: accessDataLong.longValue(),
1348: consPath,
1349: content,
1350: fileType,
1351: title,
1352: properties,
1353: "",
1354: originalFile.getName(),
1355: mailSentDate,
1356: mailReceivedDate,
1357: subject,
1358: from,
1359: to,
1360: cc,
1361: bcc,
1362: ((Long) parsedData
1363: .elementAt(ParserDataConstants.SIZE_POSITION))
1364: .longValue(), owner,
1365: author, lastSaved);
1366:
1367: SnapperAdmin
1368: .logDebug("done adding container file : "
1369: + consPath
1370: + " into index, free memory = "
1371: + (Runtime.getRuntime()
1372: .freeMemory() / 1024)
1373: / 1024
1374: + " MB"
1375: + " total memory = "
1376: + (Runtime.getRuntime()
1377: .totalMemory() / 1024)
1378: / 1024 + " MB");
1379:
1380: try {
1381: if (fileType.equals("xls")
1382: && SnapperAdmin.toSaveConvertedExcel()
1383: && !(SnapperAdmin.toSaveConvertedFile())) {
1384: String convertedData = parsedData
1385: .elementAt(
1386: ParserDataConstants.CONVERTED_CONTENT_POSITION)
1387: .toString();
1388: String convertedExtension = parsedData
1389: .elementAt(
1390: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
1391: .toString();
1392:
1393: if (convertedExtension.equals("html")) {
1394: consPath = consPath
1395: .replaceAll(":", "_");
1396:
1397: char first = 0x7c;
1398: char second = 0x28;
1399: consPath = consPath.replace(first,
1400: second);
1401: consPath = consPath + ".html";
1402:
1403: String root = SnapperAdmin
1404: .pathOfConvertedFiles();
1405:
1406: if (!root.endsWith("/")
1407: && !root.endsWith("\\"))
1408: root = root + File.separator;
1409:
1410: File convertedFile = new File(root
1411: + consPath);
1412:
1413: FileOutputStream fi = null;
1414: try {
1415: File destDir = convertedFile
1416: .getParentFile();
1417: if (!destDir.exists()
1418: && !destDir.mkdirs()) {
1419: SnapperAdmin
1420: .logWarrning("Could not save converted file, could not create path "
1421: + destDir
1422: .getAbsolutePath());
1423: } else {
1424: convertedFile.createNewFile();
1425: fi = new FileOutputStream(
1426: convertedFile);
1427: fi.write(convertedData
1428: .getBytes());
1429: fi.flush();
1430: }
1431: } catch (Exception e) {
1432: SnapperAdmin
1433: .logWarrning("Could not save converted file : "
1434: + consPath
1435: + " , message : "
1436: + e.getMessage());
1437: } finally {
1438:
1439: if (fi != null) {
1440: try {
1441: fi.close();
1442: } catch (Exception e) {
1443: }
1444:
1445: fi = null;
1446: }
1447: }
1448: }
1449: }
1450:
1451: if (fileType.equals("doc")
1452: && SnapperAdmin.toSaveConvertedWord()
1453: && !(SnapperAdmin.toSaveConvertedFile())) {
1454: String convertedData = parsedData
1455: .elementAt(
1456: ParserDataConstants.CONVERTED_CONTENT_POSITION)
1457: .toString();
1458: String convertedExtension = parsedData
1459: .elementAt(
1460: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
1461: .toString();
1462:
1463: if (convertedExtension.equals("html")) {
1464: consPath = consPath
1465: .replaceAll(":", "_");
1466:
1467: char first = 0x7c;
1468: char second = 0x28;
1469: consPath = consPath.replace(first,
1470: second);
1471: consPath = consPath + ".html";
1472:
1473: String root = SnapperAdmin
1474: .pathOfConvertedFiles();
1475:
1476: if (!root.endsWith("/")
1477: && !root.endsWith("\\"))
1478: root = root + File.separator;
1479:
1480: File convertedFile = new File(root
1481: + consPath);
1482:
1483: FileOutputStream fi = null;
1484: try {
1485: File destDir = convertedFile
1486: .getParentFile();
1487: if (!destDir.exists()
1488: && !destDir.mkdirs()) {
1489: SnapperAdmin
1490: .logWarrning("Could not save converted file, could not create path "
1491: + destDir
1492: .getAbsolutePath());
1493: } else {
1494: convertedFile.createNewFile();
1495: fi = new FileOutputStream(
1496: convertedFile);
1497: fi.write(convertedData
1498: .getBytes());
1499: fi.flush();
1500: }
1501: } catch (Exception e) {
1502: SnapperAdmin
1503: .logWarrning("Could not save converted file : "
1504: + consPath
1505: + " , message : "
1506: + e.getMessage());
1507: } finally {
1508:
1509: if (fi != null) {
1510: try {
1511: fi.close();
1512: } catch (Exception e) {
1513: }
1514:
1515: fi = null;
1516: }
1517: }
1518: }
1519: }
1520:
1521: if (fileType.equals("ppt")
1522: && SnapperAdmin
1523: .toSaveConvertedPowerPoint()
1524: && !(SnapperAdmin.toSaveConvertedFile())) {
1525: String convertedData = parsedData
1526: .elementAt(
1527: ParserDataConstants.CONVERTED_CONTENT_POSITION)
1528: .toString();
1529: String convertedExtension = parsedData
1530: .elementAt(
1531: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
1532: .toString();
1533:
1534: if (convertedExtension.equals("html")) {
1535: consPath = consPath
1536: .replaceAll(":", "_");
1537:
1538: char first = 0x7c;
1539: char second = 0x28;
1540: consPath = consPath.replace(first,
1541: second);
1542: consPath = consPath + ".html";
1543:
1544: String root = SnapperAdmin
1545: .pathOfConvertedFiles();
1546:
1547: if (!root.endsWith("/")
1548: && !root.endsWith("\\"))
1549: root = root + File.separator;
1550:
1551: File convertedFile = new File(root
1552: + consPath);
1553:
1554: FileOutputStream fi = null;
1555: try {
1556: File destDir = convertedFile
1557: .getParentFile();
1558: if (!destDir.exists()
1559: && !destDir.mkdirs()) {
1560: SnapperAdmin
1561: .logWarrning("Could not save converted file, could not create path "
1562: + destDir
1563: .getAbsolutePath());
1564: } else {
1565: convertedFile.createNewFile();
1566: fi = new FileOutputStream(
1567: convertedFile);
1568: fi.write(convertedData
1569: .getBytes());
1570: fi.flush();
1571: }
1572: } catch (Exception e) {
1573: SnapperAdmin
1574: .logWarrning("Could not save converted file : "
1575: + consPath
1576: + " , message : "
1577: + e.getMessage());
1578: } finally {
1579:
1580: if (fi != null) {
1581: try {
1582: fi.close();
1583: } catch (Exception e) {
1584: }
1585:
1586: fi = null;
1587: }
1588: }
1589: }
1590: }
1591: if (SnapperAdmin.toSaveConvertedFile()
1592: && parsedData
1593: .elementAt(ParserDataConstants.CONVERTED_CONTENT_POSITION) != null
1594: && parsedData
1595: .elementAt(ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION) != null) {
1596: String convertedData = parsedData
1597: .elementAt(
1598: ParserDataConstants.CONVERTED_CONTENT_POSITION)
1599: .toString();
1600: String convertedExtension = parsedData
1601: .elementAt(
1602: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
1603: .toString();
1604:
1605: if (convertedExtension.equals("html")) {
1606: consPath = consPath
1607: .replaceAll(":", "_");
1608: char first = 0x7c;
1609: char second = 0x28;
1610: consPath = consPath.replace(first,
1611: second);
1612: consPath = consPath + ".html";
1613:
1614: String root = SnapperAdmin
1615: .pathOfConvertedFiles();
1616:
1617: if (!root.endsWith("/")
1618: && !root.endsWith("\\"))
1619: root = root + File.separator;
1620:
1621: File convertedFile = new File(root
1622: + consPath);
1623: FileOutputStream fi = null;
1624: try {
1625: File destDir = convertedFile
1626: .getParentFile();
1627: if (!destDir.exists()
1628: && !destDir.mkdirs()) {
1629: SnapperAdmin
1630: .logWarrning("Could not save converted file, could not create path "
1631: + destDir
1632: .getAbsolutePath());
1633: } else {
1634: convertedFile.createNewFile();
1635: fi = new FileOutputStream(
1636: convertedFile);
1637: fi.write(convertedData
1638: .getBytes());
1639: fi.flush();
1640:
1641: }
1642: } catch (Exception e) {
1643: SnapperAdmin
1644: .logWarrning("Could not save converted file : "
1645: + consPath
1646: + " , message : "
1647: + e.getMessage());
1648:
1649: } finally {
1650:
1651: if (fi != null) {
1652: try {
1653: fi.close();
1654: } catch (Exception e) {
1655: }
1656:
1657: fi = null;
1658: }
1659: }
1660: }
1661: }
1662: } catch (Exception ex) {
1663: SnapperAdmin
1664: .logWarrning("Could not save converted file : "
1665: + originalFile
1666: .getAbsolutePath()
1667: + " , message : "
1668: + ex.getMessage());
1669: if (printStackTrace) {
1670: ex.printStackTrace();
1671: }
1672:
1673: }
1674:
1675: } catch (Exception ex) {
1676: SnapperAdmin
1677: .logError("Could not index document : "
1678: + parsedData
1679: .elementAt(
1680: ParserDataConstants.FILE_PATH_POSITION)
1681: .toString() + " message : "
1682: + ex.getMessage());
1683: if (printStackTrace) {
1684: ex.printStackTrace();
1685: }
1686:
1687: } finally {
1688: if (parsedData != null)
1689: parsedData.removeAllElements();
1690: parsedData = null;
1691: }
1692: }
1693: SnapperAdmin.log.write(Logger.INFO, "Done indexing file");
1694: SnapperAdmin
1695: .logDebug("done adding : "
1696: + originalFile.getName()
1697: + " curent time : "
1698: + new SimpleDateFormat("HH:mm:ss ")
1699: .format(new Date(System
1700: .currentTimeMillis())));
1701:
1702: originalFile = null;
1703: if (parser != null)
1704: parser.close();
1705: parser = null;
1706: metadata = null;
1707: }
1708: }
1709:
1710: public void indexFTPDocs(File tempFile, String originalFile,
1711: long timestamp, String owner, long createdDate) {
1712:
1713: currentFile++;
1714: if (!indexSizeReached) {
1715: String metadata = "";
1716: title = "";
1717:
1718: if (isFiltered(new String(originalFile)))
1719: return;
1720:
1721: if (this .meta) {
1722: try {
1723: BusinessUtil bu = new BusinessUtil();
1724: metadata = new String(bu.getMetadata(originalFile,
1725: this .metaDB, this .metaTable,
1726: this .metaFileColumn, this .metaKeyColumn,
1727: this .metaValueColumn));
1728: String ln = SnapperManager.getInstance()
1729: .getLogicalNameFromDatabase();
1730: if (ln.equals("1"))
1731: this .title = new String(bu.title);
1732: bu.gc();
1733: bu = null;
1734: } catch (Exception ex) {
1735: //SnapperAdmin.log.write(Logger.INFO, "Not metadata for file: " + originalFile);
1736: SnapperAdmin.logDebug("Not metadata for file: "
1737: + originalFile);
1738: if (printStackTrace) {
1739: ex.printStackTrace();
1740: }
1741:
1742: }
1743: }
1744:
1745: if (currentFile > SnapperAdmin.getmaxIndexLength()) {
1746: doStop = true;
1747: indexSizeReached = true;
1748: SnapperAdmin
1749: .logInfo("Document number limit reached!!!Terminating index process!!!");
1750: return;
1751: }
1752: parser = new FileParserMaster();
1753: parser.setIndexDirectory(indexDirectory);
1754: parser.setIndexUnknownFileTypes(indexUnknownFileTypes);
1755: parser.setLimit(SnapperManager.getInstance()
1756: .getParserLimitProperties());
1757: parser.setProp(prop);
1758: parser.setConversionTypes(SnapperAdmin
1759: .getConverterClassNames());
1760: parser.setIndexContent(indexContent);
1761: if (SnapperAdmin.toSaveConvertedFile()) {
1762: parser.setConversionPath(SnapperAdmin
1763: .pathOfConvertedFiles());
1764: }
1765:
1766: Vector parsedData;
1767:
1768: if (!indexContent) {
1769: parsedData = new Vector();
1770: parsedData.add(ParserDataConstants.FILE_PATH_POSITION,
1771: originalFile);
1772: parsedData.add(ParserDataConstants.FILE_TYPE_POSITION,
1773: "");
1774: parsedData.add(ParserDataConstants.TITLE_POSITION,
1775: originalFile);
1776: parsedData
1777: .add(
1778: ParserDataConstants.LAST_MODIFIED_DATE_POSITION,
1779: new Long(timestamp));
1780: parsedData.add(ParserDataConstants.PROPERTIES_POSITION,
1781: "");
1782: parsedData
1783: .add(ParserDataConstants.CONTENT_POSITION, "");
1784: parsedData.add(ParserDataConstants.SIZE_POSITION,
1785: new Long(originalFile.length()));
1786:
1787: parsedData.add(
1788: ParserDataConstants.CONVERTED_CONTENT_POSITION,
1789: null);
1790: parsedData
1791: .add(
1792: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION,
1793: null);
1794:
1795: parsedData.add(
1796: ParserDataConstants.MAIL_MESSAGE_SENT_DATE,
1797: null);
1798: parsedData.add(
1799: ParserDataConstants.MAIL_MESSAGE_RECEIVED_DATE,
1800: null);
1801: parsedData.add(
1802: ParserDataConstants.MAIL_MESSAGE_SUBJECT, null);
1803: parsedData.add(ParserDataConstants.MAIL_MESSAGE_FROM,
1804: null);
1805: parsedData.add(ParserDataConstants.MAIL_MESSAGE_TO,
1806: null);
1807: parsedData.add(ParserDataConstants.MAIL_MESSAGE_CC,
1808: null);
1809: parsedData.add(ParserDataConstants.MAIL_MESSAGE_BCC,
1810: null);
1811:
1812: parsedData.add(ParserDataConstants.AUTHOR, null);
1813: parsedData.add(ParserDataConstants.LAST_SAVED_BY, null);
1814:
1815: } else {
1816:
1817: try {
1818: parser.setSource(0);
1819: if (SnapperAdmin.toSaveConvertedFile())
1820: parser.setToSaveConvertedFile(true);
1821: if (SnapperAdmin.toSaveConvertedExcel())
1822: parser.setToSaveConvertedExcel(true);
1823: if (SnapperAdmin.toSaveConvertedWord())
1824: parser.setToSaveConvertedWord(true);
1825: if (SnapperAdmin.toSaveConvertedPowerPoint())
1826: parser.setToSaveConvertedPowerPoint(true);
1827: if (SnapperAdmin.toSaveConvertedExcel2007())
1828: parser.setToSaveConvertedExcel2007(true);
1829: if (SnapperAdmin.toSaveConvertedWord2007())
1830: parser.setToSaveConvertedWord2007(true);
1831: if (SnapperAdmin.toSaveConvertedPowerPoint2007())
1832: parser.setToSaveConvertedPowerPoint2007(true);
1833: parsedData = parser.parse(tempFile, SnapperManager
1834: .getInstance().getTempDir());
1835: } catch (FileParserException e) {
1836: SnapperAdmin.logDebug("File : " + e.getFileName()
1837: + " could not be parsed , message : "
1838: + e.getMessage());
1839: if (printStackTrace) {
1840: e.printStackTrace();
1841: }
1842: writeNotIndexedDocumentToFile(new String(
1843: originalFile));
1844: parser.close();
1845: return;
1846: } catch (Throwable e) {
1847: SnapperAdmin.logDebug("File : " + originalFile
1848: + " could not be parsed , message "
1849: + e.getMessage());
1850: if (printStackTrace) {
1851: e.printStackTrace();
1852: }
1853: writeNotIndexedDocumentToFile(originalFile);
1854: parser.close();
1855: return;
1856: }
1857: }
1858:
1859: SnapperAdmin.logDebug("adding : " + originalFile);
1860:
1861: try {
1862: if (title.length() == 0) {
1863: try {
1864: title = parsedData.elementAt(
1865: ParserDataConstants.TITLE_POSITION)
1866: .toString();
1867: } catch (Exception e) {
1868: title = "";
1869: }
1870: }
1871:
1872: if (SnapperManager.getInstance()
1873: .getFileSeparatorConvention() != null
1874: && SnapperManager.getInstance()
1875: .getFileSeparatorConvention()
1876: .equalsIgnoreCase("unix")) {
1877: originalFile = originalFile.replaceAll("\\\\", "/");
1878: originalHost = originalHost.replaceAll("\\\\", "/");
1879: }
1880:
1881: if (SnapperManager.getInstance()
1882: .getRelativeIndexPaths() != null
1883: && SnapperManager.getInstance()
1884: .getRelativeIndexPaths()
1885: .equalsIgnoreCase("true")) {
1886:
1887: if (originalFile.startsWith(originalHost)) {
1888: originalFile = originalFile
1889: .substring(originalHost.length());
1890: if (originalFile.startsWith("/"))
1891: originalFile = originalFile.substring(1);
1892: }
1893: }
1894:
1895: String content = "";
1896: String properties = "";
1897:
1898: try {
1899: properties = (String) parsedData.elementAt(
1900: ParserDataConstants.PROPERTIES_POSITION)
1901: .toString();
1902: } catch (Exception e) {
1903: }
1904:
1905: int propertiesLength = SnapperAdmin
1906: .getMaxPropertiesLength();
1907:
1908: if (properties != null) {
1909: if (properties.length() > propertiesLength)
1910: properties = properties.substring(0,
1911: propertiesLength);
1912: } else {
1913: properties = "";
1914: }
1915:
1916: if (SnapperAdmin.getMountTitleInContent())
1917: content = title;
1918: if (SnapperAdmin.getMountFilePathInContent()) {
1919: String formated = originalFile;
1920: if (formated.indexOf("|") != -1)
1921: formated = formated.substring(0, formated
1922: .indexOf("|"));
1923: content += " " + formated;
1924: }
1925: if (SnapperAdmin.getMountPropertiesInContent())
1926: content += " " + properties;
1927: if (SnapperAdmin.getMountMetaDataInContent())
1928: content += " " + metadata;
1929:
1930: try {
1931: content += " "
1932: + parsedData
1933: .elementAt(
1934: ParserDataConstants.CONTENT_POSITION)
1935: .toString();
1936: } catch (Exception e) {
1937: }
1938:
1939: long mailSentDate = -1;
1940: long mailReceivedDate = -1;
1941:
1942: try {
1943: mailSentDate = ((Date) parsedData
1944: .elementAt(ParserDataConstants.MAIL_MESSAGE_SENT_DATE))
1945: .getTime();
1946: } catch (Exception e) {
1947: mailSentDate = -1;
1948: }
1949:
1950: try {
1951: mailReceivedDate = ((Date) parsedData
1952: .elementAt(ParserDataConstants.MAIL_MESSAGE_RECEIVED_DATE))
1953: .getTime();
1954: } catch (Exception e) {
1955: mailReceivedDate = -1;
1956: }
1957:
1958: String subject = null;
1959: String from = null;
1960: String to = null;
1961: String cc = null;
1962: String bcc = null;
1963:
1964: try {
1965: subject = (String) parsedData
1966: .elementAt(ParserDataConstants.MAIL_MESSAGE_SUBJECT);
1967: } catch (Exception e) {
1968: }
1969:
1970: try {
1971: from = (String) parsedData
1972: .elementAt(ParserDataConstants.MAIL_MESSAGE_FROM);
1973: } catch (Exception e) {
1974: }
1975:
1976: try {
1977: to = (String) parsedData
1978: .elementAt(ParserDataConstants.MAIL_MESSAGE_TO);
1979: } catch (Exception e) {
1980: }
1981:
1982: try {
1983: cc = (String) parsedData
1984: .elementAt(ParserDataConstants.MAIL_MESSAGE_CC);
1985: } catch (Exception e) {
1986: }
1987:
1988: try {
1989: bcc = (String) parsedData
1990: .elementAt(ParserDataConstants.MAIL_MESSAGE_BCC);
1991: } catch (Exception e) {
1992: }
1993:
1994: String author = null;
1995: String lastSaved = null;
1996:
1997: try {
1998: author = (String) parsedData
1999: .elementAt(ParserDataConstants.AUTHOR);
2000: } catch (Exception e) {
2001: }
2002:
2003: try {
2004: lastSaved = (String) parsedData
2005: .elementAt(ParserDataConstants.LAST_SAVED_BY);
2006: } catch (Exception e) {
2007: }
2008:
2009: String fileType = "File";
2010:
2011: try {
2012: fileType = parsedData.elementAt(
2013: ParserDataConstants.FILE_TYPE_POSITION)
2014: .toString();
2015: } catch (Exception e) {
2016: }
2017:
2018: indexer.indexDoc(timestamp, createdDate, -1,
2019: originalFile, content, fileType, title,
2020: properties, "", tempFile.getName(),
2021: mailSentDate, mailReceivedDate, subject, from,
2022: to, cc, bcc, tempFile.length(), owner, author,
2023: lastSaved);
2024: currentFile++;
2025: try {
2026: if (fileType.equals("xls")
2027: && SnapperAdmin.toSaveConvertedExcel()
2028: && !(SnapperAdmin.toSaveConvertedFile())) {
2029: String convertedData = parsedData
2030: .elementAt(
2031: ParserDataConstants.CONVERTED_CONTENT_POSITION)
2032: .toString();
2033: String convertedExtension = parsedData
2034: .elementAt(
2035: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
2036: .toString();
2037:
2038: if (convertedExtension.equals("html")) {
2039: String orig = originalFile;
2040: orig = orig.replaceAll(":", "_");
2041: orig = orig + ".html";
2042:
2043: String root = SnapperAdmin
2044: .pathOfConvertedFiles();
2045:
2046: if (!root.endsWith("/")
2047: && !root.endsWith("\\"))
2048: root = root + File.separator;
2049:
2050: File convertedFile = new File(root + orig);
2051: FileOutputStream fi = null;
2052: try {
2053: File destDir = convertedFile
2054: .getParentFile();
2055: if (!destDir.exists()
2056: && !destDir.mkdirs()) {
2057: SnapperAdmin
2058: .logWarrning("Could not save converted file, could not create path "
2059: + destDir
2060: .getAbsolutePath());
2061: } else {
2062: convertedFile.createNewFile();
2063: fi = new FileOutputStream(
2064: convertedFile);
2065: fi.write(convertedData.getBytes());
2066: fi.flush();
2067: }
2068: } catch (Exception e) {
2069: SnapperAdmin
2070: .logWarrning("Could not save converted file : "
2071: + originalFile
2072: + " , message : "
2073: + e.getMessage());
2074: } finally {
2075:
2076: if (fi != null) {
2077: try {
2078: fi.close();
2079: } catch (Exception e) {
2080: }
2081:
2082: fi = null;
2083: }
2084: }
2085: }
2086: }
2087:
2088: if (fileType.equals("doc")
2089: && SnapperAdmin.toSaveConvertedWord()
2090: && !(SnapperAdmin.toSaveConvertedFile())) {
2091: String convertedData = parsedData
2092: .elementAt(
2093: ParserDataConstants.CONVERTED_CONTENT_POSITION)
2094: .toString();
2095: String convertedExtension = parsedData
2096: .elementAt(
2097: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
2098: .toString();
2099:
2100: if (convertedExtension.equals("html")) {
2101: String orig = originalFile;
2102: orig = orig.replaceAll(":", "_");
2103: orig = orig + ".html";
2104:
2105: String root = SnapperAdmin
2106: .pathOfConvertedFiles();
2107:
2108: if (!root.endsWith("/")
2109: && !root.endsWith("\\"))
2110: root = root + File.separator;
2111:
2112: File convertedFile = new File(root + orig);
2113: FileOutputStream fi = null;
2114: try {
2115: File destDir = convertedFile
2116: .getParentFile();
2117: if (!destDir.exists()
2118: && !destDir.mkdirs()) {
2119: SnapperAdmin
2120: .logWarrning("Could not save converted file, could not create path "
2121: + destDir
2122: .getAbsolutePath());
2123: } else {
2124: convertedFile.createNewFile();
2125: fi = new FileOutputStream(
2126: convertedFile);
2127: fi.write(convertedData.getBytes());
2128: fi.flush();
2129: }
2130: } catch (Exception e) {
2131: SnapperAdmin
2132: .logWarrning("Could not save converted file : "
2133: + originalFile
2134: + " , message : "
2135: + e.getMessage());
2136: } finally {
2137:
2138: if (fi != null) {
2139: try {
2140: fi.close();
2141: } catch (Exception e) {
2142: }
2143:
2144: fi = null;
2145: }
2146: }
2147: }
2148: }
2149:
2150: if (fileType.equals("ppt")
2151: && SnapperAdmin.toSaveConvertedPowerPoint()
2152: && !(SnapperAdmin.toSaveConvertedFile())) {
2153: String convertedData = parsedData
2154: .elementAt(
2155: ParserDataConstants.CONVERTED_CONTENT_POSITION)
2156: .toString();
2157: String convertedExtension = parsedData
2158: .elementAt(
2159: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
2160: .toString();
2161:
2162: if (convertedExtension.equals("html")) {
2163: String orig = originalFile;
2164: orig = orig.replaceAll(":", "_");
2165: orig = orig + ".html";
2166:
2167: String root = SnapperAdmin
2168: .pathOfConvertedFiles();
2169:
2170: if (!root.endsWith("/")
2171: && !root.endsWith("\\"))
2172: root = root + File.separator;
2173:
2174: File convertedFile = new File(root + orig);
2175: FileOutputStream fi = null;
2176: try {
2177: File destDir = convertedFile
2178: .getParentFile();
2179: if (!destDir.exists()
2180: && !destDir.mkdirs()) {
2181: SnapperAdmin
2182: .logWarrning("Could not save converted file, could not create path "
2183: + destDir
2184: .getAbsolutePath());
2185: } else {
2186: convertedFile.createNewFile();
2187: fi = new FileOutputStream(
2188: convertedFile);
2189: fi.write(convertedData.getBytes());
2190: fi.flush();
2191: }
2192: } catch (Exception e) {
2193: SnapperAdmin
2194: .logWarrning("Could not save converted file : "
2195: + originalFile
2196: + " , message : "
2197: + e.getMessage());
2198: } finally {
2199:
2200: if (fi != null) {
2201: try {
2202: fi.close();
2203: } catch (Exception e) {
2204: }
2205:
2206: fi = null;
2207: }
2208: }
2209: }
2210: }
2211:
2212: if (SnapperAdmin.toSaveConvertedFile()
2213: && parsedData
2214: .elementAt(ParserDataConstants.CONVERTED_CONTENT_POSITION) != null
2215: && parsedData
2216: .elementAt(ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION) != null) {
2217: String convertedData = parsedData
2218: .elementAt(
2219: ParserDataConstants.CONVERTED_CONTENT_POSITION)
2220: .toString();
2221: String convertedExtension = parsedData
2222: .elementAt(
2223: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
2224: .toString();
2225:
2226: if (convertedExtension.equals("html")) {
2227: String orig = originalFile;
2228: orig = orig.replaceAll(":", "_");
2229: orig = orig + ".html";
2230:
2231: String root = SnapperAdmin
2232: .pathOfConvertedFiles();
2233:
2234: if (!root.endsWith("/")
2235: && !root.endsWith("\\"))
2236: root = root + File.separator;
2237:
2238: File convertedFile = new File(root + orig);
2239: FileOutputStream fi = null;
2240: try {
2241: File destDir = convertedFile
2242: .getParentFile();
2243: if (!destDir.exists()
2244: && !destDir.mkdirs()) {
2245: SnapperAdmin
2246: .logWarrning("Could not save converted file, could not create path "
2247: + destDir
2248: .getAbsolutePath());
2249: } else {
2250: convertedFile.createNewFile();
2251: fi = new FileOutputStream(
2252: convertedFile);
2253: fi.write(convertedData.getBytes());
2254: fi.flush();
2255: }
2256: } catch (Exception e) {
2257: SnapperAdmin
2258: .logWarrning("Could not save converted file : "
2259: + originalFile
2260: + " , message : "
2261: + e.getMessage());
2262: } finally {
2263:
2264: if (fi != null) {
2265: try {
2266: fi.close();
2267: } catch (Exception e) {
2268: }
2269:
2270: fi = null;
2271: }
2272: }
2273: }
2274: }
2275: } catch (Exception ex) {
2276: SnapperAdmin
2277: .logWarrning("Could not save converted file : "
2278: + originalFile
2279: + " , message : "
2280: + ex.getMessage());
2281: if (printStackTrace) {
2282: ex.printStackTrace();
2283: }
2284:
2285: }
2286: } catch (Exception ex) {
2287: SnapperAdmin.logError("Could not index document : "
2288: + originalFile + " message : "
2289: + ex.getMessage());
2290: if (printStackTrace) {
2291: ex.printStackTrace();
2292: }
2293: } finally {
2294: if (parsedData != null)
2295: parsedData.removeAllElements();
2296: parsedData = null;
2297: }
2298:
2299: while (parser.hasNext()) {
2300: try {
2301: parsedData = parser.getNext();
2302: } catch (FileParserException e) {
2303: SnapperAdmin.logDebug("File : " + e.getFileName()
2304: + " could not be parsed , message : "
2305: + e.getMessage());
2306: if (printStackTrace) {
2307: e.printStackTrace();
2308: }
2309: String fn = e.getFileName();
2310: if (fn != null) {
2311: if (fn.indexOf("|") != -1)
2312: fn = originalFile
2313: + fn.substring(fn.indexOf("|"));
2314: writeNotIndexedDocumentToFile(fn);
2315: }
2316: continue;
2317: } catch (Throwable e) {
2318: SnapperAdmin.logDebug("File inaide container : "
2319: + originalFile
2320: + " could not be parsed , message : "
2321: + e.getMessage());
2322: if (printStackTrace) {
2323: e.printStackTrace();
2324: }
2325: parser.close();
2326: return;
2327: }
2328:
2329: try {
2330:
2331: try {
2332: title = new String(parsedData.elementAt(
2333: ParserDataConstants.TITLE_POSITION)
2334: .toString());
2335: } catch (Exception e) {
2336: title = "";
2337: }
2338:
2339: String consPath = parsedData.elementAt(
2340: ParserDataConstants.FILE_PATH_POSITION)
2341: .toString();
2342:
2343: if (consPath.indexOf("|") != -1)
2344: consPath = originalFile
2345: + consPath.substring(consPath
2346: .indexOf("|"));
2347:
2348: if (SnapperManager.getInstance()
2349: .getFileSeparatorConvention() != null
2350: && SnapperManager.getInstance()
2351: .getFileSeparatorConvention()
2352: .equalsIgnoreCase("unix")) {
2353: consPath = consPath.replaceAll("\\\\", "/");
2354: originalHost = originalHost.replaceAll("\\\\",
2355: "/");
2356: }
2357:
2358: if (SnapperManager.getInstance()
2359: .getRelativeIndexPaths() != null
2360: && SnapperManager.getInstance()
2361: .getRelativeIndexPaths()
2362: .equalsIgnoreCase("true")) {
2363:
2364: if (consPath.startsWith(originalHost)) {
2365: consPath = consPath.substring(originalHost
2366: .length());
2367: if (consPath.startsWith("/"))
2368: consPath = consPath.substring(1);
2369: }
2370: }
2371:
2372: String content = "";
2373: String properties = "";
2374:
2375: try {
2376: properties = (String) parsedData
2377: .elementAt(
2378: ParserDataConstants.PROPERTIES_POSITION)
2379: .toString();
2380: } catch (Exception e) {
2381: }
2382:
2383: int propertiesLength = SnapperAdmin
2384: .getMaxPropertiesLength();
2385:
2386: if (properties != null) {
2387: if (properties.length() > propertiesLength)
2388: properties = properties.substring(0,
2389: propertiesLength);
2390: } else {
2391: properties = "";
2392: }
2393:
2394: if (SnapperAdmin.getMountTitleInContent())
2395: content = title;
2396: if (SnapperAdmin.getMountFilePathInContent()) {
2397: String formated = consPath;
2398: if (formated.indexOf("|") != -1)
2399: formated = formated.substring(0, formated
2400: .indexOf("|"));
2401: content += " " + formated;
2402: }
2403: if (SnapperAdmin.getMountPropertiesInContent())
2404: content += " " + properties;
2405:
2406: try {
2407: content += " "
2408: + parsedData
2409: .elementAt(
2410: ParserDataConstants.CONTENT_POSITION)
2411: .toString();
2412: } catch (Exception e) {
2413: }
2414:
2415: long mailSentDate = -1;
2416: long mailReceivedDate = -1;
2417:
2418: try {
2419: mailSentDate = ((Date) parsedData
2420: .elementAt(ParserDataConstants.MAIL_MESSAGE_SENT_DATE))
2421: .getTime();
2422: } catch (Exception e) {
2423: mailSentDate = -1;
2424: }
2425:
2426: try {
2427: mailReceivedDate = ((Date) parsedData
2428: .elementAt(ParserDataConstants.MAIL_MESSAGE_RECEIVED_DATE))
2429: .getTime();
2430: } catch (Exception e) {
2431: mailReceivedDate = -1;
2432: }
2433:
2434: String subject = null;
2435: String from = null;
2436: String to = null;
2437: String cc = null;
2438: String bcc = null;
2439:
2440: try {
2441: subject = (String) parsedData
2442: .elementAt(ParserDataConstants.MAIL_MESSAGE_SUBJECT);
2443: } catch (Exception e) {
2444: }
2445:
2446: try {
2447: from = (String) parsedData
2448: .elementAt(ParserDataConstants.MAIL_MESSAGE_FROM);
2449: } catch (Exception e) {
2450: }
2451:
2452: try {
2453: to = (String) parsedData
2454: .elementAt(ParserDataConstants.MAIL_MESSAGE_TO);
2455: } catch (Exception e) {
2456: }
2457:
2458: try {
2459: cc = (String) parsedData
2460: .elementAt(ParserDataConstants.MAIL_MESSAGE_CC);
2461: } catch (Exception e) {
2462: }
2463:
2464: try {
2465: bcc = (String) parsedData
2466: .elementAt(ParserDataConstants.MAIL_MESSAGE_BCC);
2467: } catch (Exception e) {
2468: }
2469:
2470: String author = null;
2471: String lastSaved = null;
2472:
2473: try {
2474: author = (String) parsedData
2475: .elementAt(ParserDataConstants.AUTHOR);
2476: } catch (Exception e) {
2477: }
2478:
2479: try {
2480: lastSaved = (String) parsedData
2481: .elementAt(ParserDataConstants.LAST_SAVED_BY);
2482: } catch (Exception e) {
2483: }
2484:
2485: String fileType = "File";
2486:
2487: try {
2488: fileType = parsedData.elementAt(
2489: ParserDataConstants.FILE_TYPE_POSITION)
2490: .toString();
2491: } catch (Exception e) {
2492: }
2493:
2494: indexer
2495: .indexDoc(
2496: ((Long) parsedData
2497: .elementAt(ParserDataConstants.LAST_MODIFIED_DATE_POSITION))
2498: .longValue(),
2499: createdDate,
2500: -1,
2501: consPath,
2502: content,
2503: fileType,
2504: title,
2505: properties,
2506: "",
2507: tempFile.getName(),
2508: mailSentDate,
2509: mailReceivedDate,
2510: subject,
2511: from,
2512: to,
2513: cc,
2514: bcc,
2515: ((Long) parsedData
2516: .elementAt(ParserDataConstants.SIZE_POSITION))
2517: .longValue(), owner,
2518: author, lastSaved);
2519: currentFile++;
2520: try {
2521: if (fileType.equals("xls")
2522: && SnapperAdmin.toSaveConvertedExcel()
2523: && !(SnapperAdmin.toSaveConvertedFile())) {
2524: String convertedData = parsedData
2525: .elementAt(
2526: ParserDataConstants.CONVERTED_CONTENT_POSITION)
2527: .toString();
2528: String convertedExtension = parsedData
2529: .elementAt(
2530: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
2531: .toString();
2532:
2533: if (convertedExtension.equals("html")) {
2534: consPath = consPath
2535: .replaceAll(":", "_");
2536:
2537: char first = 0x7c;
2538: char second = 0x28;
2539: consPath = consPath.replace(first,
2540: second);
2541: consPath = consPath + ".html";
2542:
2543: String root = SnapperAdmin
2544: .pathOfConvertedFiles();
2545:
2546: if (!root.endsWith("/")
2547: && !root.endsWith("\\"))
2548: root = root + File.separator;
2549:
2550: File convertedFile = new File(root
2551: + consPath);
2552:
2553: FileOutputStream fi = null;
2554: try {
2555: File destDir = convertedFile
2556: .getParentFile();
2557: if (!destDir.exists()
2558: && !destDir.mkdirs()) {
2559: SnapperAdmin
2560: .logWarrning("Could not save converted file, could not create path "
2561: + destDir
2562: .getAbsolutePath());
2563: } else {
2564: convertedFile.createNewFile();
2565: fi = new FileOutputStream(
2566: convertedFile);
2567: fi.write(convertedData
2568: .getBytes());
2569: fi.flush();
2570: }
2571: } catch (Exception e) {
2572: SnapperAdmin
2573: .logWarrning("Could not save converted file : "
2574: + consPath
2575: + " , message : "
2576: + e.getMessage());
2577: } finally {
2578:
2579: if (fi != null) {
2580: try {
2581: fi.close();
2582: } catch (Exception e) {
2583: }
2584:
2585: fi = null;
2586: }
2587: }
2588: }
2589: }
2590:
2591: if (fileType.equals("doc")
2592: && SnapperAdmin.toSaveConvertedWord()
2593: && !(SnapperAdmin.toSaveConvertedFile())) {
2594: String convertedData = parsedData
2595: .elementAt(
2596: ParserDataConstants.CONVERTED_CONTENT_POSITION)
2597: .toString();
2598: String convertedExtension = parsedData
2599: .elementAt(
2600: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
2601: .toString();
2602:
2603: if (convertedExtension.equals("html")) {
2604: consPath = consPath
2605: .replaceAll(":", "_");
2606:
2607: char first = 0x7c;
2608: char second = 0x28;
2609: consPath = consPath.replace(first,
2610: second);
2611: consPath = consPath + ".html";
2612:
2613: String root = SnapperAdmin
2614: .pathOfConvertedFiles();
2615:
2616: if (!root.endsWith("/")
2617: && !root.endsWith("\\"))
2618: root = root + File.separator;
2619:
2620: File convertedFile = new File(root
2621: + consPath);
2622:
2623: FileOutputStream fi = null;
2624: try {
2625: File destDir = convertedFile
2626: .getParentFile();
2627: if (!destDir.exists()
2628: && !destDir.mkdirs()) {
2629: SnapperAdmin
2630: .logWarrning("Could not save converted file, could not create path "
2631: + destDir
2632: .getAbsolutePath());
2633: } else {
2634: convertedFile.createNewFile();
2635: fi = new FileOutputStream(
2636: convertedFile);
2637: fi.write(convertedData
2638: .getBytes());
2639: fi.flush();
2640: }
2641: } catch (Exception e) {
2642: SnapperAdmin
2643: .logWarrning("Could not save converted file : "
2644: + consPath
2645: + " , message : "
2646: + e.getMessage());
2647: } finally {
2648:
2649: if (fi != null) {
2650: try {
2651: fi.close();
2652: } catch (Exception e) {
2653: }
2654:
2655: fi = null;
2656: }
2657: }
2658: }
2659: }
2660:
2661: if (fileType.equals("ppt")
2662: && SnapperAdmin
2663: .toSaveConvertedPowerPoint()
2664: && !(SnapperAdmin.toSaveConvertedFile())) {
2665: String convertedData = parsedData
2666: .elementAt(
2667: ParserDataConstants.CONVERTED_CONTENT_POSITION)
2668: .toString();
2669: String convertedExtension = parsedData
2670: .elementAt(
2671: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
2672: .toString();
2673:
2674: if (convertedExtension.equals("html")) {
2675: consPath = consPath
2676: .replaceAll(":", "_");
2677:
2678: char first = 0x7c;
2679: char second = 0x28;
2680: consPath = consPath.replace(first,
2681: second);
2682: consPath = consPath + ".html";
2683:
2684: String root = SnapperAdmin
2685: .pathOfConvertedFiles();
2686:
2687: if (!root.endsWith("/")
2688: && !root.endsWith("\\"))
2689: root = root + File.separator;
2690:
2691: File convertedFile = new File(root
2692: + consPath);
2693:
2694: FileOutputStream fi = null;
2695: try {
2696: File destDir = convertedFile
2697: .getParentFile();
2698: if (!destDir.exists()
2699: && !destDir.mkdirs()) {
2700: SnapperAdmin
2701: .logWarrning("Could not save converted file, could not create path "
2702: + destDir
2703: .getAbsolutePath());
2704: } else {
2705: convertedFile.createNewFile();
2706: fi = new FileOutputStream(
2707: convertedFile);
2708: fi.write(convertedData
2709: .getBytes());
2710: fi.flush();
2711: }
2712: } catch (Exception e) {
2713: SnapperAdmin
2714: .logWarrning("Could not save converted file : "
2715: + consPath
2716: + " , message : "
2717: + e.getMessage());
2718: } finally {
2719:
2720: if (fi != null) {
2721: try {
2722: fi.close();
2723: } catch (Exception e) {
2724: }
2725:
2726: fi = null;
2727: }
2728: }
2729: }
2730: }
2731: if (SnapperAdmin.toSaveConvertedFile()
2732: && parsedData
2733: .elementAt(ParserDataConstants.CONVERTED_CONTENT_POSITION) != null
2734: && parsedData
2735: .elementAt(ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION) != null) {
2736: String convertedData = parsedData
2737: .elementAt(
2738: ParserDataConstants.CONVERTED_CONTENT_POSITION)
2739: .toString();
2740: String convertedExtension = parsedData
2741: .elementAt(
2742: ParserDataConstants.CONVERTED_CONTENT_EXTENSION_POSITION)
2743: .toString();
2744:
2745: if (convertedExtension.equals("html")) {
2746: consPath = consPath
2747: .replaceAll(":", "_");
2748:
2749: char first = 0x7c;
2750: char second = 0x28;
2751: consPath = consPath.replace(first,
2752: second);
2753: consPath = consPath + ".html";
2754:
2755: String root = SnapperAdmin
2756: .pathOfConvertedFiles();
2757:
2758: if (!root.endsWith("/")
2759: && !root.endsWith("\\"))
2760: root = root + File.separator;
2761:
2762: File convertedFile = new File(root
2763: + consPath);
2764:
2765: FileOutputStream fi = null;
2766: try {
2767: File destDir = convertedFile
2768: .getParentFile();
2769: if (!destDir.exists()
2770: && !destDir.mkdirs()) {
2771: SnapperAdmin
2772: .logWarrning("Could not save converted file, could not create path "
2773: + destDir
2774: .getAbsolutePath());
2775: } else {
2776: convertedFile.createNewFile();
2777: fi = new FileOutputStream(
2778: convertedFile);
2779: fi.write(convertedData
2780: .getBytes());
2781: fi.flush();
2782: }
2783: } catch (Exception e) {
2784: SnapperAdmin
2785: .logWarrning("Could not save converted file : "
2786: + consPath
2787: + " , message : "
2788: + e.getMessage());
2789: } finally {
2790:
2791: if (fi != null) {
2792: try {
2793: fi.close();
2794: } catch (Exception e) {
2795: }
2796:
2797: fi = null;
2798: }
2799: }
2800: }
2801: }
2802: } catch (Exception ex) {
2803: SnapperAdmin
2804: .logWarrning("Could not save converted file : "
2805: + consPath
2806: + " , message : "
2807: + ex.getMessage());
2808: if (printStackTrace) {
2809: ex.printStackTrace();
2810: }
2811: }
2812:
2813: } catch (Exception ex) {
2814: SnapperAdmin
2815: .logError("Could not index document : "
2816: + parsedData
2817: .elementAt(
2818: ParserDataConstants.FILE_PATH_POSITION)
2819: .toString() + " message : "
2820: + ex.getMessage());
2821: if (printStackTrace) {
2822: ex.printStackTrace();
2823: }
2824: } finally {
2825: if (parsedData != null)
2826: parsedData.removeAllElements();
2827: parsedData = null;
2828: }
2829: }
2830:
2831: SnapperAdmin.logDebug("done adding : " + originalFile);
2832:
2833: originalFile = null;
2834: if (parser != null)
2835: parser.close();
2836: parser = null;
2837: metadata = null;
2838: }
2839: }
2840:
2841: public void indexDocs(String siteName, String host,
2842: String language, String location, String protocol,
2843: String username, String password, boolean create) {
2844: Date start = new Date();
2845: originalHost = location;
2846:
2847: if (included.size() > 0) {
2848:
2849: } else if (protocol.equals("FileSystem")) {
2850: indexFS(location, start);
2851: } else if (protocol.equals("FTP")) {
2852: indexFTP(host, location, username, password, start);
2853: } else if (protocol.equals("UNC")) {
2854: indexUNC(host, location, username, password, start);
2855: } else if (protocol.equals("WebDAV")) {
2856: indexWebDAV(host, location, username, password, start);
2857: }
2858: parser = null;
2859: }
2860:
2861: private void indexWebDAV(String host, String location,
2862: String username, String password, Date start) {
2863: if (!indexSizeReached) {
2864: try {
2865: DocumentStore ds = new DocumentStore(username,
2866: location, "WebDAV", host, "", username,
2867: password, SnapperManager.getInstance()
2868: .getTempDir());
2869: ds.retrieveWebDAVFileNames();
2870: Vector origfiles = ds.getOriginalFiles();
2871: Vector timestamps = ds.getTimeStamps();
2872: Vector createdDates = ds.getCreation();
2873:
2874: if (origfiles != null) {
2875: for (int p = 0; p < origfiles.size(); p++) {
2876: if (doStop)
2877: return;
2878: File tempFile = null;
2879:
2880: try {
2881: tempFile = ds
2882: .getWebDAVFile((String) (origfiles
2883: .elementAt(p)));
2884: } catch (Exception e) {
2885: SnapperAdmin
2886: .logError("Problem occured while geting WebDAV file : "
2887: + (String) (origfiles
2888: .elementAt(p))
2889: + " message : "
2890: + e.getMessage());
2891: if (printStackTrace) {
2892: e.printStackTrace();
2893: }
2894: tempFile = null;
2895: }
2896:
2897: if (tempFile != null)
2898: if (fileOK(tempFile)) {
2899: try {
2900: indexFTPDocs(tempFile,
2901: (String) origfiles
2902: .elementAt(p),
2903: ((Long) timestamps
2904: .elementAt(p))
2905: .longValue(), null,
2906: ((Long) createdDates
2907: .elementAt(p))
2908: .longValue());
2909: } catch (Exception e) {
2910: SnapperAdmin
2911: .logError("Problem occured while indexing WebDAV file : "
2912: + (String) (origfiles
2913: .elementAt(p))
2914: + " message : "
2915: + e.getMessage());
2916: if (printStackTrace) {
2917: e.printStackTrace();
2918: }
2919: }
2920: }
2921: try {
2922: tempFile.delete();
2923: } catch (Exception e) {
2924: }
2925: }
2926: }
2927: ds.closeWebDAV();
2928: SnapperManager.getInstance().getIndexerFactory()
2929: .removeIndexer();
2930: Date end = new Date();
2931: SnapperAdmin.logInfo(end.getTime() - start.getTime()
2932: + " total milliseconds, for WebDAV path");
2933:
2934: } catch (Exception e) {
2935: SnapperAdmin
2936: .logError("Problem occured in index WebDAV : "
2937: + e.getMessage());
2938: if (printStackTrace) {
2939: e.printStackTrace();
2940: }
2941:
2942: }
2943: }
2944: }
2945:
2946: private void indexUNC(String host, String location,
2947: String username, String password, Date start) {
2948: if (!indexSizeReached) {
2949: location = (new File(location)).getPath();
2950: currentPath = location;
2951: try {
2952:
2953: if (indexDiference) {
2954: BufferedReader d = null;
2955: try {
2956: File include = new File(indexdir, "path.txt");
2957:
2958: if (!include.exists())
2959: include.createNewFile();
2960:
2961: FileInputStream fis = new FileInputStream(
2962: include);
2963: d = new BufferedReader(new InputStreamReader(
2964: fis));
2965: String listIndexedPath = d.readLine();
2966: lastIndexedDirectory = d.readLine();
2967:
2968: if (listIndexedPath != null
2969: && listIndexedPath.equals("finished"))
2970: return;
2971:
2972: if (listIndexedPath == null
2973: || listIndexedPath.equals("")) {
2974: startToIndex = true;
2975: } else {
2976:
2977: if (lastIndexedDirectory != null
2978: && !lastIndexedDirectory.equals("")) {
2979: lastIndexedFile = new File(
2980: lastIndexedDirectory);
2981:
2982: String temp1 = lastIndexedFile
2983: .getPath();
2984: String temp2 = location;
2985:
2986: if (!temp1.endsWith(File.separator))
2987: temp1 = temp1 + File.separator;
2988:
2989: if (!temp2.endsWith(File.separator))
2990: temp2 = temp2 + File.separator;
2991: // mora jer moze da se desi ime fildera lola i lola1
2992: // a on nesme da prodje
2993: if (!temp1.startsWith(temp2))
2994: return;
2995:
2996: if (temp1.equals(temp2)) {
2997: startToIndex = true;
2998: }
2999: } else {
3000: startToIndex = true;
3001: }
3002: }
3003:
3004: } catch (IOException e) {
3005: if (printStackTrace) {
3006: e.printStackTrace();
3007: }
3008: } finally {
3009:
3010: if (d != null) {
3011: try {
3012: d.close();
3013: } catch (IOException ioe) {
3014: }
3015: d = null;
3016: }
3017: }
3018: }
3019:
3020: DocumentStore ds = new DocumentStore(username,
3021: location, "UNC", host, "", username, password,
3022: SnapperManager.getInstance().getTempDir());
3023: File[] uncFiles = ds.retrieveUNCFiles(location);
3024:
3025: if (uncFiles != null) {
3026: java.util.Arrays.sort(uncFiles);
3027: for (int p = 0; p < uncFiles.length; p++) {
3028: if (doStop)
3029: return;
3030: File fl = new File(host + File.separator
3031: + location, uncFiles[p].getName());
3032:
3033: if (fl.isDirectory()) {
3034: String[] fls = fl.list();
3035: if (fls != null) {
3036: java.util.Arrays.sort(fls);
3037: indexSubfolders(fl, fls);
3038: }
3039: }
3040: if (fileOK(uncFiles[p])) {
3041: {
3042: if (!indexDiference) {
3043: try {
3044: indexDocs(new File(location,
3045: uncFiles[p].getName()));
3046: } catch (Exception e) {
3047: SnapperAdmin
3048: .logError("Problem occured while indexing UNC file : "
3049: + location
3050: + uncFiles[p]
3051: .getName()
3052: + ", message : "
3053: + e
3054: .getMessage());
3055: if (printStackTrace) {
3056: e.printStackTrace();
3057: }
3058: }
3059: } else if (indexDiference
3060: && startToIndex) {
3061: try {
3062: Reader reader = ReaderFactory
3063: .createReader("org.enhydra.snapper.wrapper.lucene.LuceneReader");
3064: reader.setUpReader(siteName);
3065: reader.unlock();
3066:
3067: String consPath = fl.getPath();
3068:
3069: if (SnapperManager
3070: .getInstance()
3071: .getFileSeparatorConvention() != null
3072: && SnapperManager
3073: .getInstance()
3074: .getFileSeparatorConvention()
3075: .equalsIgnoreCase(
3076: "unix")) {
3077: consPath = consPath
3078: .replaceAll("\\\\",
3079: "/");
3080: originalHost = originalHost
3081: .replaceAll("\\\\",
3082: "/");
3083: }
3084:
3085: if (SnapperManager
3086: .getInstance()
3087: .getRelativeIndexPaths() != null
3088: && SnapperManager
3089: .getInstance()
3090: .getRelativeIndexPaths()
3091: .equalsIgnoreCase(
3092: "true")) {
3093:
3094: if (consPath
3095: .startsWith(originalHost)) {
3096: consPath = consPath
3097: .substring(originalHost
3098: .length());
3099: if (consPath
3100: .startsWith("/"))
3101: consPath = consPath
3102: .substring(1);
3103: }
3104: }
3105:
3106: if (!reader
3107: .fileExists(consPath)) {
3108: indexDocs(fl);
3109: }
3110: reader.closeReader();
3111: } catch (Exception e) {
3112: SnapperAdmin
3113: .logError("Problem : "
3114: + e
3115: .getMessage());
3116: if (printStackTrace) {
3117: e.printStackTrace();
3118: }
3119: }
3120: }
3121: }
3122: }
3123: }
3124: }
3125: Date end = new Date();
3126: SnapperManager.getInstance().getIndexerFactory()
3127: .removeIndexer();
3128:
3129: SnapperAdmin.logInfo(end.getTime() - start.getTime()
3130: + " total milliseconds, for UNC path");
3131:
3132: } catch (Exception e) {
3133: SnapperAdmin.logError("Problem occured in index UNC : "
3134: + e.getMessage());
3135: if (printStackTrace) {
3136: e.printStackTrace();
3137: }
3138:
3139: return;
3140: }
3141:
3142: BufferedWriter d = null;
3143: try {
3144: File include = new File(indexdir, "path.txt");
3145:
3146: if (!include.exists())
3147: include.createNewFile();
3148:
3149: FileOutputStream fis = new FileOutputStream(include);
3150: d = new BufferedWriter(new OutputStreamWriter(fis));
3151: d.write("");
3152: d.write("");
3153:
3154: } catch (IOException e) {
3155:
3156: } finally {
3157: if (d != null) {
3158: try {
3159: d.close();
3160: } catch (IOException ioe) {
3161: }
3162: }
3163: }
3164: }
3165: }
3166:
3167: /**
3168: * @param host
3169: * @param location
3170: * @param username
3171: * @param password
3172: * @param start
3173: */
3174: private void indexFTP(String host, String location,
3175: String username, String password, Date start) {
3176: if (!indexSizeReached) {
3177: try {
3178: DocumentStore ds = new DocumentStore(username,
3179: location, "FTP", host, "", username, password,
3180: SnapperManager.getInstance().getTempDir());
3181: if (!ds.connect())
3182: return;
3183:
3184: ds.retrieveFTPFileNames();
3185: Vector origfiles = ds.getOriginalFiles();
3186: Vector timestamps = ds.getTimeStamps();
3187: Vector owners = ds.getOwners();
3188:
3189: if (origfiles != null) {
3190: for (int p = 0; p < origfiles.size(); p++) {
3191: if (doStop)
3192: return;
3193: File tempFile = null;
3194:
3195: try {
3196: tempFile = ds
3197: .getFTPFile((String) (origfiles
3198: .elementAt(p)));
3199: } catch (Exception e) {
3200: SnapperAdmin
3201: .logError("Problem occured while geting FTP file : "
3202: + (String) (origfiles
3203: .elementAt(p))
3204: + " message : "
3205: + e.getMessage());
3206: if (printStackTrace) {
3207: e.printStackTrace();
3208: }
3209: tempFile = null;
3210: }
3211:
3212: if (tempFile != null)
3213:
3214: if (fileOK(tempFile)) {
3215: try {
3216: indexFTPDocs(tempFile,
3217: (String) origfiles
3218: .elementAt(p),
3219: ((Long) timestamps
3220: .elementAt(p))
3221: .longValue(),
3222: (String) owners
3223: .elementAt(p), -1);
3224: } catch (Exception e) {
3225: SnapperAdmin
3226: .logError("Problem occured while indexing FTP file : "
3227: + (String) (origfiles
3228: .elementAt(p))
3229: + " message : "
3230: + e.getMessage());
3231: if (printStackTrace) {
3232: e.printStackTrace();
3233: }
3234: }
3235: }
3236:
3237: try {
3238: tempFile.delete();
3239: } catch (Exception e) {
3240: }
3241:
3242: }
3243: }
3244: ds.closeFTP();
3245: SnapperManager.getInstance().getIndexerFactory()
3246: .removeIndexer();
3247: Date end = new Date();
3248:
3249: SnapperAdmin.logInfo(end.getTime() - start.getTime()
3250: + " total milliseconds, for FTP path");
3251:
3252: } catch (Exception e) {
3253: SnapperAdmin.logError("Problem occured in index FTP : "
3254: + e.getMessage());
3255: if (printStackTrace) {
3256: e.printStackTrace();
3257: }
3258: }
3259: }
3260: }
3261:
3262: /**
3263: * @param location
3264: * @param start
3265: */
3266:
3267: private boolean indexDiference = false;
3268:
3269: private String lastIndexedDirectory = null;
3270:
3271: private boolean startToIndex = false;
3272:
3273: private String currentPath = "";
3274:
3275: private void indexFS(String location, Date start) {
3276: if (!indexSizeReached) {
3277: location = (new File(location)).getPath();
3278: currentPath = location;
3279: if (indexDiference) {
3280: BufferedReader d = null;
3281: try {
3282: File include = new File(indexdir, "path.txt");
3283:
3284: if (!include.exists())
3285: include.createNewFile();
3286:
3287: FileInputStream fis = new FileInputStream(include);
3288: d = new BufferedReader(new InputStreamReader(fis));
3289: String listIndexedPath = d.readLine();
3290: lastIndexedDirectory = d.readLine();
3291:
3292: if (listIndexedPath != null
3293: && listIndexedPath.equals("finished"))
3294: return;
3295:
3296: if (listIndexedPath == null
3297: || listIndexedPath.equals("")) {
3298: startToIndex = true;
3299: } else {
3300:
3301: if (lastIndexedDirectory != null
3302: && !lastIndexedDirectory.equals("")) {
3303: lastIndexedFile = new File(
3304: lastIndexedDirectory);
3305:
3306: String temp1 = lastIndexedFile.getPath();
3307: String temp2 = location;
3308:
3309: if (!temp1.endsWith(File.separator))
3310: temp1 = temp1 + File.separator;
3311:
3312: if (!temp2.endsWith(File.separator))
3313: temp2 = temp2 + File.separator;
3314: // mora jer moze da se desi ime fildera lola i lola1 a
3315: // on nesme da prodje
3316: if (!temp1.startsWith(temp2))
3317: return;
3318:
3319: if (temp1.equals(temp2)) {
3320: startToIndex = true;
3321: }
3322: } else {
3323: startToIndex = true;
3324: }
3325: }
3326: } catch (IOException e) {
3327: if (printStackTrace) {
3328: e.printStackTrace();
3329: }
3330: } finally {
3331:
3332: if (d != null) {
3333: try {
3334: d.close();
3335: } catch (IOException ioe) {
3336: }
3337: d = null;
3338: }
3339: }
3340: }
3341:
3342: File loc;
3343: try {
3344: loc = new File(location);
3345: if (loc.canRead()) {
3346: if (loc.isDirectory()) {
3347:
3348: String[] files = loc.list();
3349:
3350: if (files != null) {
3351:
3352: java.util.Arrays.sort(files);
3353:
3354: for (int p = 0; p < files.length; p++) {
3355: if (doStop)
3356: return;
3357: File fl = new File(location, files[p]);
3358:
3359: if (fl.isDirectory()) {
3360: String[] fls = fl.list();
3361:
3362: if (fls != null) {
3363: java.util.Arrays.sort(fls);
3364: indexSubfolders(fl, fls);
3365: }
3366: }
3367:
3368: if (fileOK(new File(location, files[p]))) {
3369: if (!indexDiference) {
3370: indexDocs(new File(location,
3371: files[p]));
3372: } else if (indexDiference
3373: && startToIndex) {
3374: try {
3375: Reader reader = ReaderFactory
3376: .createReader("org.enhydra.snapper.wrapper.lucene.LuceneReader");
3377: reader
3378: .setUpReader(siteName);
3379: reader.unlock();
3380:
3381: String consPath = fl
3382: .getPath();
3383:
3384: if (SnapperManager
3385: .getInstance()
3386: .getFileSeparatorConvention() != null
3387: && SnapperManager
3388: .getInstance()
3389: .getFileSeparatorConvention()
3390: .equalsIgnoreCase(
3391: "unix")) {
3392: consPath = consPath
3393: .replaceAll(
3394: "\\\\",
3395: "/");
3396: originalHost = originalHost
3397: .replaceAll(
3398: "\\\\",
3399: "/");
3400: }
3401:
3402: if (SnapperManager
3403: .getInstance()
3404: .getRelativeIndexPaths() != null
3405: && SnapperManager
3406: .getInstance()
3407: .getRelativeIndexPaths()
3408: .equalsIgnoreCase(
3409: "true")) {
3410:
3411: if (consPath
3412: .startsWith(originalHost)) {
3413: consPath = consPath
3414: .substring(originalHost
3415: .length());
3416: if (consPath
3417: .startsWith("/"))
3418: consPath = consPath
3419: .substring(1);
3420: }
3421: }
3422:
3423: if (!reader
3424: .fileExists(consPath)) {
3425: indexDocs(fl);
3426: }
3427:
3428: reader.closeReader();
3429: } catch (Exception e) {
3430: SnapperAdmin
3431: .logError("Problem : "
3432: + e
3433: .getMessage());
3434: if (printStackTrace) {
3435: e.printStackTrace();
3436: }
3437: }
3438:
3439: }
3440: }
3441: }
3442: }
3443: }
3444: }
3445:
3446: Date end = new Date();
3447: SnapperManager.getInstance().getIndexerFactory()
3448: .removeIndexer();
3449:
3450: SnapperAdmin.logInfo(end.getTime() - start.getTime()
3451: + " total milliseconds, for FS path");
3452:
3453: } catch (Exception e) {
3454: SnapperAdmin
3455: .logError("Problem occured in index File System path : "
3456: + e.getMessage());
3457: if (printStackTrace) {
3458: e.printStackTrace();
3459: }
3460:
3461: return;
3462: }
3463:
3464: BufferedWriter d = null;
3465: if (!indexSizeReached) {
3466: try {
3467: File include = new File(indexdir, "path.txt");
3468:
3469: if (!include.exists())
3470: include.createNewFile();
3471:
3472: FileOutputStream fis = new FileOutputStream(include);
3473: d = new BufferedWriter(new OutputStreamWriter(fis));
3474: d.write("");
3475: d.write("");
3476:
3477: } catch (IOException e) {
3478:
3479: } finally {
3480: if (d != null) {
3481: try {
3482: d.close();
3483: } catch (IOException ioe) {
3484: }
3485: }
3486: }
3487: }
3488: }
3489: }
3490:
3491: public boolean fileOK(File file) {
3492: try {
3493: boolean retVal = fc.check(file);
3494: return retVal;
3495: } catch (Exception e) {
3496: SnapperAdmin.logError("Problem : " + e.getMessage());
3497: if (printStackTrace) {
3498: e.printStackTrace();
3499: }
3500: }
3501:
3502: return false;
3503: }
3504:
3505: private void indexSubfoldersInclude(File dir, String[] files) {
3506:
3507: BufferedWriter d = null;
3508: try {
3509: File include = new File(indexdir, "include.txt");
3510:
3511: if (!include.exists())
3512: include.createNewFile();
3513:
3514: FileOutputStream fis = new FileOutputStream(include);
3515: d = new BufferedWriter(new OutputStreamWriter(fis));
3516: d.write(dir.getPath());
3517:
3518: } catch (IOException e) {
3519: if (printStackTrace) {
3520: e.printStackTrace();
3521: }
3522: } finally {
3523: if (d != null) {
3524: try {
3525: d.close();
3526: } catch (IOException ioe) {
3527: }
3528: }
3529: }
3530:
3531: if (indexDiference && !startToIndex) {
3532: String temp1 = lastIndexedDirectory;
3533: String temp2 = dir.getPath();
3534:
3535: if (!temp1.endsWith(File.separator))
3536: temp1 = lastIndexedDirectory + File.separator;
3537:
3538: if (!temp2.endsWith(File.separator))
3539: temp2 = dir.getPath() + File.separator;
3540: // mora jer moze da se desi ime fildera lola i lola1 a on nesme da
3541: // prodje
3542: if (!temp1.startsWith(temp2))
3543: return;
3544:
3545: if (dir.getPath().equals(lastIndexedDirectory)) {
3546: startToIndex = true;
3547: }
3548: }
3549:
3550: if (files != null) {
3551: for (int q = 0; q < files.length; q++) {
3552: if (doStop)
3553: return;
3554: File fl = new File(dir, files[q]);
3555: if (fl.isDirectory()) {
3556: String[] fls = fl.list();
3557: if (fls != null) {
3558: java.util.Arrays.sort(fls);
3559: indexSubfoldersInclude(fl, fls);
3560: }
3561: }
3562: if (fileOK(new File(dir, files[q]))) {
3563: try {
3564: if (!indexDiference) {
3565: indexDocs(new File(dir, files[q]));
3566: } else if ((indexDiference && startToIndex)
3567: && ((lastIndexedDirectory != null && dir
3568: .getPath().equals(
3569: lastIndexedDirectory)) || (lastIndexedFile != null && dir
3570: .getPath().equals(
3571: lastIndexedFile
3572: .getParent())))) {
3573: try {
3574: Reader reader = ReaderFactory
3575: .createReader("org.enhydra.snapper.wrapper.lucene.LuceneReader");
3576: reader.setUpReader(siteName);
3577: reader.unlock();
3578:
3579: String consPath = (new File(dir,
3580: files[q])).getPath();
3581:
3582: if (SnapperManager.getInstance()
3583: .getFileSeparatorConvention() != null
3584: && SnapperManager
3585: .getInstance()
3586: .getFileSeparatorConvention()
3587: .equalsIgnoreCase(
3588: "unix")) {
3589: consPath = consPath.replaceAll(
3590: "\\\\", "/");
3591: originalHost = originalHost
3592: .replaceAll("\\\\", "/");
3593: }
3594:
3595: if (SnapperManager.getInstance()
3596: .getRelativeIndexPaths() != null
3597: && SnapperManager
3598: .getInstance()
3599: .getRelativeIndexPaths()
3600: .equalsIgnoreCase(
3601: "true")) {
3602:
3603: if (consPath
3604: .startsWith(originalHost)) {
3605: consPath = consPath
3606: .substring(originalHost
3607: .length());
3608: if (consPath.startsWith("/"))
3609: consPath = consPath
3610: .substring(1);
3611: }
3612: }
3613:
3614: if (!reader.fileExists(consPath)) {
3615: indexDocs(new File(dir, files[q]));
3616: }
3617:
3618: reader.closeReader();
3619: } catch (Exception e) {
3620: SnapperAdmin.logError("Problem : "
3621: + e.getMessage());
3622: if (printStackTrace) {
3623: e.printStackTrace();
3624: }
3625: }
3626:
3627: } else if (indexDiference && startToIndex) {
3628: indexDocs(new File(dir, files[q]));
3629: }
3630: } catch (Exception ex) {
3631: SnapperAdmin.logError("Problem : "
3632: + ex.getMessage());
3633: if (printStackTrace) {
3634: ex.printStackTrace();
3635: }
3636: }
3637: }
3638: }
3639: }
3640: }
3641:
3642: private void indexSubfolders(File dir, String[] files) {
3643:
3644: BufferedWriter d = null;
3645: try {
3646: File include = new File(indexdir, "path.txt");
3647: File currentPathFile = new File(currentPath);
3648: if (!include.exists())
3649: include.createNewFile();
3650:
3651: FileOutputStream fis = new FileOutputStream(include);
3652: d = new BufferedWriter(new OutputStreamWriter(fis));
3653: d.write(currentPathFile.getPath() + "\n");
3654: d.write(dir.getPath());
3655:
3656: } catch (IOException e) {
3657:
3658: } finally {
3659: if (d != null) {
3660: try {
3661: d.close();
3662: } catch (IOException ioe) {
3663: }
3664: }
3665: }
3666:
3667: if (indexDiference && !startToIndex) {
3668: String temp1 = lastIndexedDirectory;
3669: String temp2 = dir.getPath();
3670:
3671: if (!temp1.endsWith(File.separator))
3672: temp1 = lastIndexedDirectory + File.separator;
3673:
3674: if (!temp2.endsWith(File.separator))
3675: temp2 = dir.getPath() + File.separator;
3676: // mora jer moze da se desi ime fildera lola i lola1 a on nesme da
3677: // prodje
3678: if (!temp1.startsWith(temp2))
3679: return;
3680:
3681: if (dir.getPath().equals(lastIndexedDirectory)) {
3682: startToIndex = true;
3683: }
3684: }
3685:
3686: if (files != null) {
3687: for (int q = 0; q < files.length; q++) {
3688: if (doStop)
3689: return;
3690: File fl = new File(dir, files[q]);
3691: if (fl.isDirectory()) {
3692: String[] fls = fl.list();
3693: if (fls != null) {
3694: java.util.Arrays.sort(fls);
3695: indexSubfolders(fl, fls);
3696: }
3697: }
3698: if (fileOK(new File(dir, files[q]))) {
3699: try {
3700: if (!indexDiference) {
3701: indexDocs(new File(dir, files[q]));
3702: } else if ((indexDiference && startToIndex)
3703: && ((lastIndexedDirectory != null && dir
3704: .getPath().equals(
3705: lastIndexedDirectory)) || (lastIndexedFile != null && dir
3706: .getPath().equals(
3707: lastIndexedFile
3708: .getParent())))) {
3709: try {
3710: Reader reader = ReaderFactory
3711: .createReader("org.enhydra.snapper.wrapper.lucene.LuceneReader");
3712: reader.setUpReader(siteName);
3713: reader.unlock();
3714:
3715: String consPath = (new File(dir,
3716: files[q])).getPath();
3717:
3718: if (SnapperManager.getInstance()
3719: .getFileSeparatorConvention() != null
3720: && SnapperManager
3721: .getInstance()
3722: .getFileSeparatorConvention()
3723: .equalsIgnoreCase(
3724: "unix")) {
3725: consPath = consPath.replaceAll(
3726: "\\\\", "/");
3727: originalHost = originalHost
3728: .replaceAll("\\\\", "/");
3729: }
3730:
3731: if (SnapperManager.getInstance()
3732: .getRelativeIndexPaths() != null
3733: && SnapperManager
3734: .getInstance()
3735: .getRelativeIndexPaths()
3736: .equalsIgnoreCase(
3737: "true")) {
3738:
3739: if (consPath
3740: .startsWith(originalHost)) {
3741: consPath = consPath
3742: .substring(originalHost
3743: .length());
3744: if (consPath.startsWith("/"))
3745: consPath = consPath
3746: .substring(1);
3747: }
3748: }
3749:
3750: if (!reader.fileExists(consPath)) {
3751: indexDocs(new File(dir, files[q]));
3752: }
3753:
3754: reader.closeReader();
3755: } catch (Exception e) {
3756: SnapperAdmin.logError("Problem : "
3757: + e.getMessage());
3758: if (printStackTrace) {
3759: e.printStackTrace();
3760: }
3761: }
3762:
3763: } else if (indexDiference && startToIndex) {
3764: indexDocs(new File(dir, files[q]));
3765: }
3766: } catch (Exception ex) {
3767: SnapperAdmin.logError("Problem : "
3768: + ex.getMessage());
3769: if (printStackTrace) {
3770: ex.printStackTrace();
3771: }
3772: }
3773: }
3774: }
3775: }
3776: }
3777:
3778: public Vector getNotIndexed() {
3779: return null;
3780: }
3781:
3782: private boolean checksize() {
3783: String[] files = indexdir.list();
3784: long length = 0;
3785: for (int q = 0; q < files.length; q++) {
3786: length += (new File(indexdir, files[q])).length();
3787: }
3788: if (length >= SnapperAdmin.getmaxIndexLength()) {
3789: SnapperAdmin.logDebug("***INDEX GREATER THAN THAN "
3790: + SnapperAdmin.getmaxIndexLength() + " bytes ***");
3791: SnapperAdmin.logInfo("Aborting index!!!");
3792: return false;
3793: }
3794: return true;
3795: }
3796:
3797: private boolean isFiltered(String filePath) {
3798: String path = filePath;
3799: path = path.replaceAll("\\\\", "/");
3800: if (filtered.contains(path)) {
3801: SnapperAdmin.logDebug("File filtered! ---> " + filePath);
3802: return true;
3803: }
3804: return false;
3805: }
3806:
3807: public void setMeta(String db, String table, String file,
3808: String key, String value) {
3809: this .metaDB = db;
3810: this .metaTable = table;
3811: this .metaFileColumn = file;
3812: this .metaKeyColumn = key;
3813: this .metaValueColumn = value;
3814: this .meta = true;
3815: }
3816:
3817: public void setInclude(String db, String table, String column,
3818: String modified) {
3819: this .includeDB = db;
3820: this .includeTable = table;
3821: this .includeFileColumn = column;
3822: this .includeModifiedColumn = modified;
3823: this .include = true;
3824: }
3825:
3826: public void setExclude(String db, String table, String column) {
3827: this .excludeDB = db;
3828: this .excludeTable = table;
3829: this .excludeFileColumn = column;
3830: this .filter = true;
3831: }
3832:
3833: public Vector getFiltered() throws Exception {
3834: BusinessUtil bu = new BusinessUtil();
3835: return bu.getFilteredFiles(excludeDB, excludeTable,
3836: excludeFileColumn);
3837: }
3838:
3839: public TreeSet getIncluded() {
3840: try {
3841: BusinessUtil bu = new BusinessUtil();
3842: return bu.getIncludedFiles(includeDB, includeTable,
3843: includeFileColumn);
3844: } catch (Exception e) {
3845: SnapperAdmin.logError("Problem : " + e.getMessage());
3846: if (printStackTrace) {
3847: e.printStackTrace();
3848: }
3849: return new TreeSet();
3850: }
3851: }
3852:
3853: public void setDoStop(boolean n) {
3854: doStop = n;
3855: }
3856:
// Ring buffer of the most recent include-list entries that indexInclude()
// skipped while searching for its resume point; manageLastTen() re-checks
// them against the index.
File[] lastTen = new File[10];

// Next write position in lastTen; indexInclude() wraps it back to 0 once it
// exceeds 9.
int i = 0;

// File form of the checkpoint line that indexInclude() reads from
// include.txt; stays null until a non-empty checkpoint has been read.
File lastIndexedFile = null;
3862:
3863: public void indexInclude() {
3864:
3865: Date start = new Date();
3866:
3867: if (indexDiference) {
3868: BufferedReader d = null;
3869: try {
3870: File include = new File(indexdir, "include.txt");
3871:
3872: if (!include.exists())
3873: include.createNewFile();
3874:
3875: FileInputStream fis = new FileInputStream(include);
3876: d = new BufferedReader(new InputStreamReader(fis));
3877: lastIndexedDirectory = d.readLine();
3878:
3879: if (lastIndexedDirectory == null
3880: || lastIndexedDirectory.equals(""))
3881: startToIndex = true;
3882: else
3883: lastIndexedFile = new File(lastIndexedDirectory);
3884:
3885: } catch (IOException e) {
3886:
3887: } finally {
3888:
3889: if (d != null) {
3890: try {
3891: d.close();
3892: } catch (IOException ioe) {
3893: }
3894: d = null;
3895: }
3896: }
3897: }
3898:
3899: try {
3900: BusinessUtil bu = new BusinessUtil();
3901: TreeSet included = bu.getIncludedFiles(this .includeDB,
3902: this .includeTable, this .includeFileColumn);
3903: Iterator itr = included.iterator();
3904:
3905: if (lastIndexedFile == null) {
3906: startToIndex = true;
3907: }
3908:
3909: while (itr.hasNext()) {
3910: if (doStop)
3911: return;
3912: File fl = new File(String
3913: .valueOf(itr.next().toString()));
3914:
3915: if (indexDiference && !startToIndex) {
3916:
3917: if (i > 9)
3918: i = 0;
3919: lastTen[i] = fl;
3920: i++;
3921: if (lastIndexedFile.isFile()) {
3922: if (lastIndexedFile.equals(fl)) {
3923: startToIndex = true;
3924: manageLastTen();
3925: }
3926: continue;
3927: } else {
3928: String temp1 = lastIndexedFile.getPath();
3929: String temp2 = fl.getPath();
3930:
3931: if (!temp1.endsWith(File.separator))
3932: temp1 = temp1 + File.separator;
3933:
3934: if (!temp2.endsWith(File.separator))
3935: temp2 = temp2 + File.separator;
3936: // mora jer moze da se desi ime fildera lola i lola1 a
3937: // on nesme da prodje
3938: if (!temp1.startsWith(temp2))
3939: continue;
3940:
3941: if (temp1.equals(temp2)) {
3942: startToIndex = true;
3943: lastTen[i] = null;
3944: }
3945:
3946: }
3947: }
3948:
3949: if (fl.exists()) {
3950: if (fl.isDirectory()) {
3951: String[] fls = fl.list();
3952: if (fls != null) {
3953: java.util.Arrays.sort(fls);
3954: indexSubfoldersInclude(fl, fls);
3955: }
3956: }
3957: if (fileOK(fl)) {
3958: BufferedWriter d = null;
3959: try {
3960: File include = new File(indexdir,
3961: "include.txt");
3962:
3963: if (!include.exists())
3964: include.createNewFile();
3965:
3966: FileOutputStream fis = new FileOutputStream(
3967: include);
3968: d = new BufferedWriter(
3969: new OutputStreamWriter(fis));
3970: d.write(fl.getPath());
3971: } catch (IOException e) {
3972:
3973: } finally {
3974: if (d != null) {
3975: try {
3976: d.close();
3977: } catch (IOException ioe) {
3978: }
3979: }
3980: }
3981:
3982: if ((!indexDiference || startToIndex)
3983: && fl.getPath().equals(
3984: lastIndexedDirectory)) {
3985: try {
3986: Reader reader = ReaderFactory
3987: .createReader("org.enhydra.snapper.wrapper.lucene.LuceneReader");
3988: reader.setUpReader(siteName);
3989: reader.unlock();
3990:
3991: String consPath = fl.getPath();
3992:
3993: if (SnapperManager.getInstance()
3994: .getFileSeparatorConvention() != null
3995: && SnapperManager
3996: .getInstance()
3997: .getFileSeparatorConvention()
3998: .equalsIgnoreCase(
3999: "unix")) {
4000: consPath = consPath.replaceAll(
4001: "\\\\", "/");
4002: originalHost = originalHost
4003: .replaceAll("\\\\", "/");
4004: }
4005:
4006: if (SnapperManager.getInstance()
4007: .getRelativeIndexPaths() != null
4008: && SnapperManager
4009: .getInstance()
4010: .getRelativeIndexPaths()
4011: .equalsIgnoreCase(
4012: "true")) {
4013:
4014: if (consPath
4015: .startsWith(originalHost)) {
4016: consPath = consPath
4017: .substring(originalHost
4018: .length());
4019: if (consPath.startsWith("/"))
4020: consPath = consPath
4021: .substring(1);
4022: }
4023: }
4024:
4025: if (!reader.fileExists(consPath)) {
4026: indexDocs(fl);
4027: }
4028: reader.closeReader();
4029: } catch (Exception e) {
4030: SnapperAdmin.logError("Problem : "
4031: + e.getMessage());
4032: if (printStackTrace) {
4033: e.printStackTrace();
4034: }
4035: }
4036:
4037: } else if (!indexDiference || startToIndex) {
4038: indexDocs(fl);
4039: }
4040: }
4041: }
4042: fl = null;
4043: }
4044: included = null;
4045: bu.gc();
4046: bu = null;
4047: Date end = new Date();
4048: this .fc = null;
4049:
4050: SnapperManager.getInstance().getIndexerFactory()
4051: .removeIndexer();
4052:
4053: SnapperAdmin.logInfo(end.getTime() - start.getTime()
4054: + " total milliseconds, for include list ");
4055:
4056: File include = new File(indexdir, "include.txt");
4057:
4058: if (include.exists()) {
4059: include.delete();
4060: }
4061:
4062: } catch (Exception e) {
4063: SnapperAdmin
4064: .logError("Unexpected problem in index include list : "
4065: + e.getMessage());
4066: if (printStackTrace) {
4067: e.printStackTrace();
4068: }
4069:
4070: }
4071:
4072: }
4073:
4074: private void manageLastTen() {
4075: try {
4076: Reader reader = ReaderFactory
4077: .createReader("org.enhydra.snapper.wrapper.lucene.LuceneReader");
4078: reader.setUpReader(siteName);
4079: reader.unlock();
4080: for (int j = 0; j < 10; j++) {
4081: File temp = lastTen[j];
4082:
4083: try {
4084: if (temp != null) {
4085: if (reader.fileExists(temp.getPath())) {
4086: lastTen[j] = null;
4087: }
4088: }
4089: } catch (Exception e) {
4090: SnapperAdmin.logError("Unexpected problem : "
4091: + e.getMessage());
4092: if (printStackTrace) {
4093: e.printStackTrace();
4094: }
4095: }
4096: }
4097: reader.closeReader();
4098: } catch (Exception e) {
4099: SnapperAdmin.logError("Unexpected problem : "
4100: + e.getMessage());
4101: if (printStackTrace) {
4102: e.printStackTrace();
4103: }
4104: }
4105:
4106: for (int j = 0; j < 10; j++) {
4107: File temp = lastTen[j];
4108:
4109: if (temp != null && temp.isDirectory()) {
4110: String[] fls = temp.list();
4111: if (fls != null) {
4112: java.util.Arrays.sort(fls);
4113: indexSubfoldersInclude(temp, fls);
4114: }
4115: }
4116: if (temp != null && fileOK(temp)) {
4117: BufferedWriter d = null;
4118: try {
4119: File include = new File(indexdir, "include.txt");
4120:
4121: if (!include.exists())
4122: include.createNewFile();
4123:
4124: FileOutputStream fis = new FileOutputStream(include);
4125: d = new BufferedWriter(new OutputStreamWriter(fis));
4126: d.write(temp.getPath());
4127: } catch (IOException e) {
4128:
4129: } finally {
4130: if (d != null) {
4131: try {
4132: d.close();
4133: } catch (IOException ioe) {
4134: }
4135: }
4136: }
4137: indexDocs(temp);
4138: }
4139: }
4140: }
4141:
4142: public void setIndexContents(boolean index) {
4143: indexContent = index;
4144: }
4145:
4146: public void setIndexUnknownFileTypes(boolean index) {
4147: indexUnknownFileTypes = index;
4148: }
4149:
4150: public void setIndexDirectory(boolean index) {
4151: indexDirectory = index;
4152: }
4153:
4154: public void setIndexFileTypes(Properties prop) {
4155: this .prop = prop;
4156: }
4157:
4158: public void setIndexDiference(boolean index) {
4159: indexDiference = index;
4160: }
4161:
4162: private void writeNotIndexedDocumentToFile(String line) {
4163:
4164: BufferedOutputStream resultOS = null;
4165:
4166: try {
4167: String outputFileS = SnapperAdmin.getLogDirectory();
4168: outputFileS = outputFileS + logFileName + ".txt";
4169: File outputFile = new File(outputFileS);
4170:
4171: if (!outputFile.exists())
4172: outputFile.createNewFile();
4173:
4174: resultOS = new BufferedOutputStream(new FileOutputStream(
4175: outputFile, true));
4176: line = line + "\n";
4177: resultOS.write(line.getBytes());
4178: resultOS.flush();
4179:
4180: } catch (Exception e) {
4181: SnapperAdmin
4182: .logError("Problem ocured while trying to log not indexed file "
4183: + e.getMessage());
4184: if (printStackTrace) {
4185: e.printStackTrace();
4186: }
4187: } finally {
4188:
4189: if (resultOS != null) {
4190: try {
4191: resultOS.close();
4192: } catch (Exception e) {
4193: }
4194: resultOS = null;
4195: }
4196:
4197: }
4198: }
4199:
// Name part of the "not indexed" log file: writeNotIndexedDocumentToFile()
// appends it to the log directory and adds the ".txt" suffix.
String logFileName = "";
4201:
4202: public void setLogFileName(String logFileName) {
4203: this .logFileName = logFileName;
4204: }
4205:
4206: public void indexMetaData() {
4207:
4208: String pattern = SnapperManager.getInstance()
4209: .getDocumentUpdatePattern();
4210:
4211: try {
4212: BusinessUtil bu = new BusinessUtil();
4213: TreeSet list = bu.getMetadataList(metaDB, metaTable,
4214: metaFileColumn);
4215:
4216: Context envCtx = (Context) new InitialContext()
4217: .lookup("java:comp/env");
4218: DataSource ds = (DataSource) envCtx.lookup(metaDB);
4219:
4220: Reader reader = ReaderFactory
4221: .createReader("org.enhydra.snapper.wrapper.lucene.LuceneReader");
4222: reader.setUpReader(indexdir.getAbsolutePath());
4223:
4224: if (null != list) {
4225:
4226: Iterator itr = list.iterator();
4227:
4228: while (itr.hasNext()) {
4229:
4230: String filePath = (String) itr.next();
4231:
4232: if (null == filePath)
4233: continue;
4234:
4235: File tempFile = new File(filePath);
4236:
4237: String consPath = tempFile.getPath();
4238:
4239: if (SnapperManager.getInstance()
4240: .getFileSeparatorConvention() != null
4241: && SnapperManager.getInstance()
4242: .getFileSeparatorConvention()
4243: .equalsIgnoreCase("unix")) {
4244: consPath = consPath.replaceAll("\\\\", "/");
4245: originalHost = originalHost.replaceAll("\\\\",
4246: "/");
4247: }
4248:
4249: if (SnapperManager.getInstance()
4250: .getRelativeIndexPaths() != null
4251: && SnapperManager.getInstance()
4252: .getRelativeIndexPaths()
4253: .equalsIgnoreCase("true")) {
4254:
4255: if (consPath.startsWith(originalHost)) {
4256: consPath = consPath.substring(originalHost
4257: .length());
4258: if (consPath.startsWith("/"))
4259: consPath = consPath.substring(1);
4260: }
4261: }
4262:
4263: if (!reader.fileExists(consPath)) {
4264:
4265: StringBuffer metadata = new StringBuffer();
4266: Connection connection = null;
4267: PreparedStatement pstmt = null;
4268: ResultSet rs1 = null;
4269: // QueryBuilder qb1 = null;
4270: // DBConnection dbconn = null;
4271: // dbconn =
4272: // dbm.findLogicalDatabase(metaDB).allocateConnection();
4273: // RDBTable t = new RDBTable(metaTable);
4274: // qb1 = new QueryBuilder(metaTable);
4275: // qb1.addWhere(new RDBColumn(t, metaFileColumn),
4276: // filePath);
4277:
4278: // if (!(SnapperManager.getInstance().getFetchSize() <
4279: // 1))
4280: // qb1.setCurrentFetchSize(SnapperManager.getInstance().getFetchSize());
4281:
4282: // rs1 = qb1.executeQuery(dbconn);
4283:
4284: try {
4285: connection = ds.getConnection();
4286: filePath = filePath.replaceAll("'", "''");
4287: String sql_query = "SELECT "
4288: + metaKeyColumn + ","
4289: + metaValueColumn + " FROM "
4290: + metaTable + " WHERE "
4291: + metaFileColumn + "='" + filePath
4292: + "'";
4293: pstmt = connection
4294: .prepareStatement(sql_query);
4295: if (!(SnapperManager.getInstance()
4296: .getFetchSize() < 1))
4297: pstmt.setFetchSize(SnapperManager
4298: .getInstance().getFetchSize());
4299: rs1 = pstmt.executeQuery();
4300:
4301: String title = tempFile.getName();
4302: String content = "";
4303: String modified = "";
4304:
4305: if (null != rs1) {
4306: while (rs1.next()) {
4307:
4308: String key = rs1
4309: .getString(metaKeyColumn);
4310:
4311: if (null != key)
4312: key = key.trim();
4313: else
4314: key = "";
4315:
4316: String value = rs1
4317: .getString(metaValueColumn);
4318:
4319: if (null != value)
4320: value = value.trim();
4321: else
4322: value = "";
4323:
4324: metadata.append(key + " = " + value
4325: + " \n ");
4326:
4327: if (key.equals(SnapperManager
4328: .getInstance()
4329: .getDocumentLogicalName())) {
4330: String ln = SnapperManager
4331: .getInstance()
4332: .getLogicalNameFromDatabase();
4333: if (ln.equals("1"))
4334: title = value;
4335: }
4336:
4337: if (key.equals(SnapperManager
4338: .getInstance()
4339: .getDocumentUpdate()))
4340: modified = value;
4341:
4342: }
4343: }
4344:
4345: long modifiedLong = System
4346: .currentTimeMillis();
4347:
4348: try {
4349: DateFormat df;
4350:
4351: if (pattern == null
4352: || pattern.equalsIgnoreCase(""))
4353: df = new SimpleDateFormat();
4354: else
4355: df = new SimpleDateFormat(pattern);
4356: df.setTimeZone(TimeZone
4357: .getTimeZone("GMT"));
4358: if (!modified.equals("")) {
4359: Date modifDate = df.parse(modified);
4360: modifiedLong = modifDate.getTime();
4361: }
4362:
4363: } catch (Exception ex) {
4364: SnapperAdmin
4365: .logWarrning("Problem in index Meta Data, transformation of time :"
4366: + ex.getMessage());
4367: if (printStackTrace) {
4368: ex.printStackTrace();
4369: }
4370: }
4371:
4372: if (SnapperAdmin.getMountTitleInContent())
4373: content = title;
4374: if (SnapperAdmin
4375: .getMountFilePathInContent()) {
4376: String formated = consPath;
4377: if (formated.indexOf("|") != -1)
4378: formated = formated.substring(0,
4379: formated.indexOf("|"));
4380: content += " " + formated;
4381: }
4382: if (SnapperAdmin
4383: .getMountMetaDataInContent())
4384: content += " " + metadata.toString();
4385:
4386: indexer.indexDoc(modifiedLong, -1, -1,
4387: consPath, content, "NULL", title,
4388: "", metadata.toString(), tempFile
4389: .getName(), -1, -1, null,
4390: null, null, null, null, -1, null,
4391: null, null);
4392:
4393: } catch (Exception ex) {
4394: SnapperAdmin
4395: .logWarrning("Problem in index Meta Data document : "
4396: + ex.getMessage());
4397: if (printStackTrace) {
4398: ex.printStackTrace();
4399: }
4400: } finally {
4401: try {
4402: if (rs1 != null)
4403: rs1.close();
4404: } catch (SQLException e) {
4405: }
4406: try {
4407: if (pstmt != null)
4408: pstmt.close();
4409: } catch (SQLException e) {
4410: }
4411: try {
4412: if (connection != null)
4413: connection.close();
4414: } catch (SQLException e) {
4415: }
4416: }
4417: }
4418: }
4419:
4420: } else {
4421: SnapperAdmin.logWarrning(" Result set is empty !");
4422: }
4423: } catch (Exception ex) {
4424: SnapperAdmin.logWarrning("Problem in index Meta Data :"
4425: + ex.getMessage());
4426: if (printStackTrace) {
4427: ex.printStackTrace();
4428: }
4429: }
4430:
4431: }
4432:
4433: }
|