001: /*
002: * Copyright 2003-2005 Michael Franken, Zilverline.
003: *
004: * The contents of this file, or the files included with this file, are subject to
005: * the current version of ZILVERLINE Collaborative Source License for the
006: * Zilverline Search Engine (the "License"); You may not use this file except in
007: * compliance with the License.
008: *
009: * You may obtain a copy of the License at
010: *
011: * http://www.zilverline.org.
012: *
013: * See the License for the rights, obligations and
014: * limitations governing use of the contents of the file.
015: *
016: * The Original and Upgraded Code is the Zilverline Search Engine. The developer of
017: * the Original and Upgraded Code is Michael Franken. Michael Franken owns the
018: * copyrights in the portions it created. All Rights Reserved.
019: *
020: */
021:
022: package org.zilverline.service;
023:
024: import java.io.File;
025: import java.util.Map;
026: import java.util.Properties;
027:
028: import org.apache.commons.logging.Log;
029: import org.apache.commons.logging.LogFactory;
030:
031: import org.springframework.test.AbstractDependencyInjectionSpringContextTests;
032:
033: import org.zilverline.core.ExtractorFactory;
034: import org.zilverline.core.FileSystemCollection;
035: import org.zilverline.core.Handler;
036: import org.zilverline.core.IndexException;
037: import org.zilverline.util.FileUtils;
038:
039: /**
040: * IndexServiceTest
041: *
042: * @author Michael Franken
043: */
044: public class TestIndexService extends
045: AbstractDependencyInjectionSpringContextTests {
046:
047: /** logger for Commons logging. */
048: private static Log log = LogFactory.getLog(TestSearchService.class);
049:
050: protected String[] getConfigLocations() {
051: return new String[] { "applicationContext.xml" };
052: }
053:
054: public final void testDoIndex() {
055: IndexServiceImpl service = (IndexServiceImpl) applicationContext
056: .getBean("indexService");
057: assertNotNull(service);
058: try {
059: File contentDirectory = new File("test\\data\\");
060:
061: assertTrue(contentDirectory.exists());
062: assertTrue(contentDirectory.isDirectory());
063:
064: CollectionManager manager = new CollectionManagerImpl();
065: File tempDirectory = new File(System
066: .getProperty("java.io.tmpdir"), "zilverline");
067: File cacheBaseDir = new File(tempDirectory, "cache");
068: File indexBaseDir = new File(tempDirectory, "index");
069:
070: manager.setCacheBaseDir(cacheBaseDir);
071: manager.setIndexBaseDir(indexBaseDir);
072:
073: FileSystemCollection col1 = new FileSystemCollection();
074:
075: col1.setName("test");
076: assertEquals("test", col1.getName());
077: col1.setContentDir(contentDirectory);
078: col1.setKeepCache(true);
079: col1
080: .setDescription("A number of different file formats for testing purposes");
081: manager.addCollection(col1);
082: log.debug(col1.getName() + " has ^^^^^^^^^^ "
083: + col1.getNumberOfDocs() + " documents.");
084: col1.init();
085: log.debug(col1.getName() + " has ^^^^^^^^^^ "
086: + col1.getNumberOfDocs() + " documents.");
087:
088: assertEquals(col1.getContentDir().getAbsolutePath(),
089: contentDirectory.getAbsolutePath());
090: assertEquals(col1.getCacheDirWithManagerDefaults()
091: .getAbsolutePath(), new File(cacheBaseDir, "test")
092: .getAbsolutePath());
093: assertEquals(col1.getIndexDirWithManagerDefaults()
094: .getAbsolutePath(), new File(indexBaseDir, "test")
095: .getAbsolutePath());
096:
097: assertTrue(col1.existsOnDisk());
098:
099: // remove the index and cache
100: File indexDir = col1.getIndexDirWithManagerDefaults();
101: File cacheDir = col1.getCacheDirWithManagerDefaults();
102:
103: if (indexDir.exists()) {
104: assertTrue(FileUtils.removeDir(indexDir));
105: }
106:
107: if (cacheDir.exists()) {
108: assertTrue(FileUtils.removeDir(cacheDir));
109: }
110:
111: Properties extprops = new Properties();
112:
113: ExtractorFactory ef = new ExtractorFactory();
114:
115: ef.setCaseSensitive(false);
116:
117: // add handler extractors
118: extprops.put("pdf",
119: "org.zilverline.extractors.PDFExtractor");
120: extprops.put("doc",
121: "org.zilverline.extractors.WordExtractor");
122: extprops.put("rtf",
123: "org.zilverline.extractors.RTFExtractor");
124: extprops.put("html",
125: "org.zilverline.extractors.HTMLExtractor");
126: extprops.put("htm",
127: "org.zilverline.extractors.HTMLExtractor");
128: extprops.put("txt",
129: "org.zilverline.extractors.TextExtractor");
130: extprops.put("xls",
131: "org.zilverline.extractors.ExcelExtractor");
132: extprops.put("ppt",
133: "org.zilverline.extractors.PowerPointExtractor");
134: ef.setMappings(extprops);
135:
136: col1.init();
137: assertEquals(0, col1.getNumberOfDocs());
138: assertFalse(col1.isIndexValid());
139:
140: // define archive handler, no handlers yet
141: Handler handler = new Handler();
142: Map props = new Properties();
143: handler.setCaseSensitive(false);
144: handler.setMappings(props);
145: manager.setArchiveHandler(handler);
146:
147: manager.setFactory(ef);
148: service.setCollectionManager(manager);
149: service.doIndex(new String[] { col1.getName() }, true);
150: while (col1.isIndexingInProgress()) {
151: Thread.sleep(100);
152: log.debug("zzz");
153: }
154: assertTrue(col1.isIndexValid());
155:
156: // no archive handler, so no unpacking of archives
157: // 8 docs: a doc, a rtf, a pdf, a html, a txt, a xls, a ppt and a txt in a dir
158: int num = 8;
159:
160: assertEquals(num, col1.getNumberOfDocs());
161: assertFalse(cacheDir.exists());
162:
163: // add handler for zip
164: props.put("zip", "");
165: handler.setCaseSensitive(false);
166: handler.setMappings(props);
167: service.doIndex(new String[] { col1.getName() }, true);
168: while (col1.isIndexingInProgress()) {
169: Thread.sleep(100);
170: log.debug("zzz");
171: }
172:
173: // 4 files in: a zip, a zip in zip, a zip in a dir, and a zip with a dir.
174: // Two of them are identical, so 1 gets skipped. Zips themselves are added as well
175: num += 6;
176: assertEquals("Testing zip support: ", num, col1
177: .getNumberOfDocs());
178:
179: // add handler for rar
180: props.put("RAR", "UnRar.exe x -o+ -inul");
181: handler.setMappings(props);
182: assertTrue("Testing casesensitivity of mappings", handler
183: .getMappings().containsKey("rar"));
184: service.doIndex(new String[] { col1.getName() }, true);
185: while (col1.isIndexingInProgress()) {
186: Thread.sleep(100);
187: log.debug("zzz");
188: }
189:
190: // 2 more file in rar: 1 duplicate
191: num += 2;
192:
193: // Using funny name: "some RAR file with a very nasty long filename.rar" accepted
194: assertEquals("Testing rar support: ", num, col1
195: .getNumberOfDocs());
196: assertTrue(cacheDir.exists());
197:
198: // add handler for chm
199: props.put("CHM", "hh.exe -decompile .");
200: handler.setCaseSensitive(true);
201: handler.setMappings(props);
202: assertFalse("Testing casesensitivity of mappings", handler
203: .getMappings().containsKey("chm"));
204:
205: //props.put("chm", "hh.exe -decompile .");
206: handler.setCaseSensitive(false);
207: handler.setMappings(props);
208: assertTrue("Testing casesensitivity of mappings", handler
209: .getMappings().containsKey("chm"));
210: service.doIndex(new String[] { col1.getName() }, true);
211: while (col1.isIndexingInProgress()) {
212: Thread.sleep(100);
213: log.debug("zzz");
214: }
215:
216: // 13 individual files in "Another compiled help file with a longer
217: // name than any file.chm"
218: num += 14;
219: assertEquals("Testing chm support: ", num, col1
220: .getNumberOfDocs());
221:
222: // add handler for rar
223: props.put("hlp", "d:\\helpdeco\\helpdeco.exe /r /y /n");
224: handler.setMappings(props);
225: assertTrue("Testing casesensitivity of mappings", handler
226: .canUnPack("HLP"));
227: service.doIndex(new String[] { col1.getName() }, true);
228: while (col1.isIndexingInProgress()) {
229: Thread.sleep(100);
230: log.debug("zzz");
231: }
232:
233: // 1 more file EXNED31.HLP: archive and extracted RTF
234: num += 2;
235:
236: assertEquals("Testing hlp support: ", num, col1
237: .getNumberOfDocs());
238: assertTrue(cacheDir.exists());
239:
240: assertTrue("Should delete tempDirectory: " + tempDirectory,
241: FileUtils.removeDir(tempDirectory));
242: } catch (Exception e) {
243: fail("Should not happen, Exception: " + e.getMessage());
244: }
245: }
246:
247: public void testIndex() {
248: IndexServiceImpl service = (IndexServiceImpl) applicationContext
249: .getBean("indexService");
250: assertNotNull(service);
251: try {
252: service.reIndex();
253: service.index();
254: service.store();
255: } catch (IndexException e) {
256: fail(e.getMessage());
257: }
258: }
259:
260: // public final void testPerformanceDoIndex() {
261: // try {
262: // File contentDirectory = new File("d:/spring-framework-1.2.7/docs");
263: //
264: // CollectionManager manager = new CollectionManagerImpl();
265: // File tempDirectory = new File(System.getProperty("java.io.tmpdir"), "zilverline");
266: // File cacheBaseDir = new File(tempDirectory, "cache");
267: // File indexBaseDir = new File(tempDirectory, "index");
268: //
269: // manager.setCacheBaseDir(cacheBaseDir);
270: // manager.setIndexBaseDir(indexBaseDir);
271: //
272: // FileSystemCollection col1 = new FileSystemCollection();
273: //
274: // col1.setName("test");
275: // assertEquals("test", col1.getName());
276: // col1.setContentDir(contentDirectory);
277: // col1.setDescription("A number of HTML for performance testing purposes");
278: // manager.addCollection(col1);
279: // assertTrue(col1.existsOnDisk());
280: //
281: // // remove the index and cache
282: // File indexDir = col1.getIndexDirWithManagerDefaults();
283: // File cacheDir = col1.getCacheDirWithManagerDefaults();
284: //
285: // if (indexDir.exists()) {
286: // assertTrue(FileUtils.removeDir(indexDir));
287: // }
288: //
289: // if (cacheDir.exists()) {
290: // assertTrue(FileUtils.removeDir(cacheDir));
291: // }
292: //
293: // Properties extprops = new Properties();
294: //
295: // ExtractorFactory ef = new ExtractorFactory();
296: //
297: // ef.setCaseSensitive(false);
298: //
299: // // add handler extractors
300: // String exStr = "org.zilverline.extractors.HTMLExtractor"; // 1:05
301: // //String exStr = "org.zilverline.extractors.HTMLLuceneExtractor"; // 1:19
302: // //String exStr = "org.zilverline.extractors.HTMLJTidyExtractor"; // 2:16
303: // extprops.put("html", exStr);
304: // extprops.put("htm", exStr);
305: // ef.setMappings(extprops);
306: //
307: // col1.init();
308: // assertEquals(0, col1.getNumberOfDocs());
309: // assertFalse(col1.isIndexValid());
310: //
311: // // define archive handler, no handlers yet
312: // Handler handler = new Handler();
313: // Map props = new Properties();
314: // handler.setCaseSensitive(false);
315: // handler.setMappings(props);
316: // manager.setArchiveHandler(handler);
317: //
318: // manager.setFactory(ef);
319: // col1.index(true);
320: // assertTrue(col1.isIndexValid());
321: //
322: // assertEquals("Testing chm support: ", 2930, col1.getNumberOfDocs());
323: // assertTrue("Should delete tempDirectory: " + tempDirectory, FileUtils.removeDir(tempDirectory));
324: // }
325: // catch (Exception e) {
326: // fail("Should not happen, Exception: " + e.getMessage());
327: // }
328: // }
329:
330: }
|