01: package org.apache.lucene.analysis;
02:
03: import java.io.BufferedInputStream;
04: import java.io.File;
05: import java.io.FileInputStream;
06: import java.io.StringReader;
07: import java.net.URLDecoder;
08: import java.util.ArrayList;
09: import java.util.List;
10:
11: import junit.framework.TestCase;
12:
13: import org.contineo.core.searchengine.crawler.LuceneAnalyzerFactory;
14:
15: public class AnalyzeFileTest extends TestCase {
16:
17: private Analyzer sbitAnal;
18:
19: public AnalyzeFileTest() {
20: sbitAnal = LuceneAnalyzerFactory.getAnalyzer("it");
21: }
22:
23: /**
24: * A helper method that analizes a string
25: *
26: * @param a the Analyzer to use
27: * @param input an input String to analyze
28: * @throws Exception in case an error occurs
29: */
30: private String[] getAnalysisResult(
31: org.apache.lucene.analysis.Analyzer a, String input)
32: throws Exception {
33: TokenStream ts = a
34: .tokenStream("dummy", new StringReader(input));
35: List<String> resultList = new ArrayList<String>();
36: while (true) {
37: Token token = ts.next();
38: if (token == null)
39: break;
40: resultList.add(token.termText());
41: }
42: return resultList.toArray(new String[0]);
43: }
44:
45: public void testSnowballAnalyzer() throws Exception {
46:
47: File file = new File(URLDecoder.decode(getClass()
48: .getClassLoader()
49: .getResource("AnalyzeFileTest_enc.txt").getPath(),
50: "UTF-8"));
51: BufferedInputStream bis = new BufferedInputStream(
52: new FileInputStream(file));
53: StringBuffer content = new StringBuffer();
54: int ichar = 0;
55:
56: while ((ichar = bis.read()) > 0) {
57: content.append((char) ichar);
58: }
59:
60: long start = System.currentTimeMillis();
61: String[] result2 = getAnalysisResult(sbitAnal, content
62: .toString());
63: long end = System.currentTimeMillis() - start;
64: System.out.println("Elab time millis: " + end);
65:
66: for (String token : result2) {
67: System.out.println(token);
68: }
69:
70: assertTrue(result2.length > 0);
71: }
72:
73: }
|