01: package org.apache.lucene;
02:
03: /**
04: * Licensed to the Apache Software Foundation (ASF) under one or more
05: * contributor license agreements. See the NOTICE file distributed with
06: * this work for additional information regarding copyright ownership.
07: * The ASF licenses this file to You under the Apache License, Version 2.0
08: * (the "License"); you may not use this file except in compliance with
09: * the License. You may obtain a copy of the License at
10: *
11: * http://www.apache.org/licenses/LICENSE-2.0
12: *
13: * Unless required by applicable law or agreed to in writing, software
14: * distributed under the License is distributed on an "AS IS" BASIS,
15: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16: * See the License for the specific language governing permissions and
17: * limitations under the License.
18: */
19:
20: import org.apache.lucene.analysis.SimpleAnalyzer;
21: import org.apache.lucene.analysis.Analyzer;
22: import org.apache.lucene.analysis.TokenStream;
23: import org.apache.lucene.analysis.Token;
24:
25: import java.io.Reader;
26: import java.io.StringReader;
27: import java.io.File;
28: import java.io.FileInputStream;
29: import java.io.BufferedReader;
30: import java.io.InputStreamReader;
31: import java.util.Date;
32:
33: class AnalysisTest {
34: public static void main(String[] args) {
35: try {
36: test("This is a test", true);
37: // FIXME: OG: what's with this hard-coded file name??
38: test(new File("words.txt"), false);
39: } catch (Exception e) {
40: System.out.println(" caught a " + e.getClass()
41: + "\n with message: " + e.getMessage());
42: }
43: }
44:
45: static void test(File file, boolean verbose) throws Exception {
46: long bytes = file.length();
47: System.out.println(" Reading test file containing " + bytes
48: + " bytes.");
49:
50: FileInputStream is = new FileInputStream(file);
51: BufferedReader ir = new BufferedReader(
52: new InputStreamReader(is));
53:
54: test(ir, verbose, bytes);
55:
56: ir.close();
57: }
58:
59: static void test(String text, boolean verbose) throws Exception {
60: System.out.println(" Tokenizing string: " + text);
61: test(new StringReader(text), verbose, text.length());
62: }
63:
64: static void test(Reader reader, boolean verbose, long bytes)
65: throws Exception {
66: Analyzer analyzer = new SimpleAnalyzer();
67: TokenStream stream = analyzer.tokenStream(null, reader);
68:
69: Date start = new Date();
70:
71: int count = 0;
72: for (Token t = stream.next(); t != null; t = stream.next()) {
73: if (verbose) {
74: System.out.println("Text=" + t.termText() + " start="
75: + t.startOffset() + " end=" + t.endOffset());
76: }
77: count++;
78: }
79:
80: Date end = new Date();
81:
82: long time = end.getTime() - start.getTime();
83: System.out.println(time + " milliseconds to extract " + count
84: + " tokens");
85: System.out.println((time * 1000.0) / count
86: + " microseconds/token");
87: System.out.println((bytes * 1000.0 * 60.0 * 60.0)
88: / (time * 1000000.0) + " megabytes/hour");
89: }
90: }
|