01: package org.apache.lucene.analysis.el;
02:
03: /**
04: * Copyright 2005 The Apache Software Foundation
05: *
06: * Licensed under the Apache License, Version 2.0 (the "License");
07: * you may not use this file except in compliance with the License.
08: * You may obtain a copy of the License at
09: *
10: * http://www.apache.org/licenses/LICENSE-2.0
11: *
12: * Unless required by applicable law or agreed to in writing, software
13: * distributed under the License is distributed on an "AS IS" BASIS,
14: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15: * See the License for the specific language governing permissions and
16: * limitations under the License.
17: */
18:
19: import java.io.StringReader;
20:
21: import org.apache.lucene.analysis.Analyzer;
22: import org.apache.lucene.analysis.Token;
23: import org.apache.lucene.analysis.TokenStream;
24:
25: import junit.framework.TestCase;
26:
27: /**
28: * A unit test class for verifying the correct operation of the GreekAnalyzer.
29: *
30: * @author past
31: */
32: public class GreekAnalyzerTest extends TestCase {
33:
34: /**
35: * A helper method copied from org.apache.lucene.analysis.TestAnalyzers.
36: *
37: * @param a the Analyzer to test
38: * @param input an input String to analyze
39: * @param output a String[] with the results of the analysis
40: * @throws Exception in case an error occurs
41: */
42: private void assertAnalyzesTo(Analyzer a, String input,
43: String[] output) throws Exception {
44: TokenStream ts = a
45: .tokenStream("dummy", new StringReader(input));
46: for (int i = 0; i < output.length; i++) {
47: Token t = ts.next();
48: assertNotNull(t);
49: assertEquals(t.termText(), output[i]);
50: }
51: assertNull(ts.next());
52: ts.close();
53: }
54:
55: /**
56: * Test the analysis of various greek strings.
57: *
58: * @throws Exception in case an error occurs
59: */
60: public void testAnalyzer() throws Exception {
61: Analyzer a = new GreekAnalyzer();
62: // Verify the correct analysis of capitals and small accented letters
63: assertAnalyzesTo(
64: a,
65: "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
66: new String[] {
67: "\u03bc\u03b9\u03b1",
68: "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1",
69: "\u03ba\u03b1\u03bb\u03b7",
70: "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1",
71: "\u03c3\u03b5\u03b9\u03c1\u03b1",
72: "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
73: "\u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03b7\u03c3",
74: "\u03b3\u03bb\u03c9\u03c3\u03c3\u03b1\u03c3" });
75: // Verify the correct analysis of small letters with diaeresis and the elimination
76: // of punctuation marks
77: assertAnalyzesTo(
78: a,
79: "\u03a0\u03c1\u03bf\u03ca\u03cc\u03bd\u03c4\u03b1 (\u03ba\u03b1\u03b9) [\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03ad\u03c2] - \u0391\u039d\u0391\u0393\u039a\u0395\u03a3",
80: new String[] {
81: "\u03c0\u03c1\u03bf\u03b9\u03bf\u03bd\u03c4\u03b1",
82: "\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03b5\u03c3",
83: "\u03b1\u03bd\u03b1\u03b3\u03ba\u03b5\u03c3" });
84: // Verify the correct analysis of capital accented letters and capitalletters with diaeresis,
85: // as well as the elimination of stop words
86: assertAnalyzesTo(
87: a,
88: "\u03a0\u03a1\u039f\u03ab\u03a0\u039f\u0398\u0395\u03a3\u0395\u0399\u03a3 \u0386\u03c8\u03bf\u03b3\u03bf\u03c2, \u03bf \u03bc\u03b5\u03c3\u03c4\u03cc\u03c2 \u03ba\u03b1\u03b9 \u03bf\u03b9 \u03ac\u03bb\u03bb\u03bf\u03b9",
89: new String[] {
90: "\u03c0\u03c1\u03bf\u03c5\u03c0\u03bf\u03b8\u03b5\u03c3\u03b5\u03b9\u03c3",
91: "\u03b1\u03c8\u03bf\u03b3\u03bf\u03c3",
92: "\u03bc\u03b5\u03c3\u03c4\u03bf\u03c3",
93: "\u03b1\u03bb\u03bb\u03bf\u03b9" });
94: }
95:
96: }
|