01: package org.apache.lucene.analysis.ru;
02:
03: /**
04: * Licensed to the Apache Software Foundation (ASF) under one or more
05: * contributor license agreements. See the NOTICE file distributed with
06: * this work for additional information regarding copyright ownership.
07: * The ASF licenses this file to You under the Apache License, Version 2.0
08: * (the "License"); you may not use this file except in compliance with
09: * the License. You may obtain a copy of the License at
10: *
11: * http://www.apache.org/licenses/LICENSE-2.0
12: *
13: * Unless required by applicable law or agreed to in writing, software
14: * distributed under the License is distributed on an "AS IS" BASIS,
15: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16: * See the License for the specific language governing permissions and
17: * limitations under the License.
18: */
19:
20: import junit.framework.TestCase;
21:
22: import java.io.BufferedReader;
23: import java.io.File;
24: import java.io.InputStreamReader;
25: import java.io.FileInputStream;
26: import java.util.ArrayList;
27:
28: public class TestRussianStem extends TestCase {
29: private ArrayList words = new ArrayList();
30: private ArrayList stems = new ArrayList();
31:
32: public TestRussianStem(String name) {
33: super (name);
34: }
35:
36: /**
37: * @see TestCase#setUp()
38: */
39: protected void setUp() throws Exception {
40: super .setUp();
41: //System.out.println(new java.util.Date());
42: String str;
43:
44: File dataDir = new File(System.getProperty("dataDir", "./bin"));
45:
46: // open and read words into an array list
47: BufferedReader inWords = new BufferedReader(
48: new InputStreamReader(
49: new FileInputStream(
50: new File(dataDir,
51: "/org/apache/lucene/analysis/ru/wordsUnicode.txt")),
52: "Unicode"));
53: while ((str = inWords.readLine()) != null) {
54: words.add(str);
55: }
56: inWords.close();
57:
58: // open and read stems into an array list
59: BufferedReader inStems = new BufferedReader(
60: new InputStreamReader(
61: new FileInputStream(
62: new File(dataDir,
63: "/org/apache/lucene/analysis/ru/stemsUnicode.txt")),
64: "Unicode"));
65: while ((str = inStems.readLine()) != null) {
66: stems.add(str);
67: }
68: inStems.close();
69: }
70:
71: /**
72: * @see TestCase#tearDown()
73: */
74: protected void tearDown() throws Exception {
75: super .tearDown();
76: }
77:
78: public void testStem() {
79: for (int i = 0; i < words.size(); i++) {
80: //if ( (i % 100) == 0 ) System.err.println(i);
81: String realStem = RussianStemmer.stem(
82: (String) words.get(i),
83: RussianCharsets.UnicodeRussian);
84: assertEquals("unicode", stems.get(i), realStem);
85: }
86: }
87:
88: }
|