01: package org.apache.lucene.analysis.th;
02:
03: /**
04: * Licensed to the Apache Software Foundation (ASF) under one or more
05: * contributor license agreements. See the NOTICE file distributed with
06: * this work for additional information regarding copyright ownership.
07: * The ASF licenses this file to You under the Apache License, Version 2.0
08: * (the "License"); you may not use this file except in compliance with
09: * the License. You may obtain a copy of the License at
10: *
11: * http://www.apache.org/licenses/LICENSE-2.0
12: *
13: * Unless required by applicable law or agreed to in writing, software
14: * distributed under the License is distributed on an "AS IS" BASIS,
15: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16: * See the License for the specific language governing permissions and
17: * limitations under the License.
18: */
19:
20: import java.io.StringReader;
21: import junit.framework.TestCase;
22: import org.apache.lucene.analysis.Analyzer;
23: import org.apache.lucene.analysis.Token;
24: import org.apache.lucene.analysis.TokenStream;
25:
26: /**
27: * Test case for ThaiAnalyzer, modified from TestFrenchAnalyzer
28: *
29: * @author Samphan Raruenrom <samphan@osdev.co.th>
30: * @version 0.1
31: */
32:
33: public class TestThaiAnalyzer extends TestCase {
34:
35: public void assertAnalyzesTo(Analyzer a, String input,
36: String[] output) throws Exception {
37:
38: TokenStream ts = a
39: .tokenStream("dummy", new StringReader(input));
40:
41: for (int i = 0; i < output.length; i++) {
42: Token t = ts.next();
43: assertNotNull(t);
44: assertEquals(t.termText(), output[i]);
45: }
46: assertNull(ts.next());
47: ts.close();
48: }
49:
50: public void testAnalyzer() throws Exception {
51: ThaiAnalyzer analyzer = new ThaiAnalyzer();
52:
53: assertAnalyzesTo(analyzer, "", new String[] {});
54:
55: assertAnalyzesTo(
56: analyzer,
57: "à¸?ารที่ได้ต้à¸à¸‡à¹?สดงว่างานดี",
58: new String[] { "�าร", "ที่", "ได้",
59: "ต้à¸à¸‡", "à¹?สดง", "ว่า",
60: "งาน", "ดี" });
61:
62: assertAnalyzesTo(
63: analyzer,
64: "บริษัทชื่ภXY&Z - คุย�ับ xyz@demo.com",
65: new String[] { "บริษัท", "ชื่à¸",
66: "xy&z", "คุย", "�ับ",
67: "xyz@demo.com" });
68:
69: // English stop words
70: assertAnalyzesTo(
71: analyzer,
72: "ประโยคว่า The quick brown fox jumped over the lazy dogs",
73: new String[] { "ประโยค", "ว่า",
74: "quick", "brown", "fox", "jumped", "over",
75: "lazy", "dogs" });
76: }
77: }
|