01: package org.apache.lucene.analysis.fr;
02:
03: /**
04: * Licensed to the Apache Software Foundation (ASF) under one or more
05: * contributor license agreements. See the NOTICE file distributed with
06: * this work for additional information regarding copyright ownership.
07: * The ASF licenses this file to You under the Apache License, Version 2.0
08: * (the "License"); you may not use this file except in compliance with
09: * the License. You may obtain a copy of the License at
10: *
11: * http://www.apache.org/licenses/LICENSE-2.0
12: *
13: * Unless required by applicable law or agreed to in writing, software
14: * distributed under the License is distributed on an "AS IS" BASIS,
15: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16: * See the License for the specific language governing permissions and
17: * limitations under the License.
18: */
19:
20: import java.io.IOException;
21: import java.io.StringReader;
22: import java.util.ArrayList;
23: import java.util.HashSet;
24: import java.util.List;
25: import java.util.Set;
26:
27: import junit.framework.TestCase;
28:
29: import org.apache.lucene.analysis.Token;
30: import org.apache.lucene.analysis.TokenFilter;
31: import org.apache.lucene.analysis.Tokenizer;
32: import org.apache.lucene.analysis.standard.StandardTokenizer;
33:
34: /**
35: * @author Mathieu Lecarme<mlecarme@openwide.fr>
36: *
37: */
38: public class TestElision extends TestCase {
39:
40: public void testElision() {
41: String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
42: Tokenizer tokenizer = new StandardTokenizer(new StringReader(
43: test));
44: Set articles = new HashSet();
45: articles.add("l");
46: articles.add("M");
47: TokenFilter filter = new ElisionFilter(tokenizer, articles);
48: List tas = filtre(filter);
49: assertEquals("embrouille", tas.get(4));
50: assertEquals("O'brian", tas.get(6));
51: assertEquals("enfin", tas.get(7));
52: }
53:
54: private List filtre(TokenFilter filter) {
55: List tas = new ArrayList();
56: try {
57: boolean encore = true;
58: Token token;
59: while (encore) {
60: token = filter.next();
61: encore = token != null;
62: if (token != null)
63: tas.add(token.termText());
64: }
65: } catch (IOException e) {
66: e.printStackTrace();
67: }
68: return tas;
69: }
70:
71: }
|