01: package org.apache.lucene.analysis.snowball;
02:
03: /**
04: * Licensed to the Apache Software Foundation (ASF) under one or more
05: * contributor license agreements. See the NOTICE file distributed with
06: * this work for additional information regarding copyright ownership.
07: * The ASF licenses this file to You under the Apache License, Version 2.0
08: * (the "License"); you may not use this file except in compliance with
09: * the License. You may obtain a copy of the License at
10: *
11: * http://www.apache.org/licenses/LICENSE-2.0
12: *
13: * Unless required by applicable law or agreed to in writing, software
14: * distributed under the License is distributed on an "AS IS" BASIS,
15: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16: * See the License for the specific language governing permissions and
17: * limitations under the License.
18: */
19:
20: import java.io.IOException;
21:
22: import java.lang.reflect.Method;
23:
24: import net.sf.snowball.SnowballProgram;
25: import net.sf.snowball.ext.*;
26:
27: import org.apache.lucene.analysis.Token;
28: import org.apache.lucene.analysis.TokenFilter;
29: import org.apache.lucene.analysis.TokenStream;
30:
31: /** A filter that stems words using a Snowball-generated stemmer.
32: *
33: * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a
34: * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
35: * {@link EnglishStemmer} is named "English".
36: */
37:
38: public class SnowballFilter extends TokenFilter {
39: private static final Object[] EMPTY_ARGS = new Object[0];
40:
41: private SnowballProgram stemmer;
42: private Method stemMethod;
43:
44: /** Construct the named stemming filter.
45: *
46: * @param in the input tokens to stem
47: * @param name the name of a stemmer
48: */
49: public SnowballFilter(TokenStream in, String name) {
50: super (in);
51: try {
52: Class stemClass = Class.forName("net.sf.snowball.ext."
53: + name + "Stemmer");
54: stemmer = (SnowballProgram) stemClass.newInstance();
55: // why doesn't the SnowballProgram class have an (abstract?) stem method?
56: stemMethod = stemClass.getMethod("stem", new Class[0]);
57: } catch (Exception e) {
58: throw new RuntimeException(e.toString());
59: }
60: }
61:
62: /** Returns the next input Token, after being stemmed */
63: public final Token next() throws IOException {
64: Token token = input.next();
65: if (token == null)
66: return null;
67: stemmer.setCurrent(token.termText());
68: try {
69: stemMethod.invoke(stemmer, EMPTY_ARGS);
70: } catch (Exception e) {
71: throw new RuntimeException(e.toString());
72: }
73:
74: Token newToken = new Token(stemmer.getCurrent(), token
75: .startOffset(), token.endOffset(), token.type());
76: newToken.setPositionIncrement(token.getPositionIncrement());
77: return newToken;
78: }
79: }
|