01: package org.apache.lucene.analysis.snowball;
02:
03: /**
04: * Licensed to the Apache Software Foundation (ASF) under one or more
05: * contributor license agreements. See the NOTICE file distributed with
06: * this work for additional information regarding copyright ownership.
07: * The ASF licenses this file to You under the Apache License, Version 2.0
08: * (the "License"); you may not use this file except in compliance with
09: * the License. You may obtain a copy of the License at
10: *
11: * http://www.apache.org/licenses/LICENSE-2.0
12: *
13: * Unless required by applicable law or agreed to in writing, software
14: * distributed under the License is distributed on an "AS IS" BASIS,
15: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16: * See the License for the specific language governing permissions and
17: * limitations under the License.
18: */
19:
20: import org.apache.lucene.analysis.*;
21: import org.apache.lucene.analysis.standard.*;
22:
23: import net.sf.snowball.ext.*;
24:
25: import java.io.Reader;
26: import java.util.Set;
27:
28: /** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
29: * LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}.
30: *
31: * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a
32: * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
33: * {@link EnglishStemmer} is named "English".
34: */
35: public class SnowballAnalyzer extends Analyzer {
36: private String name;
37: private Set stopSet;
38:
39: /** Builds the named analyzer with no stop words. */
40: public SnowballAnalyzer(String name) {
41: this .name = name;
42: }
43:
44: /** Builds the named analyzer with the given stop words. */
45: public SnowballAnalyzer(String name, String[] stopWords) {
46: this (name);
47: stopSet = StopFilter.makeStopSet(stopWords);
48: }
49:
50: /** Constructs a {@link StandardTokenizer} filtered by a {@link
51: StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
52: public TokenStream tokenStream(String fieldName, Reader reader) {
53: TokenStream result = new StandardTokenizer(reader);
54: result = new StandardFilter(result);
55: result = new LowerCaseFilter(result);
56: if (stopSet != null)
57: result = new StopFilter(result, stopSet);
58: result = new SnowballFilter(result, name);
59: return result;
60: }
61: }
|