01: /**********************************************************************************
02: * $URL: https://source.sakaiproject.org/svn/search/tags/sakai_2-4-1/search-impl/impl/src/java/org/sakaiproject/search/index/impl/SnowballAnalyzerFactory.java $
03: * $Id: SnowballAnalyzerFactory.java 29315 2007-04-20 14:28:12Z ajpoland@iupui.edu $
04: ***********************************************************************************
05: *
06: * Copyright (c) 2003, 2004, 2005, 2006 The Sakai Foundation.
07: *
08: * Licensed under the Educational Community License, Version 1.0 (the "License");
09: * you may not use this file except in compliance with the License.
10: * You may obtain a copy of the License at
11: *
12: * http://www.opensource.org/licenses/ecl1.php
13: *
14: * Unless required by applicable law or agreed to in writing, software
15: * distributed under the License is distributed on an "AS IS" BASIS,
16: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17: * See the License for the specific language governing permissions and
18: * limitations under the License.
19: *
20: **********************************************************************************/package org.sakaiproject.search.index.impl;
21:
22: import java.io.BufferedReader;
23: import java.io.InputStreamReader;
24: import java.io.Reader;
25: import java.util.ArrayList;
26:
27: import org.apache.commons.logging.Log;
28: import org.apache.commons.logging.LogFactory;
29: import org.apache.lucene.analysis.Analyzer;
30: import org.apache.lucene.analysis.PorterStemFilter;
31: import org.apache.lucene.analysis.TokenStream;
32: import org.apache.lucene.analysis.standard.StandardAnalyzer;
33: import org.sakaiproject.search.api.SearchService;
34: import org.sakaiproject.search.index.AnalyzerFactory;
35:
36: /**
37: * Snowball stemming algorithm
38: *
39: * @author ieb
40: */
41: public class SnowballAnalyzerFactory implements AnalyzerFactory {
42: private static final Log log = LogFactory
43: .getLog(SnowballAnalyzerFactory.class);
44:
45: private static String[] stopWords = null;
46: static {
47: try {
48: ArrayList<String> al = new ArrayList<String>();
49: BufferedReader br = new BufferedReader(
50: new InputStreamReader(
51: SnowballAnalyzerFactory.class
52: .getResourceAsStream("/org/sakaiproject/search/component/bundle/stopwords.txt")));
53: for (String line = br.readLine(); line != null; line = br
54: .readLine()) {
55: al.add(line.trim());
56: }
57: br.close();
58: stopWords = al.toArray(new String[0]);
59: } catch (Exception ex) {
60: log.error("Failed to load Stop words into Analyzer", ex);
61: }
62: }
63:
64: public Analyzer newAnalyzer() {
65: return new StemAnalyzer();
66: }
67:
68: public class StemAnalyzer extends Analyzer {
69: StandardAnalyzer keywordAnalyzer;
70:
71: public StemAnalyzer() {
72: keywordAnalyzer = new StandardAnalyzer(stopWords);
73: }
74:
75: /*
76: * (non-Javadoc)
77: *
78: * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
79: */
80: public TokenStream tokenStream(String fieldName, Reader reader) {
81: if (SearchService.FIELD_CONTENTS.equals(fieldName)) {
82: return new PorterStemFilter(keywordAnalyzer
83: .tokenStream(fieldName, reader));
84: } else {
85: return keywordAnalyzer.tokenStream(fieldName, reader);
86: }
87: }
88: }
89:
90: }
|