01: /**
02: * Licensed to the Apache Software Foundation (ASF) under one or more
03: * contributor license agreements. See the NOTICE file distributed with
04: * this work for additional information regarding copyright ownership.
05: * The ASF licenses this file to You under the Apache License, Version 2.0
06: * (the "License"); you may not use this file except in compliance with
07: * the License. You may obtain a copy of the License at
08: *
09: * http://www.apache.org/licenses/LICENSE-2.0
10: *
11: * Unless required by applicable law or agreed to in writing, software
12: * distributed under the License is distributed on an "AS IS" BASIS,
13: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14: * See the License for the specific language governing permissions and
15: * limitations under the License.
16: */package org.apache.cocoon.components.search.analyzer;
17:
18: import org.apache.avalon.framework.configuration.Configuration;
19: import org.apache.avalon.framework.configuration.ConfigurationException;
20: import org.apache.lucene.analysis.standard.StandardAnalyzer;
21:
22: /**
23: * Configurable Stopword Analyzer
24: *
25: * Config file:
26: *
27: * <stopWords><stopWord>a </stopWord> <stopWord>the </stopWord> <stopWord>but
28: * </stopWord> </stopWords>
29: *
30: * @author Nicolas Maisonneuve
31: */
32: public class ConfigurableStopwordAnalyzer extends ConfigurableAnalyzer {
33:
34: /** The element containing a stop word. */
35: private static final String STOP_WORD_ELEMENT = "stopword";
36:
37: /**
38: * Configures the analyzer.(stop words)
39: */
40: public void configure(Configuration configuration)
41: throws ConfigurationException {
42: String[] words = stopTableBuilder(configuration);
43: logger.info("stop words number: " + words.length);
44: analyzer = new StandardAnalyzer(words);
45: }
46:
47: /**
48: * Build Stop Table
49: *
50: * @param conf
51: * Configuration file (above the STOP_WORDS ELEMENT)
52: * @throws ConfigurationException
53: * @return String[] array with all excluded words
54: */
55: static public String[] stopTableBuilder(Configuration conf)
56: throws ConfigurationException {
57:
58: Configuration[] cStops = conf.getChildren(STOP_WORD_ELEMENT);
59: if (cStops != null) {
60: final String[] words = new String[cStops.length];
61: for (int i = 0; i < cStops.length; i++) {
62: words[i] = cStops[i].getValue();
63: }
64: return words;
65: }
66:
67: final String[] words = new String[0];
68: return words;
69: }
70:
71: }
|