01: /**
02: * Licensed to the Apache Software Foundation (ASF) under one or more
03: * contributor license agreements. See the NOTICE file distributed with
04: * this work for additional information regarding copyright ownership.
05: * The ASF licenses this file to You under the Apache License, Version 2.0
06: * (the "License"); you may not use this file except in compliance with
07: * the License. You may obtain a copy of the License at
08: *
09: * http://www.apache.org/licenses/LICENSE-2.0
10: *
11: * Unless required by applicable law or agreed to in writing, software
12: * distributed under the License is distributed on an "AS IS" BASIS,
13: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14: * See the License for the specific language governing permissions and
15: * limitations under the License.
16: */package org.apache.solr.analysis;
17:
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.codec.Encoder;
import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.commons.codec.language.Metaphone;
import org.apache.commons.codec.language.RefinedSoundex;
import org.apache.commons.codec.language.Soundex;
import org.apache.lucene.analysis.TokenStream;
import org.apache.solr.core.SolrException;
28:
29: /**
30: * Create tokens based on phonetic encoders
31: *
32: * http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html
33: *
34: * This takes two arguments:
35: * "encoder" required, one of "DoubleMetaphone", "Metaphone", "Soundex", "RefinedSoundex"
36: *
37: * "inject" (default=true) add tokens to the stream with the offset=0
38: *
39: * @version $Id: PhoneticFilterFactory.java 542679 2007-05-29 22:28:21Z ryan $
40: * @see PhoneticFilter
41: */
42: public class PhoneticFilterFactory extends BaseTokenFilterFactory {
43: public static final String ENCODER = "encoder";
44: public static final String INJECT = "inject"; // boolean
45:
46: private static final Map<String, Class<? extends Encoder>> registry;
47: static {
48: registry = new HashMap<String, Class<? extends Encoder>>();
49: registry.put("DoubleMetaphone".toUpperCase(),
50: DoubleMetaphone.class);
51: registry.put("Metaphone".toUpperCase(), Metaphone.class);
52: registry.put("Soundex".toUpperCase(), Soundex.class);
53: registry.put("RefinedSoundex".toUpperCase(),
54: RefinedSoundex.class);
55: }
56:
57: protected boolean inject = true;
58: protected String name = null;
59: protected Encoder encoder = null;
60:
61: @Override
62: public void init(Map<String, String> args) {
63: super .init(args);
64:
65: if (args.get("inject") != null) {
66: inject = Boolean.getBoolean(args.get(INJECT));
67: }
68:
69: String name = args.get(ENCODER);
70: if (name == null) {
71: throw new SolrException(
72: SolrException.ErrorCode.SERVER_ERROR,
73: "Missing required parameter: " + ENCODER + " ["
74: + registry.keySet() + "]");
75: }
76: Class<? extends Encoder> clazz = registry.get(name
77: .toUpperCase());
78: if (clazz == null) {
79: throw new SolrException(
80: SolrException.ErrorCode.SERVER_ERROR,
81: "Unknown encoder: " + name + " ["
82: + registry.keySet() + "]");
83: }
84:
85: try {
86: encoder = clazz.newInstance();
87: } catch (Exception e) {
88: throw new SolrException(
89: SolrException.ErrorCode.SERVER_ERROR,
90: "Error initializing: " + name + "/" + clazz, e);
91: }
92: }
93:
94: public TokenStream create(TokenStream input) {
95: return new PhoneticFilter(input, encoder, name, inject);
96: }
97: }
|