001: /*
002: * Copyright 2004-2006 the original author or authors.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.compass.core.lucene.engine.analyzer;
018:
019: import java.util.ArrayList;
020: import java.util.HashMap;
021: import java.util.Map;
022: import java.util.StringTokenizer;
023:
024: import org.apache.commons.logging.Log;
025: import org.apache.commons.logging.LogFactory;
026: import org.apache.lucene.analysis.Analyzer;
027: import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
028: import org.compass.core.Resource;
029: import org.compass.core.config.CompassSettings;
030: import org.compass.core.config.ConfigurationException;
031: import org.compass.core.engine.SearchEngineException;
032: import org.compass.core.lucene.LuceneEnvironment;
033: import org.compass.core.lucene.engine.LuceneSettings;
034: import org.compass.core.lucene.engine.analyzer.synonym.SynonymAnalyzerTokenFilterProvider;
035: import org.compass.core.mapping.CompassMapping;
036: import org.compass.core.mapping.ResourceAnalyzerController;
037: import org.compass.core.mapping.ResourceMapping;
038: import org.compass.core.mapping.ResourcePropertyMapping;
039: import org.compass.core.util.ClassUtils;
040: import org.compass.core.util.StringUtils;
041:
042: /**
043: * Manages all the configured Lucene analyzers within Compass.
044: *
045: * @author kimchy
046: */
047: public class LuceneAnalyzerManager {
048:
049: private static final Log log = LogFactory
050: .getLog(LuceneAnalyzerManager.class);
051:
052: private HashMap<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
053:
054: private Analyzer defaultAnalyzer;
055:
056: private Analyzer searchAnalyzer;
057:
058: private HashMap<String, Analyzer> aliasAnalyzers = new HashMap<String, Analyzer>();
059:
060: private HashMap<String, LuceneAnalyzerTokenFilterProvider> analyzersFilters = new HashMap<String, LuceneAnalyzerTokenFilterProvider>();
061:
062: private CompassMapping mapping;
063:
064: private LuceneSettings luceneSettings;
065:
066: public void configure(CompassSettings settings,
067: CompassMapping mapping, LuceneSettings luceneSettings)
068: throws SearchEngineException {
069: checkNotUsingOldVersionsAnalyzerSettings(settings);
070: this .mapping = mapping;
071: this .luceneSettings = luceneSettings;
072: buildAnalyzersFilters(settings);
073: buildAnalyzers(settings, mapping);
074: }
075:
076: private void buildAnalyzersFilters(CompassSettings settings) {
077: Map<String, CompassSettings> analyzerFilterSettingGroups = settings
078: .getSettingGroups(LuceneEnvironment.AnalyzerFilter.PREFIX);
079: for (String analyzerFilterName : analyzerFilterSettingGroups
080: .keySet()) {
081: if (log.isInfoEnabled()) {
082: log.info("Building analyzer filter ["
083: + analyzerFilterName + "]");
084: }
085: CompassSettings analyzerFilterSettings = analyzerFilterSettingGroups
086: .get(analyzerFilterName);
087: String analyzerFilterType = analyzerFilterSettings
088: .getSetting(LuceneEnvironment.AnalyzerFilter.TYPE);
089: if (analyzerFilterType == null) {
090: throw new SearchEngineException(
091: "Failed to locate analyzer filter ["
092: + analyzerFilterName
093: + "] type, it must be set");
094: }
095: try {
096: if (analyzerFilterType
097: .equals(LuceneEnvironment.AnalyzerFilter.SYNONYM_TYPE)) {
098: analyzerFilterType = SynonymAnalyzerTokenFilterProvider.class
099: .getName();
100: }
101: LuceneAnalyzerTokenFilterProvider provider = (LuceneAnalyzerTokenFilterProvider) ClassUtils
102: .forName(analyzerFilterType,
103: settings.getClassLoader())
104: .newInstance();
105: provider.configure(analyzerFilterSettings);
106: analyzersFilters.put(analyzerFilterName, provider);
107: } catch (Exception e) {
108: throw new SearchEngineException(
109: "Failed to create analyzer filter ["
110: + analyzerFilterName + "]", e);
111: }
112: }
113: }
114:
115: private void buildAnalyzers(CompassSettings settings,
116: CompassMapping mapping) {
117: Map<String, CompassSettings> analyzerSettingGroups = settings
118: .getSettingGroups(LuceneEnvironment.Analyzer.PREFIX);
119: for (String analyzerName : analyzerSettingGroups.keySet()) {
120: if (log.isInfoEnabled()) {
121: log.info("Building analyzer [" + analyzerName + "]");
122: }
123: Analyzer analyzer = buildAnalyzer(analyzerName,
124: analyzerSettingGroups.get(analyzerName));
125: analyzers.put(analyzerName, analyzer);
126: }
127: defaultAnalyzer = analyzers
128: .get(LuceneEnvironment.Analyzer.DEFAULT_GROUP);
129: if (defaultAnalyzer == null) {
130: // if no default anayzer is defined, we need to configre one
131: defaultAnalyzer = buildAnalyzer(
132: LuceneEnvironment.Analyzer.DEFAULT_GROUP,
133: new CompassSettings(settings.getClassLoader()));
134: analyzers.put(LuceneEnvironment.Analyzer.DEFAULT_GROUP,
135: defaultAnalyzer);
136: }
137: searchAnalyzer = analyzers
138: .get(LuceneEnvironment.Analyzer.SEARCH_GROUP);
139: if (searchAnalyzer == null) {
140: searchAnalyzer = defaultAnalyzer;
141: }
142: // build the analyzers for the different resources
143: buildAnalyzerPerAlias(mapping);
144: }
145:
146: private void buildAnalyzerPerAlias(CompassMapping mapping)
147: throws SearchEngineException {
148: for (ResourceMapping resourceMapping : mapping
149: .getRootMappings()) {
150: String alias = resourceMapping.getAlias();
151: String resourceAnalyzerName = LuceneEnvironment.Analyzer.DEFAULT_GROUP;
152: if (resourceMapping.getAnalyzer() != null) {
153: resourceAnalyzerName = resourceMapping.getAnalyzer();
154: }
155: Analyzer resourceAnalyzer = buildAnalyzerPerResourcePropertyIfNeeded(
156: resourceMapping, resourceAnalyzerName);
157: aliasAnalyzers.put(alias, resourceAnalyzer);
158: }
159: }
160:
161: /**
162: * Returns the default Lucene {@link Analyzer} for Compass.
163: */
164: public Analyzer getDefaultAnalyzer() {
165: return defaultAnalyzer;
166: }
167:
168: /**
169: * Returns the search Lucene {@link Analyzer}.
170: */
171: public Analyzer getSearchAnalyzer() {
172: return searchAnalyzer;
173: }
174:
175: /**
176: * Returns the Lucene {@link Analyzer} registed under the given name.
177: */
178: public Analyzer getAnalyzer(String analyzerName) {
179: return analyzers.get(analyzerName);
180: }
181:
182: /**
183: * Returns the Lucene {@link Analyzer} for the given alias. Might build a per field analyzer
184: * if the resource has more than one analyzer against one of its properties.
185: */
186: public Analyzer getAnalyzerByAlias(String alias) {
187: return aliasAnalyzers.get(alias);
188: }
189:
190: public Analyzer getAnalyzerByAliasMustExists(String alias)
191: throws SearchEngineException {
192: Analyzer analyzer = aliasAnalyzers.get(alias);
193: if (analyzer == null) {
194: throw new SearchEngineException(
195: "No analyzer is defined for alias [" + alias + "]");
196: }
197: return analyzer;
198: }
199:
200: /**
201: * Returns the Lucene {@link Analyzer} based on the give {@link Resource}. Will build a specifc
202: * per field analyzr if the given {@link Resource} has properties with different analyzers.
203: * Will also take into account if the resource has an analyzer controller based on the analyzer
204: * controller property value.
205: */
206: public Analyzer getAnalyzerByResource(Resource resource)
207: throws SearchEngineException {
208: String alias = resource.getAlias();
209: ResourceMapping resourceMapping = mapping
210: .getRootMappingByAlias(alias);
211: if (resourceMapping.getAnalyzerController() == null) {
212: return aliasAnalyzers.get(alias);
213: }
214: ResourceAnalyzerController analyzerController = resourceMapping
215: .getAnalyzerController();
216: String analyzerPropertyName = analyzerController
217: .getAnalyzerResourcePropertyName();
218: String analyzerName = resource.getValue(analyzerPropertyName);
219: if (analyzerName == null) {
220: analyzerName = analyzerController.getNullAnalyzer();
221: }
222: return buildAnalyzerPerResourcePropertyIfNeeded(
223: resourceMapping, analyzerName);
224: }
225:
226: public Analyzer getAnalyzerMustExist(String analyzerName)
227: throws SearchEngineException {
228: Analyzer analyzer = analyzers.get(analyzerName);
229: if (analyzer == null) {
230: throw new SearchEngineException(
231: "No analyzer is defined for analyzer name ["
232: + analyzerName + "]");
233: }
234: return analyzer;
235: }
236:
237: private Analyzer buildAnalyzer(String analyzerName,
238: CompassSettings settings) {
239: String analyzerFactorySetting = settings.getSetting(
240: LuceneEnvironment.Analyzer.FACTORY,
241: DefaultLuceneAnalyzerFactory.class.getName());
242: LuceneAnalyzerFactory analyzerFactory;
243: try {
244: analyzerFactory = (LuceneAnalyzerFactory) ClassUtils
245: .forName(analyzerFactorySetting,
246: settings.getClassLoader()).newInstance();
247: } catch (Exception e) {
248: throw new SearchEngineException(
249: "Cannot create Analyzer factory ["
250: + analyzerFactorySetting
251: + "]. Please verify the analyzer factory setting at ["
252: + LuceneEnvironment.Analyzer.FACTORY + "]",
253: e);
254: }
255: Analyzer analyzer = analyzerFactory.createAnalyzer(
256: analyzerName, settings);
257: String filters = settings
258: .getSetting(LuceneEnvironment.Analyzer.FILTERS);
259: if (filters != null) {
260: StringTokenizer tokenizer = new StringTokenizer(filters,
261: ",");
262: ArrayList<LuceneAnalyzerTokenFilterProvider> filterProviders = new ArrayList<LuceneAnalyzerTokenFilterProvider>();
263: while (tokenizer.hasMoreTokens()) {
264: String filterProviderLookupName = tokenizer.nextToken();
265: if (!StringUtils.hasText(filterProviderLookupName)) {
266: continue;
267: }
268: LuceneAnalyzerTokenFilterProvider provider = analyzersFilters
269: .get(filterProviderLookupName);
270: if (provider == null) {
271: throw new SearchEngineException(
272: "Failed to located filter provider ["
273: + filterProviderLookupName
274: + "] for analyzer [" + analyzerName
275: + "]");
276: }
277: filterProviders.add(provider);
278: }
279: analyzer = new LuceneAnalyzerFilterWrapper(
280: analyzer,
281: filterProviders
282: .toArray(new LuceneAnalyzerTokenFilterProvider[filterProviders
283: .size()]));
284: }
285: return analyzer;
286: }
287:
288: private Analyzer buildAnalyzerPerResourcePropertyIfNeeded(
289: ResourceMapping resourceMapping, String resourceAnalyzerName) {
290: Analyzer resourceAnalyzer = getAnalyzerMustExist(resourceAnalyzerName);
291: // create the per field analyzer only if there is one that is
292: // specific to a resource property or
293: if (resourceMapping.hasSpecificAnalyzerPerResourceProperty()) {
294: PerFieldAnalyzerWrapper perFieldAnalyzerWrapper = new PerFieldAnalyzerWrapper(
295: resourceAnalyzer);
296: ResourcePropertyMapping[] propertyMappings = resourceMapping
297: .getResourcePropertyMappings();
298: for (ResourcePropertyMapping propertyMapping : propertyMappings) {
299: if (propertyMapping.getAnalyzer() != null) {
300: Analyzer propertyAnalyzer = getAnalyzer(propertyMapping
301: .getAnalyzer());
302: if (propertyAnalyzer == null) {
303: throw new SearchEngineException(
304: "Failed to find analyzer ["
305: + propertyMapping.getAnalyzer()
306: + "] for alias ["
307: + resourceMapping.getAlias()
308: + "] and property ["
309: + propertyMapping.getName()
310: + "]");
311: }
312: perFieldAnalyzerWrapper.addAnalyzer(propertyMapping
313: .getPath().getPath(), propertyAnalyzer);
314: }
315: }
316: return perFieldAnalyzerWrapper;
317: }
318: return resourceAnalyzer;
319: }
320:
321: private void checkNotUsingOldVersionsAnalyzerSettings(
322: CompassSettings settings) throws SearchEngineException {
323: // just so upgrades will be simpler
324: if (settings.getSetting("compass.engine.analyzer.factory") != null) {
325: throw new ConfigurationException(
326: "Old analyzer setting for analyzer factory, use [compass.engine.analyzer.default.*] instead");
327: }
328: if (settings.getSetting("compass.engine.analyzer") != null) {
329: throw new ConfigurationException(
330: "Old analyzer setting for analyzer, use [compass.engine.analyzer.default.*] instead");
331: }
332: if (settings.getSetting("compass.engine.analyzer.stopwords") != null) {
333: throw new ConfigurationException(
334: "Old analyzer setting for stopwords, use [compass.engine.analyzer.default.*] instead");
335: }
336: }
337: }
|