001: /*
002: * $Header$
003: * $Revision: 6826 $
004: * $Date: 2007-06-18 05:08:26 -0700 $
005: *
006: * ====================================================================
007: *
008: * Copyright 1999-2004 The Apache Software Foundation
009: *
010: * Licensed under the Apache License, Version 2.0 (the "License");
011: * you may not use this file except in compliance with the License.
012: * You may obtain a copy of the License at
013: *
014: * http://www.apache.org/licenses/LICENSE-2.0
015: *
016: * Unless required by applicable law or agreed to in writing, software
017: * distributed under the License is distributed on an "AS IS" BASIS,
018: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
019: * See the License for the specific language governing permissions and
020: * limitations under the License.
021: *
022: */
023: package org.apache.slide.index.lucene;
024:
025: import java.io.Reader;
026: import java.text.ParseException;
027: import java.text.SimpleDateFormat;
028: import java.util.Date;
029: import java.util.Enumeration;
030: import java.util.HashMap;
031: import java.util.HashSet;
032: import java.util.Iterator;
033: import java.util.Locale;
034: import java.util.Map;
035: import java.util.Set;
036:
037: import nl.hippo.slide.index.SubLuceneIndexerDASL;
038: import nl.hippo.slide.index.analysis.KeywordAnalyzer;
039: import nl.hippo.slide.index.analysis.LowercaseKeywordAnalyzer;
040:
041: import org.apache.lucene.analysis.Analyzer;
042: import org.apache.lucene.analysis.SimpleAnalyzer;
043: import org.apache.lucene.analysis.TokenStream;
044: import org.apache.slide.content.NodeRevisionNumber;
045: import org.apache.slide.search.IndexException;
046: import org.apache.slide.util.conf.Configuration;
047: import org.apache.slide.util.conf.ConfigurationException;
048:
049: /**
050: * Holds all configuration infos about indexing.
051: */
052: public class IndexConfiguration {
053: protected boolean indexAll = true;
054: protected boolean storeAll = true;
055:
056: protected Set stringProperties = new HashSet();
057: /** maps field names (properies) to analyzers. */
058: protected Map textProperties = new HashMap();
059: protected Set dateProperties = new HashSet();
060: protected Set intProperties = new HashSet();
061: protected Set supportsIsdefinedProperties = new HashSet();
062: protected Set indexedProperties = new HashSet();
063: protected int optimizeThreshold = 100;
064: protected AnalyzerImpl analyzer = new AnalyzerImpl();
065: protected String indexPath = null;
066: protected boolean indexAsynchron = false;
067: protected String namespaceName = null;
068: protected Set knownResourceTypes = new HashSet();
069:
070: protected boolean caseSensitive = false;
071:
072: public final static String TOKENIZED_PREFIX = "tokenized_";
073:
074: public boolean isCaseSensitive() {
075: return caseSensitive;
076: }
077:
078: public void setCaseSensitive(boolean caseSensitive) {
079: this .caseSensitive = caseSensitive;
080: }
081:
082: //default type
083: public void addStringProperty(String namespace, String name) {
084: String key = generateFieldName(namespace, name);
085: this .stringProperties.add(key);
086: this .indexedProperties.add(key);
087: }
088:
089: public boolean isStringProperty(String namespace, String name) {
090: String key = generateFieldName(namespace, name);
091:
092: if (indexAll)
093: return true;
094: else
095: return this .stringProperties.contains(key);
096: }
097:
098: public void addDateProperty(String namespace, String name) {
099: String key = generateFieldName(namespace, name);
100: this .dateProperties.add(key);
101: this .indexedProperties.add(key);
102: }
103:
104: public boolean isDateProperty(String namespace, String name) {
105: String key = generateFieldName(namespace, name);
106: return this .dateProperties.contains(key);
107: }
108:
109: public void addIntProperty(String namespace, String name) {
110: String key = generateFieldName(namespace, name);
111: this .intProperties.add(key);
112: this .indexedProperties.add(key);
113: }
114:
115: public boolean isIntProperty(String namespace, String name) {
116: String key = generateFieldName(namespace, name);
117: return this .intProperties.contains(key);
118: }
119:
120: public void addSupportsIsdefinedProperty(String namespace,
121: String name) {
122: String key = generateFieldName(namespace, name);
123: this .supportsIsdefinedProperties.add(key);
124: this .indexedProperties.add(key);
125: }
126:
127: public boolean supportsIsDefined(String namespace, String name) {
128: //String key = generateFieldName(namespace, name);
129: return true;
130: //return this.supportsIsdefinedProperties.contains(key);
131: }
132:
133: public void addTextProperty(String namespace, String name,
134: Analyzer analyzer) {
135: String key = generateFieldName(namespace, name);
136: this .textProperties.put(key, analyzer);
137: this .indexedProperties.add(key);
138: }
139:
140: public boolean isTextProperty(String namespace, String name) {
141: String key = generateFieldName(namespace, name);
142: return this .textProperties.containsKey(key);
143: }
144:
145: public void setIndexAll(boolean indexAll) {
146: this .indexAll = indexAll;
147: }
148:
149: public void setStoreAll(boolean storeAll) {
150: this .storeAll = storeAll;
151: }
152:
153: // all properties are indexed
154: public boolean isIndexedProperty(String namespace, String name) {
155: String key = generateFieldName(namespace, name);
156: if (!indexAll)
157: return this .indexedProperties.contains(key);
158: else
159: return true;
160: }
161:
162: /**
163: * Tests whether ops <code>eq</code>, <code>lt</code>, <code>ge</code>,etc
164: * @param namespace
165: * @param name
166: * @return
167: */
168: public boolean isComparableProperty(String namespace, String name) {
169: String key = generateFieldName(namespace, name);
170:
171: // if not explicitly made a text property
172: return !this .textProperties.containsKey(key);
173:
174: // return this.stringProperties.contains(key) ||
175: // this.intProperties.contains(key) ||
176: // this.dateProperties.contains(key);
177: }
178:
179: Iterator knownResourceTypes() {
180: return knownResourceTypes.iterator();
181: }
182:
183: public String getIndexPath() {
184: return indexPath;
185: }
186:
187: public void setIndexPath(String indexPath) {
188: this .indexPath = indexPath;
189: }
190:
191: public boolean isIndexAsynchron() {
192: return indexAsynchron;
193: }
194:
195: public void setIndexAsynchron(boolean indexAsynchron) {
196: this .indexAsynchron = indexAsynchron;
197: }
198:
199: public String getNamespaceName() {
200: return this .namespaceName;
201: }
202:
203: public void setNamespaceName(String name) {
204: this .namespaceName = name;
205: }
206:
207: public Analyzer getAnalyzer() {
208: return this .analyzer;
209: }
210:
211: public Analyzer getAnalyzerForProperty(String namespace, String name) {
212: String fieldName = generateFieldName(namespace, name);
213:
214: return getAnalyzerForField(fieldName);
215: }
216:
217: public Analyzer getAnalyzerForField(String fieldName) {
218: return (Analyzer) textProperties.get(fieldName);
219: }
220:
221: public void setContentAnalyzer(Analyzer analyzer) {
222: if (analyzer == null)
223: throw new NullPointerException();
224: this .analyzer.contentAnalyzer = analyzer;
225: }
226:
227: public void setDefaultPropertyAnalyzer(Analyzer analyzer) {
228: if (analyzer == null)
229: throw new NullPointerException();
230: this .analyzer.defaultAnalyzer = analyzer;
231: }
232:
233: public void addKnownResourceType(String namespace, String name) {
234: this .knownResourceTypes.add(name); // TODO do we need the namespace
235: }
236:
237: void readResourceTypeConfiguration(Configuration conf) {
238: for (Enumeration e = conf.getConfigurations("resource-type"); e
239: .hasMoreElements();) {
240: Configuration resourceType = (Configuration) e
241: .nextElement();
242:
243: String n; //, ns;
244: try {
245: n = resourceType.getAttribute("name");
246: //ns = resourceType.getAttribute("namespace");
247: } catch (ConfigurationException ex) {
248: continue;
249: }
250:
251: this .knownResourceTypes.add(n); // TODO do we need the namespace
252: }
253: }
254:
255: void readPropertyConfiguration(Configuration conf)
256: throws IndexException {
257: for (Enumeration e = conf.getConfigurations("property"); e
258: .hasMoreElements();) {
259: Configuration property = (Configuration) e.nextElement();
260:
261: String n, ns;
262: try {
263: n = property.getAttribute("name");
264: ns = property.getAttribute("namespace");
265: } catch (ConfigurationException ex) {
266: continue;
267: }
268:
269: Configuration child;
270: try {
271: child = property.getConfiguration("string");
272: addStringProperty(ns, n);
273: addSupportsIsdefinedProperty(ns, n);
274: } catch (ConfigurationException ex) {
275: }
276:
277: try {
278: child = property.getConfiguration("integer");
279: addIntProperty(ns, n);
280: addSupportsIsdefinedProperty(ns, n);
281: } catch (ConfigurationException ex) {
282: }
283:
284: try {
285: child = property.getConfiguration("date");
286: addDateProperty(ns, n);
287: addSupportsIsdefinedProperty(ns, n);
288: } catch (ConfigurationException ex) {
289: }
290:
291: try {
292: child = property.getConfiguration("text");
293: String clsName;
294: try {
295: clsName = child.getAttribute("analyzer");
296: } catch (ConfigurationException ex) {
297: clsName = "org.apache.lucene.analysis.SimpleAnalyzer";
298: }
299:
300: //System.err.println("indexConfig: analyzer for textfield: "+clsName);
301:
302: Analyzer analyzer;
303: try {
304: Class cls = Class.forName(clsName);
305: analyzer = (Analyzer) cls.newInstance();
306: } catch (ClassNotFoundException ex) {
307: throw new IndexException(
308: "Analyzer class not found (" + ns + ", "
309: + n + ")", ex);
310: } catch (InstantiationException ex) {
311: throw new IndexException(
312: "Can't instanciate analyzer (" + ns + ", "
313: + n + ")", ex);
314: } catch (IllegalAccessException ex) {
315: throw new IndexException(
316: "Can't instanciate analyzer (" + ns + ", "
317: + n + ")", ex);
318: } catch (ClassCastException ex) {
319: throw new IndexException(
320: "Analyzer does not extend Analyzer (" + ns
321: + ", " + n + ")", ex);
322: }
323: addTextProperty(ns, n, analyzer);
324: } catch (ConfigurationException ex) {
325: }
326:
327: try {
328: child = property.getConfiguration("is-defined");
329: addSupportsIsdefinedProperty(ns, n);
330: } catch (ConfigurationException ex) {
331: }
332: }
333: }
334:
335: // ------ data type helper -------------------------------------------------
336:
337: /**
338: * Generates a field name for "normal fields".
339: */
340: public static String generateFieldName(String namespaceUri,
341: String name) {
342: return namespaceUri + "#" + name;
343: }
344:
345: public String generateKey(String uri, NodeRevisionNumber number) {
346: return uri + "#" + number;
347: }
348:
349: public String dateToIndexString(Date date) {
350: synchronized (Index.DATE_INDEX_FORMAT) {
351: return Index.DATE_INDEX_FORMAT.format(date);
352: }
353: }
354:
355: public String intToIndexString(long value) {
356: synchronized (Index.INT_INDEX_FORMAT) {
357: if (value >= 0) {
358: return Index.INT_INDEX_FORMAT.format(value);
359: } else {
360: return Index.INT_INDEX_FORMAT
361: .format(-(Long.MAX_VALUE + value));
362: }
363: }
364: }
365:
366: private static final SimpleDateFormat formats[] = {
367: new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz",
368: Locale.US),
369: new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy",
370: Locale.US),
371: new SimpleDateFormat("EEEEEE, dd-MMM-yy HH:mm:ss zzz",
372: Locale.US),
373: new SimpleDateFormat("EEE MMMM d HH:mm:ss yyyy", Locale.US),
374: new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'") };
375:
376: /**
377: * Helper that converts property values to dates.
378: * @param value
379: * @return a date of <code>null</code> if value can't convert
380: */
381: public static Date getDateValue(Object value) {
382: if (value instanceof Date) {
383: return (Date) value;
384: } else {
385: String valstr = value.toString();
386: // Parsing the HTTP Date
387: for (int i = 0; i < formats.length; i++) {
388: try {
389: synchronized (formats[i]) {
390: return formats[i].parse(valstr);
391: }
392: } catch (ParseException e) {
393: }
394: }
395: return null;
396: }
397: }
398:
399: public String predecessor(String field, String value) {
400: StringBuffer b = new StringBuffer(value);
401: b.setCharAt(b.length() - 1,
402: (char) (b.charAt(b.length() - 1) - 1));
403: return b.toString();
404: }
405:
406: public String successor(String field, String value) {
407: StringBuffer b = new StringBuffer(value);
408: b.setCharAt(b.length() - 1,
409: (char) (b.charAt(b.length() - 1) + 1));
410: return b.toString();
411: }
412:
413: class AnalyzerImpl extends Analyzer {
414: Analyzer defaultAnalyzer = new SimpleAnalyzer();
415: Analyzer contentAnalyzer = null;
416:
417: // decide if case sensitive or not by choice of analyzer
418: Analyzer stringAnalyzer = null;
419:
420: public AnalyzerImpl() {
421: if (isCaseSensitive())
422: stringAnalyzer = new KeywordAnalyzer();
423: else
424: stringAnalyzer = new LowercaseKeywordAnalyzer();
425: }
426:
427: public TokenStream tokenStream(String fieldName, Reader reader) {
428: if (fieldName.equals(Index.CONTENT_FIELD_NAME)) {
429: return contentAnalyzer.tokenStream(fieldName, reader);
430: }
431: Analyzer analyzer = (Analyzer) textProperties
432: .get(fieldName);
433: if (analyzer == null) {
434: // test wether fieldName is part of language specific SLIDE_CONTENT, for example SLIDE_CONTENT_nl
435: if (fieldName.indexOf(Index.CONTENT_FIELD_NAME
436: .concat("_")) > -1) {
437: String locale = fieldName
438: .substring(Index.CONTENT_FIELD_NAME
439: .length() + 1);
440: analyzer = (Analyzer) textProperties
441: .get(generateFieldName(
442: SubLuceneIndexerDASL.PROPERTY_NAMESPACE_ATTR_DEFAULT,
443: locale));
444: }
445: }
446:
447: if (analyzer != null) {
448: return analyzer.tokenStream(fieldName, reader);
449: } else if (!intProperties.contains(fieldName)
450: && !dateProperties.contains(fieldName)) {
451: return stringAnalyzer.tokenStream(fieldName, reader);
452: } else {
453: // TODO should not happen, throw an exception?
454: return this.defaultAnalyzer.tokenStream(fieldName,
455: reader);
456: }
457: }
458: }
459:
460: }
|