001: /*
002: * Copyright 2001-2006 C:1 Financial Services GmbH
003: *
004: * This software is free software; you can redistribute it and/or
005: * modify it under the terms of the GNU Lesser General Public
006: * License Version 2.1, as published by the Free Software Foundation.
007: *
008: * This software is distributed in the hope that it will be useful,
009: * but WITHOUT ANY WARRANTY; without even the implied warranty of
010: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
011: * Lesser General Public License for more details.
012: *
013: * You should have received a copy of the GNU Lesser General Public
014: * License along with this library; if not, write to the Free Software
015: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
016: */
017:
018: package de.finix.contelligent.search.engine;
019:
020: import java.io.IOException;
021: import java.io.Reader;
022: import java.io.StringReader;
023: import java.util.ArrayList;
024: import java.util.Collection;
025: import java.util.HashMap;
026: import java.util.Iterator;
027: import java.util.Map;
028: import java.util.Set;
029: import java.util.Timer;
030: import java.util.TimerTask;
031:
032: import org.apache.commons.digester.Digester;
033: import org.apache.commons.logging.impl.Log4JCategoryLog;
034: import org.apache.lucene.search.DateFilter;
035: import org.apache.lucene.search.Filter;
036: import org.apache.lucene.search.Query;
037:
038: import de.finix.contelligent.CallData;
039: import de.finix.contelligent.Component;
040: import de.finix.contelligent.ComponentManager;
041: import de.finix.contelligent.ComponentPath;
042: import de.finix.contelligent.Contelligent;
043: import de.finix.contelligent.components.config.Configuration;
044: import de.finix.contelligent.content.TextContent;
045: import de.finix.contelligent.exception.ComponentPersistenceException;
046: import de.finix.contelligent.exception.ContelligentExceptionID;
047: import de.finix.contelligent.logging.LoggingService;
048: import de.finix.contelligent.search.SearchException;
049: import de.finix.contelligent.search.SearchResult;
050: import de.finix.contelligent.search.SearchResultCollector;
051: import de.finix.contelligent.xml.EchoingErrorHandler;
052: import de.finix.contelligent.xml.digester.SearchEngineRuleSet;
053: import de.finix.contelligent.xml.elements.IndexBuilderElement;
054: import de.finix.contelligent.xml.elements.IndexElement;
055: import de.finix.contelligent.xml.elements.SearchEngineElement;
056:
057: /**
058: * Functions for searching the component system
059: */
060: public class SearchEngine {
061:
062: final static org.apache.log4j.Logger log = LoggingService
063: .getLogger(SearchEngine.class);
064:
065: private Contelligent contelligent;
066:
067: private Map indices = new HashMap();
068:
069: private Timer timer = new Timer(true);
070:
071: private final static ArrayList configTypes = new ArrayList();
072:
073: static {
074: configTypes
075: .add("contelligent.configuration.IndexConfiguration");
076: }
077:
078: public SearchEngine(Contelligent contelligent) {
079: this .contelligent = contelligent;
080: }
081:
082: public void register(SearchEngineElement element)
083: throws IOException {
084: Iterator iterator = element.getIndices().iterator();
085:
086: while (iterator.hasNext()) {
087: IndexElement indexElement = (IndexElement) iterator.next();
088:
089: if (this .indices.containsKey(indexElement.getName())) {
090: throw new IllegalArgumentException("Index '"
091: + indexElement.getName() + "' already exists");
092: }
093:
094: BackupLuceneIndexBuildPolicy buildPolicy = new BackupLuceneIndexBuildPolicy(
095: indexElement,
096: contelligent
097: .getContelligentDir(Contelligent.DIR_INDEX),
098: contelligent
099: .getContelligentDir(Contelligent.DIR_INDEX_BUILD));
100: LuceneIndexLockPolicy lockPolicy = new BackupedIndexLuceneIndexLockPolicy(
101: buildPolicy);
102: this .indices.put(indexElement.getName(),
103: new LuceneIndex(indexElement, contelligent,
104: lockPolicy, buildPolicy));
105:
106: String scheduleSpec = indexElement.getSchedule();
107:
108: if (scheduleSpec != null) {
109:
110: long factor = getMillisFactor(scheduleSpec);
111: long interval = getInterval(scheduleSpec);
112: long period = factor * interval;
113:
114: timer.schedule(new IndexCrawlerTask(indexElement
115: .getName()), 0, period);
116:
117: log
118: .info("automatic index refresh scheduled for index '"
119: + indexElement.getName()
120: + "' ... update every "
121: + period
122: + " ms");
123: } else {
124: log.info("manual index refresh configured for index '"
125: + indexElement.getName() + "'");
126: }
127:
128: if (log.isDebugEnabled()) {
129: log.debug("Registered index " + indexElement.getName());
130: }
131: }
132: }
133:
134: public void registerAll(CallData callData) {
135: // First reset all previous indices
136: timer.cancel();
137: indices = new HashMap();
138:
139: try {
140: ComponentManager cm = callData.getActualManager();
141: Set paths = cm.getComponentsInSubtreeFilteredByType(
142: ComponentPath.ROOT_PATH, configTypes);
143: Iterator it = paths.iterator();
144: while (it.hasNext()) {
145: try {
146: ComponentPath cp = (ComponentPath) it.next();
147: log.debug("Reading index configuration: "
148: + cp.toPath());
149: Component comp = cm.getComponent(cp, callData);
150: if (comp instanceof Configuration) {
151: Configuration config = (Configuration) comp;
152: TextContent content = (TextContent) config
153: .getContent();
154: String text = content.getText(callData);
155: Reader reader = new StringReader(text);
156:
157: Digester digester = new Digester();
158: // digester.setClassLoader(ContelligentImpl.class.getClassLoader());
159: digester
160: .setErrorHandler(new EchoingErrorHandler(
161: "While reloading index configuration"));
162: digester.setValidating(false);
163: digester.setLogger(new Log4JCategoryLog(
164: LoggingService.getLogger(this
165: .getClass().getName()
166: + "-digester")));
167: digester.addRuleSet(new SearchEngineRuleSet("",
168: null));
169: SearchEngineElement e = (SearchEngineElement) digester
170: .parse(reader);
171: register(e);
172: }
173: } catch (Exception e) {
174: log.error("registerAll() - Exception: ", e);
175: }
176: }
177: } catch (ComponentPersistenceException e) {
178: log.error("registerAll() - Exception: ", e);
179: }
180: }
181:
182: /**
183: * @param scheduleSpec
184: * @return
185: */
186: private long getMillisFactor(String scheduleSpec) {
187:
188: if (scheduleSpec.length() == 0) {
189: throw new NumberFormatException(
190: "scheduleSpec must be of format m|h|d<time> not: '"
191: + scheduleSpec + "'");
192: }
193:
194: long interval = 1;
195: char iSig = scheduleSpec.charAt(0);
196:
197: switch (iSig) {
198: // minute
199: case 'm':
200: return 1000 * 60;
201:
202: // hour
203: case 'h':
204: return 1000 * 60 * 60;
205:
206: // day
207: case 'd':
208: return 1000 * 60 * 60 * 24;
209: default:
210: throw new NumberFormatException(
211: "scheduleSpec must be of format m|h|d[time] (e.g. 'm10', 'h', etc.) not: '"
212: + scheduleSpec + "'");
213: }
214: }
215:
216: /**
217: * @param scheduleSpec
218: * @return
219: */
220: private long getInterval(String scheduleSpec) {
221: if (scheduleSpec.length() == 0) {
222: throw new NumberFormatException(
223: "scheduleSpec must be of format m|h|d[time] (e.g. 'm10', 'h', etc.) not: '"
224: + scheduleSpec + "'");
225: }
226: String interval = scheduleSpec.substring(1);
227:
228: if (interval.length() == 0) {
229: // default interval is 1
230: return 1;
231: }
232:
233: try {
234: return Long.parseLong(interval);
235: } catch (NumberFormatException e) {
236: throw new NumberFormatException(
237: "scheduleSpec must be of format m|h|d[time] (e.g. 'm10', 'h', etc.) not: '"
238: + scheduleSpec + "'");
239: }
240: }
241:
242: public void startCrawlers() throws IOException {
243: Iterator iterator = indices.keySet().iterator();
244: while (iterator.hasNext()) {
245: String name = (String) iterator.next();
246: try {
247: startCrawler(name);
248: } catch (IOException e) {
249: log.warn("Creation of index " + name + " failed");
250: }
251: }
252: }
253:
254: /**
255: * Starts recreating the specified index. Returns false if the index is not
256: * configured.
257: */
258: public boolean startCrawler(String indexName) throws IOException {
259: LuceneIndex index = (LuceneIndex) indices.get(indexName);
260:
261: if (index == null) {
262: log.info("Unknown index: " + indexName);
263: return false;
264: }
265:
266: synchronized (index) {
267: Collection crawlers = createCrawlers(index);
268: Iterator iterator = crawlers.iterator();
269:
270: while (iterator.hasNext()) {
271: Crawler crawler = (Crawler) iterator.next();
272: crawler.run();
273: }
274: index.postCreate();
275: }
276: return true;
277: }
278:
279: private Collection createCrawlers(LuceneIndex index) {
280: Collection answer = new ArrayList();
281: Iterator iterator = index.getIndexElement().getBuilders()
282: .iterator();
283:
284: while (iterator.hasNext()) {
285: IndexBuilderElement element = (IndexBuilderElement) iterator
286: .next();
287: answer.add(new Crawler(index, element));
288: }
289: return answer;
290: }
291:
292: public SearchResult search(String query, String indexName)
293: throws SearchException {
294: return search(query, indexName, 0, -1);
295: }
296:
297: public SearchResult search(String query, String indexName,
298: SearchResultCollector collector) throws SearchException {
299: return search(query, indexName, 0, -1, collector);
300: }
301:
302: public SearchResult search(String query, String indexName,
303: int start, int num) throws SearchException {
304: return search(query, indexName, start, num, null);
305: }
306:
307: /**
308: * Searches Query <code>query</code> in index <code>index</index>
309: *
310: * @param start starting row
311: * @param num number of rows
312: * @return a <code>SearchResult</code> object
313: */
314: public SearchResult search(String query, String indexName,
315: int start, int num, SearchResultCollector collector)
316: throws SearchException {
317: LuceneIndex index = (LuceneIndex) indices.get(indexName);
318: if (index == null)
319: throw new SearchException(
320: ContelligentExceptionID.search_indexNotFound,
321: new String[] { indexName });
322: return index.search(query, start, num, collector);
323: }
324:
325: /**
326: * Search with date constraints
327: */
328: public SearchResult search(String query, String indexName,
329: long from, long to, int start, int num)
330: throws SearchException {
331: return search(query, indexName, from, to, start, num,
332: "lastModified", null);
333: }
334:
335: public SearchResult search(String query, String indexName,
336: long from, long to, int start, int num, String dateField,
337: SearchResultCollector collector) throws SearchException {
338: LuceneIndex index = (LuceneIndex) indices.get(indexName);
339:
340: Filter filter = new DateFilter(dateField, from, to);
341: return index.search(query, start, num, filter, collector);
342: }
343:
344: public SearchResult search(Query query, String indexName,
345: int start, int num, SearchResultCollector collector)
346: throws SearchException {
347: LuceneIndex index = (LuceneIndex) indices.get(indexName);
348:
349: return index.search(query, start, num, null, collector);
350: }
351:
352: public class IndexCrawlerTask extends TimerTask {
353: String indexName;
354:
355: public IndexCrawlerTask(String indexName) {
356: this .indexName = indexName;
357: }
358:
359: /*
360: * (non-Javadoc)
361: *
362: * @see java.util.TimerTask#run()
363: */
364: public void run() {
365: if (log.isDebugEnabled()) {
366: log
367: .debug("run() - starting scheduled IndexCrawler - index: '"
368: + indexName + "'");
369: }
370:
371: try {
372: startCrawler(indexName);
373: if (log.isDebugEnabled()) {
374: log.debug("run() - ended IndexCrawler - index: '"
375: + indexName + "'");
376: }
377:
378: } catch (IOException e) {
379: log.error(
380: "run() - IndexCrawler stopped with Exceptions",
381: e);
382: }
383: }
384: }
385: }
|