001: /*
002: * Created on Oct 19, 2004
003: */
004: package com.openedit.modules.search;
005:
006: import java.io.File;
007: import java.io.IOException;
008: import java.text.SimpleDateFormat;
009:
010: import org.apache.commons.logging.Log;
011: import org.apache.commons.logging.LogFactory;
012: import org.apache.lucene.analysis.Analyzer;
013: import org.apache.lucene.index.IndexReader;
014: import org.apache.lucene.index.IndexWriter;
015: import org.apache.lucene.queryParser.QueryParser;
016: import org.apache.lucene.search.BooleanQuery;
017: import org.apache.lucene.search.Filter;
018: import org.apache.lucene.search.Hits;
019: import org.apache.lucene.search.IndexSearcher;
020: import org.apache.lucene.search.Query;
021: import org.apache.lucene.search.Sort;
022: import org.apache.lucene.search.SortComparator;
023: import org.apache.lucene.search.SortField;
024: import org.apache.lucene.store.FSDirectory;
025: import org.openedit.xml.FieldArchive;
026:
027: import com.openedit.OpenEditException;
028: import com.openedit.OpenEditRuntimeException;
029: import com.openedit.hittracker.HitTracker;
030: import com.openedit.util.FileUtils;
031:
032: /**
033: * @author cburkey
034: *
035: */
036: public abstract class BaseLuceneSearch implements BaseSearchIndex {
037: private static final Log log = LogFactory
038: .getLog(BaseLuceneSearch.class);
039: protected Analyzer fieldAnalyzer;
040: protected IndexSearcher fieldLiveSearcher;
041: protected File fieldSearchDirectory;
042: protected SimpleDateFormat fieldFormat = new SimpleDateFormat(
043: "yyyy_MM_dd_HH_mm_ss");
044: protected IndexWriter fieldIndexWriter;
045: protected SortComparator fieldRandomComparator;
046: protected FieldArchive fieldFieldArchive;
047: protected String fieldBadSortField = null;
048:
049: public abstract void reIndexAll() throws OpenEditException;
050:
051: public Hits search(String inQuery) throws OpenEditException {
052: if ((inQuery == null) || (inQuery.length() == -1)) {
053: return null;
054: }
055: try {
056: QueryParser parser = new QueryParser("description",
057: getAnalyzer());
058: parser.setDefaultOperator(QueryParser.AND_OPERATOR);
059: Query query1 = parser.parse(inQuery);
060: Hits hits = getLiveSearcher().search(query1);
061:
062: log
063: .info("searched for: "
064: + query1.toString("contents") + " "
065: + hits.length()
066: + " total matching documents");
067: return hits;
068: } catch (Exception ex) {
069: log.error(ex);
070: throw new OpenEditException(ex);
071: }
072: }
073:
074: public HitTracker search(String inQuery, String inOrdering)
075: throws OpenEditException {
076: try {
077: if (fieldBadSortField != null
078: && fieldBadSortField.equals(inOrdering)) {
079: log.error("Skipping bad sort: " + inOrdering);
080: inOrdering = null;
081: }
082: //Lucene has problems with searching for dashes or underscores
083: QueryParser parser = new QueryParser("description",
084: getAnalyzer());
085: parser.setDefaultOperator(QueryParser.AND_OPERATOR);
086: Query query1 = parser.parse(inQuery);
087:
088: Hits hits = null;
089: long start = System.currentTimeMillis();
090: if (inOrdering != null) {
091: Sort sort = null;
092: if (inOrdering.equals("random")) {
093: sort = new Sort();
094: //SortComparator custom = SampleComparable.getComparator();
095: SortComparator custom = getRandomComparator();
096: sort.setSort(new SortField("id", custom));
097: } else {
098: boolean direction = false;
099: if (inOrdering.endsWith("Down")) {
100: direction = true;
101: inOrdering = inOrdering.substring(0, inOrdering
102: .length() - 4);
103: } else if (inOrdering.endsWith("Up")) {
104: direction = false;
105: inOrdering = inOrdering.substring(0, inOrdering
106: .length() - 2);
107: }
108: sort = new Sort(inOrdering, direction);
109: }
110: try {
111: hits = getLiveSearcher().search(query1, sort);
112: fieldBadSortField = null;
113: } catch (RuntimeException ex) {
114: if (ex.toString().contains(
115: "cannot determine sort type")
116: || ex.toString().contains(
117: " does not appear to be indexed")
118: || ex
119: .toString()
120: .contains(
121: "there are more terms than documents in field")) {
122: fieldBadSortField = inOrdering;
123: log.error(ex);
124: hits = getLiveSearcher().search(query1);
125: } else {
126: if (ex instanceof OpenEditRuntimeException) {
127: throw (OpenEditRuntimeException) ex;
128: }
129: throw new OpenEditException(ex);
130: }
131: }
132: } else {
133: hits = getLiveSearcher().search(query1);
134: }
135: long end = System.currentTimeMillis() - start;
136:
137: log.info("Searched for: " + query1 + " sort by "
138: + inOrdering + " [Found:" + hits.length()
139: + " total hits in " + (double) end / 1000D
140: + " seconds]");
141:
142: LuceneHitTracker tracker = new LuceneHitTracker(hits);
143: // tracker.setQuery(inQuery);
144: // tracker.setOrdering(inOrdering);
145: tracker.setIndexId(getIndexId());
146:
147: return tracker;
148: } catch (Exception ex) {
149: log.error(ex);
150: if (ex instanceof OpenEditRuntimeException) {
151: throw (OpenEditRuntimeException) ex;
152: }
153: throw new OpenEditException(ex);
154: }
155: }
156:
157: public HitTracker search(String inQuery, String inOrdering,
158: Filter filter) throws OpenEditException {
159: try {
160: if (fieldBadSortField != null
161: && fieldBadSortField.equals(inOrdering)) {
162: inOrdering = null;
163: }
164: //Lucene has problems with searching for dashes or underscores
165: QueryParser parser = new QueryParser("description",
166: getAnalyzer());
167: parser.setDefaultOperator(QueryParser.AND_OPERATOR);
168: Query query1 = parser.parse(inQuery);
169:
170: Hits hits = null;
171: long start = System.currentTimeMillis();
172: if (inOrdering != null) {
173: Sort sort = null;
174: if (inOrdering.equals("random")) {
175: sort = new Sort();
176: //SortComparator custom = SampleComparable.getComparator();
177: SortComparator custom = getRandomComparator();
178: sort.setSort(new SortField("id", custom));
179: } else {
180: boolean direction = false;
181: if (inOrdering.endsWith("Down")) {
182: direction = true;
183: inOrdering = inOrdering.substring(0, inOrdering
184: .length() - 4);
185: } else if (inOrdering.endsWith("Up")) {
186: direction = false;
187: inOrdering = inOrdering.substring(0, inOrdering
188: .length() - 2);
189: }
190: sort = new Sort(inOrdering, direction);
191: }
192: try {
193: hits = getLiveSearcher().search(query1, filter,
194: sort);
195:
196: fieldBadSortField = null;
197: } catch (RuntimeException ex) {
198: if (ex.toString().contains(
199: "cannot determine sort type")
200: || ex.toString().contains(
201: " does not appear to be indexed")
202: || ex
203: .toString()
204: .contains(
205: "there are more terms than documents in field")) {
206: fieldBadSortField = inOrdering;
207: log.error(ex);
208: hits = getLiveSearcher().search(query1);
209: } else {
210: if (ex instanceof OpenEditRuntimeException) {
211: throw (OpenEditRuntimeException) ex;
212: }
213: throw new OpenEditException(ex);
214: }
215: }
216: } else {
217: hits = getLiveSearcher().search(query1, filter);
218: }
219: long end = System.currentTimeMillis() - start;
220: log.info("Searched for: " + query1 + " sort by "
221: + inOrdering + " [Found:" + hits.length()
222: + " total hits in " + (double) end / 1000D
223: + " seconds]");
224:
225: LuceneHitTracker tracker = new LuceneHitTracker(hits);
226: // tracker.setQuery(inQuery);
227: // tracker.setOrdering(inOrdering);
228: tracker.setIndexId(getIndexId());
229:
230: return tracker;
231: } catch (Exception ex) {
232: log.error(ex);
233: if (ex instanceof OpenEditRuntimeException) {
234: throw (OpenEditRuntimeException) ex;
235: }
236: throw new OpenEditException(ex);
237: }
238: }
239:
240: public File buildIndexDir(String inName) {
241: //TODO: Remove the extra search folder
242: File indexDir = new File(getSearchDirectory()
243: + "/search/index/" + inName);
244: if (!indexDir.exists()) {
245: indexDir.mkdirs();
246: }
247: return indexDir;
248: }
249:
250: protected void setLiveSearcher(IndexSearcher inSearch) {
251: if (fieldLiveSearcher != null) {
252: try {
253: fieldLiveSearcher.close();
254: } catch (IOException ex) {
255: throw new OpenEditRuntimeException(ex);
256: }
257: }
258: fieldLiveSearcher = inSearch;
259: }
260:
261: protected IndexSearcher getLiveSearcher() {
262: if (fieldLiveSearcher == null) {
263: synchronized (this ) {
264: if (fieldLiveSearcher == null) {
265: BooleanQuery.setMaxClauseCount(100000);
266: try {
267: File index = buildIndexDir("A");
268: //TODO: Look for any restore index that might be ready to be loaded
269: File restoreIndex = buildIndexDir("restore");
270: if (IndexReader.indexExists(restoreIndex)) {
271: log
272: .info("Trying to create LiveSearcher so restoring the index");
273: restoreIndex(restoreIndex);
274: } else {
275: if (!IndexReader.indexExists(index)) {
276: log.error("No valid index found in A");
277: reIndexAll();
278: }
279: }
280: if (fieldIndexWriter != null) {
281: try {
282: fieldIndexWriter.flush();
283: } catch (Exception e) {
284: throw new OpenEditRuntimeException(e);
285: }
286: }
287: FSDirectory dir = FSDirectory
288: .getDirectory(index.getAbsolutePath());
289: fieldLiveSearcher = new IndexSearcher(dir);
290: } catch (Exception ex) {
291: if (ex instanceof OpenEditRuntimeException) {
292: throw (OpenEditRuntimeException) ex;
293: }
294: throw new OpenEditRuntimeException(ex);
295: }
296: }
297: }
298: }
299: return fieldLiveSearcher;
300: }
301:
302: public void flush() {
303: if (fieldIndexWriter != null) {
304: try {
305: fieldIndexWriter.flush();
306: setLiveSearcher(null);
307: } catch (Exception e) {
308: throw new OpenEditRuntimeException(e);
309: }
310: }
311: }
312:
313: public void setAnalyzer(Analyzer inAnalyzer) {
314: fieldAnalyzer = inAnalyzer;
315: }
316:
317: public Analyzer getAnalyzer() {
318: if (fieldAnalyzer == null) {
319: CompositeAnalyzer composite = new CompositeAnalyzer();
320: RecordLookUpAnalyzer exact = new RecordLookUpAnalyzer();
321: composite.setAnalyzer("id", exact);
322: composite.setAnalyzer("description", new StemmerAnalyzer());
323: composite.setAnalyzer("name", exact);
324: fieldAnalyzer = composite;
325: }
326: return fieldAnalyzer;
327: }
328:
329: public File getSearchDirectory() {
330: return fieldSearchDirectory;
331: }
332:
333: public void setSearchDirectory(File inSearchDirectory) {
334: fieldSearchDirectory = inSearchDirectory;
335: }
336:
337: public String getIndexId() {
338: if (fieldLiveSearcher == null) {
339: return "-1";
340: }
341: return String.valueOf(getLiveSearcher().hashCode());
342: }
343:
344: protected File buildLiveIndexDir() {
345: return buildIndexDir("A");
346: }
347:
348: public void clearIndex() {
349: try {
350: if (fieldIndexWriter != null) {
351: // fieldIndexWriter.flush();
352: }
353: if (fieldLiveSearcher != null) {
354: //fieldLiveSearcher.close();
355: setLiveSearcher(null);
356: }
357: } catch (Exception ex) {
358: throw new OpenEditRuntimeException(ex);
359: }
360:
361: }
362:
363: public IndexWriter getIndexWriter() throws IOException {
364: if (fieldIndexWriter == null) {
365: File indexDir = buildIndexDir("A");
366: FSDirectory dir = FSDirectory.getDirectory(indexDir);
367: File lock = new File(indexDir, "write.lock");
368: lock.delete();
369:
370: fieldIndexWriter = new IndexWriter(dir, true, getAnalyzer());
371: }
372: return fieldIndexWriter;
373: }
374:
375: public void setIndexWriter(IndexWriter inIndexWriter) {
376: if (fieldIndexWriter != null) {
377: try {
378: fieldIndexWriter.close();
379: } catch (IOException ex) {
380: log.error(ex);
381: }
382: }
383: fieldIndexWriter = inIndexWriter;
384: }
385:
386: public synchronized boolean restoreIndex(File inNewIndexDir)
387: throws IOException {
388: fieldBadSortField = null;
389: log.info("Trying to restore index");
390: FileUtils utils = new FileUtils();
391:
392: File restoreIndex = buildIndexDir("restore");
393: utils.deleteAll(restoreIndex);
394: utils.copyFiles(inNewIndexDir, restoreIndex); //getting ready to load it into live index
395:
396: File existing = buildIndexDir("A");
397: File temp = new File(existing.getParentFile(), "temp");
398: utils.deleteAll(temp);
399: try {
400: if (fieldLiveSearcher != null) //The problem here is old search results and locking the directory on windows.
401: {
402: fieldLiveSearcher.close();
403: }
404: if (fieldIndexWriter != null) {
405: fieldIndexWriter.close();
406: }
407: utils.move(existing, temp);
408: } catch (IOException ex) {
409: log.info("Restoring index failed putting temp back to A "
410: + ex);
411: utils.move(temp, existing);
412: }
413: utils.deleteAll(temp); //Keep things clean
414:
415: File[] remaining = existing.listFiles();
416: if (remaining != null && remaining.length > 0) {
417: setLiveSearcher(null);
418: setIndexWriter(null);
419: log.info("Restoring index returning false with "
420: + remaining.length + " files ");
421: return false;
422: }
423: utils.move(restoreIndex, existing);
424: setLiveSearcher(null);
425: setIndexWriter(null);
426: log.info("Restoring index success");
427: utils.deleteAll(restoreIndex); //Just in case
428: return true;
429: }
430:
431: public SortComparator getRandomComparator() {
432: if (fieldRandomComparator == null) {
433: fieldRandomComparator = new RandomSortComparator();
434: }
435: return fieldRandomComparator;
436: }
437:
438: public void setRandomComparator(SortComparator inRandomComparator) {
439: fieldRandomComparator = inRandomComparator;
440: }
441:
442: public FieldArchive getFieldArchive() {
443: return fieldFieldArchive;
444: }
445:
446: public void setFieldArchive(FieldArchive inFieldArchive) {
447: fieldFieldArchive = inFieldArchive;
448: }
449:
450: }
|