001: package org.contineo.core.searchengine.search;
002:
003: import java.io.File;
004: import java.io.StringReader;
005: import java.util.ArrayList;
006: import java.util.Date;
007: import java.util.List;
008: import java.util.Locale;
009: import java.util.Set;
010:
011: import org.apache.commons.logging.Log;
012: import org.apache.commons.logging.LogFactory;
013: import org.apache.lucene.analysis.Analyzer;
014: import org.apache.lucene.analysis.TokenStream;
015: import org.apache.lucene.document.Document;
016: import org.apache.lucene.queryParser.MultiFieldQueryParser;
017: import org.apache.lucene.search.Hits;
018: import org.apache.lucene.search.IndexSearcher;
019: import org.apache.lucene.search.MultiSearcher;
020: import org.apache.lucene.search.Query;
021: import org.apache.lucene.search.Searcher;
022: import org.apache.lucene.search.highlight.Highlighter;
023: import org.apache.lucene.search.highlight.QueryScorer;
024: import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
025: import org.contineo.core.searchengine.crawler.LuceneAnalyzerFactory;
026: import org.contineo.core.searchengine.util.SquareSimilarity;
027: import org.contineo.core.security.dao.MenuDAO;
028: import org.contineo.util.Context;
029: import org.contineo.util.config.SettingsConfig;
030:
031: /**
032: * @author Michael Scholz
033: */
034: public class Search {
035: protected static Log log = LogFactory.getLog(Search.class);
036:
037: private int maxHits = 40;
038:
039: private boolean moreHitsPresent = false;
040:
041: private SearchOptions options;
042:
043: private String language;
044:
045: private List<Result> results = new ArrayList<Result>();
046:
047: private int estimatedHitsNumber = 0;
048:
049: private long execTime = 0;
050:
051: public Search(SearchOptions opt, String language) {
052: this .options = opt;
053: this .language = language;
054: }
055:
056: public List<Result> search() {
057: log.info("Launch search");
058: Date start = new Date();
059:
060: results.clear();
061: moreHitsPresent = false;
062: SettingsConfig conf = (SettingsConfig) Context.getInstance()
063: .getBean(SettingsConfig.class);
064:
065: try {
066: String[] languages = options.getLanguages();
067: if ((languages == null) || (languages.length == 0)) {
068: languages = new String[] { "de", "en", "fr", "es", "it" };
069: options.setLanguages(languages);
070: }
071:
072: Searcher[] searcher = new Searcher[languages.length];
073: String indexPath = conf.getValue("indexdir");
074:
075: if (!indexPath.endsWith(File.pathSeparator)) {
076: indexPath += "/";
077: }
078:
079: for (int i = 0; i < languages.length; i++) {
080: String lang = languages[i];
081: String dir = new Locale(lang).getDisplayLanguage(
082: Locale.ENGLISH).toLowerCase();
083: searcher[i] = new IndexSearcher(indexPath + dir + "/");
084: }
085:
086: MultiSearcher multiSearcher = new MultiSearcher(searcher);
087: Analyzer analyzer = LuceneAnalyzerFactory
088: .getAnalyzer(language);
089:
090: if (options.getFields() == null) {
091: String[] fields = new String[] { "content", "keywords" };
092: options.setFields(fields);
093: }
094:
095: multiSearcher.setSimilarity(new SquareSimilarity());
096:
097: MultiFieldQueryParser parser = new MultiFieldQueryParser(
098: options.getFields(), analyzer);
099: Query query = parser.parse(options.getQueryStr());
100:
101: log.info("Full-text search");
102: Hits hits = multiSearcher.search(query);
103: log.info("End of Full-text search");
104:
105: estimatedHitsNumber = hits.length();
106:
107: MenuDAO mdao = (MenuDAO) Context.getInstance().getBean(
108: MenuDAO.class);
109: log.info("DB search");
110: Set<Integer> accessibleMenues = mdao
111: .findMenuIdByUserName(options.getUsername());
112: log.info("End of DB search");
113:
114: int maxNumFragmentsRequired = 4;
115: String fragmentSeparator = " ... ";
116:
117: Highlighter highlighter = new Highlighter(
118: new SimpleHTMLFormatter(
119: "<font style='background-color:#FFFF00'>",
120: "</font>"), new QueryScorer(query));
121: String path = conf.getValue("indexdir");
122:
123: if (!path.endsWith(File.pathSeparator)) {
124: path += "/";
125: }
126:
127: for (int i = 0; i < hits.length(); i++) {
128: if (results.size() == maxHits) {
129: // The maximum number of hits was reached for a quick query
130: moreHitsPresent = true;
131: break;
132: }
133:
134: Document doc = hits.doc(i);
135: Integer menuId = new Integer(doc.get("menuId"));
136:
137: // When user can see document with menuId then put it into
138: // result-collection.
139: if (accessibleMenues.contains(menuId)) {
140: String size = doc.get("size");
141:
142: if (size.equals("0")) {
143: size = "1";
144: }
145:
146: String content = doc.get("content");
147:
148: TokenStream stream = analyzer.tokenStream(
149: "content", new StringReader(content));
150: String summary = highlighter.getBestFragments(
151: stream, content, maxNumFragmentsRequired,
152: fragmentSeparator);
153:
154: if ((summary == null) || summary.equals("")) {
155: summary = doc.get("summary");
156: }
157:
158: Result result = new Result();
159: result.setMenuId(Integer
160: .parseInt(doc.get("menuId")));
161: result.setName(doc.get("name"));
162: result.setSize(Integer.parseInt(size));
163:
164: // result.setDocid(new Integer(doc.get("docid")));
165: // result.setPath(doc.get("path"));
166: // result.setLength(new Integer(doc.get("length")));
167: // result.setDate(new
168: // Date(Long.parseLong(doc.get("date"))));
169:
170: result.setType(doc.get("type"));
171: result.setSummary(summary);
172: result.createScore(hits.score(i));
173:
174: if (result.isRelevant(options, doc.get("date"))) {
175: results.add(result);
176: }
177: }
178: }
179: } catch (Exception e) {
180: log.error(e.getMessage(), e);
181: }
182:
183: Date finish = new Date();
184: execTime = finish.getTime() - start.getTime();
185: log.info("Search finished in " + execTime + "ms");
186: return results;
187: }
188:
189: public List<Result> getResults() {
190: return results;
191: }
192:
193: public boolean isMoreHitsPresent() {
194: return moreHitsPresent;
195: }
196:
197: public void setMoreHitsPresent(boolean moreHitsPresent) {
198: this .moreHitsPresent = moreHitsPresent;
199: }
200:
201: public int getMaxHits() {
202: return maxHits;
203: }
204:
205: public void setMaxHits(int maxHits) {
206: this .maxHits = maxHits;
207: }
208:
209: public int getEstimatedHitsNumber() {
210: return estimatedHitsNumber;
211: }
212:
213: /**
214: * Query execution time in milliseconds
215: */
216: public long getExecTime() {
217: return execTime;
218: }
219:
220: public SearchOptions getOptions() {
221: return options;
222: }
223:
224: public void setOptions(SearchOptions options) {
225: this.options = options;
226: }
227: }
|