001: //==============================================================================
002: //=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
003: //=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
004: //=== and United Nations Environment Programme (UNEP)
005: //===
006: //=== This program is free software; you can redistribute it and/or modify
007: //=== it under the terms of the GNU General Public License as published by
008: //=== the Free Software Foundation; either version 2 of the License, or (at
009: //=== your option) any later version.
010: //===
011: //=== This program is distributed in the hope that it will be useful, but
012: //=== WITHOUT ANY WARRANTY; without even the implied warranty of
013: //=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: //=== General Public License for more details.
015: //===
016: //=== You should have received a copy of the GNU General Public License
017: //=== along with this program; if not, write to the Free Software
018: //=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
019: //===
020: //=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
021: //=== Rome - Italy. email: geonetwork@osgeo.org
022: //==============================================================================
023:
024: package org.fao.geonet.kernel.search;
025:
026: import java.io.IOException;
027: import java.util.Comparator;
028: import java.util.Enumeration;
029: import java.util.Hashtable;
030: import java.util.Iterator;
031: import java.util.Map;
032: import java.util.Set;
033: import java.util.TreeSet;
034: import jeeves.resources.dbms.Dbms;
035: import jeeves.server.ServiceConfig;
036: import jeeves.server.UserSession;
037: import jeeves.server.context.ServiceContext;
038: import jeeves.utils.Log;
039: import jeeves.utils.Util;
040: import jeeves.utils.Xml;
041: import org.apache.lucene.document.Document;
042: import org.apache.lucene.document.Field;
043: import org.apache.lucene.index.IndexReader;
044: import org.apache.lucene.index.Term;
045: import org.apache.lucene.search.BooleanClause;
046: import org.apache.lucene.search.BooleanQuery;
047: import org.apache.lucene.search.FuzzyQuery;
048: import org.apache.lucene.search.Hits;
049: import org.apache.lucene.search.IndexSearcher;
050: import org.apache.lucene.search.MatchAllDocsQuery;
051: import org.apache.lucene.search.PhraseQuery;
052: import org.apache.lucene.search.PrefixQuery;
053: import org.apache.lucene.search.Query;
054: import org.apache.lucene.search.RangeQuery;
055: import org.apache.lucene.search.Sort;
056: import org.apache.lucene.search.SortField;
057: import org.apache.lucene.search.TermQuery;
058: import org.apache.lucene.search.WildcardQuery;
059: import org.fao.geonet.GeonetContext;
060: import org.fao.geonet.constants.Edit;
061: import org.fao.geonet.constants.Geonet;
062: import org.fao.geonet.kernel.AccessManager;
063: import org.fao.geonet.kernel.search.LuceneUtils;
064: import org.fao.geonet.lib.Lib;
065: import org.jdom.Element;
066:
067: //==============================================================================
068: // search metadata locally using lucene
069: //--------------------------------------------------------------------------------
070:
071: public class LuceneSearcher extends MetaSearcher {
072: private SearchManager _sm;
073: private String _styleSheetName;
074:
075: private IndexReader _reader;
076: private IndexSearcher _searcher;
077: private Query _query;
078: private Hits _hits;
079: private Element _elSummary;
080:
081: private int _maxSummaryKeys;
082:
083: //--------------------------------------------------------------------------------
084: // constructor
085: public LuceneSearcher(SearchManager sm, String styleSheetName) {
086: _sm = sm;
087: _styleSheetName = styleSheetName;
088: }
089:
090: //--------------------------------------------------------------------------------
091: // MetaSearcher API
092:
093: public void search(ServiceContext srvContext, Element request,
094: ServiceConfig config) throws Exception {
095: computeQuery(srvContext, request, config);
096: performQuery(request);
097: initSearchRange(srvContext);
098: }
099:
100: //--------------------------------------------------------------------------------
101:
102: public Element present(ServiceContext srvContext, Element request,
103: ServiceConfig config) throws Exception {
104: if (!isValid())
105: performQuery(request);
106:
107: updateSearchRange(request);
108:
109: GeonetContext gc = (GeonetContext) srvContext
110: .getHandlerContext(Geonet.CONTEXT_NAME);
111:
112: String sFast = request.getChildText("fast");
113: boolean fast = sFast != null && sFast.equals("true");
114:
115: // srvContext.log("METASEARCHER " + _styleSheetName + " FROM: " + from + "(" + sFrom + ")"); // DEBUG
116: // srvContext.log("METASEARCHER " + _styleSheetName + " TO: " + to + "(" + sTo + ")"); // DEBUG
117:
118: // build response
119: Element response = new Element("response");
120: response.setAttribute("from", getFrom() + "");
121: response.setAttribute("to", getTo() + "");
122:
123: response.addContent((Element) _elSummary.clone());
124:
125: if (getTo() > 0) {
126: for (int i = getFrom() - 1; i < getTo(); i++) {
127: Document doc = _hits.doc(i);
128: String id = doc.get("_id");
129: Element md = new Element("md");
130:
131: if (fast) {
132: md = getMetadataFromIndex(doc, id);
133: } else {
134: md = gc.getDataManager().getMetadata(srvContext,
135: id, false);
136: }
137:
138: //--- the search result is buffered so a metadata could have been deleted
139: //--- just before showing search results
140:
141: if (md != null) {
142: // Calculate score and add it to info elem
143: Float score = _hits.score(i);
144: Element info = md.getChild(Edit.RootChild.INFO,
145: Edit.NAMESPACE);
146: addElement(info, Edit.Info.Elem.SCORE, score
147: .toString());
148:
149: response.addContent(md);
150: }
151: }
152: }
153: return response;
154: }
155:
156: //--------------------------------------------------------------------------------
157:
158: public int getSize() {
159: return _hits.length();
160: }
161:
162: //--------------------------------------------------------------------------------
163:
164: public Element getSummary() throws Exception {
165: Element response = new Element("response");
166: response.addContent((Element) _elSummary.clone());
167: return response;
168: }
169:
170: //--------------------------------------------------------------------------------
171: // RGFIX: check this
172:
173: public void close() {
174: try {
175: _reader.close();
176: } catch (IOException e) {
177: e.printStackTrace();
178: } // DEBUG
179: }
180:
181: //--------------------------------------------------------------------------------
182: // private setup, index, delete and search functions
183:
184: private void computeQuery(ServiceContext srvContext,
185: Element request, ServiceConfig config) throws Exception {
186: String sMaxSummaryKeys = request.getChildText("maxSummaryKeys");
187: if (sMaxSummaryKeys == null)
188: sMaxSummaryKeys = config.getValue("maxSummaryKeys", "10");
189: _maxSummaryKeys = Integer.parseInt(sMaxSummaryKeys);
190:
191: GeonetContext gc = (GeonetContext) srvContext
192: .getHandlerContext(Geonet.CONTEXT_NAME);
193: AccessManager am = gc.getAccessManager();
194:
195: Dbms dbms = (Dbms) srvContext.getResourceManager().open(
196: Geonet.Res.MAIN_DB);
197: Set<String> hs = gc.getAccessManager().getUserGroups(dbms,
198: srvContext.getUserSession(), srvContext.getIpAddress());
199:
200: for (String group : hs)
201: request.addContent(new Element("group").addContent(group));
202:
203: String owner = srvContext.getUserSession().getUserId();
204:
205: if (owner != null)
206: request.addContent(new Element("owner").addContent(owner));
207:
208: //--- in case of an admin we have to show all results
209:
210: UserSession us = srvContext.getUserSession();
211:
212: if (us.isAuthenticated()) {
213: if (us.getProfile().equals(Geonet.Profile.ADMINISTRATOR))
214: request.addContent(new Element("isAdmin")
215: .addContent("true"));
216:
217: else if (us.getProfile().equals(Geonet.Profile.REVIEWER))
218: request.addContent(new Element("isReviewer")
219: .addContent("true"));
220: }
221:
222: //--- some other stuff
223:
224: Log.debug(Geonet.SEARCH_ENGINE, "CRITERIA:\n"
225: + Xml.getString(request));
226: request.addContent(Lib.db.select(dbms, "Regions", "region"));
227:
228: Element xmlQuery = _sm.transform(_styleSheetName, request);
229: Log.debug(Geonet.SEARCH_ENGINE, "XML QUERY:\n"
230: + Xml.getString(xmlQuery));
231:
232: _query = makeQuery(xmlQuery);
233: }
234:
235: //--------------------------------------------------------------------------------
236: // perform the query
237:
238: private void performQuery(Element request) throws Exception {
239: String sortBy = Util.getParam(request,
240: Geonet.SearchResult.SORT_BY,
241: Geonet.SearchResult.SortBy.RELEVANCE);
242:
243: Log.debug(Geonet.SEARCH_ENGINE, "Sorting by : " + sortBy);
244:
245: Sort sort = null;
246:
247: if (sortBy.equals(Geonet.SearchResult.SortBy.DATE))
248: sort = new Sort(
249: new SortField[] {
250: new SortField("_changeDate",
251: SortField.STRING, true),
252: SortField.FIELD_SCORE });
253:
254: else if (sortBy.equals(Geonet.SearchResult.SortBy.POPULARITY))
255: sort = new Sort(new SortField[] {
256: new SortField("_popularity", SortField.INT, true),
257: SortField.FIELD_SCORE });
258:
259: else if (sortBy.equals(Geonet.SearchResult.SortBy.RATING))
260: sort = new Sort(new SortField[] {
261: new SortField("_rating", SortField.INT, true),
262: SortField.FIELD_SCORE });
263:
264: _reader = IndexReader.open(_sm.getLuceneDir());
265: _searcher = new IndexSearcher(_reader);
266: _hits = _searcher.search(_query, sort);
267:
268: Log.debug(Geonet.SEARCH_ENGINE, "Hits found : "
269: + _hits.length());
270:
271: makeSummary();
272:
273: setValid(true);
274: }
275:
276: //--------------------------------------------------------------------------------
277: // makes a new lucene query
278: // converts to lowercase if needed as the StandardAnalyzer
279:
280: public static Query makeQuery(Element xmlQuery) throws Exception {
281: String name = xmlQuery.getName();
282: Query returnValue = null;
283:
284: if (name.equals("TermQuery")) {
285: String fld = xmlQuery.getAttributeValue("fld");
286: String txt = xmlQuery.getAttributeValue("txt")
287: .toLowerCase();
288: returnValue = new TermQuery(new Term(fld, txt));
289: } else if (name.equals("FuzzyQuery")) {
290: String fld = xmlQuery.getAttributeValue("fld");
291: Float sim = Float
292: .valueOf(xmlQuery.getAttributeValue("sim"));
293: String txt = xmlQuery.getAttributeValue("txt")
294: .toLowerCase();
295: returnValue = new FuzzyQuery(new Term(fld, txt), sim
296: .floatValue());
297: } else if (name.equals("PrefixQuery")) {
298: String fld = xmlQuery.getAttributeValue("fld");
299: String txt = xmlQuery.getAttributeValue("txt")
300: .toLowerCase();
301: returnValue = new PrefixQuery(new Term(fld, txt));
302: } else if (name.equals("MatchAllDocsQuery")) {
303: MatchAllDocsQuery query = new MatchAllDocsQuery();
304: return query;
305: } else if (name.equals("WildcardQuery")) {
306: String fld = xmlQuery.getAttributeValue("fld");
307: String txt = xmlQuery.getAttributeValue("txt")
308: .toLowerCase();
309: returnValue = new WildcardQuery(new Term(fld, txt));
310: } else if (name.equals("PhraseQuery")) {
311: PhraseQuery query = new PhraseQuery();
312: for (Iterator i = xmlQuery.getChildren().iterator(); i
313: .hasNext();) {
314: Element xmlTerm = (Element) i.next();
315: String fld = xmlTerm.getAttributeValue("fld");
316: String txt = xmlTerm.getAttributeValue("txt")
317: .toLowerCase();
318: query.add(new Term(fld, txt));
319: }
320: returnValue = query;
321: } else if (name.equals("RangeQuery")) {
322: String fld = xmlQuery.getAttributeValue("fld");
323: String lowerTxt = xmlQuery.getAttributeValue("lowerTxt");
324: String upperTxt = xmlQuery.getAttributeValue("upperTxt");
325: String sInclusive = xmlQuery.getAttributeValue("inclusive");
326: boolean inclusive = "true".equals(sInclusive);
327:
328: Term lowerTerm = (lowerTxt == null ? null : new Term(fld,
329: lowerTxt.toLowerCase()));
330: Term upperTerm = (upperTxt == null ? null : new Term(fld,
331: upperTxt.toLowerCase()));
332:
333: returnValue = new RangeQuery(lowerTerm, upperTerm,
334: inclusive);
335: } else if (name.equals("BooleanQuery")) {
336: BooleanQuery query = new BooleanQuery();
337: for (Iterator iter = xmlQuery.getChildren().iterator(); iter
338: .hasNext();) {
339: Element xmlBooleanClause = (Element) iter.next();
340: String sRequired = xmlBooleanClause
341: .getAttributeValue("required");
342: String sProhibited = xmlBooleanClause
343: .getAttributeValue("prohibited");
344: boolean required = sRequired != null
345: && sRequired.equals("true");
346: boolean prohibited = sProhibited != null
347: && sProhibited.equals("true");
348: BooleanClause.Occur occur = LuceneUtils
349: .convertRequiredAndProhibitedToOccur(required,
350: prohibited);
351: Element xmlSubQuery = (Element) xmlBooleanClause
352: .getChildren().get(0);
353: query.add(makeQuery(xmlSubQuery), occur);
354: }
355: query.setMaxClauseCount(16384); // FIXME: quick fix; using Filters should be better
356:
357: returnValue = query;
358: } else
359: throw new Exception("unknown lucene query type: " + name);
360:
361: Log.debug(Geonet.SEARCH_ENGINE, "Lucene Query: "
362: + returnValue.toString());
363: return returnValue;
364: }
365:
366: //--------------------------------------------------------------------------------
367:
368: private void makeSummary() throws Exception {
369: _elSummary = new Element("summary");
370:
371: int count = getSize();
372:
373: _elSummary.setAttribute("count", count + "");
374: _elSummary.setAttribute("type", "local");
375:
376: // count keyword frequencies
377: Element elKeywords = new Element("keywords");
378: Hashtable htKeywords = new Hashtable();
379: for (int i = 0; i < count; i++) {
380: Document doc = _hits.doc(i);
381: String keywords[] = doc.getValues("keyword");
382: if (keywords != null) // if there are no keywords lucene returns null instead of an empty array
383: for (int j = 0; j < keywords.length; j++) {
384: String keyword = keywords[j];
385: Integer keyCount = (Integer) htKeywords
386: .get(keyword);
387: if (keyCount == null)
388: keyCount = new Integer(1);
389: else
390: keyCount = new Integer(keyCount.intValue() + 1);
391: htKeywords.put(keyword, keyCount);
392: }
393: }
394: // sort keywords according to frequency
395: TreeSet setKeywords = new TreeSet(new Comparator() {
396: public int compare(Object p1, Object p2) {
397: Map.Entry me1 = (Map.Entry) p1;
398: Map.Entry me2 = (Map.Entry) p2;
399: String key1 = (String) me1.getKey();
400: String key2 = (String) me2.getKey();
401: Integer count1 = (Integer) me1.getValue();
402: Integer count2 = (Integer) me2.getValue();
403: int cmp = count2.compareTo(count1);
404: if (cmp != 0)
405: return cmp;
406: else
407: return key1.compareTo(key2);
408: }
409: });
410: setKeywords.addAll(htKeywords.entrySet());
411:
412: int nKeys = 0;
413: for (Iterator iter = setKeywords.iterator(); iter.hasNext();) {
414: if (++nKeys > _maxSummaryKeys)
415: break;
416:
417: Map.Entry me = (Map.Entry) iter.next();
418: String keyword = (String) me.getKey();
419: Integer keyCount = (Integer) me.getValue();
420:
421: Element elKeyword = new Element("keyword");
422: elKeyword.setAttribute("count", keyCount.toString());
423: elKeyword.setAttribute("name", keyword);
424: elKeywords.addContent(elKeyword);
425: }
426: _elSummary.addContent(elKeywords);
427:
428: // count categories frequencies
429: Element elCategories = new Element("categories");
430: Hashtable htCategories = new Hashtable();
431: for (int i = 0; i < count; i++) {
432: Document doc = _hits.doc(i);
433: String categories[] = doc.getValues("_cat");
434: if (categories != null) // if there are no categories lucene returns null instead of an empty array
435: for (int j = 0; j < categories.length; j++) {
436: String category = categories[j];
437: Integer catCount = (Integer) htCategories
438: .get(category);
439: if (catCount == null)
440: catCount = new Integer(1);
441: else
442: catCount = new Integer(catCount.intValue() + 1);
443: htCategories.put(category, catCount);
444: }
445: }
446: // sort categories according to name
447: TreeSet setCategories = new TreeSet(new Comparator() {
448: public int compare(Object p1, Object p2) {
449: Map.Entry me1 = (Map.Entry) p1;
450: Map.Entry me2 = (Map.Entry) p2;
451: String cat1 = (String) me1.getKey();
452: String cat2 = (String) me2.getKey();
453: return cat1.compareTo(cat2);
454: }
455: });
456: setCategories.addAll(htCategories.entrySet());
457:
458: for (Iterator iter = setCategories.iterator(); iter.hasNext();) {
459: Map.Entry me = (Map.Entry) iter.next();
460: String category = (String) me.getKey();
461: Integer catCount = (Integer) me.getValue();
462:
463: Element elCategory = new Element("category");
464: elCategory.setAttribute("count", catCount.toString());
465: elCategory.setAttribute("name", category);
466: elCategories.addContent(elCategory);
467: }
468: _elSummary.addContent(elCategories);
469:
470: // count sources frequencies
471: Element elSources = new Element("sources");
472: Hashtable htSources = new Hashtable();
473: for (int i = 0; i < count; i++) {
474: Document doc = _hits.doc(i);
475: String source = doc.get("_source");
476: Integer sourceCount = (Integer) htSources.get(source);
477: if (sourceCount == null)
478: sourceCount = new Integer(1);
479: else
480: sourceCount = new Integer(sourceCount.intValue() + 1);
481: htSources.put(source, sourceCount);
482: }
483: // sort sources according to frequency
484: TreeSet setSources = new TreeSet(new Comparator() {
485: public int compare(Object p1, Object p2) {
486: Map.Entry me1 = (Map.Entry) p1;
487: Map.Entry me2 = (Map.Entry) p2;
488: String key1 = (String) me1.getKey();
489: String key2 = (String) me2.getKey();
490: Integer count1 = (Integer) me1.getValue();
491: Integer count2 = (Integer) me2.getValue();
492: int cmp = count2.compareTo(count1);
493: if (cmp != 0)
494: return cmp;
495: else
496: return key1.compareTo(key2);
497: }
498: });
499: setSources.addAll(htSources.entrySet());
500:
501: for (Iterator iter = setSources.iterator(); iter.hasNext();) {
502: Map.Entry me = (Map.Entry) iter.next();
503: String source = (String) me.getKey();
504: Integer keyCount = (Integer) me.getValue();
505:
506: Element elSource = new Element("source");
507: elSource.setAttribute("count", keyCount.toString());
508: elSource.setAttribute("name", source);
509: elSources.addContent(elSource);
510: }
511: _elSummary.addContent(elSources);
512: }
513:
514: //--------------------------------------------------------------------------------
515:
516: private static Element getMetadataFromIndex(Document doc, String id) {
517: String root = doc.get("_root");
518: String schema = doc.get("_schema");
519: String createDate = doc.get("_createDate").toUpperCase();
520: String changeDate = doc.get("_changeDate").toUpperCase();
521: String source = doc.get("_source");
522: String uuid = doc.get("_uuid");
523:
524: Element md = new Element(root);
525:
526: Element info = new Element(Edit.RootChild.INFO, Edit.NAMESPACE);
527:
528: addElement(info, Edit.Info.Elem.ID, id);
529: addElement(info, Edit.Info.Elem.UUID, uuid);
530: addElement(info, Edit.Info.Elem.SCHEMA, schema);
531: addElement(info, Edit.Info.Elem.CREATE_DATE, createDate);
532: addElement(info, Edit.Info.Elem.CHANGE_DATE, changeDate);
533: addElement(info, Edit.Info.Elem.SOURCE, source);
534:
535: for (Enumeration enu = doc.fields(); enu.hasMoreElements();) {
536: Field field = (Field) enu.nextElement();
537: String name = field.name();
538: String value = field.stringValue();
539:
540: if (name.equals("_cat"))
541: addElement(info, Edit.Info.Elem.CATEGORY, value);
542: }
543: md.addContent(info);
544: return md;
545: }
546: }
547:
548: //==============================================================================
|