001: /*
002: * $Header$
003: * $Revision: 7957 $
004: * $Date: 2007-08-23 04:22:35 -0700 $
005: *
006: * ====================================================================
007: *
008: * Copyright 1999-2004 The Apache Software Foundation
009: *
010: * Licensed under the Apache License, Version 2.0 (the "License");
011: * you may not use this file except in compliance with the License.
012: * You may obtain a copy of the License at
013: *
014: * http://www.apache.org/licenses/LICENSE-2.0
015: *
016: * Unless required by applicable law or agreed to in writing, software
017: * distributed under the License is distributed on an "AS IS" BASIS,
018: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
019: * See the License for the specific language governing permissions and
020: * limitations under the License.
021: *
022: */
023: package org.apache.slide.index.lucene.expressions;
024:
025: import java.io.IOException;
026: import java.util.Iterator;
027: import java.util.List;
028: import java.util.StringTokenizer;
029:
030: import nl.hippo.slide.index.LuceneIndexerStore;
031:
032: import org.apache.lucene.document.Document;
033: import org.apache.lucene.index.Term;
034: import org.apache.lucene.search.BooleanClause;
035: import org.apache.lucene.search.BooleanQuery;
036: import org.apache.lucene.search.Hits;
037: import org.apache.lucene.search.IndexSearcher;
038: import org.apache.lucene.search.Query;
039: import org.apache.lucene.search.RangeQuery;
040: import org.apache.lucene.search.Sort;
041: import org.apache.lucene.search.TermQuery;
042: import org.apache.slide.common.SlideException;
043: import org.apache.slide.common.SlideTokenWrapper;
044: import org.apache.slide.common.Uri;
045: import org.apache.slide.common.UriPath;
046: import org.apache.slide.content.NodeProperty;
047: import org.apache.slide.index.lucene.Index;
048: import org.apache.slide.index.lucene.LuceneExpressionFactory;
049: import org.apache.slide.index.lucene.LuceneOrderBy;
050: import org.apache.slide.search.BadQueryException;
051: import org.apache.slide.search.InvalidQueryException;
052: import org.apache.slide.search.InvalidScopeException;
053: import org.apache.slide.search.QueryScope;
054: import org.apache.slide.search.SearchException;
055: import org.apache.slide.search.basic.AugmentableComparableResource;
056: import org.apache.slide.search.basic.IBasicExpression;
057: import org.apache.slide.search.basic.IBasicExpressionFactory;
058: import org.apache.slide.search.basic.IBasicQuery;
059: import org.apache.slide.search.basic.IBasicResultSet;
060: import org.apache.slide.search.basic.LuceneBasicQueryScope;
061: import org.apache.slide.search.basic.LuceneBasicResultSetImpl;
062: import org.apache.slide.search.basic.OrderBy;
063: import org.apache.slide.security.AccessDeniedException;
064: import org.apache.slide.store.AbstractStore;
065: import org.apache.slide.store.IndexStore;
066: import org.apache.slide.structure.ObjectNode;
067: import org.apache.slide.structure.SubjectNode;
068: import org.jdom.Element;
069:
070: /**
071: *
072: */
073: public abstract class AbstractLuceneExpression implements
074: IBasicExpression {
075:
076: protected LuceneExpressionFactory factory;
077: protected Index index;
078: private IBasicResultSet resultSet = null;
079:
080: private Query query;
081:
082: protected int offset = 0;
083: protected int maxresults = 1000;
084:
085: public AbstractLuceneExpression(Index index) {
086: this .index = index;
087: }
088:
089: protected final void setQuery(Query query) {
090: this .query = query;
091: }
092:
093: protected final Query getQuery() {
094: return this .query;
095: }
096:
097: public IBasicExpressionFactory getFactory() {
098: return factory;
099: }
100:
101: public void setFactory(IBasicExpressionFactory factory) {
102: this .factory = (LuceneExpressionFactory) factory;
103: }
104:
105: public void setOffset(int offset) {
106: if (offset >= 0)
107: this .offset = offset;
108: }
109:
110: public int getOffset() {
111: return offset;
112: }
113:
114: public void setMaxresults(int maxresults) {
115: // -1 = nolimit
116: if ((maxresults == -1) || (maxresults > 0)) {
117: this .maxresults = maxresults;
118: }
119: }
120:
121: public IBasicResultSet execute() throws SearchException {
122: return this .execute(null, -1);
123: }
124:
125: public IBasicResultSet execute(OrderBy orderBy)
126: throws SearchException {
127: return this .execute(orderBy, -1);
128: }
129:
130: protected Query getScopeQuery(QueryScope s) throws SearchException {
131:
132: IBasicQuery q = factory.getQuery();
133:
134: if (index.getLogger().isDebugEnabled()) {
135: index.getLogger().debug(
136: "scope.href = " + s.getHref() + " scope: "
137: + s.hashCode());
138: }
139: String scope = q.getSearchToken().getSlideContext()
140: .getSlidePath(s.getHref());
141:
142: if (scope.endsWith("/") && scope.length() > 1) {
143: scope = scope.substring(0, scope.length() - 1);
144: }
145:
146: // add a scope restriction, this allows negated queries too
147: BooleanQuery booleanQuery = new BooleanQuery();
148:
149: booleanQuery.add(new TermQuery(new Term(Index.SCOPE_FIELD_NAME,
150: scope)), BooleanClause.Occur.MUST);
151:
152: int queryScopeHrefDepth = getDepth(scope);
153:
154: boolean hasMinDepth = false;
155: int minDepth = 0;
156: if (s instanceof LuceneBasicQueryScope) {
157: minDepth = ((LuceneBasicQueryScope) s).getMinDepth();
158: if (minDepth > 0) {
159: hasMinDepth = true;
160: } else {
161: hasMinDepth = false;
162: minDepth = 0;
163: }
164: }
165:
166: // add depth restriction
167: switch (s.getDepth()) {
168: case QueryScope.DEPTH_INFINITY:
169: if (hasMinDepth) {
170: booleanQuery
171: .add(
172: new RangeQuery(
173: new Term(
174: Index.DEPTH_FIELD_NAME,
175: index
176: .getConfiguration()
177: .intToIndexString(
178: queryScopeHrefDepth
179: + minDepth)),
180: null /*new Term(Index.DEPTH_FIELD_NAME,
181: index.getConfiguration().intToIndexString(queryScopeHrefDepth+100))*/,
182: true), BooleanClause.Occur.MUST); //require
183: }
184: break;
185: case QueryScope.DEPTH_0:
186: booleanQuery.add(new TermQuery(new Term(
187: Index.DEPTH_FIELD_NAME, index.getConfiguration()
188: .intToIndexString(
189: queryScopeHrefDepth + minDepth))),
190: BooleanClause.Occur.MUST);
191: break;
192: case QueryScope.DEPTH_1:
193: default:
194: booleanQuery.add(
195: new RangeQuery(new Term(Index.DEPTH_FIELD_NAME,
196: index.getConfiguration().intToIndexString(
197: queryScopeHrefDepth + minDepth)),
198: new Term(Index.DEPTH_FIELD_NAME, index
199: .getConfiguration()
200: .intToIndexString(
201: queryScopeHrefDepth
202: + s.getDepth())),
203: true), BooleanClause.Occur.MUST); //require
204: }
205:
206: // add excluded scopes
207:
208: Iterator excludes = s.getExcludedScopes().iterator();
209: while (excludes.hasNext()) {
210: String exscope = (String) excludes.next();
211: exscope = q.getSearchToken().getSlideContext()
212: .getSlidePath(exscope);
213:
214: // remove trailing slashes
215: if (exscope.endsWith("/") && exscope.length() > 1) {
216: exscope = exscope.substring(0, exscope.length() - 1);
217: }
218:
219: booleanQuery.add(new TermQuery(new Term(
220: Index.SCOPE_FIELD_NAME, exscope)),
221: BooleanClause.Occur.MUST_NOT); // exclude!
222: }
223:
224: return booleanQuery;
225: }
226:
227: public Query getExecutableQuery() throws SearchException {
228: Query luceneQuery = this .getQuery();
229:
230: IBasicQuery q = factory.getQuery();
231:
232: // add a scope restriction, this allows negated queries too
233: BooleanQuery booleanQuery = new BooleanQuery();
234:
235: if (luceneQuery != null)
236: booleanQuery.add(luceneQuery, BooleanClause.Occur.MUST);
237:
238: BooleanQuery scopes = new BooleanQuery();
239: // or the scopes
240: for (int i = 0; i < q.getScopes().length; i++) {
241: scopes.add(getScopeQuery(q.getScopes()[i]),
242: BooleanClause.Occur.SHOULD);
243: }
244: booleanQuery.add(scopes, BooleanClause.Occur.MUST); // require scopes
245:
246: return booleanQuery;
247: }
248:
249: public Index getIndex() {
250: return index;
251: }
252:
253: public IBasicResultSet execute(OrderBy orderBy, int limit)
254: throws SearchException {
255:
256: if (index.getLogger().isDebugEnabled()) {
257: index.getLogger().debug("Max results " + maxresults);
258: index.getLogger().debug("Limiting search to " + limit);
259: index.getLogger().debug("This is " + this );
260: index.getLogger().debug("Location: ", new Throwable());
261: }
262:
263: if (this .resultSet != null) {
264: if (index.getLogger().isDebugEnabled()) {
265: index.getLogger().debug(
266: "Returning previous result set!");
267: }
268: return this .resultSet;
269: }
270:
271: Query luceneQuery = getExecutableQuery();
272:
273: Sort sorter = null;
274: if (orderBy != null && orderBy instanceof LuceneOrderBy) {
275: sorter = ((LuceneOrderBy) orderBy).getSorter();
276:
277: if (index.getLogger().isDebugEnabled()) {
278: index.getLogger().debug(
279: " Gotten a sorter! from LuceneOrderBy "
280: + sorter);
281: }
282: }
283:
284: IndexSearcher searcher = null;
285: try {
286: if (index.getLogger().isDebugEnabled()) {
287: index.getLogger().debug(
288: "start query execution: "
289: + luceneQuery.toString());
290: }
291: long start = System.currentTimeMillis();
292: searcher = this .index.getSearcher();
293: Hits hits = null;
294: if (index.getLogger().isDebugEnabled()) {
295: index.getLogger().debug(
296: " Lucene query is " + luceneQuery.toString());
297: }
298: if (sorter != null) {
299: hits = searcher.search(luceneQuery, sorter);
300: if (index.getLogger().isDebugEnabled()) {
301: index.getLogger().debug(
302: " Query uses sorter! " + sorter);
303: }
304: } else {
305: hits = searcher.search(luceneQuery);
306: }
307: if (index.getLogger().isDebugEnabled()) {
308: index.getLogger().debug(
309: "finished: " + hits.length() + " hits ("
310: + (System.currentTimeMillis() - start)
311: + "ms)");
312: }
313:
314: IBasicResultSet result = new LuceneBasicResultSetImpl(false);
315:
316: // Limit resultset to maxresult docs
317: if (((limit == -1) || (limit > maxresults))
318: && (hits.length() > maxresults)) {
319: limit = maxresults;
320: index
321: .getLogger()
322: .warn(
323: hits.length()
324: + " documunts found. Resultset truncated to "
325: + maxresults + " documents!");
326: }
327: int counter = 0;
328:
329: // DAV standard says DAV:score should be an integer from 0 to 1000
330: double factor = 1000.0;
331:
332: // normalize score relative to the first one (first one always 100% match?)
333: if (hits.length() >= 1)
334: factor = factor / hits.score(0);
335:
336: //long slidetook = 0;
337: int aclMiss = 0;
338: for (int i = offset, l = hits.length(); (i < l)
339: && (limit == -1 || counter < limit); i++) {
340: Document doc = hits.doc(i);
341: String uri = doc.get(Index.URI_FIELD_NAME);
342: AugmentableComparableResource resource = createResource(uri);
343: //slidetook+=(System.currentTimeMillis()-getfromslide);
344: if (resource != null) {
345: // add meta information as properties, like score and hit position?
346: resource.setExtraProperty(
347: NodeProperty.NamespaceCache.DEFAULT_URI,
348: "score", new Integer((int) Math.floor(hits
349: .score(i)
350: * factor)));
351: resource.setExtraProperty(
352: NodeProperty.NamespaceCache.SLIDE_URI,
353: "hitPosition", new Integer(i));
354: resource.setExtraProperty(
355: NodeProperty.NamespaceCache.SLIDE_URI,
356: "nrHits", new Integer(l));
357:
358: result.add(resource);
359: counter++;
360: } else {
361: aclMiss++;
362: }
363:
364: }
365: if (index.getLogger().isDebugEnabled()) {
366: index.getLogger()
367: .debug("Added " + counter + " results");
368: index.getLogger().debug(
369: "Missed ACL authenticated : " + aclMiss);
370: }
371:
372: this .resultSet = result;
373: return result;
374: } catch (InvalidScopeException e) {
375: throw e;
376: } catch (SearchException e) {
377: throw e;
378: } catch (IOException e) {
379: throw new SearchException(e);
380: } finally {
381: if (searcher != null) {
382: try {
383: this .index.releaseSearcher(searcher);
384: //searcher.close();
385: searcher = null;
386: } catch (IOException e1) {
387: // ignore
388: }
389: }
390: }
391: }
392:
393: private int getDepth(String path) {
394: StringTokenizer tokenizer = new StringTokenizer(path, "/");
395: return tokenizer.countTokens();
396: }
397:
398: protected Query negateQuery(Query query) {
399: BooleanQuery booleanQuery = new BooleanQuery();
400: booleanQuery.add(allQuery(), BooleanClause.Occur.MUST);
401: booleanQuery.add(query, BooleanClause.Occur.MUST_NOT);
402: return booleanQuery;
403: }
404:
405: protected Query allQuery() {
406: return new TermQuery(new Term(Index.SCOPE_FIELD_NAME, "/"));
407: }
408:
409: protected AugmentableComparableResource createResource(String uri)
410: throws SearchException {
411: ObjectNode node = new SubjectNode(uri); // this will return the root
412: // folder
413: AugmentableComparableResource resource = null;
414: IBasicQuery query = factory.getQuery();
415: SlideTokenWrapper token = new SlideTokenWrapper(query
416: .getSearchToken().getSlideToken());
417: token.setForceLock(false);
418:
419: try {
420: //resource = new ComparableResourceImpl(node, query.getSearchToken(),
421: // query.getScope(), factory.getPropertyProvider());
422:
423: resource = new AugmentableComparableResource(node, token,
424: query.getSearchToken().getContentHelper(), query
425: .getScope(), factory.getPropertyProvider());
426:
427: } catch (AccessDeniedException e) {
428: // ignore, just not visible
429: resource = null;
430: } catch (SlideException e) {
431: // log and skip
432: if (index.getLogger().isErrorEnabled()) {
433: index.getLogger().error(
434: "Error getting resource " + uri + ", skipping",
435: e);
436: }
437: try {
438: // notify the indexer store to remove this uri
439: Uri toberemoved = query.getSearchToken().getNamespace()
440: .getUri(new UriPath(uri).parent().toString());
441:
442: IndexStore is = ((AbstractStore) toberemoved.getStore())
443: .getContentIndexer();
444:
445: if (is instanceof LuceneIndexerStore) {
446: LuceneIndexerStore lis = (LuceneIndexerStore) is;
447: if (lis != null) {
448: lis.dropUri(uri);
449: }
450: } else {
451: is = ((AbstractStore) toberemoved.getStore())
452: .getPropertiesIndexer();
453: if (is instanceof LuceneIndexerStore) {
454: LuceneIndexerStore lis = (LuceneIndexerStore) is;
455: if (lis != null) {
456: lis.dropUri(uri);
457: }
458: }
459: }
460: } catch (Exception e2) {
461: if (index.getLogger().isErrorEnabled()) {
462: index.getLogger().error(
463: "Error removing resource " + uri
464: + " from the index", e2);
465: }
466: }
467: resource = null;
468: }
469: return resource;
470: }
471:
472: protected static Element getFirstElement(Element node) {
473: List children = node.getChildren();
474:
475: for (int i = 0; i < children.size(); i++) {
476: if (children.get(i) instanceof Element) {
477: return (Element) children.get(i);
478: }
479: }
480: return null;
481: }
482:
483: /**
484: * Returns the first <code>D:prop</code> element.
485: * @param operator
486: * @return Element
487: * @throws BadQueryException if element not found
488: */
489: public static Element getPropertyElement(Element operator)
490: throws BadQueryException {
491: Element prop = operator.getChild("prop",
492: NodeProperty.NamespaceCache.DEFAULT_NAMESPACE);
493: if (prop == null)
494: throw new InvalidQueryException("Missing prop element");
495:
496: prop = getFirstElement(prop);
497: if (prop == null)
498: throw new InvalidQueryException("Empty prop element given");
499: return prop;
500: }
501:
502: /**
503: * Retruns the first <code>D:literal</code> element.
504: * @param operator
505: * @return
506: * @throws BadQueryException if element not found
507: */
508: protected Element getLiteralElement(Element operator)
509: throws BadQueryException {
510: Element literal = operator.getChild("literal",
511: NodeProperty.NamespaceCache.DEFAULT_NAMESPACE);
512: if (literal == null)
513: throw new InvalidQueryException("Missing literal element");
514: return literal;
515: }
516:
517: protected Element getLiteral2Element(Element operator)
518: throws BadQueryException {
519: List children = operator.getChildren("literal",
520: NodeProperty.NamespaceCache.DEFAULT_NAMESPACE);
521: if (children.size() > 1) {
522: return (Element) children.get(1);
523: } else {
524: throw new InvalidQueryException(
525: "Missing second literal element");
526: }
527: }
528: }
|