001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.request;
017:
018: import static org.apache.solr.request.SolrParams.FACET;
019: import static org.apache.solr.request.SolrParams.FQ;
020: import static org.apache.solr.request.SolrParams.Q;
021:
022: import java.net.MalformedURLException;
023: import java.net.URL;
024: import java.util.ArrayList;
025: import java.util.List;
026: import java.util.Map;
027:
028: import org.apache.lucene.queryParser.QueryParser;
029: import org.apache.lucene.search.BooleanClause;
030: import org.apache.lucene.search.BooleanQuery;
031: import org.apache.lucene.search.MatchAllDocsQuery;
032: import org.apache.lucene.search.Query;
033: import org.apache.lucene.search.BooleanClause.Occur;
034: import org.apache.solr.core.SolrCore;
035: import org.apache.solr.core.SolrException;
036: import org.apache.solr.handler.RequestHandlerBase;
037: import org.apache.solr.schema.IndexSchema;
038: import org.apache.solr.search.DocListAndSet;
039: import org.apache.solr.search.DocSet;
040: import org.apache.solr.search.QueryParsing;
041: import org.apache.solr.search.SolrIndexSearcher;
042: import org.apache.solr.search.SolrQueryParser;
043: import org.apache.solr.util.DisMaxParams;
044: import org.apache.solr.util.HighlightingUtils;
045: import org.apache.solr.util.NamedList;
046: import org.apache.solr.util.SolrPluginUtils;
047:
048: /**
049: * <p>
050: * A Generic query plugin designed to be given a simple query expression
051: * from a user, which it will then query against a variety of
052: * pre-configured fields, in a variety of ways, using BooleanQueries,
053: * DisjunctionMaxQueries, and PhraseQueries.
054: * </p>
055: *
056: * <p>
057: * All of the following options may be configured for this plugin
058: * in the solrconfig as defaults, and may be overriden as request parameters
059: * </p>
060: *
061: * <ul>
062: * <li>q.alt - An alternate query to be used in cases where the main
063: * query (q) is not specified (or blank). This query should
064: * be expressed in the Standard SolrQueryParser syntax (you
065: * can use <code>q.alt=*:*</code> to denote that all documents
066: * should be returned when no query is specified)
067: * </li>
068: * <li>tie - (Tie breaker) float value to use as tiebreaker in
069: * DisjunctionMaxQueries (should be something much less than 1)
070: * </li>
071: * <li> qf - (Query Fields) fields and boosts to use when building
072: * DisjunctionMaxQueries from the users query. Format is:
073: * "<code>fieldA^1.0 fieldB^2.2</code>".
074: * This param can be specified multiple times, and the fields
075: * are additive.
076: * </li>
077: * <li> mm - (Minimum Match) this supports a wide variety of
078: * complex expressions.
079: * read {@link SolrPluginUtils#setMinShouldMatch SolrPluginUtils.setMinShouldMatch} and <a href="http://lucene.apache.org/solr/api/org/apache/solr/util/doc-files/min-should-match.html">mm expression format</a> for details.
080: * </li>
081: * <li> pf - (Phrase Fields) fields/boosts to make phrase queries out
082: * of, to boost the users query for exact matches on the specified fields.
083: * Format is: "<code>fieldA^1.0 fieldB^2.2</code>".
084: * This param can be specified multiple times, and the fields
085: * are additive.
086: * </li>
087: * <li> ps - (Phrase Slop) amount of slop on phrase queries built for pf
088: * fields.
089: * </li>
090: * <li> qs - (Query Slop) amount of slop on phrase queries explicitly
091: * specified in the "q" for qf fields.
092: * </li>
093: * <li> bq - (Boost Query) a raw lucene query that will be included in the
094: * users query to influence the score. If this is a BooleanQuery
095: * with a default boost (1.0f), then the individual clauses will be
096: * added directly to the main query. Otherwise, the query will be
097: * included as is.
098: * This param can be specified multiple times, and the boosts are
099: * are additive. NOTE: the behaviour listed above is only in effect
100: * if a single <code>bq</code> paramter is specified. Hence you can
101: * disable it by specifying an additional, blank, <code>bq</code>
102: * parameter.
103: * </li>
104: * <li> bf - (Boost Functions) functions (with optional boosts) that will be
105: * included in the users query to influence the score.
106: * Format is: "<code>funcA(arg1,arg2)^1.2
107: * funcB(arg3,arg4)^2.2</code>". NOTE: Whitespace is not allowed
108: * in the function arguments.
109: * This param can be specified multiple times, and the functions
110: * are additive.
111: * </li>
112: * <li> fq - (Filter Query) a raw lucene query that can be used
113: * to restrict the super set of products we are interested in - more
114: * efficient then using bq, but doesn't influence score.
115: * This param can be specified multiple times, and the filters
116: * are additive.
117: * </li>
118: * </ul>
119: *
120: * <p>
121: * The following options are only available as request params...
122: * </p>
123: *
124: * <ul>
125: * <li> q - (Query) the raw unparsed, unescaped, query from the user.
126: * </li>
127: * <li>sort - (Order By) list of fields and direction to sort on.
128: * </li>
129: * </ul>
130: *
131: * <pre>
132: * :TODO: document facet param support
133: *
134: * </pre>
135: */
136: public class DisMaxRequestHandler extends RequestHandlerBase {
137:
138: /**
139: * A field we can't ever find in any schema, so we can safely tell
140: * DisjunctionMaxQueryParser to use it as our defaultField, and
141: * map aliases from it to any field in our schema.
142: */
143: private static String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";
144:
145: /** shorten the class references for utilities */
146: private static class U extends SolrPluginUtils {
147: /* :NOOP */
148: }
149:
150: /** shorten the class references for utilities */
151: private static class DMP extends DisMaxParams {
152: /* :NOOP */
153: }
154:
155: public DisMaxRequestHandler() {
156: super ();
157: }
158:
159: /** Sets the default variables for any useful info it finds in the config.
160: * If a config option is not in the format expected, logs a warning
161: * and ignores it.
162: */
163: public void init(NamedList args) {
164: // Handle an old format
165: if (-1 == args.indexOf("defaults", 0)) {
166: // no explict defaults list, use all args implicitly
167: // indexOf so "<null name="defaults"/> is valid indicator of no defaults
168: defaults = SolrParams.toSolrParams(args);
169: } else {
170: // otherwise use the new one.
171: super .init(args);
172: }
173: }
174:
175: public void handleRequestBody(SolrQueryRequest req,
176: SolrQueryResponse rsp) throws Exception {
177: SolrParams params = req.getParams();
178:
179: int flags = 0;
180:
181: SolrIndexSearcher s = req.getSearcher();
182: IndexSchema schema = req.getSchema();
183:
184: Map<String, Float> queryFields = U.parseFieldBoosts(params
185: .getParams(DMP.QF));
186: Map<String, Float> phraseFields = U.parseFieldBoosts(params
187: .getParams(DMP.PF));
188:
189: float tiebreaker = params.getFloat(DMP.TIE, 0.0f);
190:
191: int pslop = params.getInt(DMP.PS, 0);
192: int qslop = params.getInt(DMP.QS, 0);
193:
194: /* a generic parser for parsing regular lucene queries */
195: QueryParser p = schema.getSolrQueryParser(null);
196:
197: /* a parser for dealing with user input, which will convert
198: * things to DisjunctionMaxQueries
199: */
200: U.DisjunctionMaxQueryParser up = new U.DisjunctionMaxQueryParser(
201: schema, IMPOSSIBLE_FIELD_NAME);
202: up.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, queryFields);
203: up.setPhraseSlop(qslop);
204:
205: /* for parsing sloppy phrases using DisjunctionMaxQueries */
206: U.DisjunctionMaxQueryParser pp = new U.DisjunctionMaxQueryParser(
207: schema, IMPOSSIBLE_FIELD_NAME);
208: pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, phraseFields);
209: pp.setPhraseSlop(pslop);
210:
211: /* the main query we will execute. we disable the coord because
212: * this query is an artificial construct
213: */
214: BooleanQuery query = new BooleanQuery(true);
215:
216: /* * * Main User Query * * */
217: Query parsedUserQuery = null;
218: String userQuery = params.get(Q);
219: Query altUserQuery = null;
220: if (userQuery == null || userQuery.trim().length() < 1) {
221: // If no query is specified, we may have an alternate
222: String altQ = params.get(DMP.ALTQ);
223: if (altQ != null) {
224: altUserQuery = p.parse(altQ);
225: query.add(altUserQuery, Occur.MUST);
226: } else {
227: throw new SolrException(
228: SolrException.ErrorCode.BAD_REQUEST,
229: "missing query string");
230: }
231: } else {
232: // There is a valid query string
233: userQuery = U.partialEscape(
234: U.stripUnbalancedQuotes(userQuery)).toString();
235:
236: String minShouldMatch = params.get(DMP.MM, "100%");
237: Query dis = up.parse(userQuery);
238: parsedUserQuery = dis;
239:
240: if (dis instanceof BooleanQuery) {
241: BooleanQuery t = new BooleanQuery();
242: U.flattenBooleanQuery(t, (BooleanQuery) dis);
243: U.setMinShouldMatch(t, minShouldMatch);
244: parsedUserQuery = t;
245: }
246: query.add(parsedUserQuery, Occur.MUST);
247:
248: /* * * Add on Phrases for the Query * * */
249:
250: /* build up phrase boosting queries */
251:
252: /* if the userQuery already has some quotes, stip them out.
253: * we've already done the phrases they asked for in the main
254: * part of the query, this is to boost docs that may not have
255: * matched those phrases but do match looser phrases.
256: */
257: String userPhraseQuery = userQuery.replace("\"", "");
258: Query phrase = pp.parse("\"" + userPhraseQuery + "\"");
259: if (null != phrase) {
260: query.add(phrase, Occur.SHOULD);
261: }
262: }
263:
264: /* * * Boosting Query * * */
265: String[] boostParams = params.getParams(DMP.BQ);
266: List<Query> boostQueries = U
267: .parseQueryStrings(req, boostParams);
268: if (null != boostQueries) {
269: if (1 == boostQueries.size() && 1 == boostParams.length) {
270: /* legacy logic */
271: Query f = boostQueries.get(0);
272: if (1.0f == f.getBoost() && f instanceof BooleanQuery) {
273: /* if the default boost was used, and we've got a BooleanQuery
274: * extract the subqueries out and use them directly
275: */
276: for (Object c : ((BooleanQuery) f).clauses()) {
277: query.add((BooleanClause) c);
278: }
279: } else {
280: query.add(f, BooleanClause.Occur.SHOULD);
281: }
282: } else {
283: for (Query f : boostQueries) {
284: query.add(f, BooleanClause.Occur.SHOULD);
285: }
286: }
287: }
288:
289: /* * * Boosting Functions * * */
290:
291: String[] boostFuncs = params.getParams(DMP.BF);
292: if (null != boostFuncs && 0 != boostFuncs.length) {
293: for (String boostFunc : boostFuncs) {
294: if (null == boostFunc || "".equals(boostFunc))
295: continue;
296: List<Query> funcs = U.parseFuncs(schema, boostFunc);
297: for (Query f : funcs) {
298: query.add(f, Occur.SHOULD);
299: }
300: }
301: }
302:
303: /* * * Restrict Results * * */
304:
305: List<Query> restrictions = U.parseFilterQueries(req);
306:
307: /* * * Generate Main Results * * */
308:
309: flags |= U.setReturnFields(req, rsp);
310:
311: DocListAndSet results = new DocListAndSet();
312: NamedList facetInfo = null;
313: if (params.getBool(FACET, false)) {
314: results = s.getDocListAndSet(query, restrictions,
315: SolrPluginUtils.getSort(req), req.getStart(), req
316: .getLimit(), flags);
317: facetInfo = getFacetInfo(req, rsp, results.docSet);
318: } else {
319: results.docList = s.getDocList(query, restrictions,
320: SolrPluginUtils.getSort(req), req.getStart(), req
321: .getLimit(), flags);
322: }
323: rsp.add("response", results.docList);
324: // pre-fetch returned documents
325: U.optimizePreFetchDocs(results.docList, query, req, rsp);
326:
327: if (null != facetInfo)
328: rsp.add("facet_counts", facetInfo);
329:
330: /* * * Debugging Info * * */
331:
332: try {
333: NamedList debug = U.doStandardDebug(req, userQuery, query,
334: results.docList);
335: if (null != debug) {
336: debug.add("altquerystring", altUserQuery);
337: if (null != boostQueries) {
338: debug.add("boost_queries", boostParams);
339: debug.add("parsed_boost_queries", QueryParsing
340: .toString(boostQueries, req.getSchema()));
341: }
342: debug.add("boostfuncs", params.getParams(DMP.BF));
343: if (null != restrictions) {
344: debug.add("filter_queries", params.getParams(FQ));
345: debug.add("parsed_filter_queries", QueryParsing
346: .toString(restrictions, req.getSchema()));
347: }
348: rsp.add("debug", debug);
349: }
350:
351: } catch (Exception e) {
352: SolrException.logOnce(SolrCore.log,
353: "Exception during debug", e);
354: rsp.add("exception_during_debug", SolrException.toStr(e));
355: }
356:
357: /* * * Highlighting/Summarizing * * */
358: if (HighlightingUtils.isHighlightingEnabled(req)
359: && parsedUserQuery != null) {
360: String[] highFields = queryFields.keySet().toArray(
361: new String[0]);
362: NamedList sumData = HighlightingUtils.doHighlighting(
363: results.docList, parsedUserQuery.rewrite(req
364: .getSearcher().getReader()), req,
365: highFields);
366: if (sumData != null)
367: rsp.add("highlighting", sumData);
368: }
369: }
370:
371: /**
372: * Fetches information about Facets for this request.
373: *
374: * Subclasses may with to override this method to provide more
375: * advanced faceting behavior.
376: * @see SimpleFacets#getFacetCounts
377: */
378: protected NamedList getFacetInfo(SolrQueryRequest req,
379: SolrQueryResponse rsp, DocSet mainSet) {
380:
381: SimpleFacets f = new SimpleFacets(req.getSearcher(), mainSet,
382: req.getParams());
383: return f.getFacetCounts();
384: }
385:
386: //////////////////////// SolrInfoMBeans methods //////////////////////
387:
388: @Override
389: public String getDescription() {
390: return "DisjunctionMax Request Handler: Does relevancy based queries "
391: + "across a variety of fields using configured boosts";
392: }
393:
394: @Override
395: public String getVersion() {
396: return "$Revision: 542679 $";
397: }
398:
399: @Override
400: public String getSourceId() {
401: return "$Id: DisMaxRequestHandler.java 542679 2007-05-29 22:28:21Z ryan $";
402: }
403:
404: @Override
405: public String getSource() {
406: return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.2/src/java/org/apache/solr/request/DisMaxRequestHandler.java $";
407: }
408:
409: @Override
410: public URL[] getDocs() {
411: try {
412: return new URL[] { new URL(
413: "http://wiki.apache.org/solr/DisMaxRequestHandler") };
414: } catch (MalformedURLException ex) {
415: return null;
416: }
417: }
418: }
|