001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.solr.search;
017:
018: import org.apache.lucene.search.*;
019: import org.apache.solr.search.function.*;
020: import org.apache.lucene.queryParser.ParseException;
021: import org.apache.lucene.queryParser.QueryParser;
022: import org.apache.lucene.document.Field;
023: import org.apache.lucene.index.Term;
024: import org.apache.solr.core.SolrCore;
025: import org.apache.solr.core.SolrException;
026: import org.apache.solr.schema.IndexSchema;
027: import org.apache.solr.schema.SchemaField;
028: import org.apache.solr.schema.FieldType;
029: import org.apache.solr.request.SolrParams;
030:
031: import java.util.ArrayList;
032: import java.util.List;
033: import java.util.regex.Pattern;
034: import java.util.logging.Level;
035: import java.io.IOException;
036:
/**
 * Collection of static utilities useful for query parsing.
 *
 * @author yonik
 * @version $Id: QueryParsing.java 542679 2007-05-29 22:28:21Z ryan $
 */
043: public class QueryParsing {
044: /** the SolrParam used to override the QueryParser "default operator" */
045: public static final String OP = "q.op";
046:
047: /**
048: * Helper utility for parsing a query using the Lucene QueryParser syntax.
049: * @param qs query expression in standard Lucene syntax
050: * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
051: */
052: public static Query parseQuery(String qs, IndexSchema schema) {
053: return parseQuery(qs, null, schema);
054: }
055:
056: /**
057: * Helper utility for parsing a query using the Lucene QueryParser syntax.
058: * @param qs query expression in standard Lucene syntax
059: * @param defaultField default field used for unqualified search terms in the query expression
060: * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
061: */
062: public static Query parseQuery(String qs, String defaultField,
063: IndexSchema schema) {
064: try {
065: Query query = schema.getSolrQueryParser(defaultField)
066: .parse(qs);
067:
068: if (SolrCore.log.isLoggable(Level.FINEST)) {
069: SolrCore.log.finest("After QueryParser:" + query);
070: }
071:
072: return query;
073:
074: } catch (ParseException e) {
075: SolrCore.log(e);
076: throw new SolrException(
077: SolrException.ErrorCode.BAD_REQUEST,
078: "Error parsing Lucene query", e);
079: }
080: }
081:
082: /**
083: * Helper utility for parsing a query using the Lucene QueryParser syntax.
084: * @param qs query expression in standard Lucene syntax
085: * @param defaultField default field used for unqualified search terms in the query expression
086: * @param params used to determine the default operator, overriding the schema specified operator
087: * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
088: */
089: public static Query parseQuery(String qs, String defaultField,
090: SolrParams params, IndexSchema schema) {
091: try {
092: SolrQueryParser parser = schema
093: .getSolrQueryParser(defaultField);
094: String opParam = params.get(OP);
095: if (opParam != null) {
096: parser
097: .setDefaultOperator("AND".equals(opParam) ? QueryParser.Operator.AND
098: : QueryParser.Operator.OR);
099: }
100: Query query = parser.parse(qs);
101:
102: if (SolrCore.log.isLoggable(Level.FINEST)) {
103: SolrCore.log.finest("After QueryParser:" + query);
104: }
105:
106: return query;
107:
108: } catch (ParseException e) {
109: SolrCore.log(e);
110: throw new SolrException(
111: SolrException.ErrorCode.BAD_REQUEST,
112: "Query parsing error: " + e.getMessage(), e);
113: }
114: }
115:
116: /***
117: * SortSpec encapsulates a Lucene Sort and a count of the number of documents
118: * to return.
119: */
120: public static class SortSpec {
121: private final Sort sort;
122: private final int num;
123:
124: SortSpec(Sort sort, int num) {
125: this .sort = sort;
126: this .num = num;
127: }
128:
129: /**
130: * Gets the Lucene Sort object, or null for the default sort
131: * by score descending.
132: */
133: public Sort getSort() {
134: return sort;
135: }
136:
137: /**
138: * Gets the number of documens to return after sorting.
139: *
140: * @return number of docs to return, or -1 for no cut off (just sort)
141: */
142: public int getCount() {
143: return num;
144: }
145: }
146:
147: private static Pattern sortSep = Pattern.compile(",");
148:
149: /**
150: * Returns null if the sortSpec is the standard sort desc.
151: *
152: * <p>
153: * The form of the sort specification string currently parsed is:
154: * </p>
155: * <pre>>
156: * SortSpec ::= SingleSort [, SingleSort]*
157: * SingleSort ::= <fieldname> SortDirection
158: * SortDirection ::= top | desc | bottom | asc
159: * </pre>
160: * Examples:
161: * <pre>
162: * score desc #normal sort by score (will return null)
163: * weight bottom #sort by weight ascending
164: * weight desc #sort by weight descending
165: * height desc,weight desc #sort by height descending, and use weight descending to break any ties
166: * height desc,weight asc #sort by height descending, using weight ascending as a tiebreaker
167: * </pre>
168: *
169: */
170: public static SortSpec parseSort(String sortSpec, IndexSchema schema) {
171: if (sortSpec == null || sortSpec.length() == 0)
172: return null;
173:
174: String[] parts = sortSep.split(sortSpec.trim());
175: if (parts.length == 0)
176: return null;
177:
178: SortField[] lst = new SortField[parts.length];
179: for (int i = 0; i < parts.length; i++) {
180: String part = parts[i].trim();
181: boolean top = true;
182:
183: int idx = part.indexOf(' ');
184: if (idx > 0) {
185: String order = part.substring(idx + 1).trim();
186: if ("desc".equals(order) || "top".equals(order)) {
187: top = true;
188: } else if ("asc".equals(order)
189: || "bottom".equals(order)) {
190: top = false;
191: } else {
192: throw new SolrException(
193: SolrException.ErrorCode.BAD_REQUEST,
194: "Unknown sort order: " + order);
195: }
196: part = part.substring(0, idx).trim();
197: } else {
198: throw new SolrException(
199: SolrException.ErrorCode.BAD_REQUEST,
200: "Missing sort order.");
201: }
202:
203: if ("score".equals(part)) {
204: if (top) {
205: // If thre is only one thing in the list, just do the regular thing...
206: if (parts.length == 1) {
207: return null; // do normal scoring...
208: }
209: lst[i] = SortField.FIELD_SCORE;
210: } else {
211: lst[i] = new SortField(null, SortField.SCORE, true);
212: }
213: } else {
214: // getField could throw an exception if the name isn't found
215: SchemaField f = null;
216: try {
217: f = schema.getField(part);
218: } catch (SolrException e) {
219: throw new SolrException(
220: SolrException.ErrorCode.BAD_REQUEST,
221: "can not sort on undefined field: " + part,
222: e);
223: }
224: if (f == null || !f.indexed()) {
225: throw new SolrException(
226: SolrException.ErrorCode.BAD_REQUEST,
227: "can not sort on unindexed field: " + part);
228: }
229: lst[i] = f.getType().getSortField(f, top);
230: }
231: }
232: // For more info on the 'num' field, -1,
233: // see: https://issues.apache.org/jira/browse/SOLR-99
234: return new SortSpec(new Sort(lst), -1);
235: }
236:
237: ///////////////////////////
238: ///////////////////////////
239: ///////////////////////////
240:
241: static FieldType writeFieldName(String name, IndexSchema schema,
242: Appendable out, int flags) throws IOException {
243: FieldType ft = null;
244: ft = schema.getFieldTypeNoEx(name);
245: out.append(name);
246: if (ft == null) {
247: out.append("(UNKNOWN FIELD " + name + ')');
248: }
249: out.append(':');
250: return ft;
251: }
252:
253: static void writeFieldVal(String val, FieldType ft, Appendable out,
254: int flags) throws IOException {
255: if (ft != null) {
256: out.append(ft.toExternal(new Field("", val,
257: Field.Store.YES, Field.Index.UN_TOKENIZED)));
258: } else {
259: out.append(val);
260: }
261: }
262:
263: /** @see #toString(Query,IndexSchema) */
264: public static void toString(Query query, IndexSchema schema,
265: Appendable out, int flags) throws IOException {
266: boolean writeBoost = true;
267:
268: if (query instanceof TermQuery) {
269: TermQuery q = (TermQuery) query;
270: Term t = q.getTerm();
271: FieldType ft = writeFieldName(t.field(), schema, out, flags);
272: writeFieldVal(t.text(), ft, out, flags);
273: } else if (query instanceof RangeQuery) {
274: RangeQuery q = (RangeQuery) query;
275: String fname = q.getField();
276: FieldType ft = writeFieldName(fname, schema, out, flags);
277: out.append(q.isInclusive() ? '[' : '{');
278: Term lt = q.getLowerTerm();
279: Term ut = q.getUpperTerm();
280: if (lt == null) {
281: out.append('*');
282: } else {
283: writeFieldVal(lt.text(), ft, out, flags);
284: }
285:
286: out.append(" TO ");
287:
288: if (ut == null) {
289: out.append('*');
290: } else {
291: writeFieldVal(ut.text(), ft, out, flags);
292: }
293:
294: out.append(q.isInclusive() ? ']' : '}');
295:
296: } else if (query instanceof ConstantScoreRangeQuery) {
297: ConstantScoreRangeQuery q = (ConstantScoreRangeQuery) query;
298: String fname = q.getField();
299: FieldType ft = writeFieldName(fname, schema, out, flags);
300: out.append(q.includesLower() ? '[' : '{');
301: String lt = q.getLowerVal();
302: String ut = q.getUpperVal();
303: if (lt == null) {
304: out.append('*');
305: } else {
306: writeFieldVal(lt, ft, out, flags);
307: }
308:
309: out.append(" TO ");
310:
311: if (ut == null) {
312: out.append('*');
313: } else {
314: writeFieldVal(ut, ft, out, flags);
315: }
316:
317: out.append(q.includesUpper() ? ']' : '}');
318: } else if (query instanceof BooleanQuery) {
319: BooleanQuery q = (BooleanQuery) query;
320: boolean needParens = false;
321:
322: if (q.getBoost() != 1.0
323: || q.getMinimumNumberShouldMatch() != 0) {
324: needParens = true;
325: }
326: if (needParens) {
327: out.append('(');
328: }
329: boolean first = true;
330: for (BooleanClause c : (List<BooleanClause>) q.clauses()) {
331: if (!first) {
332: out.append(' ');
333: } else {
334: first = false;
335: }
336:
337: if (c.isProhibited()) {
338: out.append('-');
339: } else if (c.isRequired()) {
340: out.append('+');
341: }
342: Query subQuery = c.getQuery();
343: boolean wrapQuery = false;
344:
345: // TODO: may need to put parens around other types
346: // of queries too, depending on future syntax.
347: if (subQuery instanceof BooleanQuery) {
348: wrapQuery = true;
349: }
350:
351: if (wrapQuery) {
352: out.append('(');
353: }
354:
355: toString(subQuery, schema, out, flags);
356:
357: if (wrapQuery) {
358: out.append(')');
359: }
360: }
361:
362: if (needParens) {
363: out.append(')');
364: }
365: if (q.getMinimumNumberShouldMatch() > 0) {
366: out.append('~');
367: out.append(Integer.toString(q
368: .getMinimumNumberShouldMatch()));
369: }
370:
371: } else if (query instanceof PrefixQuery) {
372: PrefixQuery q = (PrefixQuery) query;
373: Term prefix = q.getPrefix();
374: FieldType ft = writeFieldName(prefix.field(), schema, out,
375: flags);
376: out.append(prefix.text());
377: out.append('*');
378: } else if (query instanceof ConstantScorePrefixQuery) {
379: ConstantScorePrefixQuery q = (ConstantScorePrefixQuery) query;
380: Term prefix = q.getPrefix();
381: FieldType ft = writeFieldName(prefix.field(), schema, out,
382: flags);
383: out.append(prefix.text());
384: out.append('*');
385: } else if (query instanceof WildcardQuery) {
386: out.append(query.toString());
387: writeBoost = false;
388: } else if (query instanceof FuzzyQuery) {
389: out.append(query.toString());
390: writeBoost = false;
391: } else if (query instanceof ConstantScoreQuery) {
392: out.append(query.toString());
393: writeBoost = false;
394: } else {
395: out.append(query.getClass().getSimpleName() + '('
396: + query.toString() + ')');
397: writeBoost = false;
398: }
399:
400: if (writeBoost && query.getBoost() != 1.0f) {
401: out.append("^");
402: out.append(Float.toString(query.getBoost()));
403: }
404:
405: }
406:
407: /**
408: * Formats a Query for debugging, using the IndexSchema to make
409: * complex field types readable.
410: *
411: * <p>
412: * The benefit of using this method instead of calling
413: * <code>Query.toString</code> directly is that it knows about the data
414: * types of each field, so any field which is encoded in a particularly
415: * complex way is still readable. The downside is that it only knows
416: * about built in Query types, and will not be able to format custom
417: * Query classes.
418: * </p>
419: */
420: public static String toString(Query query, IndexSchema schema) {
421: try {
422: StringBuilder sb = new StringBuilder();
423: toString(query, schema, sb, 0);
424: return sb.toString();
425: } catch (Exception e) {
426: throw new RuntimeException(e);
427: }
428: }
429:
430: // simple class to help with parsing a string
431: private static class StrParser {
432: String val;
433: int pos;
434: int end;
435:
436: StrParser(String val) {
437: this .val = val;
438: end = val.length();
439: }
440:
441: void eatws() {
442: while (pos < end && Character.isWhitespace(val.charAt(pos)))
443: pos++;
444: }
445:
446: boolean opt(String s) {
447: eatws();
448: int slen = s.length();
449: if (val.regionMatches(pos, s, 0, slen)) {
450: pos += slen;
451: return true;
452: }
453: return false;
454: }
455:
456: void expect(String s) throws ParseException {
457: eatws();
458: int slen = s.length();
459: if (val.regionMatches(pos, s, 0, slen)) {
460: pos += slen;
461: } else {
462: throw new ParseException("Expected '" + s
463: + "' at position " + pos + " in '" + val + "'");
464: }
465: }
466:
467: float getFloat() throws ParseException {
468: eatws();
469: char[] arr = new char[end - pos];
470: int i;
471: for (i = 0; i < arr.length; i++) {
472: char ch = val.charAt(pos);
473: if ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-'
474: || ch == '.' || ch == 'e' || ch == 'E') {
475: pos++;
476: arr[i] = ch;
477: } else {
478: break;
479: }
480: }
481:
482: return Float.parseFloat(new String(arr, 0, i));
483: }
484:
485: String getId() throws ParseException {
486: eatws();
487: int id_start = pos;
488: while (pos < end
489: && Character.isJavaIdentifierPart(val.charAt(pos)))
490: pos++;
491: return val.substring(id_start, pos);
492: }
493:
494: char peek() {
495: eatws();
496: return pos < end ? val.charAt(pos) : 0;
497: }
498:
499: public String toString() {
500: return "'" + val + "'" + ", pos=" + pos;
501: }
502:
503: }
504:
505: /**
506: * Builds a list of String which are stringified versions of a list of Queries
507: */
508: public static List<String> toString(List<Query> queries,
509: IndexSchema schema) {
510: List<String> out = new ArrayList<String>(queries.size());
511: for (Query q : queries) {
512: out.add(QueryParsing.toString(q, schema));
513: }
514: return out;
515: }
516:
517: private static ValueSource parseValSource(StrParser sp,
518: IndexSchema schema) throws ParseException {
519: String id = sp.getId();
520: if (sp.opt("(")) {
521: // a function: could contain a fieldname or another function.
522: ValueSource vs = null;
523: if (id.equals("ord")) {
524: String field = sp.getId();
525: vs = new OrdFieldSource(field);
526: } else if (id.equals("rord")) {
527: String field = sp.getId();
528: vs = new ReverseOrdFieldSource(field);
529: } else if (id.equals("linear")) {
530: ValueSource source = parseValSource(sp, schema);
531: sp.expect(",");
532: float slope = sp.getFloat();
533: sp.expect(",");
534: float intercept = sp.getFloat();
535: vs = new LinearFloatFunction(source, slope, intercept);
536: } else if (id.equals("max")) {
537: ValueSource source = parseValSource(sp, schema);
538: sp.expect(",");
539: float val = sp.getFloat();
540: vs = new MaxFloatFunction(source, val);
541: } else if (id.equals("recip")) {
542: ValueSource source = parseValSource(sp, schema);
543: sp.expect(",");
544: float m = sp.getFloat();
545: sp.expect(",");
546: float a = sp.getFloat();
547: sp.expect(",");
548: float b = sp.getFloat();
549: vs = new ReciprocalFloatFunction(source, m, a, b);
550: } else {
551: throw new ParseException("Unknown function " + id
552: + " in FunctionQuery(" + sp + ")");
553: }
554: sp.expect(")");
555: return vs;
556: }
557:
558: SchemaField f = schema.getField(id);
559: return f.getType().getValueSource(f);
560: }
561:
562: /**
563: * Parse a function, returning a FunctionQuery
564: *
565: * <p>
566: * Syntax Examples....
567: * </p>
568: *
569: * <pre>
570: * // Numeric fields default to correct type
571: * // (ie: IntFieldSource or FloatFieldSource)
572: * // Others use implicit ord(...) to generate numeric field value
573: * myfield
574: *
575: * // OrdFieldSource
576: * ord(myfield)
577: *
578: * // ReverseOrdFieldSource
579: * rord(myfield)
580: *
581: * // LinearFloatFunction on numeric field value
582: * linear(myfield,1,2)
583: *
584: * // MaxFloatFunction of LinearFloatFunction on numeric field value or constant
585: * max(linear(myfield,1,2),100)
586: *
587: * // ReciprocalFloatFunction on numeric field value
588: * recip(myfield,1,2,3)
589: *
590: * // ReciprocalFloatFunction on ReverseOrdFieldSource
591: * recip(rord(myfield),1,2,3)
592: *
593: * // ReciprocalFloatFunction on LinearFloatFunction on ReverseOrdFieldSource
594: * recip(linear(rord(myfield),1,2),3,4,5)
595: * </pre>
596: */
597: public static FunctionQuery parseFunction(String func,
598: IndexSchema schema) throws ParseException {
599: return new FunctionQuery(parseValSource(new StrParser(func),
600: schema));
601: }
602:
603: }
|