0001: /* Generated By:JavaCC: Do not edit this line. PrecedenceQueryParser.java */
0002: package org.apache.lucene.queryParser.precedence;
0003:
0004: import java.util.Vector;
0005: import java.io.*;
0006: import java.text.*;
0007: import java.util.*;
0008: import org.apache.lucene.index.Term;
0009: import org.apache.lucene.analysis.*;
0010: import org.apache.lucene.document.*;
0011: import org.apache.lucene.search.*;
0012: import org.apache.lucene.util.Parameter;
0013:
0014: /**
0015: * Experimental query parser variant designed to handle operator precedence
0016: * in a more sensible fashion than QueryParser. There are still some
0017: * open issues with this parser. The following tests are currently failing
0018: * in TestPrecedenceQueryParser and are disabled to make this test pass:
0019: * <ul>
0020: * <li> testSimple
0021: * <li> testWildcard
0022: * <li> testPrecedence
0023: * </ul>
0024: *
0025: * This class is generated by JavaCC. The only method that clients should need
0026: * to call is {@link #parse(String)}.
0027: *
0028: * The syntax for query strings is as follows:
0029: * A Query is a series of clauses.
0030: * A clause may be prefixed by:
0031: * <ul>
0032: * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
0033: * that the clause is required or prohibited respectively; or
0034: * <li> a term followed by a colon, indicating the field to be searched.
0035: * This enables one to construct queries which search multiple fields.
0036: * </ul>
0037: *
0038: * A clause may be either:
0039: * <ul>
0040: * <li> a term, indicating all the documents that contain this term; or
0041: * <li> a nested query, enclosed in parentheses. Note that this may be used
0042: * with a <code>+</code>/<code>-</code> prefix to require any of a set of
0043: * terms.
0044: * </ul>
0045: *
0046: * Thus, in BNF, the query grammar is:
0047: * <pre>
0048: * Query ::= ( Clause )*
0049: * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
0050: * </pre>
0051: *
0052: * <p>
0053: * Examples of appropriately formatted queries can be found in the <a
0054: * href="http://jakarta.apache.org/lucene/docs/queryparsersyntax.html">query syntax
0055: * documentation</a>.
0056: * </p>
0057: *
0058: * @author Brian Goetz
0059: * @author Peter Halacsy
0060: * @author Tatu Saloranta
0061: */
0062:
0063: public class PrecedenceQueryParser implements
0064: PrecedenceQueryParserConstants {
0065:
// Internal codes for the conjunction token (if any) that preceded a clause.
private static final int CONJ_NONE = 0;
private static final int CONJ_AND = 1;
private static final int CONJ_OR = 2;

// Internal codes for the modifier (if any) that prefixed a clause.
private static final int MOD_NONE = 0;
private static final int MOD_NOT = 10;
private static final int MOD_REQ = 11;

// make it possible to call setDefaultOperator() without accessing
// the nested class:
public static final Operator AND_OPERATOR = Operator.AND;
public static final Operator OR_OPERATOR = Operator.OR;

/** The actual operator that parser uses to combine query terms */
private Operator operator = OR_OPERATOR;

// Whether wildcard/prefix/fuzzy/range terms are lower-cased before use.
boolean lowercaseExpandedTerms = true;

Analyzer analyzer;        // analyzer used to tokenize field query text
String field;             // default field when a clause names none
int phraseSlop = 0;       // default slop applied to phrase queries
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;   // default min similarity for ~ terms
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; // default prefix length for ~ terms
Locale locale = Locale.getDefault();                    // locale for date range parsing
0090:
/**
 * Type-safe enumeration (pre-Java-5 {@link Parameter} idiom) of the two
 * boolean operators the parser applies between clauses that carry no
 * explicit operator. Instances are canonical, so identity comparison works.
 */
static final class Operator extends Parameter {
    // Private: only the two canonical instances below may ever exist.
    private Operator(String name) {
        super(name);
    }

    static final Operator OR = new Operator("OR");
    static final Operator AND = new Operator("AND");
}
0099:
/** Constructs a query parser.
 * @param f the default field for query terms.
 * @param a used to find terms in the query text.
 */
public PrecedenceQueryParser(String f, Analyzer a) {
    // Initialize the generated parser with an empty stream;
    // parse(String) later re-initializes it with the real input via ReInit().
    this(new FastCharStream(new StringReader("")));
    analyzer = a;
    field = f;
}
0109:
0110: /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
0111: * @param expression the query string to be parsed.
0112: * @throws ParseException if the parsing fails
0113: */
0114: public Query parse(String expression) throws ParseException {
0115: // optimize empty query to be empty BooleanQuery
0116: if (expression == null || expression.trim().length() == 0) {
0117: return new BooleanQuery();
0118: }
0119:
0120: ReInit(new FastCharStream(new StringReader(expression)));
0121: try {
0122: Query query = Query(field);
0123: return (query != null) ? query : new BooleanQuery();
0124: } catch (TokenMgrError tme) {
0125: throw new ParseException(tme.getMessage());
0126: } catch (BooleanQuery.TooManyClauses tmc) {
0127: throw new ParseException("Too many boolean clauses");
0128: }
0129: }
0130:
0131: /**
0132: * @return Returns the analyzer.
0133: */
0134: public Analyzer getAnalyzer() {
0135: return analyzer;
0136: }
0137:
0138: /**
0139: * @return Returns the field.
0140: */
0141: public String getField() {
0142: return field;
0143: }
0144:
0145: /**
0146: * Get the minimal similarity for fuzzy queries.
0147: */
0148: public float getFuzzyMinSim() {
0149: return fuzzyMinSim;
0150: }
0151:
0152: /**
0153: * Set the minimum similarity for fuzzy queries.
0154: * Default is 0.5f.
0155: */
0156: public void setFuzzyMinSim(float fuzzyMinSim) {
0157: this .fuzzyMinSim = fuzzyMinSim;
0158: }
0159:
0160: /**
0161: * Get the prefix length for fuzzy queries.
0162: * @return Returns the fuzzyPrefixLength.
0163: */
0164: public int getFuzzyPrefixLength() {
0165: return fuzzyPrefixLength;
0166: }
0167:
0168: /**
0169: * Set the prefix length for fuzzy queries. Default is 0.
0170: * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
0171: */
0172: public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
0173: this .fuzzyPrefixLength = fuzzyPrefixLength;
0174: }
0175:
0176: /**
0177: * Sets the default slop for phrases. If zero, then exact phrase matches
0178: * are required. Default value is zero.
0179: */
0180: public void setPhraseSlop(int phraseSlop) {
0181: this .phraseSlop = phraseSlop;
0182: }
0183:
0184: /**
0185: * Gets the default slop for phrases.
0186: */
0187: public int getPhraseSlop() {
0188: return phraseSlop;
0189: }
0190:
0191: /**
0192: * Sets the boolean operator of the QueryParser.
0193: * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
0194: * are considered optional: for example <code>capital of Hungary</code> is equal to
0195: * <code>capital OR of OR Hungary</code>.<br/>
0196: * In <code>AND_OPERATOR</code> mode terms are considered to be in conjuction: the
0197: * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
0198: */
0199: public void setDefaultOperator(Operator op) {
0200: this .operator = op;
0201: }
0202:
0203: /**
0204: * Gets implicit operator setting, which will be either AND_OPERATOR
0205: * or OR_OPERATOR.
0206: */
0207: public Operator getDefaultOperator() {
0208: return operator;
0209: }
0210:
0211: /**
0212: * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
0213: * lower-cased or not. Default is <code>true</code>.
0214: */
0215: public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
0216: this .lowercaseExpandedTerms = lowercaseExpandedTerms;
0217: }
0218:
0219: /**
0220: * @see #setLowercaseExpandedTerms(boolean)
0221: */
0222: public boolean getLowercaseExpandedTerms() {
0223: return lowercaseExpandedTerms;
0224: }
0225:
0226: /**
0227: * Set locale used by date range parsing.
0228: */
0229: public void setLocale(Locale locale) {
0230: this .locale = locale;
0231: }
0232:
0233: /**
0234: * Returns current locale, allowing access by subclasses.
0235: */
0236: public Locale getLocale() {
0237: return locale;
0238: }
0239:
/**
 * Appends {@code q} to {@code clauses} as a {@link BooleanClause}, choosing its
 * Occur flag from the conjunction ({@code CONJ_*}) and modifier ({@code MOD_*})
 * that introduced it. May also retroactively change the Occur flag of the
 * previously-added clause (AND makes it required; OR in AND-default mode makes
 * it optional). A null {@code q} (term removed by the analyzer) still triggers
 * that retroactive fix-up but adds nothing.
 */
protected void addClause(Vector clauses, int conj, int modifier,
        Query q) {
    boolean required, prohibited;

    // If this term is introduced by AND, make the preceding term required,
    // unless it's already prohibited
    if (clauses.size() > 0 && conj == CONJ_AND) {
        BooleanClause c = (BooleanClause) clauses.elementAt(clauses
                .size() - 1);
        if (!c.isProhibited())
            c.setOccur(BooleanClause.Occur.MUST);
    }

    if (clauses.size() > 0 && operator == AND_OPERATOR
            && conj == CONJ_OR) {
        // If this term is introduced by OR, make the preceding term optional,
        // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
        // notice if the input is a OR b, first term is parsed as required; without
        // this modification a OR b would parsed as +a OR b
        BooleanClause c = (BooleanClause) clauses.elementAt(clauses
                .size() - 1);
        if (!c.isProhibited())
            c.setOccur(BooleanClause.Occur.SHOULD);
    }

    // We might have been passed a null query; the term might have been
    // filtered away by the analyzer.
    if (q == null)
        return;

    if (operator == OR_OPERATOR) {
        // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
        // introduced by NOT or -; make sure not to set both.
        prohibited = (modifier == MOD_NOT);
        required = (modifier == MOD_REQ);
        if (conj == CONJ_AND && !prohibited) {
            required = true;
        }
    } else {
        // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED
        // if not PROHIBITED and not introduced by OR
        prohibited = (modifier == MOD_NOT);
        required = (!prohibited && conj != CONJ_OR);
    }
    // Map the (required, prohibited) pair to the corresponding Occur value.
    if (required && !prohibited)
        clauses.addElement(new BooleanClause(q,
                BooleanClause.Occur.MUST));
    else if (!required && !prohibited)
        clauses.addElement(new BooleanClause(q,
                BooleanClause.Occur.SHOULD));
    else if (!required && prohibited)
        clauses.addElement(new BooleanClause(q,
                BooleanClause.Occur.MUST_NOT));
    else
        // Unreachable given the assignments above; kept as a safety net.
        throw new RuntimeException(
                "Clause cannot be both required and prohibited");
}
0297:
0298: /**
0299: * @exception ParseException throw in overridden method to disallow
0300: */
0301: protected Query getFieldQuery(String field, String queryText)
0302: throws ParseException {
0303: // Use the analyzer to get all the tokens, and then build a TermQuery,
0304: // PhraseQuery, or nothing based on the term count
0305:
0306: TokenStream source = analyzer.tokenStream(field,
0307: new StringReader(queryText));
0308: Vector v = new Vector();
0309: org.apache.lucene.analysis.Token t;
0310: int positionCount = 0;
0311: boolean severalTokensAtSamePosition = false;
0312:
0313: while (true) {
0314: try {
0315: t = source.next();
0316: } catch (IOException e) {
0317: t = null;
0318: }
0319: if (t == null)
0320: break;
0321: v.addElement(t);
0322: if (t.getPositionIncrement() == 1)
0323: positionCount++;
0324: else
0325: severalTokensAtSamePosition = true;
0326: }
0327: try {
0328: source.close();
0329: } catch (IOException e) {
0330: // ignore
0331: }
0332:
0333: if (v.size() == 0)
0334: return null;
0335: else if (v.size() == 1) {
0336: t = (org.apache.lucene.analysis.Token) v.elementAt(0);
0337: return new TermQuery(new Term(field, t.termText()));
0338: } else {
0339: if (severalTokensAtSamePosition) {
0340: if (positionCount == 1) {
0341: // no phrase query:
0342: BooleanQuery q = new BooleanQuery();
0343: for (int i = 0; i < v.size(); i++) {
0344: t = (org.apache.lucene.analysis.Token) v
0345: .elementAt(i);
0346: TermQuery currentQuery = new TermQuery(
0347: new Term(field, t.termText()));
0348: q.add(currentQuery, BooleanClause.Occur.SHOULD);
0349: }
0350: return q;
0351: } else {
0352: // phrase query:
0353: MultiPhraseQuery mpq = new MultiPhraseQuery();
0354: List multiTerms = new ArrayList();
0355: for (int i = 0; i < v.size(); i++) {
0356: t = (org.apache.lucene.analysis.Token) v
0357: .elementAt(i);
0358: if (t.getPositionIncrement() == 1
0359: && multiTerms.size() > 0) {
0360: mpq.add((Term[]) multiTerms
0361: .toArray(new Term[0]));
0362: multiTerms.clear();
0363: }
0364: multiTerms.add(new Term(field, t.termText()));
0365: }
0366: mpq.add((Term[]) multiTerms.toArray(new Term[0]));
0367: return mpq;
0368: }
0369: } else {
0370: PhraseQuery q = new PhraseQuery();
0371: q.setSlop(phraseSlop);
0372: for (int i = 0; i < v.size(); i++) {
0373: q.add(new Term(field,
0374: ((org.apache.lucene.analysis.Token) v
0375: .elementAt(i)).termText()));
0376:
0377: }
0378: return q;
0379: }
0380: }
0381: }
0382:
0383: /**
0384: * Base implementation delegates to {@link #getFieldQuery(String,String)}.
0385: * This method may be overridden, for example, to return
0386: * a SpanNearQuery instead of a PhraseQuery.
0387: *
0388: * @exception ParseException throw in overridden method to disallow
0389: */
0390: protected Query getFieldQuery(String field, String queryText,
0391: int slop) throws ParseException {
0392: Query query = getFieldQuery(field, queryText);
0393:
0394: if (query instanceof PhraseQuery) {
0395: ((PhraseQuery) query).setSlop(slop);
0396: }
0397: if (query instanceof MultiPhraseQuery) {
0398: ((MultiPhraseQuery) query).setSlop(slop);
0399: }
0400:
0401: return query;
0402: }
0403:
0404: /**
0405: * @exception ParseException throw in overridden method to disallow
0406: */
0407: protected Query getRangeQuery(String field, String part1,
0408: String part2, boolean inclusive) throws ParseException {
0409: if (lowercaseExpandedTerms) {
0410: part1 = part1.toLowerCase();
0411: part2 = part2.toLowerCase();
0412: }
0413: try {
0414: DateFormat df = DateFormat.getDateInstance(
0415: DateFormat.SHORT, locale);
0416: df.setLenient(true);
0417: Date d1 = df.parse(part1);
0418: Date d2 = df.parse(part2);
0419: part1 = DateTools
0420: .dateToString(d1, DateTools.Resolution.DAY);
0421: part2 = DateTools
0422: .dateToString(d2, DateTools.Resolution.DAY);
0423: } catch (Exception e) {
0424: }
0425:
0426: return new RangeQuery(new Term(field, part1), new Term(field,
0427: part2), inclusive);
0428: }
0429:
0430: /**
0431: * Factory method for generating query, given a set of clauses.
0432: * By default creates a boolean query composed of clauses passed in.
0433: *
0434: * Can be overridden by extending classes, to modify query being
0435: * returned.
0436: *
0437: * @param clauses Vector that contains {@link BooleanClause} instances
0438: * to join.
0439: *
0440: * @return Resulting {@link Query} object.
0441: * @exception ParseException throw in overridden method to disallow
0442: */
0443: protected Query getBooleanQuery(Vector clauses)
0444: throws ParseException {
0445: return getBooleanQuery(clauses, false);
0446: }
0447:
0448: /**
0449: * Factory method for generating query, given a set of clauses.
0450: * By default creates a boolean query composed of clauses passed in.
0451: *
0452: * Can be overridden by extending classes, to modify query being
0453: * returned.
0454: *
0455: * @param clauses Vector that contains {@link BooleanClause} instances
0456: * to join.
0457: * @param disableCoord true if coord scoring should be disabled.
0458: *
0459: * @return Resulting {@link Query} object.
0460: * @exception ParseException throw in overridden method to disallow
0461: */
0462: protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
0463: throws ParseException {
0464: if (clauses == null || clauses.size() == 0)
0465: return null;
0466:
0467: BooleanQuery query = new BooleanQuery(disableCoord);
0468: for (int i = 0; i < clauses.size(); i++) {
0469: query.add((BooleanClause) clauses.elementAt(i));
0470: }
0471: return query;
0472: }
0473:
0474: /**
0475: * Factory method for generating a query. Called when parser
0476: * parses an input term token that contains one or more wildcard
0477: * characters (? and *), but is not a prefix term token (one
0478: * that has just a single * character at the end)
0479: *<p>
0480: * Depending on settings, prefix term may be lower-cased
0481: * automatically. It will not go through the default Analyzer,
0482: * however, since normal Analyzers are unlikely to work properly
0483: * with wildcard templates.
0484: *<p>
0485: * Can be overridden by extending classes, to provide custom handling for
0486: * wildcard queries, which may be necessary due to missing analyzer calls.
0487: *
0488: * @param field Name of the field query will use.
0489: * @param termStr Term token that contains one or more wild card
0490: * characters (? or *), but is not simple prefix term
0491: *
0492: * @return Resulting {@link Query} built for the term
0493: * @exception ParseException throw in overridden method to disallow
0494: */
0495: protected Query getWildcardQuery(String field, String termStr)
0496: throws ParseException {
0497: if (lowercaseExpandedTerms) {
0498: termStr = termStr.toLowerCase();
0499: }
0500: Term t = new Term(field, termStr);
0501: return new WildcardQuery(t);
0502: }
0503:
0504: /**
0505: * Factory method for generating a query (similar to
0506: * {@link #getWildcardQuery}). Called when parser parses an input term
0507: * token that uses prefix notation; that is, contains a single '*' wildcard
0508: * character as its last character. Since this is a special case
0509: * of generic wildcard term, and such a query can be optimized easily,
0510: * this usually results in a different query object.
0511: *<p>
0512: * Depending on settings, a prefix term may be lower-cased
0513: * automatically. It will not go through the default Analyzer,
0514: * however, since normal Analyzers are unlikely to work properly
0515: * with wildcard templates.
0516: *<p>
0517: * Can be overridden by extending classes, to provide custom handling for
0518: * wild card queries, which may be necessary due to missing analyzer calls.
0519: *
0520: * @param field Name of the field query will use.
0521: * @param termStr Term token to use for building term for the query
0522: * (<b>without</b> trailing '*' character!)
0523: *
0524: * @return Resulting {@link Query} built for the term
0525: * @exception ParseException throw in overridden method to disallow
0526: */
0527: protected Query getPrefixQuery(String field, String termStr)
0528: throws ParseException {
0529: if (lowercaseExpandedTerms) {
0530: termStr = termStr.toLowerCase();
0531: }
0532: Term t = new Term(field, termStr);
0533: return new PrefixQuery(t);
0534: }
0535:
0536: /**
0537: * Factory method for generating a query (similar to
0538: * {@link #getWildcardQuery}). Called when parser parses
0539: * an input term token that has the fuzzy suffix (~) appended.
0540: *
0541: * @param field Name of the field query will use.
0542: * @param termStr Term token to use for building term for the query
0543: *
0544: * @return Resulting {@link Query} built for the term
0545: * @exception ParseException throw in overridden method to disallow
0546: */
0547: protected Query getFuzzyQuery(String field, String termStr,
0548: float minSimilarity) throws ParseException {
0549: if (lowercaseExpandedTerms) {
0550: termStr = termStr.toLowerCase();
0551: }
0552: Term t = new Term(field, termStr);
0553: return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
0554: }
0555:
0556: /**
0557: * Returns a String where the escape char has been
0558: * removed, or kept only once if there was a double escape.
0559: */
0560: private String discardEscapeChar(String input) {
0561: char[] caSource = input.toCharArray();
0562: char[] caDest = new char[caSource.length];
0563: int j = 0;
0564: for (int i = 0; i < caSource.length; i++) {
0565: if ((caSource[i] != '\\')
0566: || (i > 0 && caSource[i - 1] == '\\')) {
0567: caDest[j++] = caSource[i];
0568: }
0569: }
0570: return new String(caDest, 0, j);
0571: }
0572:
0573: /**
0574: * Returns a String where those characters that QueryParser
0575: * expects to be escaped are escaped by a preceding <code>\</code>.
0576: */
0577: public static String escape(String s) {
0578: StringBuffer sb = new StringBuffer();
0579: for (int i = 0; i < s.length(); i++) {
0580: char c = s.charAt(i);
0581: // NOTE: keep this in sync with _ESCAPED_CHAR below!
0582: if (c == '\\' || c == '+' || c == '-' || c == '!'
0583: || c == '(' || c == ')' || c == ':' || c == '^'
0584: || c == '[' || c == ']' || c == '\"' || c == '{'
0585: || c == '}' || c == '~' || c == '*' || c == '?') {
0586: sb.append('\\');
0587: }
0588: sb.append(c);
0589: }
0590: return sb.toString();
0591: }
0592:
0593: /**
0594: * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
0595: * Usage:<br>
0596: * <code>java org.apache.lucene.queryParser.QueryParser <input></code>
0597: */
0598: public static void main(String[] args) throws Exception {
0599: if (args.length == 0) {
0600: System.out
0601: .println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
0602: System.exit(0);
0603: }
0604: PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
0605: new org.apache.lucene.analysis.SimpleAnalyzer());
0606: Query q = qp.parse(args[0]);
0607: System.out.println(q.toString("field"));
0608: }
0609:
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
/**
 * Generated production: consumes an optional AND/OR conjunction token and
 * returns the matching CONJ_* code (CONJ_NONE when no conjunction is present).
 */
final public int Conjunction() throws ParseException {
    int ret = CONJ_NONE;
    // Peek at the next token kind (jj_ntk == -1 means not yet fetched).
    switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
    case AND:
    case OR:
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case AND:
            jj_consume_token(AND);
            ret = CONJ_AND;
            break;
        case OR:
            jj_consume_token(OR);
            ret = CONJ_OR;
            break;
        default:
            jj_la1[0] = jj_gen;
            jj_consume_token(-1);
            throw new ParseException();
        }
        break;
    default:
        // No conjunction token: leave CONJ_NONE.
        jj_la1[1] = jj_gen;
        ;
    }
    {
        if (true)
            return ret;
    }
    throw new Error("Missing return statement in function");
}
0642:
/**
 * Generated production: consumes an optional +/-/NOT modifier token and
 * returns the matching MOD_* code (MOD_NONE when no modifier is present).
 * Both '-' and NOT map to MOD_NOT.
 */
final public int Modifier() throws ParseException {
    int ret = MOD_NONE;
    // Peek at the next token kind (jj_ntk == -1 means not yet fetched).
    switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
    case NOT:
    case PLUS:
    case MINUS:
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case PLUS:
            jj_consume_token(PLUS);
            ret = MOD_REQ;
            break;
        case MINUS:
            jj_consume_token(MINUS);
            ret = MOD_NOT;
            break;
        case NOT:
            jj_consume_token(NOT);
            ret = MOD_NOT;
            break;
        default:
            jj_la1[2] = jj_gen;
            jj_consume_token(-1);
            throw new ParseException();
        }
        break;
    default:
        // No modifier token: leave MOD_NONE.
        jj_la1[3] = jj_gen;
        ;
    }
    {
        if (true)
            return ret;
    }
    throw new Error("Missing return statement in function");
}
0678:
/**
 * Generated top-level production: a Query is a sequence of OR-separated
 * andExpressions, each optionally prefixed by a modifier. Clauses are
 * accumulated via addClause(); a single unmodified clause is returned
 * directly, otherwise a BooleanQuery is built.
 * <p>
 * NOTE(review): {@code orPresent} is never reset inside the loop, so once an
 * OR is seen every later clause is treated as OR-joined — presumably related
 * to the disabled precedence tests mentioned in the class javadoc; confirm
 * before relying on mixed AND/OR behavior.
 */
final public Query Query(String field) throws ParseException {
    Vector clauses = new Vector();
    Query q, firstQuery = null;
    boolean orPresent = false;
    int modifier;
    modifier = Modifier();
    q = andExpression(field);
    addClause(clauses, CONJ_NONE, modifier, q);
    // Remember the first clause so a one-clause query can be returned as-is,
    // but only when it carries no modifier.
    if (modifier == MOD_NONE)
        firstQuery = q;
    label_1: while (true) {
        // Loop while the next token can start another clause.
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case OR:
        case NOT:
        case PLUS:
        case MINUS:
        case LPAREN:
        case QUOTED:
        case TERM:
        case PREFIXTERM:
        case WILDTERM:
        case RANGEIN_START:
        case RANGEEX_START:
        case NUMBER:
            ;
            break;
        default:
            jj_la1[4] = jj_gen;
            break label_1;
        }
        // Optional OR between clauses (AND is handled inside andExpression).
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case OR:
            jj_consume_token(OR);
            orPresent = true;
            break;
        default:
            jj_la1[5] = jj_gen;
            ;
        }
        modifier = Modifier();
        q = andExpression(field);
        addClause(clauses, orPresent ? CONJ_OR : CONJ_NONE,
                modifier, q);
    }
    if (clauses.size() == 1 && firstQuery != null) {
        if (true)
            return firstQuery;
    } else {
        {
            if (true)
                return getBooleanQuery(clauses);
        }
    }
    throw new Error("Missing return statement in function");
}
0734:
/**
 * Generated production for the higher-precedence level: a sequence of
 * Clauses joined by explicit AND tokens. A single clause is returned
 * directly; multiple clauses become a BooleanQuery of MUST clauses
 * (via addClause with CONJ_AND).
 */
final public Query andExpression(String field)
        throws ParseException {
    Vector clauses = new Vector();
    Query q, firstQuery = null;
    int modifier;
    q = Clause(field);
    addClause(clauses, CONJ_NONE, MOD_NONE, q);
    firstQuery = q;
    label_2: while (true) {
        // Continue only while an explicit AND follows.
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case AND:
            ;
            break;
        default:
            jj_la1[6] = jj_gen;
            break label_2;
        }
        jj_consume_token(AND);
        modifier = Modifier();
        q = Clause(field);
        addClause(clauses, CONJ_AND, modifier, q);
    }
    if (clauses.size() == 1 && firstQuery != null) {
        if (true)
            return firstQuery;
    } else {
        {
            if (true)
                return getBooleanQuery(clauses);
        }
    }
    throw new Error("Missing return statement in function");
}
0768:
/**
 * Generated production for a single clause: an optional "field:" prefix
 * (detected with 2-token lookahead), then either a Term or a parenthesized
 * sub-Query with an optional ^boost. A field prefix overrides the default
 * field for this clause only.
 */
final public Query Clause(String field) throws ParseException {
    Query q;
    Token fieldToken = null, boost = null;
    // Lookahead: TERM followed by ':' means an explicit field prefix.
    if (jj_2_1(2)) {
        fieldToken = jj_consume_token(TERM);
        jj_consume_token(COLON);
        field = discardEscapeChar(fieldToken.image);
    } else {
        ;
    }
    switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
    case QUOTED:
    case TERM:
    case PREFIXTERM:
    case WILDTERM:
    case RANGEIN_START:
    case RANGEEX_START:
    case NUMBER:
        q = Term(field);
        break;
    case LPAREN:
        // Parenthesized sub-query, optionally boosted: "( ... )^n".
        jj_consume_token(LPAREN);
        q = Query(field);
        jj_consume_token(RPAREN);
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case CARAT:
            jj_consume_token(CARAT);
            boost = jj_consume_token(NUMBER);
            break;
        default:
            jj_la1[7] = jj_gen;
            ;
        }
        break;
    default:
        jj_la1[8] = jj_gen;
        jj_consume_token(-1);
        throw new ParseException();
    }
    if (boost != null) {
        float f = (float) 1.0;
        try {
            f = Float.valueOf(boost.image).floatValue();
            q.setBoost(f);
        } catch (Exception ignored) {
            // Unparseable boost number: leave the default boost in place.
        }
    }
    {
        if (true)
            return q;
    }
    throw new Error("Missing return statement in function");
}
0822:
/**
 * Generated production for a terminal clause. Handles four token shapes:
 * a bare/prefix/wildcard/number term (with optional ~fuzzy and ^boost),
 * an inclusive range "[a TO b]", an exclusive range "{a TO b}", and a
 * quoted phrase (with optional ~slop and ^boost). Delegates query
 * construction to the get*Query factory methods.
 */
final public Query Term(String field) throws ParseException {
    Token term, boost = null, fuzzySlop = null, goop1, goop2;
    boolean prefix = false;
    boolean wildcard = false;
    boolean fuzzy = false;
    Query q;
    switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
    case TERM:
    case PREFIXTERM:
    case WILDTERM:
    case NUMBER:
        // --- single-term branch: record which flavor of term was seen ---
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case TERM:
            term = jj_consume_token(TERM);
            break;
        case PREFIXTERM:
            term = jj_consume_token(PREFIXTERM);
            prefix = true;
            break;
        case WILDTERM:
            term = jj_consume_token(WILDTERM);
            wildcard = true;
            break;
        case NUMBER:
            term = jj_consume_token(NUMBER);
            break;
        default:
            jj_la1[9] = jj_gen;
            jj_consume_token(-1);
            throw new ParseException();
        }
        // Optional fuzzy suffix (~ or ~0.x) before any boost.
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case FUZZY_SLOP:
            fuzzySlop = jj_consume_token(FUZZY_SLOP);
            fuzzy = true;
            break;
        default:
            jj_la1[10] = jj_gen;
            ;
        }
        // Optional ^boost, which may itself be followed by a fuzzy suffix.
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case CARAT:
            jj_consume_token(CARAT);
            boost = jj_consume_token(NUMBER);
            switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
            case FUZZY_SLOP:
                fuzzySlop = jj_consume_token(FUZZY_SLOP);
                fuzzy = true;
                break;
            default:
                jj_la1[11] = jj_gen;
                ;
            }
            break;
        default:
            jj_la1[12] = jj_gen;
            ;
        }
        String termImage = discardEscapeChar(term.image);
        if (wildcard) {
            q = getWildcardQuery(field, termImage);
        } else if (prefix) {
            // Strip the trailing '*' before building the prefix query.
            q = getPrefixQuery(field, discardEscapeChar(term.image
                    .substring(0, term.image.length() - 1)));
        } else if (fuzzy) {
            // "~" alone uses the configured default similarity; "~0.x" overrides it.
            float fms = fuzzyMinSim;
            try {
                fms = Float.valueOf(fuzzySlop.image.substring(1))
                        .floatValue();
            } catch (Exception ignored) {
            }
            if (fms < 0.0f || fms > 1.0f) {
                {
                    if (true)
                        throw new ParseException(
                                "Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
                }
            }
            q = getFuzzyQuery(field, termImage, fms);
        } else {
            q = getFieldQuery(field, termImage);
        }
        break;
    case RANGEIN_START:
        // --- inclusive range branch: "[goop1 TO goop2]" ---
        jj_consume_token(RANGEIN_START);
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case RANGEIN_GOOP:
            goop1 = jj_consume_token(RANGEIN_GOOP);
            break;
        case RANGEIN_QUOTED:
            goop1 = jj_consume_token(RANGEIN_QUOTED);
            break;
        default:
            jj_la1[13] = jj_gen;
            jj_consume_token(-1);
            throw new ParseException();
        }
        // "TO" is optional between the endpoints.
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case RANGEIN_TO:
            jj_consume_token(RANGEIN_TO);
            break;
        default:
            jj_la1[14] = jj_gen;
            ;
        }
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case RANGEIN_GOOP:
            goop2 = jj_consume_token(RANGEIN_GOOP);
            break;
        case RANGEIN_QUOTED:
            goop2 = jj_consume_token(RANGEIN_QUOTED);
            break;
        default:
            jj_la1[15] = jj_gen;
            jj_consume_token(-1);
            throw new ParseException();
        }
        jj_consume_token(RANGEIN_END);
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case CARAT:
            jj_consume_token(CARAT);
            boost = jj_consume_token(NUMBER);
            break;
        default:
            jj_la1[16] = jj_gen;
            ;
        }
        // Quoted endpoints lose their quotes; bare ones lose escape chars.
        if (goop1.kind == RANGEIN_QUOTED) {
            goop1.image = goop1.image.substring(1, goop1.image
                    .length() - 1);
        } else {
            goop1.image = discardEscapeChar(goop1.image);
        }
        if (goop2.kind == RANGEIN_QUOTED) {
            goop2.image = goop2.image.substring(1, goop2.image
                    .length() - 1);
        } else {
            goop2.image = discardEscapeChar(goop2.image);
        }
        q = getRangeQuery(field, goop1.image, goop2.image, true);
        break;
    case RANGEEX_START:
        // --- exclusive range branch: "{goop1 TO goop2}" ---
        jj_consume_token(RANGEEX_START);
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case RANGEEX_GOOP:
            goop1 = jj_consume_token(RANGEEX_GOOP);
            break;
        case RANGEEX_QUOTED:
            goop1 = jj_consume_token(RANGEEX_QUOTED);
            break;
        default:
            jj_la1[17] = jj_gen;
            jj_consume_token(-1);
            throw new ParseException();
        }
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case RANGEEX_TO:
            jj_consume_token(RANGEEX_TO);
            break;
        default:
            jj_la1[18] = jj_gen;
            ;
        }
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case RANGEEX_GOOP:
            goop2 = jj_consume_token(RANGEEX_GOOP);
            break;
        case RANGEEX_QUOTED:
            goop2 = jj_consume_token(RANGEEX_QUOTED);
            break;
        default:
            jj_la1[19] = jj_gen;
            jj_consume_token(-1);
            throw new ParseException();
        }
        jj_consume_token(RANGEEX_END);
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case CARAT:
            jj_consume_token(CARAT);
            boost = jj_consume_token(NUMBER);
            break;
        default:
            jj_la1[20] = jj_gen;
            ;
        }
        if (goop1.kind == RANGEEX_QUOTED) {
            goop1.image = goop1.image.substring(1, goop1.image
                    .length() - 1);
        } else {
            goop1.image = discardEscapeChar(goop1.image);
        }
        if (goop2.kind == RANGEEX_QUOTED) {
            goop2.image = goop2.image.substring(1, goop2.image
                    .length() - 1);
        } else {
            goop2.image = discardEscapeChar(goop2.image);
        }

        q = getRangeQuery(field, goop1.image, goop2.image, false);
        break;
    case QUOTED:
        // --- quoted phrase branch: "..." with optional ~slop and ^boost ---
        term = jj_consume_token(QUOTED);
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case FUZZY_SLOP:
            fuzzySlop = jj_consume_token(FUZZY_SLOP);
            break;
        default:
            jj_la1[21] = jj_gen;
            ;
        }
        switch ((jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case CARAT:
            jj_consume_token(CARAT);
            boost = jj_consume_token(NUMBER);
            break;
        default:
            jj_la1[22] = jj_gen;
            ;
        }
        int s = phraseSlop;

        // "~n" after a phrase overrides the default phrase slop.
        if (fuzzySlop != null) {
            try {
                s = Float.valueOf(fuzzySlop.image.substring(1))
                        .intValue();
            } catch (Exception ignored) {
            }
        }
        // Strip the surrounding quotes before analysis.
        q = getFieldQuery(field, term.image.substring(1, term.image
                .length() - 1), s);
        break;
    default:
        jj_la1[23] = jj_gen;
        jj_consume_token(-1);
        throw new ParseException();
    }
    if (boost != null) {
        float f = (float) 1.0;
        try {
            f = Float.valueOf(boost.image).floatValue();
        } catch (Exception ignored) {
            /* Should this be handled somehow? (defaults to "no boost", if
             * boost number is invalid)
             */
        }

        // avoid boosting null queries, such as those caused by stop words
        if (q != null) {
            q.setBoost(f);
        }
    }
    {
        if (true)
            return q;
    }
    throw new Error("Missing return statement in function");
}
1080:
/**
 * Syntactic lookahead for choice point 1 (a TERM followed by a COLON),
 * with a token budget of {@code xla}. Returns true when the lookahead
 * succeeds.
 */
final private boolean jj_2_1(int xla) {
jj_la = xla;
jj_lastpos = jj_scanpos = token;
try {
// jj_3_1 returns true on scan FAILURE, hence the negation.
return !jj_3_1();
} catch (LookaheadSuccess ls) {
// Thrown by jj_scan_token when the budget is exhausted while still matching.
return true;
} finally {
// Memoize the scanned span so jj_rescan_token can replay it for error reporting.
jj_save(0, xla);
}
}
1092:
1093: final private boolean jj_3_1() {
1094: if (jj_scan_token(TERM))
1095: return true;
1096: if (jj_scan_token(COLON))
1097: return true;
1098: return false;
1099: }
1100:
/** Generated token manager supplying the token stream. */
public PrecedenceQueryParserTokenManager token_source;
/** Current (last consumed) token; jj_nt caches the following token for jj_ntk(). */
public Token token, jj_nt;
/** Kind of the token after 'token', or -1 when it has not been fetched yet. */
private int jj_ntk;
/** Lookahead scan cursor and the furthest token fetched during a scan. */
private Token jj_scanpos, jj_lastpos;
/** Remaining token budget of the lookahead currently in progress. */
private int jj_la;
/** Checked by getToken() to decide whether to start from the scan cursor. */
public boolean lookingAhead = false;
// NOTE(review): jj_semLA is declared but never referenced in the visible code;
// presumably kept for JavaCC template compatibility — confirm before removing.
private boolean jj_semLA;
/** Generation counter: incremented on every successfully consumed token. */
private int jj_gen;
/** Per-choice-point generation stamps; paired with the jj_la1_0 bitmasks. */
final private int[] jj_la1 = new int[24];
static private int[] jj_la1_0;
static {
jj_la1_0();
}
1114:
/**
 * Generated lookahead bitmask table: bit j of jj_la1_0[i] is set when
 * token kind j is acceptable at choice point i. Consulted by
 * generateParseException() for choice points stamped with the current
 * generation in jj_la1. Do not edit by hand — regenerate with JavaCC.
 */
private static void jj_la1_0() {
jj_la1_0 = new int[] { 0x180, 0x180, 0xe00, 0xe00, 0xfb1f00,
0x100, 0x80, 0x8000, 0xfb1000, 0x9a0000, 0x40000,
0x40000, 0x8000, 0xc000000, 0x1000000, 0xc000000,
0x8000, 0xc0000000, 0x10000000, 0xc0000000, 0x8000,
0x40000, 0x8000, 0xfb0000, };
}
1122:
/** Memoization chains for the jj_2_* lookahead methods (one slot per method). */
final private JJCalls[] jj_2_rtns = new JJCalls[1];
/** True while jj_rescan_token replays lookaheads to collect error tokens. */
private boolean jj_rescan = false;
/** Tokens consumed since the last memo-table sweep in jj_consume_token. */
private int jj_gc = 0;
1126:
/**
 * Constructs a parser reading from the supplied character stream,
 * creating a fresh token manager and resetting all lookahead state.
 */
public PrecedenceQueryParser(CharStream stream) {
  token_source = new PrecedenceQueryParserTokenManager(stream);
  token = new Token();
  jj_ntk = -1;
  jj_gen = 0;
  Arrays.fill(jj_la1, -1);
  for (int slot = 0; slot < jj_2_rtns.length; slot++) {
    jj_2_rtns[slot] = new JJCalls();
  }
}
1137:
1138: public void ReInit(CharStream stream) {
1139: token_source.ReInit(stream);
1140: token = new Token();
1141: jj_ntk = -1;
1142: jj_gen = 0;
1143: for (int i = 0; i < 24; i++)
1144: jj_la1[i] = -1;
1145: for (int i = 0; i < jj_2_rtns.length; i++)
1146: jj_2_rtns[i] = new JJCalls();
1147: }
1148:
/**
 * Constructs a parser driven by a caller-supplied token manager,
 * resetting all lookahead state.
 */
public PrecedenceQueryParser(PrecedenceQueryParserTokenManager tm) {
  token_source = tm;
  token = new Token();
  jj_ntk = -1;
  jj_gen = 0;
  Arrays.fill(jj_la1, -1);
  for (int slot = 0; slot < jj_2_rtns.length; slot++) {
    jj_2_rtns[slot] = new JJCalls();
  }
}
1159:
1160: public void ReInit(PrecedenceQueryParserTokenManager tm) {
1161: token_source = tm;
1162: token = new Token();
1163: jj_ntk = -1;
1164: jj_gen = 0;
1165: for (int i = 0; i < 24; i++)
1166: jj_la1[i] = -1;
1167: for (int i = 0; i < jj_2_rtns.length; i++)
1168: jj_2_rtns[i] = new JJCalls();
1169: }
1170:
/**
 * Advances to the next token and consumes it if its kind matches;
 * otherwise restores the previous position, records the expected kind,
 * and throws a ParseException describing what was expected.
 */
final private Token jj_consume_token(int kind)
throws ParseException {
Token oldToken;
if ((oldToken = token).next != null)
token = token.next;
else
token = token.next = token_source.getNextToken();
jj_ntk = -1;
if (token.kind == kind) {
jj_gen++;
// Every 100 consumed tokens, drop memoized lookahead results that are
// behind the current generation so their token chains can be collected.
if (++jj_gc > 100) {
jj_gc = 0;
for (int i = 0; i < jj_2_rtns.length; i++) {
JJCalls c = jj_2_rtns[i];
while (c != null) {
if (c.gen < jj_gen)
c.first = null;
c = c.next;
}
}
}
return token;
}
// Mismatch: roll back and let generateParseException() report jj_kind.
token = oldToken;
jj_kind = kind;
throw generateParseException();
}
1198:
/**
 * Control-flow signal thrown by jj_scan_token when the lookahead budget
 * is exhausted while still matching; caught in jj_2_1 to mean "success".
 */
static private final class LookaheadSuccess extends java.lang.Error {
}

// Shared singleton, reused to avoid allocating an Error per lookahead.
final private LookaheadSuccess jj_ls = new LookaheadSuccess();
1203:
/**
 * Advances the lookahead cursor by one token and tests its kind.
 * Returns true on mismatch (scan failure); throws the shared jj_ls
 * LookaheadSuccess when the budget hits zero at the fetch frontier.
 */
final private boolean jj_scan_token(int kind) {
if (jj_scanpos == jj_lastpos) {
// At the frontier: spend budget and fetch (or link) the next token.
jj_la--;
if (jj_scanpos.next == null) {
jj_lastpos = jj_scanpos = jj_scanpos.next = token_source
.getNextToken();
} else {
jj_lastpos = jj_scanpos = jj_scanpos.next;
}
} else {
// Replaying already-fetched tokens: just follow the chain.
jj_scanpos = jj_scanpos.next;
}
if (jj_rescan) {
// During error-report rescan, record the tested kind together with its
// distance from the current parse token (see jj_add_error_token).
int i = 0;
Token tok = token;
while (tok != null && tok != jj_scanpos) {
i++;
tok = tok.next;
}
if (tok != null)
jj_add_error_token(kind, i);
}
if (jj_scanpos.kind != kind)
return true;
if (jj_la == 0 && jj_scanpos == jj_lastpos)
throw jj_ls;
return false;
}
1232:
1233: final public Token getNextToken() {
1234: if (token.next != null)
1235: token = token.next;
1236: else
1237: token = token.next = token_source.getNextToken();
1238: jj_ntk = -1;
1239: jj_gen++;
1240: return token;
1241: }
1242:
1243: final public Token getToken(int index) {
1244: Token t = lookingAhead ? jj_scanpos : token;
1245: for (int i = 0; i < index; i++) {
1246: if (t.next != null)
1247: t = t.next;
1248: else
1249: t = t.next = token_source.getNextToken();
1250: }
1251: return t;
1252: }
1253:
1254: final private int jj_ntk() {
1255: if ((jj_nt = token.next) == null)
1256: return (jj_ntk = (token.next = token_source.getNextToken()).kind);
1257: else
1258: return (jj_ntk = jj_nt.kind);
1259: }
1260:
/** Distinct expected-token sequences collected for the ParseException. */
private java.util.Vector jj_expentries = new java.util.Vector();
/** Most recently built expected-token sequence. */
private int[] jj_expentry;
/** Kind recorded by jj_consume_token on mismatch; -1 when none pending. */
private int jj_kind = -1;
/** Buffer of token kinds along the lookahead path being replayed (max 100). */
private int[] jj_lasttokens = new int[100];
/** Number of valid entries currently in jj_lasttokens. */
private int jj_endpos;
1266:
/**
 * Records that token kind {@code kind} was tested at lookahead depth
 * {@code pos} during rescan. Consecutive depths extend the current
 * sequence; a jump in depth flushes the accumulated sequence into
 * jj_expentries (deduplicated) and starts a new one.
 */
private void jj_add_error_token(int kind, int pos) {
if (pos >= 100)
return;
if (pos == jj_endpos + 1) {
// Contiguous: extend the current sequence by one token.
jj_lasttokens[jj_endpos++] = kind;
} else if (jj_endpos != 0) {
// Non-contiguous: snapshot the accumulated sequence.
jj_expentry = new int[jj_endpos];
for (int i = 0; i < jj_endpos; i++) {
jj_expentry[i] = jj_lasttokens[i];
}
// Add only if an identical sequence has not been recorded already.
boolean exists = false;
for (java.util.Enumeration e = jj_expentries.elements(); e
.hasMoreElements();) {
int[] oldentry = (int[]) (e.nextElement());
if (oldentry.length == jj_expentry.length) {
exists = true;
for (int i = 0; i < jj_expentry.length; i++) {
if (oldentry[i] != jj_expentry[i]) {
exists = false;
break;
}
}
if (exists)
break;
}
}
if (!exists)
jj_expentries.addElement(jj_expentry);
// Restart accumulation with this token at depth pos (pos == 0 is the
// sentinel flush issued from generateParseException).
if (pos != 0)
jj_lasttokens[(jj_endpos = pos) - 1] = kind;
}
}
1299:
1300: public ParseException generateParseException() {
1301: jj_expentries.removeAllElements();
1302: boolean[] la1tokens = new boolean[32];
1303: for (int i = 0; i < 32; i++) {
1304: la1tokens[i] = false;
1305: }
1306: if (jj_kind >= 0) {
1307: la1tokens[jj_kind] = true;
1308: jj_kind = -1;
1309: }
1310: for (int i = 0; i < 24; i++) {
1311: if (jj_la1[i] == jj_gen) {
1312: for (int j = 0; j < 32; j++) {
1313: if ((jj_la1_0[i] & (1 << j)) != 0) {
1314: la1tokens[j] = true;
1315: }
1316: }
1317: }
1318: }
1319: for (int i = 0; i < 32; i++) {
1320: if (la1tokens[i]) {
1321: jj_expentry = new int[1];
1322: jj_expentry[0] = i;
1323: jj_expentries.addElement(jj_expentry);
1324: }
1325: }
1326: jj_endpos = 0;
1327: jj_rescan_token();
1328: jj_add_error_token(0, 0);
1329: int[][] exptokseq = new int[jj_expentries.size()][];
1330: for (int i = 0; i < jj_expentries.size(); i++) {
1331: exptokseq[i] = (int[]) jj_expentries.elementAt(i);
1332: }
1333: return new ParseException(token, exptokseq, tokenImage);
1334: }
1335:
/** Enable tracing. A no-op in this generated parser. */
final public void enable_tracing() {
}
1338:
/** Disable tracing. A no-op in this generated parser. */
final public void disable_tracing() {
}
1341:
/**
 * Replays every memoized lookahead that is still ahead of the current
 * parse position, with jj_rescan set so jj_scan_token records each token
 * kind it tests (via jj_add_error_token) for the ParseException.
 */
final private void jj_rescan_token() {
jj_rescan = true;
for (int i = 0; i < 1; i++) {
JJCalls p = jj_2_rtns[i];
do {
// gen > jj_gen means this memoized result has not been passed yet.
if (p.gen > jj_gen) {
jj_la = p.arg;
jj_lastpos = jj_scanpos = p.first;
switch (i) {
case 0:
jj_3_1();
break;
}
}
p = p.next;
} while (p != null);
}
jj_rescan = false;
}
1361:
/**
 * Memoizes the outcome of lookahead call {@code index} starting at the
 * current token, so jj_rescan_token can replay it when building a
 * ParseException.
 */
final private void jj_save(int index, int xla) {
JJCalls p = jj_2_rtns[index];
// Find a stale chain entry (gen <= jj_gen) to reuse, or append a new one.
while (p.gen > jj_gen) {
if (p.next == null) {
p = p.next = new JJCalls();
break;
}
p = p.next;
}
// Valid until the parse consumes past the region the scan covered:
// xla - jj_la is the number of tokens actually scanned.
p.gen = jj_gen + xla - jj_la;
p.first = token;
p.arg = xla;
}
1375:
/** Memoization record for one jj_2_* lookahead evaluation (see jj_save). */
static final class JJCalls {
int gen; // generation up to which this result remains valid
Token first; // token at which the lookahead started
int arg; // lookahead limit (xla) that was used
JJCalls next; // next record in the chain for this choice point
}
1382:
1383: }
|