001: package org.apache.lucene.queryParser;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.util.LuceneTestCase;
021: import org.apache.lucene.analysis.*;
022: import org.apache.lucene.analysis.Token;
023: import org.apache.lucene.analysis.standard.StandardAnalyzer;
024: import org.apache.lucene.document.DateField;
025: import org.apache.lucene.document.DateTools;
026: import org.apache.lucene.document.Document;
027: import org.apache.lucene.document.Field;
028: import org.apache.lucene.index.IndexWriter;
029: import org.apache.lucene.index.Term;
030: import org.apache.lucene.search.*;
031: import org.apache.lucene.store.RAMDirectory;
032:
033: import java.io.IOException;
034: import java.io.Reader;
035: import java.text.DateFormat;
036: import java.util.Calendar;
037: import java.util.Date;
038: import java.util.Locale;
039:
040: /**
041: * Tests QueryParser.
042: */
043: public class TestQueryParser extends LuceneTestCase {
044:
045: public static Analyzer qpAnalyzer = new QPTestAnalyzer();
046:
047: public static class QPTestFilter extends TokenFilter {
048: /**
049: * Filter which discards the token 'stop' and which expands the
050: * token 'phrase' into 'phrase1 phrase2'
051: */
052: public QPTestFilter(TokenStream in) {
053: super (in);
054: }
055:
056: boolean inPhrase = false;
057: int savedStart = 0, savedEnd = 0;
058:
059: public Token next() throws IOException {
060: if (inPhrase) {
061: inPhrase = false;
062: return new Token("phrase2", savedStart, savedEnd);
063: } else
064: for (Token token = input.next(); token != null; token = input
065: .next()) {
066: if (token.termText().equals("phrase")) {
067: inPhrase = true;
068: savedStart = token.startOffset();
069: savedEnd = token.endOffset();
070: return new Token("phrase1", savedStart,
071: savedEnd);
072: } else if (!token.termText().equals("stop"))
073: return token;
074: }
075: return null;
076: }
077: }
078:
079: public static class QPTestAnalyzer extends Analyzer {
080:
081: /** Filters LowerCaseTokenizer with StopFilter. */
082: public final TokenStream tokenStream(String fieldName,
083: Reader reader) {
084: return new QPTestFilter(new LowerCaseTokenizer(reader));
085: }
086: }
087:
088: public static class QPTestParser extends QueryParser {
089: public QPTestParser(String f, Analyzer a) {
090: super (f, a);
091: }
092:
093: protected Query getFuzzyQuery(String field, String termStr,
094: float minSimilarity) throws ParseException {
095: throw new ParseException("Fuzzy queries not allowed");
096: }
097:
098: protected Query getWildcardQuery(String field, String termStr)
099: throws ParseException {
100: throw new ParseException("Wildcard queries not allowed");
101: }
102: }
103:
104: private int originalMaxClauses;
105:
106: public void setUp() throws Exception {
107: super .setUp();
108: originalMaxClauses = BooleanQuery.getMaxClauseCount();
109: }
110:
111: public QueryParser getParser(Analyzer a) throws Exception {
112: if (a == null)
113: a = new SimpleAnalyzer();
114: QueryParser qp = new QueryParser("field", a);
115: qp.setDefaultOperator(QueryParser.OR_OPERATOR);
116: return qp;
117: }
118:
119: public Query getQuery(String query, Analyzer a) throws Exception {
120: return getParser(a).parse(query);
121: }
122:
123: public void assertQueryEquals(String query, Analyzer a,
124: String result) throws Exception {
125: Query q = getQuery(query, a);
126: String s = q.toString("field");
127: if (!s.equals(result)) {
128: fail("Query /" + query + "/ yielded /" + s
129: + "/, expecting /" + result + "/");
130: }
131: }
132:
133: public void assertQueryEquals(QueryParser qp, String field,
134: String query, String result) throws Exception {
135: Query q = qp.parse(query);
136: String s = q.toString(field);
137: if (!s.equals(result)) {
138: fail("Query /" + query + "/ yielded /" + s
139: + "/, expecting /" + result + "/");
140: }
141: }
142:
143: public void assertEscapedQueryEquals(String query, Analyzer a,
144: String result) throws Exception {
145: String escapedQuery = QueryParser.escape(query);
146: if (!escapedQuery.equals(result)) {
147: fail("Query /" + query + "/ yielded /" + escapedQuery
148: + "/, expecting /" + result + "/");
149: }
150: }
151:
152: public void assertWildcardQueryEquals(String query,
153: boolean lowercase, String result,
154: boolean allowLeadingWildcard) throws Exception {
155: QueryParser qp = getParser(null);
156: qp.setLowercaseExpandedTerms(lowercase);
157: qp.setAllowLeadingWildcard(allowLeadingWildcard);
158: Query q = qp.parse(query);
159: String s = q.toString("field");
160: if (!s.equals(result)) {
161: fail("WildcardQuery /" + query + "/ yielded /" + s
162: + "/, expecting /" + result + "/");
163: }
164: }
165:
166: public void assertWildcardQueryEquals(String query,
167: boolean lowercase, String result) throws Exception {
168: assertWildcardQueryEquals(query, lowercase, result, false);
169: }
170:
171: public void assertWildcardQueryEquals(String query, String result)
172: throws Exception {
173: QueryParser qp = getParser(null);
174: Query q = qp.parse(query);
175: String s = q.toString("field");
176: if (!s.equals(result)) {
177: fail("WildcardQuery /" + query + "/ yielded /" + s
178: + "/, expecting /" + result + "/");
179: }
180: }
181:
182: public Query getQueryDOA(String query, Analyzer a) throws Exception {
183: if (a == null)
184: a = new SimpleAnalyzer();
185: QueryParser qp = new QueryParser("field", a);
186: qp.setDefaultOperator(QueryParser.AND_OPERATOR);
187: return qp.parse(query);
188: }
189:
190: public void assertQueryEqualsDOA(String query, Analyzer a,
191: String result) throws Exception {
192: Query q = getQueryDOA(query, a);
193: String s = q.toString("field");
194: if (!s.equals(result)) {
195: fail("Query /" + query + "/ yielded /" + s
196: + "/, expecting /" + result + "/");
197: }
198: }
199:
200: public void testSimple() throws Exception {
201: assertQueryEquals("term term term", null, "term term term");
202: assertQueryEquals("türm term term", new WhitespaceAnalyzer(),
203: "türm term term");
204: assertQueryEquals("ümlaut", new WhitespaceAnalyzer(),
205: "ümlaut");
206:
207: assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
208: assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
209:
210: assertQueryEquals("a AND b", null, "+a +b");
211: assertQueryEquals("(a AND b)", null, "+a +b");
212: assertQueryEquals("c OR (a AND b)", null, "c (+a +b)");
213: assertQueryEquals("a AND NOT b", null, "+a -b");
214: assertQueryEquals("a AND -b", null, "+a -b");
215: assertQueryEquals("a AND !b", null, "+a -b");
216: assertQueryEquals("a && b", null, "+a +b");
217: assertQueryEquals("a && ! b", null, "+a -b");
218:
219: assertQueryEquals("a OR b", null, "a b");
220: assertQueryEquals("a || b", null, "a b");
221: assertQueryEquals("a OR !b", null, "a -b");
222: assertQueryEquals("a OR ! b", null, "a -b");
223: assertQueryEquals("a OR -b", null, "a -b");
224:
225: assertQueryEquals("+term -term term", null, "+term -term term");
226: assertQueryEquals("foo:term AND field:anotherTerm", null,
227: "+foo:term +anotherterm");
228: assertQueryEquals("term AND \"phrase phrase\"", null,
229: "+term +\"phrase phrase\"");
230: assertQueryEquals("\"hello there\"", null, "\"hello there\"");
231: assertTrue(getQuery("a AND b", null) instanceof BooleanQuery);
232: assertTrue(getQuery("hello", null) instanceof TermQuery);
233: assertTrue(getQuery("\"hello there\"", null) instanceof PhraseQuery);
234:
235: assertQueryEquals("germ term^2.0", null, "germ term^2.0");
236: assertQueryEquals("(term)^2.0", null, "term^2.0");
237: assertQueryEquals("(germ term)^2.0", null, "(germ term)^2.0");
238: assertQueryEquals("term^2.0", null, "term^2.0");
239: assertQueryEquals("term^2", null, "term^2.0");
240: assertQueryEquals("\"germ term\"^2.0", null,
241: "\"germ term\"^2.0");
242: assertQueryEquals("\"term germ\"^2", null, "\"term germ\"^2.0");
243:
244: assertQueryEquals("(foo OR bar) AND (baz OR boo)", null,
245: "+(foo bar) +(baz boo)");
246: assertQueryEquals("((a OR b) AND NOT c) OR d", null,
247: "(+(a b) -c) d");
248: assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)",
249: null, "+(apple \"steve jobs\") -(foo bar baz)");
250: assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"",
251: null, "+(title:dog title:cat) -author:\"bob dole\"");
252:
253: QueryParser qp = new QueryParser("field",
254: new StandardAnalyzer());
255: // make sure OR is the default:
256: assertEquals(QueryParser.OR_OPERATOR, qp.getDefaultOperator());
257: qp.setDefaultOperator(QueryParser.AND_OPERATOR);
258: assertEquals(QueryParser.AND_OPERATOR, qp.getDefaultOperator());
259: qp.setDefaultOperator(QueryParser.OR_OPERATOR);
260: assertEquals(QueryParser.OR_OPERATOR, qp.getDefaultOperator());
261: }
262:
263: public void testPunct() throws Exception {
264: Analyzer a = new WhitespaceAnalyzer();
265: assertQueryEquals("a&b", a, "a&b");
266: assertQueryEquals("a&&b", a, "a&&b");
267: assertQueryEquals(".NET", a, ".NET");
268: }
269:
270: public void testSlop() throws Exception {
271: assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2");
272: assertQueryEquals("\"term germ\"~2 flork", null,
273: "\"term germ\"~2 flork");
274: assertQueryEquals("\"term\"~2", null, "term");
275: assertQueryEquals("\" \"~2 germ", null, "germ");
276: assertQueryEquals("\"term germ\"~2^2", null,
277: "\"term germ\"~2^2.0");
278: }
279:
280: public void testNumber() throws Exception {
281: // The numbers go away because SimpleAnalzyer ignores them
282: assertQueryEquals("3", null, "");
283: assertQueryEquals("term 1.0 1 2", null, "term");
284: assertQueryEquals("term term1 term2", null, "term term term");
285:
286: Analyzer a = new StandardAnalyzer();
287: assertQueryEquals("3", a, "3");
288: assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2");
289: assertQueryEquals("term term1 term2", a, "term term1 term2");
290: }
291:
292: public void testWildcard() throws Exception {
293: assertQueryEquals("term*", null, "term*");
294: assertQueryEquals("term*^2", null, "term*^2.0");
295: assertQueryEquals("term~", null, "term~0.5");
296: assertQueryEquals("term~0.7", null, "term~0.7");
297: assertQueryEquals("term~^2", null, "term~0.5^2.0");
298: assertQueryEquals("term^2~", null, "term~0.5^2.0");
299: assertQueryEquals("term*germ", null, "term*germ");
300: assertQueryEquals("term*germ^3", null, "term*germ^3.0");
301:
302: assertTrue(getQuery("term*", null) instanceof PrefixQuery);
303: assertTrue(getQuery("term*^2", null) instanceof PrefixQuery);
304: assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
305: assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
306: FuzzyQuery fq = (FuzzyQuery) getQuery("term~0.7", null);
307: assertEquals(0.7f, fq.getMinSimilarity(), 0.1f);
308: assertEquals(FuzzyQuery.defaultPrefixLength, fq
309: .getPrefixLength());
310: fq = (FuzzyQuery) getQuery("term~", null);
311: assertEquals(0.5f, fq.getMinSimilarity(), 0.1f);
312: assertEquals(FuzzyQuery.defaultPrefixLength, fq
313: .getPrefixLength());
314:
315: assertParseException("term~1.1"); // value > 1, throws exception
316:
317: assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);
318:
319: /* Tests to see that wild card terms are (or are not) properly
320: * lower-cased with propery parser configuration
321: */
322: // First prefix queries:
323: // by default, convert to lowercase:
324: assertWildcardQueryEquals("Term*", true, "term*");
325: // explicitly set lowercase:
326: assertWildcardQueryEquals("term*", true, "term*");
327: assertWildcardQueryEquals("Term*", true, "term*");
328: assertWildcardQueryEquals("TERM*", true, "term*");
329: // explicitly disable lowercase conversion:
330: assertWildcardQueryEquals("term*", false, "term*");
331: assertWildcardQueryEquals("Term*", false, "Term*");
332: assertWildcardQueryEquals("TERM*", false, "TERM*");
333: // Then 'full' wildcard queries:
334: // by default, convert to lowercase:
335: assertWildcardQueryEquals("Te?m", "te?m");
336: // explicitly set lowercase:
337: assertWildcardQueryEquals("te?m", true, "te?m");
338: assertWildcardQueryEquals("Te?m", true, "te?m");
339: assertWildcardQueryEquals("TE?M", true, "te?m");
340: assertWildcardQueryEquals("Te?m*gerM", true, "te?m*germ");
341: // explicitly disable lowercase conversion:
342: assertWildcardQueryEquals("te?m", false, "te?m");
343: assertWildcardQueryEquals("Te?m", false, "Te?m");
344: assertWildcardQueryEquals("TE?M", false, "TE?M");
345: assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM");
346: // Fuzzy queries:
347: assertWildcardQueryEquals("Term~", "term~0.5");
348: assertWildcardQueryEquals("Term~", true, "term~0.5");
349: assertWildcardQueryEquals("Term~", false, "Term~0.5");
350: // Range queries:
351: assertWildcardQueryEquals("[A TO C]", "[a TO c]");
352: assertWildcardQueryEquals("[A TO C]", true, "[a TO c]");
353: assertWildcardQueryEquals("[A TO C]", false, "[A TO C]");
354: // Test suffix queries: first disallow
355: try {
356: assertWildcardQueryEquals("*Term", true, "*term");
357: fail();
358: } catch (ParseException pe) {
359: // expected exception
360: }
361: try {
362: assertWildcardQueryEquals("?Term", true, "?term");
363: fail();
364: } catch (ParseException pe) {
365: // expected exception
366: }
367: // Test suffix queries: then allow
368: assertWildcardQueryEquals("*Term", true, "*term", true);
369: assertWildcardQueryEquals("?Term", true, "?term", true);
370: }
371:
372: public void testLeadingWildcardType() throws Exception {
373: QueryParser qp = getParser(null);
374: qp.setAllowLeadingWildcard(true);
375: assertEquals(WildcardQuery.class, qp.parse("t*erm*").getClass());
376: assertEquals(WildcardQuery.class, qp.parse("?term*").getClass());
377: assertEquals(WildcardQuery.class, qp.parse("*term*").getClass());
378: }
379:
380: public void testQPA() throws Exception {
381: assertQueryEquals("term term^3.0 term", qpAnalyzer,
382: "term term^3.0 term");
383: assertQueryEquals("term stop^3.0 term", qpAnalyzer, "term term");
384:
385: assertQueryEquals("term term term", qpAnalyzer,
386: "term term term");
387: assertQueryEquals("term +stop term", qpAnalyzer, "term term");
388: assertQueryEquals("term -stop term", qpAnalyzer, "term term");
389:
390: assertQueryEquals("drop AND (stop) AND roll", qpAnalyzer,
391: "+drop +roll");
392: assertQueryEquals("term +(stop) term", qpAnalyzer, "term term");
393: assertQueryEquals("term -(stop) term", qpAnalyzer, "term term");
394:
395: assertQueryEquals("drop AND stop AND roll", qpAnalyzer,
396: "+drop +roll");
397: assertQueryEquals("term phrase term", qpAnalyzer,
398: "term \"phrase1 phrase2\" term");
399: assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
400: "+term -\"phrase1 phrase2\" term");
401: assertQueryEquals("stop^3", qpAnalyzer, "");
402: assertQueryEquals("stop", qpAnalyzer, "");
403: assertQueryEquals("(stop)^3", qpAnalyzer, "");
404: assertQueryEquals("((stop))^3", qpAnalyzer, "");
405: assertQueryEquals("(stop^3)", qpAnalyzer, "");
406: assertQueryEquals("((stop)^3)", qpAnalyzer, "");
407: assertQueryEquals("(stop)", qpAnalyzer, "");
408: assertQueryEquals("((stop))", qpAnalyzer, "");
409: assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery);
410: assertTrue(getQuery("term +stop", qpAnalyzer) instanceof TermQuery);
411: }
412:
413: public void testRange() throws Exception {
414: assertQueryEquals("[ a TO z]", null, "[a TO z]");
415: assertTrue(getQuery("[ a TO z]", null) instanceof ConstantScoreRangeQuery);
416:
417: QueryParser qp = new QueryParser("field", new SimpleAnalyzer());
418: qp.setUseOldRangeQuery(true);
419: assertTrue(qp.parse("[ a TO z]") instanceof RangeQuery);
420:
421: assertQueryEquals("[ a TO z ]", null, "[a TO z]");
422: assertQueryEquals("{ a TO z}", null, "{a TO z}");
423: assertQueryEquals("{ a TO z }", null, "{a TO z}");
424: assertQueryEquals("{ a TO z }^2.0", null, "{a TO z}^2.0");
425: assertQueryEquals("[ a TO z] OR bar", null, "[a TO z] bar");
426: assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar");
427: assertQueryEquals("( bar blar { a TO z}) ", null,
428: "bar blar {a TO z}");
429: assertQueryEquals("gack ( bar blar { a TO z}) ", null,
430: "gack (bar blar {a TO z})");
431: }
432:
433: /** for testing legacy DateField support */
434: private String getLegacyDate(String s) throws Exception {
435: DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
436: return DateField.dateToString(df.parse(s));
437: }
438:
439: /** for testing DateTools support */
440: private String getDate(String s, DateTools.Resolution resolution)
441: throws Exception {
442: DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
443: return getDate(df.parse(s), resolution);
444: }
445:
446: /** for testing DateTools support */
447: private String getDate(Date d, DateTools.Resolution resolution)
448: throws Exception {
449: if (resolution == null) {
450: return DateField.dateToString(d);
451: } else {
452: return DateTools.dateToString(d, resolution);
453: }
454: }
455:
456: private String getLocalizedDate(int year, int month, int day,
457: boolean extendLastDate) {
458: DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
459: Calendar calendar = Calendar.getInstance();
460: calendar.set(year, month, day);
461: if (extendLastDate) {
462: calendar.set(Calendar.HOUR_OF_DAY, 23);
463: calendar.set(Calendar.MINUTE, 59);
464: calendar.set(Calendar.SECOND, 59);
465: calendar.set(Calendar.MILLISECOND, 999);
466: }
467: return df.format(calendar.getTime());
468: }
469:
470: /** for testing legacy DateField support */
471: public void testLegacyDateRange() throws Exception {
472: String startDate = getLocalizedDate(2002, 1, 1, false);
473: String endDate = getLocalizedDate(2002, 1, 4, false);
474: Calendar endDateExpected = Calendar.getInstance();
475: endDateExpected.set(2002, 1, 4, 23, 59, 59);
476: endDateExpected.set(Calendar.MILLISECOND, 999);
477: assertQueryEquals("[ " + startDate + " TO " + endDate + "]",
478: null, "["
479: + getLegacyDate(startDate)
480: + " TO "
481: + DateField.dateToString(endDateExpected
482: .getTime()) + "]");
483: assertQueryEquals(
484: "{ " + startDate + " " + endDate + " }", null,
485: "{" + getLegacyDate(startDate) + " TO "
486: + getLegacyDate(endDate) + "}");
487: }
488:
489: public void testDateRange() throws Exception {
490: String startDate = getLocalizedDate(2002, 1, 1, false);
491: String endDate = getLocalizedDate(2002, 1, 4, false);
492: Calendar endDateExpected = Calendar.getInstance();
493: endDateExpected.set(2002, 1, 4, 23, 59, 59);
494: endDateExpected.set(Calendar.MILLISECOND, 999);
495: final String defaultField = "default";
496: final String monthField = "month";
497: final String hourField = "hour";
498: QueryParser qp = new QueryParser("field", new SimpleAnalyzer());
499:
500: // Don't set any date resolution and verify if DateField is used
501: assertDateRangeQueryEquals(qp, defaultField, startDate,
502: endDate, endDateExpected.getTime(), null);
503:
504: // set a field specific date resolution
505: qp.setDateResolution(monthField, DateTools.Resolution.MONTH);
506:
507: // DateField should still be used for defaultField
508: assertDateRangeQueryEquals(qp, defaultField, startDate,
509: endDate, endDateExpected.getTime(), null);
510:
511: // set default date resolution to MILLISECOND
512: qp.setDateResolution(DateTools.Resolution.MILLISECOND);
513:
514: // set second field specific date resolution
515: qp.setDateResolution(hourField, DateTools.Resolution.HOUR);
516:
517: // for this field no field specific date resolution has been set,
518: // so verify if the default resolution is used
519: assertDateRangeQueryEquals(qp, defaultField, startDate,
520: endDate, endDateExpected.getTime(),
521: DateTools.Resolution.MILLISECOND);
522:
523: // verify if field specific date resolutions are used for these two fields
524: assertDateRangeQueryEquals(qp, monthField, startDate, endDate,
525: endDateExpected.getTime(), DateTools.Resolution.MONTH);
526:
527: assertDateRangeQueryEquals(qp, hourField, startDate, endDate,
528: endDateExpected.getTime(), DateTools.Resolution.HOUR);
529: }
530:
531: public void assertDateRangeQueryEquals(QueryParser qp,
532: String field, String startDate, String endDate,
533: Date endDateInclusive, DateTools.Resolution resolution)
534: throws Exception {
535: assertQueryEquals(qp, field, field + ":[" + startDate + " TO "
536: + endDate + "]", "[" + getDate(startDate, resolution)
537: + " TO " + getDate(endDateInclusive, resolution) + "]");
538: assertQueryEquals(qp, field, field + ":{" + startDate + " TO "
539: + endDate + "}", "{" + getDate(startDate, resolution)
540: + " TO " + getDate(endDate, resolution) + "}");
541: }
542:
543: public void testEscaped() throws Exception {
544: Analyzer a = new WhitespaceAnalyzer();
545:
546: /*assertQueryEquals("\\[brackets", a, "\\[brackets");
547: assertQueryEquals("\\[brackets", null, "brackets");
548: assertQueryEquals("\\\\", a, "\\\\");
549: assertQueryEquals("\\+blah", a, "\\+blah");
550: assertQueryEquals("\\(blah", a, "\\(blah");
551:
552: assertQueryEquals("\\-blah", a, "\\-blah");
553: assertQueryEquals("\\!blah", a, "\\!blah");
554: assertQueryEquals("\\{blah", a, "\\{blah");
555: assertQueryEquals("\\}blah", a, "\\}blah");
556: assertQueryEquals("\\:blah", a, "\\:blah");
557: assertQueryEquals("\\^blah", a, "\\^blah");
558: assertQueryEquals("\\[blah", a, "\\[blah");
559: assertQueryEquals("\\]blah", a, "\\]blah");
560: assertQueryEquals("\\\"blah", a, "\\\"blah");
561: assertQueryEquals("\\(blah", a, "\\(blah");
562: assertQueryEquals("\\)blah", a, "\\)blah");
563: assertQueryEquals("\\~blah", a, "\\~blah");
564: assertQueryEquals("\\*blah", a, "\\*blah");
565: assertQueryEquals("\\?blah", a, "\\?blah");
566: //assertQueryEquals("foo \\&\\& bar", a, "foo \\&\\& bar");
567: //assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
568: //assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/
569:
570: assertQueryEquals("\\a", a, "a");
571:
572: assertQueryEquals("a\\-b:c", a, "a-b:c");
573: assertQueryEquals("a\\+b:c", a, "a+b:c");
574: assertQueryEquals("a\\:b:c", a, "a:b:c");
575: assertQueryEquals("a\\\\b:c", a, "a\\b:c");
576:
577: assertQueryEquals("a:b\\-c", a, "a:b-c");
578: assertQueryEquals("a:b\\+c", a, "a:b+c");
579: assertQueryEquals("a:b\\:c", a, "a:b:c");
580: assertQueryEquals("a:b\\\\c", a, "a:b\\c");
581:
582: assertQueryEquals("a:b\\-c*", a, "a:b-c*");
583: assertQueryEquals("a:b\\+c*", a, "a:b+c*");
584: assertQueryEquals("a:b\\:c*", a, "a:b:c*");
585:
586: assertQueryEquals("a:b\\\\c*", a, "a:b\\c*");
587:
588: assertQueryEquals("a:b\\-?c", a, "a:b-?c");
589: assertQueryEquals("a:b\\+?c", a, "a:b+?c");
590: assertQueryEquals("a:b\\:?c", a, "a:b:?c");
591:
592: assertQueryEquals("a:b\\\\?c", a, "a:b\\?c");
593:
594: assertQueryEquals("a:b\\-c~", a, "a:b-c~0.5");
595: assertQueryEquals("a:b\\+c~", a, "a:b+c~0.5");
596: assertQueryEquals("a:b\\:c~", a, "a:b:c~0.5");
597: assertQueryEquals("a:b\\\\c~", a, "a:b\\c~0.5");
598:
599: assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]");
600: assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]");
601: assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]");
602:
603: assertQueryEquals(
604: "[\"c\\:\\\\temp\\\\\\~foo0.txt\" TO \"c\\:\\\\temp\\\\\\~foo9.txt\"]",
605: a, "[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]");
606:
607: assertQueryEquals("a\\\\\\+b", a, "a\\+b");
608:
609: assertQueryEquals("a \\\"b c\\\" d", a, "a \"b c\" d");
610: assertQueryEquals("\"a \\\"b c\\\" d\"", a, "\"a \"b c\" d\"");
611: assertQueryEquals("\"a \\+b c d\"", a, "\"a +b c d\"");
612:
613: assertQueryEquals("c\\:\\\\temp\\\\\\~foo.txt", a,
614: "c:\\temp\\~foo.txt");
615:
616: assertParseException("XY\\"); // there must be a character after the escape char
617:
618: // test unicode escaping
619: assertQueryEquals("a\\u0062c", a, "abc");
620: assertQueryEquals("XY\\u005a", a, "XYZ");
621: assertQueryEquals("XY\\u005A", a, "XYZ");
622: assertQueryEquals("\"a \\\\\\u0028\\u0062\\\" c\"", a,
623: "\"a \\(b\" c\"");
624:
625: assertParseException("XY\\u005G"); // test non-hex character in escaped unicode sequence
626: assertParseException("XY\\u005"); // test incomplete escaped unicode sequence
627:
628: // Tests bug LUCENE-800
629: assertQueryEquals("(item:\\\\ item:ABCD\\\\)", a,
630: "item:\\ item:ABCD\\");
631: assertParseException("(item:\\\\ item:ABCD\\\\))"); // unmatched closing paranthesis
632: assertQueryEquals("\\*", a, "*");
633: assertQueryEquals("\\\\", a, "\\"); // escaped backslash
634:
635: assertParseException("\\"); // a backslash must always be escaped
636: }
637:
638: public void testQueryStringEscaping() throws Exception {
639: Analyzer a = new WhitespaceAnalyzer();
640:
641: assertEscapedQueryEquals("a-b:c", a, "a\\-b\\:c");
642: assertEscapedQueryEquals("a+b:c", a, "a\\+b\\:c");
643: assertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c");
644: assertEscapedQueryEquals("a\\b:c", a, "a\\\\b\\:c");
645:
646: assertEscapedQueryEquals("a:b-c", a, "a\\:b\\-c");
647: assertEscapedQueryEquals("a:b+c", a, "a\\:b\\+c");
648: assertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c");
649: assertEscapedQueryEquals("a:b\\c", a, "a\\:b\\\\c");
650:
651: assertEscapedQueryEquals("a:b-c*", a, "a\\:b\\-c\\*");
652: assertEscapedQueryEquals("a:b+c*", a, "a\\:b\\+c\\*");
653: assertEscapedQueryEquals("a:b:c*", a, "a\\:b\\:c\\*");
654:
655: assertEscapedQueryEquals("a:b\\\\c*", a, "a\\:b\\\\\\\\c\\*");
656:
657: assertEscapedQueryEquals("a:b-?c", a, "a\\:b\\-\\?c");
658: assertEscapedQueryEquals("a:b+?c", a, "a\\:b\\+\\?c");
659: assertEscapedQueryEquals("a:b:?c", a, "a\\:b\\:\\?c");
660:
661: assertEscapedQueryEquals("a:b?c", a, "a\\:b\\?c");
662:
663: assertEscapedQueryEquals("a:b-c~", a, "a\\:b\\-c\\~");
664: assertEscapedQueryEquals("a:b+c~", a, "a\\:b\\+c\\~");
665: assertEscapedQueryEquals("a:b:c~", a, "a\\:b\\:c\\~");
666: assertEscapedQueryEquals("a:b\\c~", a, "a\\:b\\\\c\\~");
667:
668: assertEscapedQueryEquals("[ a - TO a+ ]", null,
669: "\\[ a \\- TO a\\+ \\]");
670: assertEscapedQueryEquals("[ a : TO a~ ]", null,
671: "\\[ a \\: TO a\\~ \\]");
672: assertEscapedQueryEquals("[ a\\ TO a* ]", null,
673: "\\[ a\\\\ TO a\\* \\]");
674:
675: // LUCENE-881
676: assertEscapedQueryEquals("|| abc ||", a, "\\|\\| abc \\|\\|");
677: assertEscapedQueryEquals("&& abc &&", a, "\\&\\& abc \\&\\&");
678: }
679:
680: public void testTabNewlineCarriageReturn() throws Exception {
681: assertQueryEqualsDOA("+weltbank +worlbank", null,
682: "+weltbank +worlbank");
683:
684: assertQueryEqualsDOA("+weltbank\n+worlbank", null,
685: "+weltbank +worlbank");
686: assertQueryEqualsDOA("weltbank \n+worlbank", null,
687: "+weltbank +worlbank");
688: assertQueryEqualsDOA("weltbank \n +worlbank", null,
689: "+weltbank +worlbank");
690:
691: assertQueryEqualsDOA("+weltbank\r+worlbank", null,
692: "+weltbank +worlbank");
693: assertQueryEqualsDOA("weltbank \r+worlbank", null,
694: "+weltbank +worlbank");
695: assertQueryEqualsDOA("weltbank \r +worlbank", null,
696: "+weltbank +worlbank");
697:
698: assertQueryEqualsDOA("+weltbank\r\n+worlbank", null,
699: "+weltbank +worlbank");
700: assertQueryEqualsDOA("weltbank \r\n+worlbank", null,
701: "+weltbank +worlbank");
702: assertQueryEqualsDOA("weltbank \r\n +worlbank", null,
703: "+weltbank +worlbank");
704: assertQueryEqualsDOA("weltbank \r \n +worlbank", null,
705: "+weltbank +worlbank");
706:
707: assertQueryEqualsDOA("+weltbank\t+worlbank", null,
708: "+weltbank +worlbank");
709: assertQueryEqualsDOA("weltbank \t+worlbank", null,
710: "+weltbank +worlbank");
711: assertQueryEqualsDOA("weltbank \t +worlbank", null,
712: "+weltbank +worlbank");
713: }
714:
715: public void testSimpleDAO() throws Exception {
716: assertQueryEqualsDOA("term term term", null,
717: "+term +term +term");
718: assertQueryEqualsDOA("term +term term", null,
719: "+term +term +term");
720: assertQueryEqualsDOA("term term +term", null,
721: "+term +term +term");
722: assertQueryEqualsDOA("term +term +term", null,
723: "+term +term +term");
724: assertQueryEqualsDOA("-term term term", null,
725: "-term +term +term");
726: }
727:
728: public void testBoost() throws Exception {
729: StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(
730: new String[] { "on" });
731: QueryParser qp = new QueryParser("field", oneStopAnalyzer);
732: Query q = qp.parse("on^1.0");
733: assertNotNull(q);
734: q = qp.parse("\"hello\"^2.0");
735: assertNotNull(q);
736: assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
737: q = qp.parse("hello^2.0");
738: assertNotNull(q);
739: assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
740: q = qp.parse("\"on\"^1.0");
741: assertNotNull(q);
742:
743: QueryParser qp2 = new QueryParser("field",
744: new StandardAnalyzer());
745: q = qp2.parse("the^3");
746: // "the" is a stop word so the result is an empty query:
747: assertNotNull(q);
748: assertEquals("", q.toString());
749: assertEquals(1.0f, q.getBoost(), 0.01f);
750: }
751:
752: public void assertParseException(String queryString)
753: throws Exception {
754: try {
755: Query q = getQuery(queryString, null);
756: } catch (ParseException expected) {
757: return;
758: }
759: fail("ParseException expected, not thrown");
760: }
761:
762: public void testException() throws Exception {
763: assertParseException("\"some phrase");
764: assertParseException("(foo bar");
765: assertParseException("foo bar))");
766: assertParseException("field:term:with:colon some more terms");
767: assertParseException("(sub query)^5.0^2.0 plus more");
768: assertParseException("secret AND illegal) AND access:confidential");
769: }
770:
771: public void testCustomQueryParserWildcard() {
772: try {
773: new QPTestParser("contents", new WhitespaceAnalyzer())
774: .parse("a?t");
775: fail("Wildcard queries should not be allowed");
776: } catch (ParseException expected) {
777: // expected exception
778: }
779: }
780:
781: public void testCustomQueryParserFuzzy() throws Exception {
782: try {
783: new QPTestParser("contents", new WhitespaceAnalyzer())
784: .parse("xunit~");
785: fail("Fuzzy queries should not be allowed");
786: } catch (ParseException expected) {
787: // expected exception
788: }
789: }
790:
791: public void testBooleanQuery() throws Exception {
792: BooleanQuery.setMaxClauseCount(2);
793: try {
794: QueryParser qp = new QueryParser("field",
795: new WhitespaceAnalyzer());
796: qp.parse("one two three");
797: fail("ParseException expected due to too many boolean clauses");
798: } catch (ParseException expected) {
799: // too many boolean clauses, so ParseException is expected
800: }
801: }
802:
803: /**
804: * This test differs from TestPrecedenceQueryParser
805: */
806: public void testPrecedence() throws Exception {
807: QueryParser qp = new QueryParser("field",
808: new WhitespaceAnalyzer());
809: Query query1 = qp.parse("A AND B OR C AND D");
810: Query query2 = qp.parse("+A +B +C +D");
811: assertEquals(query1, query2);
812: }
813:
814: public void testLocalDateFormat() throws IOException,
815: ParseException {
816: RAMDirectory ramDir = new RAMDirectory();
817: IndexWriter iw = new IndexWriter(ramDir,
818: new WhitespaceAnalyzer(), true);
819: addDateDoc("a", 2005, 12, 2, 10, 15, 33, iw);
820: addDateDoc("b", 2005, 12, 4, 22, 15, 00, iw);
821: iw.close();
822: IndexSearcher is = new IndexSearcher(ramDir);
823: assertHits(1, "[12/1/2005 TO 12/3/2005]", is);
824: assertHits(2, "[12/1/2005 TO 12/4/2005]", is);
825: assertHits(1, "[12/3/2005 TO 12/4/2005]", is);
826: assertHits(1, "{12/1/2005 TO 12/3/2005}", is);
827: assertHits(1, "{12/1/2005 TO 12/4/2005}", is);
828: assertHits(0, "{12/3/2005 TO 12/4/2005}", is);
829: is.close();
830: }
831:
832: public void testStarParsing() throws Exception {
833: final int[] type = new int[1];
834: QueryParser qp = new QueryParser("field",
835: new WhitespaceAnalyzer()) {
836: protected Query getWildcardQuery(String field,
837: String termStr) throws ParseException {
838: // override error checking of superclass
839: type[0] = 1;
840: return new TermQuery(new Term(field, termStr));
841: }
842:
843: protected Query getPrefixQuery(String field, String termStr)
844: throws ParseException {
845: // override error checking of superclass
846: type[0] = 2;
847: return new TermQuery(new Term(field, termStr));
848: }
849:
850: protected Query getFieldQuery(String field, String queryText)
851: throws ParseException {
852: type[0] = 3;
853: return super .getFieldQuery(field, queryText);
854: }
855: };
856:
857: TermQuery tq;
858:
859: tq = (TermQuery) qp.parse("foo:zoo*");
860: assertEquals("zoo", tq.getTerm().text());
861: assertEquals(2, type[0]);
862:
863: tq = (TermQuery) qp.parse("foo:zoo*^2");
864: assertEquals("zoo", tq.getTerm().text());
865: assertEquals(2, type[0]);
866: assertEquals(tq.getBoost(), 2, 0);
867:
868: tq = (TermQuery) qp.parse("foo:*");
869: assertEquals("*", tq.getTerm().text());
870: assertEquals(1, type[0]); // could be a valid prefix query in the future too
871:
872: tq = (TermQuery) qp.parse("foo:*^2");
873: assertEquals("*", tq.getTerm().text());
874: assertEquals(1, type[0]);
875: assertEquals(tq.getBoost(), 2, 0);
876:
877: tq = (TermQuery) qp.parse("*:foo");
878: assertEquals("*", tq.getTerm().field());
879: assertEquals("foo", tq.getTerm().text());
880: assertEquals(3, type[0]);
881:
882: tq = (TermQuery) qp.parse("*:*");
883: assertEquals("*", tq.getTerm().field());
884: assertEquals("*", tq.getTerm().text());
885: assertEquals(1, type[0]); // could be handled as a prefix query in the future
886:
887: tq = (TermQuery) qp.parse("(*:*)");
888: assertEquals("*", tq.getTerm().field());
889: assertEquals("*", tq.getTerm().text());
890: assertEquals(1, type[0]);
891:
892: }
893:
894: public void testStopwords() throws Exception {
895: QueryParser qp = new QueryParser("a", new StopAnalyzer(
896: new String[] { "the", "foo" }));
897: Query result = qp.parse("a:the OR a:foo");
898: assertNotNull("result is null and it shouldn't be", result);
899: assertTrue("result is not a BooleanQuery",
900: result instanceof BooleanQuery);
901: assertTrue(((BooleanQuery) result).clauses().size()
902: + " does not equal: " + 0, ((BooleanQuery) result)
903: .clauses().size() == 0);
904: result = qp.parse("a:woo OR a:the");
905: assertNotNull("result is null and it shouldn't be", result);
906: assertTrue("result is not a TermQuery",
907: result instanceof TermQuery);
908: result = qp
909: .parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)");
910: assertNotNull("result is null and it shouldn't be", result);
911: assertTrue("result is not a BooleanQuery",
912: result instanceof BooleanQuery);
913: System.out.println("Result: " + result);
914: assertTrue(((BooleanQuery) result).clauses().size()
915: + " does not equal: " + 2, ((BooleanQuery) result)
916: .clauses().size() == 2);
917: }
918:
919: public void testPositionIncrement() throws Exception {
920: boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
921: StopFilter.setEnablePositionIncrementsDefault(true);
922: try {
923: QueryParser qp = new QueryParser("a", new StopAnalyzer(
924: new String[] { "the", "in", "are", "this" }));
925: qp.setEnablePositionIncrements(true);
926: String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
927: // 0 2 5 7 8
928: int expectedPositions[] = { 1, 3, 4, 6, 9 };
929: PhraseQuery pq = (PhraseQuery) qp.parse(qtxt);
930: //System.out.println("Query text: "+qtxt);
931: //System.out.println("Result: "+pq);
932: Term t[] = pq.getTerms();
933: int pos[] = pq.getPositions();
934: for (int i = 0; i < t.length; i++) {
935: //System.out.println(i+". "+t[i]+" pos: "+pos[i]);
936: assertEquals("term " + i + " = " + t[i]
937: + " has wrong term-position!",
938: expectedPositions[i], pos[i]);
939: }
940: } finally {
941: StopFilter.setEnablePositionIncrementsDefault(dflt);
942: }
943: }
944:
945: public void testMatchAllDocs() throws Exception {
946: QueryParser qp = new QueryParser("field",
947: new WhitespaceAnalyzer());
948: assertEquals(new MatchAllDocsQuery(), qp.parse("*:*"));
949: assertEquals(new MatchAllDocsQuery(), qp.parse("(*:*)"));
950: BooleanQuery bq = (BooleanQuery) qp.parse("+*:* -*:*");
951: assertTrue(bq.getClauses()[0].getQuery() instanceof MatchAllDocsQuery);
952: assertTrue(bq.getClauses()[1].getQuery() instanceof MatchAllDocsQuery);
953: }
954:
955: private void assertHits(int expected, String query, IndexSearcher is)
956: throws ParseException, IOException {
957: QueryParser qp = new QueryParser("date",
958: new WhitespaceAnalyzer());
959: qp.setLocale(Locale.ENGLISH);
960: Query q = qp.parse(query);
961: Hits hits = is.search(q);
962: assertEquals(expected, hits.length());
963: }
964:
965: private static void addDateDoc(String content, int year, int month,
966: int day, int hour, int minute, int second, IndexWriter iw)
967: throws IOException {
968: Document d = new Document();
969: d.add(new Field("f", content, Field.Store.YES,
970: Field.Index.TOKENIZED));
971: Calendar cal = Calendar.getInstance();
972: cal.set(year, month - 1, day, hour, minute, second);
973: d.add(new Field("date", DateField.dateToString(cal.getTime()),
974: Field.Store.YES, Field.Index.UN_TOKENIZED));
975: iw.addDocument(d);
976: }
977:
978: public void tearDown() throws Exception {
979: super.tearDown();
980: BooleanQuery.setMaxClauseCount(originalMaxClauses);
981: }
982:
983: }
|