001: package org.apache.lucene.queryParser;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.Reader;
021:
022: import org.apache.lucene.util.LuceneTestCase;
023:
024: import org.apache.lucene.search.Query;
025: import org.apache.lucene.analysis.Analyzer;
026: import org.apache.lucene.analysis.LowerCaseFilter;
027: import org.apache.lucene.analysis.Token;
028: import org.apache.lucene.analysis.TokenFilter;
029: import org.apache.lucene.analysis.TokenStream;
030: import org.apache.lucene.analysis.standard.StandardTokenizer;
031:
032: /**
033: * Test QueryParser's ability to deal with Analyzers that return more
034: * than one token per position or that return tokens with a position
035: * increment > 1.
036: *
037: * @author Daniel Naber
038: */
039: public class TestMultiAnalyzer extends LuceneTestCase {
040:
041: private static int multiToken = 0;
042:
043: public void testMultiAnalyzer() throws ParseException {
044:
045: QueryParser qp = new QueryParser("", new MultiAnalyzer());
046:
047: // trivial, no multiple tokens:
048: assertEquals("foo", qp.parse("foo").toString());
049: assertEquals("foo", qp.parse("\"foo\"").toString());
050: assertEquals("foo foobar", qp.parse("foo foobar").toString());
051: assertEquals("\"foo foobar\"", qp.parse("\"foo foobar\"")
052: .toString());
053: assertEquals("\"foo foobar blah\"", qp.parse(
054: "\"foo foobar blah\"").toString());
055:
056: // two tokens at the same position:
057: assertEquals("(multi multi2) foo", qp.parse("multi foo")
058: .toString());
059: assertEquals("foo (multi multi2)", qp.parse("foo multi")
060: .toString());
061: assertEquals("(multi multi2) (multi multi2)", qp.parse(
062: "multi multi").toString());
063: assertEquals("+(foo (multi multi2)) +(bar (multi multi2))", qp
064: .parse("+(foo multi) +(bar multi)").toString());
065: assertEquals(
066: "+(foo (multi multi2)) field:\"bar (multi multi2)\"",
067: qp.parse("+(foo multi) field:\"bar multi\"").toString());
068:
069: // phrases:
070: assertEquals("\"(multi multi2) foo\"", qp
071: .parse("\"multi foo\"").toString());
072: assertEquals("\"foo (multi multi2)\"", qp
073: .parse("\"foo multi\"").toString());
074: assertEquals("\"foo (multi multi2) foobar (multi multi2)\"", qp
075: .parse("\"foo multi foobar multi\"").toString());
076:
077: // fields:
078: assertEquals("(field:multi field:multi2) field:foo", qp.parse(
079: "field:multi field:foo").toString());
080: assertEquals("field:\"(multi multi2) foo\"", qp.parse(
081: "field:\"multi foo\"").toString());
082:
083: // three tokens at one position:
084: assertEquals("triplemulti multi3 multi2", qp.parse(
085: "triplemulti").toString());
086: assertEquals("foo (triplemulti multi3 multi2) foobar", qp
087: .parse("foo triplemulti foobar").toString());
088:
089: // phrase with non-default slop:
090: assertEquals("\"(multi multi2) foo\"~10", qp.parse(
091: "\"multi foo\"~10").toString());
092:
093: // phrase with non-default boost:
094: assertEquals("\"(multi multi2) foo\"^2.0", qp.parse(
095: "\"multi foo\"^2").toString());
096:
097: // phrase after changing default slop
098: qp.setPhraseSlop(99);
099: assertEquals("\"(multi multi2) foo\"~99 bar", qp.parse(
100: "\"multi foo\" bar").toString());
101: assertEquals("\"(multi multi2) foo\"~99 \"foo bar\"~2", qp
102: .parse("\"multi foo\" \"foo bar\"~2").toString());
103: qp.setPhraseSlop(0);
104:
105: // non-default operator:
106: qp.setDefaultOperator(QueryParser.AND_OPERATOR);
107: assertEquals("+(multi multi2) +foo", qp.parse("multi foo")
108: .toString());
109:
110: }
111:
112: public void testMultiAnalyzerWithSubclassOfQueryParser()
113: throws ParseException {
114:
115: DumbQueryParser qp = new DumbQueryParser("",
116: new MultiAnalyzer());
117: qp.setPhraseSlop(99); // modified default slop
118:
119: // direct call to (super's) getFieldQuery to demonstrate differnce
120: // between phrase and multiphrase with modified default slop
121: assertEquals("\"foo bar\"~99", qp.getSuperFieldQuery("",
122: "foo bar").toString());
123: assertEquals("\"(multi multi2) bar\"~99", qp
124: .getSuperFieldQuery("", "multi bar").toString());
125:
126: // ask sublcass to parse phrase with modified default slop
127: assertEquals("\"(multi multi2) foo\"~99 bar", qp.parse(
128: "\"multi foo\" bar").toString());
129:
130: }
131:
132: public void testPosIncrementAnalyzer() throws ParseException {
133: QueryParser qp = new QueryParser("", new PosIncrementAnalyzer());
134: assertEquals("quick brown", qp.parse("the quick brown")
135: .toString());
136: assertEquals("\"quick brown\"", qp.parse("\"the quick brown\"")
137: .toString());
138: assertEquals("quick brown fox", qp.parse("the quick brown fox")
139: .toString());
140: assertEquals("\"quick brown fox\"", qp.parse(
141: "\"the quick brown fox\"").toString());
142: }
143:
144: /**
145: * Expands "multi" to "multi" and "multi2", both at the same position,
146: * and expands "triplemulti" to "triplemulti", "multi3", and "multi2".
147: */
148: private class MultiAnalyzer extends Analyzer {
149:
150: public MultiAnalyzer() {
151: }
152:
153: public TokenStream tokenStream(String fieldName, Reader reader) {
154: TokenStream result = new StandardTokenizer(reader);
155: result = new TestFilter(result);
156: result = new LowerCaseFilter(result);
157: return result;
158: }
159: }
160:
161: private final class TestFilter extends TokenFilter {
162:
163: private org.apache.lucene.analysis.Token prevToken;
164:
165: public TestFilter(TokenStream in) {
166: super (in);
167: }
168:
169: public final org.apache.lucene.analysis.Token next()
170: throws java.io.IOException {
171: if (multiToken > 0) {
172: org.apache.lucene.analysis.Token token = new org.apache.lucene.analysis.Token(
173: "multi" + (multiToken + 1), prevToken
174: .startOffset(), prevToken.endOffset(),
175: prevToken.type());
176: token.setPositionIncrement(0);
177: multiToken--;
178: return token;
179: } else {
180: org.apache.lucene.analysis.Token t = input.next();
181: prevToken = t;
182: if (t == null)
183: return null;
184: String text = t.termText();
185: if (text.equals("triplemulti")) {
186: multiToken = 2;
187: return t;
188: } else if (text.equals("multi")) {
189: multiToken = 1;
190: return t;
191: } else {
192: return t;
193: }
194: }
195: }
196: }
197:
198: /**
199: * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1).
200: * Does not work correctly for input other than "the quick brown ...".
201: */
202: private class PosIncrementAnalyzer extends Analyzer {
203:
204: public PosIncrementAnalyzer() {
205: }
206:
207: public TokenStream tokenStream(String fieldName, Reader reader) {
208: TokenStream result = new StandardTokenizer(reader);
209: result = new TestPosIncrementFilter(result);
210: result = new LowerCaseFilter(result);
211: return result;
212: }
213: }
214:
215: private final class TestPosIncrementFilter extends TokenFilter {
216:
217: public TestPosIncrementFilter(TokenStream in) {
218: super (in);
219: }
220:
221: public final org.apache.lucene.analysis.Token next()
222: throws java.io.IOException {
223: for (Token t = input.next(); t != null; t = input.next()) {
224: if (t.termText().equals("the")) {
225: // stopword, do nothing
226: } else if (t.termText().equals("quick")) {
227: org.apache.lucene.analysis.Token token = new org.apache.lucene.analysis.Token(
228: t.termText(), t.startOffset(), t
229: .endOffset(), t.type());
230: token.setPositionIncrement(2);
231: return token;
232: } else {
233: org.apache.lucene.analysis.Token token = new org.apache.lucene.analysis.Token(
234: t.termText(), t.startOffset(), t
235: .endOffset(), t.type());
236: token.setPositionIncrement(1);
237: return token;
238: }
239: }
240: return null;
241: }
242: }
243:
244: /** a very simple subclass of QueryParser */
245: private final static class DumbQueryParser extends QueryParser {
246:
247: public DumbQueryParser(String f, Analyzer a) {
248: super (f, a);
249: }
250:
251: /** expose super's version */
252: public Query getSuperFieldQuery(String f, String t)
253: throws ParseException {
254: return super .getFieldQuery(f, t);
255: }
256:
257: /** wrap super's version */
258: protected Query getFieldQuery(String f, String t)
259: throws ParseException {
260: return new DumbQueryWrapper(getSuperFieldQuery(f, t));
261: }
262: }
263:
264: /**
265: * A very simple wrapper to prevent instanceof checks but uses
266: * the toString of the query it wraps.
267: */
268: private final static class DumbQueryWrapper extends Query {
269:
270: private Query q;
271:
272: public DumbQueryWrapper(Query q) {
273: super ();
274: this .q = q;
275: }
276:
277: public String toString(String f) {
278: return q.toString(f);
279: }
280: }
281:
282: }
|