001: package org.apache.lucene.search;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.index.Term;
021: import org.apache.lucene.index.IndexWriter;
022: import org.apache.lucene.queryParser.QueryParser;
023: import org.apache.lucene.search.PhraseQuery;
024: import org.apache.lucene.search.Hits;
025: import org.apache.lucene.search.IndexSearcher;
026: import org.apache.lucene.store.RAMDirectory;
027: import org.apache.lucene.analysis.Analyzer;
028: import org.apache.lucene.analysis.StopAnalyzer;
029: import org.apache.lucene.analysis.StopFilter;
030: import org.apache.lucene.analysis.Token;
031: import org.apache.lucene.analysis.TokenStream;
032: import org.apache.lucene.analysis.WhitespaceAnalyzer;
033: import org.apache.lucene.document.Document;
034: import org.apache.lucene.document.Field;
035:
036: import java.io.Reader;
037: import java.io.StringReader;
038:
039: import org.apache.lucene.util.LuceneTestCase;
040:
041: /**
042: * Term position unit test.
043: *
044: *
045: * @version $Revision: 607591 $
046: */
047: public class TestPositionIncrement extends LuceneTestCase {
048:
049: public void testSetPosition() throws Exception {
050: Analyzer analyzer = new Analyzer() {
051: public TokenStream tokenStream(String fieldName,
052: Reader reader) {
053: return new TokenStream() {
054: private final String[] TOKENS = { "1", "2", "3",
055: "4", "5" };
056: private final int[] INCREMENTS = { 1, 2, 1, 0, 1 };
057: private int i = 0;
058:
059: public Token next() {
060: if (i == TOKENS.length)
061: return null;
062: Token t = new Token(TOKENS[i], i, i);
063: t.setPositionIncrement(INCREMENTS[i]);
064: i++;
065: return t;
066: }
067: };
068: }
069: };
070: RAMDirectory store = new RAMDirectory();
071: IndexWriter writer = new IndexWriter(store, analyzer, true);
072: Document d = new Document();
073: d.add(new Field("field", "bogus", Field.Store.YES,
074: Field.Index.TOKENIZED));
075: writer.addDocument(d);
076: writer.optimize();
077: writer.close();
078:
079: IndexSearcher searcher = new IndexSearcher(store);
080: PhraseQuery q;
081: Hits hits;
082:
083: q = new PhraseQuery();
084: q.add(new Term("field", "1"));
085: q.add(new Term("field", "2"));
086: hits = searcher.search(q);
087: assertEquals(0, hits.length());
088:
089: // same as previous, just specify positions explicitely.
090: q = new PhraseQuery();
091: q.add(new Term("field", "1"), 0);
092: q.add(new Term("field", "2"), 1);
093: hits = searcher.search(q);
094: assertEquals(0, hits.length());
095:
096: // specifying correct positions should find the phrase.
097: q = new PhraseQuery();
098: q.add(new Term("field", "1"), 0);
099: q.add(new Term("field", "2"), 2);
100: hits = searcher.search(q);
101: assertEquals(1, hits.length());
102:
103: q = new PhraseQuery();
104: q.add(new Term("field", "2"));
105: q.add(new Term("field", "3"));
106: hits = searcher.search(q);
107: assertEquals(1, hits.length());
108:
109: q = new PhraseQuery();
110: q.add(new Term("field", "3"));
111: q.add(new Term("field", "4"));
112: hits = searcher.search(q);
113: assertEquals(0, hits.length());
114:
115: // phrase query would find it when correct positions are specified.
116: q = new PhraseQuery();
117: q.add(new Term("field", "3"), 0);
118: q.add(new Term("field", "4"), 0);
119: hits = searcher.search(q);
120: assertEquals(1, hits.length());
121:
122: // phrase query should fail for non existing searched term
123: // even if there exist another searched terms in the same searched position.
124: q = new PhraseQuery();
125: q.add(new Term("field", "3"), 0);
126: q.add(new Term("field", "9"), 0);
127: hits = searcher.search(q);
128: assertEquals(0, hits.length());
129:
130: // multi-phrase query should succed for non existing searched term
131: // because there exist another searched terms in the same searched position.
132: MultiPhraseQuery mq = new MultiPhraseQuery();
133: mq.add(new Term[] { new Term("field", "3"),
134: new Term("field", "9") }, 0);
135: hits = searcher.search(mq);
136: assertEquals(1, hits.length());
137:
138: q = new PhraseQuery();
139: q.add(new Term("field", "2"));
140: q.add(new Term("field", "4"));
141: hits = searcher.search(q);
142: assertEquals(1, hits.length());
143:
144: q = new PhraseQuery();
145: q.add(new Term("field", "3"));
146: q.add(new Term("field", "5"));
147: hits = searcher.search(q);
148: assertEquals(1, hits.length());
149:
150: q = new PhraseQuery();
151: q.add(new Term("field", "4"));
152: q.add(new Term("field", "5"));
153: hits = searcher.search(q);
154: assertEquals(1, hits.length());
155:
156: q = new PhraseQuery();
157: q.add(new Term("field", "2"));
158: q.add(new Term("field", "5"));
159: hits = searcher.search(q);
160: assertEquals(0, hits.length());
161:
162: // analyzer to introduce stopwords and increment gaps
163: Analyzer stpa = new Analyzer() {
164: final WhitespaceAnalyzer a = new WhitespaceAnalyzer();
165:
166: public TokenStream tokenStream(String fieldName,
167: Reader reader) {
168: TokenStream ts = a.tokenStream(fieldName, reader);
169: return new StopFilter(ts, new String[] { "stop" });
170: }
171: };
172:
173: // should not find "1 2" because there is a gap of 1 in the index
174: QueryParser qp = new QueryParser("field", stpa);
175: q = (PhraseQuery) qp.parse("\"1 2\"");
176: hits = searcher.search(q);
177: assertEquals(0, hits.length());
178:
179: // omitted stop word cannot help because stop filter swallows the increments.
180: q = (PhraseQuery) qp.parse("\"1 stop 2\"");
181: hits = searcher.search(q);
182: assertEquals(0, hits.length());
183:
184: // query parser alone won't help, because stop filter swallows the increments.
185: qp.setEnablePositionIncrements(true);
186: q = (PhraseQuery) qp.parse("\"1 stop 2\"");
187: hits = searcher.search(q);
188: assertEquals(0, hits.length());
189:
190: boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
191: try {
192: // stop filter alone won't help, because query parser swallows the increments.
193: qp.setEnablePositionIncrements(false);
194: StopFilter.setEnablePositionIncrementsDefault(true);
195: q = (PhraseQuery) qp.parse("\"1 stop 2\"");
196: hits = searcher.search(q);
197: assertEquals(0, hits.length());
198:
199: // when both qp qnd stopFilter propagate increments, we should find the doc.
200: qp.setEnablePositionIncrements(true);
201: q = (PhraseQuery) qp.parse("\"1 stop 2\"");
202: hits = searcher.search(q);
203: assertEquals(1, hits.length());
204: } finally {
205: StopFilter.setEnablePositionIncrementsDefault(dflt);
206: }
207: }
208:
209: /**
210: * Basic analyzer behavior should be to keep sequential terms in one
211: * increment from one another.
212: */
213: public void testIncrementingPositions() throws Exception {
214: Analyzer analyzer = new WhitespaceAnalyzer();
215: TokenStream ts = analyzer.tokenStream("field",
216: new StringReader("one two three four five"));
217:
218: while (true) {
219: Token token = ts.next();
220: if (token == null)
221: break;
222: assertEquals(token.termText(), 1, token
223: .getPositionIncrement());
224: }
225: }
226: }
|