001: package org.apache.lucene.analysis;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.util.LuceneTestCase;
021:
022: import java.io.StringReader;
023: import java.io.IOException;
024: import java.util.Set;
025: import java.util.HashSet;
026:
027: public class TestStopAnalyzer extends LuceneTestCase {
028:
029: private StopAnalyzer stop = new StopAnalyzer();
030: private Set inValidTokens = new HashSet();
031:
032: public TestStopAnalyzer(String s) {
033: super (s);
034: }
035:
036: protected void setUp() throws Exception {
037: super .setUp();
038: for (int i = 0; i < StopAnalyzer.ENGLISH_STOP_WORDS.length; i++) {
039: inValidTokens.add(StopAnalyzer.ENGLISH_STOP_WORDS[i]);
040: }
041: }
042:
043: public void testDefaults() throws IOException {
044: assertTrue(stop != null);
045: StringReader reader = new StringReader(
046: "This is a test of the english stop analyzer");
047: TokenStream stream = stop.tokenStream("test", reader);
048: assertTrue(stream != null);
049: Token token = null;
050: while ((token = stream.next()) != null) {
051: assertFalse(inValidTokens.contains(token.termText()));
052: }
053: }
054:
055: public void testStopList() throws IOException {
056: Set stopWordsSet = new HashSet();
057: stopWordsSet.add("good");
058: stopWordsSet.add("test");
059: stopWordsSet.add("analyzer");
060: StopAnalyzer newStop = new StopAnalyzer((String[]) stopWordsSet
061: .toArray(new String[3]));
062: StringReader reader = new StringReader(
063: "This is a good test of the english stop analyzer");
064: TokenStream stream = newStop.tokenStream("test", reader);
065: assertNotNull(stream);
066: Token token = null;
067: while ((token = stream.next()) != null) {
068: String text = token.termText();
069: assertFalse(stopWordsSet.contains(text));
070: assertEquals(1, token.getPositionIncrement()); // by default stop tokenizer does not apply increments.
071: }
072: }
073:
074: public void testStopListPositions() throws IOException {
075: boolean defaultEnable = StopFilter
076: .getEnablePositionIncrementsDefault();
077: StopFilter.setEnablePositionIncrementsDefault(true);
078: try {
079: Set stopWordsSet = new HashSet();
080: stopWordsSet.add("good");
081: stopWordsSet.add("test");
082: stopWordsSet.add("analyzer");
083: StopAnalyzer newStop = new StopAnalyzer(
084: (String[]) stopWordsSet.toArray(new String[3]));
085: StringReader reader = new StringReader(
086: "This is a good test of the english stop analyzer with positions");
087: int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1 };
088: TokenStream stream = newStop.tokenStream("test", reader);
089: assertNotNull(stream);
090: Token token = null;
091: int i = 0;
092: while ((token = stream.next()) != null) {
093: String text = token.termText();
094: assertFalse(stopWordsSet.contains(text));
095: assertEquals(expectedIncr[i++], token
096: .getPositionIncrement());
097: }
098: } finally {
099: StopFilter
100: .setEnablePositionIncrementsDefault(defaultEnable);
101: }
102: }
103:
104: }
|