package org.apache.lucene.analysis.ngram;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.Token;

import java.io.StringReader;

import junit.framework.TestCase;

/**
 * Tests {@link EdgeNGramTokenizer} for correctness.
 * @author Otis Gospodnetic
 */
public class EdgeNGramTokenizerTest extends TestCase {
  private StringReader input;

  public void setUp() {
    // Give each test a fresh reader over the same five-character input.
    input = new StringReader("abcde");
  }

  public void testInvalidInput() throws Exception {
    boolean gotException = false;
    try {
      // minGram must be greater than zero.
      new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, 0, 0);
    } catch (IllegalArgumentException e) {
      gotException = true;
    }
    assertTrue(gotException);
  }

  public void testInvalidInput2() throws Exception {
    boolean gotException = false;
    try {
      // minGram may not exceed maxGram.
      new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, 2, 1);
    } catch (IllegalArgumentException e) {
      gotException = true;
    }
    assertTrue(gotException);
  }

  public void testInvalidInput3() throws Exception {
    boolean gotException = false;
    try {
      // minGram may not be negative.
      new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, -1, 2);
    } catch (IllegalArgumentException e) {
      gotException = true;
    }
    assertTrue(gotException);
  }

  public void testFrontUnigram() throws Exception {
    EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input,
        EdgeNGramTokenizer.Side.FRONT, 1, 1);
    // Token.toString() renders as (termText,startOffset,endOffset).
    Token token = tokenizer.next();
    assertEquals("(a,0,1)", token.toString());
    assertNull(tokenizer.next());
  }

  public void testBackUnigram() throws Exception {
    EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input,
        EdgeNGramTokenizer.Side.BACK, 1, 1);
    Token token = tokenizer.next();
    assertEquals("(e,4,5)", token.toString());
    assertNull(tokenizer.next());
  }

  public void testOversizedNgrams() throws Exception {
    // Grams longer than the input produce no tokens at all.
    EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input,
        EdgeNGramTokenizer.Side.FRONT, 6, 6);
    assertNull(tokenizer.next());
  }
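
  /**
   * Illustrative sketch, not part of the original suite: assuming the
   * oversized-gram check is independent of the side, the same request
   * from the back edge should likewise yield no tokens.
   */
  public void testOversizedNgramsBack() throws Exception {
    EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input,
        EdgeNGramTokenizer.Side.BACK, 6, 6);
    assertNull(tokenizer.next());
  }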

  public void testFrontRangeOfNgrams() throws Exception {
    EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input,
        EdgeNGramTokenizer.Side.FRONT, 1, 3);
    Token token = tokenizer.next();
    assertEquals("(a,0,1)", token.toString());
    token = tokenizer.next();
    assertEquals("(ab,0,2)", token.toString());
    token = tokenizer.next();
    assertEquals("(abc,0,3)", token.toString());
    assertNull(tokenizer.next());
  }

  public void testBackRangeOfNgrams() throws Exception {
    EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input,
        EdgeNGramTokenizer.Side.BACK, 1, 3);
    Token token = tokenizer.next();
    assertEquals("(e,4,5)", token.toString());
    token = tokenizer.next();
    assertEquals("(de,3,5)", token.toString());
    token = tokenizer.next();
    assertEquals("(cde,2,5)", token.toString());
    assertNull(tokenizer.next());
  }
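
  /**
   * Illustrative sketch, not part of the original suite: assuming a gram
   * size equal to the input length is still within bounds, a front-edge
   * 5-gram of "abcde" should be the entire input.
   */
  public void testFullLengthGram() throws Exception {
    EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input,
        EdgeNGramTokenizer.Side.FRONT, 5, 5);
    Token token = tokenizer.next();
    assertEquals("(abcde,0,5)", token.toString());
    assertNull(tokenizer.next());
  }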
}