001: package org.apache.lucene.search.spell;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import java.io.IOException;
021:
022: import junit.framework.TestCase;
023:
024: import org.apache.lucene.analysis.SimpleAnalyzer;
025: import org.apache.lucene.document.Document;
026: import org.apache.lucene.document.Field;
027: import org.apache.lucene.index.CorruptIndexException;
028: import org.apache.lucene.index.IndexReader;
029: import org.apache.lucene.index.IndexWriter;
030: import org.apache.lucene.store.Directory;
031: import org.apache.lucene.store.RAMDirectory;
032: import org.apache.lucene.util.English;
033:
034: /**
035: * Spell checker test case
036: *
037: *
038: */
039: public class TestSpellChecker extends TestCase {
040: private SpellChecker spellChecker;
041: private Directory userindex, spellindex;
042:
043: protected void setUp() throws Exception {
044: super .setUp();
045:
046: //create a user index
047: userindex = new RAMDirectory();
048: IndexWriter writer = new IndexWriter(userindex,
049: new SimpleAnalyzer(), true);
050:
051: for (int i = 0; i < 1000; i++) {
052: Document doc = new Document();
053: doc.add(new Field("field1", English.intToEnglish(i),
054: Field.Store.YES, Field.Index.TOKENIZED));
055: doc.add(new Field("field2", English.intToEnglish(i + 1),
056: Field.Store.YES, Field.Index.TOKENIZED)); // + word thousand
057: writer.addDocument(doc);
058: }
059: writer.close();
060:
061: // create the spellChecker
062: spellindex = new RAMDirectory();
063: spellChecker = new SpellChecker(spellindex);
064: }
065:
066: public void testBuild() throws CorruptIndexException, IOException {
067: IndexReader r = IndexReader.open(userindex);
068:
069: spellChecker.clearIndex();
070:
071: addwords(r, "field1");
072: int num_field1 = this .numdoc();
073:
074: addwords(r, "field2");
075: int num_field2 = this .numdoc();
076:
077: assertEquals(num_field2, num_field1 + 1);
078:
079: // test small word
080: String[] similar = spellChecker.suggestSimilar("fvie", 2);
081: assertEquals(1, similar.length);
082: assertEquals(similar[0], "five");
083:
084: similar = spellChecker.suggestSimilar("five", 2);
085: assertEquals(1, similar.length);
086: assertEquals(similar[0], "nine"); // don't suggest a word for itself
087:
088: similar = spellChecker.suggestSimilar("fiv", 2);
089: assertEquals(1, similar.length);
090: assertEquals(similar[0], "five");
091:
092: similar = spellChecker.suggestSimilar("ive", 2);
093: assertEquals(1, similar.length);
094: assertEquals(similar[0], "five");
095:
096: similar = spellChecker.suggestSimilar("fives", 2);
097: assertEquals(1, similar.length);
098: assertEquals(similar[0], "five");
099:
100: similar = spellChecker.suggestSimilar("fie", 2);
101: assertEquals(1, similar.length);
102: assertEquals(similar[0], "five");
103:
104: similar = spellChecker.suggestSimilar("fi", 2);
105: assertEquals(0, similar.length);
106:
107: // test restraint to a field
108: similar = spellChecker.suggestSimilar("tousand", 10, r,
109: "field1", false);
110: assertEquals(0, similar.length); // there isn't the term thousand in the field field1
111:
112: similar = spellChecker.suggestSimilar("tousand", 10, r,
113: "field2", false);
114: assertEquals(1, similar.length); // there is the term thousand in the field field2
115: }
116:
117: private void addwords(IndexReader r, String field)
118: throws IOException {
119: long time = System.currentTimeMillis();
120: spellChecker.indexDictionary(new LuceneDictionary(r, field));
121: time = System.currentTimeMillis() - time;
122: //System.out.println("time to build " + field + ": " + time);
123: }
124:
125: private int numdoc() throws IOException {
126: IndexReader rs = IndexReader.open(spellindex);
127: int num = rs.numDocs();
128: assertTrue(num != 0);
129: //System.out.println("num docs: " + num);
130: rs.close();
131: return num;
132: }
133:
134: }
|