001: /*
002: * Copyright 2001-2004 The Apache Software Foundation.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.apache.commons.codec.language;
018:
019: import junit.framework.Test;
020: import junit.framework.TestSuite;
021: import org.apache.commons.codec.StringEncoder;
022: import org.apache.commons.codec.StringEncoderAbstractTest;
023:
024: /**
025: * @version $Id: MetaphoneTest.java,v 1.12 2004/04/19 01:14:29 ggregory Exp $
026: * @author Apache Software Foundation
027: */
028: public class MetaphoneTest extends StringEncoderAbstractTest {
029:
030: public static Test suite() {
031: return (new TestSuite(MetaphoneTest.class));
032: }
033:
034: private Metaphone metaphone = null;
035:
036: public MetaphoneTest(String name) {
037: super (name);
038: }
039:
040: public void assertIsMetaphoneEqual(String source, String[] matches) {
041: // match source to all matches
042: for (int i = 0; i < matches.length; i++) {
043: assertTrue("Source: " + source
044: + ", should have same Metaphone as: " + matches[i],
045: this .getMetaphone().isMetaphoneEqual(source,
046: matches[i]));
047: }
048: // match to each other
049: for (int i = 0; i < matches.length; i++) {
050: for (int j = 0; j < matches.length; j++) {
051: assertTrue(this .getMetaphone().isMetaphoneEqual(
052: matches[i], matches[j]));
053: }
054: }
055: }
056:
057: public void assertMetaphoneEqual(String[][] pairs) {
058: this .validateFixture(pairs);
059: for (int i = 0; i < pairs.length; i++) {
060: String name0 = pairs[i][0];
061: String name1 = pairs[i][1];
062: String failMsg = "Expected match between " + name0
063: + " and " + name1;
064: assertTrue(failMsg, this .getMetaphone().isMetaphoneEqual(
065: name0, name1));
066: assertTrue(failMsg, this .getMetaphone().isMetaphoneEqual(
067: name1, name0));
068: }
069: }
070:
071: /**
072: * @return Returns the metaphone.
073: */
074: private Metaphone getMetaphone() {
075: return this .metaphone;
076: }
077:
078: protected StringEncoder makeEncoder() {
079: return new Metaphone();
080: }
081:
082: /**
083: * @param metaphone
084: * The metaphone to set.
085: */
086: private void setMetaphone(Metaphone metaphone) {
087: this .metaphone = metaphone;
088: }
089:
090: public void setUp() throws Exception {
091: super .setUp();
092: this .setMetaphone(new Metaphone());
093: }
094:
095: public void tearDown() throws Exception {
096: super .tearDown();
097: this .setMetaphone(null);
098: }
099:
100: public void testIsMetaphoneEqual1() {
101: this .assertMetaphoneEqual(new String[][] { { "Case", "case" },
102: { "CASE", "Case" }, { "caSe", "cAsE" },
103: { "quick", "cookie" } });
104: }
105:
106: /**
107: * Matches computed from http://www.lanw.com/java/phonetic/default.htm
108: */
109: public void testIsMetaphoneEqual2() {
110: this .assertMetaphoneEqual(new String[][] {
111: { "Lawrence", "Lorenza" }, { "Gary", "Cahra" }, });
112: }
113:
114: /**
115: * Initial AE case.
116: *
117: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
118: */
119: public void testIsMetaphoneEqualAero() {
120: this .assertIsMetaphoneEqual("Aero", new String[] { "Eure" });
121: }
122:
123: /**
124: * Initial WH case.
125: *
126: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
127: */
128: public void testIsMetaphoneEqualWhite() {
129: this .assertIsMetaphoneEqual("White", new String[] { "Wade",
130: "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit",
131: "Wittie", "Witty", "Wood", "Woodie", "Woody" });
132: }
133:
134: /**
135: * Initial A, not followed by an E case.
136: *
137: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
138: */
139: public void testIsMetaphoneEqualAlbert() {
140: this .assertIsMetaphoneEqual("Albert", new String[] { "Ailbert",
141: "Alberik", "Albert", "Alberto", "Albrecht" });
142: }
143:
144: /**
145: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
146: */
147: public void testIsMetaphoneEqualGary() {
148: this .assertIsMetaphoneEqual("Gary", new String[] { "Cahra",
149: "Cara", "Carey", "Cari", "Caria", "Carie", "Caro",
150: "Carree", "Carri", "Carrie", "Carry", "Cary", "Cora",
151: "Corey", "Cori", "Corie", "Correy", "Corri", "Corrie",
152: "Corry", "Cory", "Gray", "Kara", "Kare", "Karee",
153: "Kari", "Karia", "Karie", "Karrah", "Karrie", "Karry",
154: "Kary", "Keri", "Kerri", "Kerrie", "Kerry", "Kira",
155: "Kiri", "Kora", "Kore", "Kori", "Korie", "Korrie",
156: "Korry" });
157: }
158:
159: /**
160: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
161: */
162: public void testIsMetaphoneEqualJohn() {
163: this .assertIsMetaphoneEqual("John", new String[] { "Gena",
164: "Gene", "Genia", "Genna", "Genni", "Gennie", "Genny",
165: "Giana", "Gianna", "Gina", "Ginni", "Ginnie", "Ginny",
166: "Jaine", "Jan", "Jana", "Jane", "Janey", "Jania",
167: "Janie", "Janna", "Jany", "Jayne", "Jean", "Jeana",
168: "Jeane", "Jeanie", "Jeanna", "Jeanne", "Jeannie",
169: "Jen", "Jena", "Jeni", "Jenn", "Jenna", "Jennee",
170: "Jenni", "Jennie", "Jenny", "Jinny", "Jo Ann",
171: "Jo-Ann", "Jo-Anne", "Joan", "Joana", "Joane",
172: "Joanie", "Joann", "Joanna", "Joanne", "Joeann",
173: "Johna", "Johnna", "Joni", "Jonie", "Juana", "June",
174: "Junia", "Junie" });
175: }
176:
177: /**
178: * Initial KN case.
179: *
180: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
181: */
182: public void testIsMetaphoneEqualKnight() {
183: this .assertIsMetaphoneEqual("Knight", new String[] { "Hynda",
184: "Nada", "Nadia", "Nady", "Nat", "Nata", "Natty",
185: "Neda", "Nedda", "Nedi", "Netta", "Netti", "Nettie",
186: "Netty", "Nita", "Nydia" });
187: }
188:
189: /**
190: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
191: */
192: public void testIsMetaphoneEqualMary() {
193: this .assertIsMetaphoneEqual("Mary", new String[] { "Mair",
194: "Maire", "Mara", "Mareah", "Mari", "Maria", "Marie",
195: "Mary", "Maura", "Maure", "Meara", "Merrie", "Merry",
196: "Mira", "Moira", "Mora", "Moria", "Moyra", "Muire",
197: "Myra", "Myrah" });
198: }
199:
200: /**
201: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
202: */
203: public void testIsMetaphoneEqualParis() {
204: this .assertIsMetaphoneEqual("Paris", new String[] { "Pearcy",
205: "Perris", "Piercy", "Pierz", "Pryse" });
206: }
207:
208: /**
209: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
210: */
211: public void testIsMetaphoneEqualPeter() {
212: this .assertIsMetaphoneEqual("Peter", new String[] { "Peadar",
213: "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter",
214: "Pietro", "Piotr" });
215: }
216:
217: /**
218: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
219: */
220: public void testIsMetaphoneEqualRay() {
221: this .assertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey",
222: "Roi", "Roy", "Ruy" });
223: }
224:
225: /**
226: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
227: */
228: public void testIsMetaphoneEqualSusan() {
229: this .assertIsMetaphoneEqual("Susan", new String[] { "Siusan",
230: "Sosanna", "Susan", "Susana", "Susann", "Susanna",
231: "Susannah", "Susanne", "Suzann", "Suzanna", "Suzanne",
232: "Zuzana" });
233: }
234:
235: /**
236: * Initial WR case.
237: *
238: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
239: */
240: public void testIsMetaphoneEqualWright() {
241: this .assertIsMetaphoneEqual("Wright", new String[] { "Rota",
242: "Rudd", "Ryde" });
243: }
244:
245: /**
246: * Match data computed from http://www.lanw.com/java/phonetic/default.htm
247: */
248: public void testIsMetaphoneEqualXalan() {
249: this .assertIsMetaphoneEqual("Xalan", new String[] { "Celene",
250: "Celina", "Celine", "Selena", "Selene", "Selina",
251: "Seline", "Suellen", "Xylina" });
252: }
253:
254: public void testMetaphone() {
255: assertEquals("HL", this .getMetaphone().metaphone("howl"));
256: assertEquals("TSTN", this .getMetaphone().metaphone("testing"));
257: assertEquals("0", this .getMetaphone().metaphone("The"));
258: assertEquals("KK", this .getMetaphone().metaphone("quick"));
259: assertEquals("BRN", this .getMetaphone().metaphone("brown"));
260: assertEquals("FKS", this .getMetaphone().metaphone("fox"));
261: assertEquals("JMPT", this .getMetaphone().metaphone("jumped"));
262: assertEquals("OFR", this .getMetaphone().metaphone("over"));
263: assertEquals("0", this .getMetaphone().metaphone("the"));
264: assertEquals("LS", this .getMetaphone().metaphone("lazy"));
265: assertEquals("TKS", this .getMetaphone().metaphone("dogs"));
266: }
267:
268: public void testWordEndingInMB() {
269: assertEquals("KM", this .getMetaphone().metaphone("COMB"));
270: assertEquals("TM", this .getMetaphone().metaphone("TOMB"));
271: assertEquals("WM", this .getMetaphone().metaphone("WOMB"));
272: }
273:
274: public void testDiscardOfSCEOrSCIOrSCY() {
275: assertEquals("SNS", this .getMetaphone().metaphone("SCIENCE"));
276: assertEquals("SN", this .getMetaphone().metaphone("SCENE"));
277: assertEquals("S", this .getMetaphone().metaphone("SCY"));
278: }
279:
280: public void testWordsWithCIA() {
281: assertEquals("XP", this .getMetaphone().metaphone("CIAPO"));
282: }
283:
284: public void testTranslateOfSCHAndCH() {
285: assertEquals("SKTL", this .getMetaphone().metaphone("SCHEDULE"));
286: assertEquals("SKMT", this .getMetaphone().metaphone("SCHEMATIC"));
287:
288: assertEquals("KRKT", this .getMetaphone().metaphone("CHARACTER"));
289: assertEquals("TX", this .getMetaphone().metaphone("TEACH"));
290: }
291:
292: public void testTranslateToJOfDGEOrDGIOrDGY() {
293: assertEquals("TJ", this .getMetaphone().metaphone("DODGY"));
294: assertEquals("TJ", this .getMetaphone().metaphone("DODGE"));
295: assertEquals("AJMT", this .getMetaphone().metaphone("ADGIEMTI"));
296: }
297:
298: public void testDiscardOfSilentHAfterG() {
299: assertEquals("KNT", this .getMetaphone().metaphone("GHENT"));
300: assertEquals("B", this .getMetaphone().metaphone("BAUGH"));
301: }
302:
303: public void testDiscardOfSilentGN() {
304: assertEquals("N", this .getMetaphone().metaphone("GNU"));
305: assertEquals("SNT", this .getMetaphone().metaphone("SIGNED"));
306: }
307:
308: public void testPHTOF() {
309: assertEquals("FX", this .getMetaphone().metaphone("PHISH"));
310: }
311:
312: public void testSHAndSIOAndSIAToX() {
313: assertEquals("XT", this .getMetaphone().metaphone("SHOT"));
314: assertEquals("OTXN", this .getMetaphone().metaphone("ODSIAN"));
315: assertEquals("PLXN", this .getMetaphone().metaphone("PULSION"));
316: }
317:
318: public void testTIOAndTIAToX() {
319: assertEquals("OX", this .getMetaphone().metaphone("OTIA"));
320: assertEquals("PRXN", this .getMetaphone().metaphone("PORTION"));
321: }
322:
323: public void testTCH() {
324: assertEquals("RX", this .getMetaphone().metaphone("RETCH"));
325: assertEquals("WX", this .getMetaphone().metaphone("WATCH"));
326: }
327:
328: public void testExceedLength() {
329: // should be AKSKS, but istruncated by Max Code Length
330: assertEquals("AKSK", this .getMetaphone().metaphone("AXEAXE"));
331: }
332:
333: public void testSetMaxLengthWithTruncation() {
334: // should be AKSKS, but istruncated by Max Code Length
335: this .getMetaphone().setMaxCodeLen(6);
336: assertEquals("AKSKSK", this .getMetaphone().metaphone(
337: "AXEAXEAXE"));
338: }
339:
340: public void validateFixture(String[][] pairs) {
341: if (pairs.length == 0) {
342: fail("Test fixture is empty");
343: }
344: for (int i = 0; i < pairs.length; i++) {
345: if (pairs[i].length != 2) {
346: fail("Error in test fixture in the data array at index "
347: + i);
348: }
349: }
350: }
351:
352: }
|