001: /*
002: * $Id: SoundexFunction.java,v 1.4 2005/03/03 02:22:00 ahimanikya Exp $
003: * =======================================================================
004: * Copyright (c) 2002-2005 Axion Development Team. All rights reserved.
005: *
006: * Redistribution and use in source and binary forms, with or without
007: * modification, are permitted provided that the following conditions
008: * are met:
009: *
010: * 1. Redistributions of source code must retain the above
011: * copyright notice, this list of conditions and the following
012: * disclaimer.
013: *
014: * 2. Redistributions in binary form must reproduce the above copyright
015: * notice, this list of conditions and the following disclaimer in
016: * the documentation and/or other materials provided with the
017: * distribution.
018: *
019: * 3. The names "Tigris", "Axion", nor the names of its contributors may
020: * not be used to endorse or promote products derived from this
021: * software without specific prior written permission.
022: *
023: * 4. Products derived from this software may not be called "Axion", nor
024: * may "Tigris" or "Axion" appear in their names without specific prior
025: * written permission.
026: *
027: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
028: * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
029: * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
030: * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
031: * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
032: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
033: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
034: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
035: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
036: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
037: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
038: * =======================================================================
039: */
040:
041: package org.axiondb.functions;
042:
import java.util.Locale;

import org.axiondb.AxionException;
import org.axiondb.DataType;
import org.axiondb.FunctionFactory;
import org.axiondb.RowDecorator;
import org.axiondb.types.StringType;
048:
049: /**
050: * @version $Revision: 1.4 $ $Date: 2005/03/03 02:22:00 $
051: * @author Sudhendra Seshachala
052: * @author Ahimanikya Satapathy
053: */
054: public class SoundexFunction extends BaseFunction implements
055: ScalarFunction, FunctionFactory {
056:
057: /** Creates a new instance of Class */
058: public SoundexFunction() {
059: super ("SOUNDEX");
060: }
061:
062: /** Creates a new instance of Class */
063: public SoundexFunction(String name) {
064: super (name);
065: }
066:
067: public ConcreteFunction makeNewInstance() {
068: return new SoundexFunction();
069: }
070:
071: /** {@link StringType} */
072: public DataType getDataType() {
073: return RETURN_TYPE;
074: }
075:
076: public Object evaluate(RowDecorator row) throws AxionException {
077: Object val = getArgument(0).evaluate(row);
078: String intVal = (String) (ARG_TYPE.convert(val));
079: return soundex(intVal);
080: }
081:
082: public boolean isValid() {
083: return (getArgumentCount() == 1);
084: }
085:
086: /**
087: * Returns a four character code representing the sound of the given
088: * <code>String</code>. Non-ASCCI characters in the input <code>String</code> are
089: * ignored.
090: * <p>
091: * This method was rewritten for Axion to comply with the description at <a
092: * href="http://www.nara.gov/genealogy/coding.html">
093: * http://www.nara.gov/genealogy/coding.html </a>.
094: * <p>
095: *
096: * @param s the <code>String</code> for which to calculate the 4 character
097: * <code>SOUNDEX</code> value
098: * @return the 4 character <code>SOUNDEX</code> value for the given
099: * <code>String</code>
100: */
101: public static String soundex(String s) {
102:
103: if (s == null) {
104: return s;
105: }
106:
107: s = s.toUpperCase();
108:
109: int len = s.length();
110: char b[] = new char[] { '0', '0', '0', '0' };
111: char lastdigit = '0';
112:
113: for (int i = 0, j = 0; i < len && j < 4; i++) {
114: char c = s.charAt(i);
115: char newdigit;
116:
117: if ("AEIOUY".indexOf(c) != -1) {
118: newdigit = '7';
119: } else if (c == 'H' || c == 'W') {
120: newdigit = '8';
121: } else if ("BFPV".indexOf(c) != -1) {
122: newdigit = '1';
123: } else if ("CGJKQSXZ".indexOf(c) != -1) {
124: newdigit = '2';
125: } else if (c == 'D' || c == 'T') {
126: newdigit = '3';
127: } else if (c == 'L') {
128: newdigit = '4';
129: } else if (c == 'M' || c == 'N') {
130: newdigit = '5';
131: } else if (c == 'R') {
132: newdigit = '6';
133: } else {
134: continue;
135: }
136:
137: if (j == 0) {
138: b[j++] = c;
139: lastdigit = newdigit;
140: } else if (newdigit <= '6') {
141: if (newdigit != lastdigit) {
142: b[j++] = newdigit;
143: lastdigit = newdigit;
144: }
145: } else if (newdigit == '7') {
146: lastdigit = newdigit;
147: }
148: }
149:
150: return new String(b, 0, 4);
151: }
152:
153: private static final DataType ARG_TYPE = new StringType();
154: private static final DataType RETURN_TYPE = new StringType();
155: }
|