001: package com.quadcap.util.text;
002:
003: /* Copyright 1997 - 2003 Quadcap Software. All rights reserved.
004: *
005: * This software is distributed under the Quadcap Free Software License.
006: * This software may be used or modified for any purpose, personal or
007: * commercial. Open Source redistributions are permitted. Commercial
008: * redistribution of larger works derived from, or works which bundle
009: * this software requires a "Commercial Redistribution License"; see
010: * http://www.quadcap.com/purchase.
011: *
012: * Redistributions qualify as "Open Source" under one of the following terms:
013: *
014: * Redistributions are made at no charge beyond the reasonable cost of
015: * materials and delivery.
016: *
017: * Redistributions are accompanied by a copy of the Source Code or by an
018: * irrevocable offer to provide a copy of the Source Code for up to three
019: * years at the cost of materials and delivery. Such redistributions
020: * must allow further use, modification, and redistribution of the Source
021: * Code under substantially the same terms as this license.
022: *
023: * Redistributions of source code must retain the copyright notices as they
024: * appear in each source code file, these license terms, and the
025: * disclaimer/limitation of liability set forth as paragraph 6 below.
026: *
027: * Redistributions in binary form must reproduce this Copyright Notice,
028: * these license terms, and the disclaimer/limitation of liability set
029: * forth as paragraph 6 below, in the documentation and/or other materials
030: * provided with the distribution.
031: *
032: * The Software is provided on an "AS IS" basis. No warranty is
033: * provided that the Software is free of defects, or fit for a
034: * particular purpose.
035: *
036: * Limitation of Liability. Quadcap Software shall not be liable
037: * for any damages suffered by the Licensee or any third party resulting
038: * from use of the Software.
039: */
040:
041: /**
042: * SOUNDEX Utilities.
043: *
044: * @author Stan Bailes
045: */
046: public class Soundex {
047: static final OctetMap alpha = new OctetMap('a', 'z');
048: static final OctetMap Alpha = new OctetMap('A', 'Z');
049: static {
050: Alpha.include('a', 'z');
051: }
052:
053: /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
054: static final String sMap = "01230120022455012623010202";
055:
056: static final char scode(int c) {
057: if (alpha.has(c)) {
058: return sMap.charAt(c - 'a');
059: } else {
060: return sMap.charAt(c - 'A');
061: }
062: }
063:
064: public static final String soundex(String s) {
065: char[] ret = new char[4];
066: char last = 'x';
067: int pos = 0;
068: for (int i = 0; i < s.length() && pos < 4; i++) {
069: int c = s.charAt(i) & 0xff;
070: if (Alpha.has(c)) {
071: if (pos == 0) {
072: ret[pos++] = Character.toUpperCase((char) c);
073: } else {
074: char code = scode(c);
075: if (code != '0' && code != last) {
076: ret[pos++] = code;
077: last = code;
078: }
079: }
080: }
081: }
082: if (pos == 0)
083: return "";
084: while (pos < 4)
085: ret[pos++] = '0';
086: return new String(ret);
087: }
088:
089: public static final int difference(String a, String b) {
090: String sa = soundex(a);
091: String sb = soundex(b);
092: int diff = 0;
093: for (int i = 0; i < 4; i++) {
094: if (sa.charAt(i) == sb.charAt(i))
095: diff++;
096: }
097: return diff;
098: }
099:
100: //#ifndef RELEASE
101: static String[] data = { "blather", "blabber", "Smith", "Smyth",
102: "abcdefghijklmnopqrstuvwxyz", "a", "b", "bed", "BBD",
103: "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "smithers", "smothers",
104: "brothers" };
105:
106: public static void main(String[] args) {
107: for (int i = 0; i < data.length; i++) {
108: System.out.println(soundex(data[i]) + ": " + data[i]);
109: }
110: }
111: //#endif
112: }
|