001: /*
002: * @(#)$Id: EditDistance.java,v 1.1.4.2 2007/05/31 22:00:01 ofung Exp $
003: */
004:
005: /*
006: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
007: *
008: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
009: *
010: * The contents of this file are subject to the terms of either the GNU
011: * General Public License Version 2 only ("GPL") or the Common Development
012: * and Distribution License("CDDL") (collectively, the "License"). You
013: * may not use this file except in compliance with the License. You can obtain
014: * a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html
015: * or glassfish/bootstrap/legal/LICENSE.txt. See the License for the specific
016: * language governing permissions and limitations under the License.
017: *
018: * When distributing the software, include this License Header Notice in each
019: * file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.
020: * Sun designates this particular file as subject to the "Classpath" exception
021: * as provided by Sun in the GPL Version 2 section of the License file that
022: * accompanied this code. If applicable, add the following below the License
023: * Header, with the fields enclosed by brackets [] replaced by your own
024: * identifying information: "Portions Copyrighted [year]
025: * [name of copyright owner]"
026: *
027: * Contributor(s):
028: *
029: * If you wish your version of this file to be governed by only the CDDL or
030: * only the GPL Version 2, indicate your decision by adding "[Contributor]
031: * elects to include this software in this distribution under the [CDDL or GPL
032: * Version 2] license." If you don't indicate a single choice of license, a
033: * recipient has the option to distribute your version of this file under
034: * either the CDDL, the GPL Version 2 or to extend the choice of license to
035: * its licensees as provided above. However, if you add GPL Version 2 code
036: * and therefore, elected the GPL Version 2 license, then the option applies
037: * only if the new code is made subject to such option by the copyright
038: * holder.
039: */
040: package com.sun.xml.bind.v2.util;
041:
042: import java.util.Collection;
043: import java.util.Arrays;
044:
045: /**
046: * Computes the string edit distance.
047: *
048: * <p>
049: * Refer to a computer science text book for the definition
050: * of the "string edit distance".
051: *
052: * @author
053: * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
054: */
055: public class EditDistance {
056:
057: /**
058: * Computes the edit distance between two strings.
059: *
060: * <p>
061: * The complexity is O(nm) where n=a.length() and m=b.length().
062: */
063: public static int editDistance(String a, String b) {
064: return new EditDistance(a, b).calc();
065: }
066:
067: /**
068: * Finds the string in the <code>group</code> closest to
069: * <code>key</code> and returns it.
070: *
071: * @return null if group.length==0.
072: */
073: public static String findNearest(String key, String[] group) {
074: return findNearest(key, Arrays.asList(group));
075: }
076:
077: /**
078: * Finds the string in the <code>group</code> closest to
079: * <code>key</code> and returns it.
080: *
081: * @return null if group.length==0.
082: */
083: public static String findNearest(String key,
084: Collection<String> group) {
085: int c = Integer.MAX_VALUE;
086: String r = null;
087:
088: for (String s : group) {
089: int ed = editDistance(key, s);
090: if (c > ed) {
091: c = ed;
092: r = s;
093: }
094: }
095: return r;
096: }
097:
098: /** cost vector. */
099: private int[] cost;
100: /** back buffer. */
101: private int[] back;
102:
103: /** Two strings to be compared. */
104: private final String a, b;
105:
106: private EditDistance(String a, String b) {
107: this .a = a;
108: this .b = b;
109: cost = new int[a.length() + 1];
110: back = new int[a.length() + 1]; // back buffer
111:
112: for (int i = 0; i <= a.length(); i++)
113: cost[i] = i;
114: }
115:
116: /**
117: * Swaps two buffers.
118: */
119: private void flip() {
120: int[] t = cost;
121: cost = back;
122: back = t;
123: }
124:
125: private int min(int a, int b, int c) {
126: return Math.min(a, Math.min(b, c));
127: }
128:
129: private int calc() {
130: for (int j = 0; j < b.length(); j++) {
131: flip();
132: cost[0] = j + 1;
133: for (int i = 0; i < a.length(); i++) {
134: int match = (a.charAt(i) == b.charAt(j)) ? 0 : 1;
135: cost[i + 1] = min(back[i] + match, cost[i] + 1,
136: back[i + 1] + 1);
137: }
138: }
139: return cost[a.length()];
140: }
141: }
|