001: package org.apache.lucene.search.spell;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: /**
021: * Edit distance class
022: */
023: final class TRStringDistance {
024:
025: final char[] sa;
026: final int n;
027: final int[][][] cache = new int[30][][];
028:
029: /**
030: * Optimized to run a bit faster than the static getDistance().
031: * In one benchmark times were 5.3sec using ctr vs 8.5sec w/ static method, thus 37% faster.
032: */
033: public TRStringDistance(String target) {
034: sa = target.toCharArray();
035: n = sa.length;
036: }
037:
038: //*****************************
039: // Compute Levenshtein distance
040: //*****************************
041: public final int getDistance(String other) {
042: int d[][]; // matrix
043: int cost; // cost
044:
045: // Step 1
046: final char[] ta = other.toCharArray();
047: final int m = ta.length;
048: if (n == 0) {
049: return m;
050: }
051: if (m == 0) {
052: return n;
053: }
054:
055: if (m >= cache.length) {
056: d = form(n, m);
057: } else if (cache[m] != null) {
058: d = cache[m];
059: } else {
060: d = cache[m] = form(n, m);
061:
062: // Step 3
063:
064: }
065: for (int i = 1; i <= n; i++) {
066: final char s_i = sa[i - 1];
067:
068: // Step 4
069:
070: for (int j = 1; j <= m; j++) {
071: final char t_j = ta[j - 1];
072:
073: // Step 5
074:
075: if (s_i == t_j) { // same
076: cost = 0;
077: } else { // not a match
078: cost = 1;
079:
080: // Step 6
081:
082: }
083: d[i][j] = min3(d[i - 1][j] + 1, d[i][j - 1] + 1,
084: d[i - 1][j - 1] + cost);
085:
086: }
087:
088: }
089:
090: // Step 7
091: return d[n][m];
092:
093: }
094:
095: /**
096: *
097: */
098: private static int[][] form(int n, int m) {
099: int[][] d = new int[n + 1][m + 1];
100: // Step 2
101:
102: for (int i = 0; i <= n; i++) {
103: d[i][0] = i;
104:
105: }
106: for (int j = 0; j <= m; j++) {
107: d[0][j] = j;
108: }
109: return d;
110: }
111:
112: //****************************
113: // Get minimum of three values
114: //****************************
115: private static int min3(int a, int b, int c) {
116: int mi = a;
117: if (b < mi) {
118: mi = b;
119: }
120: if (c < mi) {
121: mi = c;
122: }
123: return mi;
124:
125: }
126: }
|