001: /*******************************************************************************
002: * Copyright (c) 2000, 2006 IBM Corporation and others.
003: * All rights reserved. This program and the accompanying materials
004: * are made available under the terms of the Eclipse Public License v1.0
005: * which accompanies this distribution, and is available at
006: * http://www.eclipse.org/legal/epl-v10.html
007: *
008: * Contributors:
009: * IBM Corporation - initial API and implementation
010: *******************************************************************************/package org.eclipse.ui.internal.texteditor.quickdiff.compare.rangedifferencer;
011:
012: import java.util.LinkedList;
013: import java.util.List;
014:
015: import org.eclipse.core.runtime.Assert;
016: import org.eclipse.core.runtime.IProgressMonitor;
017: import org.eclipse.core.runtime.NullProgressMonitor;
018:
019: /**
020: * Levenshtein distance and edit script computation using a dynamic programming algorithm.
021: * The algorithm is O(n*m) in time where n and m are the number of elements in
022: * the two ranges to compare. It does not implement the greedy Ukkonen algorithm.
023: *
024: * @since 3.1
025: */
026: public final class Levenshtein {
027: /* debug output */
028: private static final boolean DEBUG = false;
029: private static final boolean MATRIX = false;
030:
031: /*
032: * asserts - enable conditional compilation by not making this a trace option.
033: * @since 3.2
034: */
035: private static final boolean ASSERT = false;
036:
037: /* edit cost constants */
038: private static final int COST_DELETE = 1;
039: private static final int COST_INSERT = 1;
040: private static final int COST_CHANGE = 1;
041: private static final int SKIP = Integer.MAX_VALUE;
042:
043: private static final RangeDifference[] EMPTY_DIFFERENCES = new RangeDifference[0];
044: private static final int NO_DISTANCE = 0;
045:
046: private interface CellComputer {
047: int computeCell(int row, int col);
048: }
049:
050: private final class DefaultCellComputer implements CellComputer {
051: public int computeCell(int row, int column) {
052: if (row == fRowStart)
053: return computeNullRow(column);
054: else if (column == fColStart)
055: return computeNullColumn(row);
056: else
057: return computeInnerCell(row, column);
058: }
059:
060: private int computeNullRow(int column) {
061: // initialize first row, [0,i] = i if it is valid
062: return Math.abs(column - fColStart);
063: }
064:
065: private int computeNullColumn(int row) {
066: // initialize first column
067: return Math.abs(row - fRowStart);
068: }
069:
070: private int computeInnerCell(int row, int col) {
071: int fromAbove = sum(getAt(row - fStep, col), COST_INSERT);
072: int fromLeft = sum(getAt(row, col - fStep), COST_DELETE);
073: int minDiag = getAt(row - fStep, col - fStep);
074:
075: int minCellValue = Math.min(Math.min(fromAbove, fromLeft),
076: minDiag);
077: int minCost = minCost(row, col, minCellValue);
078:
079: if (minCellValue == fromAbove || minCellValue == fromLeft)
080: return minCellValue;
081:
082: if (ASSERT)
083: Assert.isTrue(minCellValue == minDiag
084: && fromAbove >= minDiag && fromLeft >= minDiag);
085:
086: int nextCharCost = rangesEqual(row, col) ? 0 : COST_CHANGE;
087: minCost = sum(minCost, nextCharCost);
088: int cost = minDiag + nextCharCost;
089: return cost;
090: }
091: }
092:
093: /**
094: * Reduces the needed comparisons - can not be used for hirschberg as we
095: * don't have global values there.
096: */
097: private final class OptimizedCellComputer implements CellComputer {
098: public int computeCell(int row, int column) {
099: if (row == fRowStart)
100: return computeNullRow(column);
101: else if (column == fColStart)
102: return computeNullColumn(row);
103: else
104: return computeInnerCell(row, column);
105: }
106:
107: private int computeNullRow(int column) {
108: // initialize first row, [0,i] = i if it is valid
109: if (minCost(fRowStart, column, Math.abs(column - fColStart)) > fMaxCost)
110: return Levenshtein.SKIP;
111: return Math.abs(column - fColStart);
112: }
113:
114: private int computeNullColumn(int row) {
115: // initialize first column
116: if (minCost(row, fColStart, Math.abs(row - fRowStart)) > fMaxCost)
117: return Levenshtein.SKIP;
118: return Math.abs(row - fRowStart);
119: }
120:
121: private int computeInnerCell(int row, int col) {
122: int fromAbove = sum(getAt(row - fStep, col),
123: Levenshtein.COST_INSERT);
124: int fromLeft = sum(getAt(row, col - fStep),
125: Levenshtein.COST_DELETE);
126: int minDiag = getAt(row - fStep, col - fStep);
127:
128: int minCellValue = Math.min(Math.min(fromAbove, fromLeft),
129: minDiag);
130: int minCost = minCost(row, col, minCellValue);
131:
132: if (minCost > fMaxCost) {
133: return Levenshtein.SKIP;
134: } else if (minCellValue == fromAbove
135: || minCellValue == fromLeft) {
136: return minCellValue;
137: } else {
138: if (ASSERT)
139: Assert.isTrue(minCellValue == minDiag
140: && fromAbove >= minDiag
141: && fromLeft >= minDiag);
142:
143: int nextCharCost = rangesEqual(row, col) ? 0
144: : Levenshtein.COST_CHANGE;
145: minCost = Levenshtein.sum(minCost, nextCharCost);
146: if (minCost > fMaxCost)
147: return Levenshtein.SKIP;
148: int cost = minDiag + nextCharCost;
149: fMaxCost = Math.min(fMaxCost, maxCost(row, col, cost));
150: return cost;
151: }
152: }
153: }
154:
155: /* the domain ranges we compare */
156: private IRangeComparator fLeft;
157: private IRangeComparator fRight;
158: private IProgressMonitor fProgressMonitor;
159:
160: /* algorithmic variables - may or may not be used by a method, always
161: * nulled out by clear().
162: */
163: /* package visible for testing */
164:
165: /** The matrix for full blown N * M edit distance and script computation. */
166: int[][] fMatrix;
167: /** The two columns for dynamic algorithms - the last row. */
168: int[] fPreviousRow;
169: /** The two columns for dynamic algorithms - the current row. */
170: int[] fCurrentRow;
171: /** Used by hirschberg's algorithm to store the result of one run. */
172: private int[] fResultRow;
173: /** Direction of the matrix calculation - either <code>1</code> or <code>-1</code>. */
174: private int fStep;
175: /** The current row of the dynamic matrix calculation. */
176: private int fRow;
177: /** The first row of the matrix to calculate. */
178: private int fRowStart;
179: /** The last (inclusive) row of the matrix. */
180: private int fRowEnd;
181: /** The first column of the matrix to calculate. */
182: private int fColStart;
183: /** The last (inclusive) column of the matrix. */
184: private int fColEnd;
185: /**
186: * Maximum cost of the remaining computation given the current state of the
187: * computation. For edit distance calculation, this may be used to prune
188: * impossible cells.
189: */
190: private int fMaxCost;
191:
192: /* statistics collection */
193: /** Keeps track of the number of needed comparisons. */
194: private long fComparisons;
195:
196: /**
197: * For each row, Hirschberg stores the column of the optimal alignment
198: * in the next row of the matrix.
199: */
200: private int[] fOptimalSplitColumn;
201: /**
202: * For each row, this stores whether the domain elements at the [row,column]
203: * returned equality, where column is the value in <code>fOptimalSplitColumn</code>.
204: */
205: private boolean[] fOptimalSplitValues;
206: /** List of differences computed by the walkback methods. */
207: private List fDiffs;
208:
209: /** Normal matrix cell computer. */
210: final CellComputer fStandardCC = new DefaultCellComputer();
211: /** Optimized cell computer for that prunes impossible cells. */
212: final CellComputer fOptimizedCC = new OptimizedCellComputer();
213: /** The current cell computer. */
214: CellComputer fCellComputer = fStandardCC;
215:
216: /**
217: * Convenience method to compute the edit script between two range
218: * comparators, see <code>RangeDifferencer</code>.
219: *
220: * @param left the left hand side domain range
221: * @param right the right hand side domain range
222: * @return the edit script from left to right
223: */
224: public static RangeDifference[] findDifferences(
225: IRangeComparator left, IRangeComparator right) {
226: Levenshtein levenshtein = new Levenshtein(left, right);
227: return levenshtein.editScriptHirschberg();
228: }
229:
230: /**
231: * Convenience method to compute the edit script between two range
232: * comparators, see <code>RangeDifferencer</code>.
233: *
234: * @param pm a progress monitor, or <code>null</code> if no progress should be reported
235: * @param left the left hand side domain range
236: * @param right the right hand side domain range
237: * @return the edit script from left to right
238: */
239: public static RangeDifference[] findDifferences(
240: IProgressMonitor pm, IRangeComparator left,
241: IRangeComparator right) {
242: Levenshtein levenshtein = new Levenshtein(pm, left, right);
243: return levenshtein.editScriptHirschberg();
244: }
245:
246: /**
247: * Create a new differ that operates on the two domain range comparators
248: * given.
249: *
250: * @param left the left domain range
251: * @param right the right domain range
252: */
253: public Levenshtein(IRangeComparator left, IRangeComparator right) {
254: this (null, left, right);
255: }
256:
257: /**
258: * Create a new differ that operates on the two domain range comparators
259: * given.
260: *
261: * @param pm a progress monitor, or <code>null</code> if no progress should be reported
262: * @param left the left domain range
263: * @param right the right domain range
264: */
265: public Levenshtein(IProgressMonitor pm, IRangeComparator left,
266: IRangeComparator right) {
267: if (left == null || right == null)
268: throw new NullPointerException();
269: fLeft = left;
270: fRight = right;
271: if (pm != null)
272: fProgressMonitor = pm;
273: else
274: fProgressMonitor = new NullProgressMonitor();
275: }
276:
277: /**
278: * Computes the edit distance.
279: *
280: * @return the edit distance of the two range comparators
281: */
282: public int editDistance() {
283: try {
284: fCellComputer = fOptimizedCC;
285: initRows();
286:
287: if (MATRIX)
288: printHeader(fLeft, fRight);
289:
290: internalEditDistance(1, fRight.getRangeCount(), 1, fLeft
291: .getRangeCount());
292:
293: if (fProgressMonitor.isCanceled())
294: return NO_DISTANCE;
295:
296: if (DEBUG)
297: System.out.println("" + fComparisons + " comparisons"); //$NON-NLS-1$//$NON-NLS-2$
298:
299: return getAt(fRowEnd, fColEnd);
300: } finally {
301: clear();
302: }
303: }
304:
305: /**
306: * Computes the edit script. This is quadratic in space and time.
307: *
308: * @return the shortest edit script between the two range comparators
309: */
310: public RangeDifference[] editScript() {
311: try {
312: fCellComputer = fOptimizedCC;
313: initMatrix();
314:
315: if (MATRIX)
316: printHeader(fLeft, fRight);
317:
318: // build the matrix
319: internalEditDistance(1, fRight.getRangeCount(), 1, fLeft
320: .getRangeCount());
321:
322: if (fProgressMonitor.isCanceled())
323: return EMPTY_DIFFERENCES;
324:
325: if (DEBUG)
326: System.out.println("" + fComparisons + " comparisons"); //$NON-NLS-1$//$NON-NLS-2$
327:
328: RangeDifference[] script = walkback();
329:
330: return script;
331:
332: } finally {
333: clear();
334: }
335: }
336:
337: /**
338: * Computes the edit script. This is quadratic in time but linear in space;
339: * it is about twice as slow as the <code>editScript</code> method.
340: *
341: * @return the shortest edit script between the two range comparators
342: */
343: public RangeDifference[] editScriptHirschberg() {
344: try {
345: fCellComputer = fStandardCC;
346:
347: initRows();
348: fResultRow = new int[fCurrentRow.length];
349: fOptimalSplitColumn = new int[fRight.getRangeCount() + 1];
350: fOptimalSplitValues = new boolean[fRight.getRangeCount() + 1];
351:
352: hirschberg(1, fRight.getRangeCount(), 1, fLeft
353: .getRangeCount());
354:
355: if (fProgressMonitor.isCanceled())
356: return EMPTY_DIFFERENCES;
357:
358: if (DEBUG)
359: System.out.println("" + fComparisons + " comparisons"); //$NON-NLS-1$//$NON-NLS-2$
360:
361: RangeDifference[] script = buildDifferencesHirschberg();
362:
363: return script;
364:
365: } finally {
366: clear();
367: }
368: }
369:
370: public int editDistanceHirschberg() {
371: try {
372: fCellComputer = fStandardCC;
373:
374: initRows();
375: fResultRow = new int[fLeft.getRangeCount() + 1];
376: fOptimalSplitColumn = new int[fRight.getRangeCount() + 1];
377: fOptimalSplitValues = new boolean[fRight.getRangeCount() + 1];
378:
379: int dist = hirschberg(1, fRight.getRangeCount(), 1, fLeft
380: .getRangeCount());
381:
382: if (fProgressMonitor.isCanceled())
383: return NO_DISTANCE;
384:
385: if (DEBUG)
386: System.out.println("" + fComparisons + " comparisons"); //$NON-NLS-1$//$NON-NLS-2$
387:
388: return dist;
389:
390: } finally {
391: clear();
392: }
393: }
394:
395: void initMatrix() {
396: initMatrix(fRight.getRangeCount() + 1,
397: fLeft.getRangeCount() + 1);
398: }
399:
400: void initMatrix(int rows, int columns) {
401: if (fMatrix == null || fMatrix.length < rows
402: || fMatrix[0].length < columns)
403: fMatrix = new int[rows][columns];
404: }
405:
406: void initRows() {
407: initRows(fLeft.getRangeCount() + 1);
408: }
409:
410: void initRows(int columns) {
411: if (fCurrentRow == null || fCurrentRow.length < columns)
412: fCurrentRow = new int[columns];
413: if (fPreviousRow == null || fPreviousRow.length < columns)
414: fPreviousRow = new int[columns];
415: }
416:
417: /*
418: * Fill the matrix, but do not allocate it.
419: */
420: void internalEditDistance(int rStart, int rEnd, int lStart, int lEnd) {
421:
422: if (ASSERT)
423: Assert.isTrue(rStart <= rEnd + 1);
424: if (ASSERT)
425: Assert.isTrue(lStart <= lEnd + 1);
426:
427: // build the matrix
428: fStep = 1;
429: fRowStart = rStart - fStep;
430: fRowEnd = rEnd;
431:
432: fColStart = lStart - fStep;
433: fColEnd = lEnd;
434:
435: fMaxCost = maxCost(fRowStart, fColStart, 0);
436:
437: for (fRow = fRowStart; fRow <= fRowEnd; fRow += fStep) { // for every row
438:
439: if (fProgressMonitor.isCanceled())
440: return;
441:
442: fProgressMonitor.worked(1);
443:
444: for (int col = fColStart; col <= fColEnd; col += fStep) { // for every column
445:
446: setAt(fRow, col, fCellComputer.computeCell(fRow, col));
447: }
448:
449: if (MATRIX)
450: printRow();
451:
452: swapRows();
453: }
454: }
455:
456: /*
457: * Fill the matrix, but do not allocate it.
458: */
459: void internalReverseEditDistance(int rStart, int rEnd, int lStart,
460: int lEnd) {
461:
462: if (ASSERT)
463: Assert.isTrue(rStart <= rEnd + 1);
464: if (ASSERT)
465: Assert.isTrue(lStart <= lEnd + 1);
466:
467: // build the matrix
468: fStep = -1;
469: fRowStart = rEnd - fStep;
470: fRowEnd = rStart;
471:
472: fColStart = lEnd - fStep;
473: fColEnd = lStart;
474:
475: fMaxCost = maxCost(fRowStart, fColStart, 0);
476:
477: for (fRow = fRowStart; fRow >= fRowEnd; fRow += fStep) { // for every row
478:
479: if (fProgressMonitor.isCanceled())
480: return;
481:
482: fProgressMonitor.worked(1);
483:
484: for (int col = fColStart; col >= fColEnd; col += fStep) { // for every column
485:
486: setAt(fRow, col, fCellComputer.computeCell(fRow, col));
487: }
488:
489: if (MATRIX)
490: printRow();
491:
492: swapRows();
493: }
494: }
495:
496: private void swapRows() {
497: int[] tmp;
498: tmp = fPreviousRow;
499: fPreviousRow = fCurrentRow;
500: fCurrentRow = tmp;
501: }
502:
503: private void clear() {
504: fPreviousRow = null;
505: fCurrentRow = null;
506: fMatrix = null;
507: fDiffs = null;
508: fResultRow = null;
509: fOptimalSplitColumn = null;
510: fOptimalSplitValues = null;
511: }
512:
513: /* access methods for the compare algorithm */
514:
515: /**
516: * Returns the matrix value for [row, column]. Note that not the entire
517: * matrix may be available at all times.
518: *
519: * @param row the row (right domain index)
520: * @param column (left domain index)
521: * @return the matrix value for the given row and column
522: */
523: int getAt(int row, int column) {
524:
525: // shift reverse iteration towards left by one
526: if (fStep < 0)
527: column--;
528:
529: if (fMatrix != null)
530: return fMatrix[row][column];
531:
532: if (row == fRow)
533: return fCurrentRow[column];
534:
535: if (ASSERT
536: && !(row == fRow - fStep && ((fStep > 0
537: && row >= fRowStart && row <= fRowEnd) || fStep < 0
538: && row <= fRowStart && row >= fRowEnd))) {
539: Assert.isTrue(false, "random access to matrix not allowed"); //$NON-NLS-1$
540: return SKIP; // dummy
541: }
542:
543: return fPreviousRow[column];
544: }
545:
546: /**
547: * Sets the matrix value at [row, column]. Note that not the entire
548: * matrix may be available at all times.
549: *
550: * @param row the row (right domain index)
551: * @param column (left domain index)
552: * @param value the value to set
553: */
554: private void setAt(int row, int column, int value) {
555:
556: // shift reverse iteration towards left by one
557: if (fStep < 0)
558: column--;
559:
560: if (fMatrix != null) {
561: fMatrix[row][column] = value;
562: } else {
563: if (row == fRow)
564: fCurrentRow[column] = value;
565: else if (ASSERT
566: && !(row == fRow - fStep && ((fStep > 0
567: && row >= fRowStart && row <= fRowEnd) || fStep < 0
568: && row <= fRowStart && row >= fRowEnd)))
569: Assert.isTrue(false,
570: "random access to matrix not allowed"); //$NON-NLS-1$
571: else
572: fPreviousRow[column] = value;
573: }
574: }
575:
576: /*
577: * Compares the two domain element ranges corresponding to the cell at
578: * [r,l], that is the (zero-based) elements at r - 1 and l - 1.
579: */
580: boolean rangesEqual(int r, int l) {
581: if (DEBUG)
582: fComparisons++;
583: return fLeft.rangesEqual(l - 1, fRight, r - 1);
584: }
585:
586: /*
587: * Adds two cell cost values, never exceeding SKIP.
588: */
589: private static int sum(int c1, int c2) {
590: int sum = c1 + c2;
591: if (sum < 0)
592: return SKIP;
593: return sum;
594: }
595:
596: /*
597: * Computes the best possible edit distance from cell [r,l] if getting
598: * there has cost cCur.
599: */
600: int minCost(int r, int l, int cCur) {
601: // minimal cost from cell [r,l] to [rCount, lCount] if cell cost == cost
602: // Assume that the minimum of the remaining columns / rows are equal, and just
603: // the rest of the ranges has to be inserted / deleted
604: if (cCur == SKIP)
605: return SKIP;
606: return cCur + Math.abs((fRowEnd - r) - (fColEnd - l))
607: * COST_INSERT; // can be either insert or delete
608: }
609:
610: /*
611: * Computes the worst possible edit distance from cell [r,l] if getting
612: * there has cost cCur.
613: */
614: int maxCost(int r, int l, int cCur) {
615: // maximal cost from cell [r,l] to [rCount, lCount] if cell cost == cost
616: // maximal additional cost is the maximum remaining columns / rows
617: if (cCur == SKIP)
618: return SKIP;
619: return cCur
620: + Math
621: .max(Math.abs(fRowEnd - r), Math.abs(fColEnd
622: - l)) * COST_CHANGE;
623: }
624:
625: /* classic implementation */
626:
627: private RangeDifference[] walkback() {
628: fDiffs = new LinkedList();
629:
630: int row = fRowEnd, col = fColEnd;
631: RangeDifference difference = null;
632:
633: int cell = fMatrix[row][col]; // edit distance
634:
635: while (row > 0 || col > 0) {
636: int diag, above, left;
637:
638: if (row == 0) {
639: // slide deletes along row 0
640: diag = SKIP;
641: above = SKIP;
642: left = col - 1;
643: } else if (col == 0) {
644: // slide inserts along column 0
645: diag = SKIP;
646: above = row - 1;
647: left = SKIP;
648: } else {
649: // inner cells
650: diag = fMatrix[row - 1][col - 1];
651: above = fMatrix[row - 1][col];
652: left = fMatrix[row][col - 1];
653: }
654:
655: if (left == cell - 1 && left <= diag && left <= above) {
656: // delete
657: col--;
658: difference = getChange(difference);
659: difference.fLeftStart = col;
660: difference.fLeftLength++;
661: difference.fRightStart = row;
662:
663: cell = left;
664: } else if (above == cell - 1 && above <= diag) {
665: // insert
666: row--;
667: difference = getChange(difference);
668: difference.fLeftStart = col;
669: difference.fRightStart = row;
670: difference.fRightLength++;
671:
672: cell = above;
673: } else {
674: col--;
675: row--;
676: if (cell == diag) {
677: // match
678: // alternatively, create NOCHANGE ranges for findRanges
679: difference = null;
680: } else if (cell == diag + 1) {
681: // change
682: difference = getChange(difference);
683: difference.fLeftStart = col;
684: difference.fLeftLength++;
685: difference.fRightStart = row;
686: difference.fRightLength++;
687: } else {
688: if (ASSERT)
689: Assert.isTrue(false, "illegal matrix"); //$NON-NLS-1$
690: }
691:
692: cell = diag;
693: }
694:
695: }
696:
697: return (RangeDifference[]) fDiffs
698: .toArray(new RangeDifference[fDiffs.size()]);
699: }
700:
701: private RangeDifference getChange(RangeDifference difference) {
702: if (difference != null)
703: return difference;
704:
705: difference = new RangeDifference(RangeDifference.CHANGE);
706: fDiffs.add(0, difference);
707: return difference;
708: }
709:
710: /* hirschberg's algorithm */
711:
712: private int hirschberg(int rStart, int rEnd, int lStart, int lEnd) {
713:
714: /* trivial cases */
715:
716: if (rEnd < rStart) {
717: // right range is empty
718: return lEnd - lStart + 1;
719: } else if (rStart == rEnd) {
720: // right has length 1: look for a match and split
721: internalEditDistance(rStart, rEnd, lStart, lEnd);
722: int distance = SKIP;
723: for (int col = lStart - 1; col <= lEnd; col++) {
724: distance = fPreviousRow[col];
725: if (distance == 0) {
726: fOptimalSplitColumn[rStart] = col;
727: fOptimalSplitValues[rStart] = true;
728: return 0;
729: }
730: }
731: fOptimalSplitColumn[rStart] = lEnd;
732: fOptimalSplitValues[rStart] = false;
733: if (distance == SKIP)
734: return 1;
735: return distance;
736: }
737: // else if (lEnd < lStart) {
738: // // left is empty // perhaps not necessary
739: // Arrays.fill(fOptimalSplitColumn, rStart, rEnd + 1, lEnd);
740: // Arrays.fill(fOptimalSplitValues, rStart, rEnd + 1, false);
741: // return rEnd - rStart + 1;
742: // }
743:
744: /* divide & conquer */
745:
746: // split rows at half
747: int rowSplit = (rStart + rEnd + 1) / 2 - 1;
748:
749: // compute edit distance of (r1,left) in linear space into fPreviousRow
750: internalEditDistance(rStart, rowSplit, lStart, lEnd);
751: int[] tmp = fPreviousRow;
752: fPreviousRow = fResultRow;
753: fResultRow = tmp;
754: // compute backwards edit distance of (r2,left) in linear space into fPreviousRow
755: internalReverseEditDistance(rowSplit + 1, rEnd, lStart, lEnd);
756:
757: // find optimal alignment - the column in which to split the
758: // left hand side
759: int columnSplit = SKIP, distance = SKIP;
760: for (int col = lStart - 1; col <= lEnd; col++) {
761: int sum = sum(fResultRow[col], fPreviousRow[col]);
762: if (sum < distance) {
763: distance = sum;
764: columnSplit = col;
765: }
766: }
767:
768: if (fProgressMonitor.isCanceled())
769: return NO_DISTANCE;
770:
771: if (ASSERT)
772: Assert.isTrue(distance != SKIP);
773: if (ASSERT)
774: Assert.isTrue(columnSplit != SKIP);
775:
776: if (distance == 0) {
777: // optimize for large unchanged parts
778: // no further partitioning needed, this part is equal
779: if (ASSERT)
780: Assert.isTrue(rEnd - rStart == lEnd - lStart);
781: int col = lStart;
782: int row = rStart;
783: while (row <= rEnd) {
784: fOptimalSplitColumn[row] = col;
785: fOptimalSplitValues[row] = true;
786: col++;
787: row++;
788: }
789: return distance;
790: }
791:
792: // store alignment: from [rowSplit, ?] connect to [rowSplit + 1, columnSplit]
793: fOptimalSplitColumn[rowSplit] = columnSplit;
794: fOptimalSplitValues[rowSplit] = false;
795:
796: // divide at column & conquer
797: // TODO guard against stack overflow
798: hirschberg(rStart, rowSplit, lStart, columnSplit);
799: hirschberg(rowSplit + 1, rEnd, columnSplit + 1, lEnd);
800:
801: return distance;
802: }
803:
804: private RangeDifference[] buildDifferencesHirschberg() {
805: fDiffs = new LinkedList();
806:
807: RangeDifference difference = null;
808: int previousColumn = 0;
809:
810: for (int row = 1; row < fOptimalSplitColumn.length; row++) {
811: int previousRow = row - 1;
812: int column = fOptimalSplitColumn[row]; // from (row-1), jump to column (column) in row (row)
813:
814: if (column == previousColumn + 1) {
815: // diagonal
816: if (fOptimalSplitValues[row]) {
817: // match
818: // alternatively, create NOCHANGE ranges for findRanges
819: difference = null;
820: } else {
821: // change
822: difference = getChange(difference, previousRow,
823: previousColumn);
824: difference.fLeftLength++;
825: difference.fRightLength++;
826: }
827: } else if (column == previousColumn) {
828: // downwards / insert
829: difference = getChange(difference, previousRow,
830: previousColumn);
831: difference.fRightLength++;
832:
833: } else if (column > previousColumn) {
834: // rightward / deletes
835: difference = getChange(difference, previousRow,
836: previousColumn);
837: difference.fLeftLength += column - previousColumn - 1;
838: } else {
839: if (ASSERT)
840: Assert.isTrue(false, "Illegal edit description"); //$NON-NLS-1$
841: }
842:
843: previousColumn = column;
844: }
845:
846: if (previousColumn < fLeft.getRangeCount()) {
847:
848: // trailing deletions
849: difference = getChange(difference,
850: fOptimalSplitColumn.length - 1, previousColumn);
851: difference.fLeftLength += fLeft.getRangeCount()
852: - previousColumn;
853: }
854:
855: return (RangeDifference[]) fDiffs
856: .toArray(new RangeDifference[fDiffs.size()]);
857: }
858:
859: private RangeDifference getChange(RangeDifference difference,
860: int row, int column) {
861: if (difference != null)
862: return difference;
863:
864: difference = new RangeDifference(RangeDifference.CHANGE, row,
865: 0, column, 0);
866: fDiffs.add(difference);
867: return difference;
868: }
869:
870: /* pretty printing for debug output */
871:
872: private void printRow() {
873: if (fMatrix != null)
874: print(fMatrix[fRow]);
875: else
876: print(fCurrentRow);
877: }
878:
879: private static void printHeader(IRangeComparator left,
880: IRangeComparator right) {
881: System.out.println("============================="); //$NON-NLS-1$
882: System.out.println("= s1: " + left.toString()); //$NON-NLS-1$
883: System.out.println("= s2: " + right.toString()); //$NON-NLS-1$
884: System.out.println();
885: }
886:
887: private static void print(int[] row) {
888: for (int i = 0; i < row.length; i++) {
889: System.out
890: .print("\t" + (row[i] == Integer.MAX_VALUE ? "-" : "" + row[i])); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
891: }
892: System.out.println();
893: }
894:
895: }
|