001: /*******************************************************************************
002: * Copyright (c) 2000, 2006 IBM Corporation and others.
003: * All rights reserved. This program and the accompanying materials
004: * are made available under the terms of the Eclipse Public License v1.0
005: * which accompanies this distribution, and is available at
006: * http://www.eclipse.org/legal/epl-v10.html
007: *
008: * Contributors:
009: * IBM Corporation - initial API and implementation
010: *******************************************************************************/package org.eclipse.compare.internal;
011:
012: import org.eclipse.core.runtime.OperationCanceledException;
013: import org.eclipse.core.runtime.SubMonitor;
014:
015: /* Used to determine the change set responsible for each line */
016: public abstract class LCS {
017:
018: private int max_differences; // the maximum number of differences
019: // from
020:
021: // each end to consider
022:
023: private int length;
024:
025: /**
026: * Myers' algorithm for longest common subsequence. O((M + N)D) worst
027: * case time, O(M + N + D^2) expected time, O(M + N) space
028: * (http://citeseer.ist.psu.edu/myers86ond.html)
029: *
030: * Note: Beyond implementing the algorithm as described in the paper I
031: * have added diagonal range compression which helps when finding the
032: * LCS of a very long and a very short sequence, also bound the running
033: * time to (N + M)^1.5 when both sequences are very long.
034: *
035: * After this method is called, the longest common subsequence is
036: * available by calling getResult() where result[0] is composed of
037: * entries from l1 and result[1] is composed of entries from l2
038: *
039: * @param subMonitor
040: */
041: public void longestCommonSubsequence(SubMonitor subMonitor,
042: LCSSettings settings) {
043: int length1 = getLength1();
044: int length2 = getLength2();
045: if (length1 == 0 || length2 == 0) {
046: length = 0;
047: return;
048: }
049:
050: max_differences = (length1 + length2 + 1) / 2; // ceil((N+M)/2)
051: if ((double) length1 * (double) length2 > settings.getTooLong()) {
052: // limit complexity to D^POW_LIMIT for long sequences
053: max_differences = (int) Math.pow(max_differences, settings
054: .getPowLimit() - 1.0);
055: }
056:
057: initializeLcs(length1);
058:
059: subMonitor.beginTask(null, length1);
060:
061: /*
062: * The common prefixes and suffixes are always part of some LCS, include
063: * them now to reduce our search space
064: */
065: int forwardBound;
066: int max = Math.min(length1, length2);
067: for (forwardBound = 0; forwardBound < max
068: && isRangeEqual(forwardBound, forwardBound); forwardBound++) {
069: setLcs(forwardBound, forwardBound);
070: worked(subMonitor, 1);
071: }
072:
073: int backBoundL1 = length1 - 1;
074: int backBoundL2 = length2 - 1;
075:
076: while (backBoundL1 >= forwardBound
077: && backBoundL2 >= forwardBound
078: && isRangeEqual(backBoundL1, backBoundL2)) {
079: setLcs(backBoundL1, backBoundL2);
080: backBoundL1--;
081: backBoundL2--;
082: worked(subMonitor, 1);
083: }
084:
085: length = forwardBound
086: + length1
087: - backBoundL1
088: - 1
089: + lcs_rec(forwardBound, backBoundL1, forwardBound,
090: backBoundL2, new int[2][length1 + length2 + 1],
091: new int[3], subMonitor);
092:
093: }
094:
095: /**
096: * The recursive helper function for Myers' LCS. Computes the LCS of
097: * l1[bottoml1 .. topl1] and l2[bottoml2 .. topl2] fills in the
098: * appropriate location in lcs and returns the length
099: *
100: * @param l1
101: * The 1st sequence
102: * @param bottoml1
103: * Index in the 1st sequence to start from (inclusive)
104: * @param topl1
105: * Index in the 1st sequence to end on (inclusive)
106: * @param l2
107: * The 2nd sequence
108: * @param bottoml2
109: * Index in the 2nd sequence to start from (inclusive)
110: * @param topl2
111: * Index in the 2nd sequence to end on (inclusive)
112: * @param V
113: * should be allocated as int[2][l1.length + l2.length +
114: * 1], used to store furthest reaching D-paths
115: * @param snake
116: * should be allocated as int[3], used to store the
117: * beginning x, y coordinates and the length of the
118: * latest snake traversed
119: * @param subMonitor
120: * @param lcs
121: * should be allocated as TextLine[2][l1.length], used to
122: * store the common points found to be part of the LCS
123: * where lcs[0] references lines of l1 and lcs[1]
124: * references lines of l2.
125: *
126: * @return the length of the LCS
127: */
128: private int lcs_rec(int bottoml1, int topl1, int bottoml2,
129: int topl2, int[][] V, int[] snake, SubMonitor subMonitor) {
130:
131: // check that both sequences are non-empty
132: if (bottoml1 > topl1 || bottoml2 > topl2) {
133: return 0;
134: }
135:
136: int d = find_middle_snake(bottoml1, topl1, bottoml2, topl2, V,
137: snake);
138: // System.out.println(snake[0] + " " + snake[1] + " " + snake[2]);
139:
140: // need to store these so we don't lose them when they're overwritten by
141: // the recursion
142: int len = snake[2];
143: int startx = snake[0];
144: int starty = snake[1];
145:
146: // the middle snake is part of the LCS, store it
147: for (int i = 0; i < len; i++) {
148: setLcs(startx + i, starty + i);
149: worked(subMonitor, 1);
150: }
151:
152: if (d > 1) {
153: return len
154: + lcs_rec(bottoml1, startx - 1, bottoml2,
155: starty - 1, V, snake, subMonitor)
156: + lcs_rec(startx + len, topl1, starty + len, topl2,
157: V, snake, subMonitor);
158: } else if (d == 1) {
159: /*
160: * In this case the sequences differ by exactly 1 line. We have
161: * already saved all the lines after the difference in the for
162: * loop above, now we need to save all the lines before the
163: * difference.
164: */
165: int max = Math.min(startx - bottoml1, starty - bottoml2);
166: for (int i = 0; i < max; i++) {
167: setLcs(bottoml1 + i, bottoml2 + i);
168: worked(subMonitor, 1);
169: }
170: return max + len;
171: }
172:
173: return len;
174: }
175:
176: private void worked(SubMonitor subMonitor, int work) {
177: if (subMonitor.isCanceled())
178: throw new OperationCanceledException();
179: subMonitor.worked(work);
180: }
181:
182: /**
183: * Helper function for Myers' LCS algorithm to find the middle snake for
184: * l1[bottoml1..topl1] and l2[bottoml2..topl2] The x, y coodrdinates of
185: * the start of the middle snake are saved in snake[0], snake[1]
186: * respectively and the length of the snake is saved in s[2].
187: *
188: * @param l1
189: * The 1st sequence
190: * @param bottoml1
191: * Index in the 1st sequence to start from (inclusive)
192: * @param topl1
193: * Index in the 1st sequence to end on (inclusive)
194: * @param l2
195: * The 2nd sequence
196: * @param bottoml2
197: * Index in the 2nd sequence to start from (inclusive)
198: * @param topl2
199: * Index in the 2nd sequence to end on (inclusive)
200: * @param V
201: * should be allocated as int[2][l1.length + l2.length +
202: * 1], used to store furthest reaching D-paths
203: * @param snake
204: * should be allocated as int[3], used to store the
205: * beginning x, y coordinates and the length of the
206: * middle snake
207: *
208: * @return The number of differences (SES) between l1[bottoml1..topl1]
209: * and l2[bottoml2..topl2]
210: */
211: private int find_middle_snake(int bottoml1, int topl1,
212: int bottoml2, int topl2, int[][] V, int[] snake) {
213: int N = topl1 - bottoml1 + 1;
214: int M = topl2 - bottoml2 + 1;
215: // System.out.println("N: " + N + " M: " + M + " bottom: " + bottoml1 +
216: // ", " +
217: // bottoml2 + " top: " + topl1 + ", " + topl2);
218: int delta = N - M;
219: boolean isEven;
220: if ((delta & 1) == 1) {
221: isEven = false;
222: } else {
223: isEven = true;
224: }
225:
226: int limit = Math.min(max_differences, (N + M + 1) / 2); // ceil((N+M)/2)
227:
228: int value_to_add_forward; // a 0 or 1 that we add to the start
229: // offset
230: // to make it odd/even
231: if ((M & 1) == 1) {
232: value_to_add_forward = 1;
233: } else {
234: value_to_add_forward = 0;
235: }
236:
237: int value_to_add_backward;
238: if ((N & 1) == 1) {
239: value_to_add_backward = 1;
240: } else {
241: value_to_add_backward = 0;
242: }
243:
244: int start_forward = -M;
245: int end_forward = N;
246: int start_backward = -N;
247: int end_backward = M;
248:
249: V[0][limit + 1] = 0;
250: V[1][limit - 1] = N;
251: for (int d = 0; d <= limit; d++) {
252:
253: int start_diag = Math.max(value_to_add_forward
254: + start_forward, -d);
255: int end_diag = Math.min(end_forward, d);
256: value_to_add_forward = 1 - value_to_add_forward;
257:
258: // compute forward furthest reaching paths
259: for (int k = start_diag; k <= end_diag; k += 2) {
260: int x;
261: if (k == -d
262: || (k < d && V[0][limit + k - 1] < V[0][limit
263: + k + 1])) {
264: x = V[0][limit + k + 1];
265: } else {
266: x = V[0][limit + k - 1] + 1;
267: }
268:
269: int y = x - k;
270:
271: snake[0] = x + bottoml1;
272: snake[1] = y + bottoml2;
273: snake[2] = 0;
274: // System.out.println("1 x: " + x + " y: " + y + " k: " + k + "
275: // d: " + d );
276: while (x < N && y < M
277: && isRangeEqual(x + bottoml1, y + bottoml2)) {
278: x++;
279: y++;
280: snake[2]++;
281: }
282: V[0][limit + k] = x;
283: // System.out.println(x + " " + V[1][limit+k -delta] + " " + k +
284: // " " + delta);
285: if (!isEven && k >= delta - d + 1 && k <= delta + d - 1
286: && x >= V[1][limit + k - delta]) {
287: // System.out.println("Returning: " + (2*d-1));
288: return 2 * d - 1;
289: }
290:
291: // check to see if we can cut down the diagonal range
292: if (x >= N && end_forward > k - 1) {
293: end_forward = k - 1;
294: } else if (y >= M) {
295: start_forward = k + 1;
296: value_to_add_forward = 0;
297: }
298: }
299:
300: start_diag = Math.max(value_to_add_backward
301: + start_backward, -d);
302: end_diag = Math.min(end_backward, d);
303: value_to_add_backward = 1 - value_to_add_backward;
304:
305: // compute backward furthest reaching paths
306: for (int k = start_diag; k <= end_diag; k += 2) {
307: int x;
308: if (k == d
309: || (k != -d && V[1][limit + k - 1] < V[1][limit
310: + k + 1])) {
311: x = V[1][limit + k - 1];
312: } else {
313: x = V[1][limit + k + 1] - 1;
314: }
315:
316: int y = x - k - delta;
317:
318: snake[2] = 0;
319: // System.out.println("2 x: " + x + " y: " + y + " k: " + k + "
320: // d: " + d);
321: while (x > 0
322: && y > 0
323: && isRangeEqual(x - 1 + bottoml1, y - 1
324: + bottoml2)) {
325: x--;
326: y--;
327: snake[2]++;
328: }
329: V[1][limit + k] = x;
330:
331: if (isEven && k >= -delta - d && k <= d - delta
332: && x <= V[0][limit + k + delta]) {
333: // System.out.println("Returning: " + 2*d);
334: snake[0] = bottoml1 + x;
335: snake[1] = bottoml2 + y;
336:
337: return 2 * d;
338: }
339:
340: // check to see if we can cut down our diagonal range
341: if (x <= 0) {
342: start_backward = k + 1;
343: value_to_add_backward = 0;
344: } else if (y <= 0 && end_backward > k - 1) {
345: end_backward = k - 1;
346: }
347: }
348: }
349:
350: /*
351: * computing the true LCS is too expensive, instead find the diagonal
352: * with the most progress and pretend a midle snake of length 0 occurs
353: * there.
354: */
355:
356: int[] most_progress = findMostProgress(M, N, limit, V);
357:
358: snake[0] = bottoml1 + most_progress[0];
359: snake[1] = bottoml2 + most_progress[1];
360: snake[2] = 0;
361: return 5; /*
362: * HACK: since we didn't really finish the LCS
363: * computation we don't really know the length of the
364: * SES. We don't do anything with the result anyway,
365: * unless it's <=1. We know for a fact SES > 1 so 5 is
366: * as good a number as any to return here
367: */
368: }
369:
370: /**
371: * Takes the array with furthest reaching D-paths from an LCS
372: * computation and returns the x,y coordinates and progress made in the
373: * middle diagonal among those with maximum progress, both from the
374: * front and from the back.
375: *
376: * @param M
377: * the length of the 1st sequence for which LCS is being
378: * computed
379: * @param N
380: * the length of the 2nd sequence for which LCS is being
381: * computed
382: * @param limit
383: * the number of steps made in an attempt to find the LCS
384: * from the front and back
385: * @param V
386: * the array storing the furthest reaching D-paths for
387: * the LCS computation
388: * @return The result as an array of 3 integers where result[0] is the x
389: * coordinate of the current location in the diagonal with the
390: * most progress, result[1] is the y coordinate of the current
391: * location in the diagonal with the most progress and result[2]
392: * is the amount of progress made in that diagonal
393: */
394: private static int[] findMostProgress(int M, int N, int limit,
395: int[][] V) {
396: int delta = N - M;
397:
398: int forward_start_diag;
399: if ((M & 1) == (limit & 1)) {
400: forward_start_diag = Math.max(-M, -limit);
401: } else {
402: forward_start_diag = Math.max(1 - M, -limit);
403: }
404:
405: int forward_end_diag = Math.min(N, limit);
406:
407: int backward_start_diag;
408: if ((N & 1) == (limit & 1)) {
409: backward_start_diag = Math.max(-N, -limit);
410: } else {
411: backward_start_diag = Math.max(1 - N, -limit);
412: }
413:
414: int backward_end_diag = Math.min(M, limit);
415:
416: int[][] max_progress = new int[Math.max(forward_end_diag
417: - forward_start_diag, backward_end_diag
418: - backward_start_diag) / 2 + 1][3];
419: int num_progress = 0; // the 1st entry is current, it is initialized
420: // with 0s
421:
422: // first search the forward diagonals
423: for (int k = forward_start_diag; k <= forward_end_diag; k += 2) {
424: int x = V[0][limit + k];
425: int y = x - k;
426: if (x > N || y > M) {
427: continue;
428: }
429:
430: int progress = x + y;
431: if (progress > max_progress[0][2]) {
432: num_progress = 0;
433: max_progress[0][0] = x;
434: max_progress[0][1] = y;
435: max_progress[0][2] = progress;
436: } else if (progress == max_progress[0][2]) {
437: num_progress++;
438: max_progress[num_progress][0] = x;
439: max_progress[num_progress][1] = y;
440: max_progress[num_progress][2] = progress;
441: }
442: }
443:
444: boolean max_progress_forward = true; // initially the maximum
445: // progress is in the forward
446: // direction
447:
448: // now search the backward diagonals
449: for (int k = backward_start_diag; k <= backward_end_diag; k += 2) {
450: int x = V[1][limit + k];
451: int y = x - k - delta;
452: if (x < 0 || y < 0) {
453: continue;
454: }
455:
456: int progress = N - x + M - y;
457: if (progress > max_progress[0][2]) {
458: num_progress = 0;
459: max_progress_forward = false;
460: max_progress[0][0] = x;
461: max_progress[0][1] = y;
462: max_progress[0][2] = progress;
463: } else if (progress == max_progress[0][2]
464: && !max_progress_forward) {
465: num_progress++;
466: max_progress[num_progress][0] = x;
467: max_progress[num_progress][1] = y;
468: max_progress[num_progress][2] = progress;
469: }
470: }
471:
472: // return the middle diagonal with maximal progress.
473: return max_progress[num_progress / 2];
474: }
475:
476: protected abstract int getLength2();
477:
478: protected abstract int getLength1();
479:
480: protected abstract boolean isRangeEqual(int i1, int i2);
481:
482: protected abstract void setLcs(int sl1, int sl2);
483:
484: protected abstract void initializeLcs(int lcsLength);
485:
486: public int getLength() {
487: return length;
488: }
489: }
|