001: /*
002: * Copyright (C) 2001, 2002 Robert MacGrogan
003: *
004: * This program is free software; you can redistribute it and/or modify
005: * it under the terms of the GNU General Public License as published by
006: * the Free Software Foundation; either version 2 of the License, or
007: * (at your option) any later version.
008: *
009: * This program is distributed in the hope that it will be useful,
010: * but WITHOUT ANY WARRANTY; without even the implied warranty of
011: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
012: * GNU General Public License for more details.
013: *
014: * You should have received a copy of the GNU General Public License
015: * along with this program; if not, write to the Free Software
016: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
017: *
018: *
019: * $Archive: SourceJammer$
020: * $FileName: GnuDiffAlgorithm.java$
021: * $FileID: 4697$
022: *
023: * Last change:
024: * $AuthorName: Rob MacGrogan$
025: * $Date: 5/1/03 6:40 PM$
026: * $Comment: GNU diff algorithm.$
027: */
028: package JLibDiff;
029:
030: import java.util.Arrays;
031: import java.util.Hashtable;
032: import java.util.Vector;
033:
034: /**
035: * Title: $FileName: GnuDiffAlgorithm.java$
036: * @version $VerNum: 1$
037: * @author $AuthorName: Rob MacGrogan$<br><br>
038: *
039: * $Description: GNU diff algorithm.$<br>
040: * $KeyWordsOff: $<br><br>
041: *
042: * This class implements GNU diff and is adapted and copied (where possible)
043: * from Java implementation of this program created by Stuart D Gathman.<br><br>
044: *
045: * The code has been significantly modified by Robert MacGrogan to
046: * fit into SourceJammer and use the JLibDiff objects. <br><br>
047: *
048: * Please note that this module is released under the GNU Public License only.
049: * If the version of SourceJammer you are using contains this module it is
050: * released as GPL and can only be re-distributed as GPL.<br><br>
051: *
052: * Original file header and copyright information follows:<br><br>
053:
054: A class to compare vectors of objects. The result of comparison
055: is a list of <code>change</code> objects which form an
056: edit script. The objects compared are traditionally lines
057: of text from two files. Comparison options such as "ignore
058: whitespace" are implemented by modifying the <code>equals</code>
059: and <code>hashcode</code> methods for the objects compared.
060: <p>
061: The basic algorithm is described in: </br>
062: "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
063: Algorithmica Vol. 1 No. 2, 1986, p 251.
064: <p>
065: This class outputs different results from GNU diff 1.15 on some
066: inputs. Our results are actually better (smaller change list, smaller
067: total size of changes), but it would be nice to know why. Perhaps
068: there is a memory overwrite bug in GNU diff 1.15.
069:
070: @author Stuart D. Gathman, translated from GNU diff 1.15
071: Copyright (C) 2000 Business Management Systems, Inc.
072: <p>
073: This program is free software; you can redistribute it and/or modify
074: it under the terms of the GNU General Public License as published by
075: the Free Software Foundation; either version 1, or (at your option)
076: any later version.
077: <p>
078: This program is distributed in the hope that it will be useful,
079: but WITHOUT ANY WARRANTY; without even the implied warranty of
080: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
081: GNU General Public License for more details.
082: <p>
083: You should have received a copy of the <a href=COPYING.txt>
084: GNU General Public License</a>
085: along with this program; if not, write to the Free Software
086: Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
087: *
088: *
089: */
090: public class GnuDiffAlgorithm implements DiffAlgorithm, DiffMaker {
091:
092: private int equivMax = 1;
093: private boolean noDiscards = false;
094: private FileData[] fileInfo = new FileData[2];
095:
096: private int[] xvec, yvec; /* Vectors being compared. */
097: private int[] fdiag;
098: private int[] bdiag;
099: private int fdiagoff, bdiagoff;
100: private int cost;
101: public boolean heuristic = false;
102:
103: /**
104: * @see JLibDiff.DiffAlgorithm#makeDiff(String[], String[])
105: */
106: public Vector makeDiff(String[] A, String[] B) {
107: Hashtable h = new Hashtable(A.length + B.length);
108:
109: fileInfo[0] = new FileData(A, h, this );
110: fileInfo[1] = new FileData(B, h, this );
111:
112: discardConfusingLines();
113:
114: xvec = fileInfo[0].getUndiscarded();
115: yvec = fileInfo[1].getUndiscarded();
116:
117: int diags = fileInfo[0].getNumLinesNotDiscarded()
118: + fileInfo[1].getNumLinesNotDiscarded() + 3;
119: fdiag = new int[diags];
120: fdiagoff = fileInfo[1].getNumLinesNotDiscarded() + 1;
121: bdiag = new int[diags];
122: bdiagoff = fileInfo[1].getNumLinesNotDiscarded() + 1;
123:
124: compareseq(0, fileInfo[0].getNumLinesNotDiscarded(), 0,
125: fileInfo[1].getNumLinesNotDiscarded());
126: fdiag = null;
127: bdiag = null;
128:
129: /* Modify the results slightly to make them prettier
130: in cases where that can validly be done. */
131:
132: shiftBoundaries();
133: Vector v = buildChangeList(A, B);
134: return v;
135: }
136:
137: public void setEol(String s) {
138: //no implementation necessary in this class.
139: }
140:
141: private void compareseq(int xoff, int xlim, int yoff, int ylim) {
142: /* Slide down the bottom initial diagonal. */
143: while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
144: ++xoff;
145: ++yoff;
146: }
147: /* Slide up the top initial diagonal. */
148: while (xlim > xoff && ylim > yoff
149: && xvec[xlim - 1] == yvec[ylim - 1]) {
150: --xlim;
151: --ylim;
152: }
153:
154: /* Handle simple cases. */
155: if (xoff == xlim)
156: while (yoff < ylim)
157: fileInfo[1].getChangedFlags()[1 + fileInfo[1]
158: .getRealindexes()[yoff++]] = true;
159: else if (yoff == ylim)
160: while (xoff < xlim)
161: fileInfo[0].getChangedFlags()[1 + fileInfo[0]
162: .getRealindexes()[xoff++]] = true;
163: else {
164: /* Find a point of correspondence in the middle of the files. */
165:
166: int d = diag(xoff, xlim, yoff, ylim);
167: int c = cost;
168: int f = fdiag[fdiagoff + d];
169: int b = bdiag[bdiagoff + d];
170:
171: if (c == 1) {
172: /* This should be impossible, because it implies that
173: one of the two subsequences is empty,
174: and that case was handled above without calling `diag'.
175: Let's verify that this is true. */
176: throw new IllegalArgumentException("Empty subsequence");
177: } else {
178: /* Use that point to split this problem into two subproblems. */
179: compareseq(xoff, b, yoff, b - d);
180: /* This used to use f instead of b,
181: but that is incorrect!
182: It is not necessarily the case that diagonal d
183: has a snake from b to f. */
184: compareseq(b, xlim, b - d, ylim);
185: }
186: }
187: }
188:
189: private int diag(int xoff, int xlim, int yoff, int ylim) {
190: final int[] fd = fdiag; // Give the compiler a chance.
191: final int[] bd = bdiag; // Additional help for the compiler.
192: final int[] xv = xvec; // Still more help for the compiler.
193: final int[] yv = yvec; // And more and more . . .
194: final int dmin = xoff - ylim; // Minimum valid diagonal.
195: final int dmax = xlim - yoff; // Maximum valid diagonal.
196: final int fmid = xoff - yoff; // Center diagonal of top-down search.
197: final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
198: int fmin = fmid, fmax = fmid; // Limits of top-down search.
199: int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
200: /* True if southeast corner is on an odd
201: diagonal with respect to the northwest. */
202: final boolean odd = (fmid - bmid & 1) != 0;
203:
204: fd[fdiagoff + fmid] = xoff;
205: bd[bdiagoff + bmid] = xlim;
206:
207: for (int c = 1;; ++c) {
208: int d; /* Active diagonal. */
209: boolean big_snake = false;
210:
211: /* Extend the top-down search by an edit step in each diagonal. */
212: if (fmin > dmin)
213: fd[fdiagoff + --fmin - 1] = -1;
214: else
215: ++fmin;
216: if (fmax < dmax)
217: fd[fdiagoff + ++fmax + 1] = -1;
218: else
219: --fmax;
220: for (d = fmax; d >= fmin; d -= 2) {
221: int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff
222: + d + 1];
223:
224: if (tlo >= thi)
225: x = tlo + 1;
226: else
227: x = thi;
228: oldx = x;
229: y = x - d;
230: while (x < xlim && y < ylim && xv[x] == yv[y]) {
231: ++x;
232: ++y;
233: }
234: if (x - oldx > 20)
235: big_snake = true;
236: fd[fdiagoff + d] = x;
237: if (odd && bmin <= d && d <= bmax
238: && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
239: cost = 2 * c - 1;
240: return d;
241: }
242: }
243:
244: /* Similar extend the bottom-up search. */
245: if (bmin > dmin)
246: bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
247: else
248: ++bmin;
249: if (bmax < dmax)
250: bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
251: else
252: --bmax;
253: for (d = bmax; d >= bmin; d -= 2) {
254: int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff
255: + d + 1];
256:
257: if (tlo < thi)
258: x = tlo;
259: else
260: x = thi - 1;
261: oldx = x;
262: y = x - d;
263: while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
264: --x;
265: --y;
266: }
267: if (oldx - x > 20)
268: big_snake = true;
269: bd[bdiagoff + d] = x;
270: if (!odd && fmin <= d && d <= fmax
271: && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
272: cost = 2 * c;
273: return d;
274: }
275: }
276:
277: /* Heuristic: check occasionally for a diagonal that has made
278: lots of progress compared with the edit distance.
279: If we have any such, find the one that has made the most
280: progress and return it as if it had succeeded.
281:
282: With this heuristic, for files with a constant small density
283: of changes, the algorithm is linear in the file size. */
284:
285: if (c > 200 && big_snake && heuristic) {
286: int best = 0;
287: int bestpos = -1;
288:
289: for (d = fmax; d >= fmin; d -= 2) {
290: int dd = d - fmid;
291: if ((fd[fdiagoff + d] - xoff) * 2 - dd > 12 * (c + (dd > 0 ? dd
292: : -dd))) {
293: if (fd[fdiagoff + d] * 2 - dd > best
294: && fd[fdiagoff + d] - xoff > 20
295: && fd[fdiagoff + d] - d - yoff > 20) {
296: int k;
297: int x = fd[fdiagoff + d];
298:
299: /* We have a good enough best diagonal;
300: now insist that it end with a significant snake. */
301: for (k = 1; k <= 20; k++)
302: if (xvec[x - k] != yvec[x - d - k])
303: break;
304:
305: if (k == 21) {
306: best = fd[fdiagoff + d] * 2 - dd;
307: bestpos = d;
308: }
309: }
310: }
311: }
312: if (best > 0) {
313: cost = 2 * c - 1;
314: return bestpos;
315: }
316:
317: best = 0;
318: for (d = bmax; d >= bmin; d -= 2) {
319: int dd = d - bmid;
320: if ((xlim - bd[bdiagoff + d]) * 2 + dd > 12 * (c + (dd > 0 ? dd
321: : -dd))) {
322: if ((xlim - bd[bdiagoff + d]) * 2 + dd > best
323: && xlim - bd[bdiagoff + d] > 20
324: && ylim - (bd[bdiagoff + d] - d) > 20) {
325: /* We have a good enough best diagonal;
326: now insist that it end with a significant snake. */
327: int k;
328: int x = bd[bdiagoff + d];
329:
330: for (k = 0; k < 20; k++)
331: if (xvec[x + k] != yvec[x - d + k])
332: break;
333: if (k == 20) {
334: best = (xlim - bd[bdiagoff + d]) * 2
335: + dd;
336: bestpos = d;
337: }
338: }
339: }
340: }
341: if (best > 0) {
342: cost = 2 * c - 1;
343: return bestpos;
344: }
345: }
346: }
347: }
348:
349: private void discardConfusingLines() {
350: fileInfo[0].discardConfusingLines(fileInfo[1]);
351: fileInfo[1].discardConfusingLines(fileInfo[0]);
352: }
353:
354: private void shiftBoundaries() {
355: fileInfo[0].shift_boundaries(fileInfo[1]);
356: fileInfo[1].shift_boundaries(fileInfo[0]);
357: }
358:
359: private Object[] getLines(int firstLine, int numLines,
360: Object[] parent) {
361: Object[] lines = null;
362: numLines = numLines * -1;
363: if (numLines > 0) {
364: lines = new Object[numLines];
365: System.arraycopy(parent, firstLine - numLines, lines, 0,
366: numLines);
367: }
368: return lines;
369: }
370:
371: private Vector buildChangeList(String[] A, String[] B) {
372:
373: Vector v = new Vector();
374:
375: final boolean[] changed0 = fileInfo[0].getChangedFlags();
376: final boolean[] changed1 = fileInfo[1].getChangedFlags();
377: final int len0 = fileInfo[0].getNumLinesInBuffer();
378: final int len1 = fileInfo[1].getNumLinesInBuffer();
379: int i0 = len0, i1 = len1;
380:
381: /* Note that changedN[-1] does exist, and contains 0. */
382:
383: while (i0 >= 0 || i1 >= 0) {
384: if (changed0[i0] || changed1[i1]) {
385: int line0 = i0, line1 = i1;
386:
387: /* Find # lines changed here in each file. */
388: while (changed0[i0])
389: --i0;
390: while (changed1[i1])
391: --i1;
392:
393: //Get changed lines.
394: Object[] lines0 = getLines(line0, i0 - line0, A);
395: Object[] lines1 = getLines(line1, i1 - line1, B);
396:
397: /* Record this change. */
398: int numDeleted = (line0 - i0);
399: int numAdded = (line1 - i1);
400:
401: Hunk newHunk = null;
402: if (numAdded > 0 && numDeleted <= 0) {
403: HunkAdd add = new HunkAdd();
404: add.ld1 = i0;
405: add.ld2 = (line1 - numAdded) + 1;
406: add.lf2 = line1;
407: add.b = new Vector(Arrays.asList(lines1));
408: newHunk = add;
409: } else if (numAdded <= 0 && numDeleted > 0) {
410: HunkDel del = new HunkDel();
411: del.ld1 = (line0 - numDeleted) + 1;
412: del.ld2 = i1;
413: del.lf1 = line0;
414: del.a = new Vector(Arrays.asList(lines0));
415: newHunk = del;
416: } else {
417: HunkChange change = new HunkChange();
418: change.ld1 = (line0 - numDeleted) + 1;
419: change.ld2 = (line1 - numAdded) + 1;
420: change.lf1 = line0;
421: change.lf2 = line1;
422: change.a = new Vector(Arrays.asList(lines0));
423: change.b = new Vector(Arrays.asList(lines1));
424: newHunk = change;
425: }
426:
427: v.add(0, newHunk);
428:
429: /* Record this change. */
430: }
431:
432: /* We have reached lines in the two files that match each other. */
433: i0--;
434: i1--;
435: }
436: return v;
437: }
438:
439: /**
440: * Returns the equivMax.
441: * @return int
442: */
443: public int getEquivMax() {
444: return equivMax;
445: }
446:
447: public void incrimentEquivMax() {
448: equivMax++;
449: }
450:
451: /**
452: * Returns the noDiscards.
453: * @return boolean
454: */
455: public boolean isNoDiscards() {
456: return noDiscards;
457: }
458:
459: }
|