001: // This file is part of KeY - Integrated Deductive Software Design
002: // Copyright (C) 2001-2007 Universitaet Karlsruhe, Germany
003: // Universitaet Koblenz-Landau, Germany
004: // Chalmers University of Technology, Sweden
005: //
006: // The KeY system is protected by the GNU General Public License.
007: // See LICENSE.TXT for details.
008: //
009:
010: package de.uka.ilkd.key.proof;
011:
012: import java.util.Hashtable;
013: import java.util.Vector;
014:
015: import de.uka.ilkd.key.logic.IntIterator;
016: import de.uka.ilkd.key.logic.PosInTerm;
017:
018: /** A class to compare vectors of objects. The result of comparison
019: is a list of <code>change</code> objects which form an
020: edit script. The objects compared are traditionally lines
021: of text from two files. Comparison options such as "ignore
022: whitespace" are implemented by modifying the <code>equals</code>
023: and <code>hashcode</code> methods for the objects compared.
024: <p>
025: The basic algorithm is described in: </br>
026: "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
027: Algorithmica Vol. 1 No. 2, 1986, p 251.
028: <p>
029: This class outputs different results from GNU diff 1.15 on some
030: inputs. Our results are actually better (smaller change list, smaller
031: total size of changes), but it would be nice to know why. Perhaps
032: there is a memory overwrite bug in GNU diff 1.15.
033:
034: @author Stuart D. Gathman, translated from GNU diff 1.15
035: Copyright (C) 2000 Business Management Systems, Inc.
036: <p>
037: This program is free software; you can redistribute it and/or modify
038: it under the terms of the GNU General Public License as published by
039: the Free Software Foundation; either version 1, or (at your option)
040: any later version.
041: <p>
042: This program is distributed in the hope that it will be useful,
043: but WITHOUT ANY WARRANTY; without even the implied warranty of
044: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
045: GNU General Public License for more details.
046: <p>
047: You should have received a copy of the <a href=COPYING.txt>
048: GNU General Public License</a>
049: along with this program; if not, write to the Free Software
050: Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
051:
052: */
053:
054: public class DiffMyers {
055:
056: /** Prepare to find differences between two arrays. Each element of
057: the arrays is translated to an "equivalence number" based on
058: the result of <code>equals</code>. The original Object arrays
059: are no longer needed for computing the differences. They will
060: be needed again later to print the results of the comparison as
061: an edit script, if desired.
062: */
063: public DiffMyers(PosInTerm a, PosInTerm b) {
064: Hashtable h = new Hashtable(a.depth() + b.depth());
065: filevec[0] = new file_data(a, h);
066: filevec[1] = new file_data(b, h);
067: }
068:
069: public DiffMyers(Vector a, Vector b) {
070: Hashtable h = new Hashtable(a.size() + b.size());
071: filevec[0] = new file_data(a, h);
072: filevec[1] = new file_data(b, h);
073: }
074:
075: /** 1 more than the maximum equivalence value used for this or its
076: sibling file. */
077: private int equiv_max = 1;
078:
079: /** When set to true, the comparison uses a heuristic to speed it up.
080: With this heuristic, for files with a constant small density
081: of changes, the algorithm is linear in the file size. */
082: public boolean heuristic = false;
083:
084: /** When set to true, the algorithm returns a guarranteed minimal
085: set of changes. This makes things slower, sometimes much slower. */
086: public boolean no_discards = true;
087:
088: private int[] xvec, yvec; /* Vectors being compared. */
089: private int[] fdiag; /* Vector, indexed by diagonal, containing
090: the X coordinate of the point furthest
091: along the given diagonal in the forward
092: search of the edit matrix. */
093: private int[] bdiag; /* Vector, indexed by diagonal, containing
094: the X coordinate of the point furthest
095: along the given diagonal in the backward
096: search of the edit matrix. */
097: private int fdiagoff, bdiagoff;
098: private final file_data[] filevec = new file_data[2];
099: private int cost;
100:
101: /** Find the midpoint of the shortest edit script for a specified
102: portion of the two files.
103:
104: We scan from the beginnings of the files, and simultaneously from the ends,
105: doing a breadth-first search through the space of edit-sequence.
106: When the two searches meet, we have found the midpoint of the shortest
107: edit sequence.
108:
109: The value returned is the number of the diagonal on which the midpoint lies.
110: The diagonal number equals the number of inserted lines minus the number
111: of deleted lines (counting only lines before the midpoint).
112: The edit cost is stored into COST; this is the total number of
113: lines inserted or deleted (counting only lines before the midpoint).
114:
115: This function assumes that the first lines of the specified portions
116: of the two files do not match, and likewise that the last lines do not
117: match. The caller must trim matching lines from the beginning and end
118: of the portions it is going to specify.
119:
120: Note that if we return the "wrong" diagonal value, or if
121: the value of bdiag at that diagonal is "wrong",
122: the worst this can do is cause suboptimal diff output.
123: It cannot cause incorrect diff output. */
124:
125: private int diag(int xoff, int xlim, int yoff, int ylim) {
126: final int[] fd = fdiag; // Give the compiler a chance.
127: final int[] bd = bdiag; // Additional help for the compiler.
128: final int[] xv = xvec; // Still more help for the compiler.
129: final int[] yv = yvec; // And more and more . . .
130: final int dmin = xoff - ylim; // Minimum valid diagonal.
131: final int dmax = xlim - yoff; // Maximum valid diagonal.
132: final int fmid = xoff - yoff; // Center diagonal of top-down search.
133: final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
134: int fmin = fmid, fmax = fmid; // Limits of top-down search.
135: int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
136: /* True if southeast corner is on an odd
137: diagonal with respect to the northwest. */
138: final boolean odd = (fmid - bmid & 1) != 0;
139:
140: fd[fdiagoff + fmid] = xoff;
141: bd[bdiagoff + bmid] = xlim;
142:
143: for (int c = 1;; ++c) {
144: int d; /* Active diagonal. */
145: boolean big_snake = false;
146:
147: /* Extend the top-down search by an edit step in each diagonal. */
148: if (fmin > dmin)
149: fd[fdiagoff + --fmin - 1] = -1;
150: else
151: ++fmin;
152: if (fmax < dmax)
153: fd[fdiagoff + ++fmax + 1] = -1;
154: else
155: --fmax;
156: for (d = fmax; d >= fmin; d -= 2) {
157: int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff
158: + d + 1];
159:
160: if (tlo >= thi)
161: x = tlo + 1;
162: else
163: x = thi;
164: oldx = x;
165: y = x - d;
166: while (x < xlim && y < ylim && xv[x] == yv[y]) {
167: ++x;
168: ++y;
169: }
170: if (x - oldx > 20)
171: big_snake = true;
172: fd[fdiagoff + d] = x;
173: if (odd && bmin <= d && d <= bmax
174: && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
175: cost = 2 * c - 1;
176: return d;
177: }
178: }
179:
180: /* Similar extend the bottom-up search. */
181: if (bmin > dmin)
182: bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
183: else
184: ++bmin;
185: if (bmax < dmax)
186: bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
187: else
188: --bmax;
189: for (d = bmax; d >= bmin; d -= 2) {
190: int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff
191: + d + 1];
192:
193: if (tlo < thi)
194: x = tlo;
195: else
196: x = thi - 1;
197: oldx = x;
198: y = x - d;
199: while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
200: --x;
201: --y;
202: }
203: if (oldx - x > 20)
204: big_snake = true;
205: bd[bdiagoff + d] = x;
206: if (!odd && fmin <= d && d <= fmax
207: && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
208: cost = 2 * c;
209: return d;
210: }
211: }
212:
213: /* Heuristic: check occasionally for a diagonal that has made
214: lots of progress compared with the edit distance.
215: If we have any such, find the one that has made the most
216: progress and return it as if it had succeeded.
217:
218: With this heuristic, for files with a constant small density
219: of changes, the algorithm is linear in the file size. */
220:
221: if (c > 200 && big_snake && heuristic) {
222: int best = 0;
223: int bestpos = -1;
224:
225: for (d = fmax; d >= fmin; d -= 2) {
226: int dd = d - fmid;
227: if ((fd[fdiagoff + d] - xoff) * 2 - dd > 12 * (c + (dd > 0 ? dd
228: : -dd))) {
229: if (fd[fdiagoff + d] * 2 - dd > best
230: && fd[fdiagoff + d] - xoff > 20
231: && fd[fdiagoff + d] - d - yoff > 20) {
232: int k;
233: int x = fd[fdiagoff + d];
234:
235: /* We have a good enough best diagonal;
236: now insist that it end with a significant snake. */
237: for (k = 1; k <= 20; k++)
238: if (xvec[x - k] != yvec[x - d - k])
239: break;
240:
241: if (k == 21) {
242: best = fd[fdiagoff + d] * 2 - dd;
243: bestpos = d;
244: }
245: }
246: }
247: }
248: if (best > 0) {
249: cost = 2 * c - 1;
250: return bestpos;
251: }
252:
253: best = 0;
254: for (d = bmax; d >= bmin; d -= 2) {
255: int dd = d - bmid;
256: if ((xlim - bd[bdiagoff + d]) * 2 + dd > 12 * (c + (dd > 0 ? dd
257: : -dd))) {
258: if ((xlim - bd[bdiagoff + d]) * 2 + dd > best
259: && xlim - bd[bdiagoff + d] > 20
260: && ylim - (bd[bdiagoff + d] - d) > 20) {
261: /* We have a good enough best diagonal;
262: now insist that it end with a significant snake. */
263: int k;
264: int x = bd[bdiagoff + d];
265:
266: for (k = 0; k < 20; k++)
267: if (xvec[x + k] != yvec[x - d + k])
268: break;
269: if (k == 20) {
270: best = (xlim - bd[bdiagoff + d]) * 2
271: + dd;
272: bestpos = d;
273: }
274: }
275: }
276: }
277: if (best > 0) {
278: cost = 2 * c - 1;
279: return bestpos;
280: }
281: }
282: }
283: }
284:
285: /** Compare in detail contiguous subsequences of the two files
286: which are known, as a whole, to match each other.
287:
288: The results are recorded in the vectors filevec[N].changed_flag, by
289: storing a 1 in the element for each line that is an insertion or deletion.
290:
291: The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
292:
293: Note that XLIM, YLIM are exclusive bounds.
294: All line numbers are origin-0 and discarded lines are not counted. */
295:
296: private void compareseq(int xoff, int xlim, int yoff, int ylim) {
297: /* Slide down the bottom initial diagonal. */
298: while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
299: ++xoff;
300: ++yoff;
301: }
302: /* Slide up the top initial diagonal. */
303: while (xlim > xoff && ylim > yoff
304: && xvec[xlim - 1] == yvec[ylim - 1]) {
305: --xlim;
306: --ylim;
307: }
308:
309: /* Handle simple cases. */
310: if (xoff == xlim)
311: while (yoff < ylim)
312: filevec[1].changed_flag[1 + filevec[1].realindexes[yoff++]] = true;
313: else if (yoff == ylim)
314: while (xoff < xlim)
315: filevec[0].changed_flag[1 + filevec[0].realindexes[xoff++]] = true;
316: else {
317: /* Find a point of correspondence in the middle of the files. */
318:
319: int d = diag(xoff, xlim, yoff, ylim);
320: int c = cost;
321: int f = fdiag[fdiagoff + d];
322: int b = bdiag[bdiagoff + d];
323:
324: if (c == 1) {
325: /* This should be impossible, because it implies that
326: one of the two subsequences is empty,
327: and that case was handled above without calling `diag'.
328: Let's verify that this is true. */
329: throw new IllegalArgumentException("Empty subsequence");
330: } else {
331: /* Use that point to split this problem into two subproblems. */
332: compareseq(xoff, b, yoff, b - d);
333: /* This used to use f instead of b,
334: but that is incorrect!
335: It is not necessarily the case that diagonal d
336: has a snake from b to f. */
337: compareseq(b, xlim, b - d, ylim);
338: }
339: }
340: }
341:
342: /** Discard lines from one file that have no matches in the other file.
343: */
344:
345: private void discard_confusing_lines() {
346: filevec[0].discard_confusing_lines(filevec[1]);
347: filevec[1].discard_confusing_lines(filevec[0]);
348: }
349:
350: private boolean inhibit = false;
351:
352: /** Adjust inserts/deletes of blank lines to join changes
353: as much as possible.
354: */
355:
356: private void shift_boundaries() {
357: if (inhibit)
358: return;
359: filevec[0].shift_boundaries(filevec[1]);
360: filevec[1].shift_boundaries(filevec[0]);
361: }
362:
363: /** Scan the tables of which lines are inserted and deleted,
364: producing an edit script in reverse order. */
365:
366: private change build_reverse_script() {
367: change script = null;
368: final boolean[] changed0 = filevec[0].changed_flag;
369: final boolean[] changed1 = filevec[1].changed_flag;
370: final int len0 = filevec[0].buffered_lines;
371: final int len1 = filevec[1].buffered_lines;
372:
373: /* Note that changedN[len0] does exist, and contains 0. */
374:
375: int i0 = 0, i1 = 0;
376:
377: while (i0 < len0 || i1 < len1) {
378: if (changed0[1 + i0] || changed1[1 + i1]) {
379: int line0 = i0, line1 = i1;
380:
381: /* Find # lines changed here in each file. */
382: while (changed0[1 + i0])
383: ++i0;
384: while (changed1[1 + i1])
385: ++i1;
386:
387: /* Record this change. */
388: script = new change(line0, line1, i0 - line0, i1
389: - line1, script);
390: }
391:
392: /* We have reached lines in the two files that match each other. */
393: i0++;
394: i1++;
395: }
396:
397: return script;
398: }
399:
400: mapping map;
401:
402: /** Scan the tables of which lines are inserted and deleted,
403: producing an edit script in forward order. */
404: // also builds a mapping (line correspondence)
405: private change build_script() {
406: change script = null;
407: map = null;
408: final boolean[] changed0 = filevec[0].changed_flag;
409: final boolean[] changed1 = filevec[1].changed_flag;
410: final int len0 = filevec[0].buffered_lines;
411: final int len1 = filevec[1].buffered_lines;
412: int i0 = len0, i1 = len1;
413:
414: /* Note that changedN[-1] does exist, and contains 0. */
415:
416: while (i0 >= 0 || i1 >= 0) {
417: if (changed0[i0] || changed1[i1]) {
418: int line0 = i0, line1 = i1;
419:
420: /* Find # lines changed here in each file. */
421: while (changed0[i0])
422: --i0;
423: while (changed1[i1])
424: --i1;
425:
426: /* Record this change. */
427: script = new change(i0, i1, line0 - i0, line1 - i1,
428: script);
429: }
430:
431: /* We have reached lines in the two files that match each other. */
432: i0--;
433: i1--;
434: if (i0 >= 0)
435: map = new mapping(i0, i1, map);
436: }
437:
438: return script;
439: }
440:
441: public mapping getMapping() {
442: return map;
443: }
444:
445: public change diff_2() {
446: return diff_2(false);
447: }
448:
449: /* Report the differences of two files. */
450: public change diff_2(final boolean reverse) {
451:
452: /* Some lines are obviously insertions or deletions
453: because they don't match anything. Detect them now,
454: and avoid even thinking about them in the main comparison algorithm. */
455:
456: discard_confusing_lines();
457:
458: /* Now do the main comparison algorithm, considering just the
459: undiscarded lines. */
460:
461: xvec = filevec[0].undiscarded;
462: yvec = filevec[1].undiscarded;
463:
464: int diags = filevec[0].nondiscarded_lines
465: + filevec[1].nondiscarded_lines + 3;
466: fdiag = new int[diags];
467: fdiagoff = filevec[1].nondiscarded_lines + 1;
468: bdiag = new int[diags];
469: bdiagoff = filevec[1].nondiscarded_lines + 1;
470:
471: compareseq(0, filevec[0].nondiscarded_lines, 0,
472: filevec[1].nondiscarded_lines);
473: fdiag = null;
474: bdiag = null;
475:
476: /* Modify the results slightly to make them prettier
477: in cases where that can validly be done. */
478:
479: shift_boundaries();
480:
481: /* Get the results of comparison in the form of a chain
482: of `struct change's -- an edit script. */
483:
484: if (reverse)
485: return build_reverse_script();
486: else
487: return build_script();
488: }
489:
490: /** The result of comparison is an "edit script": a chain of change objects.
491: Each change represents one place where some lines are deleted
492: and some are inserted.
493:
494: LINE0 and LINE1 are the first affected lines in the two files (origin 0).
495: DELETED is the number of lines deleted here from file 0.
496: INSERTED is the number of lines inserted here in file 1.
497:
498: If DELETED is 0 then LINE0 is the number of the line before
499: which the insertion was done; vice versa for INSERTED and LINE1. */
500:
501: public static class change {
502: /** Previous or next edit command. */
503: public change link;
504: /** # lines of file 1 changed here. */
505: public int inserted;
506: /** # lines of file 0 changed here. */
507: public int deleted;
508: /** Line number of 1st deleted line. */
509: public final int line0;
510: /** Line number of 1st inserted line. */
511: public final int line1;
512:
513: /** Cons an additional entry onto the front of an edit script OLD.
514: LINE0 and LINE1 are the first affected lines in the two files (origin 0).
515: DELETED is the number of lines deleted here from file 0.
516: INSERTED is the number of lines inserted here in file 1.
517:
518: If DELETED is 0 then LINE0 is the number of the line before
519: which the insertion was done; vice versa for INSERTED and LINE1. */
520: change(int line0, int line1, int deleted, int inserted,
521: change old) {
522: this .line0 = line0;
523: this .line1 = line1;
524: this .inserted = inserted;
525: this .deleted = deleted;
526: this .link = old;
527: //System.err.println(line0+","+line1+","+inserted+","+deleted);
528: }
529:
530: public String toString() {
531: String s = "";
532: if (deleted > 0)
533: s += "deleted " + deleted + " at " + line0 + "\n";
534: if (inserted > 0)
535: s += "inserted " + inserted + " at " + line1 + "\n";
536: if (link != null)
537: s += link;
538: return s;
539: }
540:
541: public int diminishingPenalty() {
542: int p = 0;
543: for (int i = 0; i < deleted; i++)
544: p += 100 / (line0 + i + 1);
545: for (int i = 0; i < inserted; i++)
546: p += 75 / (line1 + i + 1);
547: p = -p;
548: if (link == null)
549: return p;
550: else
551: return p + link.diminishingPenalty();
552: }
553:
554: public int uniformPenalty() {
555: int p = 10 * deleted + 5 * inserted;
556: p = -p;
557: if (link == null)
558: return p;
559: else
560: return p + link.uniformPenalty();
561: }
562:
563: }
564:
565: public static class mapping {
566: /** Previous or next edit command. */
567: public mapping link;
568: public int from;
569: public int to;
570:
571: mapping(int from, int to, mapping old) {
572: this .from = from;
573: this .to = to;
574: this .link = old;
575: }
576:
577: public int from() {
578: return from;
579: }
580:
581: public int to() {
582: return to;
583: }
584:
585: public String toString() {
586: String s = "";
587: s += "from " + from + " to " + to + "\n";
588: if (link != null)
589: s += link;
590: return s;
591: }
592:
593: public mapping next() {
594: return link;
595: }
596:
597: }
598:
599: /** Data on one input file being compared.
600: */
601:
602: class file_data {
603:
604: /** Allocate changed array for the results of comparison. */
605: void clear() {
606: /* Allocate a flag for each line of each file, saying whether that line
607: is an insertion or deletion.
608: Allocate an extra element, always zero, at each end of each vector.
609: */
610: changed_flag = new boolean[buffered_lines + 2];
611: }
612:
613: /** Return equiv_count[I] as the number of lines in this file
614: that fall in equivalence class I.
615: @return the array of equivalence class counts.
616: */
617: int[] equivCount() {
618: int[] equiv_count = new int[equiv_max];
619: for (int i = 0; i < buffered_lines; ++i)
620: ++equiv_count[equivs[i]];
621: return equiv_count;
622: }
623:
624: /** Discard lines that have no matches in another file.
625:
626: A line which is discarded will not be considered by the actual
627: comparison algorithm; it will be as if that line were not in the file.
628: The file's `realindexes' table maps virtual line numbers
629: (which don't count the discarded lines) into real line numbers;
630: this is how the actual comparison algorithm produces results
631: that are comprehensible when the discarded lines are counted.
632: <p>
633: When we discard a line, we also mark it as a deletion or insertion
634: so that it will be printed in the output.
635: @param f the other file
636: */
637: void discard_confusing_lines(file_data f) {
638: clear();
639: /* Set up table of which lines are going to be discarded. */
640: final byte[] discarded = discardable(f.equivCount());
641:
642: /* Don't really discard the provisional lines except when they occur
643: in a run of discardables, with nonprovisionals at the beginning
644: and end. */
645: filterDiscards(discarded);
646:
647: /* Actually discard the lines. */
648: discard(discarded);
649: }
650:
651: /** Mark to be discarded each line that matches no line of another file.
652: If a line matches many lines, mark it as provisionally discardable.
653: @see equivCount()
654: @param counts The count of each equivalence number for the other file.
655: @return 0=nondiscardable, 1=discardable or 2=provisionally discardable
656: for each line
657: */
658:
659: private byte[] discardable(final int[] counts) {
660: final int end = buffered_lines;
661: final byte[] discards = new byte[end];
662: final int[] equivals = this .equivs;
663: int many = 5;
664: int tem = end / 64;
665:
666: /* Multiply MANY by approximate square root of number of lines.
667: That is the threshold for provisionally discardable lines. */
668: while ((tem = tem >> 2) > 0)
669: many *= 2;
670:
671: for (int i = 0; i < end; i++) {
672: int nmatch;
673: if (equivals[i] == 0)
674: continue;
675: nmatch = counts[equivals[i]];
676: if (nmatch == 0)
677: discards[i] = 1;
678: else if (nmatch > many)
679: discards[i] = 2;
680: }
681: return discards;
682: }
683:
684: /** Don't really discard the provisional lines except when they occur
685: in a run of discardables, with nonprovisionals at the beginning
686: and end. */
687:
688: private void filterDiscards(final byte[] discards) {
689: final int end = buffered_lines;
690:
691: for (int i = 0; i < end; i++) {
692: /* Cancel provisional discards not in middle of run of discards. */
693: if (discards[i] == 2)
694: discards[i] = 0;
695: else if (discards[i] != 0) {
696: /* We have found a nonprovisional discard. */
697: int j;
698: int length;
699: int provisional = 0;
700:
701: /* Find end of this run of discardable lines.
702: Count how many are provisionally discardable. */
703: for (j = i; j < end; j++) {
704: if (discards[j] == 0)
705: break;
706: if (discards[j] == 2)
707: ++provisional;
708: }
709:
710: /* Cancel provisional discards at end, and shrink the run. */
711: while (j > i && discards[j - 1] == 2) {
712: discards[--j] = 0;
713: --provisional;
714: }
715:
716: /* Now we have the length of a run of discardable lines
717: whose first and last are not provisional. */
718: length = j - i;
719:
720: /* If 1/4 of the lines in the run are provisional,
721: cancel discarding of all provisional lines in the run. */
722: if (provisional * 4 > length) {
723: while (j > i)
724: if (discards[--j] == 2)
725: discards[j] = 0;
726: } else {
727: int consec;
728: int minimum = 1;
729: int tem = length / 4;
730:
731: /* MINIMUM is approximate square root of LENGTH/4.
732: A subrun of two or more provisionals can stand
733: when LENGTH is at least 16.
734: A subrun of 4 or more can stand when LENGTH >= 64. */
735: while ((tem = tem >> 2) > 0)
736: minimum *= 2;
737: minimum++;
738:
739: /* Cancel any subrun of MINIMUM or more provisionals
740: within the larger run. */
741: for (j = 0, consec = 0; j < length; j++)
742: if (discards[i + j] != 2)
743: consec = 0;
744: else if (minimum == ++consec)
745: /* Back up to start of subrun, to cancel it all. */
746: j -= consec;
747: else if (minimum < consec)
748: discards[i + j] = 0;
749:
750: /* Scan from beginning of run
751: until we find 3 or more nonprovisionals in a row
752: or until the first nonprovisional at least 8 lines in.
753: Until that point, cancel any provisionals. */
754: for (j = 0, consec = 0; j < length; j++) {
755: if (j >= 8 && discards[i + j] == 1)
756: break;
757: if (discards[i + j] == 2) {
758: consec = 0;
759: discards[i + j] = 0;
760: } else if (discards[i + j] == 0)
761: consec = 0;
762: else
763: consec++;
764: if (consec == 3)
765: break;
766: }
767:
768: /* I advances to the last line of the run. */
769: i += length - 1;
770:
771: /* Same thing, from end. */
772: for (j = 0, consec = 0; j < length; j++) {
773: if (j >= 8 && discards[i - j] == 1)
774: break;
775: if (discards[i - j] == 2) {
776: consec = 0;
777: discards[i - j] = 0;
778: } else if (discards[i - j] == 0)
779: consec = 0;
780: else
781: consec++;
782: if (consec == 3)
783: break;
784: }
785: }
786: }
787: }
788: }
789:
790: /** Actually discard the lines.
791: @param discards flags lines to be discarded
792: */
793: private void discard(final byte[] discards) {
794: final int end = buffered_lines;
795: int j = 0;
796: for (int i = 0; i < end; ++i)
797: if (no_discards || discards[i] == 0) {
798: undiscarded[j] = equivs[i];
799: realindexes[j++] = i;
800: } else
801: changed_flag[1 + i] = true;
802: nondiscarded_lines = j;
803: }
804:
805: file_data(Vector data, Hashtable h) {
806: buffered_lines = data.size();
807:
808: equivs = new int[buffered_lines];
809: undiscarded = new int[buffered_lines];
810: realindexes = new int[buffered_lines];
811:
812: for (int i = 0; i < data.size(); ++i) {
813: Integer ir = (Integer) h.get(data.elementAt(i));
814: if (ir == null)
815: h.put(data.elementAt(i), new Integer(
816: equivs[i] = equiv_max++));
817: else
818: equivs[i] = ir.intValue();
819: }
820: }
821:
822: file_data(PosInTerm data, Hashtable h) {
823: buffered_lines = data.depth();
824:
825: equivs = new int[buffered_lines];
826: undiscarded = new int[buffered_lines];
827: realindexes = new int[buffered_lines];
828:
829: IntIterator it = data.reverseIterator();
830:
831: for (int i = buffered_lines - 1; i >= 0; i--) {
832: equivs[i] = it.next();
833: if (equivs[i] > equiv_max)
834: equiv_max = equivs[i];
835: }
836: equiv_max++; // will be used as array length
837: }
838:
839: /** Adjust inserts/deletes of blank lines to join changes
840: as much as possible.
841:
842: We do something when a run of changed lines include a blank
843: line at one end and have an excluded blank line at the other.
844: We are free to choose which blank line is included.
845: `compareseq' always chooses the one at the beginning,
846: but usually it is cleaner to consider the following blank line
847: to be the "change". The only exception is if the preceding blank line
848: would join this change to other changes.
849: @param f the file being compared against
850: */
851:
852: void shift_boundaries(file_data f) {
853: final boolean[] changed = changed_flag;
854: final boolean[] other_changed = f.changed_flag;
855: int i = 0;
856: int j = 0;
857: int i_end = buffered_lines;
858: int preceding = -1;
859: int other_preceding = -1;
860:
861: for (;;) {
862: int start, end, other_start;
863:
864: /* Scan forwards to find beginning of another run of changes.
865: Also keep track of the corresponding point in the other file. */
866:
867: while (i < i_end && !changed[1 + i]) {
868: while (other_changed[1 + j++])
869: /* Non-corresponding lines in the other file
870: will count as the preceding batch of changes. */
871: other_preceding = j;
872: i++;
873: }
874:
875: if (i == i_end)
876: break;
877:
878: start = i;
879: other_start = j;
880:
881: for (;;) {
882: /* Now find the end of this run of changes. */
883:
884: while (i < i_end && changed[1 + i])
885: i++;
886: end = i;
887:
888: /* If the first changed line matches the following unchanged one,
889: and this run does not follow right after a previous run,
890: and there are no lines deleted from the other file here,
891: then classify the first changed line as unchanged
892: and the following line as changed in its place. */
893:
894: /* You might ask, how could this run follow right after another?
895: Only because the previous run was shifted here. */
896:
897: if (end != i_end
898: && equivs[start] == equivs[end]
899: && !other_changed[1 + j]
900: && end != i_end
901: && !((preceding >= 0 && start == preceding) || (other_preceding >= 0 && other_start == other_preceding))) {
902: changed[1 + end++] = true;
903: changed[1 + start++] = false;
904: ++i;
905: /* Since one line-that-matches is now before this run
906: instead of after, we must advance in the other file
907: to keep in synch. */
908: ++j;
909: } else
910: break;
911: }
912:
913: preceding = i;
914: other_preceding = j;
915: }
916: }
917:
918: /** Number of elements (lines) in this file. */
919: final int buffered_lines;
920:
921: /** Vector, indexed by line number, containing an equivalence code for
922: each line. It is this vector that is actually compared with that
923: of another file to generate differences. */
924: private final int[] equivs;
925:
926: /** Vector, like the previous one except that
927: the elements for discarded lines have been squeezed out. */
928: final int[] undiscarded;
929:
930: /** Vector mapping virtual line numbers (not counting discarded lines)
931: to real ones (counting those lines). Both are origin-0. */
932: final int[] realindexes;
933:
934: /** Total number of nondiscarded lines. */
935: int nondiscarded_lines;
936:
937: /** Array, indexed by real origin-1 line number,
938: containing true for a line that is an insertion or a deletion.
939: The results of comparison are stored here. */
940: boolean[] changed_flag;
941:
942: }
943:
944: public static void main(String argv[]) {
945: PosInTerm p1 = PosInTerm.TOP_LEVEL.down(1).down(2).down(3)
946: .down(4);
947: PosInTerm p2 = PosInTerm.TOP_LEVEL.down(0).down(1).down(3)
948: .down(5);
949: System.out.println(new DiffMyers(p1, p2).diff_2());
950: }
951:
952: }
|