001: package org.incava.util.diff;
002:
003: import java.util.*;
004:
005: /**
006: * Compares two collections, returning a list of the additions, changes, and
007: * deletions between them. A <code>Comparator</code> may be passed as an
008: * argument to the constructor, and will thus be used. If not provided, the
009: * initial value in the <code>a</code> ("from") collection will be looked at to
010: * see if it supports the <code>Comparable</code> interface. If so, its
011: * <code>equals</code> and <code>compareTo</code> methods will be invoked on the
012: * instances in the "from" and "to" collections; otherwise, for speed, hash
013: * codes from the objects will be used instead for comparison.
014: *
015: * <p>The file FileDiff.java shows an example usage of this class, in an
016: * application similar to the Unix "diff" program.</p>
017: */
018: public class Diff {
019: /**
020: * The source array, AKA the "from" values.
021: */
022: protected Object[] a;
023:
024: /**
025: * The target array, AKA the "to" values.
026: */
027: protected Object[] b;
028:
029: /**
030: * The list of differences, as <code>Difference</code> instances.
031: */
032: protected List diffs = new ArrayList();
033:
034: /**
035: * The pending, uncommitted difference.
036: */
037: private Difference pending;
038:
039: /**
040: * The comparator used, if any.
041: */
042: private Comparator comparator;
043:
044: /**
045: * The thresholds.
046: */
047: private TreeMap thresh;
048:
049: /**
050: * Constructs the Diff object for the two arrays, using the given comparator.
051: */
052: public Diff(Object[] a, Object[] b, Comparator comp) {
053: this .a = a;
054: this .b = b;
055: this .comparator = comp;
056: this .thresh = null; // created in getLongestCommonSubsequences
057: }
058:
059: /**
060: * Constructs the Diff object for the two arrays, using the default
061: * comparison mechanism between the objects, such as <code>equals</code> and
062: * <code>compareTo</code>.
063: */
064: public Diff(Object[] a, Object[] b) {
065: this (a, b, null);
066: }
067:
068: /**
069: * Constructs the Diff object for the two collections, using the given
070: * comparator.
071: */
072: public Diff(Collection a, Collection b, Comparator comp) {
073: this (a.toArray(), b.toArray(), comp);
074: }
075:
076: /**
077: * Constructs the Diff object for the two collections, using the default
078: * comparison mechanism between the objects, such as <code>equals</code> and
079: * <code>compareTo</code>.
080: */
081: public Diff(Collection a, Collection b) {
082: this (a, b, null);
083: }
084:
085: /**
086: * Runs diff and returns the results.
087: */
088: public List diff() {
089: traverseSequences();
090:
091: // add the last difference, if pending:
092: if (pending != null) {
093: diffs.add(pending);
094: }
095:
096: return diffs;
097: }
098:
099: /**
100: * Traverses the sequences, seeking the longest common subsequences,
101: * invoking the methods <code>finishedA</code>, <code>finishedB</code>,
102: * <code>onANotB</code>, and <code>onBNotA</code>.
103: */
104: protected void traverseSequences() {
105: Integer[] matches = getLongestCommonSubsequences();
106:
107: int lastA = a.length - 1;
108: int lastB = b.length - 1;
109: int bi = 0;
110: int ai;
111:
112: int lastMatch = matches.length - 1;
113:
114: for (ai = 0; ai <= lastMatch; ++ai) {
115: Integer bLine = matches[ai];
116:
117: if (bLine == null) {
118: onANotB(ai, bi);
119: } else {
120: while (bi < bLine.intValue()) {
121: onBNotA(ai, bi++);
122: }
123:
124: onMatch(ai, bi++);
125: }
126: }
127:
128: boolean calledFinishA = false;
129: boolean calledFinishB = false;
130:
131: while (ai <= lastA || bi <= lastB) {
132:
133: // last A?
134: if (ai == lastA + 1 && bi <= lastB) {
135: if (!calledFinishA && callFinishedA()) {
136: finishedA(lastA);
137: calledFinishA = true;
138: } else {
139: while (bi <= lastB) {
140: onBNotA(ai, bi++);
141: }
142: }
143: }
144:
145: // last B?
146: if (bi == lastB + 1 && ai <= lastA) {
147: if (!calledFinishB && callFinishedB()) {
148: finishedB(lastB);
149: calledFinishB = true;
150: } else {
151: while (ai <= lastA) {
152: onANotB(ai++, bi);
153: }
154: }
155: }
156:
157: if (ai <= lastA) {
158: onANotB(ai++, bi);
159: }
160:
161: if (bi <= lastB) {
162: onBNotA(ai, bi++);
163: }
164: }
165: }
166:
167: /**
168: * Override and return true in order to have <code>finishedA</code> invoked
169: * at the last element in the <code>a</code> array.
170: */
171: protected boolean callFinishedA() {
172: return false;
173: }
174:
175: /**
176: * Override and return true in order to have <code>finishedB</code> invoked
177: * at the last element in the <code>b</code> array.
178: */
179: protected boolean callFinishedB() {
180: return false;
181: }
182:
183: /**
184: * Invoked at the last element in <code>a</code>, if
185: * <code>callFinishedA</code> returns true.
186: */
187: protected void finishedA(int lastA) {
188: }
189:
190: /**
191: * Invoked at the last element in <code>b</code>, if
192: * <code>callFinishedB</code> returns true.
193: */
194: protected void finishedB(int lastB) {
195: }
196:
197: /**
198: * Invoked for elements in <code>a</code> and not in <code>b</code>.
199: */
200: protected void onANotB(int ai, int bi) {
201: if (pending == null) {
202: pending = new Difference(ai, ai, bi, -1);
203: } else {
204: pending.setDeleted(ai);
205: }
206: }
207:
208: /**
209: * Invoked for elements in <code>b</code> and not in <code>a</code>.
210: */
211: protected void onBNotA(int ai, int bi) {
212: if (pending == null) {
213: pending = new Difference(ai, -1, bi, bi);
214: } else {
215: pending.setAdded(bi);
216: }
217: }
218:
219: /**
220: * Invoked for elements matching in <code>a</code> and <code>b</code>.
221: */
222: protected void onMatch(int ai, int bi) {
223: if (pending == null) {
224: // no current pending
225: } else {
226: diffs.add(pending);
227: pending = null;
228: }
229: }
230:
231: /**
232: * Compares the two objects, using the comparator provided with the
233: * constructor, if any.
234: */
235: protected boolean equals(Object x, Object y) {
236: return comparator == null ? x.equals(y) : comparator.compare(x,
237: y) == 0;
238: }
239:
240: /**
241: * Returns an array of the longest common subsequences.
242: */
243: public Integer[] getLongestCommonSubsequences() {
244: int aStart = 0;
245: int aEnd = a.length - 1;
246:
247: int bStart = 0;
248: int bEnd = b.length - 1;
249:
250: TreeMap matches = new TreeMap();
251:
252: while (aStart <= aEnd && bStart <= bEnd
253: && equals(a[aStart], b[bStart])) {
254: matches.put(new Integer(aStart++), new Integer(bStart++));
255: }
256:
257: while (aStart <= aEnd && bStart <= bEnd
258: && equals(a[aEnd], b[bEnd])) {
259: matches.put(new Integer(aEnd--), new Integer(bEnd--));
260: }
261:
262: Map bMatches = null;
263: if (comparator == null) {
264: if (a.length > 0 && a[0] instanceof Comparable) {
265: // this uses the Comparable interface
266: bMatches = new TreeMap();
267: } else {
268: // this just uses hashCode()
269: bMatches = new HashMap();
270: }
271: } else {
272: // we don't really want them sorted, but this is the only Map
273: // implementation (as of JDK 1.4) that takes a comparator.
274: bMatches = new TreeMap(comparator);
275: }
276:
277: for (int bi = bStart; bi <= bEnd; ++bi) {
278: Object element = b[bi];
279: Object key = element;
280: List positions = (List) bMatches.get(key);
281: if (positions == null) {
282: positions = new ArrayList();
283: bMatches.put(key, positions);
284: }
285: positions.add(new Integer(bi));
286: }
287:
288: thresh = new TreeMap();
289: Map links = new HashMap();
290:
291: for (int i = aStart; i <= aEnd; ++i) {
292: Object aElement = a[i]; // keygen here.
293: List positions = (List) bMatches.get(aElement);
294:
295: if (positions != null) {
296: Integer k = new Integer(0);
297: ListIterator pit = positions.listIterator(positions
298: .size());
299: while (pit.hasPrevious()) {
300: Integer j = (Integer) pit.previous();
301:
302: k = insert(j, k);
303:
304: if (k == null) {
305: // nothing
306: } else {
307: Object value = k.intValue() > 0 ? links
308: .get(new Integer(k.intValue() - 1))
309: : null;
310: links.put(k, new Object[] { value,
311: new Integer(i), j });
312: }
313: }
314: }
315: }
316:
317: if (thresh.size() > 0) {
318: Integer ti = (Integer) thresh.lastKey();
319: Object[] link = (Object[]) links.get(ti);
320: while (link != null) {
321: Integer x = (Integer) link[1];
322: Integer y = (Integer) link[2];
323: matches.put(x, y);
324: link = (Object[]) link[0];
325: }
326: }
327:
328: return toArray(matches);
329: }
330:
331: /**
332: * Converts the map (indexed by java.lang.Integers) into an array.
333: */
334: protected static Integer[] toArray(TreeMap map) {
335: int size = map.size() == 0 ? 0 : 1 + ((Integer) map.lastKey())
336: .intValue();
337: Integer[] ary = new Integer[size];
338: Iterator it = map.keySet().iterator();
339:
340: while (it.hasNext()) {
341: Integer idx = (Integer) it.next();
342: Integer val = (Integer) map.get(idx);
343: ary[idx.intValue()] = val;
344: }
345: return ary;
346: }
347:
348: /**
349: * Returns whether the integer is not zero (including if it is not null).
350: */
351: protected static boolean isNonzero(Integer i) {
352: return i != null && i.intValue() != 0;
353: }
354:
355: /**
356: * Returns whether the value in the map for the given index is greater than
357: * the given value.
358: */
359: protected boolean isGreaterThan(Integer index, Integer val) {
360: Integer lhs = (Integer) thresh.get(index);
361: return lhs != null && val != null && lhs.compareTo(val) > 0;
362: }
363:
364: /**
365: * Returns whether the value in the map for the given index is less than
366: * the given value.
367: */
368: protected boolean isLessThan(Integer index, Integer val) {
369: Integer lhs = (Integer) thresh.get(index);
370: return lhs != null && (val == null || lhs.compareTo(val) < 0);
371: }
372:
373: /**
374: * Returns the value for the greatest key in the map.
375: */
376: protected Integer getLastValue() {
377: return (Integer) thresh.get(thresh.lastKey());
378: }
379:
380: /**
381: * Adds the given value to the "end" of the threshold map, that is, with the
382: * greatest index/key.
383: */
384: protected void append(Integer value) {
385: Integer addIdx = null;
386: if (thresh.size() == 0) {
387: addIdx = new Integer(0);
388: } else {
389: Integer lastKey = (Integer) thresh.lastKey();
390: addIdx = new Integer(lastKey.intValue() + 1);
391: }
392: thresh.put(addIdx, value);
393: }
394:
395: /**
396: * Inserts the given values into the threshold map.
397: */
398: protected Integer insert(Integer j, Integer k) {
399: if (isNonzero(k) && isGreaterThan(k, j)
400: && isLessThan(new Integer(k.intValue() - 1), j)) {
401: thresh.put(k, j);
402: } else {
403: int hi = -1;
404:
405: if (isNonzero(k)) {
406: hi = k.intValue();
407: } else if (thresh.size() > 0) {
408: hi = ((Integer) thresh.lastKey()).intValue();
409: }
410:
411: // off the end?
412: if (hi == -1 || j.compareTo(getLastValue()) > 0) {
413: append(j);
414: k = new Integer(hi + 1);
415: } else {
416: // binary search for insertion point:
417: int lo = 0;
418:
419: while (lo <= hi) {
420: int index = (hi + lo) / 2;
421: Integer val = (Integer) thresh.get(new Integer(
422: index));
423: int cmp = j.compareTo(val);
424:
425: if (cmp == 0) {
426: return null;
427: } else if (cmp > 0) {
428: lo = index + 1;
429: } else {
430: hi = index - 1;
431: }
432: }
433:
434: thresh.put(new Integer(lo), j);
435: k = new Integer(lo);
436: }
437: }
438:
439: return k;
440: }
441:
442: }
|