001: /*
002: * ====================================================================
003: *
004: * The Apache Software License, Version 1.1
005: *
006: * Copyright (c) 1999-2003 The Apache Software Foundation. All rights
007: * reserved.
008: *
009: * Redistribution and use in source and binary forms, with or without
010: * modification, are permitted provided that the following conditions
011: * are met:
012: *
013: * 1. Redistributions of source code must retain the above copyright
014: * notice, this list of conditions and the following disclaimer.
015: *
016: * 2. Redistributions in binary form must reproduce the above copyright
017: * notice, this list of conditions and the following disclaimer in
018: * the documentation and/or other materials provided with the
019: * distribution.
020: *
021: * 3. The end-user documentation included with the redistribution, if
022: * any, must include the following acknowledgement:
023: * "This product includes software developed by the
024: * Apache Software Foundation (http://www.apache.org/)."
025: * Alternately, this acknowledgement may appear in the software itself,
026: * if and wherever such third-party acknowledgements normally appear.
027: *
028: * 4. The names "The Jakarta Project", "Commons", and "Apache Software
029: * Foundation" must not be used to endorse or promote products derived
030: * from this software without prior written permission. For written
031: * permission, please contact apache@apache.org.
032: *
033: * 5. Products derived from this software may not be called "Apache"
034: * nor may "Apache" appear in their names without prior written
035: * permission of the Apache Software Foundation.
036: *
037: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
038: * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
039: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
040: * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
041: * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
042: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
043: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
044: * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
045: * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
046: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
047: * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
048: * SUCH DAMAGE.
049: * ====================================================================
050: *
051: * This software consists of voluntary contributions made by many
052: * individuals on behalf of the Apache Software Foundation. For more
053: * information on the Apache Software Foundation, please see
054: * <http://www.apache.org/>.
055: *
056: */
057:
058: package org.apache.commons.jrcs.diff.myers;
059:
060: import org.apache.commons.jrcs.diff.Chunk;
061: import org.apache.commons.jrcs.diff.Delta;
062: import org.apache.commons.jrcs.diff.Diff;
063: import org.apache.commons.jrcs.diff.DiffAlgorithm;
064: import org.apache.commons.jrcs.diff.DifferentiationFailedException;
065: import org.apache.commons.jrcs.diff.Revision;
066:
067: /**
068: * A clean-room implementation of <a
069: * href="http://www.cs.arizona.edu/people/gene/"> Eugene Myers</a> differencing
070: * algorithm.
071: * <p>
072: * See the paper at <a
073: * href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">
074: * http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps</a>
075: *
076: * @version $Revision: 7756 $ $Date: 2006-04-12 18:30:19 +0100 (Wed, 12 Apr
077: * 2006) $
078: * @author <a href="mailto:juanco@suigeneris.org">Juanco Anez</a>
079: * @see Delta
080: * @see Revision
081: * @see Diff
082: */
083: public class MyersDiff implements DiffAlgorithm {
084: /**
085: * Constructs an instance of the Myers differencing algorithm.
086: */
087: public MyersDiff() {
088: }
089:
090: /**
091: * {@inheritDoc}
092: */
093: public Revision diff(Object[] orig, Object[] rev)
094: throws DifferentiationFailedException {
095: PathNode path = buildPath(orig, rev);
096: return buildRevision(path, orig, rev);
097: }
098:
099: /**
100: * Computes the minimum diffpath that expresses de differences between the
101: * original and revised sequences, according to Gene Myers differencing
102: * algorithm.
103: *
104: * @param orig
105: * The original sequence.
106: * @param rev
107: * The revised sequence.
108: * @return A minimum {@link PathNode Path} accross the differences graph.
109: * @throws DifferentiationFailedException
110: * if a diff path could not be found.
111: */
112: public static PathNode buildPath(Object[] orig, Object[] rev)
113: throws DifferentiationFailedException {
114: if (orig == null)
115: throw new IllegalArgumentException(
116: "original sequence is null");
117: if (rev == null)
118: throw new IllegalArgumentException(
119: "revised sequence is null");
120:
121: // these are local constants
122: final int N = orig.length;
123: final int M = rev.length;
124:
125: final int MAX = N + M + 1;
126: final int size = 1 + 2 * MAX;
127: final int middle = (size + 1) / 2;
128: final PathNode diagonal[] = new PathNode[size];
129:
130: PathNode path = null;
131:
132: diagonal[middle + 1] = new Snake(0, -1, null);
133: for (int d = 0; d < MAX; d++) {
134: for (int k = -d; k <= d; k += 2) {
135: final int kmiddle = middle + k;
136: final int kplus = kmiddle + 1;
137: final int kminus = kmiddle - 1;
138: PathNode prev = null;
139:
140: int i;
141: if ((k == -d)
142: || (k != d && diagonal[kminus].i < diagonal[kplus].i)) {
143: i = diagonal[kplus].i;
144: prev = diagonal[kplus];
145: } else {
146: i = diagonal[kminus].i + 1;
147: prev = diagonal[kminus];
148: }
149:
150: diagonal[kminus] = null; // no longer used
151:
152: int j = i - k;
153:
154: PathNode node = new DiffNode(i, j, prev);
155:
156: // orig and rev are zero-based
157: // but the algorithm is one-based
158: // that's why there's no +1 when indexing the sequences
159: while (i < N && j < M && orig[i].equals(rev[j])) {
160: i++;
161: j++;
162: }
163: if (i > node.i)
164: node = new Snake(i, j, node);
165:
166: diagonal[kmiddle] = node;
167:
168: if (i >= N && j >= M) {
169: return diagonal[kmiddle];
170: }
171: }
172: diagonal[middle + d - 1] = null;
173:
174: }
175: // According to Myers, this cannot happen
176: throw new DifferentiationFailedException(
177: "could not find a diff path");
178: }
179:
180: /**
181: * Constructs a {@link Revision} from a difference path.
182: *
183: * @param path
184: * The path.
185: * @param orig
186: * The original sequence.
187: * @param rev
188: * The revised sequence.
189: * @return A {@link Revision} script corresponding to the path.
190: * @throws DifferentiationFailedException
191: * if a {@link Revision} could not be built from the given path.
192: */
193: public static Revision buildRevision(PathNode path, Object[] orig,
194: Object[] rev) {
195: if (path == null)
196: throw new IllegalArgumentException("path is null");
197: if (orig == null)
198: throw new IllegalArgumentException(
199: "original sequence is null");
200: if (rev == null)
201: throw new IllegalArgumentException(
202: "revised sequence is null");
203:
204: Revision revision = new Revision();
205: if (path.isSnake())
206: path = path.prev;
207: while (path != null && path.prev != null && path.prev.j >= 0) {
208: if (path.isSnake())
209: throw new IllegalStateException(
210: "bad diffpath: found snake when looking for diff");
211: int i = path.i;
212: int j = path.j;
213:
214: path = path.prev;
215: int ianchor = path.i;
216: int janchor = path.j;
217:
218: Delta delta = Delta.newDelta(new Chunk(orig, ianchor, i
219: - ianchor), new Chunk(rev, janchor, j - janchor));
220: revision.insertDelta(delta);
221: if (path.isSnake())
222: path = path.prev;
223: }
224: return revision;
225: }
226:
227: }
|