001: // Diff.java
002: // ---------
003: // part of YaCy
004: // (C) by Michael Peter Christen; mc@anomic.de
005: // first published on http://www.anomic.de
006: // Frankfurt, Germany, 2007
007: // Created 03.02.2007
008: //
009: // This file is contributed by Franz Brausze
010: //
011: // $LastChangedDate: $
012: // $LastChangedRevision: $
013: // $LastChangedBy: $
014: //
015: // This program is free software; you can redistribute it and/or modify
016: // it under the terms of the GNU General Public License as published by
017: // the Free Software Foundation; either version 2 of the License, or
018: // (at your option) any later version.
019: //
020: // This program is distributed in the hope that it will be useful,
021: // but WITHOUT ANY WARRANTY; without even the implied warranty of
022: // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
023: // GNU General Public License for more details.
024: //
025: // You should have received a copy of the GNU General Public License
026: // along with this program; if not, write to the Free Software
027: // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
028: //
029: // Using this software in any meaning (reading, learning, copying, compiling,
030: // running) means that you agree that the Author(s) is (are) not responsible
031: // for cost, loss of data or any harm that may be caused directly or indirectly
032: // by usage of this softare or this documentation. The usage of this software
033: // is on your own risk. The installation and usage (starting/running) of this
034: // software may allow other people or application to access your computer and
035: // any attached devices and is highly dependent on the configuration of the
036: // software which must be done by the user of the software; the author(s) is
037: // (are) also not responsible for proper configuration and usage of the
038: // software, even if provoked by documentation provided together with
039: // the software.
040: //
041: // Any changes to this file according to the GPL as documented in the file
042: // gpl.txt aside this file in the shipment you received can be done to the
043: // lines that follows this copyright notice here, but changes must not be
044: // done inside the copyright notive above. A re-distribution must contain
045: // the intact and unchanged copyright notice.
046: // Contributions and changes to the program code must be marked as such.
047:
048: package de.anomic.data;
049:
050: import java.util.ArrayList;
051:
052: /**
053: * This class provides a diff-functionality.
054: */
055: public class diff {
056:
057: private final ArrayList<Part> parts = new ArrayList<Part>();
058: private final Object[] o;
059: private final Object[] n;
060:
061: /**
062: * @param o the original <code>String</code>
063: * @param n the new <code>String</code>
064: * @throws NullPointerException if one of the arguments is <code>null</code>
065: */
066: public diff(String o, String n) {
067: this (o, n, 1);
068: }
069:
070: /**
071: * @param o the original <code>String</code>
072: * @param n the new <code>String</code>
073: * @param minConsecutive the minimum number of consecutive equal characters in
074: * both Strings. Smaller seperations will only be performed on the end of either
075: * String if needed
076: * @throws NullPointerException if <code>o</code> or <code>n</code> is
077: * <code>null</code>
078: */
079: public diff(String o, String n, int minConsecutive) {
080: if (o == null || n == null)
081: throw new NullPointerException(
082: "neither o nor n must be null");
083: this .o = new Comparable[o.length()];
084: for (int i = 0; i < o.length(); i++)
085: this .o[i] = new Character(o.charAt(i));
086: this .n = new Comparable[n.length()];
087: for (int i = 0; i < n.length(); i++)
088: this .n[i] = new Character(n.charAt(i));
089: parse((minConsecutive > 0) ? minConsecutive : 1);
090: }
091:
092: public diff(Object[] o, Object[] n, int minConsecutive) {
093: if (o == null || n == null)
094: throw new NullPointerException(
095: "neither o nor n must be null");
096: this .o = o;
097: this .n = n;
098: parse((minConsecutive > 0) ? minConsecutive : 1);
099: }
100:
101: private void parse(int minLength) {
102: /* Matrix: find as long diagonals as possible,
103: * delete the old horizontally and add the new vertically
104: *
105: * ~ OLD ~
106: * |T|H|E| |F|I|R|S|T| |S|E|N|T|E|N|C|E|
107: * T|#| | | | | | | |#| | | | |#| | | | |
108: * H| |#| | | | | | | | | | | | | | | | |
109: * E| | |#| | | | | | | | |#| | |#| | |#|
110: * | | | |#| | | | | |#| | | | | | | | |
111: * N| | | | | | | | | | | | |#| | |#| | |
112: * E| | |#| | | | | | | | |#| | |#| | |#|
113: * ~ X| | | | | | | | | | | | | | | | | | |
114: * N T|#| | | | | | | |#| | | | |#| | | | |
115: * E | | | |#| | | | | |#| | | | | | | | |
116: * W S| | | | | | | |#| | |#| | | | | | | |
117: * ~ E| | |#| | | | | | | | |#| | |#| | |#|
118: * N| | | | | | | | | | | | |#| | |#| | |
119: * T|#| | | | | | | |#| | | | |#| | | | |
120: * E| | |#| | | | | | | | |#| | |#| | |#|
121: * N| | | | | | | | | | | | |#| | |#| | |
122: * C| | | | | | | | | | | | | | | | |#| |
123: * E| | |#| | | | | | | | |#| | |#| | |#|
124: */
125: boolean[][] matrix = new boolean[this .n.length][this .o.length];
126: for (int y = 0; y < this .n.length; y++)
127: for (int x = 0; x < this .o.length; x++)
128: matrix[y][x] = this .o[x].equals(this .n[y]);
129:
130: int s = 0, t = 0;
131: int[] tmp;
132: while ((tmp = findDiagonal(s, t, matrix, minLength)) != null) {
133: addReplacementParts(s, t, tmp[0], tmp[1]);
134: this .parts.add(new Part(Part.UNCHANGED, tmp[0], s = tmp[0]
135: + tmp[2]));
136: t = tmp[1] + tmp[2];
137: }
138: addReplacementParts(s, t, this .o.length, this .n.length);
139: }
140:
141: private void addReplacementParts(int startx, int starty, int endx,
142: int endy) {
143: if (startx < endx)
144: this .parts.add(new Part(Part.DELETED, startx, endx));
145: if (starty < endy)
146: this .parts.add(new Part(Part.ADDED, starty, endy));
147: }
148:
149: /** Search for a diagonal with minimal length <code>minLength</code> line by line in a submatrix
150: * <code>{ x, y, matrix[0].length, matrix.length}</code> of the <code>matrix</code>:<br>
151: * <code> {_1,__,__} -> X axis</code><br>
152: * <code> ,{__,_1,__} </code><br>
153: * <code> ,{__,__,_1} </code><br>
154: * <ul>
155: * TODO: some optimisation ideas
156: * <li>search for a better algorithm on the inet!!! :) </li>
157: * <li>pass only the part of the matrix where the search takes place - not the whole matrix everytime</li>
158: * <li>break the inner loop if the rest of the matrix is smaller than minLength (and no diagonal has been found yet) </li>
159: * <li>return diagonal topologicaly closest to the {0,0} </li>
160: * </ul>
161: * @param x the starting position of the search on the optical horizontal axis
162: * @param y the starting position of the search on the optical vertical axis<br>
163: * @param matrix the matrix to search through
164: * @param minLength the minimal desired length of a diagonal to find
165: * @return a vector in the form <code>{ diagStartX, diagStartY, diagLength }</code> where <code> diagLength >= minLength</code>
166: */
167: private static int[] findDiagonal(int x, int y, boolean[][] matrix,
168: int minLength) {
169: int rx, ry, yy, xx, i;
170: for (yy = y; yy < matrix.length; yy++)
171: for (xx = x; xx < matrix[yy].length; xx++)
172: if (matrix[yy][xx]) { // reverse order! [y][x]
173: // save position
174: rx = xx;
175: ry = yy;
176: // follow diagonal as long as far as possible
177: for (i = 1; (yy + i) < matrix.length
178: && (xx + i) < matrix[yy].length; i++)
179: if (!matrix[yy + i][xx + i])
180: break;
181: if (i >= minLength)
182: return new int[] { rx, ry, i }; // swap back the x and y axes for better readability
183: }
184: return null;
185: }
186:
187: /**
188: * @return the original <code>Object[]</code> passed to this class on instantiation
189: */
190: public Object[] getOriginal() {
191: return this .o;
192: }
193:
194: /**
195: * @return the new <code>Object[]</code> passed to this class on instantiation
196: */
197: public Object[] getNew() {
198: return this .n;
199: }
200:
201: /**
202: * A diff is composed of different parts. Each of these parts stands for an
203: * operation, like "do nothing", "add" or "delete".
204: *
205: * @see Part
206: * @return all parts this diff consists of in correct order
207: */
208: public Part[] getParts() {
209: return this .parts.toArray(new Part[this .parts.size()]);
210: }
211:
212: public String toString() {
213: StringBuffer sb = new StringBuffer(this .parts.size() * 20);
214: for (int j = 0; j < this .parts.size(); j++)
215: sb.append(this .parts.get(j).toString()).append("\n");
216: return new String(sb);
217: }
218:
219: /**
220: * This class represents a part of the diff, meaning one operation
221: * (or one line of a "normal" diff)
222: */
223: public class Part {
224:
225: /** The string this diff-part cares about has not been changed */
226: public static final int UNCHANGED = 0;
227: /** The string this diff-part cares about has been added in the new version */
228: public static final int ADDED = 1;
229: /** The string this diff-part cares about has been removed in the new version */
230: public static final int DELETED = 2;
231:
232: private final int action;
233: private final int posOld;
234: private final int posNew;
235:
236: private Part(int action, int posOld, int posNew) {
237: this .action = action;
238: this .posOld = posOld;
239: this .posNew = posNew;
240: }
241:
242: /**
243: * @return whether the string shan't be changed, shall be added or deleted
244: */
245: public int getAction() {
246: return this .action;
247: }
248:
249: public int getPosOld() {
250: return this .posOld;
251: }
252:
253: public int getPosNew() {
254: return this .posNew;
255: }
256:
257: /**
258: * @return the plain string this diff-part cares about
259: */
260: public String getString() {
261: final StringBuffer sb = new StringBuffer(this .posNew
262: - this .posOld);
263: if (this .action == ADDED) {
264: for (int i = this .posOld; i < this .posNew; i++)
265: sb.append(diff.this .n[i]);
266: } else {
267: for (int i = this .posOld; i < this .posNew; i++)
268: sb.append(diff.this .o[i]);
269: }
270: return new String(sb);
271: }
272:
273: /**
274: * @return the string this diff-part cares about in typical diff-notation:
275: * <dl>
276: * <dt>unchanged</dt><dd>"<code> STRING</code>"</dd>
277: * <dt>added</dt><dd>"<code>+ STRING</code>"</dd>
278: * <dt>deleted</dt><dd>"<code>- STRING</code>"</dd>
279: * </dl>
280: */
281: public String toString() {
282: return ((this .action == UNCHANGED) ? " "
283: : (this .action == ADDED) ? "+" : "-")
284: + " " + getString();
285: }
286: }
287:
288: public static String toHTML(diff[] diffs) {
289: StringBuffer sb = new StringBuffer(diffs.length * 60);
290: diff.Part[] ps;
291: for (int i = 0; i < diffs.length; i++) {
292: sb.append("<p class=\"diff\">\n");
293: ps = diffs[i].getParts();
294: for (int j = 0; j < ps.length; j++) {
295: sb.append("<span\nclass=\"");
296: switch (ps[j].getAction()) {
297: case diff.Part.UNCHANGED:
298: sb.append("unchanged");
299: break;
300: case diff.Part.ADDED:
301: sb.append("added");
302: break;
303: case diff.Part.DELETED:
304: sb.append("deleted");
305: break;
306: }
307: sb.append("\">").append(
308: htmlTools.encodeUnicode2html(ps[j].getString(),
309: true).replaceAll("\n", "<br />"));
310: sb.append("</span>");
311: }
312: sb.append("</p>");
313: }
314: return new String(sb);
315: }
316: }
|