001: /**
002: * Copyright (c) 2000-2008 Liferay, Inc. All rights reserved.
003: *
004: * Permission is hereby granted, free of charge, to any person obtaining a copy
005: * of this software and associated documentation files (the "Software"), to deal
006: * in the Software without restriction, including without limitation the rights
007: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
008: * copies of the Software, and to permit persons to whom the Software is
009: * furnished to do so, subject to the following conditions:
010: *
011: * The above copyright notice and this permission notice shall be included in
012: * all copies or substantial portions of the Software.
013: *
014: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
017: * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
019: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
020: * SOFTWARE.
021: */package com.liferay.util.diff;
022:
023: import com.liferay.portal.kernel.util.StringMaker;
024: import com.liferay.portal.kernel.util.StringPool;
025: import com.liferay.util.FileUtil;
026:
027: import java.io.Reader;
028:
029: import java.util.ArrayList;
030: import java.util.Iterator;
031: import java.util.List;
032:
033: import org.incava.util.diff.Diff;
034: import org.incava.util.diff.Difference;
035:
036: /**
037: * <a href="DiffUtil.java.html"><b><i>View Source</i></b></a>
038: *
039: * <p>
040: * This class can compare two different versions of a text. Source refers to
041: * the earliest version of the text and target refers to a modified version of
042: * source. Changes are considered either as a removal from the source or as an
043: * addition to the target. This class detects changes to an entire line and also
044: * detects changes within lines, such as, removal or addition of characters.
045: * Take a look at <code>DiffTest</code> to see the expected inputs and outputs.
046: * </p>
047: *
048: * @author Bruno Farache
049: *
050: */
051: public class DiffUtil {
052:
053: public static final String OPEN_INS = "<ins>";
054:
055: public static final String CLOSE_INS = "</ins>";
056:
057: public static final String OPEN_DEL = "<del>";
058:
059: public static final String CLOSE_DEL = "</del>";
060:
061: public static final String CONTEXT_LINE = "#context#line#";
062:
063: /**
064: * This is a diff method with default values.
065: *
066: * @param source the <code>Reader</code> of the source text
067: * @param target the <code>Reader</code> of the target text
068: * @return an array containing two lists of <code>DiffResults</code>,
069: * the first element contains DiffResults related to changes
070: * in source and the second element to changes in target
071: */
072: public static List[] diff(Reader source, Reader target) {
073: int margin = 2;
074:
075: return diff(source, target, OPEN_INS, CLOSE_INS, OPEN_DEL,
076: CLOSE_DEL, margin);
077: }
078:
079: /**
080: * The main entrance of this class. This method will compare the two texts,
081: * highlight the changes by enclosing them with markers and return a list
082: * of <code>DiffResults</code>.
083: *
084: * @param source the <code>Reader</code> of the source text, this can
085: * be for example, an instance of FileReader or StringReader
086: * @param target the <code>Reader</code> of the target text
087: * @param addedMarkerStart the initial marker for highlighting
088: * additions
089: * @param addedMarkerEnd the end marker for highlighting additions
090: * @param deletedMarkerStart the initial marker for highlighting
091: * removals
092: * @param deletedMarkerEnd the end marker for highlighting removals
093: * @param margin the number of lines that will be added before the
094: * first changed line
095: * @return an array containing two lists of <code>DiffResults</code>,
096: * the first element contains DiffResults related to changes
097: * in source and the second element to changes in target
098: */
099: public static List[] diff(Reader source, Reader target,
100: String addedMarkerStart, String addedMarkerEnd,
101: String deletedMarkerStart, String deletedMarkerEnd,
102: int margin) {
103:
104: List sourceResults = new ArrayList();
105: List targetResults = new ArrayList();
106:
107: List[] results = new List[] { sourceResults, targetResults };
108:
109: // Convert the texts to Lists where each element are lines of the texts.
110:
111: List sourceStringList = FileUtil.toList(source);
112: List targetStringList = FileUtil.toList(target);
113:
114: // Make a a Diff of these lines and iterate over their Differences.
115:
116: Diff diff = new Diff(sourceStringList, targetStringList);
117:
118: List differences = diff.diff();
119:
120: Iterator itr = differences.iterator();
121:
122: while (itr.hasNext()) {
123: Difference difference = (Difference) itr.next();
124:
125: if (difference.getAddedEnd() == Difference.NONE) {
126:
127: // Lines were deleted from source only.
128:
129: _highlightLines(sourceStringList, deletedMarkerStart,
130: deletedMarkerEnd, difference.getDeletedStart(),
131: difference.getDeletedEnd());
132:
133: margin = _calculateMargin(sourceResults, targetResults,
134: difference.getDeletedStart(), difference
135: .getAddedStart(), margin);
136:
137: List changedLines = _addMargins(sourceResults,
138: sourceStringList, difference.getDeletedStart(),
139: margin);
140:
141: _addResults(sourceResults, sourceStringList,
142: changedLines, difference.getDeletedStart(),
143: difference.getDeletedEnd());
144:
145: changedLines = _addMargins(targetResults,
146: targetStringList, difference.getAddedStart(),
147: margin);
148:
149: int deletedLines = difference.getDeletedEnd() + 1
150: - difference.getDeletedStart();
151:
152: for (int i = 0; i < deletedLines; i++) {
153: changedLines.add(CONTEXT_LINE);
154: }
155:
156: DiffResult diffResult = new DiffResult(difference
157: .getDeletedStart(), changedLines);
158:
159: targetResults.add(diffResult);
160: } else if (difference.getDeletedEnd() == Difference.NONE) {
161:
162: // Lines were added to target only.
163:
164: _highlightLines(targetStringList, addedMarkerStart,
165: addedMarkerEnd, difference.getAddedStart(),
166: difference.getAddedEnd());
167:
168: margin = _calculateMargin(sourceResults, targetResults,
169: difference.getDeletedStart(), difference
170: .getAddedStart(), margin);
171:
172: List changedLines = _addMargins(sourceResults,
173: sourceStringList, difference.getDeletedStart(),
174: margin);
175:
176: int addedLines = difference.getAddedEnd() + 1
177: - difference.getAddedStart();
178:
179: for (int i = 0; i < addedLines; i++) {
180: changedLines.add(CONTEXT_LINE);
181: }
182:
183: DiffResult diffResult = new DiffResult(difference
184: .getAddedStart(), changedLines);
185:
186: sourceResults.add(diffResult);
187:
188: changedLines = _addMargins(targetResults,
189: targetStringList, difference.getAddedStart(),
190: margin);
191:
192: _addResults(targetResults, targetStringList,
193: changedLines, difference.getAddedStart(),
194: difference.getAddedEnd());
195: } else {
196:
197: // Lines were deleted from source and added to target at the
198: // same position. It needs to check for characters differences.
199:
200: _checkCharDiffs(sourceResults, targetResults,
201: sourceStringList, targetStringList,
202: addedMarkerStart, addedMarkerEnd,
203: deletedMarkerStart, deletedMarkerEnd,
204: difference, margin);
205: }
206: }
207:
208: return results;
209: }
210:
211: private static List _addMargins(List results, List stringList,
212: int beginPos, int margin) {
213:
214: List changedLines = new ArrayList();
215:
216: if (margin == 0 || beginPos == 0) {
217: return changedLines;
218: }
219:
220: int i = beginPos - margin;
221:
222: for (; i < 0; i++) {
223: changedLines.add(CONTEXT_LINE);
224: }
225:
226: for (; i < beginPos; i++) {
227: if (i < stringList.size()) {
228: changedLines.add(stringList.get(i));
229: }
230: }
231:
232: return changedLines;
233: }
234:
235: private static void _addResults(List results, List stringList,
236: List changedLines, int start, int end) {
237:
238: changedLines.addAll(stringList.subList(start, end + 1));
239:
240: DiffResult diffResult = new DiffResult(start, changedLines);
241:
242: results.add(diffResult);
243: }
244:
245: private static int _calculateMargin(List sourceResults,
246: List targetResults, int sourceBeginPos, int targetBeginPos,
247: int margin) {
248:
249: int sourceMargin = _checkOverlapping(sourceResults,
250: sourceBeginPos, margin);
251: int targetMargin = _checkOverlapping(targetResults,
252: targetBeginPos, margin);
253:
254: if (sourceMargin < targetMargin) {
255: return sourceMargin;
256: }
257:
258: return targetMargin;
259: }
260:
261: private static void _checkCharDiffs(List sourceResults,
262: List targetResults, List sourceStringList,
263: List targetStringList, String addedMarkerStart,
264: String addedMarkerEnd, String deletedMarkerStart,
265: String deletedMarkerEnd, Difference difference, int margin) {
266:
267: boolean aligned = false;
268:
269: int i = difference.getDeletedStart();
270: int j = difference.getAddedStart();
271:
272: // A line with changed characters may have its position shifted some
273: // lines above or below. These for loops will try to align these lines.
274: // While these lines are not aligned, highlight them as either additions
275: // or deletions.
276:
277: for (; i <= difference.getDeletedEnd(); i++) {
278: for (; j <= difference.getAddedEnd(); j++) {
279: if (_lineDiff(sourceResults, targetResults,
280: sourceStringList, targetStringList,
281: addedMarkerStart, addedMarkerEnd,
282: deletedMarkerStart, deletedMarkerEnd, i, j,
283: false)) {
284:
285: aligned = true;
286:
287: break;
288: } else {
289: _highlightLines(targetStringList, addedMarkerStart,
290: addedMarkerEnd, j, j);
291:
292: DiffResult targetResult = new DiffResult(j,
293: targetStringList.subList(j, j + 1));
294:
295: targetResults.add(targetResult);
296:
297: sourceResults.add(new DiffResult(j, CONTEXT_LINE));
298: }
299: }
300:
301: if (aligned) {
302: break;
303: } else {
304: _highlightLines(sourceStringList, deletedMarkerStart,
305: deletedMarkerEnd, i, i);
306:
307: DiffResult sourceResult = new DiffResult(i,
308: sourceStringList.subList(i, i + 1));
309:
310: sourceResults.add(sourceResult);
311:
312: targetResults.add(new DiffResult(i, CONTEXT_LINE));
313: }
314: }
315:
316: i = i + 1;
317: j = j + 1;
318:
319: // Lines are aligned, check for differences of the following lines.
320:
321: for (; i <= difference.getDeletedEnd()
322: && j <= difference.getAddedEnd(); i++, j++) {
323:
324: _lineDiff(sourceResults, targetResults, sourceStringList,
325: targetStringList, addedMarkerStart, addedMarkerEnd,
326: deletedMarkerStart, deletedMarkerEnd, i, j, true);
327: }
328:
329: // After the for loop above, some lines might remained unchecked.
330: // They are considered as deletions or additions.
331:
332: for (; i <= difference.getDeletedEnd(); i++) {
333: _highlightLines(sourceStringList, deletedMarkerStart,
334: deletedMarkerEnd, i, i);
335:
336: DiffResult sourceResult = new DiffResult(i,
337: sourceStringList.subList(i, i + 1));
338:
339: sourceResults.add(sourceResult);
340:
341: targetResults.add(new DiffResult(i, CONTEXT_LINE));
342: }
343:
344: for (; j <= difference.getAddedEnd(); j++) {
345: _highlightLines(targetStringList, addedMarkerStart,
346: addedMarkerEnd, j, j);
347:
348: DiffResult targetResult = new DiffResult(j,
349: targetStringList.subList(j, j + 1));
350:
351: targetResults.add(targetResult);
352:
353: sourceResults.add(new DiffResult(j, CONTEXT_LINE));
354: }
355: }
356:
357: private static int _checkOverlapping(List results, int beginPos,
358: int margin) {
359:
360: if (results.size() == 0 || (beginPos - margin) < 0) {
361: return margin;
362: }
363:
364: DiffResult lastDiff = (DiffResult) results
365: .get(results.size() - 1);
366:
367: if (lastDiff.getChangedLines().size() == 0) {
368: return margin;
369: }
370:
371: int lastChangedLine = (lastDiff.getLineNumber() - 1)
372: + lastDiff.getChangedLines().size();
373:
374: int currentChangedLine = beginPos - margin;
375:
376: if ((lastDiff.getChangedLines().size() == 1)
377: && (lastDiff.getChangedLines().get(0)
378: .equals(CONTEXT_LINE))) {
379:
380: currentChangedLine = currentChangedLine + 1;
381: }
382:
383: if (currentChangedLine < lastChangedLine) {
384: return margin + currentChangedLine - lastChangedLine;
385: }
386:
387: return margin;
388: }
389:
390: private static void _highlightChars(List stringList,
391: String markerStart, String markerEnd, int beginPos,
392: int endPos) {
393:
394: String start = markerStart + stringList.get(beginPos);
395:
396: stringList.set(beginPos, start);
397:
398: String end = stringList.get(endPos) + markerEnd;
399:
400: stringList.set(endPos, end);
401: }
402:
403: private static void _highlightLines(List stringList,
404: String markerStart, String markerEnd, int beginPos,
405: int endPos) {
406:
407: for (int i = beginPos; i <= endPos; i++) {
408: stringList.set(i, markerStart + stringList.get(i)
409: + markerEnd);
410: }
411: }
412:
413: private static boolean _lineDiff(List sourceResults,
414: List targetResults, List sourceStringList,
415: List targetStringList, String addedMarkerStart,
416: String addedMarkerEnd, String deletedMarkerStart,
417: String deletedMarkerEnd, int sourceChangedLine,
418: int targetChangedLine, boolean aligned) {
419:
420: String source = (String) sourceStringList
421: .get(sourceChangedLine);
422: String target = (String) targetStringList
423: .get(targetChangedLine);
424:
425: // Convert the lines to lists where each element are chars of the lines.
426:
427: List sourceList = _toList(source);
428: List targetList = _toList(target);
429:
430: Diff diff = new Diff(sourceList, targetList);
431:
432: List differences = diff.diff();
433:
434: Iterator itr = differences.iterator();
435:
436: int deletedChars = 0;
437: int addedChars = 0;
438:
439: // The following while loop will calculate how many characters of
440: // the source line need to be changed to be equals to the target line.
441:
442: while (itr.hasNext() && !aligned) {
443: Difference difference = (Difference) itr.next();
444:
445: if (difference.getDeletedEnd() != Difference.NONE) {
446: deletedChars = deletedChars
447: + (difference.getDeletedEnd()
448: - difference.getDeletedStart() + 1);
449: }
450:
451: if (difference.getAddedEnd() != Difference.NONE) {
452: addedChars = addedChars
453: + (difference.getAddedEnd()
454: - difference.getAddedStart() + 1);
455: }
456: }
457:
458: // If a lot of changes were needed (more than half of the source line
459: // length), consider this as not aligned yet.
460:
461: if ((deletedChars > (sourceList.size() / 2))
462: || (addedChars > sourceList.size() / 2)) {
463:
464: return false;
465: }
466:
467: itr = differences.iterator();
468:
469: boolean sourceChanged = false;
470: boolean targetChanged = false;
471:
472: // Iterate over Differences between chars of these lines.
473:
474: while (itr.hasNext()) {
475: Difference difference = (Difference) itr.next();
476:
477: if (difference.getAddedEnd() == Difference.NONE) {
478:
479: // Chars were deleted from source only.
480:
481: _highlightChars(sourceList, deletedMarkerStart,
482: deletedMarkerEnd, difference.getDeletedStart(),
483: difference.getDeletedEnd());
484:
485: sourceChanged = true;
486: } else if (difference.getDeletedEnd() == Difference.NONE) {
487:
488: // Chars were added to target only.
489:
490: _highlightChars(targetList, addedMarkerStart,
491: addedMarkerEnd, difference.getAddedStart(),
492: difference.getAddedEnd());
493:
494: targetChanged = true;
495: } else {
496:
497: // Chars were both deleted and added.
498:
499: _highlightChars(sourceList, deletedMarkerStart,
500: deletedMarkerEnd, difference.getDeletedStart(),
501: difference.getDeletedEnd());
502:
503: sourceChanged = true;
504:
505: _highlightChars(targetList, addedMarkerStart,
506: addedMarkerEnd, difference.getAddedStart(),
507: difference.getAddedEnd());
508:
509: targetChanged = true;
510: }
511: }
512:
513: if (sourceChanged) {
514: DiffResult sourceResult = new DiffResult(sourceChangedLine,
515: _toString(sourceList));
516:
517: sourceResults.add(sourceResult);
518:
519: if (!targetChanged) {
520: DiffResult targetResult = new DiffResult(
521: targetChangedLine, target);
522:
523: targetResults.add(targetResult);
524: }
525: }
526:
527: if (targetChanged) {
528: if (!sourceChanged) {
529: DiffResult sourceResult = new DiffResult(
530: sourceChangedLine, source);
531:
532: sourceResults.add(sourceResult);
533: }
534:
535: DiffResult targetResult = new DiffResult(targetChangedLine,
536: _toString(targetList));
537:
538: targetResults.add(targetResult);
539: }
540:
541: return true;
542: }
543:
544: private static List _toList(String line) {
545: String[] stringArray = line.split(StringPool.BLANK);
546:
547: List result = new ArrayList();
548:
549: for (int i = 1; i < stringArray.length; i++) {
550: result.add(stringArray[i]);
551: }
552:
553: return result;
554: }
555:
556: private static String _toString(List line) {
557: StringMaker sm = new StringMaker();
558:
559: Iterator itr = line.iterator();
560:
561: while (itr.hasNext()) {
562: sm.append((String) itr.next());
563: }
564:
565: return sm.toString();
566: }
567:
568: }
|