001: package org.wings.util;
002:
003: import javax.swing.event.DocumentEvent;
004: import javax.swing.text.Document;
005: import javax.swing.text.Element;
006: import java.util.ArrayList;
007: import java.util.Collections;
008: import java.util.HashMap;
009: import java.util.Iterator;
010: import java.util.List;
011: import java.util.Map;
012: import java.util.Vector;
013:
014: /**
015: * This class takes two Strings an generates the shortes list of necessarry
016: * change operations to transform the source string into the target string.
017: * <p>For more information about the used algorithm refer to:
018: * <ul>
019: * <li>http://www.merriampark.com/ld.htm</li>
020: * <li>http://www.msci.memphis.edu/~giri/compbio/f99/ningxu/NOTE10.html</li>
021: * <li>http://www.itu.dk/courses/AVA/E2005/StringEditDistance.pdf</li>
022: * </ul>
023: * <p/>Original source extracted from Glazed Lists (http://publicobject.com/glazedlists/)
024: *
025: * Implementation of Eugene W. Myer's paper, "An O(ND) Difference Algorithm and Its
026: * Variations", the same algorithm found in GNU diff.
027: * <p/>
028: * <p>Note that this is a cleanroom implementation of this popular algorithm that is
029: * particularly suited for the Java programmer. The variable names are descriptive and the
030: * approach is more object-oriented than Myer's sample algorithm.
031: *
032: * @author <a href="mailto:jesse@odel.on.ca">Jesse Wilson</a>
033: */
034: public final class EditTranscriptGenerator {
035: /**
036: * The exact calculation of document events is very memory and cpu intesive.
037: * For texts beeing longer than this limit we use a dumb approximation.
038: * (OL) turning this off until we actually can do something useful with it.
039: */
040: private static final int MAX_LENGTH_FOR_TRANSCRIPT_GENERATION = 0;
041:
042: /**
043: * Generates the shorted edit transcript needed to transform the source String into the target String.
044: * The needed changes are noted down as {@link DocumentEvent}s.
045: *
046: * @return A list of {@link DocumentEvent}s either of type {@link javax.swing.event.DocumentEvent.EventType#INSERT}
047: * or {@link javax.swing.event.DocumentEvent.EventType#REMOVE} with correct sourceIndexes and lengths.
048: */
049: public static List generateEvents(String source, String target) {
050: /* turning off complex handling until we need it */
051: // if (((source == null) || source.length() < MAX_LENGTH_FOR_TRANSCRIPT_GENERATION) &&
052: // ((target == null) || target.length() < MAX_LENGTH_FOR_TRANSCRIPT_GENERATION))
053: // return calculateEventsByStringDistance(source, target);
054: // else
055: return calculateEventsByDumbApproximation(source, target);
056: }
057:
058: /**
059: * Generate the document events by using the algorithm.
060: * @return A list of {@link DocumentEvent}s either of type {@link javax.swing.event.DocumentEvent.EventType#INSERT}
061: * or {@link javax.swing.event.DocumentEvent.EventType#REMOVE} with correct sourceIndexes and lengths.
062: */
063: private static List calculateEventsByStringDistance(String source,
064: String target) {
065: final List editScript = shortestEditScript(new StringDiffMatcher(
066: source, target));
067:
068: final Vector actions = new Vector();
069:
070: // target is x axis. Changes in X mean advance target index
071: // source is y axis. Changes to y mean advance source index
072: int targetIndex = 0;
073: int sourceIndex = 0;
074:
075: // walk through points, applying changes as they arrive
076: Point previousPoint = null;
077: for (Iterator i = editScript.iterator(); i.hasNext();) {
078: Point currentPoint = (Point) i.next();
079:
080: // skip the first point
081: if (previousPoint == null) {
082: previousPoint = currentPoint;
083: continue;
084: }
085:
086: // figure out what the relationship in the values is
087: int deltaX = currentPoint.getX() - previousPoint.getX();
088: int deltaY = currentPoint.getY() - previousPoint.getY();
089:
090: if (deltaX == deltaY) {
091: // handle an update
092: targetIndex += deltaX;
093: sourceIndex += deltaY;
094: } else if (deltaX == 1 && deltaY == 0) {
095: // handle a remove
096: addOrUpdateChangeEvent(sourceIndex, actions,
097: DocumentEvent.EventType.REMOVE);
098: } else if (deltaX == 0 && deltaY == 1) {
099: // handle an insert
100: addOrUpdateChangeEvent(sourceIndex, actions,
101: DocumentEvent.EventType.INSERT);
102: sourceIndex++;
103: targetIndex++;
104: } else {
105: // should never be reached
106: throw new IllegalStateException();
107: }
108:
109: // the next previous point is this current point
110: previousPoint = currentPoint;
111: }
112: return actions;
113: }
114:
115: /**
116: * Insert next atomic change (1 character) into event queue. Either consolidate as continuation with
117: * an existing event in the queue or create a new one.
118: *
119: * @param sourceIndex character index in source string
120: * @param actions Current queue of document events
121: * @param eventType what happens at the source index?
122: */
123: private static void addOrUpdateChangeEvent(int sourceIndex,
124: Vector actions, DocumentEvent.EventType eventType) {
125: int offset = sourceIndex;
126: int length = 1;
127: final Document sourceDocument = null; // dummy - we do not have a reference.
128:
129: // First change is always a new event
130: if (actions.size() == 0) {
131: SimpleDocumentEvent newEvent = new SimpleDocumentEvent(
132: offset, length, sourceDocument, eventType);
133: actions.add(newEvent);
134: } else {
135: // Is this a contiunuation of the last event type?
136: if (((DocumentEvent) actions.lastElement()).getType()
137: .equals(eventType)) {
138:
139: SimpleDocumentEvent docEvent = (SimpleDocumentEvent) actions
140: .lastElement();
141: offset = docEvent.getOffset();
142: length = docEvent.getLength();
143: // Continuation break for an insert? New event
144: if ((sourceIndex != (offset + length))
145: && eventType
146: .equals(DocumentEvent.EventType.INSERT)) {
147: offset = sourceIndex;
148: length = 1;
149: DocumentEvent newEvent = new SimpleDocumentEvent(
150: offset, length, sourceDocument, eventType);
151: actions.add(newEvent);
152: }
153: // New remove?
154: else if ((sourceIndex != offset)
155: && eventType
156: .equals(DocumentEvent.EventType.REMOVE)) {
157: offset = sourceIndex;
158: length = 1;
159: DocumentEvent newEvent = new SimpleDocumentEvent(
160: offset, length, sourceDocument, eventType);
161: actions.add(newEvent);
162: }
163: // It contiunation? So just consolidate with existing event by increasing lenght by one
164: else {
165: docEvent.increaseLength();
166: }
167: }
168: // Anderes Event folgt
169: else {
170: DocumentEvent newEvent = new SimpleDocumentEvent(
171: offset, length, sourceDocument, eventType);
172: actions.add(newEvent);
173: }
174: }
175:
176: }
177:
178: /**
179: * Our simple implementation of {@link DocumentEvent}
180: */
181: private static class SimpleDocumentEvent implements DocumentEvent {
182: private int offset;
183: private int length;
184: private Document document;
185: private EventType type;
186:
187: public SimpleDocumentEvent(int offset, int length,
188: Document document, EventType type) {
189: this .offset = offset;
190: this .length = length;
191: this .document = document;
192: this .type = type;
193: }
194:
195: public ElementChange getChange(Element elem) {
196: return null;
197: }
198:
199: public int getOffset() {
200: return offset;
201: }
202:
203: public int getLength() {
204: return length;
205: }
206:
207: public Document getDocument() {
208: return document;
209: }
210:
211: public EventType getType() {
212: return type;
213: }
214:
215: void increaseLength() {
216: length += 1;
217: }
218:
219: public String toString() {
220: return "SimpleDocumentEvent{" + "offset=" + offset
221: + ", length=" + length + ", document=" + document
222: + ", type=" + type + "}";
223: }
224: }
225:
226: /**
227: * Calculate the length of the longest common subsequence for the specified input.
228: */
229: private final static List shortestEditScript(StringDiffMatcher input) {
230: // calculate limits based on the size of the input matcher
231: int N = input.getAlphaLength();
232: int M = input.getBetaLength();
233: Point maxPoint = new Point(N, M);
234: int maxSteps = N + M;
235:
236: // use previous round furthest reaching D-path to determine the
237: // new furthest reaching (D+1)-path
238: Map furthestReachingPoints = new HashMap();
239:
240: // walk through in stages, each stage adding one non-diagonal.
241: // D == count of non-diagonals in current stage
242: for (int D = 0; D <= maxSteps; D++) {
243:
244: // exploit diagonals in order to save storing both X and Y
245: // diagonal k means every point on k, (k = x - y)
246: for (int k = -D; k <= D; k += 2) {
247: // the furthest reaching D-path on the left and right diagonals
248: // either of these may be null. The terms 'below left' and 'above right'
249: // refer to the diagonals that the points are on and may not be
250: // representative of the point positions
251: Point belowLeft = (Point) furthestReachingPoints
252: .get(new Integer(k - 1));
253: Point aboveRight = (Point) furthestReachingPoints
254: .get(new Integer(k + 1));
255:
256: // the new furthest reaching point to create
257: Point point;
258:
259: // first round: we have matched zero in word X
260: if (furthestReachingPoints.isEmpty()) {
261: point = new Point(0, 0);
262:
263: // if this is the leftmost diagonal, or the left edge is further
264: // than the right edge, our new X is that value and our y is one greater
265: // (shift verically by one)
266: } else if (k == -D
267: || (k != D && belowLeft.getX() < aboveRight
268: .getX())) {
269: point = aboveRight.createDeltaPoint(0, 1);
270:
271: // if the right edge is further than the left edge, use that x
272: // and keep y the same (shift horizontally by one)
273: } else {
274: point = belowLeft.createDeltaPoint(1, 0);
275: }
276:
277: // match as much diagonal as possible from the previous endpoint
278: while (point.isLessThan(maxPoint)
279: && input.matchPair(point.getX(), point.getY())) {
280: point = point.incrementDiagonally();
281: }
282:
283: // save this furthest reaching path
284: furthestReachingPoints.put(new Integer(k), point);
285:
286: // if we're past the end, we have a solution!
287: if (point.isEqualToOrGreaterThan(maxPoint)) {
288: return point.trail();
289: }
290: }
291: }
292: // no solution was found
293: throw new IllegalStateException();
294: }
295:
296: /**
297: * Generate a very simple list of document events to avoid cost intensive distance calculation:
298: * Remove all existing characters, add all new characters.
299: * @return A list of {@link DocumentEvent}s either of type {@link javax.swing.event.DocumentEvent.EventType#INSERT}
300: * or {@link javax.swing.event.DocumentEvent.EventType#REMOVE} with correct sourceIndexes and lengths.
301: */
302: private static List calculateEventsByDumbApproximation(
303: String source, String target) {
304: List events = new ArrayList(2);
305: if (source != null)
306: events.add(new SimpleDocumentEvent(0, source.length(),
307: null, DocumentEvent.EventType.REMOVE));
308: if (target != null)
309: events.add(new SimpleDocumentEvent(0, target.length(),
310: null, DocumentEvent.EventType.INSERT));
311:
312: return events;
313: }
314:
315: /**
316: * Models an X and Y point in a path. The top-left corner of the axis is the point (0,
317: * 0). This is the lowest point in both the x and y dimensions. Negative points are
318: * not allowed.
319: */
320: private final static class Point {
321: private int x = 0;
322: private int y = 0;
323: private Point predecessor = null;
324:
325: /**
326: * Create a new point with the specified coordinates and no predecessor.
327: */
328: public Point(int x, int y) {
329: this .x = x;
330: this .y = y;
331: }
332:
333: /**
334: * Creates a new point from this point by shifting its values as specified. The
335: * new point keeps a reference to its source in order to create a path later.
336: */
337: public Point createDeltaPoint(int deltaX, int deltaY) {
338: Point result = new Point(x + deltaX, y + deltaY);
339: result.predecessor = this ;
340: return result;
341: }
342:
343: /**
344: * Shifts <code>x</code> and <code>y</code> values down and to the
345: * right by one.
346: */
347: public Point incrementDiagonally() {
348: Point result = createDeltaPoint(1, 1);
349:
350: // shortcut to the predecessor (to save memory!)
351: if (predecessor != null) {
352: int deltaX = result.x - predecessor.x;
353: int deltaY = result.y - predecessor.y;
354:
355: if (deltaX == deltaY) {
356: result.predecessor = this .predecessor;
357: }
358: }
359:
360: return result;
361: }
362:
363: public int getX() {
364: return x;
365: }
366:
367: public int getY() {
368: return y;
369: }
370:
371: public boolean isLessThan(Point other) {
372: return x < other.x && y < other.y;
373: }
374:
375: public boolean isEqualToOrGreaterThan(Point other) {
376: return x >= other.x && y >= other.y;
377: }
378:
379: public String toString() {
380: return "(" + x + "," + y + ")";
381: }
382:
383: /**
384: * Get a trail from the original point to this point. This is a list of all points
385: * created via a series of {@link #createDeltaPoint(int,int)} calls.
386: */
387: public List trail() {
388: List reverse = new ArrayList();
389: Point current = this ;
390: while (current != null) {
391: reverse.add(current);
392: current = current.predecessor;
393: }
394: Collections.reverse(reverse);
395: return reverse;
396: }
397: }
398:
399: /**
400: * Determines if the values at the specified points match or not.
401: *
402: * <p>This class specifies that each element should specify a character value.
403: * This is for testing and debugging only and it is safe for implementing
404: * classes to throw {@link UnsupportedOperationException} for both the
405: * {@link #alphaAt(int)} and {@link #betaAt(int)} methods.
406: */
407: private final static class StringDiffMatcher {
408: private String alpha;
409: private String beta;
410:
411: public StringDiffMatcher(String alpha, String beta) {
412: this .alpha = alpha;
413: this .beta = beta;
414: }
415:
416: public int getAlphaLength() {
417: return alpha.length();
418: }
419:
420: public char alphaAt(int index) {
421: return alpha.charAt(index);
422: }
423:
424: public char betaAt(int index) {
425: return beta.charAt(index);
426: }
427:
428: public int getBetaLength() {
429: return beta.length();
430: }
431:
432: public boolean matchPair(int alphaIndex, int betaIndex) {
433: return alpha.charAt(alphaIndex) == beta.charAt(betaIndex);
434: }
435: }
436: }
|