001: /*
002: * Copyright (C) Jahia Ltd. All rights reserved.
003: *
004: * This software is published under the terms of the Jahia Open Software
005: * License version 1.1, a copy of which has been included with this
006: * distribution in the LICENSE.txt file.
007: */
008: package org.jahia.sqlprofiler;
009:
010: import java.util.ArrayList;
011:
012: /**
013: * <p>Title: StringDiff</p>
014: * <p>Description: This class implements a string comparison algorithm that
015: * allows to know the similirity sequences, etc. The main method is the diff
016: * method which performs the difference algorithm and sets all the other
017: * variables.</p>
018: * <p>Copyright: Copyright (c) 2003</p>
019: * <p>Company: Jahia Ltd</p>
020: * @author Serge Huber
021: * @version 1.0
022: */
023:
024: public class StringDiff {
025:
026: private java.util.ArrayList leftSequences;
027: private java.util.ArrayList rightSequences;
028: private String left;
029: private String right;
030: private int sequenceCount;
031: private int sameCharCount;
032:
033: public StringDiff() {
034: }
035:
036: /**
037: * <p>Title: Sequence position</p>
038: * <p>Description: This class is used to store sequence positions in a
039: * string. A sequence position is compose of two coordinates, a starting
040: * and an ending position for a sequence that is present in both strings.
041: * These coordinates can then be used to build visual differentiators for
042: * the strings, or whatever one can imagine.</p>
043: * <p>Copyright: Copyright (c) 2003</p>
044: * <p>Company: </p>
045: * @author Serge Huber
046: * @version 1.0
047: */
048: public class SequencePos {
049: private int startPos;
050: private int endPos;
051:
052: public SequencePos(int startPos, int endPos) {
053: this .startPos = startPos;
054: this .endPos = endPos;
055: }
056:
057: public int getStartPos() {
058: return this .startPos;
059: }
060:
061: public int getEndPos() {
062: return this .endPos;
063: }
064: }
065:
066: /**
067: * This is the main method of this class, that actually performs the
068: * matching between the two strings. The result of this operation is stored
069: * in class fields such as leftSequences and rightSequences, sameCharCount,
070: * sequenceCount, etc... These results stay available until the next call
071: * to the diff method is done.
072: * @param left the first string to compare
073: * @param right the second string to compare the first one with
074: */
075: public void diff(String left, String right) {
076: int curLeftPos = 0;
077: int curRightPos = 0;
078: this .sameCharCount = 0;
079: this .sequenceCount = 0;
080:
081: this .leftSequences = new ArrayList();
082: this .rightSequences = new ArrayList();
083:
084: this .left = left;
085: this .right = right;
086:
087: // first let's handle all the special cases...
088:
089: if (left == null) {
090: return;
091: }
092: if (right == null) {
093: return;
094: }
095:
096: if (left.length() < 2) {
097: if (right.startsWith(left)) {
098: SequencePos leftSequencePos = new SequencePos(0, left
099: .length());
100: leftSequences.add(leftSequencePos);
101: rightSequences.add(leftSequencePos);
102: return;
103: }
104: }
105: if (right.length() < 2) {
106: if (left.startsWith(right)) {
107: SequencePos rightSequencePos = new SequencePos(0, right
108: .length());
109: leftSequences.add(rightSequencePos);
110: rightSequences.add(rightSequencePos);
111: return;
112: }
113: }
114:
115: // special cases have been handled, now to the general algorithm...
116:
117: // first we follow all the similar sequences if they are similar for
118: // more than one character.
119: if ((((curLeftPos + 1) < left.length())
120: && ((curRightPos + 1) < right.length()) && (left
121: .charAt(curLeftPos + 1) == (right
122: .charAt(curRightPos + 1))))
123: || (((curLeftPos + 1) == left.length()) && ((curRightPos + 1) == right
124: .length()))) {
125:
126: int leftSequenceStartPos = curLeftPos;
127: int rightSequenceStartPos = curRightPos;
128:
129: while ((curLeftPos < left.length())
130: && (curRightPos < right.length())
131: && (left.charAt(curLeftPos) == right
132: .charAt(curRightPos))) {
133: System.out.print(left.charAt(curLeftPos));
134: sameCharCount++;
135: curLeftPos++;
136: curRightPos++;
137: }
138: if (curLeftPos != 0) {
139: sequenceCount++;
140: SequencePos newLeftSequencePos = new SequencePos(
141: leftSequenceStartPos, curLeftPos);
142: SequencePos newRightSequencePos = new SequencePos(
143: rightSequenceStartPos, curRightPos);
144: leftSequences.add(newLeftSequencePos);
145: rightSequences.add(newRightSequencePos);
146: }
147:
148: }
149:
150: while (curLeftPos < left.length()) {
151: // we now have differences in the two strings, lets try to find
152: // another simitude join point.
153: int tempRightPos = curRightPos;
154: while ((tempRightPos < right.length() && (left
155: .charAt(curLeftPos) != right.charAt(tempRightPos)))) {
156: tempRightPos++;
157: }
158: // if we arrived here it means we either found a new similitude or
159: // we looked through the whole right string.
160: if (tempRightPos == right.length()) {
161: // we arrived at the end of the right string without finding
162: // anything, let's move one character in the left string and
163: // continue searching...
164: curLeftPos++;
165: } else {
166: // we found similitudes again, let's eat them up only if we
167: // have more than one similar character.
168: if ((((curLeftPos + 1) < left.length())
169: && ((tempRightPos + 1) < right.length()) && (left
170: .charAt(curLeftPos + 1) == (right
171: .charAt(tempRightPos + 1))))
172: || (((curLeftPos + 1) == left.length()) && ((tempRightPos + 1) == right
173: .length()))) {
174: System.out.print("?");
175: curRightPos = tempRightPos;
176: int leftSequenceStartPos = curLeftPos;
177: int rightSequenceStartPos = curRightPos;
178: while ((curLeftPos < left.length())
179: && (curRightPos < right.length())
180: && (left.charAt(curLeftPos) == right
181: .charAt(curRightPos))) {
182: System.out.print(left.charAt(curLeftPos));
183: sameCharCount++;
184: curLeftPos++;
185: curRightPos++;
186: }
187: SequencePos newLeftSequencePos = new SequencePos(
188: leftSequenceStartPos, curLeftPos);
189: SequencePos newRightSequencePos = new SequencePos(
190: rightSequenceStartPos, curRightPos);
191: leftSequences.add(newLeftSequencePos);
192: rightSequences.add(newRightSequencePos);
193: sequenceCount++;
194: } else {
195: curLeftPos++;
196: }
197: }
198: }
199:
200: // ok we finished looking through the left string, but the right
201: // string might be longer, let's make sure we add the characters to the
202: // diff list.
203:
204: }
205:
206: /**
207: * Returns the array of sequence positions of similar chars for the left
208: * string
209: * @return an ArrayList of SequencePos objects that contain the position of
210: * the sequences that were found in both strings.
211: */
212: public java.util.ArrayList getLeftSequences() {
213: return leftSequences;
214: }
215:
216: /**
217: * Returns the array of sequence positions of similar chars for the right
218: * string
219: * @return an ArrayList of SequencePos objects that contain the position of
220: * the sequences that were found in both strings.
221: */
222: public java.util.ArrayList getRightSequences() {
223: return rightSequences;
224: }
225:
226: /**
227: * @return the last used left string
228: */
229: public String getLeft() {
230: return left;
231: }
232:
233: /**
234: * @return the last used right string
235: */
236: public String getRight() {
237: return right;
238: }
239:
240: /**
241: * @return the number of sequences that are present in both strings
242: */
243: public int getSequenceCount() {
244: return sequenceCount;
245: }
246:
247: /**
248: * @return the full count of characters that are in all the sequences that
249: * are the same between both strings
250: */
251: public int getSameCharCount() {
252: return sameCharCount;
253: }
254:
255: private static void displayResults(StringDiff stringDiff) {
256: System.out.println("\nsameCharCount="
257: + stringDiff.getSameCharCount() + " sequenceCount="
258: + stringDiff.getSequenceCount());
259: for (int i = 0; i < stringDiff.getLeftSequences().size(); i++) {
260: SequencePos leftSequencePos = (SequencePos) stringDiff
261: .getLeftSequences().get(i);
262: SequencePos rightSequencePos = (SequencePos) stringDiff
263: .getRightSequences().get(i);
264: System.out.println("sequence "
265: + i
266: + " : left=["
267: + leftSequencePos.getStartPos()
268: + ","
269: + leftSequencePos.getEndPos()
270: + "]=["
271: + stringDiff.getLeft().substring(
272: leftSequencePos.getStartPos(),
273: leftSequencePos.getEndPos())
274: + "] right=["
275: + rightSequencePos.getStartPos()
276: + ","
277: + rightSequencePos.getEndPos()
278: + "]=["
279: + stringDiff.getRight().substring(
280: rightSequencePos.getStartPos(),
281: rightSequencePos.getEndPos()) + "]");
282: }
283:
284: }
285:
286: /**
287: * Runs a few tests cases to make sure the diff algorithm is correct.
288: * @param args
289: */
290: public static void main(String[] args) {
291: StringDiff stringDiff = new StringDiff();
292:
293: // test cases.
294: stringDiff
295: .diff(
296: "select * from jahia_fields_data where iasdfd_jahia_fields_data=10 order by id_jahia_fields_datba",
297: "select * from jahia_fkjahdflields_data where id_jahiadsfa_fields_data=23 order by id_jahia_fields_data");
298: displayResults(stringDiff);
299: stringDiff
300: .diff(
301: "select * from jahia_fields_data where id_jahia_fields_data=10 order by",
302: "select * from jahia_fields_data where id_jahia_fields_data=10 order by id_jahia_fields_data");
303: displayResults(stringDiff);
304: stringDiff.diff("a", "a");
305: displayResults(stringDiff);
306: stringDiff.diff("", "");
307: displayResults(stringDiff);
308: stringDiff.diff("", "a");
309: displayResults(stringDiff);
310: stringDiff.diff("a", "");
311: displayResults(stringDiff);
312: stringDiff.diff("aa", "a");
313: displayResults(stringDiff);
314: stringDiff.diff("a", "aa");
315: displayResults(stringDiff);
316: stringDiff.diff("aa", "aa");
317: displayResults(stringDiff);
318: stringDiff.diff(null, null);
319: displayResults(stringDiff);
320: stringDiff.diff("a", null);
321: displayResults(stringDiff);
322: stringDiff.diff(null, "a");
323: displayResults(stringDiff);
324:
325: }
326:
327: }
|