001: /*
002: * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
003: *
004: * This file is part of Resin(R) Open Source
005: *
006: * Each copy or derived work must preserve the copyright notice and this
007: * notice unmodified.
008: *
009: * Resin Open Source is free software; you can redistribute it and/or modify
010: * it under the terms of the GNU General Public License as published by
011: * the Free Software Foundation; either version 2 of the License, or
012: * (at your option) any later version.
013: *
014: * Resin Open Source is distributed in the hope that it will be useful,
015: * but WITHOUT ANY WARRANTY; without even the implied warranty of
016: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
017: * of NON-INFRINGEMENT. See the GNU General Public License for more
018: * details.
019: *
020: * You should have received a copy of the GNU General Public License
021: * along with Resin Open Source; if not, write to the
022: * Free SoftwareFoundation, Inc.
023: * 59 Temple Place, Suite 330
024: * Boston, MA 02111-1307 USA
025: *
026: * @author Scott Ferguson
027: */
028:
029: package com.caucho.util;
030:
031: import com.caucho.vfs.*;
032:
033: import java.io.*;
034: import java.util.*;
035:
036: public class BinaryDiff {
037: int _oldLength;
038: ArrayList<TempBuffer> _oldBuffers;
039: byte[][] _oldBytes;
040:
041: int _chunkSize = 1;
042: int _chunkCount;
043:
044: int[] _suffixArray;
045:
046: int[] _charMin;
047: int[] _charMax;
048:
049: public void delta(OutputStream out, InputStream oldFile,
050: InputStream newFile) throws IOException {
051: readBuffers(oldFile);
052:
053: processOrigFile();
054:
055: processNewFile(out, newFile);
056: }
057:
058: protected void add(OutputStream out, byte[] buffer, int offset,
059: int length) throws IOException {
060: System.out.println("ADD(" + length + "): '"
061: + new String(buffer, offset, length) + "'");
062: }
063:
064: protected void copy(OutputStream out, int offset, int length)
065: throws IOException {
066: System.out.println("COPY(" + offset + "," + length + ")");
067: }
068:
069: private void readBuffers(InputStream oldFile) throws IOException {
070: _oldBuffers = new ArrayList<TempBuffer>();
071: _oldLength = 0;
072:
073: while (true) {
074: TempBuffer buf = TempBuffer.allocate();
075: byte[] buffer = buf.getBuffer();
076:
077: int sublen = readAll(oldFile, buffer);
078:
079: if (sublen < 0) {
080: TempBuffer.free(buf);
081: break;
082: }
083:
084: _oldLength += sublen;
085: _oldBuffers.add(buf);
086:
087: if (sublen < buffer.length)
088: break;
089: }
090:
091: _oldBytes = new byte[_oldBuffers.size()][];
092: for (int i = 0; i < _oldBytes.length; i++) {
093: _oldBytes[i] = _oldBuffers.get(i).getBuffer();
094: }
095: }
096:
097: private void processOrigFile() {
098: _chunkCount = _oldLength / _chunkSize;
099:
100: _suffixArray = new int[_chunkCount];
101:
102: int chunkSize = _chunkSize;
103: int chunkCount = _chunkCount;
104: int[] suffixArray = _suffixArray;
105:
106: for (int i = 0; i < chunkCount; i++) {
107: suffixArray[i] = i * chunkSize;
108: }
109:
110: byte[][] data = _oldBytes;
111:
112: sort(suffixArray, _oldBytes, TempBuffer.SIZE, _chunkSize);
113:
114: int[] charMin = _charMin = new int[256];
115: int[] charMax = _charMax = new int[256];
116:
117: for (int i = 0; i < suffixArray.length; i++) {
118: int offset = suffixArray[i];
119:
120: int ch = data[offset / TempBuffer.SIZE][offset
121: % TempBuffer.SIZE] & 0xff;
122:
123: if (charMin[ch] == 0)
124: charMin[ch] = i;
125:
126: charMax[ch] = i + 1;
127: }
128: }
129:
130: private static void sort(int[] suffixArray, byte[][] data,
131: int bufLength, int chunkSize) {
132: int length = suffixArray.length;
133: int dataLength = length * chunkSize;
134:
135: for (int i = 0; i < suffixArray.length - 1; i++) {
136: for (int j = suffixArray.length - 2; i <= j; j--) {
137: int a = suffixArray[j];
138: int b = suffixArray[j + 1];
139:
140: if (suffixCompareTo(a, b, data, dataLength, bufLength) > 0) {
141: int temp = suffixArray[j];
142: suffixArray[j] = suffixArray[j + 1];
143: suffixArray[j + 1] = temp;
144: }
145: }
146: }
147: }
148:
149: private static int suffixCompareTo(int a, int b, byte[][] data,
150: int length, int bufLength) {
151: int sublen = length - a;
152:
153: if (length - b < sublen)
154: sublen = length - b;
155:
156: for (int i = 0; i < sublen; i++) {
157: int a1 = data[a / bufLength][a % bufLength];
158: int b1 = data[b / bufLength][b % bufLength];
159:
160: if (a1 < b1)
161: return -1;
162: else if (b1 < a1)
163: return 1;
164:
165: a++;
166: b++;
167: }
168:
169: return 0;
170: }
171:
172: private void processNewFile(OutputStream out, InputStream newIn)
173: throws IOException {
174: TempBuffer tempBuf = TempBuffer.allocate();
175: byte[] buffer = tempBuf.getBuffer();
176: ;
177:
178: int length = readAll(newIn, buffer);
179:
180: int prevOffset = 0;
181: int offset = 0;
182:
183: int[] charMin = _charMin;
184: int[] charMax = _charMax;
185: int minLength = 8;
186:
187: byte[][] data = _oldBytes;
188: int[] suffixArray = _suffixArray;
189:
190: loop: for (; offset + minLength < length; offset += 1) {
191: int ch = buffer[offset] & 0xff;
192:
193: int min = charMin[ch];
194: int max = charMax[ch];
195:
196: byte ch2 = buffer[offset + 1];
197:
198: for (int i = min; i < max; i++) {
199: int suffixOffset = suffixArray[i];
200: int dataOffset = suffixOffset + 1;
201:
202: byte d1 = data[suffixOffset / TempBuffer.SIZE][suffixOffset
203: % TempBuffer.SIZE];
204: byte d2 = data[dataOffset / TempBuffer.SIZE][dataOffset
205: % TempBuffer.SIZE];
206: if (d2 == ch2) {
207: dataOffset++;
208: int bufOffset = offset + 2;
209:
210: match_loop: while (bufOffset < length
211: && dataOffset < _oldLength) {
212: byte bufCh = buffer[bufOffset];
213: byte dataCh = data[dataOffset / TempBuffer.SIZE][dataOffset
214: % TempBuffer.SIZE];
215:
216: if (bufCh == dataCh) {
217: bufOffset += 1;
218: dataOffset += 1;
219: } else if (bufCh < dataCh) {
220: if (dataOffset - suffixOffset >= minLength) {
221: if (prevOffset < offset)
222: add(out, buffer, prevOffset, offset
223: - prevOffset);
224:
225: offset = bufOffset;
226: copy(out, suffixOffset, dataOffset
227: - suffixOffset);
228: prevOffset = offset;
229: continue loop;
230: } else
231: break;
232: } else {
233: for (i++; i < max; i++) {
234: int newSuffix = suffixArray[i];
235: int newOffset = newSuffix
236: + (dataOffset - suffixOffset);
237:
238: boolean isMatch = true;
239: for (int j = newOffset; newSuffix <= j; j--) {
240: dataCh = data[j / TempBuffer.SIZE][j
241: % TempBuffer.SIZE];
242: bufCh = buffer[offset
243: + (j - newSuffix)];
244:
245: if (bufCh != dataCh) {
246: isMatch = false;
247: break;
248: }
249: }
250:
251: if (isMatch) {
252: dataOffset = newOffset;
253: suffixOffset = newSuffix;
254: continue match_loop;
255: }
256: }
257:
258: if (dataOffset - suffixOffset >= minLength) {
259: if (prevOffset < offset)
260: add(out, buffer, prevOffset, offset
261: - prevOffset);
262:
263: offset = bufOffset;
264: prevOffset = offset;
265: copy(out, suffixOffset, dataOffset
266: - suffixOffset);
267: continue loop;
268: } else
269: break;
270: }
271: }
272:
273: if (dataOffset - suffixOffset >= minLength) {
274: if (prevOffset < offset)
275: add(out, buffer, prevOffset, offset
276: - prevOffset);
277:
278: offset = bufOffset;
279: prevOffset = offset;
280: copy(out, suffixOffset, dataOffset
281: - suffixOffset);
282: }
283:
284: break;
285: } else if (ch2 < d2)
286: break;
287: }
288: }
289:
290: if (prevOffset < length)
291: add(out, buffer, prevOffset, length - prevOffset);
292:
293: TempBuffer.free(tempBuf);
294: }
295:
296: private int readAll(InputStream is, byte[] buffer)
297: throws IOException {
298: int offset = 0;
299:
300: while (offset < buffer.length) {
301: int sublen = buffer.length - offset;
302:
303: sublen = is.read(buffer, offset, sublen);
304:
305: if (sublen < 0)
306: return offset > 0 ? offset : -1;
307:
308: offset += sublen;
309: }
310:
311: return buffer.length;
312: }
313: }
|