001: /*
002: *******************************************************************************
003: * Copyright (C) 1996-2006, International Business Machines Corporation and *
004: * others. All Rights Reserved. *
005: *******************************************************************************
006: */
007: package com.ibm.icu.dev.test.compression;
008:
009: import com.ibm.icu.text.UnicodeCompressor;
010: import com.ibm.icu.text.UnicodeDecompressor;
011: import com.ibm.icu.dev.test.TestFmwk;
012:
013: public class ExhaustiveTest extends TestFmwk {
014: public static void main(String args[]) throws Exception {
015: new ExhaustiveTest().run(args);
016: }
017:
018: /** Test simple compress/decompress API, returning # of errors */
019: public void testSimple() throws Exception {
020: for (int i = 0; i < fTestCases.length; i++) {
021: simpleTest(fTestCases[i]);
022: }
023: }
024:
025: private void simpleTest(String s) throws Exception {
026: byte[] compressed = UnicodeCompressor.compress(s);
027: String res = UnicodeDecompressor.decompress(compressed);
028: if (logDiffs(s.toCharArray(), s.length(), res.toCharArray(),
029: res.length()) == false) {
030: logln(s.length() + " chars ===> " + compressed.length
031: + " bytes ===> " + res.length() + " chars");
032: } else {
033: logln("Compressed:");
034: printBytes(compressed, compressed.length);
035: errln("testSimple did not compress correctly");
036: }
037: }
038:
039: /** Test iterative compress/decompress API, returning # of errors */
040: public void testIterative() throws Exception {
041: for (int i = 0; i < fTestCases.length; i++) {
042: myTest(fTestCases[i].toCharArray(), fTestCases[i].length());
043: }
044: }
045:
046: private void myTest(char[] chars, int len) {
047: UnicodeCompressor myCompressor = new UnicodeCompressor();
048: UnicodeDecompressor myDecompressor = new UnicodeDecompressor();
049:
050: // variables for my compressor
051: int myByteCount = 0;
052: int myCharCount = 0;
053: int myCompressedSize = Math.max(512, 3 * len);
054: byte[] myCompressed = new byte[myCompressedSize];
055: int myDecompressedSize = Math.max(2, 2 * len);
056: char[] myDecompressed = new char[myDecompressedSize];
057: int[] unicharsRead = new int[1];
058: int[] bytesRead = new int[1];
059:
060: myByteCount = myCompressor.compress(chars, 0, len,
061: unicharsRead, myCompressed, 0, myCompressedSize);
062:
063: myCharCount = myDecompressor.decompress(myCompressed, 0,
064: myByteCount, bytesRead, myDecompressed, 0,
065: myDecompressedSize);
066:
067: if (logDiffs(chars, len, myDecompressed, myCharCount) == false) {
068: logln(len + " chars ===> " + myByteCount + " bytes ===> "
069: + myCharCount + " chars");
070: } else {
071: logln("Compressed:");
072: printBytes(myCompressed, myByteCount);
073: errln("Iterative test failed");
074: }
075: }
076:
077: /** Test iterative compress/decompress API */
078: public void testMultipass() throws Exception {
079: for (int i = 0; i < fTestCases.length; i++) {
080: myMultipassTest(fTestCases[i].toCharArray(), fTestCases[i]
081: .length());
082: }
083: }
084:
085: private void myMultipassTest(char[] chars, int len)
086: throws Exception {
087: UnicodeCompressor myCompressor = new UnicodeCompressor();
088: UnicodeDecompressor myDecompressor = new UnicodeDecompressor();
089:
090: // variables for my compressor
091:
092: // for looping
093: int byteBufferSize = 4;//Math.max(4, len / 4);
094: byte[] byteBuffer = new byte[byteBufferSize];
095: // real target
096: int compressedSize = Math.max(512, 3 * len);
097: byte[] compressed = new byte[compressedSize];
098:
099: // for looping
100: int unicharBufferSize = 2;//byteBufferSize;
101: char[] unicharBuffer = new char[unicharBufferSize];
102: // real target
103: int decompressedSize = Math.max(2, 2 * len);
104: char[] decompressed = new char[decompressedSize];
105:
106: int bytesWritten = 0;
107: int unicharsWritten = 0;
108:
109: int[] unicharsRead = new int[1];
110: int[] bytesRead = new int[1];
111:
112: int totalCharsCompressed = 0;
113: int totalBytesWritten = 0;
114:
115: int totalBytesDecompressed = 0;
116: int totalCharsWritten = 0;
117:
118: // not used boolean err = false;
119:
120: // perform the compression in a loop
121: do {
122:
123: // do the compression
124: bytesWritten = myCompressor.compress(chars,
125: totalCharsCompressed, len, unicharsRead,
126: byteBuffer, 0, byteBufferSize);
127:
128: // copy the current set of bytes into the target buffer
129: System.arraycopy(byteBuffer, 0, compressed,
130: totalBytesWritten, bytesWritten);
131:
132: // update the no. of characters compressed
133: totalCharsCompressed += unicharsRead[0];
134:
135: // update the no. of bytes written
136: totalBytesWritten += bytesWritten;
137:
138: /*System.out.logln("Compression pass complete. Compressed "
139: + unicharsRead[0] + " chars into "
140: + bytesWritten + " bytes.");*/
141: } while (totalCharsCompressed < len);
142:
143: if (totalCharsCompressed != len) {
144: errln("ERROR: Number of characters compressed("
145: + totalCharsCompressed + ") != len(" + len + ")");
146: } else {
147: logln("MP: " + len + " chars ===> " + totalBytesWritten
148: + " bytes.");
149: }
150:
151: // perform the decompression in a loop
152: do {
153:
154: // do the decompression
155: unicharsWritten = myDecompressor.decompress(compressed,
156: totalBytesDecompressed, totalBytesWritten,
157: bytesRead, unicharBuffer, 0, unicharBufferSize);
158:
159: // copy the current set of chars into the target buffer
160: System.arraycopy(unicharBuffer, 0, decompressed,
161: totalCharsWritten, unicharsWritten);
162:
163: // update the no. of bytes decompressed
164: totalBytesDecompressed += bytesRead[0];
165:
166: // update the no. of chars written
167: totalCharsWritten += unicharsWritten;
168:
169: /*System.out.logln("Decompression pass complete. Decompressed "
170: + bytesRead[0] + " bytes into "
171: + unicharsWritten + " chars.");*/
172: } while (totalBytesDecompressed < totalBytesWritten);
173:
174: if (totalBytesDecompressed != totalBytesWritten) {
175: errln("ERROR: Number of bytes decompressed("
176: + totalBytesDecompressed
177: + ") != totalBytesWritten(" + totalBytesWritten
178: + ")");
179: } else {
180: logln("MP: " + totalBytesWritten + " bytes ===> "
181: + totalCharsWritten + " chars.");
182: }
183:
184: if (logDiffs(chars, len, decompressed, totalCharsWritten)) {
185: errln("ERROR: buffer contents incorrect");
186: }
187: }
188:
189: /** Print differences between two character buffers */
190: private boolean logDiffs(char[] s1, int s1len, char[] s2, int s2len) {
191: boolean result = false;
192:
193: if (s1len != s2len) {
194: logln("====================");
195: logln("Length doesn't match: expected " + s1len + ", got "
196: + s2len);
197: logln("Expected:");
198: printChars(s1, s1len);
199: logln("Got:");
200: printChars(s2, s2len);
201: result = true;
202: }
203:
204: int len = Math.min(s1len, s2len);
205: for (int i = 0; i < len; ++i) {
206: if (s1[i] != s2[i]) {
207: if (result == false) {
208: logln("====================");
209: }
210: logln("First difference at char " + i);
211: logln("Exp. char: " + Integer.toHexString(s1[i]));
212: logln("Got char : " + Integer.toHexString(s2[i]));
213: logln("Expected:");
214: printChars(s1, s1len);
215: logln("Got:");
216: printChars(s2, s2len);
217: result = true;
218: break;
219: }
220: }
221:
222: return result;
223: }
224:
225: // generate a string of characters, with simulated runs of characters
226: /*private static char[] randomChars(int len, Random random) {
227: char[] result = new char [len];
228: int runLen = 0;
229: int used = 0;
230:
231: while(used < len) {
232: runLen = (int) (30 * random.nextDouble());
233: if(used + runLen >= len) {
234: runLen = len - used;
235: }
236: randomRun(result, used, runLen, random);
237: used += runLen;
238: }
239:
240: return result;
241: }*/
242:
243: // generate a run of characters in a "window"
244: /*private static void randomRun(char[] target, int pos, int len, Random random) {
245: int offset = (int) (0xFFFF * random.nextDouble());
246:
247: // don't overflow 16 bits
248: if(offset > 0xFF80) {
249: offset = 0xFF80;
250: }
251:
252: for(int i = pos; i < pos + len; i++) {
253: target[i] = (char)(offset + (0x7F * random.nextDouble()));
254: }
255: }*/
256:
257: private static final String[] fTestCases = {
258: "Hello \u9292 \u9192 World!",
259: "Hell\u0429o \u9292 \u9192 W\u0084rld!",
260: "Hell\u0429o \u9292 \u9292W\u0084rld!",
261:
262: "\u0648\u06c8", // catch missing reset
263: "\u0648\u06c8",
264:
265: "\u4444\uE001", // lowest quotable
266: "\u4444\uf2FF", // highest quotable
267: "\u4444\uf188\u4444",
268: "\u4444\uf188\uf288",
269: "\u4444\uf188abc\0429\uf288",
270: "\u9292\u2222",
271: "Hell\u0429\u04230o \u9292 \u9292W\u0084\u0192rld!",
272: "Hell\u0429o \u9292 \u9292W\u0084rld!",
273: "Hello World!123456",
274: "Hello W\u0081\u011f\u0082!", // Latin 1 run
275:
276: "abc\u0301\u0302", // uses SQn for u301 u302
277: "abc\u4411d", // uses SQU
278: "abc\u4411\u4412d",// uses SCU
279: "abc\u0401\u0402\u047f\u00a5\u0405", // uses SQn for ua5
280: "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", // SJIS like data
281: "\u9292\u2222",
282: "\u9191\u9191\u3041\u9191\u3041\u3041\u3000",
283: "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c",
284: "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002",
285:
286: "", // empty input
287: "\u0000", // smallest BMP character
288: "\uFFFF", // largest BMP character
289:
290: "\ud800\udc00", // smallest surrogate
291: "\ud8ff\udcff", // largest surrogate pair
292:
293: // regression tests
294: "\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa",
295: "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c",
296: "\u0041\u00df\u0401\u015f",
297: "\u9066\u2123abc",
298: "\ud266\u43d7\\\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5",
299: "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489"
300:
301: };
302:
303: //==========================
304: // Compression modes
305: //==========================
306: private final static int SINGLEBYTEMODE = 0;
307: private final static int UNICODEMODE = 1;
308:
309: //==========================
310: // Single-byte mode tags
311: //==========================
312: private final static int SDEFINEX = 0x0B;
313: //private final static int SRESERVED = 0x0C; // this is a reserved value
314: private final static int SQUOTEU = 0x0E;
315: private final static int SSWITCHU = 0x0F;
316:
317: private final static int SQUOTE0 = 0x01;
318: private final static int SQUOTE1 = 0x02;
319: private final static int SQUOTE2 = 0x03;
320: private final static int SQUOTE3 = 0x04;
321: private final static int SQUOTE4 = 0x05;
322: private final static int SQUOTE5 = 0x06;
323: private final static int SQUOTE6 = 0x07;
324: private final static int SQUOTE7 = 0x08;
325:
326: private final static int SSWITCH0 = 0x10;
327: private final static int SSWITCH1 = 0x11;
328: private final static int SSWITCH2 = 0x12;
329: private final static int SSWITCH3 = 0x13;
330: private final static int SSWITCH4 = 0x14;
331: private final static int SSWITCH5 = 0x15;
332: private final static int SSWITCH6 = 0x16;
333: private final static int SSWITCH7 = 0x17;
334:
335: private final static int SDEFINE0 = 0x18;
336: private final static int SDEFINE1 = 0x19;
337: private final static int SDEFINE2 = 0x1A;
338: private final static int SDEFINE3 = 0x1B;
339: private final static int SDEFINE4 = 0x1C;
340: private final static int SDEFINE5 = 0x1D;
341: private final static int SDEFINE6 = 0x1E;
342: private final static int SDEFINE7 = 0x1F;
343:
344: //==========================
345: // Unicode mode tags
346: //==========================
347: private final static int USWITCH0 = 0xE0;
348: private final static int USWITCH1 = 0xE1;
349: private final static int USWITCH2 = 0xE2;
350: private final static int USWITCH3 = 0xE3;
351: private final static int USWITCH4 = 0xE4;
352: private final static int USWITCH5 = 0xE5;
353: private final static int USWITCH6 = 0xE6;
354: private final static int USWITCH7 = 0xE7;
355:
356: private final static int UDEFINE0 = 0xE8;
357: private final static int UDEFINE1 = 0xE9;
358: private final static int UDEFINE2 = 0xEA;
359: private final static int UDEFINE3 = 0xEB;
360: private final static int UDEFINE4 = 0xEC;
361: private final static int UDEFINE5 = 0xED;
362: private final static int UDEFINE6 = 0xEE;
363: private final static int UDEFINE7 = 0xEF;
364:
365: private final static int UQUOTEU = 0xF0;
366: private final static int UDEFINEX = 0xF1;
367:
368: //private final static int URESERVED = 0xF2; // this is a reserved value
369:
370: /* Print out an array of characters, with non-printables (for me)
371: displayed as hex values */
372: private void printChars(char[] chars, int len) {
373: for (int i = 0; i < len; i++) {
374: int c = (int) chars[i];
375: if (c < 0x0020 || c >= 0x7f) {
376: log("[0x");
377: log(Integer.toHexString(c));
378: log("]");
379: } else {
380: log(String.valueOf((char) c));
381: }
382: }
383: logln("");
384: }
385:
386: private void printBytes(byte[] byteBuffer, int len) {
387: int curByteIndex = 0;
388: int byteBufferLimit = len;
389: int mode = SINGLEBYTEMODE;
390: int aByte = 0x00;
391:
392: if (len > byteBuffer.length) {
393: logln("Warning: printBytes called with length too large. Truncating");
394: byteBufferLimit = byteBuffer.length;
395: }
396:
397: while (curByteIndex < byteBufferLimit) {
398: switch (mode) {
399: case SINGLEBYTEMODE:
400: while (curByteIndex < byteBufferLimit
401: && mode == SINGLEBYTEMODE) {
402: aByte = ((int) byteBuffer[curByteIndex++]) & 0xFF;
403: switch (aByte) {
404: default:
405: log(Integer.toHexString(((int) aByte) & 0xFF)
406: + " ");
407: break;
408: // quote unicode
409: case SQUOTEU:
410: log("SQUOTEU ");
411: if (curByteIndex < byteBufferLimit) {
412: log(Integer
413: .toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF)
414: + " ");
415: }
416: if (curByteIndex < byteBufferLimit) {
417: log(Integer
418: .toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF)
419: + " ");
420: }
421: break;
422:
423: // switch to Unicode mode
424: case SSWITCHU:
425: log("SSWITCHU ");
426: mode = UNICODEMODE;
427: break;
428:
429: // handle all quote tags
430: case SQUOTE0:
431: case SQUOTE1:
432: case SQUOTE2:
433: case SQUOTE3:
434: case SQUOTE4:
435: case SQUOTE5:
436: case SQUOTE6:
437: case SQUOTE7:
438: log("SQUOTE" + (aByte - SQUOTE0) + " ");
439: if (curByteIndex < byteBufferLimit) {
440: log(Integer
441: .toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF)
442: + " ");
443: }
444: break;
445:
446: // handle all switch tags
447: case SSWITCH0:
448: case SSWITCH1:
449: case SSWITCH2:
450: case SSWITCH3:
451: case SSWITCH4:
452: case SSWITCH5:
453: case SSWITCH6:
454: case SSWITCH7:
455: log("SSWITCH" + (aByte - SSWITCH0) + " ");
456: break;
457:
458: // handle all define tags
459: case SDEFINE0:
460: case SDEFINE1:
461: case SDEFINE2:
462: case SDEFINE3:
463: case SDEFINE4:
464: case SDEFINE5:
465: case SDEFINE6:
466: case SDEFINE7:
467: log("SDEFINE" + (aByte - SDEFINE0) + " ");
468: if (curByteIndex < byteBufferLimit) {
469: log(Integer
470: .toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF)
471: + " ");
472: }
473: break;
474:
475: // handle define extended tag
476: case SDEFINEX:
477: log("SDEFINEX ");
478: if (curByteIndex < byteBufferLimit) {
479: log(Integer
480: .toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF)
481: + " ");
482: }
483: if (curByteIndex < byteBufferLimit) {
484: log(Integer
485: .toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF)
486: + " ");
487: }
488: break;
489:
490: } // end switch
491: } // end while
492: break;
493:
494: case UNICODEMODE:
495: while (curByteIndex < byteBufferLimit
496: && mode == UNICODEMODE) {
497: aByte = ((int) byteBuffer[curByteIndex++]) & 0xFF;
498: switch (aByte) {
499: // handle all define tags
500: case UDEFINE0:
501: case UDEFINE1:
502: case UDEFINE2:
503: case UDEFINE3:
504: case UDEFINE4:
505: case UDEFINE5:
506: case UDEFINE6:
507: case UDEFINE7:
508: log("UDEFINE" + (aByte - UDEFINE0) + " ");
509: if (curByteIndex < byteBufferLimit) {
510: log(Integer
511: .toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF)
512: + " ");
513: }
514: mode = SINGLEBYTEMODE;
515: break;
516:
517: // handle define extended tag
518: case UDEFINEX:
519: log("UDEFINEX ");
520: if (curByteIndex < byteBufferLimit) {
521: log(Integer
522: .toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF)
523: + " ");
524: }
525: if (curByteIndex < byteBufferLimit) {
526: log(Integer
527: .toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF)
528: + " ");
529: }
530: break;
531:
532: // handle all switch tags
533: case USWITCH0:
534: case USWITCH1:
535: case USWITCH2:
536: case USWITCH3:
537: case USWITCH4:
538: case USWITCH5:
539: case USWITCH6:
540: case USWITCH7:
541: log("USWITCH" + (aByte - USWITCH0) + " ");
542: mode = SINGLEBYTEMODE;
543: break;
544:
545: // quote unicode
546: case UQUOTEU:
547: log("UQUOTEU ");
548: if (curByteIndex < byteBufferLimit) {
549: log(Integer
550: .toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF)
551: + " ");
552: }
553: if (curByteIndex < byteBufferLimit) {
554: log(Integer
555: .toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF)
556: + " ");
557: }
558: break;
559:
560: default:
561: log(Integer.toHexString(((int) aByte) & 0xFF)
562: + " ");
563: if (curByteIndex < byteBufferLimit) {
564: log(Integer
565: .toHexString(((int) byteBuffer[curByteIndex++]) & 0xFF)
566: + " ");
567: }
568: break;
569:
570: } // end switch
571: } // end while
572: break;
573:
574: } // end switch( mode )
575: } // end while
576:
577: logln("");
578: }
579: }
|