001: /*
002: * Copyright (C) The Apache Software Foundation. All rights reserved.
003: *
004: * This software is published under the terms of the Apache Software License
005: * version 1.1, a copy of which has been included with this distribution in
006: * the LICENSE.txt file.
007: */
008: package installer;
009:
010: import java.io.IOException;
011: import java.io.InputStream;
012:
013: /**
014: * An input stream that decompresses from the BZip2 format (without the file
015: * header chars) to be read as any other stream.
016: *
017: * @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a>
018: */
019: public class CBZip2InputStream extends InputStream implements
020: BZip2Constants {
021: private static final int START_BLOCK_STATE = 1;
022: private static final int RAND_PART_A_STATE = 2;
023: private static final int RAND_PART_B_STATE = 3;
024: private static final int RAND_PART_C_STATE = 4;
025: private static final int NO_RAND_PART_A_STATE = 5;
026: private static final int NO_RAND_PART_B_STATE = 6;
027: private static final int NO_RAND_PART_C_STATE = 7;
028:
029: private CRC m_crc = new CRC();
030: private boolean[] m_inUse = new boolean[256];
031: private char[] m_seqToUnseq = new char[256];
032: private char[] m_unseqToSeq = new char[256];
033: private char[] m_selector = new char[MAX_SELECTORS];
034: private char[] m_selectorMtf = new char[MAX_SELECTORS];
035:
036: /*
037: * freq table collected to save a pass over the data
038: * during decompression.
039: */
040: private int[] m_unzftab = new int[256];
041:
042: private int[][] m_limit = new int[N_GROUPS][MAX_ALPHA_SIZE];
043: private int[][] m_base = new int[N_GROUPS][MAX_ALPHA_SIZE];
044: private int[][] m_perm = new int[N_GROUPS][MAX_ALPHA_SIZE];
045: private int[] m_minLens = new int[N_GROUPS];
046:
047: private boolean m_streamEnd;
048: private int m_currentChar = -1;
049:
050: private int m_currentState = START_BLOCK_STATE;
051: private int m_rNToGo;
052: private int m_rTPos;
053: private int m_tPos;
054:
055: private int i2;
056: private int count;
057: private int chPrev;
058: private int ch2;
059: private int j2;
060: private char z;
061:
062: private boolean m_blockRandomised;
063:
064: /*
065: * always: in the range 0 .. 9.
066: * The current block size is 100000 * this number.
067: */
068: private int m_blockSize100k;
069: private int m_bsBuff;
070: private int m_bsLive;
071:
072: private InputStream m_input;
073:
074: private int m_computedBlockCRC;
075: private int m_computedCombinedCRC;
076:
077: /*
078: * index of the last char in the block, so
079: * the block size == last + 1.
080: */
081: private int m_last;
082: private char[] m_ll8;
083: private int m_nInUse;
084:
085: /*
086: * index in zptr[] of original string after sorting.
087: */
088: private int m_origPtr;
089:
090: private int m_storedBlockCRC;
091: private int m_storedCombinedCRC;
092: private int[] m_tt;
093:
094: public CBZip2InputStream(final InputStream input) {
095: bsSetStream(input);
096: initialize();
097: initBlock();
098: setupBlock();
099: }
100:
101: private static void badBlockHeader() {
102: cadvise();
103: }
104:
105: private static void blockOverrun() {
106: cadvise();
107: }
108:
109: private static void cadvise() {
110: System.out.println("CRC Error");
111: //throw new CCoruptionError();
112: }
113:
114: private static void compressedStreamEOF() {
115: cadvise();
116: }
117:
118: private static void crcError() {
119: cadvise();
120: }
121:
122: public int read() {
123: if (m_streamEnd) {
124: return -1;
125: } else {
126: int retChar = m_currentChar;
127: switch (m_currentState) {
128: case START_BLOCK_STATE:
129: break;
130: case RAND_PART_A_STATE:
131: break;
132: case RAND_PART_B_STATE:
133: setupRandPartB();
134: break;
135: case RAND_PART_C_STATE:
136: setupRandPartC();
137: break;
138: case NO_RAND_PART_A_STATE:
139: break;
140: case NO_RAND_PART_B_STATE:
141: setupNoRandPartB();
142: break;
143: case NO_RAND_PART_C_STATE:
144: setupNoRandPartC();
145: break;
146: default:
147: break;
148: }
149: return retChar;
150: }
151: }
152:
153: private void setDecompressStructureSizes(int newSize100k) {
154: if (!(0 <= newSize100k && newSize100k <= 9
155: && 0 <= m_blockSize100k && m_blockSize100k <= 9)) {
156: // throw new IOException("Invalid block size");
157: }
158:
159: m_blockSize100k = newSize100k;
160:
161: if (newSize100k == 0) {
162: return;
163: }
164:
165: int n = BASE_BLOCK_SIZE * newSize100k;
166: m_ll8 = new char[n];
167: m_tt = new int[n];
168: }
169:
170: private void setupBlock() {
171: int[] cftab = new int[257];
172: char ch;
173:
174: cftab[0] = 0;
175: for (int i = 1; i <= 256; i++) {
176: cftab[i] = m_unzftab[i - 1];
177: }
178: for (int i = 1; i <= 256; i++) {
179: cftab[i] += cftab[i - 1];
180: }
181:
182: for (int i = 0; i <= m_last; i++) {
183: ch = m_ll8[i];
184: m_tt[cftab[ch]] = i;
185: cftab[ch]++;
186: }
187: cftab = null;
188:
189: m_tPos = m_tt[m_origPtr];
190:
191: count = 0;
192: i2 = 0;
193: ch2 = 256;
194: /*
195: * not a char and not EOF
196: */
197: if (m_blockRandomised) {
198: m_rNToGo = 0;
199: m_rTPos = 0;
200: setupRandPartA();
201: } else {
202: setupNoRandPartA();
203: }
204: }
205:
206: private void setupNoRandPartA() {
207: if (i2 <= m_last) {
208: chPrev = ch2;
209: ch2 = m_ll8[m_tPos];
210: m_tPos = m_tt[m_tPos];
211: i2++;
212:
213: m_currentChar = ch2;
214: m_currentState = NO_RAND_PART_B_STATE;
215: m_crc.updateCRC(ch2);
216: } else {
217: endBlock();
218: initBlock();
219: setupBlock();
220: }
221: }
222:
223: private void setupNoRandPartB() {
224: if (ch2 != chPrev) {
225: m_currentState = NO_RAND_PART_A_STATE;
226: count = 1;
227: setupNoRandPartA();
228: } else {
229: count++;
230: if (count >= 4) {
231: z = m_ll8[m_tPos];
232: m_tPos = m_tt[m_tPos];
233: m_currentState = NO_RAND_PART_C_STATE;
234: j2 = 0;
235: setupNoRandPartC();
236: } else {
237: m_currentState = NO_RAND_PART_A_STATE;
238: setupNoRandPartA();
239: }
240: }
241: }
242:
243: private void setupNoRandPartC() {
244: if (j2 < z) {
245: m_currentChar = ch2;
246: m_crc.updateCRC(ch2);
247: j2++;
248: } else {
249: m_currentState = NO_RAND_PART_A_STATE;
250: i2++;
251: count = 0;
252: setupNoRandPartA();
253: }
254: }
255:
256: private void setupRandPartA() {
257: if (i2 <= m_last) {
258: chPrev = ch2;
259: ch2 = m_ll8[m_tPos];
260: m_tPos = m_tt[m_tPos];
261: if (m_rNToGo == 0) {
262: m_rNToGo = RAND_NUMS[m_rTPos];
263: m_rTPos++;
264: if (m_rTPos == 512) {
265: m_rTPos = 0;
266: }
267: }
268: m_rNToGo--;
269: ch2 ^= ((m_rNToGo == 1) ? 1 : 0);
270: i2++;
271:
272: m_currentChar = ch2;
273: m_currentState = RAND_PART_B_STATE;
274: m_crc.updateCRC(ch2);
275: } else {
276: endBlock();
277: initBlock();
278: setupBlock();
279: }
280: }
281:
282: private void setupRandPartB() {
283: if (ch2 != chPrev) {
284: m_currentState = RAND_PART_A_STATE;
285: count = 1;
286: setupRandPartA();
287: } else {
288: count++;
289: if (count >= 4) {
290: z = m_ll8[m_tPos];
291: m_tPos = m_tt[m_tPos];
292: if (m_rNToGo == 0) {
293: m_rNToGo = RAND_NUMS[m_rTPos];
294: m_rTPos++;
295: if (m_rTPos == 512) {
296: m_rTPos = 0;
297: }
298: }
299: m_rNToGo--;
300: z ^= ((m_rNToGo == 1) ? 1 : 0);
301: j2 = 0;
302: m_currentState = RAND_PART_C_STATE;
303: setupRandPartC();
304: } else {
305: m_currentState = RAND_PART_A_STATE;
306: setupRandPartA();
307: }
308: }
309: }
310:
311: private void setupRandPartC() {
312: if (j2 < z) {
313: m_currentChar = ch2;
314: m_crc.updateCRC(ch2);
315: j2++;
316: } else {
317: m_currentState = RAND_PART_A_STATE;
318: i2++;
319: count = 0;
320: setupRandPartA();
321: }
322: }
323:
324: private void getAndMoveToFrontDecode() {
325: int nextSym;
326:
327: int limitLast = BASE_BLOCK_SIZE * m_blockSize100k;
328: m_origPtr = readVariableSizedInt(24);
329:
330: recvDecodingTables();
331: int EOB = m_nInUse + 1;
332: int groupNo = -1;
333: int groupPos = 0;
334:
335: /*
336: * Setting up the unzftab entries here is not strictly
337: * necessary, but it does save having to do it later
338: * in a separate pass, and so saves a block's worth of
339: * cache misses.
340: */
341: for (int i = 0; i <= 255; i++) {
342: m_unzftab[i] = 0;
343: }
344:
345: final char[] yy = new char[256];
346: for (int i = 0; i <= 255; i++) {
347: yy[i] = (char) i;
348: }
349:
350: m_last = -1;
351: int zt;
352: int zn;
353: int zvec;
354: int zj;
355: groupNo++;
356: groupPos = G_SIZE - 1;
357:
358: zt = m_selector[groupNo];
359: zn = m_minLens[zt];
360: zvec = bsR(zn);
361: while (zvec > m_limit[zt][zn]) {
362: zn++;
363:
364: while (m_bsLive < 1) {
365: int zzi;
366: try {
367: zzi = m_input.read();
368: } catch (IOException e) {
369: compressedStreamEOF();
370: break;
371: }
372: if (zzi == -1) {
373: compressedStreamEOF();
374: break;
375: }
376: m_bsBuff = (m_bsBuff << 8) | (zzi & 0xff);
377: m_bsLive += 8;
378: }
379:
380: zj = (m_bsBuff >> (m_bsLive - 1)) & 1;
381: m_bsLive--;
382:
383: zvec = (zvec << 1) | zj;
384: }
385: nextSym = m_perm[zt][zvec - m_base[zt][zn]];
386:
387: while (true) {
388: if (nextSym == EOB) {
389: break;
390: }
391:
392: if (nextSym == RUNA || nextSym == RUNB) {
393: char ch;
394: int s = -1;
395: int N = 1;
396: do {
397: if (nextSym == RUNA) {
398: s = s + (0 + 1) * N;
399: } else// if( nextSym == RUNB )
400: {
401: s = s + (1 + 1) * N;
402: }
403: N = N * 2;
404:
405: if (groupPos == 0) {
406: groupNo++;
407: groupPos = G_SIZE;
408: }
409: groupPos--;
410: zt = m_selector[groupNo];
411: zn = m_minLens[zt];
412: zvec = bsR(zn);
413: while (zvec > m_limit[zt][zn]) {
414: zn++;
415:
416: while (m_bsLive < 1) {
417: int zzi;
418: char thech = 0;
419: try {
420: thech = (char) m_input.read();
421: } catch (IOException e) {
422: compressedStreamEOF();
423: }
424: if (thech == -1) {
425: compressedStreamEOF();
426: }
427: zzi = thech;
428: m_bsBuff = (m_bsBuff << 8) | (zzi & 0xff);
429: m_bsLive += 8;
430: }
431:
432: zj = (m_bsBuff >> (m_bsLive - 1)) & 1;
433: m_bsLive--;
434: zvec = (zvec << 1) | zj;
435: }
436:
437: nextSym = m_perm[zt][zvec - m_base[zt][zn]];
438:
439: } while (nextSym == RUNA || nextSym == RUNB);
440:
441: s++;
442: ch = m_seqToUnseq[yy[0]];
443: m_unzftab[ch] += s;
444:
445: while (s > 0) {
446: m_last++;
447: m_ll8[m_last] = ch;
448: s--;
449: }
450:
451: if (m_last >= limitLast) {
452: blockOverrun();
453: }
454: continue;
455: } else {
456: char tmp;
457: m_last++;
458: if (m_last >= limitLast) {
459: blockOverrun();
460: }
461:
462: tmp = yy[nextSym - 1];
463: m_unzftab[m_seqToUnseq[tmp]]++;
464: m_ll8[m_last] = m_seqToUnseq[tmp];
465:
466: /*
467: * This loop is hammered during decompression,
468: * hence the unrolling.
469: * for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
470: */
471: int j = nextSym - 1;
472: for (; j > 3; j -= 4) {
473: yy[j] = yy[j - 1];
474: yy[j - 1] = yy[j - 2];
475: yy[j - 2] = yy[j - 3];
476: yy[j - 3] = yy[j - 4];
477: }
478: for (; j > 0; j--) {
479: yy[j] = yy[j - 1];
480: }
481:
482: yy[0] = tmp;
483:
484: if (groupPos == 0) {
485: groupNo++;
486: groupPos = G_SIZE;
487: }
488: groupPos--;
489: zt = m_selector[groupNo];
490: zn = m_minLens[zt];
491: zvec = bsR(zn);
492: while (zvec > m_limit[zt][zn]) {
493: zn++;
494:
495: while (m_bsLive < 1) {
496: char ch = 0;
497: try {
498: ch = (char) m_input.read();
499: } catch (IOException e) {
500: compressedStreamEOF();
501: }
502:
503: m_bsBuff = (m_bsBuff << 8) | (ch & 0xff);
504: m_bsLive += 8;
505: }
506:
507: zj = (m_bsBuff >> (m_bsLive - 1)) & 1;
508: m_bsLive--;
509:
510: zvec = (zvec << 1) | zj;
511: }
512: nextSym = m_perm[zt][zvec - m_base[zt][zn]];
513:
514: continue;
515: }
516: }
517: }
518:
519: private void bsFinishedWithStream() {
520: m_input = null;
521: }
522:
523: private int readVariableSizedInt(final int numBits) {
524: return bsR(numBits);
525: }
526:
527: private char readUnsignedChar() {
528: return (char) bsR(8);
529: }
530:
531: private int readInt() {
532: int u = 0;
533: u = (u << 8) | bsR(8);
534: u = (u << 8) | bsR(8);
535: u = (u << 8) | bsR(8);
536: u = (u << 8) | bsR(8);
537: return u;
538: }
539:
540: private int bsR(final int n) {
541: while (m_bsLive < n) {
542: char ch = 0;
543: try {
544: ch = (char) m_input.read();
545: } catch (final IOException ioe) {
546: compressedStreamEOF();
547: }
548:
549: if (ch == -1) {
550: compressedStreamEOF();
551: }
552:
553: m_bsBuff = (m_bsBuff << 8) | (ch & 0xff);
554: m_bsLive += 8;
555: }
556:
557: final int result = (m_bsBuff >> (m_bsLive - n))
558: & ((1 << n) - 1);
559: m_bsLive -= n;
560: return result;
561: }
562:
563: private void bsSetStream(final InputStream input) {
564: m_input = input;
565: m_bsLive = 0;
566: m_bsBuff = 0;
567: }
568:
569: private void complete() {
570: m_storedCombinedCRC = readInt();
571: if (m_storedCombinedCRC != m_computedCombinedCRC) {
572: crcError();
573: }
574:
575: bsFinishedWithStream();
576: m_streamEnd = true;
577: }
578:
579: private void endBlock() {
580: m_computedBlockCRC = m_crc.getFinalCRC();
581: /*
582: * A bad CRC is considered a fatal error.
583: */
584: if (m_storedBlockCRC != m_computedBlockCRC) {
585: crcError();
586: }
587:
588: m_computedCombinedCRC = (m_computedCombinedCRC << 1)
589: | (m_computedCombinedCRC >>> 31);
590: m_computedCombinedCRC ^= m_computedBlockCRC;
591: }
592:
593: private void hbCreateDecodeTables(final int[] limit,
594: final int[] base, final int[] perm, final char[] length,
595: final int minLen, final int maxLen, final int alphaSize) {
596: int pp = 0;
597: for (int i = minLen; i <= maxLen; i++) {
598: for (int j = 0; j < alphaSize; j++) {
599: if (length[j] == i) {
600: perm[pp] = j;
601: pp++;
602: }
603: }
604: }
605:
606: for (int i = 0; i < MAX_CODE_LEN; i++) {
607: base[i] = 0;
608: }
609:
610: for (int i = 0; i < alphaSize; i++) {
611: base[length[i] + 1]++;
612: }
613:
614: for (int i = 1; i < MAX_CODE_LEN; i++) {
615: base[i] += base[i - 1];
616: }
617:
618: for (int i = 0; i < MAX_CODE_LEN; i++) {
619: limit[i] = 0;
620: }
621:
622: int vec = 0;
623: for (int i = minLen; i <= maxLen; i++) {
624: vec += (base[i + 1] - base[i]);
625: limit[i] = vec - 1;
626: vec <<= 1;
627: }
628:
629: for (int i = minLen + 1; i <= maxLen; i++) {
630: base[i] = ((limit[i - 1] + 1) << 1) - base[i];
631: }
632: }
633:
634: private void initBlock() {
635: final char magic1 = readUnsignedChar();
636: final char magic2 = readUnsignedChar();
637: final char magic3 = readUnsignedChar();
638: final char magic4 = readUnsignedChar();
639: final char magic5 = readUnsignedChar();
640: final char magic6 = readUnsignedChar();
641: if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45
642: && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) {
643: complete();
644: return;
645: }
646:
647: if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59
648: || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) {
649: badBlockHeader();
650: m_streamEnd = true;
651: return;
652: }
653:
654: m_storedBlockCRC = readInt();
655:
656: if (bsR(1) == 1) {
657: m_blockRandomised = true;
658: } else {
659: m_blockRandomised = false;
660: }
661:
662: // currBlockNo++;
663: getAndMoveToFrontDecode();
664:
665: m_crc.initialiseCRC();
666: m_currentState = START_BLOCK_STATE;
667: }
668:
669: private void initialize() {
670: final char magic3 = readUnsignedChar();
671: final char magic4 = readUnsignedChar();
672: if (magic3 != 'h' || magic4 < '1' || magic4 > '9') {
673: bsFinishedWithStream();
674: m_streamEnd = true;
675: return;
676: }
677:
678: setDecompressStructureSizes(magic4 - '0');
679: m_computedCombinedCRC = 0;
680: }
681:
682: private void makeMaps() {
683: m_nInUse = 0;
684: for (int i = 0; i < 256; i++) {
685: if (m_inUse[i]) {
686: m_seqToUnseq[m_nInUse] = (char) i;
687: m_unseqToSeq[i] = (char) m_nInUse;
688: m_nInUse++;
689: }
690: }
691: }
692:
693: private void recvDecodingTables() {
694: buildInUseTable();
695: makeMaps();
696: final int alphaSize = m_nInUse + 2;
697:
698: /*
699: * Now the selectors
700: */
701: final int groupCount = bsR(3);
702: final int selectorCount = bsR(15);
703: for (int i = 0; i < selectorCount; i++) {
704: int run = 0;
705: while (bsR(1) == 1) {
706: run++;
707: }
708: m_selectorMtf[i] = (char) run;
709: }
710:
711: /*
712: * Undo the MTF values for the selectors.
713: */
714: final char[] pos = new char[N_GROUPS];
715: for (char v = 0; v < groupCount; v++) {
716: pos[v] = v;
717: }
718:
719: for (int i = 0; i < selectorCount; i++) {
720: int v = m_selectorMtf[i];
721: final char tmp = pos[v];
722: while (v > 0) {
723: pos[v] = pos[v - 1];
724: v--;
725: }
726: pos[0] = tmp;
727: m_selector[i] = tmp;
728: }
729:
730: final char[][] len = new char[N_GROUPS][MAX_ALPHA_SIZE];
731: /*
732: * Now the coding tables
733: */
734: for (int i = 0; i < groupCount; i++) {
735: int curr = bsR(5);
736: for (int j = 0; j < alphaSize; j++) {
737: while (bsR(1) == 1) {
738: if (bsR(1) == 0) {
739: curr++;
740: } else {
741: curr--;
742: }
743: }
744: len[i][j] = (char) curr;
745: }
746: }
747:
748: /*
749: * Create the Huffman decoding tables
750: */
751: for (int k = 0; k < groupCount; k++) {
752: int minLen = 32;
753: int maxLen = 0;
754: for (int i = 0; i < alphaSize; i++) {
755: if (len[k][i] > maxLen) {
756: maxLen = len[k][i];
757: }
758: if (len[k][i] < minLen) {
759: minLen = len[k][i];
760: }
761: }
762: hbCreateDecodeTables(m_limit[k], m_base[k], m_perm[k],
763: len[k], minLen, maxLen, alphaSize);
764: m_minLens[k] = minLen;
765: }
766: }
767:
768: private void buildInUseTable() {
769: final boolean[] inUse16 = new boolean[16];
770:
771: /*
772: * Receive the mapping table
773: */
774: for (int i = 0; i < 16; i++) {
775: if (bsR(1) == 1) {
776: inUse16[i] = true;
777: } else {
778: inUse16[i] = false;
779: }
780: }
781:
782: for (int i = 0; i < 256; i++) {
783: m_inUse[i] = false;
784: }
785:
786: for (int i = 0; i < 16; i++) {
787: if (inUse16[i]) {
788: for (int j = 0; j < 16; j++) {
789: if (bsR(1) == 1) {
790: m_inUse[i * 16 + j] = true;
791: }
792: }
793: }
794: }
795: }
796: }
|