001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.commons.vfs.provider.bzip2;
018:
019: import java.io.IOException;
020: import java.io.InputStream;
021:
022: /*
023: * This package is based on the work done by Keiron Liddle, Aftex Software
024: * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
025: * great code.
026: */
027:
028: /**
029: * An input stream that decompresses from the BZip2 format (without the file
030: * header chars) to be read as any other stream.
031: *
032: * @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a>
033: */
034: class CBZip2InputStream extends InputStream implements BZip2Constants {
035: private static final int START_BLOCK_STATE = 1;
036: private static final int RAND_PART_A_STATE = 2;
037: private static final int RAND_PART_B_STATE = 3;
038: private static final int RAND_PART_C_STATE = 4;
039: private static final int NO_RAND_PART_A_STATE = 5;
040: private static final int NO_RAND_PART_B_STATE = 6;
041: private static final int NO_RAND_PART_C_STATE = 7;
042:
043: private CRC m_crc = new CRC();
044: private boolean[] m_inUse = new boolean[256];
045: private char[] m_seqToUnseq = new char[256];
046: private char[] m_unseqToSeq = new char[256];
047: private char[] m_selector = new char[MAX_SELECTORS];
048: private char[] m_selectorMtf = new char[MAX_SELECTORS];
049:
050: /*
051: * freq table collected to save a pass over the data
052: * during decompression.
053: */
054: private int[] m_unzftab = new int[256];
055:
056: private int[][] m_limit = new int[N_GROUPS][MAX_ALPHA_SIZE];
057: private int[][] m_base = new int[N_GROUPS][MAX_ALPHA_SIZE];
058: private int[][] m_perm = new int[N_GROUPS][MAX_ALPHA_SIZE];
059: private int[] m_minLens = new int[N_GROUPS];
060:
061: private boolean m_streamEnd;
062: private int m_currentChar = -1;
063:
064: private int m_currentState = START_BLOCK_STATE;
065: private int m_rNToGo;
066: private int m_rTPos;
067: private int m_tPos;
068:
069: private int i2;
070: private int count;
071: private int chPrev;
072: private int ch2;
073: private int j2;
074: private char z;
075:
076: private boolean m_blockRandomised;
077:
078: /*
079: * always: in the range 0 .. 9.
080: * The current block size is 100000 * this number.
081: */
082: private int m_blockSize100k;
083: private int m_bsBuff;
084: private int m_bsLive;
085:
086: private InputStream m_input;
087:
088: private int m_computedBlockCRC;
089: private int m_computedCombinedCRC;
090:
091: /*
092: * index of the last char in the block, so
093: * the block size == last + 1.
094: */
095: private int m_last;
096: private char[] m_ll8;
097: private int m_nInUse;
098:
099: /*
100: * index in zptr[] of original string after sorting.
101: */
102: private int m_origPtr;
103:
104: private int m_storedBlockCRC;
105: private int m_storedCombinedCRC;
106: private int[] m_tt;
107:
108: CBZip2InputStream(final InputStream input) {
109: bsSetStream(input);
110: initialize();
111: initBlock();
112: setupBlock();
113: }
114:
115: private static void badBlockHeader() {
116: cadvise();
117: }
118:
119: private static void blockOverrun() {
120: cadvise();
121: }
122:
123: private static void cadvise() {
124: System.out.println("CRC Error");
125: //throw new CCoruptionError();
126: }
127:
128: private static void compressedStreamEOF() {
129: cadvise();
130: }
131:
132: private static void crcError() {
133: cadvise();
134: }
135:
136: /**
137: * a fake <code>available</code> which always returns 1 as long as the stream is not at end.
138: * This is required to make this stream work if wrapped in an BufferedInputStream.
139: *
140: * @author imario@apache.org
141: */
142: public int available() throws IOException {
143: if (!m_streamEnd) {
144: return 1;
145: }
146: return 0;
147: }
148:
149: public int read() {
150: if (m_streamEnd) {
151: return -1;
152: } else {
153: int retChar = m_currentChar;
154: switch (m_currentState) {
155: case START_BLOCK_STATE:
156: break;
157: case RAND_PART_A_STATE:
158: break;
159: case RAND_PART_B_STATE:
160: setupRandPartB();
161: break;
162: case RAND_PART_C_STATE:
163: setupRandPartC();
164: break;
165: case NO_RAND_PART_A_STATE:
166: break;
167: case NO_RAND_PART_B_STATE:
168: setupNoRandPartB();
169: break;
170: case NO_RAND_PART_C_STATE:
171: setupNoRandPartC();
172: break;
173: default:
174: break;
175: }
176: return retChar;
177: }
178: }
179:
180: private void setDecompressStructureSizes(int newSize100k) {
181: if (!(0 <= newSize100k && newSize100k <= 9
182: && 0 <= m_blockSize100k && m_blockSize100k <= 9)) {
183: // throw new IOException("Invalid block size");
184: }
185:
186: m_blockSize100k = newSize100k;
187:
188: if (newSize100k == 0) {
189: return;
190: }
191:
192: int n = BASE_BLOCK_SIZE * newSize100k;
193: m_ll8 = new char[n];
194: m_tt = new int[n];
195: }
196:
197: private void setupBlock() {
198: int[] cftab = new int[257];
199: char ch;
200:
201: cftab[0] = 0;
202: for (int i = 1; i <= 256; i++) {
203: cftab[i] = m_unzftab[i - 1];
204: }
205: for (int i = 1; i <= 256; i++) {
206: cftab[i] += cftab[i - 1];
207: }
208:
209: for (int i = 0; i <= m_last; i++) {
210: ch = m_ll8[i];
211: m_tt[cftab[ch]] = i;
212: cftab[ch]++;
213: }
214: cftab = null;
215:
216: m_tPos = m_tt[m_origPtr];
217:
218: count = 0;
219: i2 = 0;
220: ch2 = 256;
221: /*
222: * not a char and not EOF
223: */
224: if (m_blockRandomised) {
225: m_rNToGo = 0;
226: m_rTPos = 0;
227: setupRandPartA();
228: } else {
229: setupNoRandPartA();
230: }
231: }
232:
233: private void setupNoRandPartA() {
234: if (i2 <= m_last) {
235: chPrev = ch2;
236: ch2 = m_ll8[m_tPos];
237: m_tPos = m_tt[m_tPos];
238: i2++;
239:
240: m_currentChar = ch2;
241: m_currentState = NO_RAND_PART_B_STATE;
242: m_crc.updateCRC(ch2);
243: } else {
244: endBlock();
245: initBlock();
246: setupBlock();
247: }
248: }
249:
250: private void setupNoRandPartB() {
251: if (ch2 != chPrev) {
252: m_currentState = NO_RAND_PART_A_STATE;
253: count = 1;
254: setupNoRandPartA();
255: } else {
256: count++;
257: if (count >= 4) {
258: z = m_ll8[m_tPos];
259: m_tPos = m_tt[m_tPos];
260: m_currentState = NO_RAND_PART_C_STATE;
261: j2 = 0;
262: setupNoRandPartC();
263: } else {
264: m_currentState = NO_RAND_PART_A_STATE;
265: setupNoRandPartA();
266: }
267: }
268: }
269:
270: private void setupNoRandPartC() {
271: if (j2 < z) {
272: m_currentChar = ch2;
273: m_crc.updateCRC(ch2);
274: j2++;
275: } else {
276: m_currentState = NO_RAND_PART_A_STATE;
277: i2++;
278: count = 0;
279: setupNoRandPartA();
280: }
281: }
282:
283: private void setupRandPartA() {
284: if (i2 <= m_last) {
285: chPrev = ch2;
286: ch2 = m_ll8[m_tPos];
287: m_tPos = m_tt[m_tPos];
288: if (m_rNToGo == 0) {
289: m_rNToGo = RAND_NUMS[m_rTPos];
290: m_rTPos++;
291: if (m_rTPos == 512) {
292: m_rTPos = 0;
293: }
294: }
295: m_rNToGo--;
296: ch2 ^= ((m_rNToGo == 1) ? 1 : 0);
297: i2++;
298:
299: m_currentChar = ch2;
300: m_currentState = RAND_PART_B_STATE;
301: m_crc.updateCRC(ch2);
302: } else {
303: endBlock();
304: initBlock();
305: setupBlock();
306: }
307: }
308:
309: private void setupRandPartB() {
310: if (ch2 != chPrev) {
311: m_currentState = RAND_PART_A_STATE;
312: count = 1;
313: setupRandPartA();
314: } else {
315: count++;
316: if (count >= 4) {
317: z = m_ll8[m_tPos];
318: m_tPos = m_tt[m_tPos];
319: if (m_rNToGo == 0) {
320: m_rNToGo = RAND_NUMS[m_rTPos];
321: m_rTPos++;
322: if (m_rTPos == 512) {
323: m_rTPos = 0;
324: }
325: }
326: m_rNToGo--;
327: z ^= ((m_rNToGo == 1) ? 1 : 0);
328: j2 = 0;
329: m_currentState = RAND_PART_C_STATE;
330: setupRandPartC();
331: } else {
332: m_currentState = RAND_PART_A_STATE;
333: setupRandPartA();
334: }
335: }
336: }
337:
338: private void setupRandPartC() {
339: if (j2 < z) {
340: m_currentChar = ch2;
341: m_crc.updateCRC(ch2);
342: j2++;
343: } else {
344: m_currentState = RAND_PART_A_STATE;
345: i2++;
346: count = 0;
347: setupRandPartA();
348: }
349: }
350:
351: private void getAndMoveToFrontDecode() {
352: int nextSym;
353:
354: int limitLast = BASE_BLOCK_SIZE * m_blockSize100k;
355: m_origPtr = readVariableSizedInt(24);
356:
357: recvDecodingTables();
358: int EOB = m_nInUse + 1;
359: int groupNo = -1;
360: int groupPos = 0;
361:
362: /*
363: * Setting up the unzftab entries here is not strictly
364: * necessary, but it does save having to do it later
365: * in a separate pass, and so saves a block's worth of
366: * cache misses.
367: */
368: for (int i = 0; i <= 255; i++) {
369: m_unzftab[i] = 0;
370: }
371:
372: final char[] yy = new char[256];
373: for (int i = 0; i <= 255; i++) {
374: yy[i] = (char) i;
375: }
376:
377: m_last = -1;
378: int zt;
379: int zn;
380: int zvec;
381: int zj;
382: groupNo++;
383: groupPos = G_SIZE - 1;
384:
385: zt = m_selector[groupNo];
386: zn = m_minLens[zt];
387: zvec = bsR(zn);
388: while (zvec > m_limit[zt][zn]) {
389: zn++;
390:
391: while (m_bsLive < 1) {
392: int zzi;
393: char thech = 0;
394: try {
395: thech = (char) m_input.read();
396: } catch (IOException e) {
397: compressedStreamEOF();
398: }
399: if (thech == -1) {
400: compressedStreamEOF();
401: }
402: zzi = thech;
403: m_bsBuff = (m_bsBuff << 8) | (zzi & 0xff);
404: m_bsLive += 8;
405: }
406:
407: zj = (m_bsBuff >> (m_bsLive - 1)) & 1;
408: m_bsLive--;
409:
410: zvec = (zvec << 1) | zj;
411: }
412: nextSym = m_perm[zt][zvec - m_base[zt][zn]];
413:
414: while (true) {
415: if (nextSym == EOB) {
416: break;
417: }
418:
419: if (nextSym == RUNA || nextSym == RUNB) {
420: char ch;
421: int s = -1;
422: int N = 1;
423: do {
424: if (nextSym == RUNA) {
425: s = s + (0 + 1) * N;
426: } else// if( nextSym == RUNB )
427: {
428: s = s + (1 + 1) * N;
429: }
430: N = N * 2;
431:
432: if (groupPos == 0) {
433: groupNo++;
434: groupPos = G_SIZE;
435: }
436: groupPos--;
437: zt = m_selector[groupNo];
438: zn = m_minLens[zt];
439: zvec = bsR(zn);
440: while (zvec > m_limit[zt][zn]) {
441: zn++;
442:
443: while (m_bsLive < 1) {
444: int zzi;
445: char thech = 0;
446: try {
447: thech = (char) m_input.read();
448: } catch (IOException e) {
449: compressedStreamEOF();
450: }
451: if (thech == -1) {
452: compressedStreamEOF();
453: }
454: zzi = thech;
455: m_bsBuff = (m_bsBuff << 8) | (zzi & 0xff);
456: m_bsLive += 8;
457: }
458:
459: zj = (m_bsBuff >> (m_bsLive - 1)) & 1;
460: m_bsLive--;
461: zvec = (zvec << 1) | zj;
462: }
463:
464: nextSym = m_perm[zt][zvec - m_base[zt][zn]];
465:
466: } while (nextSym == RUNA || nextSym == RUNB);
467:
468: s++;
469: ch = m_seqToUnseq[yy[0]];
470: m_unzftab[ch] += s;
471:
472: while (s > 0) {
473: m_last++;
474: m_ll8[m_last] = ch;
475: s--;
476: }
477:
478: if (m_last >= limitLast) {
479: blockOverrun();
480: }
481: continue;
482: } else {
483: char tmp;
484: m_last++;
485: if (m_last >= limitLast) {
486: blockOverrun();
487: }
488:
489: tmp = yy[nextSym - 1];
490: m_unzftab[m_seqToUnseq[tmp]]++;
491: m_ll8[m_last] = m_seqToUnseq[tmp];
492:
493: /*
494: * This loop is hammered during decompression,
495: * hence the unrolling.
496: * for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
497: */
498: int j = nextSym - 1;
499: for (; j > 3; j -= 4) {
500: yy[j] = yy[j - 1];
501: yy[j - 1] = yy[j - 2];
502: yy[j - 2] = yy[j - 3];
503: yy[j - 3] = yy[j - 4];
504: }
505: for (; j > 0; j--) {
506: yy[j] = yy[j - 1];
507: }
508:
509: yy[0] = tmp;
510:
511: if (groupPos == 0) {
512: groupNo++;
513: groupPos = G_SIZE;
514: }
515: groupPos--;
516: zt = m_selector[groupNo];
517: zn = m_minLens[zt];
518: zvec = bsR(zn);
519: while (zvec > m_limit[zt][zn]) {
520: zn++;
521:
522: while (m_bsLive < 1) {
523: char ch = 0;
524: try {
525: ch = (char) m_input.read();
526: } catch (IOException e) {
527: compressedStreamEOF();
528: }
529:
530: m_bsBuff = (m_bsBuff << 8) | (ch & 0xff);
531: m_bsLive += 8;
532: }
533:
534: zj = (m_bsBuff >> (m_bsLive - 1)) & 1;
535: m_bsLive--;
536:
537: zvec = (zvec << 1) | zj;
538: }
539: nextSym = m_perm[zt][zvec - m_base[zt][zn]];
540:
541: continue;
542: }
543: }
544: }
545:
546: private void bsFinishedWithStream() {
547: if (m_input != null) {
548: try {
549: m_input.close();
550: } catch (IOException e) {
551: }
552: }
553: m_input = null;
554: }
555:
556: private int readVariableSizedInt(final int numBits) {
557: return bsR(numBits);
558: }
559:
560: private char readUnsignedChar() {
561: return (char) bsR(8);
562: }
563:
564: private int readInt() {
565: int u = 0;
566: u = (u << 8) | bsR(8);
567: u = (u << 8) | bsR(8);
568: u = (u << 8) | bsR(8);
569: u = (u << 8) | bsR(8);
570: return u;
571: }
572:
573: private int bsR(final int n) {
574: while (m_bsLive < n) {
575: char ch = 0;
576: try {
577: ch = (char) m_input.read();
578: } catch (final IOException ioe) {
579: compressedStreamEOF();
580: }
581:
582: if (ch == -1) {
583: compressedStreamEOF();
584: }
585:
586: m_bsBuff = (m_bsBuff << 8) | (ch & 0xff);
587: m_bsLive += 8;
588: }
589:
590: final int result = (m_bsBuff >> (m_bsLive - n))
591: & ((1 << n) - 1);
592: m_bsLive -= n;
593: return result;
594: }
595:
596: private void bsSetStream(final InputStream input) {
597: m_input = input;
598: m_bsLive = 0;
599: m_bsBuff = 0;
600: }
601:
602: private void complete() {
603: m_storedCombinedCRC = readInt();
604: if (m_storedCombinedCRC != m_computedCombinedCRC) {
605: crcError();
606: }
607:
608: bsFinishedWithStream();
609: m_streamEnd = true;
610: }
611:
612: private void endBlock() {
613: m_computedBlockCRC = m_crc.getFinalCRC();
614: /*
615: * A bad CRC is considered a fatal error.
616: */
617: if (m_storedBlockCRC != m_computedBlockCRC) {
618: crcError();
619: }
620:
621: m_computedCombinedCRC = (m_computedCombinedCRC << 1)
622: | (m_computedCombinedCRC >>> 31);
623: m_computedCombinedCRC ^= m_computedBlockCRC;
624: }
625:
626: private void hbCreateDecodeTables(final int[] limit,
627: final int[] base, final int[] perm, final char[] length,
628: final int minLen, final int maxLen, final int alphaSize) {
629: int pp = 0;
630: for (int i = minLen; i <= maxLen; i++) {
631: for (int j = 0; j < alphaSize; j++) {
632: if (length[j] == i) {
633: perm[pp] = j;
634: pp++;
635: }
636: }
637: }
638:
639: for (int i = 0; i < MAX_CODE_LEN; i++) {
640: base[i] = 0;
641: }
642:
643: for (int i = 0; i < alphaSize; i++) {
644: base[length[i] + 1]++;
645: }
646:
647: for (int i = 1; i < MAX_CODE_LEN; i++) {
648: base[i] += base[i - 1];
649: }
650:
651: for (int i = 0; i < MAX_CODE_LEN; i++) {
652: limit[i] = 0;
653: }
654:
655: int vec = 0;
656: for (int i = minLen; i <= maxLen; i++) {
657: vec += (base[i + 1] - base[i]);
658: limit[i] = vec - 1;
659: vec <<= 1;
660: }
661:
662: for (int i = minLen + 1; i <= maxLen; i++) {
663: base[i] = ((limit[i - 1] + 1) << 1) - base[i];
664: }
665: }
666:
667: private void initBlock() {
668: final char magic1 = readUnsignedChar();
669: final char magic2 = readUnsignedChar();
670: final char magic3 = readUnsignedChar();
671: final char magic4 = readUnsignedChar();
672: final char magic5 = readUnsignedChar();
673: final char magic6 = readUnsignedChar();
674: if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45
675: && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) {
676: complete();
677: return;
678: }
679:
680: if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59
681: || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) {
682: badBlockHeader();
683: m_streamEnd = true;
684: return;
685: }
686:
687: m_storedBlockCRC = readInt();
688:
689: if (bsR(1) == 1) {
690: m_blockRandomised = true;
691: } else {
692: m_blockRandomised = false;
693: }
694:
695: // currBlockNo++;
696: getAndMoveToFrontDecode();
697:
698: m_crc.initialiseCRC();
699: m_currentState = START_BLOCK_STATE;
700: }
701:
702: private void initialize() {
703: final char magic3 = readUnsignedChar();
704: final char magic4 = readUnsignedChar();
705: if (magic3 != 'h' || magic4 < '1' || magic4 > '9') {
706: bsFinishedWithStream();
707: m_streamEnd = true;
708: return;
709: }
710:
711: setDecompressStructureSizes(magic4 - '0');
712: m_computedCombinedCRC = 0;
713: }
714:
715: private void makeMaps() {
716: m_nInUse = 0;
717: for (int i = 0; i < 256; i++) {
718: if (m_inUse[i]) {
719: m_seqToUnseq[m_nInUse] = (char) i;
720: m_unseqToSeq[i] = (char) m_nInUse;
721: m_nInUse++;
722: }
723: }
724: }
725:
726: private void recvDecodingTables() {
727: buildInUseTable();
728: makeMaps();
729: final int alphaSize = m_nInUse + 2;
730:
731: /*
732: * Now the selectors
733: */
734: final int groupCount = bsR(3);
735: final int selectorCount = bsR(15);
736: for (int i = 0; i < selectorCount; i++) {
737: int run = 0;
738: while (bsR(1) == 1) {
739: run++;
740: }
741: m_selectorMtf[i] = (char) run;
742: }
743:
744: /*
745: * Undo the MTF values for the selectors.
746: */
747: final char[] pos = new char[N_GROUPS];
748: for (char v = 0; v < groupCount; v++) {
749: pos[v] = v;
750: }
751:
752: for (int i = 0; i < selectorCount; i++) {
753: int v = m_selectorMtf[i];
754: final char tmp = pos[v];
755: while (v > 0) {
756: pos[v] = pos[v - 1];
757: v--;
758: }
759: pos[0] = tmp;
760: m_selector[i] = tmp;
761: }
762:
763: final char[][] len = new char[N_GROUPS][MAX_ALPHA_SIZE];
764: /*
765: * Now the coding tables
766: */
767: for (int i = 0; i < groupCount; i++) {
768: int curr = bsR(5);
769: for (int j = 0; j < alphaSize; j++) {
770: while (bsR(1) == 1) {
771: if (bsR(1) == 0) {
772: curr++;
773: } else {
774: curr--;
775: }
776: }
777: len[i][j] = (char) curr;
778: }
779: }
780:
781: /*
782: * Create the Huffman decoding tables
783: */
784: for (int k = 0; k < groupCount; k++) {
785: int minLen = 32;
786: int maxLen = 0;
787: for (int i = 0; i < alphaSize; i++) {
788: if (len[k][i] > maxLen) {
789: maxLen = len[k][i];
790: }
791: if (len[k][i] < minLen) {
792: minLen = len[k][i];
793: }
794: }
795: hbCreateDecodeTables(m_limit[k], m_base[k], m_perm[k],
796: len[k], minLen, maxLen, alphaSize);
797: m_minLens[k] = minLen;
798: }
799: }
800:
801: private void buildInUseTable() {
802: final boolean[] inUse16 = new boolean[16];
803:
804: /*
805: * Receive the mapping table
806: */
807: for (int i = 0; i < 16; i++) {
808: if (bsR(1) == 1) {
809: inUse16[i] = true;
810: } else {
811: inUse16[i] = false;
812: }
813: }
814:
815: for (int i = 0; i < 256; i++) {
816: m_inUse[i] = false;
817: }
818:
819: for (int i = 0; i < 16; i++) {
820: if (inUse16[i]) {
821: for (int j = 0; j < 16; j++) {
822: if (bsR(1) == 1) {
823: m_inUse[i * 16 + j] = true;
824: }
825: }
826: }
827: }
828: }
829:
830: public void close() throws IOException {
831: bsFinishedWithStream();
832: }
833: }
|