0001: /*
0002: * Licensed to the Apache Software Foundation (ASF) under one or more
0003: * contributor license agreements. See the NOTICE file distributed with
0004: * this work for additional information regarding copyright ownership.
0005: * The ASF licenses this file to You under the Apache License, Version 2.0
0006: * (the "License"); you may not use this file except in compliance with
0007: * the License. You may obtain a copy of the License at
0008: *
0009: * http://www.apache.org/licenses/LICENSE-2.0
0010: *
0011: * Unless required by applicable law or agreed to in writing, software
0012: * distributed under the License is distributed on an "AS IS" BASIS,
0013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014: * See the License for the specific language governing permissions and
0015: * limitations under the License.
0016: *
0017: */
0018:
0019: /*
0020: * This package is based on the work done by Keiron Liddle, Aftex Software
0021: * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
0022: * great code.
0023: */
0024:
0025: package org.apache.tools.bzip2;
0026:
0027: import java.io.OutputStream;
0028: import java.io.IOException;
0029:
0030: /**
0031: * An output stream that compresses into the BZip2 format (without the file
0032: * header chars) into another stream.
0033:
0034: * <p>The compression requires large amounts of memory. Thus you
0035: * should call the {@link #close() close()} method as soon as
0036: * possible, to force <tt>CBZip2OutputStream</tt> to release the
0037: * allocated memory.</p>
0038: *
0039: * <p>You can shrink the amount of allocated memory and maybe raise
0040: * the compression speed by choosing a lower blocksize, which in turn
0041: * may cause a lower compression ratio. You can avoid unnecessary
0042: * memory allocation by avoiding using a blocksize which is bigger
0043: * than the size of the input. </p>
0044: *
0045: * <p>You can compute the memory usage for compressing by the
0046: * following formula:</p>
0047: * <pre>
0048: * <code>400k + (9 * blocksize)</code>.
0049: * </pre>
0050: *
0051: * <p>To get the memory required for decompression by {@link
0052: * CBZip2InputStream CBZip2InputStream} use</p>
0053: * <pre>
0054: * <code>65k + (5 * blocksize)</code>.
0055: * </pre>
0056: *
0057: * <table width="100%" border="1">
0058: * <colgroup>
0059: * <col width="33%" />
0060: * <col width="33%" />
0061: * <col width="33%" />
0062: * </colgroup>
0063: * <tr>
0064: * <th colspan="3">Memory usage by blocksize</th>
0065: * </tr><tr>
0066: * <th align="right">Blocksize</th>
0067: * <th align="right">Compression<br>memory usage</th>
0068: * <th align="right">Decompression<br>memory usage</th>
0069: * </tr><tr>
0070: * <td align="right">100k</td>
0071: * <td align="right">1300k</td>
0072: * <td align="right"> 565k</td>
0073: * </tr><tr>
0074: * <td align="right">200k</td>
0075: * <td align="right">2200k</td>
0076: * <td align="right">1065k</td>
0077: * </tr><tr>
0078: * <td align="right">300k</td>
0079: * <td align="right">3100k</td>
0080: * <td align="right">1565k</td>
0081: * </tr><tr>
0082: * <td align="right">400k</td>
0083: * <td align="right">4000k</td>
0084: * <td align="right">2065k</td>
0085: * </tr><tr>
0086: * <td align="right">500k</td>
0087: * <td align="right">4900k</td>
0088: * <td align="right">2565k</td>
0089: * </tr><tr>
0090: * <td align="right">600k</td>
0091: * <td align="right">5800k</td>
0092: * <td align="right">3065k</td>
0093: * </tr><tr>
0094: * <td align="right">700k</td>
0095: * <td align="right">6700k</td>
0096: * <td align="right">3565k</td>
0097: * </tr><tr>
0098: * <td align="right">800k</td>
0099: * <td align="right">7600k</td>
0100: * <td align="right">4065k</td>
0101: * </tr><tr>
0102: * <td align="right">900k</td>
0103: * <td align="right">8500k</td>
0104: * <td align="right">4565k</td>
0105: * </tr>
0106: * </table>
0107: *
0108: * <p>For decompression <tt>CBZip2InputStream</tt> allocates less
0109: * memory if the bzipped input is smaller than one block.</p>
0110: *
0111: * <p>Instances of this class are not threadsafe.</p>
0112: *
0113: * <p>
0114: * TODO: Update to BZip2 1.0.1
0115: * </p>
0116: *
0117: */
0118: public class CBZip2OutputStream extends OutputStream implements
0119: BZip2Constants {
0120:
0121: /**
0122: * The minimum supported blocksize <tt> == 1</tt>.
0123: */
0124: public static final int MIN_BLOCKSIZE = 1;
0125:
0126: /**
0127: * The maximum supported blocksize <tt> == 9</tt>.
0128: */
0129: public static final int MAX_BLOCKSIZE = 9;
0130:
0131: /**
0132: * This constant is accessible by subclasses for historical purposes.
0133: * If you don't know what it means then you don't need it.
0134: */
0135: protected static final int SETMASK = (1 << 21);
0136:
0137: /**
0138: * This constant is accessible by subclasses for historical purposes.
0139: * If you don't know what it means then you don't need it.
0140: */
0141: protected static final int CLEARMASK = (~SETMASK);
0142:
0143: /**
0144: * This constant is accessible by subclasses for historical purposes.
0145: * If you don't know what it means then you don't need it.
0146: */
0147: protected static final int GREATER_ICOST = 15;
0148:
0149: /**
0150: * This constant is accessible by subclasses for historical purposes.
0151: * If you don't know what it means then you don't need it.
0152: */
0153: protected static final int LESSER_ICOST = 0;
0154:
0155: /**
0156: * This constant is accessible by subclasses for historical purposes.
0157: * If you don't know what it means then you don't need it.
0158: */
0159: protected static final int SMALL_THRESH = 20;
0160:
0161: /**
0162: * This constant is accessible by subclasses for historical purposes.
0163: * If you don't know what it means then you don't need it.
0164: */
0165: protected static final int DEPTH_THRESH = 10;
0166:
0167: /**
0168: * This constant is accessible by subclasses for historical purposes.
0169: * If you don't know what it means then you don't need it.
0170: */
0171: protected static final int WORK_FACTOR = 30;
0172:
0173: /**
0174: * This constant is accessible by subclasses for historical purposes.
0175: * If you don't know what it means then you don't need it.
0176: * <p>
0177: If you are ever unlucky/improbable enough
0178: to get a stack overflow whilst sorting,
0179: increase the following constant and try
0180: again. In practice I have never seen the
0181: stack go above 27 elems, so the following
0182: limit seems very generous.
0183: * </p>
0184: */
0185: protected static final int QSORT_STACK_SIZE = 1000;
0186:
0187: /**
0188: * Knuth's increments seem to work better than Incerpi-Sedgewick
0189: * here. Possibly because the number of elems to sort is usually
0190: * small, typically <= 20.
0191: */
0192: private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093,
0193: 3280, 9841, 29524, 88573, 265720, 797161, 2391484 };
0194:
0195: /**
0196: * This method is accessible by subclasses for historical purposes.
0197: * If you don't know what it does then you don't need it.
0198: */
0199: protected static void hbMakeCodeLengths(char[] len, int[] freq,
0200: int alphaSize, int maxLen) {
0201: /*
0202: Nodes and heap entries run from 1. Entry 0
0203: for both the heap and nodes is a sentinel.
0204: */
0205: final int[] heap = new int[MAX_ALPHA_SIZE * 2];
0206: final int[] weight = new int[MAX_ALPHA_SIZE * 2];
0207: final int[] parent = new int[MAX_ALPHA_SIZE * 2];
0208:
0209: for (int i = alphaSize; --i >= 0;) {
0210: weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
0211: }
0212:
0213: for (boolean tooLong = true; tooLong;) {
0214: tooLong = false;
0215:
0216: int nNodes = alphaSize;
0217: int nHeap = 0;
0218: heap[0] = 0;
0219: weight[0] = 0;
0220: parent[0] = -2;
0221:
0222: for (int i = 1; i <= alphaSize; i++) {
0223: parent[i] = -1;
0224: nHeap++;
0225: heap[nHeap] = i;
0226:
0227: int zz = nHeap;
0228: int tmp = heap[zz];
0229: while (weight[tmp] < weight[heap[zz >> 1]]) {
0230: heap[zz] = heap[zz >> 1];
0231: zz >>= 1;
0232: }
0233: heap[zz] = tmp;
0234: }
0235:
0236: // assert (nHeap < (MAX_ALPHA_SIZE + 2)) : nHeap;
0237:
0238: while (nHeap > 1) {
0239: int n1 = heap[1];
0240: heap[1] = heap[nHeap];
0241: nHeap--;
0242:
0243: int yy = 0;
0244: int zz = 1;
0245: int tmp = heap[1];
0246:
0247: while (true) {
0248: yy = zz << 1;
0249:
0250: if (yy > nHeap) {
0251: break;
0252: }
0253:
0254: if ((yy < nHeap)
0255: && (weight[heap[yy + 1]] < weight[heap[yy]])) {
0256: yy++;
0257: }
0258:
0259: if (weight[tmp] < weight[heap[yy]]) {
0260: break;
0261: }
0262:
0263: heap[zz] = heap[yy];
0264: zz = yy;
0265: }
0266:
0267: heap[zz] = tmp;
0268:
0269: int n2 = heap[1];
0270: heap[1] = heap[nHeap];
0271: nHeap--;
0272:
0273: yy = 0;
0274: zz = 1;
0275: tmp = heap[1];
0276:
0277: while (true) {
0278: yy = zz << 1;
0279:
0280: if (yy > nHeap) {
0281: break;
0282: }
0283:
0284: if ((yy < nHeap)
0285: && (weight[heap[yy + 1]] < weight[heap[yy]])) {
0286: yy++;
0287: }
0288:
0289: if (weight[tmp] < weight[heap[yy]]) {
0290: break;
0291: }
0292:
0293: heap[zz] = heap[yy];
0294: zz = yy;
0295: }
0296:
0297: heap[zz] = tmp;
0298: nNodes++;
0299: parent[n1] = parent[n2] = nNodes;
0300:
0301: final int weight_n1 = weight[n1];
0302: final int weight_n2 = weight[n2];
0303: weight[nNodes] = (((weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00)) | (1 + (((weight_n1 & 0x000000ff) > (weight_n2 & 0x000000ff)) ? (weight_n1 & 0x000000ff)
0304: : (weight_n2 & 0x000000ff))));
0305:
0306: parent[nNodes] = -1;
0307: nHeap++;
0308: heap[nHeap] = nNodes;
0309:
0310: tmp = 0;
0311: zz = nHeap;
0312: tmp = heap[zz];
0313: final int weight_tmp = weight[tmp];
0314: while (weight_tmp < weight[heap[zz >> 1]]) {
0315: heap[zz] = heap[zz >> 1];
0316: zz >>= 1;
0317: }
0318: heap[zz] = tmp;
0319:
0320: }
0321:
0322: // assert (nNodes < (MAX_ALPHA_SIZE * 2)) : nNodes;
0323:
0324: for (int i = 1; i <= alphaSize; i++) {
0325: int j = 0;
0326: int k = i;
0327:
0328: for (int parent_k; (parent_k = parent[k]) >= 0;) {
0329: k = parent_k;
0330: j++;
0331: }
0332:
0333: len[i - 1] = (char) j;
0334: if (j > maxLen) {
0335: tooLong = true;
0336: }
0337: }
0338:
0339: if (tooLong) {
0340: for (int i = 1; i < alphaSize; i++) {
0341: int j = weight[i] >> 8;
0342: j = 1 + (j >> 1);
0343: weight[i] = j << 8;
0344: }
0345: }
0346: }
0347: }
0348:
0349: private static void hbMakeCodeLengths(final byte[] len,
0350: final int[] freq, final Data dat, final int alphaSize,
0351: final int maxLen) {
0352: /*
0353: Nodes and heap entries run from 1. Entry 0
0354: for both the heap and nodes is a sentinel.
0355: */
0356: final int[] heap = dat.heap;
0357: final int[] weight = dat.weight;
0358: final int[] parent = dat.parent;
0359:
0360: for (int i = alphaSize; --i >= 0;) {
0361: weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
0362: }
0363:
0364: for (boolean tooLong = true; tooLong;) {
0365: tooLong = false;
0366:
0367: int nNodes = alphaSize;
0368: int nHeap = 0;
0369: heap[0] = 0;
0370: weight[0] = 0;
0371: parent[0] = -2;
0372:
0373: for (int i = 1; i <= alphaSize; i++) {
0374: parent[i] = -1;
0375: nHeap++;
0376: heap[nHeap] = i;
0377:
0378: int zz = nHeap;
0379: int tmp = heap[zz];
0380: while (weight[tmp] < weight[heap[zz >> 1]]) {
0381: heap[zz] = heap[zz >> 1];
0382: zz >>= 1;
0383: }
0384: heap[zz] = tmp;
0385: }
0386:
0387: while (nHeap > 1) {
0388: int n1 = heap[1];
0389: heap[1] = heap[nHeap];
0390: nHeap--;
0391:
0392: int yy = 0;
0393: int zz = 1;
0394: int tmp = heap[1];
0395:
0396: while (true) {
0397: yy = zz << 1;
0398:
0399: if (yy > nHeap) {
0400: break;
0401: }
0402:
0403: if ((yy < nHeap)
0404: && (weight[heap[yy + 1]] < weight[heap[yy]])) {
0405: yy++;
0406: }
0407:
0408: if (weight[tmp] < weight[heap[yy]]) {
0409: break;
0410: }
0411:
0412: heap[zz] = heap[yy];
0413: zz = yy;
0414: }
0415:
0416: heap[zz] = tmp;
0417:
0418: int n2 = heap[1];
0419: heap[1] = heap[nHeap];
0420: nHeap--;
0421:
0422: yy = 0;
0423: zz = 1;
0424: tmp = heap[1];
0425:
0426: while (true) {
0427: yy = zz << 1;
0428:
0429: if (yy > nHeap) {
0430: break;
0431: }
0432:
0433: if ((yy < nHeap)
0434: && (weight[heap[yy + 1]] < weight[heap[yy]])) {
0435: yy++;
0436: }
0437:
0438: if (weight[tmp] < weight[heap[yy]]) {
0439: break;
0440: }
0441:
0442: heap[zz] = heap[yy];
0443: zz = yy;
0444: }
0445:
0446: heap[zz] = tmp;
0447: nNodes++;
0448: parent[n1] = parent[n2] = nNodes;
0449:
0450: final int weight_n1 = weight[n1];
0451: final int weight_n2 = weight[n2];
0452: weight[nNodes] = ((weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00))
0453: | (1 + (((weight_n1 & 0x000000ff) > (weight_n2 & 0x000000ff)) ? (weight_n1 & 0x000000ff)
0454: : (weight_n2 & 0x000000ff)));
0455:
0456: parent[nNodes] = -1;
0457: nHeap++;
0458: heap[nHeap] = nNodes;
0459:
0460: tmp = 0;
0461: zz = nHeap;
0462: tmp = heap[zz];
0463: final int weight_tmp = weight[tmp];
0464: while (weight_tmp < weight[heap[zz >> 1]]) {
0465: heap[zz] = heap[zz >> 1];
0466: zz >>= 1;
0467: }
0468: heap[zz] = tmp;
0469:
0470: }
0471:
0472: for (int i = 1; i <= alphaSize; i++) {
0473: int j = 0;
0474: int k = i;
0475:
0476: for (int parent_k; (parent_k = parent[k]) >= 0;) {
0477: k = parent_k;
0478: j++;
0479: }
0480:
0481: len[i - 1] = (byte) j;
0482: if (j > maxLen) {
0483: tooLong = true;
0484: }
0485: }
0486:
0487: if (tooLong) {
0488: for (int i = 1; i < alphaSize; i++) {
0489: int j = weight[i] >> 8;
0490: j = 1 + (j >> 1);
0491: weight[i] = j << 8;
0492: }
0493: }
0494: }
0495: }
0496:
0497: /**
0498: Index of the last char in the block, so
0499: the block size == last + 1.
0500: */
0501: private int last;
0502:
0503: /**
0504: * Index in fmap[] of original string after sorting.
0505: */
0506: private int origPtr;
0507:
0508: /**
0509: Always: in the range 0 .. 9.
0510: The current block size is 100000 * this number.
0511: */
0512: private final int blockSize100k;
0513:
0514: private boolean blockRandomised;
0515:
0516: private int bsBuff;
0517: private int bsLive;
0518: private final CRC crc = new CRC();
0519:
0520: private int nInUse;
0521:
0522: private int nMTF;
0523:
0524: /*
0525: * Used when sorting. If too many long comparisons
0526: * happen, we stop sorting, randomise the block
0527: * slightly, and try again.
0528: */
0529: private int workDone;
0530: private int workLimit;
0531: private boolean firstAttempt;
0532:
0533: private int currentChar = -1;
0534: private int runLength = 0;
0535:
0536: private int blockCRC;
0537: private int combinedCRC;
0538: private int allowableBlockSize;
0539:
0540: /**
0541: * All memory intensive stuff.
0542: */
0543: private CBZip2OutputStream.Data data;
0544:
0545: private OutputStream out;
0546:
0547: /**
0548: * Chooses a blocksize based on the given length of the data to compress.
0549: *
0550: * @return
0551: * The blocksize, between {@link #MIN_BLOCKSIZE} and {@link #MAX_BLOCKSIZE}
0552: * both inclusive. For a negative <tt>inputLength</tt> this method returns
0553: * <tt>MAX_BLOCKSIZE</tt> always.
0554: *
0555: * @param inputLength
0556: * The length of the data which will be compressed by
0557: * <tt>CBZip2OutputStream</tt>.
0558: */
0559: public static int chooseBlockSize(long inputLength) {
0560: return (inputLength > 0) ? (int) Math.min(
0561: (inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
0562: }
0563:
0564: /**
0565: * Constructs a new <tt>CBZip2OutputStream</tt> with a blocksize of 900k.
0566: *
0567: * <p><b>Attention: </b>The caller is resonsible to write the two
0568: * BZip2 magic bytes <tt>"BZ"</tt> to the specified stream prior
0569: * to calling this constructor.</p>
0570: *
0571: * @param out * the destination stream.
0572: *
0573: * @throws IOException
0574: * if an I/O error occurs in the specified stream.
0575: * @throws NullPointerException
0576: * if <code>out == null</code>.
0577: */
0578: public CBZip2OutputStream(final OutputStream out)
0579: throws IOException {
0580: this (out, MAX_BLOCKSIZE);
0581: }
0582:
0583: /**
0584: * Constructs a new <tt>CBZip2OutputStream</tt> with specified blocksize.
0585: *
0586: * <p><b>Attention: </b>The caller is resonsible to write the two
0587: * BZip2 magic bytes <tt>"BZ"</tt> to the specified stream prior
0588: * to calling this constructor.</p>
0589: *
0590: *
0591: * @param out
0592: * the destination stream.
0593: * @param blockSize
0594: * the blockSize as 100k units.
0595: *
0596: * @throws IOException
0597: * if an I/O error occurs in the specified stream.
0598: * @throws IllegalArgumentException
0599: * if <code>(blockSize < 1) || (blockSize > 9)</code>.
0600: * @throws NullPointerException
0601: * if <code>out == null</code>.
0602: *
0603: * @see #MIN_BLOCKSIZE
0604: * @see #MAX_BLOCKSIZE
0605: */
0606: public CBZip2OutputStream(final OutputStream out,
0607: final int blockSize) throws IOException {
0608: super ();
0609:
0610: if (blockSize < 1) {
0611: throw new IllegalArgumentException("blockSize(" + blockSize
0612: + ") < 1");
0613: }
0614: if (blockSize > 9) {
0615: throw new IllegalArgumentException("blockSize(" + blockSize
0616: + ") > 9");
0617: }
0618:
0619: this .blockSize100k = blockSize;
0620: this .out = out;
0621: init();
0622: }
0623:
0624: public void write(final int b) throws IOException {
0625: if (this .out != null) {
0626: write0(b);
0627: } else {
0628: throw new IOException("closed");
0629: }
0630: }
0631:
0632: private void writeRun() throws IOException {
0633: final int lastShadow = this .last;
0634:
0635: if (lastShadow < this .allowableBlockSize) {
0636: final int currentCharShadow = this .currentChar;
0637: final Data dataShadow = this .data;
0638: dataShadow.inUse[currentCharShadow] = true;
0639: final byte ch = (byte) currentCharShadow;
0640:
0641: int runLengthShadow = this .runLength;
0642: this .crc.updateCRC(currentCharShadow, runLengthShadow);
0643:
0644: switch (runLengthShadow) {
0645: case 1:
0646: dataShadow.block[lastShadow + 2] = ch;
0647: this .last = lastShadow + 1;
0648: break;
0649:
0650: case 2:
0651: dataShadow.block[lastShadow + 2] = ch;
0652: dataShadow.block[lastShadow + 3] = ch;
0653: this .last = lastShadow + 2;
0654: break;
0655:
0656: case 3: {
0657: final byte[] block = dataShadow.block;
0658: block[lastShadow + 2] = ch;
0659: block[lastShadow + 3] = ch;
0660: block[lastShadow + 4] = ch;
0661: this .last = lastShadow + 3;
0662: }
0663: break;
0664:
0665: default: {
0666: runLengthShadow -= 4;
0667: dataShadow.inUse[runLengthShadow] = true;
0668: final byte[] block = dataShadow.block;
0669: block[lastShadow + 2] = ch;
0670: block[lastShadow + 3] = ch;
0671: block[lastShadow + 4] = ch;
0672: block[lastShadow + 5] = ch;
0673: block[lastShadow + 6] = (byte) runLengthShadow;
0674: this .last = lastShadow + 5;
0675: }
0676: break;
0677:
0678: }
0679: } else {
0680: endBlock();
0681: initBlock();
0682: writeRun();
0683: }
0684: }
0685:
0686: /**
0687: * Overriden to close the stream.
0688: */
0689: protected void finalize() throws Throwable {
0690: close();
0691: super .finalize();
0692: }
0693:
0694: public void close() throws IOException {
0695: OutputStream outShadow = this .out;
0696: if (outShadow != null) {
0697: try {
0698: if (this .runLength > 0) {
0699: writeRun();
0700: }
0701: this .currentChar = -1;
0702: endBlock();
0703: endCompression();
0704: outShadow.close();
0705: } finally {
0706: this .out = null;
0707: this .data = null;
0708: }
0709: }
0710: }
0711:
0712: public void flush() throws IOException {
0713: OutputStream outShadow = this .out;
0714: if (outShadow != null) {
0715: outShadow.flush();
0716: }
0717: }
0718:
0719: private void init() throws IOException {
0720: // write magic: done by caller who created this stream
0721: //this.out.write('B');
0722: //this.out.write('Z');
0723:
0724: this .data = new Data(this .blockSize100k);
0725:
0726: /* Write `magic' bytes h indicating file-format == huffmanised,
0727: followed by a digit indicating blockSize100k.
0728: */
0729: bsPutUByte('h');
0730: bsPutUByte('0' + this .blockSize100k);
0731:
0732: this .combinedCRC = 0;
0733: initBlock();
0734: }
0735:
0736: private void initBlock() {
0737: // blockNo++;
0738: this .crc.initialiseCRC();
0739: this .last = -1;
0740: // ch = 0;
0741:
0742: boolean[] inUse = this .data.inUse;
0743: for (int i = 256; --i >= 0;) {
0744: inUse[i] = false;
0745: }
0746:
0747: /* 20 is just a paranoia constant */
0748: this .allowableBlockSize = (this .blockSize100k * BZip2Constants.baseBlockSize) - 20;
0749: }
0750:
0751: private void endBlock() throws IOException {
0752: this .blockCRC = this .crc.getFinalCRC();
0753: this .combinedCRC = (this .combinedCRC << 1)
0754: | (this .combinedCRC >>> 31);
0755: this .combinedCRC ^= this .blockCRC;
0756:
0757: // empty block at end of file
0758: if (this .last == -1) {
0759: return;
0760: }
0761:
0762: /* sort the block and establish posn of original string */
0763: blockSort();
0764:
0765: /*
0766: A 6-byte block header, the value chosen arbitrarily
0767: as 0x314159265359 :-). A 32 bit value does not really
0768: give a strong enough guarantee that the value will not
0769: appear by chance in the compressed datastream. Worst-case
0770: probability of this event, for a 900k block, is about
0771: 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
0772: For a compressed file of size 100Gb -- about 100000 blocks --
0773: only a 48-bit marker will do. NB: normal compression/
0774: decompression do *not* rely on these statistical properties.
0775: They are only important when trying to recover blocks from
0776: damaged files.
0777: */
0778: bsPutUByte(0x31);
0779: bsPutUByte(0x41);
0780: bsPutUByte(0x59);
0781: bsPutUByte(0x26);
0782: bsPutUByte(0x53);
0783: bsPutUByte(0x59);
0784:
0785: /* Now the block's CRC, so it is in a known place. */
0786: bsPutInt(this .blockCRC);
0787:
0788: /* Now a single bit indicating randomisation. */
0789: if (this .blockRandomised) {
0790: bsW(1, 1);
0791: } else {
0792: bsW(1, 0);
0793: }
0794:
0795: /* Finally, block's contents proper. */
0796: moveToFrontCodeAndSend();
0797: }
0798:
0799: private void endCompression() throws IOException {
0800: /*
0801: Now another magic 48-bit number, 0x177245385090, to
0802: indicate the end of the last block. (sqrt(pi), if
0803: you want to know. I did want to use e, but it contains
0804: too much repetition -- 27 18 28 18 28 46 -- for me
0805: to feel statistically comfortable. Call me paranoid.)
0806: */
0807: bsPutUByte(0x17);
0808: bsPutUByte(0x72);
0809: bsPutUByte(0x45);
0810: bsPutUByte(0x38);
0811: bsPutUByte(0x50);
0812: bsPutUByte(0x90);
0813:
0814: bsPutInt(this .combinedCRC);
0815: bsFinishedWithStream();
0816: }
0817:
0818: /**
0819: * Returns the blocksize parameter specified at construction time.
0820: */
0821: public final int getBlockSize() {
0822: return this .blockSize100k;
0823: }
0824:
0825: public void write(final byte[] buf, int offs, final int len)
0826: throws IOException {
0827: if (offs < 0) {
0828: throw new IndexOutOfBoundsException("offs(" + offs
0829: + ") < 0.");
0830: }
0831: if (len < 0) {
0832: throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
0833: }
0834: if (offs + len > buf.length) {
0835: throw new IndexOutOfBoundsException("offs(" + offs
0836: + ") + len(" + len + ") > buf.length(" + buf.length
0837: + ").");
0838: }
0839: if (this .out == null) {
0840: throw new IOException("stream closed");
0841: }
0842:
0843: for (int hi = offs + len; offs < hi;) {
0844: write0(buf[offs++]);
0845: }
0846: }
0847:
0848: private void write0(int b) throws IOException {
0849: if (this .currentChar != -1) {
0850: b &= 0xff;
0851: if (this .currentChar == b) {
0852: if (++this .runLength > 254) {
0853: writeRun();
0854: this .currentChar = -1;
0855: this .runLength = 0;
0856: }
0857: // else nothing to do
0858: } else {
0859: writeRun();
0860: this .runLength = 1;
0861: this .currentChar = b;
0862: }
0863: } else {
0864: this .currentChar = b & 0xff;
0865: this .runLength++;
0866: }
0867: }
0868:
0869: private static void hbAssignCodes(final int[] code,
0870: final byte[] length, final int minLen, final int maxLen,
0871: final int alphaSize) {
0872: int vec = 0;
0873: for (int n = minLen; n <= maxLen; n++) {
0874: for (int i = 0; i < alphaSize; i++) {
0875: if ((length[i] & 0xff) == n) {
0876: code[i] = vec;
0877: vec++;
0878: }
0879: }
0880: vec <<= 1;
0881: }
0882: }
0883:
0884: private void bsFinishedWithStream() throws IOException {
0885: while (this .bsLive > 0) {
0886: int ch = this .bsBuff >> 24;
0887: this .out.write(ch); // write 8-bit
0888: this .bsBuff <<= 8;
0889: this .bsLive -= 8;
0890: }
0891: }
0892:
0893: private void bsW(final int n, final int v) throws IOException {
0894: final OutputStream outShadow = this .out;
0895: int bsLiveShadow = this .bsLive;
0896: int bsBuffShadow = this .bsBuff;
0897:
0898: while (bsLiveShadow >= 8) {
0899: outShadow.write(bsBuffShadow >> 24); // write 8-bit
0900: bsBuffShadow <<= 8;
0901: bsLiveShadow -= 8;
0902: }
0903:
0904: this .bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
0905: this .bsLive = bsLiveShadow + n;
0906: }
0907:
0908: private void bsPutUByte(final int c) throws IOException {
0909: bsW(8, c);
0910: }
0911:
0912: private void bsPutInt(final int u) throws IOException {
0913: bsW(8, (u >> 24) & 0xff);
0914: bsW(8, (u >> 16) & 0xff);
0915: bsW(8, (u >> 8) & 0xff);
0916: bsW(8, u & 0xff);
0917: }
0918:
0919: private void sendMTFValues() throws IOException {
0920: final byte[][] len = this .data.sendMTFValues_len;
0921: final int alphaSize = this .nInUse + 2;
0922:
0923: for (int t = N_GROUPS; --t >= 0;) {
0924: byte[] len_t = len[t];
0925: for (int v = alphaSize; --v >= 0;) {
0926: len_t[v] = GREATER_ICOST;
0927: }
0928: }
0929:
0930: /* Decide how many coding tables to use */
0931: // assert (this.nMTF > 0) : this.nMTF;
0932: final int nGroups = (this .nMTF < 200) ? 2
0933: : (this .nMTF < 600) ? 3 : (this .nMTF < 1200) ? 4
0934: : (this .nMTF < 2400) ? 5 : 6;
0935:
0936: /* Generate an initial set of coding tables */
0937: sendMTFValues0(nGroups, alphaSize);
0938:
0939: /*
0940: Iterate up to N_ITERS times to improve the tables.
0941: */
0942: final int nSelectors = sendMTFValues1(nGroups, alphaSize);
0943:
0944: /* Compute MTF values for the selectors. */
0945: sendMTFValues2(nGroups, nSelectors);
0946:
0947: /* Assign actual codes for the tables. */
0948: sendMTFValues3(nGroups, alphaSize);
0949:
0950: /* Transmit the mapping table. */
0951: sendMTFValues4();
0952:
0953: /* Now the selectors. */
0954: sendMTFValues5(nGroups, nSelectors);
0955:
0956: /* Now the coding tables. */
0957: sendMTFValues6(nGroups, alphaSize);
0958:
0959: /* And finally, the block data proper */
0960: sendMTFValues7(nSelectors);
0961: }
0962:
0963: private void sendMTFValues0(final int nGroups, final int alphaSize) {
0964: final byte[][] len = this .data.sendMTFValues_len;
0965: final int[] mtfFreq = this .data.mtfFreq;
0966:
0967: int remF = this .nMTF;
0968: int gs = 0;
0969:
0970: for (int nPart = nGroups; nPart > 0; nPart--) {
0971: final int tFreq = remF / nPart;
0972: int ge = gs - 1;
0973: int aFreq = 0;
0974:
0975: for (final int a = alphaSize - 1; (aFreq < tFreq)
0976: && (ge < a);) {
0977: aFreq += mtfFreq[++ge];
0978: }
0979:
0980: if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
0981: && (((nGroups - nPart) & 1) != 0)) {
0982: aFreq -= mtfFreq[ge--];
0983: }
0984:
0985: final byte[] len_np = len[nPart - 1];
0986: for (int v = alphaSize; --v >= 0;) {
0987: if ((v >= gs) && (v <= ge)) {
0988: len_np[v] = LESSER_ICOST;
0989: } else {
0990: len_np[v] = GREATER_ICOST;
0991: }
0992: }
0993:
0994: gs = ge + 1;
0995: remF -= aFreq;
0996: }
0997: }
0998:
0999: private int sendMTFValues1(final int nGroups, final int alphaSize) {
1000: final Data dataShadow = this .data;
1001: final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
1002: final int[] fave = dataShadow.sendMTFValues_fave;
1003: final short[] cost = dataShadow.sendMTFValues_cost;
1004: final char[] sfmap = dataShadow.sfmap;
1005: final byte[] selector = dataShadow.selector;
1006: final byte[][] len = dataShadow.sendMTFValues_len;
1007: final byte[] len_0 = len[0];
1008: final byte[] len_1 = len[1];
1009: final byte[] len_2 = len[2];
1010: final byte[] len_3 = len[3];
1011: final byte[] len_4 = len[4];
1012: final byte[] len_5 = len[5];
1013: final int nMTFShadow = this .nMTF;
1014:
1015: int nSelectors = 0;
1016:
1017: for (int iter = 0; iter < N_ITERS; iter++) {
1018: for (int t = nGroups; --t >= 0;) {
1019: fave[t] = 0;
1020: int[] rfreqt = rfreq[t];
1021: for (int i = alphaSize; --i >= 0;) {
1022: rfreqt[i] = 0;
1023: }
1024: }
1025:
1026: nSelectors = 0;
1027:
1028: for (int gs = 0; gs < this .nMTF;) {
1029: /* Set group start & end marks. */
1030:
1031: /*
1032: Calculate the cost of this group as coded
1033: by each of the coding tables.
1034: */
1035:
1036: final int ge = Math
1037: .min(gs + G_SIZE - 1, nMTFShadow - 1);
1038:
1039: if (nGroups == N_GROUPS) {
1040: // unrolled version of the else-block
1041:
1042: short cost0 = 0;
1043: short cost1 = 0;
1044: short cost2 = 0;
1045: short cost3 = 0;
1046: short cost4 = 0;
1047: short cost5 = 0;
1048:
1049: for (int i = gs; i <= ge; i++) {
1050: final int icv = sfmap[i];
1051: cost0 += len_0[icv] & 0xff;
1052: cost1 += len_1[icv] & 0xff;
1053: cost2 += len_2[icv] & 0xff;
1054: cost3 += len_3[icv] & 0xff;
1055: cost4 += len_4[icv] & 0xff;
1056: cost5 += len_5[icv] & 0xff;
1057: }
1058:
1059: cost[0] = cost0;
1060: cost[1] = cost1;
1061: cost[2] = cost2;
1062: cost[3] = cost3;
1063: cost[4] = cost4;
1064: cost[5] = cost5;
1065:
1066: } else {
1067: for (int t = nGroups; --t >= 0;) {
1068: cost[t] = 0;
1069: }
1070:
1071: for (int i = gs; i <= ge; i++) {
1072: final int icv = sfmap[i];
1073: for (int t = nGroups; --t >= 0;) {
1074: cost[t] += len[t][icv] & 0xff;
1075: }
1076: }
1077: }
1078:
1079: /*
1080: Find the coding table which is best for this group,
1081: and record its identity in the selector table.
1082: */
1083: int bt = -1;
1084: for (int t = nGroups, bc = 999999999; --t >= 0;) {
1085: final int cost_t = cost[t];
1086: if (cost_t < bc) {
1087: bc = cost_t;
1088: bt = t;
1089: }
1090: }
1091:
1092: fave[bt]++;
1093: selector[nSelectors] = (byte) bt;
1094: nSelectors++;
1095:
1096: /*
1097: Increment the symbol frequencies for the selected table.
1098: */
1099: final int[] rfreq_bt = rfreq[bt];
1100: for (int i = gs; i <= ge; i++) {
1101: rfreq_bt[sfmap[i]]++;
1102: }
1103:
1104: gs = ge + 1;
1105: }
1106:
1107: /*
1108: Recompute the tables based on the accumulated frequencies.
1109: */
1110: for (int t = 0; t < nGroups; t++) {
1111: hbMakeCodeLengths(len[t], rfreq[t], this .data,
1112: alphaSize, 20);
1113: }
1114: }
1115:
1116: return nSelectors;
1117: }
1118:
1119: private void sendMTFValues2(final int nGroups, final int nSelectors) {
1120: // assert (nGroups < 8) : nGroups;
1121:
1122: final Data dataShadow = this .data;
1123: byte[] pos = dataShadow.sendMTFValues2_pos;
1124:
1125: for (int i = nGroups; --i >= 0;) {
1126: pos[i] = (byte) i;
1127: }
1128:
1129: for (int i = 0; i < nSelectors; i++) {
1130: final byte ll_i = dataShadow.selector[i];
1131: byte tmp = pos[0];
1132: int j = 0;
1133:
1134: while (ll_i != tmp) {
1135: j++;
1136: byte tmp2 = tmp;
1137: tmp = pos[j];
1138: pos[j] = tmp2;
1139: }
1140:
1141: pos[0] = tmp;
1142: dataShadow.selectorMtf[i] = (byte) j;
1143: }
1144: }
1145:
1146: private void sendMTFValues3(final int nGroups, final int alphaSize) {
1147: int[][] code = this .data.sendMTFValues_code;
1148: byte[][] len = this .data.sendMTFValues_len;
1149:
1150: for (int t = 0; t < nGroups; t++) {
1151: int minLen = 32;
1152: int maxLen = 0;
1153: final byte[] len_t = len[t];
1154: for (int i = alphaSize; --i >= 0;) {
1155: final int l = len_t[i] & 0xff;
1156: if (l > maxLen) {
1157: maxLen = l;
1158: }
1159: if (l < minLen) {
1160: minLen = l;
1161: }
1162: }
1163:
1164: // assert (maxLen <= 20) : maxLen;
1165: // assert (minLen >= 1) : minLen;
1166:
1167: hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
1168: }
1169: }
1170:
1171: private void sendMTFValues4() throws IOException {
1172: final boolean[] inUse = this .data.inUse;
1173: final boolean[] inUse16 = this .data.sentMTFValues4_inUse16;
1174:
1175: for (int i = 16; --i >= 0;) {
1176: inUse16[i] = false;
1177: final int i16 = i * 16;
1178: for (int j = 16; --j >= 0;) {
1179: if (inUse[i16 + j]) {
1180: inUse16[i] = true;
1181: }
1182: }
1183: }
1184:
1185: for (int i = 0; i < 16; i++) {
1186: bsW(1, inUse16[i] ? 1 : 0);
1187: }
1188:
1189: final OutputStream outShadow = this .out;
1190: int bsLiveShadow = this .bsLive;
1191: int bsBuffShadow = this .bsBuff;
1192:
1193: for (int i = 0; i < 16; i++) {
1194: if (inUse16[i]) {
1195: final int i16 = i * 16;
1196: for (int j = 0; j < 16; j++) {
1197: // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
1198: while (bsLiveShadow >= 8) {
1199: outShadow.write(bsBuffShadow >> 24); // write 8-bit
1200: bsBuffShadow <<= 8;
1201: bsLiveShadow -= 8;
1202: }
1203: if (inUse[i16 + j]) {
1204: bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1205: }
1206: bsLiveShadow++;
1207: }
1208: }
1209: }
1210:
1211: this .bsBuff = bsBuffShadow;
1212: this .bsLive = bsLiveShadow;
1213: }
1214:
1215: private void sendMTFValues5(final int nGroups, final int nSelectors)
1216: throws IOException {
1217: bsW(3, nGroups);
1218: bsW(15, nSelectors);
1219:
1220: final OutputStream outShadow = this .out;
1221: final byte[] selectorMtf = this .data.selectorMtf;
1222:
1223: int bsLiveShadow = this .bsLive;
1224: int bsBuffShadow = this .bsBuff;
1225:
1226: for (int i = 0; i < nSelectors; i++) {
1227: for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1228: // inlined: bsW(1, 1);
1229: while (bsLiveShadow >= 8) {
1230: outShadow.write(bsBuffShadow >> 24);
1231: bsBuffShadow <<= 8;
1232: bsLiveShadow -= 8;
1233: }
1234: bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1235: bsLiveShadow++;
1236: }
1237:
1238: // inlined: bsW(1, 0);
1239: while (bsLiveShadow >= 8) {
1240: outShadow.write(bsBuffShadow >> 24);
1241: bsBuffShadow <<= 8;
1242: bsLiveShadow -= 8;
1243: }
1244: //bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1245: bsLiveShadow++;
1246: }
1247:
1248: this .bsBuff = bsBuffShadow;
1249: this .bsLive = bsLiveShadow;
1250: }
1251:
1252: private void sendMTFValues6(final int nGroups, final int alphaSize)
1253: throws IOException {
1254: final byte[][] len = this .data.sendMTFValues_len;
1255: final OutputStream outShadow = this .out;
1256:
1257: int bsLiveShadow = this .bsLive;
1258: int bsBuffShadow = this .bsBuff;
1259:
1260: for (int t = 0; t < nGroups; t++) {
1261: byte[] len_t = len[t];
1262: int curr = len_t[0] & 0xff;
1263:
1264: // inlined: bsW(5, curr);
1265: while (bsLiveShadow >= 8) {
1266: outShadow.write(bsBuffShadow >> 24); // write 8-bit
1267: bsBuffShadow <<= 8;
1268: bsLiveShadow -= 8;
1269: }
1270: bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
1271: bsLiveShadow += 5;
1272:
1273: for (int i = 0; i < alphaSize; i++) {
1274: int lti = len_t[i] & 0xff;
1275: while (curr < lti) {
1276: // inlined: bsW(2, 2);
1277: while (bsLiveShadow >= 8) {
1278: outShadow.write(bsBuffShadow >> 24); // write 8-bit
1279: bsBuffShadow <<= 8;
1280: bsLiveShadow -= 8;
1281: }
1282: bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
1283: bsLiveShadow += 2;
1284:
1285: curr++; /* 10 */
1286: }
1287:
1288: while (curr > lti) {
1289: // inlined: bsW(2, 3);
1290: while (bsLiveShadow >= 8) {
1291: outShadow.write(bsBuffShadow >> 24); // write 8-bit
1292: bsBuffShadow <<= 8;
1293: bsLiveShadow -= 8;
1294: }
1295: bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
1296: bsLiveShadow += 2;
1297:
1298: curr--; /* 11 */
1299: }
1300:
1301: // inlined: bsW(1, 0);
1302: while (bsLiveShadow >= 8) {
1303: outShadow.write(bsBuffShadow >> 24); // write 8-bit
1304: bsBuffShadow <<= 8;
1305: bsLiveShadow -= 8;
1306: }
1307: // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1308: bsLiveShadow++;
1309: }
1310: }
1311:
1312: this .bsBuff = bsBuffShadow;
1313: this .bsLive = bsLiveShadow;
1314: }
1315:
1316: private void sendMTFValues7(final int nSelectors)
1317: throws IOException {
1318: final Data dataShadow = this .data;
1319: final byte[][] len = dataShadow.sendMTFValues_len;
1320: final int[][] code = dataShadow.sendMTFValues_code;
1321: final OutputStream outShadow = this .out;
1322: final byte[] selector = dataShadow.selector;
1323: final char[] sfmap = dataShadow.sfmap;
1324: final int nMTFShadow = this .nMTF;
1325:
1326: int selCtr = 0;
1327:
1328: int bsLiveShadow = this .bsLive;
1329: int bsBuffShadow = this .bsBuff;
1330:
1331: for (int gs = 0; gs < nMTFShadow;) {
1332: final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1333: final int selector_selCtr = selector[selCtr] & 0xff;
1334: final int[] code_selCtr = code[selector_selCtr];
1335: final byte[] len_selCtr = len[selector_selCtr];
1336:
1337: while (gs <= ge) {
1338: final int sfmap_i = sfmap[gs];
1339:
1340: //
1341: // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1342: // code_selCtr[sfmap_i]);
1343: //
1344: while (bsLiveShadow >= 8) {
1345: outShadow.write(bsBuffShadow >> 24);
1346: bsBuffShadow <<= 8;
1347: bsLiveShadow -= 8;
1348: }
1349: final int n = len_selCtr[sfmap_i] & 0xFF;
1350: bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
1351: bsLiveShadow += n;
1352:
1353: gs++;
1354: }
1355:
1356: gs = ge + 1;
1357: selCtr++;
1358: }
1359:
1360: this .bsBuff = bsBuffShadow;
1361: this .bsLive = bsLiveShadow;
1362: }
1363:
1364: private void moveToFrontCodeAndSend() throws IOException {
1365: bsW(24, this .origPtr);
1366: generateMTFValues();
1367: sendMTFValues();
1368: }
1369:
1370: /**
1371: * This is the most hammered method of this class.
1372: *
1373: * <p>This is the version using unrolled loops. Normally I never
1374: * use such ones in Java code. The unrolling has shown a
1375: * noticable performance improvement on JRE 1.4.2 (Linux i586 /
1376: * HotSpot Client). Of course it depends on the JIT compiler of
1377: * the vm.</p>
1378: */
1379: private boolean mainSimpleSort(final Data dataShadow, final int lo,
1380: final int hi, final int d) {
1381: final int bigN = hi - lo + 1;
1382: if (bigN < 2) {
1383: return this .firstAttempt
1384: && (this .workDone > this .workLimit);
1385: }
1386:
1387: int hp = 0;
1388: while (INCS[hp] < bigN) {
1389: hp++;
1390: }
1391:
1392: final int[] fmap = dataShadow.fmap;
1393: final char[] quadrant = dataShadow.quadrant;
1394: final byte[] block = dataShadow.block;
1395: final int lastShadow = this .last;
1396: final int lastPlus1 = lastShadow + 1;
1397: final boolean firstAttemptShadow = this .firstAttempt;
1398: final int workLimitShadow = this .workLimit;
1399: int workDoneShadow = this .workDone;
1400:
1401: // Following block contains unrolled code which could be shortened by
1402: // coding it in additional loops.
1403:
1404: HP: while (--hp >= 0) {
1405: final int h = INCS[hp];
1406: final int mj = lo + h - 1;
1407:
1408: for (int i = lo + h; i <= hi;) {
1409: // copy
1410: for (int k = 3; (i <= hi) && (--k >= 0); i++) {
1411: final int v = fmap[i];
1412: final int vd = v + d;
1413: int j = i;
1414:
1415: // for (int a;
1416: // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd,
1417: // block, quadrant, lastShadow);
1418: // j -= h) {
1419: // fmap[j] = a;
1420: // }
1421: //
1422: // unrolled version:
1423:
1424: // start inline mainGTU
1425: boolean onceRunned = false;
1426: int a = 0;
1427:
1428: HAMMER: while (true) {
1429: if (onceRunned) {
1430: fmap[j] = a;
1431: if ((j -= h) <= mj) {
1432: break HAMMER;
1433: }
1434: } else {
1435: onceRunned = true;
1436: }
1437:
1438: a = fmap[j - h];
1439: int i1 = a + d;
1440: int i2 = vd;
1441:
1442: // following could be done in a loop, but
1443: // unrolled it for performance:
1444: if (block[i1 + 1] == block[i2 + 1]) {
1445: if (block[i1 + 2] == block[i2 + 2]) {
1446: if (block[i1 + 3] == block[i2 + 3]) {
1447: if (block[i1 + 4] == block[i2 + 4]) {
1448: if (block[i1 + 5] == block[i2 + 5]) {
1449: if (block[(i1 += 6)] == block[(i2 += 6)]) {
1450: int x = lastShadow;
1451: X: while (x > 0) {
1452: x -= 4;
1453:
1454: if (block[i1 + 1] == block[i2 + 1]) {
1455: if (quadrant[i1] == quadrant[i2]) {
1456: if (block[i1 + 2] == block[i2 + 2]) {
1457: if (quadrant[i1 + 1] == quadrant[i2 + 1]) {
1458: if (block[i1 + 3] == block[i2 + 3]) {
1459: if (quadrant[i1 + 2] == quadrant[i2 + 2]) {
1460: if (block[i1 + 4] == block[i2 + 4]) {
1461: if (quadrant[i1 + 3] == quadrant[i2 + 3]) {
1462: if ((i1 += 4) >= lastPlus1) {
1463: i1 -= lastPlus1;
1464: }
1465: if ((i2 += 4) >= lastPlus1) {
1466: i2 -= lastPlus1;
1467: }
1468: workDoneShadow++;
1469: continue X;
1470: } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) {
1471: continue HAMMER;
1472: } else {
1473: break HAMMER;
1474: }
1475: } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
1476: continue HAMMER;
1477: } else {
1478: break HAMMER;
1479: }
1480: } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) {
1481: continue HAMMER;
1482: } else {
1483: break HAMMER;
1484: }
1485: } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
1486: continue HAMMER;
1487: } else {
1488: break HAMMER;
1489: }
1490: } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) {
1491: continue HAMMER;
1492: } else {
1493: break HAMMER;
1494: }
1495: } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
1496: continue HAMMER;
1497: } else {
1498: break HAMMER;
1499: }
1500: } else if ((quadrant[i1] > quadrant[i2])) {
1501: continue HAMMER;
1502: } else {
1503: break HAMMER;
1504: }
1505: } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
1506: continue HAMMER;
1507: } else {
1508: break HAMMER;
1509: }
1510:
1511: }
1512: break HAMMER;
1513: } // while x > 0
1514: else {
1515: if ((block[i1] & 0xff) > (block[i2] & 0xff)) {
1516: continue HAMMER;
1517: } else {
1518: break HAMMER;
1519: }
1520: }
1521: } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) {
1522: continue HAMMER;
1523: } else {
1524: break HAMMER;
1525: }
1526: } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
1527: continue HAMMER;
1528: } else {
1529: break HAMMER;
1530: }
1531: } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
1532: continue HAMMER;
1533: } else {
1534: break HAMMER;
1535: }
1536: } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
1537: continue HAMMER;
1538: } else {
1539: break HAMMER;
1540: }
1541: } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
1542: continue HAMMER;
1543: } else {
1544: break HAMMER;
1545: }
1546:
1547: } // HAMMER
1548: // end inline mainGTU
1549:
1550: fmap[j] = v;
1551: }
1552:
1553: if (firstAttemptShadow && (i <= hi)
1554: && (workDoneShadow > workLimitShadow)) {
1555: break HP;
1556: }
1557: }
1558: }
1559:
1560: this .workDone = workDoneShadow;
1561: return firstAttemptShadow && (workDoneShadow > workLimitShadow);
1562: }
1563:
1564: private static void vswap(int[] fmap, int p1, int p2, int n) {
1565: n += p1;
1566: while (p1 < n) {
1567: int t = fmap[p1];
1568: fmap[p1++] = fmap[p2];
1569: fmap[p2++] = t;
1570: }
1571: }
1572:
1573: private static byte med3(byte a, byte b, byte c) {
1574: return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b
1575: : a > c ? c : a);
1576: }
1577:
1578: private void blockSort() {
1579: this .workLimit = WORK_FACTOR * this .last;
1580: this .workDone = 0;
1581: this .blockRandomised = false;
1582: this .firstAttempt = true;
1583: mainSort();
1584:
1585: if (this .firstAttempt && (this .workDone > this .workLimit)) {
1586: randomiseBlock();
1587: this .workLimit = this .workDone = 0;
1588: this .firstAttempt = false;
1589: mainSort();
1590: }
1591:
1592: int[] fmap = this .data.fmap;
1593: this .origPtr = -1;
1594: for (int i = 0, lastShadow = this .last; i <= lastShadow; i++) {
1595: if (fmap[i] == 0) {
1596: this .origPtr = i;
1597: break;
1598: }
1599: }
1600:
1601: // assert (this.origPtr != -1) : this.origPtr;
1602: }
1603:
1604: /**
1605: * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2
1606: */
1607: private void mainQSort3(final Data dataShadow, final int loSt,
1608: final int hiSt, final int dSt) {
1609: final int[] stack_ll = dataShadow.stack_ll;
1610: final int[] stack_hh = dataShadow.stack_hh;
1611: final int[] stack_dd = dataShadow.stack_dd;
1612: final int[] fmap = dataShadow.fmap;
1613: final byte[] block = dataShadow.block;
1614:
1615: stack_ll[0] = loSt;
1616: stack_hh[0] = hiSt;
1617: stack_dd[0] = dSt;
1618:
1619: for (int sp = 1; --sp >= 0;) {
1620: final int lo = stack_ll[sp];
1621: final int hi = stack_hh[sp];
1622: final int d = stack_dd[sp];
1623:
1624: if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) {
1625: if (mainSimpleSort(dataShadow, lo, hi, d)) {
1626: return;
1627: }
1628: } else {
1629: final int d1 = d + 1;
1630: final int med = med3(block[fmap[lo] + d1],
1631: block[fmap[hi] + d1],
1632: block[fmap[(lo + hi) >> 1] + d1]) & 0xff;
1633:
1634: int unLo = lo;
1635: int unHi = hi;
1636: int ltLo = lo;
1637: int gtHi = hi;
1638:
1639: while (true) {
1640: while (unLo <= unHi) {
1641: final int n = ((int) block[fmap[unLo] + d1] & 0xff)
1642: - med;
1643: if (n == 0) {
1644: final int temp = fmap[unLo];
1645: fmap[unLo++] = fmap[ltLo];
1646: fmap[ltLo++] = temp;
1647: } else if (n < 0) {
1648: unLo++;
1649: } else {
1650: break;
1651: }
1652: }
1653:
1654: while (unLo <= unHi) {
1655: final int n = ((int) block[fmap[unHi] + d1] & 0xff)
1656: - med;
1657: if (n == 0) {
1658: final int temp = fmap[unHi];
1659: fmap[unHi--] = fmap[gtHi];
1660: fmap[gtHi--] = temp;
1661: } else if (n > 0) {
1662: unHi--;
1663: } else {
1664: break;
1665: }
1666: }
1667:
1668: if (unLo <= unHi) {
1669: final int temp = fmap[unLo];
1670: fmap[unLo++] = fmap[unHi];
1671: fmap[unHi--] = temp;
1672: } else {
1673: break;
1674: }
1675: }
1676:
1677: if (gtHi < ltLo) {
1678: stack_ll[sp] = lo;
1679: stack_hh[sp] = hi;
1680: stack_dd[sp] = d1;
1681: sp++;
1682: } else {
1683: int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo)
1684: : (unLo - ltLo);
1685: vswap(fmap, lo, unLo - n, n);
1686: int m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi)
1687: : (gtHi - unHi);
1688: vswap(fmap, unLo, hi - m + 1, m);
1689:
1690: n = lo + unLo - ltLo - 1;
1691: m = hi - (gtHi - unHi) + 1;
1692:
1693: stack_ll[sp] = lo;
1694: stack_hh[sp] = n;
1695: stack_dd[sp] = d;
1696: sp++;
1697:
1698: stack_ll[sp] = n + 1;
1699: stack_hh[sp] = m - 1;
1700: stack_dd[sp] = d1;
1701: sp++;
1702:
1703: stack_ll[sp] = m;
1704: stack_hh[sp] = hi;
1705: stack_dd[sp] = d;
1706: sp++;
1707: }
1708: }
1709: }
1710: }
1711:
1712: private void mainSort() {
1713: final Data dataShadow = this .data;
1714: final int[] runningOrder = dataShadow.mainSort_runningOrder;
1715: final int[] copy = dataShadow.mainSort_copy;
1716: final boolean[] bigDone = dataShadow.mainSort_bigDone;
1717: final int[] ftab = dataShadow.ftab;
1718: final byte[] block = dataShadow.block;
1719: final int[] fmap = dataShadow.fmap;
1720: final char[] quadrant = dataShadow.quadrant;
1721: final int lastShadow = this .last;
1722: final int workLimitShadow = this .workLimit;
1723: final boolean firstAttemptShadow = this .firstAttempt;
1724:
1725: // Set up the 2-byte frequency table
1726: for (int i = 65537; --i >= 0;) {
1727: ftab[i] = 0;
1728: }
1729:
1730: /*
1731: In the various block-sized structures, live data runs
1732: from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
1733: set up the overshoot area for block.
1734: */
1735: for (int i = 0; i < NUM_OVERSHOOT_BYTES; i++) {
1736: block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1];
1737: }
1738: for (int i = lastShadow + NUM_OVERSHOOT_BYTES; --i >= 0;) {
1739: quadrant[i] = 0;
1740: }
1741: block[0] = block[lastShadow + 1];
1742:
1743: // Complete the initial radix sort:
1744:
1745: int c1 = block[0] & 0xff;
1746: for (int i = 0; i <= lastShadow; i++) {
1747: final int c2 = block[i + 1] & 0xff;
1748: ftab[(c1 << 8) + c2]++;
1749: c1 = c2;
1750: }
1751:
1752: for (int i = 1; i <= 65536; i++)
1753: ftab[i] += ftab[i - 1];
1754:
1755: c1 = block[1] & 0xff;
1756: for (int i = 0; i < lastShadow; i++) {
1757: final int c2 = block[i + 2] & 0xff;
1758: fmap[--ftab[(c1 << 8) + c2]] = i;
1759: c1 = c2;
1760: }
1761:
1762: fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8)
1763: + (block[1] & 0xff)]] = lastShadow;
1764:
1765: /*
1766: Now ftab contains the first loc of every small bucket.
1767: Calculate the running order, from smallest to largest
1768: big bucket.
1769: */
1770: for (int i = 256; --i >= 0;) {
1771: bigDone[i] = false;
1772: runningOrder[i] = i;
1773: }
1774:
1775: for (int h = 364; h != 1;) {
1776: h /= 3;
1777: for (int i = h; i <= 255; i++) {
1778: final int vv = runningOrder[i];
1779: final int a = ftab[(vv + 1) << 8] - ftab[vv << 8];
1780: final int b = h - 1;
1781: int j = i;
1782: for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j
1783: - h]) {
1784: runningOrder[j] = ro;
1785: j -= h;
1786: if (j <= b) {
1787: break;
1788: }
1789: }
1790: runningOrder[j] = vv;
1791: }
1792: }
1793:
1794: /*
1795: The main sorting loop.
1796: */
1797: for (int i = 0; i <= 255; i++) {
1798: /*
1799: Process big buckets, starting with the least full.
1800: */
1801: final int ss = runningOrder[i];
1802:
1803: // Step 1:
1804: /*
1805: Complete the big bucket [ss] by quicksorting
1806: any unsorted small buckets [ss, j]. Hopefully
1807: previous pointer-scanning phases have already
1808: completed many of the small buckets [ss, j], so
1809: we don't have to sort them at all.
1810: */
1811: for (int j = 0; j <= 255; j++) {
1812: final int sb = (ss << 8) + j;
1813: final int ftab_sb = ftab[sb];
1814: if ((ftab_sb & SETMASK) != SETMASK) {
1815: final int lo = ftab_sb & CLEARMASK;
1816: final int hi = (ftab[sb + 1] & CLEARMASK) - 1;
1817: if (hi > lo) {
1818: mainQSort3(dataShadow, lo, hi, 2);
1819: if (firstAttemptShadow
1820: && (this .workDone > workLimitShadow)) {
1821: return;
1822: }
1823: }
1824: ftab[sb] = ftab_sb | SETMASK;
1825: }
1826: }
1827:
1828: // Step 2:
1829: // Now scan this big bucket so as to synthesise the
1830: // sorted order for small buckets [t, ss] for all t != ss.
1831:
1832: for (int j = 0; j <= 255; j++) {
1833: copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
1834: }
1835:
1836: for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) {
1837: final int fmap_j = fmap[j];
1838: c1 = block[fmap_j] & 0xff;
1839: if (!bigDone[c1]) {
1840: fmap[copy[c1]] = (fmap_j == 0) ? lastShadow
1841: : (fmap_j - 1);
1842: copy[c1]++;
1843: }
1844: }
1845:
1846: for (int j = 256; --j >= 0;)
1847: ftab[(j << 8) + ss] |= SETMASK;
1848:
1849: // Step 3:
1850: /*
1851: The ss big bucket is now done. Record this fact,
1852: and update the quadrant descriptors. Remember to
1853: update quadrants in the overshoot area too, if
1854: necessary. The "if (i < 255)" test merely skips
1855: this updating for the last bucket processed, since
1856: updating for the last bucket is pointless.
1857: */
1858: bigDone[ss] = true;
1859:
1860: if (i < 255) {
1861: final int bbStart = ftab[ss << 8] & CLEARMASK;
1862: final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK)
1863: - bbStart;
1864: int shifts = 0;
1865:
1866: while ((bbSize >> shifts) > 65534) {
1867: shifts++;
1868: }
1869:
1870: for (int j = 0; j < bbSize; j++) {
1871: final int a2update = fmap[bbStart + j];
1872: final char qVal = (char) (j >> shifts);
1873: quadrant[a2update] = qVal;
1874: if (a2update < NUM_OVERSHOOT_BYTES) {
1875: quadrant[a2update + lastShadow + 1] = qVal;
1876: }
1877: }
1878: }
1879:
1880: }
1881: }
1882:
1883: private void randomiseBlock() {
1884: final boolean[] inUse = this .data.inUse;
1885: final byte[] block = this .data.block;
1886: final int lastShadow = this .last;
1887:
1888: for (int i = 256; --i >= 0;)
1889: inUse[i] = false;
1890:
1891: int rNToGo = 0;
1892: int rTPos = 0;
1893: for (int i = 0, j = 1; i <= lastShadow; i = j, j++) {
1894: if (rNToGo == 0) {
1895: rNToGo = (char) BZip2Constants.rNums[rTPos];
1896: if (++rTPos == 512) {
1897: rTPos = 0;
1898: }
1899: }
1900:
1901: rNToGo--;
1902: block[j] ^= ((rNToGo == 1) ? 1 : 0);
1903:
1904: // handle 16 bit signed numbers
1905: inUse[block[j] & 0xff] = true;
1906: }
1907:
1908: this .blockRandomised = true;
1909: }
1910:
1911: private void generateMTFValues() {
1912: final int lastShadow = this .last;
1913: final Data dataShadow = this .data;
1914: final boolean[] inUse = dataShadow.inUse;
1915: final byte[] block = dataShadow.block;
1916: final int[] fmap = dataShadow.fmap;
1917: final char[] sfmap = dataShadow.sfmap;
1918: final int[] mtfFreq = dataShadow.mtfFreq;
1919: final byte[] unseqToSeq = dataShadow.unseqToSeq;
1920: final byte[] yy = dataShadow.generateMTFValues_yy;
1921:
1922: // make maps
1923: int nInUseShadow = 0;
1924: for (int i = 0; i < 256; i++) {
1925: if (inUse[i]) {
1926: unseqToSeq[i] = (byte) nInUseShadow;
1927: nInUseShadow++;
1928: }
1929: }
1930: this .nInUse = nInUseShadow;
1931:
1932: final int eob = nInUseShadow + 1;
1933:
1934: for (int i = eob; i >= 0; i--) {
1935: mtfFreq[i] = 0;
1936: }
1937:
1938: for (int i = nInUseShadow; --i >= 0;) {
1939: yy[i] = (byte) i;
1940: }
1941:
1942: int wr = 0;
1943: int zPend = 0;
1944:
1945: for (int i = 0; i <= lastShadow; i++) {
1946: final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
1947: byte tmp = yy[0];
1948: int j = 0;
1949:
1950: while (ll_i != tmp) {
1951: j++;
1952: byte tmp2 = tmp;
1953: tmp = yy[j];
1954: yy[j] = tmp2;
1955: }
1956: yy[0] = tmp;
1957:
1958: if (j == 0) {
1959: zPend++;
1960: } else {
1961: if (zPend > 0) {
1962: zPend--;
1963: while (true) {
1964: if ((zPend & 1) == 0) {
1965: sfmap[wr] = RUNA;
1966: wr++;
1967: mtfFreq[RUNA]++;
1968: } else {
1969: sfmap[wr] = RUNB;
1970: wr++;
1971: mtfFreq[RUNB]++;
1972: }
1973:
1974: if (zPend >= 2) {
1975: zPend = (zPend - 2) >> 1;
1976: } else {
1977: break;
1978: }
1979: }
1980: zPend = 0;
1981: }
1982: sfmap[wr] = (char) (j + 1);
1983: wr++;
1984: mtfFreq[j + 1]++;
1985: }
1986: }
1987:
1988: if (zPend > 0) {
1989: zPend--;
1990: while (true) {
1991: if ((zPend & 1) == 0) {
1992: sfmap[wr] = RUNA;
1993: wr++;
1994: mtfFreq[RUNA]++;
1995: } else {
1996: sfmap[wr] = RUNB;
1997: wr++;
1998: mtfFreq[RUNB]++;
1999: }
2000:
2001: if (zPend >= 2) {
2002: zPend = (zPend - 2) >> 1;
2003: } else {
2004: break;
2005: }
2006: }
2007: }
2008:
2009: sfmap[wr] = (char) eob;
2010: mtfFreq[eob]++;
2011: this .nMTF = wr + 1;
2012: }
2013:
2014: private static final class Data extends Object {
2015:
2016: // with blockSize 900k
2017: final boolean[] inUse = new boolean[256]; // 256 byte
2018: final byte[] unseqToSeq = new byte[256]; // 256 byte
2019: final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
2020: final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
2021: final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
2022:
2023: final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
2024: final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 byte
2025: final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
2026: final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
2027: final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
2028: final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
2029: final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
2030: final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
2031:
2032: final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte
2033: final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte
2034: final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte
2035:
2036: final int[] mainSort_runningOrder = new int[256]; // 1024 byte
2037: final int[] mainSort_copy = new int[256]; // 1024 byte
2038: final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte
2039:
2040: final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
2041: final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
2042: final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
2043:
2044: final int[] ftab = new int[65537]; // 262148 byte
2045: // ------------
2046: // 333408 byte
2047:
2048: final byte[] block; // 900021 byte
2049: final int[] fmap; // 3600000 byte
2050: final char[] sfmap; // 3600000 byte
2051: // ------------
2052: // 8433529 byte
2053: // ============
2054:
2055: /**
2056: * Array instance identical to sfmap, both are used only temporarily and indepently,
2057: * so we do not need to allocate additional memory.
2058: */
2059: final char[] quadrant;
2060:
2061: Data(int blockSize100k) {
2062: super ();
2063:
2064: final int n = blockSize100k * BZip2Constants.baseBlockSize;
2065: this .block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
2066: this .fmap = new int[n];
2067: this .sfmap = new char[2 * n];
2068: this.quadrant = this.sfmap;
2069: }
2070:
2071: }
2072:
2073: }
|