0001: /*
0002: * Copyright 2002-2005 Sun Microsystems, Inc. All Rights Reserved.
0003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0004: *
0005: * This code is free software; you can redistribute it and/or modify it
0006: * under the terms of the GNU General Public License version 2 only, as
0007: * published by the Free Software Foundation. Sun designates this
0008: * particular file as subject to the "Classpath" exception as provided
0009: * by Sun in the LICENSE file that accompanied this code.
0010: *
0011: * This code is distributed in the hope that it will be useful, but WITHOUT
0012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0014: * version 2 for more details (a copy is included in the LICENSE file that
0015: * accompanied this code).
0016: *
0017: * You should have received a copy of the GNU General Public License version
0018: * 2 along with this work; if not, write to the Free Software Foundation,
0019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0020: *
0021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
0022: * CA 95054 USA or visit www.sun.com if you need additional information or
0023: * have any questions.
0024: */
0025:
0026: package com.sun.java.util.jar.pack;
0027:
0028: import java.io.*;
0029: import java.util.*;
0030: import java.util.zip.*;
0031:
0032: /**
0033: * Heuristic chooser of basic encodings.
0034: * Runs "zip" to measure the apparent information content after coding.
0035: * @author John Rose
0036: * @version 1.22, 05/05/07
0037: */
0038: class CodingChooser implements Constants {
0039: int verbose;
0040: int effort;
0041: boolean optUseHistogram = true;
0042: boolean optUsePopulationCoding = true;
0043: boolean optUseAdaptiveCoding = true;
0044: boolean disablePopCoding;
0045: boolean disableRunCoding;
0046: boolean topLevel = true;
0047:
0048: // Derived from effort; >1 (<1) means try more (less) experiments
0049: // when looking to beat a best score.
0050: double fuzz;
0051:
0052: Coding[] allCodingChoices;
0053: Choice[] choices;
0054: ByteArrayOutputStream context;
0055: CodingChooser popHelper;
0056: CodingChooser runHelper;
0057:
0058: Random stress; // If not null, stress mode oracle.
0059:
0060: // Element in sorted set of coding choices:
0061: static class Choice {
0062: final Coding coding;
0063: final int index; // index in choices
0064: final int[] distance; // cache of distance
0065:
0066: Choice(Coding coding, int index, int[] distance) {
0067: this .coding = coding;
0068: this .index = index;
0069: this .distance = distance;
0070: }
0071:
0072: // These variables are reset and reused:
0073: int searchOrder; // order in which it is checked
0074: int minDistance; // min distance from already-checked choices
0075: int zipSize; // size of encoding in sample, zipped output
0076: int byteSize; // size of encoding in sample (debug only)
0077: int histSize; // size of encoding, according to histogram
0078:
0079: void reset() {
0080: searchOrder = Integer.MAX_VALUE;
0081: minDistance = Integer.MAX_VALUE;
0082: zipSize = byteSize = histSize = -1;
0083: }
0084:
0085: boolean isExtra() {
0086: return index < 0;
0087: }
0088:
0089: public String toString() {
0090: return stringForDebug();
0091: }
0092:
0093: private String stringForDebug() {
0094: String s = "";
0095: if (searchOrder < Integer.MAX_VALUE)
0096: s += " so: " + searchOrder;
0097: if (minDistance < Integer.MAX_VALUE)
0098: s += " md: " + minDistance;
0099: if (zipSize > 0)
0100: s += " zs: " + zipSize;
0101: if (byteSize > 0)
0102: s += " bs: " + byteSize;
0103: if (histSize > 0)
0104: s += " hs: " + histSize;
0105: return "Choice[" + index + "] " + s + " " + coding;
0106: }
0107: }
0108:
0109: CodingChooser(int effort, Coding[] allCodingChoices) {
0110: PropMap p200 = Utils.currentPropMap();
0111: if (p200 != null) {
0112: this .verbose = Math.max(p200
0113: .getInteger(Utils.DEBUG_VERBOSE), p200
0114: .getInteger(Utils.COM_PREFIX + "verbose.coding"));
0115: this .optUseHistogram = !p200.getBoolean(Utils.COM_PREFIX
0116: + "no.histogram");
0117: this .optUsePopulationCoding = !p200
0118: .getBoolean(Utils.COM_PREFIX
0119: + "no.population.coding");
0120: this .optUseAdaptiveCoding = !p200
0121: .getBoolean(Utils.COM_PREFIX + "no.adaptive.coding");
0122: int stress = p200.getInteger(Utils.COM_PREFIX
0123: + "stress.coding");
0124: if (stress != 0)
0125: this .stress = new Random(stress);
0126: }
0127:
0128: this .effort = effort;
0129: // The following line "makes sense" but is too much
0130: // work for a simple heuristic.
0131: //if (effort > 5) zipDef.setLevel(effort);
0132:
0133: this .allCodingChoices = allCodingChoices;
0134:
0135: // If effort = 9, look carefully at any solution
0136: // whose initial metrics are within 1% of the best
0137: // so far. If effort = 1, look carefully only at
0138: // solutions whose initial metrics promise a 1% win.
0139: this .fuzz = 1 + (0.0025 * (effort - MID_EFFORT));
0140:
0141: int nc = 0;
0142: for (int i = 0; i < allCodingChoices.length; i++) {
0143: if (allCodingChoices[i] == null)
0144: continue;
0145: nc++;
0146: }
0147: choices = new Choice[nc];
0148: nc = 0;
0149: for (int i = 0; i < allCodingChoices.length; i++) {
0150: if (allCodingChoices[i] == null)
0151: continue;
0152: int[] distance = new int[choices.length];
0153: choices[nc++] = new Choice(allCodingChoices[i], i, distance);
0154: }
0155: for (int i = 0; i < choices.length; i++) {
0156: Coding ci = choices[i].coding;
0157: assert (ci.distanceFrom(ci) == 0);
0158: for (int j = 0; j < i; j++) {
0159: Coding cj = choices[j].coding;
0160: int dij = ci.distanceFrom(cj);
0161: assert (dij > 0);
0162: assert (dij == cj.distanceFrom(ci));
0163: choices[i].distance[j] = dij;
0164: choices[j].distance[i] = dij;
0165: }
0166: }
0167: }
0168:
0169: Choice makeExtraChoice(Coding coding) {
0170: int[] distance = new int[choices.length];
0171: for (int i = 0; i < distance.length; i++) {
0172: Coding ci = choices[i].coding;
0173: int dij = coding.distanceFrom(ci);
0174: assert (dij > 0);
0175: assert (dij == ci.distanceFrom(coding));
0176: distance[i] = dij;
0177: }
0178: Choice c = new Choice(coding, -1, distance);
0179: c.reset();
0180: return c;
0181: }
0182:
0183: ByteArrayOutputStream getContext() {
0184: if (context == null)
0185: context = new ByteArrayOutputStream(1 << 16);
0186: return context;
0187: }
0188:
0189: // These variables are reset and reused:
0190: private int[] values;
0191: private int start, end; // slice of values
0192: private int[] deltas;
0193: private int min, max;
0194: private Histogram vHist;
0195: private Histogram dHist;
0196: private int searchOrder;
0197: private Choice regularChoice;
0198: private Choice bestChoice;
0199: private CodingMethod bestMethod;
0200: private int bestByteSize;
0201: private int bestZipSize;
0202: private int targetSize; // fuzzed target byte size
0203:
0204: private void reset(int[] values, int start, int end) {
0205: this .values = values;
0206: this .start = start;
0207: this .end = end;
0208: this .deltas = null;
0209: this .min = Integer.MAX_VALUE;
0210: this .max = Integer.MIN_VALUE;
0211: this .vHist = null;
0212: this .dHist = null;
0213: this .searchOrder = 0;
0214: this .regularChoice = null;
0215: this .bestChoice = null;
0216: this .bestMethod = null;
0217: this .bestZipSize = Integer.MAX_VALUE;
0218: this .bestByteSize = Integer.MAX_VALUE;
0219: this .targetSize = Integer.MAX_VALUE;
0220: }
0221:
0222: public static final int MIN_EFFORT = 1;
0223: public static final int MID_EFFORT = 5;
0224: public static final int MAX_EFFORT = 9;
0225:
0226: public static final int POP_EFFORT = MID_EFFORT - 1;
0227: public static final int RUN_EFFORT = MID_EFFORT - 2;
0228:
0229: public static final int BYTE_SIZE = 0;
0230: public static final int ZIP_SIZE = 1;
0231:
0232: CodingMethod choose(int[] values, int start, int end,
0233: Coding regular, int[] sizes) {
0234: // Save the value array
0235: reset(values, start, end);
0236:
0237: if (effort <= MIN_EFFORT || start >= end) {
0238: if (sizes != null) {
0239: int[] computed = computeSizePrivate(regular);
0240: sizes[BYTE_SIZE] = computed[BYTE_SIZE];
0241: sizes[ZIP_SIZE] = computed[ZIP_SIZE];
0242: }
0243: return regular;
0244: }
0245:
0246: if (optUseHistogram) {
0247: getValueHistogram();
0248: getDeltaHistogram();
0249: }
0250:
0251: for (int i = start; i < end; i++) {
0252: int val = values[i];
0253: if (min > val)
0254: min = val;
0255: if (max < val)
0256: max = val;
0257: }
0258:
0259: // Find all the preset choices that might be worth looking at:
0260: int numChoices = markUsableChoices(regular);
0261:
0262: if (stress != null) {
0263: // Make a random choice.
0264: int rand = stress.nextInt(numChoices * 2 + 4);
0265: CodingMethod coding = null;
0266: for (int i = 0; i < choices.length; i++) {
0267: Choice c = choices[i];
0268: if (c.searchOrder >= 0 && rand-- == 0) {
0269: coding = c.coding;
0270: break;
0271: }
0272: }
0273: if (coding == null) {
0274: if ((rand & 7) != 0) {
0275: coding = regular;
0276: } else {
0277: // Pick a totally random coding 6% of the time.
0278: coding = stressCoding(min, max);
0279: }
0280: }
0281: if (!disablePopCoding && optUsePopulationCoding
0282: && effort >= POP_EFFORT) {
0283: coding = stressPopCoding(coding);
0284: }
0285: if (!disableRunCoding && optUseAdaptiveCoding
0286: && effort >= RUN_EFFORT) {
0287: coding = stressAdaptiveCoding(coding);
0288: }
0289: return coding;
0290: }
0291:
0292: double searchScale = 1.0;
0293: for (int x = effort; x < MAX_EFFORT; x++) {
0294: searchScale /= 1.414; // every 2 effort points doubles work
0295: }
0296: int searchOrderLimit = (int) Math
0297: .ceil(numChoices * searchScale);
0298:
0299: // Start by evaluating the "regular" choice.
0300: bestChoice = regularChoice;
0301: evaluate(regularChoice);
0302: int maxd = updateDistances(regularChoice);
0303:
0304: // save these first-cut numbers for later
0305: int zipSize1 = bestZipSize;
0306: int byteSize1 = bestByteSize;
0307:
0308: if (regularChoice.coding == regular && topLevel) {
0309: // Give credit for being the default; no band header is needed.
0310: // Rather than increasing every other size value by the band
0311: // header amount, we decrement this one metric, to give it an edge.
0312: // Decreasing zipSize by a byte length is conservatively correct,
0313: // especially considering that the escape byte is not likely to
0314: // zip well with other bytes in the band.
0315: int X = BandStructure.encodeEscapeValue(_meta_canon_max,
0316: regular);
0317: if (regular.canRepresentSigned(X)) {
0318: int Xlen = regular.getLength(X); // band coding header
0319: //regularChoice.histSize -= Xlen; // keep exact byteSize
0320: //regularChoice.byteSize -= Xlen; // keep exact byteSize
0321: regularChoice.zipSize -= Xlen;
0322: bestByteSize = regularChoice.byteSize;
0323: bestZipSize = regularChoice.zipSize;
0324: }
0325: }
0326:
0327: int dscale = 1;
0328: // Continually select a new choice to evaluate.
0329: while (searchOrder < searchOrderLimit) {
0330: Choice nextChoice;
0331: if (dscale > maxd)
0332: dscale = 1; // cycle dscale values!
0333: int dhi = maxd / dscale;
0334: int dlo = maxd / (dscale *= 2) + 1;
0335: nextChoice = findChoiceNear(bestChoice, dhi, dlo);
0336: if (nextChoice == null)
0337: continue;
0338: assert (nextChoice.coding.canRepresent(min, max));
0339: evaluate(nextChoice);
0340: int nextMaxd = updateDistances(nextChoice);
0341: if (nextChoice == bestChoice) {
0342: maxd = nextMaxd;
0343: if (verbose > 5)
0344: Utils.log.info("maxd = " + maxd);
0345: }
0346: }
0347:
0348: // Record best "plain coding" choice.
0349: Coding plainBest = bestChoice.coding;
0350: assert (plainBest == bestMethod);
0351:
0352: if (verbose > 2) {
0353: Utils.log.info("chooser: plain result=" + bestChoice
0354: + " after " + bestChoice.searchOrder + " rounds, "
0355: + (regularChoice.zipSize - bestZipSize)
0356: + " fewer bytes than regular " + regular);
0357: }
0358: bestChoice = null;
0359:
0360: if (!disablePopCoding && optUsePopulationCoding
0361: && effort >= POP_EFFORT && bestMethod instanceof Coding) {
0362: tryPopulationCoding(plainBest);
0363: }
0364:
0365: if (!disableRunCoding && optUseAdaptiveCoding
0366: && effort >= RUN_EFFORT && bestMethod instanceof Coding) {
0367: tryAdaptiveCoding(plainBest);
0368: }
0369:
0370: // Pass back the requested information:
0371: if (sizes != null) {
0372: sizes[BYTE_SIZE] = bestByteSize;
0373: sizes[ZIP_SIZE] = bestZipSize;
0374: }
0375: if (verbose > 1) {
0376: Utils.log.info("chooser: result=" + bestMethod + " "
0377: + (zipSize1 - bestZipSize)
0378: + " fewer bytes than regular " + regular + "; win="
0379: + pct(zipSize1 - bestZipSize, zipSize1));
0380: }
0381: CodingMethod bestMethod = this .bestMethod;
0382: reset(null, 0, 0); // for GC
0383: return bestMethod;
0384: }
0385:
0386: CodingMethod choose(int[] values, int start, int end, Coding regular) {
0387: return choose(values, start, end, regular, null);
0388: }
0389:
0390: CodingMethod choose(int[] values, Coding regular, int[] sizes) {
0391: return choose(values, 0, values.length, regular, sizes);
0392: }
0393:
0394: CodingMethod choose(int[] values, Coding regular) {
0395: return choose(values, 0, values.length, regular, null);
0396: }
0397:
0398: private int markUsableChoices(Coding regular) {
0399: int numChoices = 0;
0400: for (int i = 0; i < choices.length; i++) {
0401: Choice c = choices[i];
0402: c.reset();
0403: if (!c.coding.canRepresent(min, max)) {
0404: // Mark as already visited:
0405: c.searchOrder = -1;
0406: if (verbose > 1 && c.coding == regular) {
0407: Utils.log.info("regular coding cannot represent ["
0408: + min + ".." + max + "]: " + regular);
0409: }
0410: continue;
0411: }
0412: if (c.coding == regular)
0413: regularChoice = c;
0414: numChoices++;
0415: }
0416: if (regularChoice == null && regular.canRepresent(min, max)) {
0417: regularChoice = makeExtraChoice(regular);
0418: if (verbose > 1) {
0419: Utils.log.info("*** regular choice is extra: "
0420: + regularChoice.coding);
0421: }
0422: }
0423: if (regularChoice == null) {
0424: for (int i = 0; i < choices.length; i++) {
0425: Choice c = choices[i];
0426: if (c.searchOrder != -1) {
0427: regularChoice = c; // arbitrary pick
0428: break;
0429: }
0430: }
0431: if (verbose > 1) {
0432: Utils.log.info("*** regular choice does not apply "
0433: + regular);
0434: Utils.log.info(" using instead "
0435: + regularChoice.coding);
0436: }
0437: }
0438: if (verbose > 2) {
0439: Utils.log.info("chooser: #choices=" + numChoices + " ["
0440: + min + ".." + max + "]");
0441: if (verbose > 4) {
0442: for (int i = 0; i < choices.length; i++) {
0443: Choice c = choices[i];
0444: if (c.searchOrder >= 0)
0445: Utils.log.info(" " + c);
0446: }
0447: }
0448: }
0449: return numChoices;
0450: }
0451:
0452: // Find an arbitrary choice at least dlo away from a previously
0453: // evaluated choices, and at most dhi. Try also to regulate its
0454: // min distance to all previously evaluated choices, in this range.
0455: private Choice findChoiceNear(Choice near, int dhi, int dlo) {
0456: if (verbose > 5)
0457: Utils.log.info("findChoice " + dhi + ".." + dlo + " near: "
0458: + near);
0459: int[] distance = near.distance;
0460: Choice found = null;
0461: for (int i = 0; i < choices.length; i++) {
0462: Choice c = choices[i];
0463: if (c.searchOrder < searchOrder)
0464: continue; // already searched
0465: // Distance from "near" guy must be in bounds:
0466: if (distance[i] >= dlo && distance[i] <= dhi) {
0467: // Try also to keep min-distance from other guys in bounds:
0468: if (c.minDistance >= dlo && c.minDistance <= dhi) {
0469: if (verbose > 5)
0470: Utils.log.info("findChoice => good " + c);
0471: return c;
0472: }
0473: found = c;
0474: }
0475: }
0476: if (verbose > 5)
0477: Utils.log.info("findChoice => found " + found);
0478: return found;
0479: }
0480:
0481: private void evaluate(Choice c) {
0482: assert (c.searchOrder == Integer.MAX_VALUE);
0483: c.searchOrder = searchOrder++;
0484: boolean mustComputeSize;
0485: if (c == bestChoice || c.isExtra()) {
0486: mustComputeSize = true;
0487: } else if (optUseHistogram) {
0488: Histogram hist = getHistogram(c.coding.isDelta());
0489: c.histSize = (int) Math
0490: .ceil(hist.getBitLength(c.coding) / 8);
0491: c.byteSize = c.histSize;
0492: mustComputeSize = (c.byteSize <= targetSize);
0493: } else {
0494: mustComputeSize = true;
0495: }
0496: if (mustComputeSize) {
0497: int[] sizes = computeSizePrivate(c.coding);
0498: c.byteSize = sizes[BYTE_SIZE];
0499: c.zipSize = sizes[ZIP_SIZE];
0500: if (noteSizes(c.coding, c.byteSize, c.zipSize))
0501: bestChoice = c;
0502: }
0503: if (c.histSize >= 0) {
0504: assert (c.byteSize == c.histSize); // models should agree
0505: }
0506: if (verbose > 4) {
0507: Utils.log.info("evaluated " + c);
0508: }
0509: }
0510:
0511: private boolean noteSizes(CodingMethod c, int byteSize, int zipSize) {
0512: assert (zipSize > 0 && byteSize > 0);
0513: boolean better = (zipSize < bestZipSize);
0514: if (verbose > 3)
0515: Utils.log
0516: .info("computed size "
0517: + c
0518: + " "
0519: + byteSize
0520: + "/zs="
0521: + zipSize
0522: + ((better && bestMethod != null) ? (" better by " + pct(
0523: bestZipSize - zipSize, zipSize))
0524: : ""));
0525: if (better) {
0526: bestMethod = c;
0527: bestZipSize = zipSize;
0528: bestByteSize = byteSize;
0529: targetSize = (int) (byteSize * fuzz);
0530: return true;
0531: } else {
0532: return false;
0533: }
0534: }
0535:
0536: private int updateDistances(Choice c) {
0537: // update all minDistance values in still unevaluated choices
0538: int[] distance = c.distance;
0539: int maxd = 0; // how far is c from everybody else?
0540: for (int i = 0; i < choices.length; i++) {
0541: Choice c2 = choices[i];
0542: if (c2.searchOrder < searchOrder)
0543: continue;
0544: int d = distance[i];
0545: if (verbose > 5)
0546: Utils.log.info("evaluate dist " + d + " to " + c2);
0547: int mind = c2.minDistance;
0548: if (mind > d)
0549: c2.minDistance = mind = d;
0550: if (maxd < d)
0551: maxd = d;
0552: }
0553: // Now maxd has the distance of the farthest outlier
0554: // from all evaluated choices.
0555: if (verbose > 5)
0556: Utils.log.info("evaluate maxd => " + maxd);
0557: return maxd;
0558: }
0559:
0560: // Compute the coded size of a sequence of values.
0561: // The first int is the size in uncompressed bytes.
0562: // The second is an estimate of the compressed size of these bytes.
0563: public void computeSize(CodingMethod c, int[] values, int start,
0564: int end, int[] sizes) {
0565: if (end <= start) {
0566: sizes[BYTE_SIZE] = sizes[ZIP_SIZE] = 0;
0567: return;
0568: }
0569: try {
0570: resetData();
0571: c.writeArrayTo(byteSizer, values, start, end);
0572: sizes[BYTE_SIZE] = getByteSize();
0573: sizes[ZIP_SIZE] = getZipSize();
0574: } catch (IOException ee) {
0575: throw new RuntimeException(ee); // cannot happen
0576: }
0577: }
0578:
0579: public void computeSize(CodingMethod c, int[] values, int[] sizes) {
0580: computeSize(c, values, 0, values.length, sizes);
0581: }
0582:
0583: public int[] computeSize(CodingMethod c, int[] values, int start,
0584: int end) {
0585: int[] sizes = { 0, 0 };
0586: computeSize(c, values, start, end, sizes);
0587: return sizes;
0588: }
0589:
0590: public int[] computeSize(CodingMethod c, int[] values) {
0591: return computeSize(c, values, 0, values.length);
0592: }
0593:
0594: // This version uses the implicit local arguments
0595: private int[] computeSizePrivate(CodingMethod c) {
0596: int[] sizes = { 0, 0 };
0597: computeSize(c, values, start, end, sizes);
0598: return sizes;
0599: }
0600:
0601: public int computeByteSize(CodingMethod cm, int[] values,
0602: int start, int end) {
0603: int len = end - start;
0604: if (len < 0) {
0605: return 0;
0606: }
0607: if (cm instanceof Coding) {
0608: Coding c = (Coding) cm;
0609: int size = c.getLength(values, start, end);
0610: int size2;
0611: assert (size == (size2 = countBytesToSizer(cm, values,
0612: start, end))) : (cm + " : " + size + " != " + size2);
0613: return size;
0614: }
0615: return countBytesToSizer(cm, values, start, end);
0616: }
0617:
0618: private int countBytesToSizer(CodingMethod cm, int[] values,
0619: int start, int end) {
0620: try {
0621: byteOnlySizer.reset();
0622: cm.writeArrayTo(byteOnlySizer, values, start, end);
0623: return byteOnlySizer.getSize();
0624: } catch (IOException ee) {
0625: throw new RuntimeException(ee); // cannot happen
0626: }
0627: }
0628:
0629: int[] getDeltas(int min, int max) {
0630: if ((min | max) != 0)
0631: return Coding.makeDeltas(values, start, end, min, max);
0632: if (deltas == null) {
0633: deltas = Coding.makeDeltas(values, start, end, 0, 0);
0634: }
0635: return deltas;
0636: }
0637:
0638: Histogram getValueHistogram() {
0639: if (vHist == null) {
0640: vHist = new Histogram(values, start, end);
0641: if (verbose > 3) {
0642: vHist.print("vHist", System.out);
0643: } else if (verbose > 1) {
0644: vHist.print("vHist", null, System.out);
0645: }
0646: }
0647: return vHist;
0648: }
0649:
0650: Histogram getDeltaHistogram() {
0651: if (dHist == null) {
0652: dHist = new Histogram(getDeltas(0, 0));
0653: if (verbose > 3) {
0654: dHist.print("dHist", System.out);
0655: } else if (verbose > 1) {
0656: dHist.print("dHist", null, System.out);
0657: }
0658: }
0659: return dHist;
0660: }
0661:
0662: Histogram getHistogram(boolean isDelta) {
0663: return isDelta ? getDeltaHistogram() : getValueHistogram();
0664: }
0665:
0666: private void tryPopulationCoding(Coding plainCoding) {
0667: // assert(plainCoding.canRepresent(min, max));
0668: Histogram hist = getValueHistogram();
0669: // Start with "reasonable" default codings.
0670: final int approxL = 64;
0671: Coding favoredCoding = plainCoding.getValueCoding();
0672: Coding tokenCoding = BandStructure.UNSIGNED5.setL(approxL);
0673: Coding unfavoredCoding = plainCoding.getValueCoding();
0674: // There's going to be a band header. Estimate conservatively large.
0675: final int BAND_HEADER = 4;
0676: // Keep a running model of the predicted sizes of the F/T/U sequences.
0677: int currentFSize;
0678: int currentTSize;
0679: int currentUSize;
0680: // Start by assuming a degenerate favored-value length of 0,
0681: // which looks like a bunch of zero tokens followed by the
0682: // original sequence.
0683: // The {F} list ends with a repeated F value; find worst case:
0684: currentFSize = BAND_HEADER
0685: + Math.max(favoredCoding.getLength(min), favoredCoding
0686: .getLength(max));
0687: // The {T} list starts out a bunch of zeros, each of length 1.
0688: final int ZERO_LEN = tokenCoding.getLength(0);
0689: currentTSize = ZERO_LEN * (end - start);
0690: // The {U} list starts out a copy of the plainCoding:
0691: currentUSize = (int) Math.ceil(hist
0692: .getBitLength(unfavoredCoding) / 8);
0693:
0694: int bestPopSize = (currentFSize + currentTSize + currentUSize);
0695: int bestPopFVC = 0;
0696:
0697: // Record all the values, in decreasing order of favor.
0698: int[] allFavoredValues = new int[1 + hist.getTotalLength()];
0699: //int[] allPopSizes = new int[1+hist.getTotalLength()];
0700:
0701: // What sizes are "interesting"?
0702: int targetLowFVC = -1;
0703: int targetHighFVC = -1;
0704:
0705: // For each length, adjust the currentXSize model, and look for a win.
0706: int[][] matrix = hist.getMatrix();
0707: int mrow = -1;
0708: int mcol = 1;
0709: int mrowFreq = 0;
0710: for (int fvcount = 1; fvcount <= hist.getTotalLength(); fvcount++) {
0711: // The {F} list gets an additional member.
0712: // Take it from the end of the current matrix row.
0713: // (It's the end, so that we get larger favored values first.)
0714: if (mcol == 1) {
0715: mrow += 1;
0716: mrowFreq = matrix[mrow][0];
0717: mcol = matrix[mrow].length;
0718: }
0719: int this Value = matrix[mrow][--mcol];
0720: allFavoredValues[fvcount] = this Value;
0721: int this VLen = favoredCoding.getLength(this Value);
0722: currentFSize += this VLen;
0723: // The token list replaces occurrences of zero with a new token:
0724: int this VCount = mrowFreq;
0725: int this Token = fvcount;
0726: currentTSize += (tokenCoding.getLength(this Token) - ZERO_LEN)
0727: * this VCount;
0728: // The unfavored list loses occurrences of the newly favored value.
0729: // (This is the whole point of the exercise!)
0730: currentUSize -= this VLen * this VCount;
0731: int currentSize = (currentFSize + currentTSize + currentUSize);
0732: //allPopSizes[fvcount] = currentSize;
0733: if (bestPopSize > currentSize) {
0734: if (currentSize <= targetSize) {
0735: targetHighFVC = fvcount;
0736: if (targetLowFVC < 0)
0737: targetLowFVC = fvcount;
0738: if (verbose > 4)
0739: Utils.log.info("better pop-size at fvc="
0740: + fvcount
0741: + " by "
0742: + pct(bestPopSize - currentSize,
0743: bestPopSize));
0744: }
0745: bestPopSize = currentSize;
0746: bestPopFVC = fvcount;
0747: }
0748: }
0749: if (targetLowFVC < 0) {
0750: if (verbose > 1) {
0751: // Complete loss.
0752: if (verbose > 1)
0753: Utils.log.info("no good pop-size; best was "
0754: + bestPopSize
0755: + " at "
0756: + bestPopFVC
0757: + " worse by "
0758: + pct(bestPopSize - bestByteSize,
0759: bestByteSize));
0760: }
0761: return;
0762: }
0763: if (verbose > 1)
0764: Utils.log.info("initial best pop-size at fvc=" + bestPopFVC
0765: + " in [" + targetLowFVC + ".." + targetHighFVC
0766: + "]" + " by "
0767: + pct(bestByteSize - bestPopSize, bestByteSize));
0768: int oldZipSize = bestZipSize;
0769: // Now close onto a specific coding, testing more rigorously
0770: // with the zipSize metric.
0771: // Questions to decide:
0772: // 1. How many favored values?
0773: // 2. What token coding (TC)?
0774: // 3. Sort favored values by value within length brackets?
0775: // 4. What favored coding?
0776: // 5. What unfavored coding?
0777: // Steps 1/2/3 are interdependent, and may be iterated.
0778: // Steps 4 and 5 may be decided independently afterward.
0779: int[] LValuesCoded = PopulationCoding.LValuesCoded;
0780: ArrayList bestFits = new ArrayList();
0781: ArrayList fullFits = new ArrayList();
0782: ArrayList longFits = new ArrayList();
0783: final int PACK_TO_MAX_S = 1;
0784: if (bestPopFVC <= 255) {
0785: bestFits.add(BandStructure.BYTE1);
0786: } else {
0787: int bestB = Coding.B_MAX;
0788: boolean doFullAlso = (effort > POP_EFFORT);
0789: if (doFullAlso)
0790: fullFits.add(BandStructure.BYTE1.setS(PACK_TO_MAX_S));
0791: for (int i = LValuesCoded.length - 1; i >= 1; i--) {
0792: int L = LValuesCoded[i];
0793: Coding c0 = PopulationCoding.fitTokenCoding(
0794: targetLowFVC, L);
0795: Coding c1 = PopulationCoding.fitTokenCoding(bestPopFVC,
0796: L);
0797: Coding c3 = PopulationCoding.fitTokenCoding(
0798: targetHighFVC, L);
0799: if (c1 != null) {
0800: if (!bestFits.contains(c1))
0801: bestFits.add(c1);
0802: if (bestB > c1.B())
0803: bestB = c1.B();
0804: }
0805: if (doFullAlso) {
0806: if (c3 == null)
0807: c3 = c1;
0808: for (int B = c0.B(); B <= c3.B(); B++) {
0809: if (B == c1.B())
0810: continue;
0811: if (B == 1)
0812: continue;
0813: Coding c2 = c3.setB(B).setS(PACK_TO_MAX_S);
0814: if (!fullFits.contains(c2))
0815: fullFits.add(c2);
0816: }
0817: }
0818: }
0819: // interleave all B greater than bestB with best and full fits
0820: for (Iterator i = bestFits.iterator(); i.hasNext();) {
0821: Coding c = (Coding) i.next();
0822: if (c.B() > bestB) {
0823: i.remove();
0824: longFits.add(0, c);
0825: }
0826: }
0827: }
0828: ArrayList allFits = new ArrayList();
0829: for (Iterator i = bestFits.iterator(), j = fullFits.iterator(), k = longFits
0830: .iterator(); i.hasNext() || j.hasNext() || k.hasNext();) {
0831: if (i.hasNext())
0832: allFits.add(i.next());
0833: if (j.hasNext())
0834: allFits.add(j.next());
0835: if (k.hasNext())
0836: allFits.add(k.next());
0837: }
0838: bestFits.clear();
0839: fullFits.clear();
0840: longFits.clear();
0841: int maxFits = allFits.size();
0842: if (effort == POP_EFFORT)
0843: maxFits = 2;
0844: else if (maxFits > 4) {
0845: maxFits -= 4;
0846: maxFits = (maxFits * (effort - POP_EFFORT))
0847: / (MAX_EFFORT - POP_EFFORT);
0848: maxFits += 4;
0849: }
0850: if (allFits.size() > maxFits) {
0851: if (verbose > 4)
0852: Utils.log.info("allFits before clip: " + allFits);
0853: allFits.subList(maxFits, allFits.size()).clear();
0854: }
0855: if (verbose > 3)
0856: Utils.log.info("allFits: " + allFits);
0857: for (Iterator i = allFits.iterator(); i.hasNext();) {
0858: Coding tc = (Coding) i.next();
0859: boolean packToMax = false;
0860: if (tc.S() == PACK_TO_MAX_S) {
0861: // Kludge: setS(PACK_TO_MAX_S) means packToMax here.
0862: packToMax = true;
0863: tc = tc.setS(0);
0864: }
0865: int fVlen;
0866: if (!packToMax) {
0867: fVlen = bestPopFVC;
0868: assert (tc.umax() >= fVlen);
0869: assert (tc.B() == 1 || tc.setB(tc.B() - 1).umax() < fVlen);
0870: } else {
0871: fVlen = Math.min(tc.umax(), targetHighFVC);
0872: if (fVlen < targetLowFVC)
0873: continue;
0874: if (fVlen == bestPopFVC)
0875: continue; // redundant test
0876: }
0877: PopulationCoding pop = new PopulationCoding();
0878: pop.setHistogram(hist);
0879: pop.setL(tc.L());
0880: pop.setFavoredValues(allFavoredValues, fVlen);
0881: assert (pop.tokenCoding == tc); // predict correctly
0882: pop.resortFavoredValues();
0883: int[] tcsizes = computePopSizePrivate(pop, favoredCoding,
0884: unfavoredCoding);
0885: noteSizes(pop, tcsizes[BYTE_SIZE], BAND_HEADER
0886: + tcsizes[ZIP_SIZE]);
0887: }
0888: if (verbose > 3) {
0889: Utils.log.info("measured best pop, size=" + bestByteSize
0890: + "/zs=" + bestZipSize + " better by "
0891: + pct(oldZipSize - bestZipSize, oldZipSize));
0892: if (bestZipSize < oldZipSize) {
0893: Utils.log.info(">>> POP WINS BY "
0894: + (oldZipSize - bestZipSize));
0895: }
0896: }
0897: }
0898:
0899: private int[] computePopSizePrivate(PopulationCoding pop,
0900: Coding favoredCoding, Coding unfavoredCoding) {
0901: if (popHelper == null) {
0902: popHelper = new CodingChooser(effort, allCodingChoices);
0903: if (stress != null)
0904: popHelper.addStressSeed(stress.nextInt());
0905: popHelper.topLevel = false;
0906: popHelper.verbose -= 1;
0907: popHelper.disablePopCoding = true;
0908: popHelper.disableRunCoding = this .disableRunCoding;
0909: if (effort < MID_EFFORT)
0910: // No nested run codings.
0911: popHelper.disableRunCoding = true;
0912: }
0913: int fVlen = pop.fVlen;
0914: if (verbose > 2) {
0915: Utils.log.info("computePopSizePrivate fvlen=" + fVlen
0916: + " tc=" + pop.tokenCoding);
0917: Utils.log.info("{ //BEGIN");
0918: }
0919:
0920: // Find good coding choices for the token and unfavored sequences.
0921: int[] favoredValues = pop.fValues;
0922: int[][] vals = pop.encodeValues(values, start, end);
0923: int[] tokens = vals[0];
0924: int[] unfavoredValues = vals[1];
0925: if (verbose > 2)
0926: Utils.log.info("-- refine on fv[" + fVlen + "] fc="
0927: + favoredCoding);
0928: pop.setFavoredCoding(popHelper.choose(favoredValues, 1,
0929: 1 + fVlen, favoredCoding));
0930: if (pop.tokenCoding instanceof Coding
0931: && (stress == null || stress.nextBoolean())) {
0932: if (verbose > 2)
0933: Utils.log.info("-- refine on tv[" + tokens.length
0934: + "] tc=" + pop.tokenCoding);
0935: CodingMethod tc = popHelper.choose(tokens,
0936: (Coding) pop.tokenCoding);
0937: if (tc != pop.tokenCoding) {
0938: if (verbose > 2)
0939: Utils.log.info(">>> refined tc=" + tc);
0940: pop.setTokenCoding(tc);
0941: }
0942: }
0943: if (unfavoredValues.length == 0)
0944: pop.setUnfavoredCoding(null);
0945: else {
0946: if (verbose > 2)
0947: Utils.log.info("-- refine on uv["
0948: + unfavoredValues.length + "] uc="
0949: + pop.unfavoredCoding);
0950: pop.setUnfavoredCoding(popHelper.choose(unfavoredValues,
0951: unfavoredCoding));
0952: }
0953: if (verbose > 3) {
0954: Utils.log.info("finish computePopSizePrivate fvlen="
0955: + fVlen + " fc=" + pop.favoredCoding + " tc="
0956: + pop.tokenCoding + " uc=" + pop.unfavoredCoding);
0957: //pop.hist.print("pop-hist", null, System.out);
0958: StringBuffer sb = new StringBuffer();
0959: sb.append("fv = {");
0960: for (int i = 1; i <= fVlen; i++) {
0961: if ((i % 10) == 0)
0962: sb.append('\n');
0963: sb.append(" ").append(favoredValues[i]);
0964: }
0965: sb.append('\n');
0966: sb.append("}");
0967: Utils.log.info(sb.toString());
0968: }
0969: if (verbose > 2) {
0970: Utils.log.info("} //END");
0971: }
0972: if (stress != null) {
0973: return null; // do not bother with size computation
0974: }
0975: int[] sizes;
0976: try {
0977: resetData();
0978: // Write the array of favored values.
0979: pop.writeSequencesTo(byteSizer, tokens, unfavoredValues);
0980: sizes = new int[] { getByteSize(), getZipSize() };
0981: } catch (IOException ee) {
0982: throw new RuntimeException(ee); // cannot happen
0983: }
0984: int[] checkSizes = null;
0985: assert ((checkSizes = computeSizePrivate(pop)) != null);
0986: assert (checkSizes[BYTE_SIZE] == sizes[BYTE_SIZE]) : (checkSizes[BYTE_SIZE]
0987: + " != " + sizes[BYTE_SIZE]);
0988: return sizes;
0989: }
0990:
0991: private void tryAdaptiveCoding(Coding plainCoding) {
0992: int oldZipSize = bestZipSize;
0993: // Scan the value sequence, determining whether an interesting
0994: // run occupies too much space. ("Too much" means, say 5% more
0995: // than the average integer size of the band as a whole.)
0996: // Try to find a better coding for those segments.
0997: int start = this .start;
0998: int end = this .end;
0999: int[] values = this .values;
1000: int len = end - start;
1001: if (plainCoding.isDelta()) {
1002: values = getDeltas(0, 0); //%%% not quite right!
1003: start = 0;
1004: end = values.length;
1005: }
1006: int[] sizes = new int[len + 1];
1007: int fillp = 0;
1008: int totalSize = 0;
1009: for (int i = start; i < end; i++) {
1010: int val = values[i];
1011: sizes[fillp++] = totalSize;
1012: int size = plainCoding.getLength(val);
1013: assert (size < Integer.MAX_VALUE);
1014: //System.out.println("len "+val+" = "+size);
1015: totalSize += size;
1016: }
1017: sizes[fillp++] = totalSize;
1018: assert (fillp == sizes.length);
1019: double avgSize = (double) totalSize / len;
1020: double sizeFuzz;
1021: double sizeFuzz2;
1022: double sizeFuzz3;
1023: if (effort >= MID_EFFORT) {
1024: if (effort > MID_EFFORT + 1)
1025: sizeFuzz = 1.001;
1026: else
1027: sizeFuzz = 1.003;
1028: } else {
1029: if (effort > RUN_EFFORT)
1030: sizeFuzz = 1.01;
1031: else
1032: sizeFuzz = 1.03;
1033: }
1034: // for now:
1035: sizeFuzz *= sizeFuzz; // double the thresh
1036: sizeFuzz2 = (sizeFuzz * sizeFuzz);
1037: sizeFuzz3 = (sizeFuzz * sizeFuzz * sizeFuzz);
1038: // Find some mesh scales we like.
1039: double[] dmeshes = new double[1 + (effort - RUN_EFFORT)];
1040: double logLen = Math.log(len);
1041: for (int i = 0; i < dmeshes.length; i++) {
1042: dmeshes[i] = Math.exp(logLen * (i + 1)
1043: / (dmeshes.length + 1));
1044: }
1045: int[] meshes = new int[dmeshes.length];
1046: int mfillp = 0;
1047: for (int i = 0; i < dmeshes.length; i++) {
1048: int m = (int) Math.round(dmeshes[i]);
1049: m = AdaptiveCoding.getNextK(m - 1);
1050: if (m <= 0 || m >= len)
1051: continue;
1052: if (mfillp > 0 && m == meshes[mfillp - 1])
1053: continue;
1054: meshes[mfillp++] = m;
1055: }
1056: meshes = BandStructure.realloc(meshes, mfillp);
1057: // There's going to be a band header. Estimate conservatively large.
1058: final int BAND_HEADER = 4; // op, KB, A, B
1059: // Threshold values for a "too big" mesh.
1060: int[] threshes = new int[meshes.length];
1061: double[] fuzzes = new double[meshes.length];
1062: for (int i = 0; i < meshes.length; i++) {
1063: int mesh = meshes[i];
1064: double fuzz;
1065: if (mesh < 10)
1066: fuzz = sizeFuzz3;
1067: else if (mesh < 100)
1068: fuzz = sizeFuzz2;
1069: else
1070: fuzz = sizeFuzz;
1071: fuzzes[i] = fuzz;
1072: threshes[i] = BAND_HEADER
1073: + (int) Math.ceil(mesh * avgSize * fuzz);
1074: }
1075: if (verbose > 1) {
1076: System.out.print("tryAdaptiveCoding [" + len + "]"
1077: + " avgS=" + avgSize + " fuzz=" + sizeFuzz
1078: + " meshes: {");
1079: for (int i = 0; i < meshes.length; i++)
1080: System.out.print(" " + meshes[i] + "(" + threshes[i]
1081: + ")");
1082: Utils.log.info(" }");
1083: }
1084: if (runHelper == null) {
1085: runHelper = new CodingChooser(effort, allCodingChoices);
1086: if (stress != null)
1087: runHelper.addStressSeed(stress.nextInt());
1088: runHelper.topLevel = false;
1089: runHelper.verbose -= 1;
1090: runHelper.disableRunCoding = true;
1091: runHelper.disablePopCoding = this .disablePopCoding;
1092: if (effort < MID_EFFORT)
1093: // No nested pop codings.
1094: runHelper.disablePopCoding = true;
1095: }
1096: for (int i = 0; i < len; i++) {
1097: i = AdaptiveCoding.getNextK(i - 1);
1098: if (i > len)
1099: i = len;
1100: for (int j = meshes.length - 1; j >= 0; j--) {
1101: int mesh = meshes[j];
1102: int thresh = threshes[j];
1103: if (i + mesh > len)
1104: continue;
1105: int size = sizes[i + mesh] - sizes[i];
1106: if (size >= thresh) {
1107: // Found a size bulge.
1108: int bend = i + mesh;
1109: int bsize = size;
1110: double bigSize = avgSize * fuzzes[j];
1111: while (bend < len && (bend - i) <= len / 2) {
1112: int bend0 = bend;
1113: int bsize0 = bsize;
1114: bend += mesh;
1115: bend = i
1116: + AdaptiveCoding.getNextK(bend - i - 1);
1117: if (bend < 0 || bend > len)
1118: bend = len;
1119: bsize = sizes[bend] - sizes[i];
1120: if (bsize < BAND_HEADER + (bend - i) * bigSize) {
1121: bsize = bsize0;
1122: bend = bend0;
1123: break;
1124: }
1125: }
1126: int nexti = bend;
1127: if (verbose > 2) {
1128: Utils.log.info("bulge at "
1129: + i
1130: + "["
1131: + (bend - i)
1132: + "] of "
1133: + pct(bsize - avgSize * (bend - i),
1134: avgSize * (bend - i)));
1135: Utils.log.info("{ //BEGIN");
1136: }
1137: CodingMethod begcm, midcm, endcm;
1138: midcm = runHelper.choose(this .values, this .start
1139: + i, this .start + bend, plainCoding);
1140: if (midcm == plainCoding) {
1141: // No use working further.
1142: begcm = plainCoding;
1143: endcm = plainCoding;
1144: } else {
1145: begcm = runHelper
1146: .choose(this .values, this .start,
1147: this .start + i, plainCoding);
1148: endcm = runHelper.choose(this .values,
1149: this .start + bend, this .start + len,
1150: plainCoding);
1151: }
1152: if (verbose > 2)
1153: Utils.log.info("} //END");
1154: if (begcm == midcm && i > 0
1155: && AdaptiveCoding.isCodableLength(bend)) {
1156: i = 0;
1157: }
1158: if (midcm == endcm && bend < len) {
1159: bend = len;
1160: }
1161: if (begcm != plainCoding || midcm != plainCoding
1162: || endcm != plainCoding) {
1163: CodingMethod chain;
1164: int hlen = 0;
1165: if (bend == len) {
1166: chain = midcm;
1167: } else {
1168: chain = new AdaptiveCoding(bend - i, midcm,
1169: endcm);
1170: hlen += BAND_HEADER;
1171: }
1172: if (i > 0) {
1173: chain = new AdaptiveCoding(i, begcm, chain);
1174: hlen += BAND_HEADER;
1175: }
1176: int[] chainSize = computeSizePrivate(chain);
1177: noteSizes(chain, chainSize[BYTE_SIZE],
1178: chainSize[ZIP_SIZE] + hlen);
1179: }
1180: i = nexti;
1181: break;
1182: }
1183: }
1184: }
1185: if (verbose > 3) {
1186: if (bestZipSize < oldZipSize) {
1187: Utils.log.info(">>> RUN WINS BY "
1188: + (oldZipSize - bestZipSize));
1189: }
1190: }
1191: }
1192:
1193: static private String pct(double num, double den) {
1194: return (Math.round((num / den) * 10000) / 100.0) + "%";
1195: }
1196:
1197: static class Sizer extends OutputStream {
1198: final OutputStream out; // if non-null, copy output here also
1199:
1200: Sizer(OutputStream out) {
1201: this .out = out;
1202: }
1203:
1204: Sizer() {
1205: this (null);
1206: }
1207:
1208: private int count;
1209:
1210: public void write(int b) throws IOException {
1211: count++;
1212: if (out != null)
1213: out.write(b);
1214: }
1215:
1216: public void write(byte b[], int off, int len)
1217: throws IOException {
1218: count += len;
1219: if (out != null)
1220: out.write(b, off, len);
1221: }
1222:
1223: public void reset() {
1224: count = 0;
1225: }
1226:
1227: public int getSize() {
1228: return count;
1229: }
1230:
1231: public String toString() {
1232: String str = super .toString();
1233: // If -ea, print out more informative strings!
1234: assert ((str = stringForDebug()) != null);
1235: return str;
1236: }
1237:
1238: String stringForDebug() {
1239: return "<Sizer " + getSize() + ">";
1240: }
1241: }
1242:
1243: private Sizer zipSizer = new Sizer();
1244: private Deflater zipDef = new Deflater();
1245: private DeflaterOutputStream zipOut = new DeflaterOutputStream(
1246: zipSizer, zipDef);
1247: private Sizer byteSizer = new Sizer(zipOut);
1248: private Sizer byteOnlySizer = new Sizer();
1249:
1250: private void resetData() {
1251: flushData();
1252: zipDef.reset();
1253: if (context != null) {
1254: // Prepend given salt to the test output.
1255: try {
1256: context.writeTo(byteSizer);
1257: } catch (IOException ee) {
1258: throw new RuntimeException(ee); // cannot happen
1259: }
1260: }
1261: zipSizer.reset();
1262: byteSizer.reset();
1263: }
1264:
1265: private void flushData() {
1266: try {
1267: zipOut.finish();
1268: } catch (IOException ee) {
1269: throw new RuntimeException(ee); // cannot happen
1270: }
1271: }
1272:
1273: private int getByteSize() {
1274: return byteSizer.getSize();
1275: }
1276:
1277: private int getZipSize() {
1278: flushData();
1279: return zipSizer.getSize();
1280: }
1281:
1282: /// Stress-test helpers.
1283:
1284: void addStressSeed(int x) {
1285: if (stress == null)
1286: return;
1287: stress.setSeed(x + ((long) stress.nextInt() << 32));
1288: }
1289:
1290: // Pick a random pop-coding.
1291: private CodingMethod stressPopCoding(CodingMethod coding) {
1292: assert (stress != null); // this method is only for testing
1293: // Don't turn it into a pop coding if it's already something special.
1294: if (!(coding instanceof Coding))
1295: return coding;
1296: Coding valueCoding = ((Coding) coding).getValueCoding();
1297: Histogram hist = getValueHistogram();
1298: int fVlen = stressLen(hist.getTotalLength());
1299: if (fVlen == 0)
1300: return coding;
1301: List popvals = new ArrayList();
1302: if (stress.nextBoolean()) {
1303: // Build the population from the value list.
1304: HashSet popset = new HashSet();
1305: for (int i = start; i < end; i++) {
1306: Integer val = new Integer(values[i]);
1307: if (popset.add(val))
1308: popvals.add(val);
1309: }
1310: } else {
1311: int[][] matrix = hist.getMatrix();
1312: for (int mrow = 0; mrow < matrix.length; mrow++) {
1313: int[] row = matrix[mrow];
1314: for (int mcol = 1; mcol < row.length; mcol++) {
1315: popvals.add(new Integer(row[mcol]));
1316: }
1317: }
1318: }
1319: int reorder = stress.nextInt();
1320: if ((reorder & 7) <= 2) {
1321: // Lose the order.
1322: Collections.shuffle(popvals, stress);
1323: } else {
1324: // Keep the order, mostly.
1325: if (((reorder >>>= 3) & 7) <= 2)
1326: Collections.sort(popvals);
1327: if (((reorder >>>= 3) & 7) <= 2)
1328: Collections.reverse(popvals);
1329: if (((reorder >>>= 3) & 7) <= 2)
1330: Collections.rotate(popvals, stressLen(popvals.size()));
1331: }
1332: if (popvals.size() > fVlen) {
1333: // Cut the list down.
1334: if (((reorder >>>= 3) & 7) <= 2) {
1335: // Cut at end.
1336: popvals.subList(fVlen, popvals.size()).clear();
1337: } else {
1338: // Cut at start.
1339: popvals.subList(0, popvals.size() - fVlen).clear();
1340: }
1341: }
1342: fVlen = popvals.size();
1343: int[] fvals = new int[1 + fVlen];
1344: for (int i = 0; i < fVlen; i++) {
1345: fvals[1 + i] = ((Integer) popvals.get(i)).intValue();
1346: }
1347: PopulationCoding pop = new PopulationCoding();
1348: pop.setFavoredValues(fvals, fVlen);
1349: int[] lvals = PopulationCoding.LValuesCoded;
1350: for (int i = 0; i < lvals.length / 2; i++) {
1351: int popl = lvals[stress.nextInt(lvals.length)];
1352: if (popl < 0)
1353: continue;
1354: if (PopulationCoding.fitTokenCoding(fVlen, popl) != null) {
1355: pop.setL(popl);
1356: break;
1357: }
1358: }
1359: if (pop.tokenCoding == null) {
1360: int min = fvals[1], max = min;
1361: for (int i = 2; i <= fVlen; i++) {
1362: int val = fvals[i];
1363: if (min > val)
1364: min = val;
1365: if (max < val)
1366: max = val;
1367: }
1368: pop.tokenCoding = stressCoding(min, max);
1369: }
1370:
1371: computePopSizePrivate(pop, valueCoding, valueCoding);
1372: return pop;
1373: }
1374:
1375: // Pick a random adaptive coding.
1376: private CodingMethod stressAdaptiveCoding(CodingMethod coding) {
1377: assert (stress != null); // this method is only for testing
1378: // Don't turn it into a run coding if it's already something special.
1379: if (!(coding instanceof Coding))
1380: return coding;
1381: Coding plainCoding = (Coding) coding;
1382: int len = end - start;
1383: if (len < 2)
1384: return coding;
1385: // Decide how many spans we'll create.
1386: int spanlen = stressLen(len - 1) + 1;
1387: if (spanlen == len)
1388: return coding;
1389: try {
1390: assert (!disableRunCoding);
1391: disableRunCoding = true; // temporary, while I decide spans
1392: int[] allValues = (int[]) values.clone();
1393: CodingMethod result = null;
1394: int scan = this .end;
1395: int start = this .start;
1396: for (int split; scan > start; scan = split) {
1397: int this span;
1398: int rand = (scan - start < 100) ? -1 : stress.nextInt();
1399: if ((rand & 7) != 0) {
1400: this span = (spanlen == 1 ? spanlen
1401: : stressLen(spanlen - 1) + 1);
1402: } else {
1403: // Every so often generate a value based on KX/KB format.
1404: int KX = (rand >>>= 3) & AdaptiveCoding.KX_MAX;
1405: int KB = (rand >>>= 3) & AdaptiveCoding.KB_MAX;
1406: for (;;) {
1407: this span = AdaptiveCoding.decodeK(KX, KB);
1408: if (this span <= scan - start)
1409: break;
1410: // Try smaller and smaller codings:
1411: if (KB != AdaptiveCoding.KB_DEFAULT)
1412: KB = AdaptiveCoding.KB_DEFAULT;
1413: else
1414: KX -= 1;
1415: }
1416: //System.out.println("KX="+KX+" KB="+KB+" K="+thisspan);
1417: assert (AdaptiveCoding.isCodableLength(this span));
1418: }
1419: if (this span > scan - start)
1420: this span = scan - start;
1421: while (!AdaptiveCoding.isCodableLength(this span))
1422: --this span;
1423: split = scan - this span;
1424: assert (split < scan);
1425: assert (split >= start);
1426: // Choose a coding for the span [split..scan).
1427: CodingMethod sc = choose(allValues, split, scan,
1428: plainCoding);
1429: if (result == null) {
1430: result = sc; // the caboose
1431: } else {
1432: result = new AdaptiveCoding(scan - split, sc,
1433: result);
1434: }
1435: }
1436: return result;
1437: } finally {
1438: disableRunCoding = false; // return to normal value
1439: }
1440: }
1441:
1442: // Return a random value in [0..len], gently biased toward extremes.
1443: private Coding stressCoding(int min, int max) {
1444: assert (stress != null); // this method is only for testing
1445: for (int i = 0; i < 100; i++) {
1446: Coding c = Coding.of(stress.nextInt(Coding.B_MAX) + 1,
1447: stress.nextInt(Coding.H_MAX) + 1, stress
1448: .nextInt(Coding.S_MAX + 1));
1449: if (c.B() == 1)
1450: c = c.setH(256);
1451: if (c.H() == 256 && c.B() >= 5)
1452: c = c.setB(4);
1453: if (stress.nextBoolean()) {
1454: Coding dc = c.setD(1);
1455: if (dc.canRepresent(min, max))
1456: return dc;
1457: }
1458: if (c.canRepresent(min, max))
1459: return c;
1460: }
1461: return BandStructure.UNSIGNED5;
1462: }
1463:
1464: // Return a random value in [0..len], gently biased toward extremes.
1465: private int stressLen(int len) {
1466: assert (stress != null); // this method is only for testing
1467: assert (len >= 0);
1468: int rand = stress.nextInt(100);
1469: if (rand < 20)
1470: return Math.min(len / 5, rand);
1471: else if (rand < 40)
1472: return len;
1473: else
1474: return stress.nextInt(len);
1475: }
1476:
1477: // For debug only.
1478: /*
1479: public static
1480: int[] readValuesFrom(InputStream instr) {
1481: return readValuesFrom(new InputStreamReader(instr));
1482: }
1483: public static
1484: int[] readValuesFrom(Reader inrdr) {
1485: inrdr = new BufferedReader(inrdr);
1486: final StreamTokenizer in = new StreamTokenizer(inrdr);
1487: final int TT_NOTHING = -99;
1488: in.commentChar('#');
1489: return readValuesFrom(new Iterator() {
1490: int token = TT_NOTHING;
1491: private int getToken() {
1492: if (token == TT_NOTHING) {
1493: try {
1494: token = in.nextToken();
1495: assert(token != TT_NOTHING);
1496: } catch (IOException ee) {
1497: throw new RuntimeException(ee);
1498: }
1499: }
1500: return token;
1501: }
1502: public boolean hasNext() {
1503: return getToken() != StreamTokenizer.TT_EOF;
1504: }
1505: public Object next() {
1506: int ntok = getToken();
1507: token = TT_NOTHING;
1508: switch (ntok) {
1509: case StreamTokenizer.TT_EOF:
1510: throw new NoSuchElementException();
1511: case StreamTokenizer.TT_NUMBER:
1512: return new Integer((int) in.nval);
1513: default:
1514: assert(false);
1515: return null;
1516: }
1517: }
1518: public void remove() {
1519: throw new UnsupportedOperationException();
1520: }
1521: });
1522: }
1523: public static
1524: int[] readValuesFrom(Iterator iter) {
1525: return readValuesFrom(iter, 0);
1526: }
1527: public static
1528: int[] readValuesFrom(Iterator iter, int initSize) {
1529: int[] na = new int[Math.max(10, initSize)];
1530: int np = 0;
1531: while (iter.hasNext()) {
1532: Integer val = (Integer) iter.next();
1533: if (np == na.length) {
1534: na = BandStructure.realloc(na);
1535: }
1536: na[np++] = val.intValue();
1537: }
1538: if (np != na.length) {
1539: na = BandStructure.realloc(na, np);
1540: }
1541: return na;
1542: }
1543:
1544: public static
1545: void main(String[] av) throws IOException {
1546: int effort = MID_EFFORT;
1547: int ap = 0;
1548: if (ap < av.length && av[ap].equals("-e")) {
1549: ap++;
1550: effort = Integer.parseInt(av[ap++]);
1551: }
1552: int verbose = 1;
1553: if (ap < av.length && av[ap].equals("-v")) {
1554: ap++;
1555: verbose = Integer.parseInt(av[ap++]);
1556: }
1557: Coding[] bcs = BandStructure.getBasicCodings();
1558: CodingChooser cc = new CodingChooser(effort, bcs);
1559: if (ap < av.length && av[ap].equals("-p")) {
1560: ap++;
1561: cc.optUsePopulationCoding = false;
1562: }
1563: if (ap < av.length && av[ap].equals("-a")) {
1564: ap++;
1565: cc.optUseAdaptiveCoding = false;
1566: }
1567: cc.verbose = verbose;
1568: int[] values = readValuesFrom(System.in);
1569: int[] sizes = {0,0};
1570: CodingMethod cm = cc.choose(values, BandStructure.UNSIGNED5, sizes);
1571: System.out.println("size: "+sizes[BYTE_SIZE]+"/zs="+sizes[ZIP_SIZE]);
1572: System.out.println(cm);
1573: }
1574: //*/
1575:
1576: }
|