using System;
using System.Collections;
using System.IO;
namespace DCSharp.Extras{
/// <summary>
/// Class for compressing NMDC file lists.
/// </summary>
/// <remarks>
/// Written by Paul Bartrum (paulbartrum@hotmail.com)
/// </remarks>
public sealed class NMDCHuffmanCompressor
{
private NMDCHuffmanCompressor()
{
}
class HuffmanCompressionNode
{
public HuffmanCompressionNode Parent;
public HuffmanCompressionNode Zero;
public HuffmanCompressionNode One;
public int Frequency;
public int Symbol;
}
struct HuffmanTableEntry
{
public int Code;
public int CodeLength;
}
public static void Compress(Stream inputStream, Stream outputStream)
{
byte[] inputBuffer = new byte[inputStream.Length];
inputStream.Read(inputBuffer, 0, inputBuffer.Length);
Compress(inputBuffer, outputStream);
}
public static void Compress(byte[] inputBuffer, Stream outputStream)
{
// Construct a frequency table and calculate the parity.
int[] frequencyTable = new int[256];
byte parity = 0;
foreach(byte b in inputBuffer)
{
frequencyTable[b]++;
parity ^= b;
}
// Create a node for each symbol.
HuffmanCompressionNode[] leaves = new HuffmanCompressionNode[256];
for(int i = 0; i < 256; i++)
{
// Ignore symbols that don't appear in the data.
if(frequencyTable[i] == 0)
{
continue;
}
// Create a new node.
HuffmanCompressionNode node = new HuffmanCompressionNode();
node.Frequency = frequencyTable[i];
node.Symbol = i;
// Add the node to the list of leaf nodes.
leaves[i] = node;
}
ArrayList roots = new ArrayList(256);
// Initially, every leaf node is a root.
foreach(HuffmanCompressionNode leaf in leaves)
{
if(leaf != null)
{
roots.Add(leaf);
}
}
// Construct the huffman tree.
while(roots.Count > 1)
{
// Find the two lowest frequency roots.
int lowestFrequency = int.MaxValue;
HuffmanCompressionNode lowestFrequencyRoot = null;
int secondLowestFrequency = int.MaxValue;
HuffmanCompressionNode secondLowestFrequencyRoot = null;
foreach(HuffmanCompressionNode root in roots)
{
if(root.Frequency < lowestFrequency)
{
secondLowestFrequency = lowestFrequency;
secondLowestFrequencyRoot = lowestFrequencyRoot;
lowestFrequency = root.Frequency;
lowestFrequencyRoot = root;
}
else if(root.Frequency < secondLowestFrequency)
{
secondLowestFrequency = root.Frequency;
secondLowestFrequencyRoot = root;
}
}
// Remove the two lowest roots from the list of roots.
roots.Remove(lowestFrequencyRoot);
roots.Remove(secondLowestFrequencyRoot);
// Create the parent node.
HuffmanCompressionNode parent = new HuffmanCompressionNode();
parent.Frequency = lowestFrequencyRoot.Frequency + secondLowestFrequencyRoot.Frequency;
// Connect the dots. :-)
lowestFrequencyRoot.Parent = parent;
secondLowestFrequencyRoot.Parent = parent;
parent.Zero = lowestFrequencyRoot;
parent.One = secondLowestFrequencyRoot;
roots.Add(parent);
}
// Construct a table from the tree.
HuffmanTableEntry[] lookupTable = new HuffmanTableEntry[256];
int numNonZeroEntries = 0;
for(int i = 0; i < 256; i++)
{
HuffmanCompressionNode node = leaves[i];
if(node == null)
{
continue;
}
while(node.Parent != null)
{
lookupTable[i].Code <<= 1;
if(node.Parent.One == node)
{
lookupTable[i].Code |= 1;
}
lookupTable[i].CodeLength++;
node = node.Parent;
}
numNonZeroEntries++;
}
// Write header.
BinaryWriter writer = new BinaryWriter(outputStream);
writer.Write(new byte[] {(byte)'H', (byte)'E', (byte)'3', 13});
writer.Write((byte)parity);
writer.Write((int)inputBuffer.Length);
// Write couples.
writer.Write((short)numNonZeroEntries);
for(int i = 0; i < 256; i++)
{
if(lookupTable[i].CodeLength > 0)
{
writer.Write((byte)i);
writer.Write((byte)lookupTable[i].CodeLength);
}
}
// Write codes.
BitWriter bitWriter = new BitWriter(outputStream);
bitWriter.HighToLow = false;
for(int i = 0; i < 256; i++)
{
if(lookupTable[i].CodeLength > 0)
{
bitWriter.WriteBits(lookupTable[i].Code, lookupTable[i].CodeLength);
}
}
// Pad the rest of the byte.
bitWriter.Flush();
// Start writing data to the output stream.
foreach(byte b in inputBuffer)
{
bitWriter.WriteBits(lookupTable[b].Code, lookupTable[b].CodeLength);
}
bitWriter.Flush();
}
}
/// <summary>
/// Writes bits to a stream.
/// </summary>
public class BitWriter
{
Stream output;
// Settings.
bool highToLow;
byte cachedByte;
int bitPos;
public BitWriter(Stream output)
{
this.output = output;
highToLow = true;
}
public bool HighToLow
{
get
{
return highToLow;
}
set
{
highToLow = value;
}
}
// The position in the current byte (0 - 8).
// Zero means that none of the bits in the current byte have been written.
public int BitPosition
{
get
{
return bitPos;
}
}
public void WriteBit(bool value)
{
WriteBits(value ? 1 : 0, 1);
}
public void WriteBits(int value, int numBitsArg)
{
int numBits = numBitsArg;
if(numBits < 0 || numBits > 31)
{
throw new ArgumentOutOfRangeException("numBits");
}
while(numBits > 0)
{
int partialNumBits = Math.Min(numBits, 8 - bitPos);
if(highToLow)
{
cachedByte |= (byte)(GetUIntBits((uint)value, 32 - numBits, partialNumBits, true) << (8 - partialNumBits - bitPos));
}
else
{
cachedByte |= (byte)(GetUIntBits((uint)value, numBitsArg - numBits, partialNumBits, false) << bitPos);
}
bitPos += partialNumBits;
numBits -= partialNumBits;
if(bitPos == 8)
{
Flush();
}
}
}
public void Flush()
{
// Check if any data is cached.
if(bitPos == 0)
{
return;
}
// Write the cached data to the stream.
output.WriteByte(cachedByte);
bitPos = 0;
cachedByte = 0;
}
public static int GetUIntBits(uint value, int start, int length, bool highToLow)
{
if(highToLow)
{
start = 32 - start - length;
} // Big-endian bit order.
return (int)((uint)(value & (((1 << (start + length)) - 1) & ~((1 << start) - 1))) >> start);
}
}
}
|