summaryrefslogtreecommitdiff
path: root/coregrind/m_initimg/simple_huffman.c
diff options
context:
space:
mode:
Diffstat (limited to 'coregrind/m_initimg/simple_huffman.c')
-rw-r--r--coregrind/m_initimg/simple_huffman.c513
1 files changed, 513 insertions, 0 deletions
diff --git a/coregrind/m_initimg/simple_huffman.c b/coregrind/m_initimg/simple_huffman.c
new file mode 100644
index 0000000..f9f3bb5
--- /dev/null
+++ b/coregrind/m_initimg/simple_huffman.c
@@ -0,0 +1,513 @@
+/*************************************************************************
+* Name: huffman.c
+* Author: Marcus Geelnard
+* Description: Huffman coder/decoder implementation.
+* Reentrant: Yes
+* $Id: huffman.c,v 1.6 2004/12/14 18:59:40 marcus256 Exp $
+*
+* This is a very straight forward implementation of a Huffman coder and
+* decoder.
+*
+* Primary flaws with this primitive implementation are:
+* - Slow bit stream implementation
+* - Fairly slow decoding (slower than encoding)
+* - Maximum tree depth of 32 (the coder aborts if any code exceeds a
+* size of 32 bits). If I'm not mistaking, this should not be possible
+* unless the input buffer is larger than 2^32 bytes, which is not
+* supported by the coder anyway (max 2^32-1 bytes can be specified with
+* an unsigned 32-bit integer).
+*
+* On the other hand, there are a few advantages of this implementation:
+* - The Huffman tree is stored in a very compact form, requiring only
+* 12 bits per symbol (for 8 bit symbols), meaning a maximum of 384
+* bytes overhead.
+* - The Huffman coder does quite well in situations where the data is
+* noisy, in which case most dictionary based coders run into problems.
+*
+* Possible improvements (probably not worth it):
+* - Partition the input data stream into blocks, where each block has
+* its own Huffman tree. With variable block sizes, it should be
+* possible to find locally optimal Huffman trees, which in turn could
+* reduce the total size.
+* - Allow for a few different predefined Huffman trees, which could
+* reduce the size of a block even further.
+*-------------------------------------------------------------------------
+* Copyright (c) 2003-2004 Marcus Geelnard
+*
+* This software is provided 'as-is', without any express or implied
+* warranty. In no event will the authors be held liable for any damages
+* arising from the use of this software.
+*
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+*
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software
+* in a product, an acknowledgment in the product documentation would
+* be appreciated but is not required.
+*
+* 2. Altered source versions must be plainly marked as such, and must not
+* be misrepresented as being the original software.
+*
+* 3. This notice may not be removed or altered from any source
+* distribution.
+*
+* Marcus Geelnard
+* marcus.geelnard at home.se
+*************************************************************************/
+
+/* Modified May 06 by Julian Seward for use in Valgrind.
+ - changed integral types to V's versions (UInt, UChar etc)
+ - added initialisation in _Huffman_WriteBits, as described in
+ comment in that function.
+*/
+
+/*************************************************************************
+* Types used for Huffman coding
+*************************************************************************/
+
+typedef struct {
+ UInt Symbol;
+ UInt Count;
+ UInt Code;
+ UInt Bits;
+} huff_sym_t;
+
+typedef struct {
+ UChar *BytePtr;
+ UInt BitPos;
+} huff_bitstream_t;
+
+
+
+/*************************************************************************
+* INTERNAL FUNCTIONS *
+*************************************************************************/
+
+
+/*************************************************************************
+* _Huffman_InitBitstream() - Initialize a bitstream.
+*************************************************************************/
+
+static void _Huffman_InitBitstream( huff_bitstream_t *stream,
+ UChar *buf )
+{
+ stream->BytePtr = buf;
+ stream->BitPos = 0;
+}
+
+
+/*************************************************************************
+* _Huffman_ReadBits() - Read bits from a bitstream.
+*************************************************************************/
+
+static UInt _Huffman_ReadBits( huff_bitstream_t *stream,
+ UInt bits )
+{
+ UInt x, bit, count;
+ UChar *buf;
+
+ /* Get current stream state */
+ buf = stream->BytePtr;
+ bit = stream->BitPos;
+
+ /* Extract bits */
+ x = 0;
+ for( count = 0; count < bits; ++ count )
+ {
+ x = (x<<1) + (*buf & (1<<(7-bit)) ? 1 : 0);
+ bit = (bit+1) & 7;
+ if( !bit )
+ {
+ ++ buf;
+ }
+ }
+
+ /* Store new stream state */
+ stream->BytePtr = buf;
+ stream->BitPos = bit;
+
+ return x;
+}
+
+
+/*************************************************************************
+* _Huffman_WriteBits() - Write bits to a bitstream.
+*************************************************************************/
+
+static void _Huffman_WriteBits( huff_bitstream_t *stream, UInt x,
+ UInt bits )
+{
+ UInt bit, count;
+ UChar *buf;
+ UInt mask;
+
+ /* Get current stream state */
+ buf = stream->BytePtr;
+ bit = stream->BitPos;
+
+ /* Append bits */
+ mask = 1 << (bits-1);
+ for( count = 0; count < bits; ++ count )
+ {
+ /* If we're starting a new byte, zero it out, so that the
+ resulting byte sequence looks completely defined from
+ Valgrind's point of view. If this doesn't happen then the
+ last byte in the stream may look partially undefined. */
+ if (bit == 0)
+ *buf = 0;
+ *buf = (*buf & (0xff^(1<<(7-bit)))) +
+ ((x & mask ? 1 : 0) << (7-bit));
+ x <<= 1;
+ bit = (bit+1) & 7;
+ if( !bit )
+ {
+ ++ buf;
+ }
+ }
+
+ /* Store new stream state */
+ stream->BytePtr = buf;
+ stream->BitPos = bit;
+}
+
+
+/*************************************************************************
+* _Huffman_Hist() - Calculate (sorted) histogram for a block of data.
+*************************************************************************/
+
+static void _Huffman_Hist( UChar *in, huff_sym_t *sym,
+ UInt size )
+{
+ Int k, swaps;
+ huff_sym_t tmp;
+
+ /* Clear/init histogram */
+ for( k = 0; k < 256; ++ k )
+ {
+ sym[k].Symbol = k;
+ sym[k].Count = 0;
+ sym[k].Code = 0;
+ sym[k].Bits = 0;
+ }
+
+ /* Build histogram */
+ for( k = size; k; -- k )
+ {
+ sym[ *in ++ ].Count ++;
+ }
+
+ /* Sort histogram - most frequent symbol first (bubble sort) */
+ do
+ {
+ swaps = 0;
+ for( k = 0; k < 255; ++ k )
+ {
+ if( sym[k].Count < sym[k+1].Count )
+ {
+ tmp = sym[k];
+ sym[k] = sym[k+1];
+ sym[k+1] = tmp;
+ swaps = 1;
+ }
+ }
+ }
+ while( swaps );
+}
+
+
+/*************************************************************************
+* _Huffman_MakeTree() - Generate a Huffman tree.
+*************************************************************************/
+
+static void _Huffman_MakeTree( huff_sym_t *sym, huff_bitstream_t *stream,
+ UInt code, UInt bits, UInt first,
+ UInt last )
+{
+ UInt k, size, size_a, size_b, last_a, first_b;
+
+ /* Is this a leaf node? */
+ if( first == last )
+ {
+ /* Append symbol to tree description */
+ _Huffman_WriteBits( stream, 1, 1 );
+ _Huffman_WriteBits( stream, sym[first].Symbol, 8 );
+
+ /* Store code info in symbol array */
+ sym[first].Code = code;
+ sym[first].Bits = bits;
+ return;
+ }
+ else
+ {
+ /* This was not a leaf node */
+ _Huffman_WriteBits( stream, 0, 1 );
+ }
+
+ /* Total size of interval */
+ size = 0;
+ for( k = first; k <= last; ++ k )
+ {
+ size += sym[k].Count;
+ }
+
+ /* Find size of branch a */
+ size_a = 0;
+ for( k = first; size_a < ((size+1)>>1) && k < last; ++ k )
+ {
+ size_a += sym[k].Count;
+ }
+
+ /* Non-empty branch? */
+ if( size_a > 0 )
+ {
+ /* Continue branching */
+ _Huffman_WriteBits( stream, 1, 1 );
+
+ /* Branch a cut in histogram */
+ last_a = k-1;
+
+ /* Create branch a */
+ _Huffman_MakeTree( sym, stream, (code<<1)+0, bits+1,
+ first, last_a );
+ }
+ else
+ {
+ /* This was an empty branch */
+ _Huffman_WriteBits( stream, 0, 1 );
+ }
+
+ /* Size of branch b */
+ size_b = size - size_a;
+
+ /* Non-empty branch? */
+ if( size_b > 0 )
+ {
+ /* Continue branching */
+ _Huffman_WriteBits( stream, 1, 1 );
+
+ /* Branch b cut in histogram */
+ first_b = k;
+
+ /* Create branch b */
+ _Huffman_MakeTree( sym, stream, (code<<1)+1, bits+1,
+ first_b, last );
+ }
+ else
+ {
+ /* This was an empty branch */
+ _Huffman_WriteBits( stream, 0, 1 );
+ }
+}
+
+
+/*************************************************************************
+* _Huffman_RecoverTree() - Recover a Huffman tree from a bitstream.
+*************************************************************************/
+
+static void _Huffman_RecoverTree( huff_sym_t *sym,
+ huff_bitstream_t *stream, UInt code, UInt bits,
+ UInt *symnum )
+{
+ UInt symbol;
+
+ /* Is this a leaf node? */
+ if( _Huffman_ReadBits( stream, 1 ) )
+ {
+ /* Get symbol from tree description */
+ symbol = _Huffman_ReadBits( stream, 8 );
+
+ /* Store code info in symbol array */
+ sym[*symnum].Symbol = symbol;
+ sym[*symnum].Code = code;
+ sym[*symnum].Bits = bits;
+
+ /* Increase symbol counter */
+ *symnum = *symnum + 1;
+
+ return;
+ }
+
+ /* Non-empty branch? */
+ if( _Huffman_ReadBits( stream, 1 ) )
+ {
+ /* Create branch a */
+ _Huffman_RecoverTree( sym, stream, (code<<1)+0, bits+1,
+ symnum );
+ }
+
+ /* Non-empty branch? */
+ if( _Huffman_ReadBits( stream, 1 ) )
+ {
+ /* Create branch b */
+ _Huffman_RecoverTree( sym, stream, (code<<1)+1, bits+1,
+ symnum );
+ }
+}
+
+
+
+
+/*************************************************************************
+* PUBLIC FUNCTIONS *
+*************************************************************************/
+
+
+/*************************************************************************
+* Huffman_Compress() - Compress a block of data using a Huffman coder.
+* in - Input (uncompressed) buffer.
+* out - Output (compressed) buffer. This buffer must be 384 bytes
+* larger than the input buffer.
+* insize - Number of input bytes.
+* The function returns the size of the compressed data.
+*************************************************************************/
+static
+Int Huffman_Compress( UChar *in, UChar *out,
+ UInt insize )
+{
+ huff_sym_t sym[ 256 ], tmp;
+ huff_bitstream_t stream;
+ UInt k, total_bytes, swaps, symbol, last_symbol;
+
+ /* Do we have anything to compress? */
+ if( insize < 1 ) return 0;
+
+ /* Initialize bitstream */
+ _Huffman_InitBitstream( &stream, out );
+
+ /* Calculate and sort histogram for input data */
+ _Huffman_Hist( in, sym, insize );
+
+ /* Find number of used symbols */
+ for( last_symbol = 255; sym[last_symbol].Count == 0; -- last_symbol );
+
+ /* Special case: In order to build a correct tree, we need at least
+ two symbols (otherwise we get zero-bit representations). */
+ if( last_symbol == 0 ) ++ last_symbol;
+
+ /* Build Huffman tree */
+ _Huffman_MakeTree( sym, &stream, 0, 0, 0, last_symbol );
+
+ /* Was any code > 32 bits? (we do not handle that at present) */
+ for( k = 0; k < 255; ++ k )
+ {
+ if( sym[k].Bits > 32 )
+ {
+ return 0;
+ }
+ }
+
+ /* Sort histogram - first symbol first (bubble sort) */
+ do
+ {
+ swaps = 0;
+ for( k = 0; k < 255; ++ k )
+ {
+ if( sym[k].Symbol > sym[k+1].Symbol )
+ {
+ tmp = sym[k];
+ sym[k] = sym[k+1];
+ sym[k+1] = tmp;
+ swaps = 1;
+ }
+ }
+ }
+ while( swaps );
+
+ /* Encode input stream */
+ for( k = 0; k < insize; ++ k )
+ {
+ symbol = in[ k ];
+ _Huffman_WriteBits( &stream, sym[symbol].Code,
+ sym[symbol].Bits );
+ }
+
+ /* Calculate size of output data */
+ total_bytes = (Int)(stream.BytePtr - out);
+ if( stream.BitPos > 0 )
+ {
+ ++ total_bytes;
+ }
+
+ return total_bytes;
+}
+
+
+
+/*************************************************************************
+* Huffman_Uncompress() - Uncompress a block of data using a Huffman
+* decoder.
+* in - Input (compressed) buffer.
+* out - Output (uncompressed) buffer. This buffer must be large
+* enough to hold the uncompressed data.
+* insize - Number of input bytes.
+* outsize - Number of output bytes.
+*************************************************************************/
+static
+void Huffman_Uncompress( UChar *in, UChar *out,
+ UInt insize, UInt outsize )
+{
+ huff_sym_t sym[ 256 ], tmp;
+ huff_bitstream_t stream;
+ UInt k, m, symbol_count, swaps;
+ UChar *buf;
+ UInt bits, delta_bits, new_bits, code;
+
+ /* Do we have anything to decompress? */
+ if( insize < 1 ) return;
+
+ /* Initialize bitstream */
+ _Huffman_InitBitstream( &stream, in );
+
+ /* Clear tree/histogram */
+ for( k = 0; k < 256; ++ k )
+ {
+ sym[k].Bits = 0x7fffffff;
+ }
+
+ /* Recover Huffman tree */
+ symbol_count = 0;
+ _Huffman_RecoverTree( sym, &stream, 0, 0, &symbol_count );
+
+ /* Sort histogram - shortest code first (bubble sort) */
+ do
+ {
+ swaps = 0;
+ for( k = 0; k < symbol_count-1; ++ k )
+ {
+ if( sym[k].Bits > sym[k+1].Bits )
+ {
+ tmp = sym[k];
+ sym[k] = sym[k+1];
+ sym[k+1] = tmp;
+ swaps = 1;
+ }
+ }
+ }
+ while( swaps );
+
+ /* Decode input stream */
+ buf = out;
+ for( k = 0; k < outsize; ++ k )
+ {
+ /* Search tree for matching code */
+ bits = 0;
+ code = 0;
+ for( m = 0; m < symbol_count; ++ m )
+ {
+ delta_bits = sym[m].Bits - bits;
+ if( delta_bits )
+ {
+ new_bits = _Huffman_ReadBits( &stream, delta_bits );
+ code = code | (new_bits << (32-bits-delta_bits));
+ bits = sym[m].Bits;
+ }
+ if( code == (sym[m].Code << (32-sym[m].Bits)) )
+ {
+ *buf ++ = (UChar) sym[m].Symbol;
+ break;
+ }
+ }
+ }
+}