////////////////////////////////////////////////////////////////////////////
//
//  lzw.cpp
//
//  1/13/01 (Jeff Ondich) Started.  Adapted from code published
//  by Mark R. Nelson in Dr. Dobbs' Journal, April 1989.
//
////////////////////////////////////////////////////////////////////////////

#include <stdlib.h>
#include "lzw.h"

static UINT *code_value;
static UINT *prefix_code;
static UCHAR *append_character;

///////////////////////////////////////////////////////////
// This is the compression routine.  Returns the number
// of bytes in the compressed 
///////////////////////////////////////////////////////////

int compress( UCHAR *source, UCHAR *destination, UINT nBytes )
{
	UINT next_code;
	UINT character;
	UINT string_code;
	UINT index;
	UINT i;

	int byteCount = 0;
	UINT output_bit_count = 0;
	ULONG output_bit_buffer = 0L;

	// Allocate buffers.
	code_value = new UINT[ TABLE_SIZE*sizeof(UINT) ];
	prefix_code = new UINT[ TABLE_SIZE*sizeof(UINT) ];
	append_character = new UCHAR[ TABLE_SIZE*sizeof(UCHAR) ];

	if( code_value==NULL || prefix_code==NULL || append_character==NULL )
	{
		return( 0 );
	}

	// Initialize the code table.
	next_code = 256;
	for( i=0; i < TABLE_SIZE; i++ )
		code_value[i] = 0xFFFFFFFF;

	// Initialize string_code using the first byte
	// of the source data.
	string_code = (UINT)(*source++);
	nBytes--;

	// Main loop.  Compress, adding to the code table as you go
	// until the table is full.
	while( nBytes > 0 )
	{
		character = (UINT)(*source++);
		nBytes--;

		// See whether the current string is in the table.
		// If so, get the code value.  If not, try to add it
		// to the table.
		index = find_match( string_code, character );
		if( code_value[index] != 0xFFFFFFFF )
		{
			string_code = code_value[index];
		}
		else 
		{
			if( next_code <= MAX_CODE )
			{
				code_value[index] = next_code++;
				prefix_code[index] = string_code;
				append_character[index] = character;
			}
			byteCount += output_code( &destination, string_code, output_bit_count, output_bit_buffer );
			string_code = character;
		}
	}

	// Deliver the last code and the end-of-buffer code, and flush the buffer.
	byteCount += output_code( &destination, string_code, output_bit_count, output_bit_buffer );
	byteCount += output_code( &destination, MAX_VALUE, output_bit_count, output_bit_buffer );
	byteCount += output_code( &destination, 0, output_bit_count, output_bit_buffer );
	
	// Clean up.
	delete [] code_value;
	delete [] prefix_code;
	delete [] append_character;

	return( byteCount );
}

//  This is the expansion routine.  It takes an LZW format buffer, and expands
//  it to an output buffer.  Returns the number of bytes in the output.

UCHAR decode_stack[4000];

int expand( UCHAR *source, UCHAR *destination )
{
	UINT next_code;
	UINT new_code;
	UINT old_code;
	UINT character;
	UCHAR *str;
	
	int byteCount = 0;

	// Allocate buffers.
	code_value = new UINT[ TABLE_SIZE*sizeof(UINT) ];
	prefix_code = new UINT[ TABLE_SIZE*sizeof(UINT) ];
	append_character = new UCHAR[ TABLE_SIZE*sizeof(UCHAR) ];

	if( code_value==NULL || prefix_code==NULL || append_character==NULL )
	{
		return( 0 );
	}

	// Initialize code table.
	next_code = 256;

	// Initialize input.
	UINT input_bit_count = 0;
	ULONG input_bit_buffer = 0L;

	// Read the first code, initialize the character, and send
	// the first code to the destination buffer.

	old_code = input_code( &source, input_bit_count, input_bit_buffer );
	character = old_code;
	*destination++ = old_code;
	byteCount++;


	// This is the main expansion loop.  It reads in characters from the LZW file
	// until it sees the special code used to indicate the end of the data.
	while( (new_code=input_code(&source,input_bit_count,input_bit_buffer)) != (MAX_VALUE) )
	{
		// This code checks for the special STRING+CHARACTER+STRING+CHARACTER+STRING
		// case which generates an undefined code.  It handles it by decoding
		// the last code, and adding a single character to the end of the decode string.
		if( new_code >= next_code )
		{
			*decode_stack = character;
			str = decode_string( decode_stack+1, old_code );
		}
		/*
		** Otherwise we do a straight decode of the new code.
		*/
		else
		{
			str = decode_string(decode_stack,new_code);
		}
		
		// Now we output the decoded string in reverse order.
		character = *str;
		while( str >= decode_stack )
		{
			*destination++ = *str--;
			byteCount++;
		}

		// Finally, if possible, add a new code to the string table.
		if( next_code <= MAX_CODE )
		{
			prefix_code[next_code] = old_code;
			append_character[next_code] = character;
			next_code++;
		}
		old_code = new_code;
	}
	
	// Clean up.
	delete [] code_value;
	delete [] prefix_code;
	delete [] append_character;

	return( byteCount );
}


// This is the hashing routine.  It tries to find a match for the prefix+char
// string in the string table.  If it finds it, the index is returned.  If
// the string is not found, the first available index in the string table is
// returned instead.

UINT find_match( UINT hash_prefix, UINT hash_character)
{
	int index;
	UINT offset;

	index = (hash_character << HASHING_SHIFT) ^ hash_prefix;
	if( index == 0 )
		offset = 1;
	else
		offset = TABLE_SIZE - index;

	while( 1 )
	{
		if( code_value[index] == 0xFFFFFFFF )
			return( index );

		if( prefix_code[index] == hash_prefix  &&  append_character[index] == hash_character )
			return( index );

		index -= offset;
		if( index < 0 )
			index += TABLE_SIZE;
	}
}


// This routine simply decodes a string from the string table, storing
// it in a buffer.  The buffer can then be output in reverse order by
// the expansion program.

UCHAR *decode_string( UCHAR *buffer, UINT code )
{
	while( code > 255 )
	{
		*buffer++ = append_character[code];
		code = prefix_code[code];
	}
	*buffer = code;

	return( buffer );
}


// The following two routines are used with variable length
// codes.  They are written strictly for clarity, and are not
// particularyl efficient.

UINT input_code( UCHAR **source, UINT& input_bit_count, ULONG& input_bit_buffer )
{
	UINT return_value;
	
	while( input_bit_count <= 24 )
	{
		input_bit_buffer |= ( (ULONG)(**source) << (24-input_bit_count) );
		(*source)++;
		input_bit_count += 8;
	}
	return_value = input_bit_buffer >> (32-BITS);
	input_bit_buffer <<= BITS;
	input_bit_count -= BITS;
	return( return_value );
}


///////////////////////////////////////////////////////////
// This is the compression routine.  Returns the number
// of bytes sent to the destination buffer.
///////////////////////////////////////////////////////////

int output_code( UCHAR **destination, UINT code, UINT& output_bit_count, ULONG& output_bit_buffer )
{
	int outputByteCount = 0;

	output_bit_buffer |= (ULONG)(code) << (32-BITS-output_bit_count);
	output_bit_count += BITS;
	while( output_bit_count >= 8 )
	{
		**destination = ( (UCHAR)(output_bit_buffer >> 24) );
		(*destination)++;
		outputByteCount++;
		output_bit_buffer <<= 8;
		output_bit_count -= 8;
	}

	return( outputByteCount );
}