//////////////////////////////////////////////////////////////////////////// // // lzw.cpp // // 1/13/01 (Jeff Ondich) Started. Adapted from code published // by Mark R. Nelson in Dr. Dobbs' Journal, April 1989. // //////////////////////////////////////////////////////////////////////////// #include #include "lzw.h" static UINT *code_value; static UINT *prefix_code; static UCHAR *append_character; /////////////////////////////////////////////////////////// // This is the compression routine. Returns the number // of bytes in the compressed /////////////////////////////////////////////////////////// int compress( UCHAR *source, UCHAR *destination, UINT nBytes ) { UINT next_code; UINT character; UINT string_code; UINT index; UINT i; int byteCount = 0; UINT output_bit_count = 0; ULONG output_bit_buffer = 0L; // Allocate buffers. code_value = new UINT[ TABLE_SIZE*sizeof(UINT) ]; prefix_code = new UINT[ TABLE_SIZE*sizeof(UINT) ]; append_character = new UCHAR[ TABLE_SIZE*sizeof(UCHAR) ]; if( code_value==NULL || prefix_code==NULL || append_character==NULL ) { return( 0 ); } // Initialize the code table. next_code = 256; for( i=0; i < TABLE_SIZE; i++ ) code_value[i] = 0xFFFFFFFF; // Initialize string_code using the first byte // of the source data. string_code = (UINT)(*source++); nBytes--; // Main loop. Compress, adding to the code table as you go // until the table is full. while( nBytes > 0 ) { character = (UINT)(*source++); nBytes--; // See whether the current string is in the table. // If so, get the code value. If not, try to add it // to the table. index = find_match( string_code, character ); if( code_value[index] != 0xFFFFFFFF ) { string_code = code_value[index]; } else { if( next_code <= MAX_CODE ) { code_value[index] = next_code++; prefix_code[index] = string_code; append_character[index] = character; } byteCount += output_code( &destination, string_code, output_bit_count, output_bit_buffer ); string_code = character; } } // Deliver the last code and the end-of-buffer code, and flush the buffer. byteCount += output_code( &destination, string_code, output_bit_count, output_bit_buffer ); byteCount += output_code( &destination, MAX_VALUE, output_bit_count, output_bit_buffer ); byteCount += output_code( &destination, 0, output_bit_count, output_bit_buffer ); // Clean up. delete [] code_value; delete [] prefix_code; delete [] append_character; return( byteCount ); } // This is the expansion routine. It takes an LZW format buffer, and expands // it to an output buffer. Returns the number of bytes in the output. UCHAR decode_stack[4000]; int expand( UCHAR *source, UCHAR *destination ) { UINT next_code; UINT new_code; UINT old_code; UINT character; UCHAR *str; int byteCount = 0; // Allocate buffers. code_value = new UINT[ TABLE_SIZE*sizeof(UINT) ]; prefix_code = new UINT[ TABLE_SIZE*sizeof(UINT) ]; append_character = new UCHAR[ TABLE_SIZE*sizeof(UCHAR) ]; if( code_value==NULL || prefix_code==NULL || append_character==NULL ) { return( 0 ); } // Initialize code table. next_code = 256; // Initialize input. UINT input_bit_count = 0; ULONG input_bit_buffer = 0L; // Read the first code, initialize the character, and send // the first code to the destination buffer. old_code = input_code( &source, input_bit_count, input_bit_buffer ); character = old_code; *destination++ = old_code; byteCount++; // This is the main expansion loop. It reads in characters from the LZW file // until it sees the special code used to indicate the end of the data. while( (new_code=input_code(&source,input_bit_count,input_bit_buffer)) != (MAX_VALUE) ) { // This code checks for the special STRING+CHARACTER+STRING+CHARACTER+STRING // case which generates an undefined code. It handles it by decoding // the last code, and adding a single character to the end of the decode string. if( new_code >= next_code ) { *decode_stack = character; str = decode_string( decode_stack+1, old_code ); } /* ** Otherwise we do a straight decode of the new code. */ else { str = decode_string(decode_stack,new_code); } // Now we output the decoded string in reverse order. character = *str; while( str >= decode_stack ) { *destination++ = *str--; byteCount++; } // Finally, if possible, add a new code to the string table. if( next_code <= MAX_CODE ) { prefix_code[next_code] = old_code; append_character[next_code] = character; next_code++; } old_code = new_code; } // Clean up. delete [] code_value; delete [] prefix_code; delete [] append_character; return( byteCount ); } // This is the hashing routine. It tries to find a match for the prefix+char // string in the string table. If it finds it, the index is returned. If // the string is not found, the first available index in the string table is // returned instead. UINT find_match( UINT hash_prefix, UINT hash_character) { int index; UINT offset; index = (hash_character << HASHING_SHIFT) ^ hash_prefix; if( index == 0 ) offset = 1; else offset = TABLE_SIZE - index; while( 1 ) { if( code_value[index] == 0xFFFFFFFF ) return( index ); if( prefix_code[index] == hash_prefix && append_character[index] == hash_character ) return( index ); index -= offset; if( index < 0 ) index += TABLE_SIZE; } } // This routine simply decodes a string from the string table, storing // it in a buffer. The buffer can then be output in reverse order by // the expansion program. UCHAR *decode_string( UCHAR *buffer, UINT code ) { while( code > 255 ) { *buffer++ = append_character[code]; code = prefix_code[code]; } *buffer = code; return( buffer ); } // The following two routines are used with variable length // codes. They are written strictly for clarity, and are not // particularyl efficient. UINT input_code( UCHAR **source, UINT& input_bit_count, ULONG& input_bit_buffer ) { UINT return_value; while( input_bit_count <= 24 ) { input_bit_buffer |= ( (ULONG)(**source) << (24-input_bit_count) ); (*source)++; input_bit_count += 8; } return_value = input_bit_buffer >> (32-BITS); input_bit_buffer <<= BITS; input_bit_count -= BITS; return( return_value ); } /////////////////////////////////////////////////////////// // This is the compression routine. Returns the number // of bytes sent to the destination buffer. /////////////////////////////////////////////////////////// int output_code( UCHAR **destination, UINT code, UINT& output_bit_count, ULONG& output_bit_buffer ) { int outputByteCount = 0; output_bit_buffer |= (ULONG)(code) << (32-BITS-output_bit_count); output_bit_count += BITS; while( output_bit_count >= 8 ) { **destination = ( (UCHAR)(output_bit_buffer >> 24) ); (*destination)++; outputByteCount++; output_bit_buffer <<= 8; output_bit_count -= 8; } return( outputByteCount ); }