function [recovered_data] = prefix_decode( codebook, compressed_bits, the_length ) % prefix_decode uncompresses the compressed data using a given codebook. % [recovered_data] = prefix_decode( codebook, compressed_bits, the_length ) % The recovered_data will be a vector of length the_length. % % Usage: % codebook = [ ... % bitstring_to_index([1]),... % bitstring_to_index([0,1]),... % bitstring_to_index([0,0,1]),... % bitstring_to_index([0,0,0]) ]; % compressed_bits = [ 1 0 1 0 0 1 0 0 0 1 1 1 1 ]; % recovered_data = prefix_decode(codebook, compressed_bits, 8) % returns % recovered_data = [ 0 1 2 3 0 0 0 0 ] % . % % See also HUFFMAN_UNCOMPRESS, PREFIX_ENCODE. % Change log: % 1999-07-02:DAV: Modified to use lookup table. % Now it works much faster. % 1999-07-01:DAV: David Cary started. % Reserve space for output plaintext. recovered_data = zeros( 1, the_length ); % Use look-up tables to speed up decode. % First set up tables. recovered_index = zeros( 1, the_length ); chew_size = 12; % Set chew_size to larger values to decode faster. % Set chew_size to smaller values to use less RAM. % These are times for decoding the "cameraman" image % on David Cary's computer: %chew_size = 7; % bits % 69 s %chew_size = 8; % bits % 43 s %chew_size = 9; % bits % 31 s %chew_size = 10; % bits % 23 s %chew_size = 11; % bits % 22 s %chew_size = 12; % bits % 21 s % 0.2 s spent decoding long codes. % pad end of file with zero bits, % so I don't get an error on the last chew. compressed_bits( length(compressed_bits) + chew_size - 1 ) = 1; code_length = zeros(1, 2.^chew_size ); % code bit length code_value = zeros(1, 2.^chew_size ); % decoded symbol value % A "length" of 0 indicates a long code; % in that case fall back on brute-force lookup. [bitlength, value] = index_to_binary(codebook); for i = 1:length(codebook), if( codebook(i) < 1 ) % skip symbols that never occurred in the plaintext else, if( chew_size < bitlength(i) ) % code is too long to fit in the table, % so leave those entries zero. else, % left shift the code % so most significant bit is the leftmost of chew_size bits. % filling with zeros: min_entry = bitshift( value(i), chew_size-bitlength(i) ); % filling with ones: max_entry = bitshift( value(i)+1, chew_size-bitlength(i) )-1; % bitlength(i) == chew_size: get 1 entry; % bitlength(i) == (chew_size-1): get 2 entries; ... % ... 1 bit codes fill half the table with entries. code_length( 1+min_entry:1+max_entry ) = bitlength(i); code_value( 1+min_entry:1+max_entry ) = i-1; end; end; end; start_bit = 1; powers_of_2 = 2.^((chew_size-1):-1:0); chew_list = 0:(chew_size-1); longest_bit_length = max(bitlength(:)); % -------------------------- % Now that the look-up tables have been set up, % use that table to decode values. % for each symbol for current_symbol = 1:the_length, % bite off the next chew_size bits and convert them to a table offset. offset = 1 + powers_of_2 * compressed_bits( start_bit+chew_list ); if( code_length(offset) ), % Aha, it's a short code. % Do a quick table lookup. start_bit = start_bit + code_length(offset); recovered_index(current_symbol) = offset; else % It's long code. % Fallback on brute-force search. end_bit = start_bit + chew_size; % FIXME: % a much faster algorithm relies on the fact % that while the maximum length of a code may % be as large as (alphabetsize-1), % once you strip off the leading zeros, % there will at most be % ceil(log2(alphabetsize)) trailing bits. % So one could set up a lookup table for long codes, % something like % recovered_long_index( zerolength, remaining_bits ) % find last 0 if(1) % optional speedup bits = compressed_bits( start_bit:(start_bit+longest_bit_length-1) ); I = find(bits); if(isempty(I)), % ASSERT( least-frequent all-zeros symbol occurred ); % set end_bit to point to the last bit of this codeword end_bit = start_bit+longest_bit_length-1; else, % set end_bit to point to the first 1 bit % which *might* be the last bit of this codeword end_bit = max((start_bit+chew_size), (start_bit + I(1) - 1)); end; end; % Grab one bit. Is it in the codebook ? % no. Grab another bit. Is the pair in the codebook ? % ... until we find a match, then output the corresponding symbol. % continue finding code until no more bits left. I = []; end_bit = end_bit-1; while( isempty(I) ), end_bit = end_bit + 1; bits = compressed_bits( start_bit:end_bit ); index = bitstring_to_index(bits); I = find( index == codebook ); end; % Except for the "-1" ("unused") code, % every code in codebook should be unique. % ASSERT( 1 == length(I) ); recovered_data(current_symbol) = I(1) - 1; start_bit = end_bit+1; end; end; % I save a few seconds by doing this all at once. I = find( recovered_index ); recovered_data(I) = code_value(recovered_index(I)); % There should be no more "1" bits past the end; % merely zero padding. % N = length(compressed_bits); % ASSERT( isempty(find( compressed_bits( start_bit:N ) )) ); % ASSERT( N - start_bit + 1 < 8 ); % end prefix_decode.m