| /* |
| * jchuff.c |
| * |
| * This file was part of the Independent JPEG Group's software: |
| * Copyright (C) 1991-1998, Thomas G. Lane. |
| * Lossless JPEG Modifications: |
| * Copyright (C) 1999, Ken Murchison. |
| * For conditions of distribution and use, see the accompanying README file. |
| * |
| * This file contains Huffman entropy encoding routines which are shared |
| * by the sequential, progressive and lossless encoders. |
| */ |
| |
| #define JPEG_INTERNALS |
| #include "jinclude.h" |
| #include "jpeglib.h" |
| #include "jchuff.h" /* Declarations shared with jc*huff.c */ |
| |
| |
| /* |
| * Compute the derived values for a Huffman table. |
| * This routine also performs some validation checks on the table. |
| */ |
| |
| GLOBAL(void) |
| jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, |
| c_derived_tbl ** pdtbl) |
| { |
| JHUFF_TBL *htbl; |
| c_derived_tbl *dtbl; |
| int p, i, l, lastp, si, maxsymbol; |
| char huffsize[257]; |
| unsigned int huffcode[257]; |
| unsigned int code; |
| |
| /* Note that huffsize[] and huffcode[] are filled in code-length order, |
| * paralleling the order of the symbols themselves in htbl->huffval[]. |
| */ |
| |
| /* Find the input Huffman table */ |
| if (tblno < 0 || tblno >= NUM_HUFF_TBLS) |
| ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); |
| htbl = |
| isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno]; |
| if (htbl == NULL) |
| ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); |
| |
| /* Allocate a workspace if we haven't already done so. */ |
| if (*pdtbl == NULL) |
| *pdtbl = (c_derived_tbl *) |
| (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
| SIZEOF(c_derived_tbl)); |
| dtbl = *pdtbl; |
| |
| /* Figure C.1: make table of Huffman code length for each symbol */ |
| |
| p = 0; |
| for (l = 1; l <= 16; l++) { |
| i = (int) htbl->bits[l]; |
| if (i < 0 || p + i > 256) /* protect against table overrun */ |
| ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
| while (i--) |
| huffsize[p++] = (char) l; |
| } |
| huffsize[p] = 0; |
| lastp = p; |
| |
| /* Figure C.2: generate the codes themselves */ |
| /* We also validate that the counts represent a legal Huffman code tree. */ |
| |
| code = 0; |
| si = huffsize[0]; |
| p = 0; |
| while (huffsize[p]) { |
| while (((int) huffsize[p]) == si) { |
| huffcode[p++] = code; |
| code++; |
| } |
| /* code is now 1 more than the last code used for codelength si; but |
| * it must still fit in si bits, since no code is allowed to be all ones. |
| */ |
| if (((INT32) code) >= (((INT32) 1) << si)) |
| ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
| code <<= 1; |
| si++; |
| } |
| |
| /* Figure C.3: generate encoding tables */ |
| /* These are code and size indexed by symbol value */ |
| |
| /* Set all codeless symbols to have code length 0; |
| * this lets us detect duplicate VAL entries here, and later |
| * allows emit_bits to detect any attempt to emit such symbols. |
| */ |
| MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi)); |
| |
| /* This is also a convenient place to check for out-of-range |
| * and duplicated VAL entries. We allow 0..255 for AC symbols |
| * but only 0..16 for DC. (We could constrain them further |
| * based on data depth and mode, but this seems enough.) |
| */ |
| maxsymbol = isDC ? 16 : 255; |
| |
| for (p = 0; p < lastp; p++) { |
| i = htbl->huffval[p]; |
| if (i < 0 || i > maxsymbol || dtbl->ehufsi[i]) |
| ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
| dtbl->ehufco[i] = huffcode[p]; |
| dtbl->ehufsi[i] = huffsize[p]; |
| } |
| } |
| |
| |
| /* |
| * Generate the best Huffman code table for the given counts, fill htbl. |
| * |
| * The JPEG standard requires that no symbol be assigned a codeword of all |
| * one bits (so that padding bits added at the end of a compressed segment |
| * can't look like a valid code). Because of the canonical ordering of |
| * codewords, this just means that there must be an unused slot in the |
| * longest codeword length category. Section K.2 of the JPEG spec suggests |
| * reserving such a slot by pretending that symbol 256 is a valid symbol |
| * with count 1. In theory that's not optimal; giving it count zero but |
| * including it in the symbol set anyway should give a better Huffman code. |
| * But the theoretically better code actually seems to come out worse in |
| * practice, because it produces more all-ones bytes (which incur stuffed |
| * zero bytes in the final file). In any case the difference is tiny. |
| * |
| * The JPEG standard requires Huffman codes to be no more than 16 bits long. |
| * If some symbols have a very small but nonzero probability, the Huffman tree |
| * must be adjusted to meet the code length restriction. We currently use |
| * the adjustment method suggested in JPEG section K.2. This method is *not* |
| * optimal; it may not choose the best possible limited-length code. But |
| * typically only very-low-frequency symbols will be given less-than-optimal |
| * lengths, so the code is almost optimal. Experimental comparisons against |
| * an optimal limited-length-code algorithm indicate that the difference is |
| * microscopic --- usually less than a hundredth of a percent of total size. |
| * So the extra complexity of an optimal algorithm doesn't seem worthwhile. |
| */ |
| |
| GLOBAL(void) |
| jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]) |
| { |
| #define MAX_CLEN 32 /* assumed maximum initial code length */ |
| UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */ |
| int codesize[257]; /* codesize[k] = code length of symbol k */ |
| int others[257]; /* next symbol in current branch of tree */ |
| int c1, c2; |
| int p, i, j; |
| long v; |
| |
| /* This algorithm is explained in section K.2 of the JPEG standard */ |
| |
| MEMZERO(bits, SIZEOF(bits)); |
| MEMZERO(codesize, SIZEOF(codesize)); |
| for (i = 0; i < 257; i++) |
| others[i] = -1; /* init links to empty */ |
| |
| freq[256] = 1; /* make sure 256 has a nonzero count */ |
| /* Including the pseudo-symbol 256 in the Huffman procedure guarantees |
| * that no real symbol is given code-value of all ones, because 256 |
| * will be placed last in the largest codeword category. |
| */ |
| |
| /* Huffman's basic algorithm to assign optimal code lengths to symbols */ |
| |
| for (;;) { |
| /* Find the smallest nonzero frequency, set c1 = its symbol */ |
| /* In case of ties, take the larger symbol number */ |
| c1 = -1; |
| v = 1000000000L; |
| for (i = 0; i <= 256; i++) { |
| if (freq[i] && freq[i] <= v) { |
| v = freq[i]; |
| c1 = i; |
| } |
| } |
| |
| /* Find the next smallest nonzero frequency, set c2 = its symbol */ |
| /* In case of ties, take the larger symbol number */ |
| c2 = -1; |
| v = 1000000000L; |
| for (i = 0; i <= 256; i++) { |
| if (freq[i] && freq[i] <= v && i != c1) { |
| v = freq[i]; |
| c2 = i; |
| } |
| } |
| |
| /* Done if we've merged everything into one frequency */ |
| if (c2 < 0) |
| break; |
| |
| /* Else merge the two counts/trees */ |
| freq[c1] += freq[c2]; |
| freq[c2] = 0; |
| |
| /* Increment the codesize of everything in c1's tree branch */ |
| codesize[c1]++; |
| while (others[c1] >= 0) { |
| c1 = others[c1]; |
| codesize[c1]++; |
| } |
| |
| others[c1] = c2; /* chain c2 onto c1's tree branch */ |
| |
| /* Increment the codesize of everything in c2's tree branch */ |
| codesize[c2]++; |
| while (others[c2] >= 0) { |
| c2 = others[c2]; |
| codesize[c2]++; |
| } |
| } |
| |
| /* Now count the number of symbols of each code length */ |
| for (i = 0; i <= 256; i++) { |
| if (codesize[i]) { |
| /* The JPEG standard seems to think that this can't happen, */ |
| /* but I'm paranoid... */ |
| if (codesize[i] > MAX_CLEN) |
| ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW); |
| |
| bits[codesize[i]]++; |
| } |
| } |
| |
| /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure |
| * Huffman procedure assigned any such lengths, we must adjust the coding. |
| * Here is what the JPEG spec says about how this next bit works: |
| * Since symbols are paired for the longest Huffman code, the symbols are |
| * removed from this length category two at a time. The prefix for the pair |
| * (which is one bit shorter) is allocated to one of the pair; then, |
| * skipping the BITS entry for that prefix length, a code word from the next |
| * shortest nonzero BITS entry is converted into a prefix for two code words |
| * one bit longer. |
| */ |
| |
| for (i = MAX_CLEN; i > 16; i--) { |
| while (bits[i] > 0) { |
| j = i - 2; /* find length of new prefix to be used */ |
| while (bits[j] == 0) |
| j--; |
| |
| bits[i] -= 2; /* remove two symbols */ |
| bits[i-1]++; /* one goes in this length */ |
| bits[j+1] += 2; /* two new symbols in this length */ |
| bits[j]--; /* symbol of this length is now a prefix */ |
| } |
| } |
| |
| /* Remove the count for the pseudo-symbol 256 from the largest codelength */ |
| while (bits[i] == 0) /* find largest codelength still in use */ |
| i--; |
| bits[i]--; |
| |
| /* Return final symbol counts (only for lengths 0..16) */ |
| MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits)); |
| |
| /* Return a list of the symbols sorted by code length */ |
| /* It's not real clear to me why we don't need to consider the codelength |
| * changes made above, but the JPEG spec seems to think this works. |
| */ |
| p = 0; |
| for (i = 1; i <= MAX_CLEN; i++) { |
| for (j = 0; j <= 255; j++) { |
| if (codesize[j] == i) { |
| htbl->huffval[p] = (UINT8) j; |
| p++; |
| } |
| } |
| } |
| |
| /* Set sent_table FALSE so updated table will be written to JPEG file. */ |
| htbl->sent_table = FALSE; |
| } |