jchuff.c - external/github.com/libjpeg-turbo/libjpeg-turbo - Git at Google

 /*
  * jchuff.c
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * Lossless JPEG Modifications:
  * Copyright (C) 1999, Ken Murchison.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains Huffman entropy encoding routines which are shared
  * by the sequential, progressive and lossless encoders.
  */

 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jchuff.h"		/* Declarations shared with jc*huff.c */


 /*
  * Build the encoder-side lookup tables (code and code length, both indexed
  * by symbol value) for one Huffman table, validating the table on the way.
  *
  * cinfo  - compression object; supplies the DC/AC table pointer arrays and
  *          the memory manager used for the workspace allocation.
  * isDC   - nonzero selects cinfo->dc_huff_tbl_ptrs[], zero selects
  *          cinfo->ac_huff_tbl_ptrs[]; it also sets the largest symbol value
  *          accepted (16 for DC, 255 for AC).
  * tblno  - index of the table; must lie in 0..NUM_HUFF_TBLS-1.
  * pdtbl  - address of the derived-table pointer.  If *pdtbl is NULL, a
  *          c_derived_tbl is obtained from alloc_small (JPOOL_IMAGE) and
  *          stored there; otherwise the existing workspace is reused.
  *
  * Problems are reported through ERREXIT/ERREXIT1: JERR_NO_HUFF_TABLE for a
  * bad index or a missing table, JERR_BAD_HUFF_TABLE for invalid counts,
  * an illegal code tree, or out-of-range/duplicated symbol values.
  */

 GLOBAL(void)
 jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
                          c_derived_tbl ** pdtbl)
 {
   JHUFF_TBL *src;
   c_derived_tbl *derived;
   int idx, len, count, nsyms, cur_len, sym, sym_limit;
   char lengths[257];
   unsigned int codes[257];
   unsigned int next_code;

   /* lengths[] and codes[] are filled in the same order as the symbols
    * appear in src->huffval[], that is, by increasing code length.
    */

   /* Locate the source table, rejecting bad indexes and missing tables */
   if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
     ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
   if (isDC)
     src = cinfo->dc_huff_tbl_ptrs[tblno];
   else
     src = cinfo->ac_huff_tbl_ptrs[tblno];
   if (src == NULL)
     ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);

   /* Obtain the output workspace on first use */
   derived = *pdtbl;
   if (derived == NULL) {
     derived = (c_derived_tbl *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                                   SIZEOF(c_derived_tbl));
     *pdtbl = derived;
   }

   /* Figure C.1: expand the per-length counts into one length per symbol */
   nsyms = 0;
   for (len = 1; len <= 16; len++) {
     count = (int) src->bits[len];
     /* Refuse counts that would run past the 256-symbol workspace */
     if (count < 0 || nsyms + count > 256)
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     while (count-- != 0)
       lengths[nsyms++] = (char) len;
   }
   lengths[nsyms] = 0;		/* zero length terminates the list */

   /* Figure C.2: assign consecutive code values within each length,
    * checking that the counts describe a legal Huffman code tree.
    */
   next_code = 0;
   cur_len = lengths[0];
   idx = 0;
   while (lengths[idx] != 0) {
     for (; ((int) lengths[idx]) == cur_len; idx++) {
       codes[idx] = next_code;
       next_code++;
     }
     /* next_code is one past the last code of this length; it must still
      * fit in cur_len bits because an all-ones code is not allowed.
      */
     if (((INT32) next_code) >= (((INT32) 1) << cur_len))
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     next_code <<= 1;
     cur_len++;
   }

   /* Figure C.3: scatter code and length into tables indexed by symbol */

   /* A zero length marks a symbol with no code.  Starting from all zeroes
    * lets the loop below spot duplicated symbols, and lets emit_bits detect
    * any later attempt to emit a codeless symbol.
    */
   MEMZERO(derived->ehufsi, SIZEOF(derived->ehufsi));

   /* Symbol values up to 255 are accepted for AC tables but only up to 16
    * for DC tables.
    */
   if (isDC)
     sym_limit = 16;
   else
     sym_limit = 255;

   for (idx = 0; idx < nsyms; idx++) {
     sym = src->huffval[idx];
     if (sym > sym_limit || sym < 0 || derived->ehufsi[sym])
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     derived->ehufco[sym] = codes[idx];
     derived->ehufsi[sym] = lengths[idx];
   }
 }


 /*
  * Generate the best Huffman code table for the given counts, fill htbl.
  *
  * The JPEG standard requires that no symbol be assigned a codeword of all
  * one bits (so that padding bits added at the end of a compressed segment
  * can't look like a valid code).  Because of the canonical ordering of
  * codewords, this just means that there must be an unused slot in the
  * longest codeword length category.  Section K.2 of the JPEG spec suggests
  * reserving such a slot by pretending that symbol 256 is a valid symbol
  * with count 1.  In theory that's not optimal; giving it count zero but
  * including it in the symbol set anyway should give a better Huffman code.
  * But the theoretically better code actually seems to come out worse in
  * practice, because it produces more all-ones bytes (which incur stuffed
  * zero bytes in the final file).  In any case the difference is tiny.
  *
  * The JPEG standard requires Huffman codes to be no more than 16 bits long.
  * If some symbols have a very small but nonzero probability, the Huffman tree
  * must be adjusted to meet the code length restriction.  We currently use
  * the adjustment method suggested in JPEG section K.2.  This method is *not*
  * optimal; it may not choose the best possible limited-length code.  But
  * typically only very-low-frequency symbols will be given less-than-optimal
  * lengths, so the code is almost optimal.  Experimental comparisons against
  * an optimal limited-length-code algorithm indicate that the difference is
  * microscopic --- usually less than a hundredth of a percent of total size.
  * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
  */

 /*
  * Interface notes for jpeg_gen_optimal_table:
  *
  * cinfo - compression object, used here only for error reporting.
  * htbl  - table to fill: bits[] receives the per-length symbol counts,
  *         huffval[] the real symbols (0..255) ordered by the code length
  *         the Huffman procedure assigned them, and sent_table is set FALSE.
  * freq  - symbol counts.  Entries 0..256 are accessed, so the array must
  *         hold 257 elements; slot 256 is overwritten with the count of the
  *         reserved pseudo-symbol.  The array is used as scratch space and
  *         its contents are destroyed.
  *
  * JERR_HUFF_CLEN_OVERFLOW is reported if a code length exceeds MAX_CLEN or
  * if the length-limiting step finds no shorter code to split.  If no real
  * symbol has a nonzero count, the resulting table is empty.
  */
 GLOBAL(void)
 jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
 {
 #define MAX_CLEN 32		/* assumed maximum initial code length */
   UINT8 bits[MAX_CLEN+1];	/* bits[k] = # of symbols with code length k */
   int codesize[257];		/* codesize[k] = code length of symbol k */
   int others[257];		/* next symbol in current branch of tree */
   int c1, c2;
   int p, i, j;
   long v;

   /* This algorithm is explained in section K.2 of the JPEG standard */

   MEMZERO(bits, SIZEOF(bits));
   MEMZERO(codesize, SIZEOF(codesize));
   for (i = 0; i < 257; i++)
     others[i] = -1;		/* init links to empty */

   freq[256] = 1;		/* make sure 256 has a nonzero count */
   /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
    * that no real symbol is given code-value of all ones, because 256
    * will be placed last in the largest codeword category.
    */

   /* Huffman's basic algorithm to assign optimal code lengths to symbols */

   for (;;) {
     /* Find the smallest nonzero frequency, set c1 = its symbol */
     /* In case of ties, take the larger symbol number */
     c1 = -1;
     v = 1000000000L;
     for (i = 0; i <= 256; i++) {
       if (freq[i] && freq[i] <= v) {
         v = freq[i];
         c1 = i;
       }
     }

     /* Find the next smallest nonzero frequency, set c2 = its symbol */
     /* In case of ties, take the larger symbol number */
     c2 = -1;
     v = 1000000000L;
     for (i = 0; i <= 256; i++) {
       if (freq[i] && freq[i] <= v && i != c1) {
         v = freq[i];
         c2 = i;
       }
     }

     /* Done if we've merged everything into one frequency */
     if (c2 < 0)
       break;

     /* Else merge the two counts/trees */
     freq[c1] += freq[c2];
     freq[c2] = 0;

     /* Increment the codesize of everything in c1's tree branch */
     codesize[c1]++;
     while (others[c1] >= 0) {
       c1 = others[c1];
       codesize[c1]++;
     }

     others[c1] = c2;		/* chain c2 onto c1's tree branch */

     /* Increment the codesize of everything in c2's tree branch */
     codesize[c2]++;
     while (others[c2] >= 0) {
       c2 = others[c2];
       codesize[c2]++;
     }
   }

   /* Now count the number of symbols of each code length */
   for (i = 0; i <= 256; i++) {
     if (codesize[i]) {
       /* The JPEG standard seems to think that this can't happen, */
       /* but I'm paranoid... */
       if (codesize[i] > MAX_CLEN)
         ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);

       bits[codesize[i]]++;
     }
   }

   /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
    * Huffman procedure assigned any such lengths, we must adjust the coding.
    * Here is what the JPEG spec says about how this next bit works:
    * Since symbols are paired for the longest Huffman code, the symbols are
    * removed from this length category two at a time.  The prefix for the pair
    * (which is one bit shorter) is allocated to one of the pair; then,
    * skipping the BITS entry for that prefix length, a code word from the next
    * shortest nonzero BITS entry is converted into a prefix for two code words
    * one bit longer.
    */

   for (i = MAX_CLEN; i > 16; i--) {
     while (bits[i] > 0) {
       j = i - 2;		/* find length of new prefix to be used */
       while (bits[j] == 0) {
         /* bits[0] is never incremented, so arriving at index 0 means no
          * shorter code is left to split; report that instead of reading
          * below the start of the array.
          */
         if (j == 0)
           ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
         j--;
       }

       bits[i] -= 2;		/* remove two symbols */
       bits[i-1]++;		/* one goes in this length */
       bits[j+1] += 2;		/* two new symbols in this length */
       bits[j]--;		/* symbol of this length is now a prefix */
     }
   }

   /* Remove the count for the pseudo-symbol 256 from the largest codelength.
    * i is 16 on entry.  When every real symbol had a zero count the merge
    * loop never ran and bits[] is still all zero, so the scan stops at
    * index 0 and nothing is removed.
    */
   while (i > 0 && bits[i] == 0)	/* find largest codelength still in use */
     i--;
   if (i > 0)
     bits[i]--;

   /* Return final symbol counts (only for lengths 0..16) */
   MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));

   /* Return a list of the symbols sorted by code length */
   /* It's not real clear to me why we don't need to consider the codelength
    * changes made above, but the JPEG spec seems to think this works.
    */
   p = 0;
   for (i = 1; i <= MAX_CLEN; i++) {
     for (j = 0; j <= 255; j++) {
       if (codesize[j] == i) {
         htbl->huffval[p] = (UINT8) j;
         p++;
       }
     }
   }

   /* Set sent_table FALSE so updated table will be written to JPEG file. */
   htbl->sent_table = FALSE;
 }
	/*
	* jchuff.c
	*
	* This file was part of the Independent JPEG Group's software:
	* Copyright (C) 1991-1998, Thomas G. Lane.
	* Lossless JPEG Modifications:
	* Copyright (C) 1999, Ken Murchison.
	* For conditions of distribution and use, see the accompanying README file.
	*
	* This file contains Huffman entropy encoding routines which are shared
	* by the sequential, progressive and lossless encoders.
	*/

	#define JPEG_INTERNALS
	#include "jinclude.h"
	#include "jpeglib.h"
	#include "jchuff.h" /* Declarations shared with jc*huff.c */


	/*
	 * Compute the derived values for a Huffman table.
	 * This routine also performs some validation checks on the table.
	 *
	 * cinfo  - compression object; supplies the DC/AC table pointer arrays
	 *          and the memory manager used for the workspace allocation.
	 * isDC   - nonzero selects cinfo->dc_huff_tbl_ptrs[], zero selects
	 *          cinfo->ac_huff_tbl_ptrs[]; it also sets the largest symbol
	 *          value accepted (16 for DC, 255 for AC).
	 * tblno  - index of the table; must lie in 0..NUM_HUFF_TBLS-1.
	 * pdtbl  - address of the derived-table pointer.  If *pdtbl is NULL, a
	 *          c_derived_tbl is obtained from alloc_small (JPOOL_IMAGE) and
	 *          stored there; otherwise the existing workspace is reused.
	 *
	 * Problems are reported through ERREXIT/ERREXIT1: JERR_NO_HUFF_TABLE for
	 * a bad index or a missing table, JERR_BAD_HUFF_TABLE for invalid counts,
	 * an illegal code tree, or out-of-range/duplicated symbol values.
	 */

	GLOBAL(void)
	jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
	                         c_derived_tbl ** pdtbl)
	{
	  JHUFF_TBL *htbl;
	  c_derived_tbl *dtbl;
	  int p, i, l, lastp, si, maxsymbol;
	  char huffsize[257];
	  unsigned int huffcode[257];
	  unsigned int code;

	  /* Note that huffsize[] and huffcode[] are filled in code-length order,
	   * paralleling the order of the symbols themselves in htbl->huffval[].
	   */

	  /* Find the input Huffman table */
	  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
	    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
	  htbl =
	    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
	  if (htbl == NULL)
	    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);

	  /* Allocate a workspace if we haven't already done so. */
	  if (*pdtbl == NULL)
	    *pdtbl = (c_derived_tbl *)
	      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
	                                  SIZEOF(c_derived_tbl));
	  dtbl = *pdtbl;

	  /* Figure C.1: make table of Huffman code length for each symbol */

	  p = 0;
	  for (l = 1; l <= 16; l++) {
	    i = (int) htbl->bits[l];
	    if (i < 0 || p + i > 256)	/* protect against table overrun */
	      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
	    while (i--)
	      huffsize[p++] = (char) l;
	  }
	  huffsize[p] = 0;		/* zero length terminates the list */
	  lastp = p;

	  /* Figure C.2: generate the codes themselves */
	  /* We also validate that the counts represent a legal Huffman code tree. */

	  code = 0;
	  si = huffsize[0];
	  p = 0;
	  while (huffsize[p]) {
	    while (((int) huffsize[p]) == si) {
	      huffcode[p++] = code;
	      code++;
	    }
	    /* code is now 1 more than the last code used for codelength si; but
	     * it must still fit in si bits, since no code is allowed to be all ones.
	     */
	    if (((INT32) code) >= (((INT32) 1) << si))
	      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
	    code <<= 1;
	    si++;
	  }

	  /* Figure C.3: generate encoding tables */
	  /* These are code and size indexed by symbol value */

	  /* Set all codeless symbols to have code length 0;
	   * this lets us detect duplicate VAL entries here, and later
	   * allows emit_bits to detect any attempt to emit such symbols.
	   */
	  MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));

	  /* This is also a convenient place to check for out-of-range
	   * and duplicated VAL entries.  We allow 0..255 for AC symbols
	   * but only 0..16 for DC.  (We could constrain them further
	   * based on data depth and mode, but this seems enough.)
	   */
	  maxsymbol = isDC ? 16 : 255;

	  for (p = 0; p < lastp; p++) {
	    i = htbl->huffval[p];
	    if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
	      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
	    dtbl->ehufco[i] = huffcode[p];
	    dtbl->ehufsi[i] = huffsize[p];
	  }
	}


	/*
	* Generate the best Huffman code table for the given counts, fill htbl.
	*
	* The JPEG standard requires that no symbol be assigned a codeword of all
	* one bits (so that padding bits added at the end of a compressed segment
	* can't look like a valid code). Because of the canonical ordering of
	* codewords, this just means that there must be an unused slot in the
	* longest codeword length category. Section K.2 of the JPEG spec suggests
	* reserving such a slot by pretending that symbol 256 is a valid symbol
	* with count 1. In theory that's not optimal; giving it count zero but
	* including it in the symbol set anyway should give a better Huffman code.
	* But the theoretically better code actually seems to come out worse in
	* practice, because it produces more all-ones bytes (which incur stuffed
	* zero bytes in the final file). In any case the difference is tiny.
	*
	* The JPEG standard requires Huffman codes to be no more than 16 bits long.
	* If some symbols have a very small but nonzero probability, the Huffman tree
	* must be adjusted to meet the code length restriction. We currently use
	* the adjustment method suggested in JPEG section K.2. This method is not
	* optimal; it may not choose the best possible limited-length code. But
	* typically only very-low-frequency symbols will be given less-than-optimal
	* lengths, so the code is almost optimal. Experimental comparisons against
	* an optimal limited-length-code algorithm indicate that the difference is
	* microscopic --- usually less than a hundredth of a percent of total size.
	* So the extra complexity of an optimal algorithm doesn't seem worthwhile.
	*/

	/*
	 * Interface notes for jpeg_gen_optimal_table:
	 *
	 * cinfo - compression object, used here only for error reporting.
	 * htbl  - table to fill: bits[] receives the per-length symbol counts,
	 *         huffval[] the real symbols (0..255) ordered by the code length
	 *         the Huffman procedure assigned them, and sent_table is set
	 *         FALSE.
	 * freq  - symbol counts.  Entries 0..256 are accessed, so the array must
	 *         hold 257 elements; slot 256 is overwritten with the count of
	 *         the reserved pseudo-symbol.  The array is used as scratch
	 *         space and its contents are destroyed.
	 *
	 * JERR_HUFF_CLEN_OVERFLOW is reported if a code length exceeds MAX_CLEN
	 * or if the length-limiting step finds no shorter code to split.  If no
	 * real symbol has a nonzero count, the resulting table is empty.
	 */
	GLOBAL(void)
	jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
	{
	#define MAX_CLEN 32		/* assumed maximum initial code length */
	  UINT8 bits[MAX_CLEN+1];	/* bits[k] = # of symbols with code length k */
	  int codesize[257];		/* codesize[k] = code length of symbol k */
	  int others[257];		/* next symbol in current branch of tree */
	  int c1, c2;
	  int p, i, j;
	  long v;

	  /* This algorithm is explained in section K.2 of the JPEG standard */

	  MEMZERO(bits, SIZEOF(bits));
	  MEMZERO(codesize, SIZEOF(codesize));
	  for (i = 0; i < 257; i++)
	    others[i] = -1;		/* init links to empty */

	  freq[256] = 1;		/* make sure 256 has a nonzero count */
	  /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
	   * that no real symbol is given code-value of all ones, because 256
	   * will be placed last in the largest codeword category.
	   */

	  /* Huffman's basic algorithm to assign optimal code lengths to symbols */

	  for (;;) {
	    /* Find the smallest nonzero frequency, set c1 = its symbol */
	    /* In case of ties, take the larger symbol number */
	    c1 = -1;
	    v = 1000000000L;
	    for (i = 0; i <= 256; i++) {
	      if (freq[i] && freq[i] <= v) {
	        v = freq[i];
	        c1 = i;
	      }
	    }

	    /* Find the next smallest nonzero frequency, set c2 = its symbol */
	    /* In case of ties, take the larger symbol number */
	    c2 = -1;
	    v = 1000000000L;
	    for (i = 0; i <= 256; i++) {
	      if (freq[i] && freq[i] <= v && i != c1) {
	        v = freq[i];
	        c2 = i;
	      }
	    }

	    /* Done if we've merged everything into one frequency */
	    if (c2 < 0)
	      break;

	    /* Else merge the two counts/trees */
	    freq[c1] += freq[c2];
	    freq[c2] = 0;

	    /* Increment the codesize of everything in c1's tree branch */
	    codesize[c1]++;
	    while (others[c1] >= 0) {
	      c1 = others[c1];
	      codesize[c1]++;
	    }

	    others[c1] = c2;		/* chain c2 onto c1's tree branch */

	    /* Increment the codesize of everything in c2's tree branch */
	    codesize[c2]++;
	    while (others[c2] >= 0) {
	      c2 = others[c2];
	      codesize[c2]++;
	    }
	  }

	  /* Now count the number of symbols of each code length */
	  for (i = 0; i <= 256; i++) {
	    if (codesize[i]) {
	      /* The JPEG standard seems to think that this can't happen, */
	      /* but I'm paranoid... */
	      if (codesize[i] > MAX_CLEN)
	        ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);

	      bits[codesize[i]]++;
	    }
	  }

	  /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
	   * Huffman procedure assigned any such lengths, we must adjust the coding.
	   * Here is what the JPEG spec says about how this next bit works:
	   * Since symbols are paired for the longest Huffman code, the symbols are
	   * removed from this length category two at a time.  The prefix for the pair
	   * (which is one bit shorter) is allocated to one of the pair; then,
	   * skipping the BITS entry for that prefix length, a code word from the next
	   * shortest nonzero BITS entry is converted into a prefix for two code words
	   * one bit longer.
	   */

	  for (i = MAX_CLEN; i > 16; i--) {
	    while (bits[i] > 0) {
	      j = i - 2;		/* find length of new prefix to be used */
	      while (bits[j] == 0) {
	        /* bits[0] is never incremented, so arriving at index 0 means no
	         * shorter code is left to split; report that instead of reading
	         * below the start of the array.
	         */
	        if (j == 0)
	          ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
	        j--;
	      }

	      bits[i] -= 2;		/* remove two symbols */
	      bits[i-1]++;		/* one goes in this length */
	      bits[j+1] += 2;		/* two new symbols in this length */
	      bits[j]--;		/* symbol of this length is now a prefix */
	    }
	  }

	  /* Remove the count for the pseudo-symbol 256 from the largest codelength.
	   * i is 16 on entry.  When every real symbol had a zero count the merge
	   * loop never ran and bits[] is still all zero, so the scan stops at
	   * index 0 and nothing is removed.
	   */
	  while (i > 0 && bits[i] == 0)	/* find largest codelength still in use */
	    i--;
	  if (i > 0)
	    bits[i]--;

	  /* Return final symbol counts (only for lengths 0..16) */
	  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));

	  /* Return a list of the symbols sorted by code length */
	  /* It's not real clear to me why we don't need to consider the codelength
	   * changes made above, but the JPEG spec seems to think this works.
	   */
	  p = 0;
	  for (i = 1; i <= MAX_CLEN; i++) {
	    for (j = 0; j <= 255; j++) {
	      if (codesize[j] == i) {
	        htbl->huffval[p] = (UINT8) j;
	        p++;
	      }
	    }
	  }

	  /* Set sent_table FALSE so updated table will be written to JPEG file. */
	  htbl->sent_table = FALSE;
	}