| /* |
| * jcphuff.c |
| * |
| * This file was part of the Independent JPEG Group's software: |
| * Copyright (C) 1995-1997, Thomas G. Lane. |
| * libjpeg-turbo Modifications: |
| * Copyright (C) 2011, 2015, 2018, D. R. Commander. |
| * Copyright (C) 2016, 2018, Matthieu Darbois. |
| * For conditions of distribution and use, see the accompanying README.ijg |
| * file. |
| * |
| * This file contains Huffman entropy encoding routines for progressive JPEG. |
| * |
| * We do not support output suspension in this module, since the library |
| * currently does not allow multiple-scan files to be written with output |
| * suspension. |
| */ |
| |
| #define JPEG_INTERNALS |
| #include "jinclude.h" |
| #include "jpeglib.h" |
| #include "jsimd.h" |
| #include "jconfigint.h" |
| #include <limits.h> |
| |
| #ifdef HAVE_INTRIN_H |
| #include <intrin.h> |
| #ifdef _MSC_VER |
| #ifdef HAVE_BITSCANFORWARD64 |
| #pragma intrinsic(_BitScanForward64) |
| #endif |
| #ifdef HAVE_BITSCANFORWARD |
| #pragma intrinsic(_BitScanForward) |
| #endif |
| #endif |
| #endif |
| |
| #ifdef C_PROGRESSIVE_SUPPORTED |
| |
| /* |
| * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be |
| * used for bit counting rather than the lookup table. This will reduce the |
| * memory footprint by 64k, which is important for some mobile applications |
| * that create many isolated instances of libjpeg-turbo (web browsers, for |
| * instance.) This may improve performance on some mobile platforms as well. |
| * This feature is enabled by default only on Arm processors, because some x86 |
| * chips have a slow implementation of bsr, and the use of clz/bsr cannot be |
| * shown to have a significant performance impact even on the x86 chips that |
| * have a fast implementation of it. When building for Armv6, you can |
| * explicitly disable the use of clz/bsr by adding -mthumb to the compiler |
| * flags (this defines __thumb__). |
| */ |
| |
| /* NOTE: Both GCC and Clang define __GNUC__ */ |
| #if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__)) |
| #if !defined(__thumb__) || defined(__thumb2__) |
| #define USE_CLZ_INTRINSIC |
| #endif |
| #endif |
| |
| #ifdef USE_CLZ_INTRINSIC |
| #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) |
| #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) |
| #else |
| #include "jpeg_nbits_table.h" |
| #define JPEG_NBITS(x) (jpeg_nbits_table[x]) |
| #define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) |
| #endif |
| |
| |
| /* Expanded entropy encoder object for progressive Huffman encoding. */ |
| |
| typedef struct { |
| struct jpeg_entropy_encoder pub; /* public fields */ |
| |
| /* Pointer to routine to prepare data for encode_mcu_AC_first() */ |
| void (*AC_first_prepare) (const JCOEF *block, |
| const int *jpeg_natural_order_start, int Sl, |
| int Al, JCOEF *values, size_t *zerobits); |
| /* Pointer to routine to prepare data for encode_mcu_AC_refine() */ |
| int (*AC_refine_prepare) (const JCOEF *block, |
| const int *jpeg_natural_order_start, int Sl, |
| int Al, JCOEF *absvalues, size_t *bits); |
| |
| /* Mode flag: TRUE for optimization, FALSE for actual data output */ |
| boolean gather_statistics; |
| |
| /* Bit-level coding status. |
| * next_output_byte/free_in_buffer are local copies of cinfo->dest fields. |
| */ |
| JOCTET *next_output_byte; /* => next byte to write in buffer */ |
| size_t free_in_buffer; /* # of byte spaces remaining in buffer */ |
| size_t put_buffer; /* current bit-accumulation buffer */ |
| int put_bits; /* # of bits now in it */ |
| j_compress_ptr cinfo; /* link to cinfo (needed for dump_buffer) */ |
| |
| /* Coding status for DC components */ |
| int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ |
| |
| /* Coding status for AC components */ |
| int ac_tbl_no; /* the table number of the single component */ |
| unsigned int EOBRUN; /* run length of EOBs */ |
| unsigned int BE; /* # of buffered correction bits before MCU */ |
| char *bit_buffer; /* buffer for correction bits (1 per char) */ |
| /* packing correction bits tightly would save some space but cost time... */ |
| |
| unsigned int restarts_to_go; /* MCUs left in this restart interval */ |
| int next_restart_num; /* next restart number to write (0-7) */ |
| |
| /* Pointers to derived tables (these workspaces have image lifespan). |
| * Since any one scan codes only DC or only AC, we only need one set |
| * of tables, not one for DC and one for AC. |
| */ |
| c_derived_tbl *derived_tbls[NUM_HUFF_TBLS]; |
| |
| /* Statistics tables for optimization; again, one set is enough */ |
| long *count_ptrs[NUM_HUFF_TBLS]; |
| } phuff_entropy_encoder; |
| |
| typedef phuff_entropy_encoder *phuff_entropy_ptr; |
| |
/* Capacity, in bits, of the buffer that collects AC refinement correction
 * bits.  A bigger buffer can improve compression marginally, but 1000 is
 * already far beyond what is useful.  Never set this below 64, the minimum
 * safe size.
 */

#define MAX_CORR_BITS  1000
| |
/* IRIGHT_SHIFT is the int counterpart of RIGHT_SHIFT, which operates on
 * JLONG.  The same RIGHT_SHIFT_IS_UNSIGNED switch is used for both, on the
 * assumption that int and JLONG right shifts behave alike.
 *
 * Any function that uses IRIGHT_SHIFT must place ISHIFT_TEMPS among its
 * local declarations; it supplies the temporary that the emulated variant
 * needs and expands to nothing otherwise.
 */

#ifndef RIGHT_SHIFT_IS_UNSIGNED
#define ISHIFT_TEMPS
#define IRIGHT_SHIFT(x, shft)  ((x) >> (shft))
#else
#define ISHIFT_TEMPS  int ishift_temp;
#define IRIGHT_SHIFT(x, shft) \
  ((ishift_temp = (x)) < 0 ? \
   (ishift_temp >> (shft)) | ((~0) << (16 - (shft))) : \
   (ishift_temp >> (shft)))
#endif
| |
/* Round v up to the nearest multiple of p, where p is a power of two.
 * Both arguments are fully parenthesized so that an expression passed as v
 * (for example one using | or a shift) is added as a whole.
 */
#define PAD(v, p)  (((v) + (p) - 1) & (~((p) - 1)))
| |
| /* Forward declarations */ |
| METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo, |
| JBLOCKROW *MCU_data); |
| METHODDEF(void) encode_mcu_AC_first_prepare |
| (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, |
| JCOEF *values, size_t *zerobits); |
| METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo, |
| JBLOCKROW *MCU_data); |
| METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo, |
| JBLOCKROW *MCU_data); |
| METHODDEF(int) encode_mcu_AC_refine_prepare |
| (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, |
| JCOEF *absvalues, size_t *bits); |
| METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo, |
| JBLOCKROW *MCU_data); |
| METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo); |
| METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo); |
| |
| |
| /* Count bit loop zeroes */ |
| INLINE |
| METHODDEF(int) |
| count_zeroes(size_t *x) |
| { |
| int result; |
| #if defined(HAVE_BUILTIN_CTZL) |
| result = __builtin_ctzl(*x); |
| *x >>= result; |
| #elif defined(HAVE_BITSCANFORWARD64) |
| _BitScanForward64(&result, *x); |
| *x >>= result; |
| #elif defined(HAVE_BITSCANFORWARD) |
| _BitScanForward(&result, *x); |
| *x >>= result; |
| #else |
| result = 0; |
| while ((*x & 1) == 0) { |
| ++result; |
| *x >>= 1; |
| } |
| #endif |
| return result; |
| } |
| |
| |
| /* |
| * Initialize for a Huffman-compressed scan using progressive JPEG. |
| */ |
| |
| METHODDEF(void) |
| start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics) |
| { |
| phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
| boolean is_DC_band; |
| int ci, tbl; |
| jpeg_component_info *compptr; |
| |
| entropy->cinfo = cinfo; |
| entropy->gather_statistics = gather_statistics; |
| |
| is_DC_band = (cinfo->Ss == 0); |
| |
| /* We assume jcmaster.c already validated the scan parameters. */ |
| |
| /* Select execution routines */ |
| if (cinfo->Ah == 0) { |
| if (is_DC_band) |
| entropy->pub.encode_mcu = encode_mcu_DC_first; |
| else |
| entropy->pub.encode_mcu = encode_mcu_AC_first; |
| if (jsimd_can_encode_mcu_AC_first_prepare()) |
| entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare; |
| else |
| entropy->AC_first_prepare = encode_mcu_AC_first_prepare; |
| } else { |
| if (is_DC_band) |
| entropy->pub.encode_mcu = encode_mcu_DC_refine; |
| else { |
| entropy->pub.encode_mcu = encode_mcu_AC_refine; |
| if (jsimd_can_encode_mcu_AC_refine_prepare()) |
| entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare; |
| else |
| entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare; |
| /* AC refinement needs a correction bit buffer */ |
| if (entropy->bit_buffer == NULL) |
| entropy->bit_buffer = (char *) |
| (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, |
| MAX_CORR_BITS * sizeof(char)); |
| } |
| } |
| if (gather_statistics) |
| entropy->pub.finish_pass = finish_pass_gather_phuff; |
| else |
| entropy->pub.finish_pass = finish_pass_phuff; |
| |
| /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1 |
| * for AC coefficients. |
| */ |
| for (ci = 0; ci < cinfo->comps_in_scan; ci++) { |
| compptr = cinfo->cur_comp_info[ci]; |
| /* Initialize DC predictions to 0 */ |
| entropy->last_dc_val[ci] = 0; |
| /* Get table index */ |
| if (is_DC_band) { |
| if (cinfo->Ah != 0) /* DC refinement needs no table */ |
| continue; |
| tbl = compptr->dc_tbl_no; |
| } else { |
| entropy->ac_tbl_no = tbl = compptr->ac_tbl_no; |
| } |
| if (gather_statistics) { |
| /* Check for invalid table index */ |
| /* (make_c_derived_tbl does this in the other path) */ |
| if (tbl < 0 || tbl >= NUM_HUFF_TBLS) |
| ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl); |
| /* Allocate and zero the statistics tables */ |
| /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ |
| if (entropy->count_ptrs[tbl] == NULL) |
| entropy->count_ptrs[tbl] = (long *) |
| (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, |
| 257 * sizeof(long)); |
| MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long)); |
| } else { |
| /* Compute derived values for Huffman table */ |
| /* We may do this more than once for a table, but it's not expensive */ |
| jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl, |
| &entropy->derived_tbls[tbl]); |
| } |
| } |
| |
| /* Initialize AC stuff */ |
| entropy->EOBRUN = 0; |
| entropy->BE = 0; |
| |
| /* Initialize bit buffer to empty */ |
| entropy->put_buffer = 0; |
| entropy->put_bits = 0; |
| |
| /* Initialize restart stuff */ |
| entropy->restarts_to_go = cinfo->restart_interval; |
| entropy->next_restart_num = 0; |
| } |
| |
| |
/* Outputting bytes to the file.
 * NB: these must be called only when actually outputting,
 * that is, entropy->gather_statistics == FALSE.
 */

/* Store one byte at the current output position and call dump_buffer() when
 * that fills the buffer.  val is converted to JOCTET.
 *
 * The body is wrapped in do { } while (0) so that "emit_byte(e, v);" is a
 * single statement and can be used as the unbraced body of an if/else.
 */
#define emit_byte(entropy, val)  do { \
  *(entropy)->next_output_byte++ = (JOCTET)(val); \
  if (--(entropy)->free_in_buffer == 0) \
    dump_buffer(entropy); \
} while (0)
| |
| |
| LOCAL(void) |
| dump_buffer(phuff_entropy_ptr entropy) |
| /* Empty the output buffer; we do not support suspension in this module. */ |
| { |
| struct jpeg_destination_mgr *dest = entropy->cinfo->dest; |
| |
| if (!(*dest->empty_output_buffer) (entropy->cinfo)) |
| ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND); |
| /* After a successful buffer dump, must reset buffer pointers */ |
| entropy->next_output_byte = dest->next_output_byte; |
| entropy->free_in_buffer = dest->free_in_buffer; |
| } |
| |
| |
| /* Outputting bits to the file */ |
| |
| /* Only the right 24 bits of put_buffer are used; the valid bits are |
| * left-justified in this part. At most 16 bits can be passed to emit_bits |
| * in one call, and we never retain more than 7 bits in put_buffer |
| * between calls, so 24 bits are sufficient. |
| */ |
| |
| LOCAL(void) |
| emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size) |
| /* Emit some bits, unless we are in gather mode */ |
| { |
| /* This routine is heavily used, so it's worth coding tightly. */ |
| register size_t put_buffer = (size_t)code; |
| register int put_bits = entropy->put_bits; |
| |
| /* if size is 0, caller used an invalid Huffman table entry */ |
| if (size == 0) |
| ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); |
| |
| if (entropy->gather_statistics) |
| return; /* do nothing if we're only getting stats */ |
| |
| put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */ |
| |
| put_bits += size; /* new number of bits in buffer */ |
| |
| put_buffer <<= 24 - put_bits; /* align incoming bits */ |
| |
| put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */ |
| |
| while (put_bits >= 8) { |
| int c = (int)((put_buffer >> 16) & 0xFF); |
| |
| emit_byte(entropy, c); |
| if (c == 0xFF) { /* need to stuff a zero byte? */ |
| emit_byte(entropy, 0); |
| } |
| put_buffer <<= 8; |
| put_bits -= 8; |
| } |
| |
| entropy->put_buffer = put_buffer; /* update variables */ |
| entropy->put_bits = put_bits; |
| } |
| |
| |
| LOCAL(void) |
| flush_bits(phuff_entropy_ptr entropy) |
| { |
| emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */ |
| entropy->put_buffer = 0; /* and reset bit-buffer to empty */ |
| entropy->put_bits = 0; |
| } |
| |
| |
| /* |
| * Emit (or just count) a Huffman symbol. |
| */ |
| |
| LOCAL(void) |
| emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol) |
| { |
| if (entropy->gather_statistics) |
| entropy->count_ptrs[tbl_no][symbol]++; |
| else { |
| c_derived_tbl *tbl = entropy->derived_tbls[tbl_no]; |
| emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]); |
| } |
| } |
| |
| |
| /* |
| * Emit bits from a correction bit buffer. |
| */ |
| |
| LOCAL(void) |
| emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart, |
| unsigned int nbits) |
| { |
| if (entropy->gather_statistics) |
| return; /* no real work */ |
| |
| while (nbits > 0) { |
| emit_bits(entropy, (unsigned int)(*bufstart), 1); |
| bufstart++; |
| nbits--; |
| } |
| } |
| |
| |
| /* |
| * Emit any pending EOBRUN symbol. |
| */ |
| |
| LOCAL(void) |
| emit_eobrun(phuff_entropy_ptr entropy) |
| { |
| register int temp, nbits; |
| |
| if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */ |
| temp = entropy->EOBRUN; |
| nbits = JPEG_NBITS_NONZERO(temp) - 1; |
| /* safety check: shouldn't happen given limited correction-bit buffer */ |
| if (nbits > 14) |
| ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); |
| |
| emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4); |
| if (nbits) |
| emit_bits(entropy, entropy->EOBRUN, nbits); |
| |
| entropy->EOBRUN = 0; |
| |
| /* Emit any buffered correction bits */ |
| emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE); |
| entropy->BE = 0; |
| } |
| } |
| |
| |
| /* |
| * Emit a restart marker & resynchronize predictions. |
| */ |
| |
| LOCAL(void) |
| emit_restart(phuff_entropy_ptr entropy, int restart_num) |
| { |
| int ci; |
| |
| emit_eobrun(entropy); |
| |
| if (!entropy->gather_statistics) { |
| flush_bits(entropy); |
| emit_byte(entropy, 0xFF); |
| emit_byte(entropy, JPEG_RST0 + restart_num); |
| } |
| |
| if (entropy->cinfo->Ss == 0) { |
| /* Re-initialize DC predictions to 0 */ |
| for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++) |
| entropy->last_dc_val[ci] = 0; |
| } else { |
| /* Re-initialize all AC-related fields to 0 */ |
| entropy->EOBRUN = 0; |
| entropy->BE = 0; |
| } |
| } |
| |
| |
| /* |
| * MCU encoding for DC initial scan (either spectral selection, |
| * or first pass of successive approximation). |
| */ |
| |
| METHODDEF(boolean) |
| encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) |
| { |
| phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
| register int temp, temp2, temp3; |
| register int nbits; |
| int blkn, ci; |
| int Al = cinfo->Al; |
| JBLOCKROW block; |
| jpeg_component_info *compptr; |
| ISHIFT_TEMPS |
| |
| entropy->next_output_byte = cinfo->dest->next_output_byte; |
| entropy->free_in_buffer = cinfo->dest->free_in_buffer; |
| |
| /* Emit restart marker if needed */ |
| if (cinfo->restart_interval) |
| if (entropy->restarts_to_go == 0) |
| emit_restart(entropy, entropy->next_restart_num); |
| |
| /* Encode the MCU data blocks */ |
| for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { |
| block = MCU_data[blkn]; |
| ci = cinfo->MCU_membership[blkn]; |
| compptr = cinfo->cur_comp_info[ci]; |
| |
| /* Compute the DC value after the required point transform by Al. |
| * This is simply an arithmetic right shift. |
| */ |
| temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al); |
| |
| /* DC differences are figured on the point-transformed values. */ |
| temp = temp2 - entropy->last_dc_val[ci]; |
| entropy->last_dc_val[ci] = temp2; |
| |
| /* Encode the DC coefficient difference per section G.1.2.1 */ |
| |
| /* This is a well-known technique for obtaining the absolute value without |
| * a branch. It is derived from an assembly language technique presented |
| * in "How to Optimize for the Pentium Processors", Copyright (c) 1996, |
| * 1997 by Agner Fog. |
| */ |
| temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); |
| temp ^= temp3; |
| temp -= temp3; /* temp is abs value of input */ |
| /* For a negative input, want temp2 = bitwise complement of abs(input) */ |
| temp2 = temp ^ temp3; |
| |
| /* Find the number of bits needed for the magnitude of the coefficient */ |
| nbits = JPEG_NBITS(temp); |
| /* Check for out-of-range coefficient values. |
| * Since we're encoding a difference, the range limit is twice as much. |
| */ |
| if (nbits > MAX_COEF_BITS + 1) |
| ERREXIT(cinfo, JERR_BAD_DCT_COEF); |
| |
| /* Count/emit the Huffman-coded symbol for the number of bits */ |
| emit_symbol(entropy, compptr->dc_tbl_no, nbits); |
| |
| /* Emit that number of bits of the value, if positive, */ |
| /* or the complement of its magnitude, if negative. */ |
| if (nbits) /* emit_bits rejects calls with size 0 */ |
| emit_bits(entropy, (unsigned int)temp2, nbits); |
| } |
| |
| cinfo->dest->next_output_byte = entropy->next_output_byte; |
| cinfo->dest->free_in_buffer = entropy->free_in_buffer; |
| |
| /* Update restart-interval state too */ |
| if (cinfo->restart_interval) { |
| if (entropy->restarts_to_go == 0) { |
| entropy->restarts_to_go = cinfo->restart_interval; |
| entropy->next_restart_num++; |
| entropy->next_restart_num &= 7; |
| } |
| entropy->restarts_to_go--; |
| } |
| |
| return TRUE; |
| } |
| |
| |
/*
 * Data preparation for encode_mcu_AC_first().
 */

/* Scan the first Sl coefficients of the band.  For each one that is still
 * nonzero after the point transform, store its magnitude in values[k] and
 * its extra bits in values[k + DCTSIZE2], and set bit k of zerobits.
 * The expansion uses the caller's k, temp, temp2, block,
 * jpeg_natural_order_start, Al, values and zerobits.
 */
#define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \
  for (k = 0; k < Sl; k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    if (temp != 0) { \
      /* The point transform by Al is an integer division that rounds \
       * towards 0 for AC coefficients.  To do that portably in C, the shift \
       * is applied to the absolute value.  temp2 is 0 for a non-negative \
       * coefficient and all ones for a negative one. \
       */ \
      temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
      temp = ((temp ^ temp2) - temp2) >> Al; \
      /* A nonzero coefficient can become zero after the point transform */ \
      if (temp != 0) { \
        values[k] = temp; \
        /* Extra bits: the complement of the magnitude if negative */ \
        values[k + DCTSIZE2] = temp2 ^ temp; \
        zerobits |= ((size_t)1U) << k; \
      } \
    } \
  } \
}
| |
| METHODDEF(void) |
| encode_mcu_AC_first_prepare(const JCOEF *block, |
| const int *jpeg_natural_order_start, int Sl, |
| int Al, JCOEF *values, size_t *bits) |
| { |
| register int k, temp, temp2; |
| size_t zerobits = 0U; |
| int Sl0 = Sl; |
| |
| #if SIZEOF_SIZE_T == 4 |
| if (Sl0 > 32) |
| Sl0 = 32; |
| #endif |
| |
| COMPUTE_ABSVALUES_AC_FIRST(Sl0); |
| |
| bits[0] = zerobits; |
| #if SIZEOF_SIZE_T == 4 |
| zerobits = 0U; |
| |
| if (Sl > 32) { |
| Sl -= 32; |
| jpeg_natural_order_start += 32; |
| values += 32; |
| |
| COMPUTE_ABSVALUES_AC_FIRST(Sl); |
| } |
| bits[1] = zerobits; |
| #endif |
| } |
| |
/*
 * MCU encoding for AC initial scan (either spectral selection,
 * or first pass of successive approximation).
 */

/* Encode every coefficient flagged in zerobits.  'label' is inserted after
 * the run-length update so that the caller can jump into the loop body with
 * r and cvalue already set up.  The expansion uses the caller's zerobits, r,
 * cvalue, temp, temp2, nbits, entropy and cinfo.
 */
#define ENCODE_COEFS_AC_FIRST(label) { \
  while (zerobits != 0) { \
    /* Skip the zero coefficients in front of the next nonzero one */ \
    r = count_zeroes(&zerobits); \
    cvalue += r; \
label \
    /* A run longer than 15 needs run-length-16 codes (0xF0) first */ \
    for (; r > 15; r -= 16) \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
    \
    temp = *cvalue;               /* magnitude */ \
    temp2 = cvalue[DCTSIZE2];     /* extra bits */ \
    \
    /* Bit count of the magnitude, which has at least one 1 bit */ \
    nbits = JPEG_NBITS_NONZERO(temp); \
    /* Reject coefficient values that are out of range */ \
    if (nbits > MAX_COEF_BITS) \
      ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
    \
    /* Huffman symbol combining run length and bit count */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
    \
    /* The value itself if positive, or the complement of its magnitude \
     * if negative \
     */ \
    emit_bits(entropy, (unsigned int)temp2, nbits); \
    \
    /* Step past the coefficient that was just coded */ \
    zerobits >>= 1; \
    cvalue++; \
  } \
}
| |
| METHODDEF(boolean) |
| encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) |
| { |
| phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
| register int temp, temp2; |
| register int nbits, r; |
| int Sl = cinfo->Se - cinfo->Ss + 1; |
| int Al = cinfo->Al; |
| JCOEF values_unaligned[2 * DCTSIZE2 + 15]; |
| JCOEF *values; |
| const JCOEF *cvalue; |
| size_t zerobits; |
| size_t bits[8 / SIZEOF_SIZE_T]; |
| |
| entropy->next_output_byte = cinfo->dest->next_output_byte; |
| entropy->free_in_buffer = cinfo->dest->free_in_buffer; |
| |
| /* Emit restart marker if needed */ |
| if (cinfo->restart_interval) |
| if (entropy->restarts_to_go == 0) |
| emit_restart(entropy, entropy->next_restart_num); |
| |
| #ifdef WITH_SIMD |
| cvalue = values = (JCOEF *)PAD((size_t)values_unaligned, 16); |
| #else |
| /* Not using SIMD, so alignment is not needed */ |
| cvalue = values = values_unaligned; |
| #endif |
| |
| /* Prepare data */ |
| entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss, |
| Sl, Al, values, bits); |
| |
| zerobits = bits[0]; |
| #if SIZEOF_SIZE_T == 4 |
| zerobits |= bits[1]; |
| #endif |
| |
| /* Emit any pending EOBRUN */ |
| if (zerobits && (entropy->EOBRUN > 0)) |
| emit_eobrun(entropy); |
| |
| #if SIZEOF_SIZE_T == 4 |
| zerobits = bits[0]; |
| #endif |
| |
| /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */ |
| |
| ENCODE_COEFS_AC_FIRST((void)0;); |
| |
| #if SIZEOF_SIZE_T == 4 |
| zerobits = bits[1]; |
| if (zerobits) { |
| int diff = ((values + DCTSIZE2 / 2) - cvalue); |
| r = count_zeroes(&zerobits); |
| r += diff; |
| cvalue += r; |
| goto first_iter_ac_first; |
| } |
| |
| ENCODE_COEFS_AC_FIRST(first_iter_ac_first:); |
| #endif |
| |
| if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */ |
| entropy->EOBRUN++; /* count an EOB */ |
| if (entropy->EOBRUN == 0x7FFF) |
| emit_eobrun(entropy); /* force it out to avoid overflow */ |
| } |
| |
| cinfo->dest->next_output_byte = entropy->next_output_byte; |
| cinfo->dest->free_in_buffer = entropy->free_in_buffer; |
| |
| /* Update restart-interval state too */ |
| if (cinfo->restart_interval) { |
| if (entropy->restarts_to_go == 0) { |
| entropy->restarts_to_go = cinfo->restart_interval; |
| entropy->next_restart_num++; |
| entropy->next_restart_num &= 7; |
| } |
| entropy->restarts_to_go--; |
| } |
| |
| return TRUE; |
| } |
| |
| |
| /* |
| * MCU encoding for DC successive approximation refinement scan. |
| * Note: we assume such scans can be multi-component, although the spec |
| * is not very clear on the point. |
| */ |
| |
| METHODDEF(boolean) |
| encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) |
| { |
| phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
| register int temp; |
| int blkn; |
| int Al = cinfo->Al; |
| JBLOCKROW block; |
| |
| entropy->next_output_byte = cinfo->dest->next_output_byte; |
| entropy->free_in_buffer = cinfo->dest->free_in_buffer; |
| |
| /* Emit restart marker if needed */ |
| if (cinfo->restart_interval) |
| if (entropy->restarts_to_go == 0) |
| emit_restart(entropy, entropy->next_restart_num); |
| |
| /* Encode the MCU data blocks */ |
| for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { |
| block = MCU_data[blkn]; |
| |
| /* We simply emit the Al'th bit of the DC coefficient value. */ |
| temp = (*block)[0]; |
| emit_bits(entropy, (unsigned int)(temp >> Al), 1); |
| } |
| |
| cinfo->dest->next_output_byte = entropy->next_output_byte; |
| cinfo->dest->free_in_buffer = entropy->free_in_buffer; |
| |
| /* Update restart-interval state too */ |
| if (cinfo->restart_interval) { |
| if (entropy->restarts_to_go == 0) { |
| entropy->restarts_to_go = cinfo->restart_interval; |
| entropy->next_restart_num++; |
| entropy->next_restart_num &= 7; |
| } |
| entropy->restarts_to_go--; |
| } |
| |
| return TRUE; |
| } |
| |
| |
/*
 * Data preparation for encode_mcu_AC_refine().
 */

/* Pre-pass over the first Sl coefficients of the band.  It stores every
 * point-transformed magnitude in absvalues[k], sets bit k of zerobits for a
 * nonzero magnitude, sets bit k of signbits for a nonzero coefficient that
 * is not negative, and leaves in EOB the index (plus koffset) of the last
 * coefficient whose magnitude is exactly 1.  The expansion uses the
 * caller's k, temp, temp2, block, jpeg_natural_order_start, Al, absvalues,
 * zerobits, signbits and EOB.
 */
#define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \
  for (k = 0; k < Sl; k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    /* The point transform by Al is an integer division that rounds towards \
     * 0 for AC coefficients.  To do that portably in C, the shift is \
     * applied to the absolute value.  temp2 is 0 for a non-negative \
     * coefficient and all ones for a negative one. \
     */ \
    temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
    temp = ((temp ^ temp2) - temp2) >> Al; \
    absvalues[k] = (JCOEF)temp;   /* kept for the main pass */ \
    if (temp != 0) { \
      zerobits |= ((size_t)1U) << k; \
      signbits |= ((size_t)(temp2 + 1)) << k; \
      if (temp == 1) \
        EOB = k + koffset;  /* last newly-nonzero coefficient so far */ \
    } \
  } \
}
| |
| METHODDEF(int) |
| encode_mcu_AC_refine_prepare(const JCOEF *block, |
| const int *jpeg_natural_order_start, int Sl, |
| int Al, JCOEF *absvalues, size_t *bits) |
| { |
| register int k, temp, temp2; |
| int EOB = 0; |
| size_t zerobits = 0U, signbits = 0U; |
| int Sl0 = Sl; |
| |
| #if SIZEOF_SIZE_T == 4 |
| if (Sl0 > 32) |
| Sl0 = 32; |
| #endif |
| |
| COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0); |
| |
| bits[0] = zerobits; |
| #if SIZEOF_SIZE_T == 8 |
| bits[1] = signbits; |
| #else |
| bits[2] = signbits; |
| |
| zerobits = 0U; |
| signbits = 0U; |
| |
| if (Sl > 32) { |
| Sl -= 32; |
| jpeg_natural_order_start += 32; |
| absvalues += 32; |
| |
| COMPUTE_ABSVALUES_AC_REFINE(Sl, 32); |
| } |
| |
| bits[1] = zerobits; |
| bits[3] = signbits; |
| #endif |
| |
| return EOB; |
| } |
| |
| |
/*
 * MCU encoding for AC successive approximation refinement scan.
 */

/* Process every coefficient flagged in zerobits.  'label' is inserted after
 * the run-length update so that the caller can jump into the loop body with
 * r, cabsvalue and signbits already set up.  The expansion uses the
 * caller's zerobits, signbits, r, cabsvalue, EOBPTR, BR, BR_buffer, temp
 * and entropy.
 */
#define ENCODE_COEFS_AC_REFINE(label) { \
  while (zerobits != 0) { \
    /* Skip the zero coefficients in front of the next nonzero one */ \
    int idx = count_zeroes(&zerobits); \
    r += idx; \
    cabsvalue += idx; \
    signbits >>= idx; \
label \
    /* Emit the ZRLs that are needed, except those that can be folded into \
     * an EOB \
     */ \
    while ((cabsvalue <= EOBPTR) && r > 15) { \
      /* Pending EOBRUN and its BE correction bits go first */ \
      emit_eobrun(entropy); \
      /* The ZRL itself */ \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
      r -= 16; \
      /* Correction bits buffered so far belong to this ZRL */ \
      emit_buffered_bits(entropy, BR_buffer, BR); \
      BR_buffer = entropy->bit_buffer;  /* the BE bits are gone now */ \
      BR = 0; \
    } \
    \
    temp = *cabsvalue++; \
    \
    if (temp > 1) { \
      /* A coefficient that was already nonzero needs only a correction \
       * bit, which is the next bit of its absolute value. \
       * NOTE: a straight translation of the spec's figure G.7 would \
       * suggest testing r > 15 here as well.  But with r > 15 this point \
       * is reached only if k > EOB, which implies that the coefficient is \
       * not 1. \
       */ \
      BR_buffer[BR++] = (char)(temp & 1); \
    } else { \
      /* Newly nonzero coefficient.  Pending EOBRUN and its BE correction \
       * bits go first. \
       */ \
      emit_eobrun(entropy); \
      \
      /* Huffman symbol for the run length and a bit count of 1 */ \
      emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \
      \
      /* Sign bit: 0 for a negative coefficient, 1 otherwise */ \
      temp = signbits & 1; \
      emit_bits(entropy, (unsigned int)temp, 1); \
      \
      /* Correction bits buffered so far belong to this code */ \
      emit_buffered_bits(entropy, BR_buffer, BR); \
      BR_buffer = entropy->bit_buffer;  /* the BE bits are gone now */ \
      BR = 0; \
      r = 0;                            /* the zero run starts over */ \
    } \
    \
    /* Step past the coefficient that was just handled */ \
    signbits >>= 1; \
    zerobits >>= 1; \
  } \
}
| |
| METHODDEF(boolean) |
| encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) |
| { |
| phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
| register int temp, r; |
| char *BR_buffer; |
| unsigned int BR; |
| int Sl = cinfo->Se - cinfo->Ss + 1; |
| int Al = cinfo->Al; |
| JCOEF absvalues_unaligned[DCTSIZE2 + 15]; |
| JCOEF *absvalues; |
| const JCOEF *cabsvalue, *EOBPTR; |
| size_t zerobits, signbits; |
| size_t bits[16 / SIZEOF_SIZE_T]; |
| |
| entropy->next_output_byte = cinfo->dest->next_output_byte; |
| entropy->free_in_buffer = cinfo->dest->free_in_buffer; |
| |
| /* Emit restart marker if needed */ |
| if (cinfo->restart_interval) |
| if (entropy->restarts_to_go == 0) |
| emit_restart(entropy, entropy->next_restart_num); |
| |
| #ifdef WITH_SIMD |
| cabsvalue = absvalues = (JCOEF *)PAD((size_t)absvalues_unaligned, 16); |
| #else |
| /* Not using SIMD, so alignment is not needed */ |
| cabsvalue = absvalues = absvalues_unaligned; |
| #endif |
| |
| /* Prepare data */ |
| EOBPTR = absvalues + |
| entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss, |
| Sl, Al, absvalues, bits); |
| |
| /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */ |
| |
| r = 0; /* r = run length of zeros */ |
| BR = 0; /* BR = count of buffered bits added now */ |
| BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */ |
| |
| zerobits = bits[0]; |
| #if SIZEOF_SIZE_T == 8 |
| signbits = bits[1]; |
| #else |
| signbits = bits[2]; |
| #endif |
| ENCODE_COEFS_AC_REFINE((void)0;); |
| |
| #if SIZEOF_SIZE_T == 4 |
| zerobits = bits[1]; |
| signbits = bits[3]; |
| |
| if (zerobits) { |
| int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue); |
| int idx = count_zeroes(&zerobits); |
| signbits >>= idx; |
| idx += diff; |
| r += idx; |
| cabsvalue += idx; |
| goto first_iter_ac_refine; |
| } |
| |
| ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:); |
| #endif |
| |
| r |= (int)((absvalues + Sl) - cabsvalue); |
| |
| if (r > 0 || BR > 0) { /* If there are trailing zeroes, */ |
| entropy->EOBRUN++; /* count an EOB */ |
| entropy->BE += BR; /* concat my correction bits to older ones */ |
| /* We force out the EOB if we risk either: |
| * 1. overflow of the EOB counter; |
| * 2. overflow of the correction bit buffer during the next MCU. |
| */ |
| if (entropy->EOBRUN == 0x7FFF || |
| entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1)) |
| emit_eobrun(entropy); |
| } |
| |
| cinfo->dest->next_output_byte = entropy->next_output_byte; |
| cinfo->dest->free_in_buffer = entropy->free_in_buffer; |
| |
| /* Update restart-interval state too */ |
| if (cinfo->restart_interval) { |
| if (entropy->restarts_to_go == 0) { |
| entropy->restarts_to_go = cinfo->restart_interval; |
| entropy->next_restart_num++; |
| entropy->next_restart_num &= 7; |
| } |
| entropy->restarts_to_go--; |
| } |
| |
| return TRUE; |
| } |
| |
| |
| /* |
| * Finish up at the end of a Huffman-compressed progressive scan. |
| */ |
| |
| METHODDEF(void) |
| finish_pass_phuff(j_compress_ptr cinfo) |
| { |
| phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
| |
| entropy->next_output_byte = cinfo->dest->next_output_byte; |
| entropy->free_in_buffer = cinfo->dest->free_in_buffer; |
| |
| /* Flush out any buffered data */ |
| emit_eobrun(entropy); |
| flush_bits(entropy); |
| |
| cinfo->dest->next_output_byte = entropy->next_output_byte; |
| cinfo->dest->free_in_buffer = entropy->free_in_buffer; |
| } |
| |
| |
| /* |
| * Finish up a statistics-gathering pass and create the new Huffman tables. |
| */ |
| |
| METHODDEF(void) |
| finish_pass_gather_phuff(j_compress_ptr cinfo) |
| { |
| phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
| boolean is_DC_band; |
| int ci, tbl; |
| jpeg_component_info *compptr; |
| JHUFF_TBL **htblptr; |
| boolean did[NUM_HUFF_TBLS]; |
| |
| /* Flush out buffered data (all we care about is counting the EOB symbol) */ |
| emit_eobrun(entropy); |
| |
| is_DC_band = (cinfo->Ss == 0); |
| |
| /* It's important not to apply jpeg_gen_optimal_table more than once |
| * per table, because it clobbers the input frequency counts! |
| */ |
| MEMZERO(did, sizeof(did)); |
| |
| for (ci = 0; ci < cinfo->comps_in_scan; ci++) { |
| compptr = cinfo->cur_comp_info[ci]; |
| if (is_DC_band) { |
| if (cinfo->Ah != 0) /* DC refinement needs no table */ |
| continue; |
| tbl = compptr->dc_tbl_no; |
| } else { |
| tbl = compptr->ac_tbl_no; |
| } |
| if (!did[tbl]) { |
| if (is_DC_band) |
| htblptr = &cinfo->dc_huff_tbl_ptrs[tbl]; |
| else |
| htblptr = &cinfo->ac_huff_tbl_ptrs[tbl]; |
| if (*htblptr == NULL) |
| *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo); |
| jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]); |
| did[tbl] = TRUE; |
| } |
| } |
| } |
| |
| |
| /* |
| * Module initialization routine for progressive Huffman entropy encoding. |
| */ |
| |
| GLOBAL(void) |
| jinit_phuff_encoder(j_compress_ptr cinfo) |
| { |
| phuff_entropy_ptr entropy; |
| int i; |
| |
| entropy = (phuff_entropy_ptr) |
| (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, |
| sizeof(phuff_entropy_encoder)); |
| cinfo->entropy = (struct jpeg_entropy_encoder *)entropy; |
| entropy->pub.start_pass = start_pass_phuff; |
| |
| /* Mark tables unallocated */ |
| for (i = 0; i < NUM_HUFF_TBLS; i++) { |
| entropy->derived_tbls[i] = NULL; |
| entropy->count_ptrs[i] = NULL; |
| } |
| entropy->bit_buffer = NULL; /* needed only in AC refinement scan */ |
| } |
| |
| #endif /* C_PROGRESSIVE_SUPPORTED */ |