uninline ShannonEntropy/BitsEntropy PiperOrigin-RevId: 794474428
diff --git a/c/enc/bit_cost.c b/c/enc/bit_cost.c index e0ba2b0..059f8d9 100644 --- a/c/enc/bit_cost.c +++ b/c/enc/bit_cost.c
@@ -8,12 +8,41 @@ #include "bit_cost.h" -// #include "../common/platform.h" +#include "../common/platform.h" +#include "fast_log.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif +double BrotliBitsEntropy(const uint32_t* population, size_t size) { + size_t sum = 0; + double retval = 0; + const uint32_t* population_end = population + size; + size_t p; + if (size & 1) { + goto odd_number_of_elements_left; + } + while (population < population_end) { + p = *population++; + sum += p; + retval -= (double)p * FastLog2(p); + odd_number_of_elements_left: + p = *population++; + sum += p; + retval -= (double)p * FastLog2(p); + } + if (sum) retval += (double)sum * FastLog2(sum); + + if (retval < (double)sum) { + /* TODO(eustas): consider doing that per-symbol? */ + /* At least one bit per literal is needed. */ + retval = (double)sum; + } + + return retval; +} + #define FN(X) X ## Literal #include "bit_cost_inc.h" /* NOLINT(build/include) */ #undef FN
diff --git a/c/enc/bit_cost.h b/c/enc/bit_cost.h index 230b533..90313c4 100644 --- a/c/enc/bit_cost.h +++ b/c/enc/bit_cost.h
@@ -10,50 +10,20 @@ #define BROTLI_ENC_BIT_COST_H_ #include "../common/platform.h" -#include "fast_log.h" #include "histogram.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { #endif -static BROTLI_INLINE double ShannonEntropy( - const uint32_t* population, size_t size, size_t* total) { - size_t sum = 0; - double retval = 0; - const uint32_t* population_end = population + size; - size_t p; - if (size & 1) { - goto odd_number_of_elements_left; - } - while (population < population_end) { - p = *population++; - sum += p; - retval -= (double)p * FastLog2(p); - odd_number_of_elements_left: - p = *population++; - sum += p; - retval -= (double)p * FastLog2(p); - } - if (sum) retval += (double)sum * FastLog2(sum); - *total = sum; - return retval; -} - -static BROTLI_INLINE double BitsEntropy( - const uint32_t* population, size_t size) { - size_t sum; - double retval = ShannonEntropy(population, size, &sum); - if (retval < (double)sum) { - /* At least one bit per literal is needed. */ - retval = (double)sum; - } - return retval; -} - -BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*); -BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*); -BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*); +BROTLI_INTERNAL double BrotliBitsEntropy( + const uint32_t* population, size_t size); +BROTLI_INTERNAL double BrotliPopulationCostLiteral( + const HistogramLiteral* histogram); +BROTLI_INTERNAL double BrotliPopulationCostCommand( + const HistogramCommand* histogram); +BROTLI_INTERNAL double BrotliPopulationCostDistance( + const HistogramDistance* histogram); #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */
diff --git a/c/enc/bit_cost_inc.h b/c/enc/bit_cost_inc.h index 453c226..8e67f25 100644 --- a/c/enc/bit_cost_inc.h +++ b/c/enc/bit_cost_inc.h
@@ -119,7 +119,7 @@ /* Add the estimated encoding cost of the code length code histogram. */ bits += (double)(18 + 2 * max_depth); /* Add the entropy of the code length code histogram. */ - bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES); + bits += BrotliBitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES); } return bits; }
diff --git a/c/enc/compress_fragment_two_pass.c b/c/enc/compress_fragment_two_pass.c index cee2f69..01e4794 100644 --- a/c/enc/compress_fragment_two_pass.c +++ b/c/enc/compress_fragment_two_pass.c
@@ -535,7 +535,8 @@ for (i = 0; i < input_size; i += SAMPLE_RATE) { ++s->lit_histo[input[i]]; } - return TO_BROTLI_BOOL(BitsEntropy(s->lit_histo, 256) < max_total_bit_cost); + return TO_BROTLI_BOOL( + BrotliBitsEntropy(s->lit_histo, 256) < max_total_bit_cost); } }
diff --git a/c/enc/encode.c b/c/enc/encode.c index 294be2b..417cc25 100644 --- a/c/enc/encode.c +++ b/c/enc/encode.c
@@ -241,12 +241,25 @@ s->cmd_code_numbits = kDefaultCommandCodeNumBits; } +/* TODO(eustas): avoid FP calculations. */ +static double EstimateEntropy(const uint32_t* population, size_t size) { + size_t total = 0; + double result = 0; + for (size_t i = 0; i < size; ++i) { + uint32_t p = population[i]; + total += p; + result += (double)p * FastLog2(p); + } + result = (double)total * FastLog2(total) - result; + return result; +} + /* Decide about the context map based on the ability of the prediction ability of the previous byte UTF8-prefix on the next byte. The prediction ability is calculated as Shannon entropy. Here we need - Shannon entropy instead of 'BitsEntropy' since the prefix will be + Shannon entropy instead of 'BrotliBitsEntropy' since the prefix will be encoded with the remaining 6 bits of the following byte, and - BitsEntropy will assume that symbol to be stored alone using Huffman + BrotliBitsEntropy will assume that symbol to be stored alone using Huffman coding. */ static void ChooseContextMap(int quality, uint32_t* bigram_histo, @@ -271,18 +284,17 @@ uint32_t two_prefix_histo[6] = { 0 }; size_t total; size_t i; - size_t sink; double entropy[4]; for (i = 0; i < 9; ++i) { monogram_histo[i % 3] += bigram_histo[i]; two_prefix_histo[i % 6] += bigram_histo[i]; } - entropy[1] = ShannonEntropy(monogram_histo, 3, &sink); - entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &sink) + - ShannonEntropy(two_prefix_histo + 3, 3, &sink)); + entropy[1] = EstimateEntropy(monogram_histo, 3); + entropy[2] = (EstimateEntropy(two_prefix_histo, 3) + + EstimateEntropy(two_prefix_histo + 3, 3)); entropy[3] = 0; for (i = 0; i < 3; ++i) { - entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &sink); + entropy[3] += EstimateEntropy(bigram_histo + 3 * i, 3); } total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2]; @@ -349,7 +361,6 @@ uint32_t* BROTLI_RESTRICT const context_histo = arena + 32; uint32_t total = 0; double entropy[3]; - size_t sink; size_t i; ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8); memset(arena, 0, sizeof(arena[0]) * 32 * (BROTLI_MAX_STATIC_CONTEXTS + 1)); @@ -371,10 +382,10 @@ prev1 = literal; } } - entropy[1] = ShannonEntropy(combined_histo, 32, &sink); + entropy[1] = EstimateEntropy(combined_histo, 32); entropy[2] = 0; for (i = 0; i < BROTLI_MAX_STATIC_CONTEXTS; ++i) { - entropy[2] += ShannonEntropy(context_histo + (i << 5), 32, &sink); + entropy[2] += EstimateEntropy(context_histo + (i << 5), 32); } entropy[0] = 1.0 / (double)total; entropy[1] *= entropy[0]; @@ -449,7 +460,7 @@ ++literal_histo[data[pos & mask]]; pos += kSampleRate; } - if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) { + if (BrotliBitsEntropy(literal_histo, 256) > bit_cost_threshold) { return BROTLI_FALSE; } }
diff --git a/c/enc/metablock.c b/c/enc/metablock.c index c9bed76..dfe8120 100644 --- a/c/enc/metablock.c +++ b/c/enc/metablock.c
@@ -398,7 +398,7 @@ for (i = 0; i < num_contexts; ++i) { last_entropy[i] = - BitsEntropy(histograms[i].data_, self->alphabet_size_); + BrotliBitsEntropy(histograms[i].data_, self->alphabet_size_); last_entropy[num_contexts + i] = last_entropy[i]; } ++self->num_blocks_; @@ -424,15 +424,15 @@ for (i = 0; i < num_contexts; ++i) { size_t curr_histo_ix = self->curr_histogram_ix_ + i; size_t j; - entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_, - self->alphabet_size_); + entropy[i] = BrotliBitsEntropy(histograms[curr_histo_ix].data_, + self->alphabet_size_); for (j = 0; j < 2; ++j) { size_t jx = j * num_contexts + i; size_t last_histogram_ix = self->last_histogram_ix_[j] + i; combined_histo[jx] = histograms[curr_histo_ix]; HistogramAddHistogramLiteral(&combined_histo[jx], &histograms[last_histogram_ix]); - combined_entropy[jx] = BitsEntropy( + combined_entropy[jx] = BrotliBitsEntropy( &combined_histo[jx].data_[0], self->alphabet_size_); diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx]; }
diff --git a/c/enc/metablock_inc.h b/c/enc/metablock_inc.h index f939386..e7a7bc8 100644 --- a/c/enc/metablock_inc.h +++ b/c/enc/metablock_inc.h
@@ -96,7 +96,7 @@ split->lengths[0] = (uint32_t)self->block_size_; split->types[0] = 0; last_entropy[0] = - BitsEntropy(histograms[0].data_, self->alphabet_size_); + BrotliBitsEntropy(histograms[0].data_, self->alphabet_size_); last_entropy[1] = last_entropy[0]; ++self->num_blocks_; ++split->num_types; @@ -105,8 +105,8 @@ FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); self->block_size_ = 0; } else if (self->block_size_ > 0) { - double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_, - self->alphabet_size_); + double entropy = BrotliBitsEntropy( + histograms[self->curr_histogram_ix_].data_, self->alphabet_size_); double combined_entropy[2]; double diff[2]; size_t j; @@ -115,7 +115,7 @@ self->combined_histo[j] = histograms[self->curr_histogram_ix_]; FN(HistogramAddHistogram)(&self->combined_histo[j], &histograms[last_histogram_ix]); - combined_entropy[j] = BitsEntropy( + combined_entropy[j] = BrotliBitsEntropy( &self->combined_histo[j].data_[0], self->alphabet_size_); diff[j] = combined_entropy[j] - entropy - last_entropy[j]; }