| /* Copyright 2025 Google Inc. All Rights Reserved. |
| |
| Distributed under MIT license. |
| See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| */ |
| |
| /* Lookup table for static dictionary and transforms. */ |
| |
| #include "static_dict_lut.h" |
| |
| #include "../common/platform.h" /* IWYU pragma: keep */ |
| #include "static_init.h" |
| |
| #if (BROTLI_STATIC_INIT != BROTLI_STATIC_INIT_NONE) |
| #include "../common/dictionary.h" |
| #include "../common/transform.h" |
| #include "hash_base.h" |
| #endif |
| |
| #if defined(__cplusplus) || defined(c_plusplus) |
| extern "C" { |
| #endif |
| |
| #if (BROTLI_STATIC_INIT != BROTLI_STATIC_INIT_NONE) |
| |
| /* TODO(eustas): deal with largest bucket(s). Not it contains 163 items. */ |
| static BROTLI_BOOL BROTLI_COLD DoBrotliEncoderInitStaticDictionaryLut( |
| const BrotliDictionary* dict, uint16_t* buckets, DictWord* words, |
| void* arena) { |
| DictWord* slots = (DictWord*)arena; |
| uint16_t* heads = (uint16_t*)(slots + BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS); |
| uint16_t* counts = heads + BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS; |
| uint16_t* prev = counts + BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS; |
| size_t next_slot = 0; |
| uint8_t transformed_word[24]; |
| uint8_t transformed_other_word[24]; |
| size_t l; |
| size_t pos; |
| size_t i; |
| |
| memset(counts, 0, BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS * sizeof(uint16_t)); |
| memset(heads, 0, BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS * sizeof(uint16_t)); |
| memset(prev, 0, BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS * sizeof(uint16_t)); |
| |
| for (l = 4; l <= 24; ++l) { |
| size_t n = 1u << dict->size_bits_by_length[l]; |
| const uint8_t* dict_words = dict->data + dict->offsets_by_length[l]; |
| for (i = 0; i < n; ++i) { |
| const uint8_t* dict_word = dict_words + l * i; |
| uint32_t key = Hash15(dict_word); |
| slots[next_slot].len = (uint8_t)l; |
| slots[next_slot].transform = BROTLI_TRANSFORM_IDENTITY; |
| slots[next_slot].idx = (uint16_t)i; |
| prev[next_slot] = heads[key]; |
| heads[key] = (uint16_t)next_slot; |
| counts[key]++; |
| ++next_slot; |
| } |
| for (i = 0; i < n; ++i) { |
| uint32_t key; |
| uint32_t prefix; |
| BROTLI_BOOL found; |
| size_t curr; |
| const uint8_t* dict_word = dict_words + l * i; |
| if (dict_word[0] < 'a' || dict_word[0] > 'z') continue; |
| memcpy(transformed_word, dict_word, l); |
| transformed_word[0] = transformed_word[0] - 32; |
| key = Hash15(transformed_word); |
| prefix = BROTLI_UNALIGNED_LOAD32LE(transformed_word) & ~0x20202020u; |
| found = BROTLI_FALSE; |
| curr = heads[key]; |
| while (curr != 0) { |
| const uint8_t* other_word; |
| uint32_t other_prefix; |
| if (slots[curr].len != l) break; |
| other_word = dict_words + l * slots[curr].idx; |
| other_prefix = BROTLI_UNALIGNED_LOAD32LE(other_word) & ~0x20202020u; |
| if (prefix == other_prefix) { |
| if (memcmp(transformed_word, other_word, l) == 0) { |
| found = BROTLI_TRUE; |
| break; |
| } |
| } |
| curr = prev[curr]; |
| } |
| if (found) continue; |
| slots[next_slot].len = (uint8_t)l; |
| slots[next_slot].transform = BROTLI_TRANSFORM_UPPERCASE_FIRST; |
| slots[next_slot].idx = (uint16_t)i; |
| prev[next_slot] = heads[key]; |
| heads[key] = (uint16_t)next_slot; |
| counts[key]++; |
| ++next_slot; |
| } |
| for (i = 0; i < n; ++i) { |
| const uint8_t* dict_word = dict_words + l * i; |
| BROTLI_BOOL is_ascii = BROTLI_TRUE; |
| BROTLI_BOOL has_lower = BROTLI_FALSE; |
| size_t k; |
| uint32_t prefix; |
| uint32_t key; |
| size_t curr; |
| BROTLI_BOOL found; |
| for (k = 0; k < l; ++k) { |
| if (dict_word[k] >= 128) is_ascii = BROTLI_FALSE; |
| if (k > 0 && dict_word[k] >= 'a' && dict_word[k] <= 'z') |
| has_lower = BROTLI_TRUE; |
| } |
| if (!is_ascii || !has_lower) continue; |
| memcpy(transformed_word, dict_word, l); |
| prefix = BROTLI_UNALIGNED_LOAD32LE(transformed_word) & ~0x20202020u; |
| for (k = 0; k < l; ++k) { |
| if (transformed_word[k] >= 'a' && transformed_word[k] <= 'z') { |
| transformed_word[k] = transformed_word[k] - 32; |
| } |
| } |
| key = Hash15(transformed_word); |
| found = BROTLI_FALSE; |
| curr = heads[key]; |
| while (curr != 0) { |
| const uint8_t* other_word; |
| uint32_t other_prefix; |
| if (slots[curr].len != l) break; |
| other_word = dict_words + l * slots[curr].idx; |
| other_prefix = BROTLI_UNALIGNED_LOAD32LE(other_word) & ~0x20202020u; |
| if (prefix == other_prefix) { |
| if (slots[curr].transform == BROTLI_TRANSFORM_IDENTITY) { |
| if (memcmp(transformed_word, other_word, l) == 0) { |
| found = BROTLI_TRUE; |
| break; |
| } |
| } else if (slots[curr].transform == |
| BROTLI_TRANSFORM_UPPERCASE_FIRST) { |
| if ((transformed_word[0] == (other_word[0] - 32)) && |
| memcmp(transformed_word + 1, other_word + 1, l - 1) == 0) { |
| found = BROTLI_TRUE; |
| break; |
| } |
| } else { |
| for (k = 0; k < l; ++k) { |
| if (other_word[k] >= 'a' && other_word[k] <= 'z') { |
| transformed_other_word[k] = other_word[k] - 32; |
| } else { |
| transformed_other_word[k] = other_word[k]; |
| } |
| } |
| if (memcmp(transformed_word, transformed_other_word, l) == 0) { |
| found = BROTLI_TRUE; |
| break; |
| } |
| } |
| } |
| curr = prev[curr]; |
| } |
| if (found) { |
| continue; |
| } |
| slots[next_slot].len = (uint8_t)l; |
| slots[next_slot].transform = BROTLI_TRANSFORM_UPPERCASE_ALL; |
| slots[next_slot].idx = (uint16_t)i; |
| prev[next_slot] = heads[key]; |
| heads[key] = (uint16_t)next_slot; |
| counts[key]++; |
| ++next_slot; |
| } |
| } |
| |
| if (next_slot != 31704) return BROTLI_FALSE; |
| pos = 0; |
| /* Unused; makes offsets start from 1. */ |
| words[pos].len = 0; |
| words[pos].transform = 0; |
| words[pos].idx = 0; |
| pos++; |
| for (i = 0; i < BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS; ++i) { |
| size_t num_words = counts[i]; |
| size_t curr; |
| if (num_words == 0) { |
| buckets[i] = 0; |
| continue; |
| } |
| buckets[i] = (uint16_t)pos; |
| curr = heads[i]; |
| pos += num_words; |
| for (size_t k = 0; k < num_words; ++k) { |
| words[pos - 1 - k] = slots[curr]; |
| curr = prev[curr]; |
| } |
| words[pos - 1].len |= 0x80; |
| } |
| return BROTLI_TRUE; |
| } |
| |
| BROTLI_BOOL BrotliEncoderInitStaticDictionaryLut( |
| const BrotliDictionary* dict, uint16_t* buckets, DictWord* words) { |
| size_t arena_size = |
| BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS * |
| (sizeof(uint16_t) + sizeof(DictWord)) + |
| BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS * 2 * sizeof(uint16_t); |
| void* arena = malloc(arena_size); |
| BROTLI_BOOL ok; |
| if (arena == NULL) { |
| return BROTLI_FALSE; |
| } |
| ok = DoBrotliEncoderInitStaticDictionaryLut(dict, buckets, words, arena); |
| free(arena); |
| return ok; |
| } |
| |
| BROTLI_MODEL("small") |
| uint16_t kStaticDictionaryBuckets[BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS]; |
| BROTLI_MODEL("small") |
| DictWord kStaticDictionaryWords[BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS]; |
| |
| #else /* BROTLI_STATIC_INIT */ |
| |
| /* Embed kStaticDictionaryBuckets and kStaticDictionaryWords. */ |
| #include "static_dict_lut_inc.h" |
| |
| #endif /* BROTLI_STATIC_INIT */ |
| |
| #if defined(__cplusplus) || defined(c_plusplus) |
| } /* extern "C" */ |
| #endif |