blob: b7a87061649e33f2dd5601a70635adc341c9762a [file] [log] [blame]
/* Copyright 2025 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Lookup table for static dictionary and transforms. */
#include "static_dict_lut.h"
#include "../common/platform.h" /* IWYU pragma: keep */
#include "static_init.h"
#if (BROTLI_STATIC_INIT != BROTLI_STATIC_INIT_NONE)
#include "../common/dictionary.h"
#include "../common/transform.h"
#include "hash_base.h"
#endif
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if (BROTLI_STATIC_INIT != BROTLI_STATIC_INIT_NONE)
/* TODO(eustas): deal with largest bucket(s). Not it contains 163 items. */
static BROTLI_BOOL BROTLI_COLD DoBrotliEncoderInitStaticDictionaryLut(
const BrotliDictionary* dict, uint16_t* buckets, DictWord* words,
void* arena) {
DictWord* slots = (DictWord*)arena;
uint16_t* heads = (uint16_t*)(slots + BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS);
uint16_t* counts = heads + BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS;
uint16_t* prev = counts + BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS;
size_t next_slot = 0;
uint8_t transformed_word[24];
uint8_t transformed_other_word[24];
size_t l;
size_t pos;
size_t i;
memset(counts, 0, BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS * sizeof(uint16_t));
memset(heads, 0, BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS * sizeof(uint16_t));
memset(prev, 0, BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS * sizeof(uint16_t));
for (l = 4; l <= 24; ++l) {
size_t n = 1u << dict->size_bits_by_length[l];
const uint8_t* dict_words = dict->data + dict->offsets_by_length[l];
for (i = 0; i < n; ++i) {
const uint8_t* dict_word = dict_words + l * i;
uint32_t key = Hash15(dict_word);
slots[next_slot].len = (uint8_t)l;
slots[next_slot].transform = BROTLI_TRANSFORM_IDENTITY;
slots[next_slot].idx = (uint16_t)i;
prev[next_slot] = heads[key];
heads[key] = (uint16_t)next_slot;
counts[key]++;
++next_slot;
}
for (i = 0; i < n; ++i) {
uint32_t key;
uint32_t prefix;
BROTLI_BOOL found;
size_t curr;
const uint8_t* dict_word = dict_words + l * i;
if (dict_word[0] < 'a' || dict_word[0] > 'z') continue;
memcpy(transformed_word, dict_word, l);
transformed_word[0] = transformed_word[0] - 32;
key = Hash15(transformed_word);
prefix = BROTLI_UNALIGNED_LOAD32LE(transformed_word) & ~0x20202020u;
found = BROTLI_FALSE;
curr = heads[key];
while (curr != 0) {
const uint8_t* other_word;
uint32_t other_prefix;
if (slots[curr].len != l) break;
other_word = dict_words + l * slots[curr].idx;
other_prefix = BROTLI_UNALIGNED_LOAD32LE(other_word) & ~0x20202020u;
if (prefix == other_prefix) {
if (memcmp(transformed_word, other_word, l) == 0) {
found = BROTLI_TRUE;
break;
}
}
curr = prev[curr];
}
if (found) continue;
slots[next_slot].len = (uint8_t)l;
slots[next_slot].transform = BROTLI_TRANSFORM_UPPERCASE_FIRST;
slots[next_slot].idx = (uint16_t)i;
prev[next_slot] = heads[key];
heads[key] = (uint16_t)next_slot;
counts[key]++;
++next_slot;
}
for (i = 0; i < n; ++i) {
const uint8_t* dict_word = dict_words + l * i;
BROTLI_BOOL is_ascii = BROTLI_TRUE;
BROTLI_BOOL has_lower = BROTLI_FALSE;
size_t k;
uint32_t prefix;
uint32_t key;
size_t curr;
BROTLI_BOOL found;
for (k = 0; k < l; ++k) {
if (dict_word[k] >= 128) is_ascii = BROTLI_FALSE;
if (k > 0 && dict_word[k] >= 'a' && dict_word[k] <= 'z')
has_lower = BROTLI_TRUE;
}
if (!is_ascii || !has_lower) continue;
memcpy(transformed_word, dict_word, l);
prefix = BROTLI_UNALIGNED_LOAD32LE(transformed_word) & ~0x20202020u;
for (k = 0; k < l; ++k) {
if (transformed_word[k] >= 'a' && transformed_word[k] <= 'z') {
transformed_word[k] = transformed_word[k] - 32;
}
}
key = Hash15(transformed_word);
found = BROTLI_FALSE;
curr = heads[key];
while (curr != 0) {
const uint8_t* other_word;
uint32_t other_prefix;
if (slots[curr].len != l) break;
other_word = dict_words + l * slots[curr].idx;
other_prefix = BROTLI_UNALIGNED_LOAD32LE(other_word) & ~0x20202020u;
if (prefix == other_prefix) {
if (slots[curr].transform == BROTLI_TRANSFORM_IDENTITY) {
if (memcmp(transformed_word, other_word, l) == 0) {
found = BROTLI_TRUE;
break;
}
} else if (slots[curr].transform ==
BROTLI_TRANSFORM_UPPERCASE_FIRST) {
if ((transformed_word[0] == (other_word[0] - 32)) &&
memcmp(transformed_word + 1, other_word + 1, l - 1) == 0) {
found = BROTLI_TRUE;
break;
}
} else {
for (k = 0; k < l; ++k) {
if (other_word[k] >= 'a' && other_word[k] <= 'z') {
transformed_other_word[k] = other_word[k] - 32;
} else {
transformed_other_word[k] = other_word[k];
}
}
if (memcmp(transformed_word, transformed_other_word, l) == 0) {
found = BROTLI_TRUE;
break;
}
}
}
curr = prev[curr];
}
if (found) {
continue;
}
slots[next_slot].len = (uint8_t)l;
slots[next_slot].transform = BROTLI_TRANSFORM_UPPERCASE_ALL;
slots[next_slot].idx = (uint16_t)i;
prev[next_slot] = heads[key];
heads[key] = (uint16_t)next_slot;
counts[key]++;
++next_slot;
}
}
if (next_slot != 31704) return BROTLI_FALSE;
pos = 0;
/* Unused; makes offsets start from 1. */
words[pos].len = 0;
words[pos].transform = 0;
words[pos].idx = 0;
pos++;
for (i = 0; i < BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS; ++i) {
size_t num_words = counts[i];
size_t curr;
if (num_words == 0) {
buckets[i] = 0;
continue;
}
buckets[i] = (uint16_t)pos;
curr = heads[i];
pos += num_words;
for (size_t k = 0; k < num_words; ++k) {
words[pos - 1 - k] = slots[curr];
curr = prev[curr];
}
words[pos - 1].len |= 0x80;
}
return BROTLI_TRUE;
}
BROTLI_BOOL BrotliEncoderInitStaticDictionaryLut(
const BrotliDictionary* dict, uint16_t* buckets, DictWord* words) {
size_t arena_size =
BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS *
(sizeof(uint16_t) + sizeof(DictWord)) +
BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS * 2 * sizeof(uint16_t);
void* arena = malloc(arena_size);
BROTLI_BOOL ok;
if (arena == NULL) {
return BROTLI_FALSE;
}
ok = DoBrotliEncoderInitStaticDictionaryLut(dict, buckets, words, arena);
free(arena);
return ok;
}
BROTLI_MODEL("small")
uint16_t kStaticDictionaryBuckets[BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS];
BROTLI_MODEL("small")
DictWord kStaticDictionaryWords[BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS];
#else /* BROTLI_STATIC_INIT */
/* Embed kStaticDictionaryBuckets and kStaticDictionaryWords. */
#include "static_dict_lut_inc.h"
#endif /* BROTLI_STATIC_INIT */
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif