Compare 4 bytes when checking whether a longer match is possible.
Loading and comparing 4 bytes is about as fast as loading and comparing 1 byte, but it rejects more candidates up front, so fewer full match-length calculations are needed.
PiperOrigin-RevId: 617556847
diff --git a/c/enc/hash.h b/c/enc/hash.h
index 5677d82..ba9b0d8 100644
--- a/c/enc/hash.h
+++ b/c/enc/hash.h
@@ -574,6 +574,11 @@
}
}
}
+ /* We require matches of len >4, so raise best_len to at least 3: the
+ * 4-byte compare below starts at offset best_len - 3. */
+ if (best_len < 3) {
+ best_len = 3;
+ }
while (item == 0) {
size_t offset;
size_t distance;
@@ -586,9 +591,10 @@
limit = source_size - offset;
limit = (limit > max_length) ? max_length : limit;
if (distance > max_distance) continue;
- if (cur_ix_masked + best_len > ring_buffer_mask ||
- best_len >= limit ||
- data[cur_ix_masked + best_len] != source[offset + best_len]) {
+ if (cur_ix_masked + best_len > ring_buffer_mask || best_len >= limit ||
+ /* compare the 4 bytes at offsets best_len - 3 .. best_len (inclusive) */
+ BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
+ BrotliUnalignedRead32(&source[offset + best_len - 3])) {
continue;
}
{
diff --git a/c/enc/hash_forgetful_chain_inc.h b/c/enc/hash_forgetful_chain_inc.h
index 48e1cdc..9a8efeb 100644
--- a/c/enc/hash_forgetful_chain_inc.h
+++ b/c/enc/hash_forgetful_chain_inc.h
@@ -241,6 +241,11 @@
}
}
}
+ /* We require matches of len >4, so raise best_len to at least 3: the
+ * 4-byte compare below starts at offset best_len - 3. */
+ if (best_len < 3) {
+ best_len = 3;
+ }
{
const size_t bank = key & (NUM_BANKS - 1);
size_t backward = 0;
@@ -257,7 +262,9 @@
delta = banks[bank].slots[last].delta;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+ /* compare the 4 bytes at offsets best_len - 3 .. best_len (inclusive) */
+ BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
+ BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
continue;
}
{
diff --git a/c/enc/hash_longest_match64_inc.h b/c/enc/hash_longest_match64_inc.h
index e48fc61..8f825de 100644
--- a/c/enc/hash_longest_match64_inc.h
+++ b/c/enc/hash_longest_match64_inc.h
@@ -211,6 +211,11 @@
}
}
}
+ /* We require matches of len >4, so raise best_len to at least 3: the
+ * 4-byte compare below starts at offset best_len - 3. */
+ if (best_len < 3) {
+ best_len = 3;
+ }
{
const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
@@ -230,7 +235,9 @@
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+ /* compare the 4 bytes at offsets best_len - 3 .. best_len (inclusive) */
+ BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
+ BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
continue;
}
current4 = BrotliUnalignedRead32(data + prev_ix);
diff --git a/c/enc/hash_longest_match_inc.h b/c/enc/hash_longest_match_inc.h
index 788e9ef..608fa66 100644
--- a/c/enc/hash_longest_match_inc.h
+++ b/c/enc/hash_longest_match_inc.h
@@ -208,6 +208,11 @@
}
}
}
+ /* We require matches of len >4, so raise best_len to at least 3: the
+ * 4-byte compare below starts at offset best_len - 3. */
+ if (best_len < 3) {
+ best_len = 3;
+ }
{
const uint32_t key =
FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
@@ -223,7 +228,9 @@
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+ /* compare the 4 bytes at offsets best_len - 3 .. best_len (inclusive) */
+ BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
+ BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
continue;
}
{
diff --git a/c/enc/hash_longest_match_quickly_inc.h b/c/enc/hash_longest_match_quickly_inc.h
index 54397ef..1f36022 100644
--- a/c/enc/hash_longest_match_quickly_inc.h
+++ b/c/enc/hash_longest_match_quickly_inc.h
@@ -155,6 +155,7 @@
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
const size_t best_len_in = out->len;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+ /* TODO: compare 4 bytes at once (and set the minimum best len to 4) */
int compare_char = data[cur_ix_masked + best_len_in];
size_t key = FN(HashBytes)(&data[cur_ix_masked]);
size_t key_out;