Compare 4 bytes when checking if a longer match is possible.

Loading and comparing 4 bytes is ~as fast as 1 byte, but allows us to avoid more full match length calculation.

PiperOrigin-RevId: 617556847
diff --git a/c/enc/hash.h b/c/enc/hash.h
index 5677d82..ba9b0d8 100644
--- a/c/enc/hash.h
+++ b/c/enc/hash.h
@@ -574,6 +574,11 @@
       }
     }
   }
+  /* we require matches of len >4, so increase best_len to 3, so we can compare
+   * 4 bytes all the time. */
+  if (best_len < 3) {
+    best_len = 3;
+  }
   while (item == 0) {
     size_t offset;
     size_t distance;
@@ -586,9 +591,10 @@
     limit = source_size - offset;
     limit = (limit > max_length) ? max_length : limit;
     if (distance > max_distance) continue;
-    if (cur_ix_masked + best_len > ring_buffer_mask ||
-        best_len >= limit ||
-        data[cur_ix_masked + best_len] != source[offset + best_len]) {
+    if (cur_ix_masked + best_len > ring_buffer_mask || best_len >= limit ||
+        /* compare 4 bytes ending at best_len + 1 */
+        BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
+            BrotliUnalignedRead32(&source[offset + best_len - 3])) {
       continue;
     }
     {
diff --git a/c/enc/hash_forgetful_chain_inc.h b/c/enc/hash_forgetful_chain_inc.h
index 48e1cdc..9a8efeb 100644
--- a/c/enc/hash_forgetful_chain_inc.h
+++ b/c/enc/hash_forgetful_chain_inc.h
@@ -241,6 +241,11 @@
       }
     }
   }
+  /* we require matches of len >4, so increase best_len to 3, so we can compare
+   * 4 bytes all the time. */
+  if (best_len < 3) {
+    best_len = 3;
+  }
   {
     const size_t bank = key & (NUM_BANKS - 1);
     size_t backward = 0;
@@ -257,7 +262,9 @@
       delta = banks[bank].slots[last].delta;
       if (cur_ix_masked + best_len > ring_buffer_mask ||
           prev_ix + best_len > ring_buffer_mask ||
-          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+          /* compare 4 bytes ending at best_len + 1 */
+          BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
+              BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
         continue;
       }
       {
diff --git a/c/enc/hash_longest_match64_inc.h b/c/enc/hash_longest_match64_inc.h
index e48fc61..8f825de 100644
--- a/c/enc/hash_longest_match64_inc.h
+++ b/c/enc/hash_longest_match64_inc.h
@@ -211,6 +211,11 @@
       }
     }
   }
+  /* we require matches of len >4, so increase best_len to 3, so we can compare
+   * 4 bytes all the time. */
+  if (best_len < 3) {
+    best_len = 3;
+  }
   {
     const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
     uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
@@ -230,7 +235,9 @@
       prev_ix &= ring_buffer_mask;
       if (cur_ix_masked + best_len > ring_buffer_mask ||
           prev_ix + best_len > ring_buffer_mask ||
-          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+          /* compare 4 bytes ending at best_len + 1 */
+          BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
+              BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
         continue;
       }
       current4 = BrotliUnalignedRead32(data + prev_ix);
diff --git a/c/enc/hash_longest_match_inc.h b/c/enc/hash_longest_match_inc.h
index 788e9ef..608fa66 100644
--- a/c/enc/hash_longest_match_inc.h
+++ b/c/enc/hash_longest_match_inc.h
@@ -208,6 +208,11 @@
       }
     }
   }
+  /* we require matches of len >4, so increase best_len to 3, so we can compare
+   * 4 bytes all the time. */
+  if (best_len < 3) {
+    best_len = 3;
+  }
   {
     const uint32_t key =
         FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
@@ -223,7 +228,9 @@
       prev_ix &= ring_buffer_mask;
       if (cur_ix_masked + best_len > ring_buffer_mask ||
           prev_ix + best_len > ring_buffer_mask ||
-          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
+          /* compare 4 bytes ending at best_len + 1 */
+          BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
+              BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
         continue;
       }
       {
diff --git a/c/enc/hash_longest_match_quickly_inc.h b/c/enc/hash_longest_match_quickly_inc.h
index 54397ef..1f36022 100644
--- a/c/enc/hash_longest_match_quickly_inc.h
+++ b/c/enc/hash_longest_match_quickly_inc.h
@@ -155,6 +155,7 @@
   uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
   const size_t best_len_in = out->len;
   const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
+  /* TODO: compare 4 bytes at once (and set the minimum best len to 4) */
   int compare_char = data[cur_ix_masked + best_len_in];
   size_t key = FN(HashBytes)(&data[cur_ix_masked]);
   size_t key_out;