Speed up the encoder at qualities 5-9 on large (1MB+) files

PiperOrigin-RevId: 553087469
diff --git a/c/enc/backward_references.c b/c/enc/backward_references.c
index ff5b7be..f600e64 100644
--- a/c/enc/backward_references.c
+++ b/c/enc/backward_references.c
@@ -181,6 +181,7 @@
       CASE_(65)
 #undef CASE_
       default:
+        BROTLI_DCHECK(false);
         break;
     }
   }
@@ -196,6 +197,7 @@
     FOR_GENERIC_HASHERS(CASE_)
 #undef CASE_
     default:
+      BROTLI_DCHECK(false);
       break;
   }
 }
diff --git a/c/enc/hash.h b/c/enc/hash.h
index fc6e334..5677d82 100644
--- a/c/enc/hash.h
+++ b/c/enc/hash.h
@@ -78,8 +78,7 @@
      for this use.
    * The number has been tuned heuristically against compression benchmarks. */
 static const uint32_t kHashMul32 = 0x1E35A7BD;
-static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
-static const uint64_t kHashMul64Long =
+static const uint64_t kHashMul64 =
     BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
 
 static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
diff --git a/c/enc/hash_longest_match64_inc.h b/c/enc/hash_longest_match64_inc.h
index da75949..ea5a831 100644
--- a/c/enc/hash_longest_match64_inc.h
+++ b/c/enc/hash_longest_match64_inc.h
@@ -20,13 +20,12 @@
 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
 
 /* HashBytes is the function that chooses the bucket to place the address in. */
-static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
-                                            const int shift) {
-  const uint64_t mask = (~((uint64_t)0U)) >> 24;  /* Use only 5 bytes. */
-  const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(data) & mask) * kHashMul64Long;
+static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
+                                          uint64_t hash_mul) {
+  const uint64_t h = BROTLI_UNALIGNED_LOAD64LE(data) * hash_mul;
   /* The higher bits contain more mixture from the multiplication,
      so we take our results from there. */
-  return (uint32_t)(h >> shift);
+  return (size_t)(h >> (64 - 15));
 }
 
 typedef struct HashLongestMatch {
@@ -35,8 +34,8 @@
   /* Only block_size_ newest backward references are kept,
      and the older are forgotten. */
   size_t block_size_;
-  /* Left-shift for computing hash bucket index from hash value. */
-  int hash_shift_;
+  /* Hash multiplier tuned to match length. */
+  uint64_t hash_mul_;
   /* Mask for accessing entries in a block (in a ring-buffer manner). */
   uint32_t block_mask_;
 
@@ -61,7 +60,8 @@
   self->common_ = common;
 
   BROTLI_UNUSED(params);
-  self->hash_shift_ = 64 - common->params.bucket_bits;
+  self->hash_mul_ = kHashMul64 << (64 - 5 * 8);
+  BROTLI_DCHECK(common->params.bucket_bits == 15);
   self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
   self->block_bits_ = common->params.block_bits;
   self->block_size_ = (size_t)1 << common->params.block_bits;
@@ -81,7 +81,7 @@
   if (one_shot && input_size <= partial_prepare_threshold) {
     size_t i;
     for (i = 0; i < input_size; ++i) {
-      const uint32_t key = FN(HashBytes)(&data[i], self->hash_shift_);
+      const size_t key = FN(HashBytes)(&data[i], self->hash_mul_);
       num[key] = 0;
     }
   } else {
@@ -107,7 +107,7 @@
     const size_t mask, const size_t ix) {
   uint16_t* BROTLI_RESTRICT num = self->num_;
   uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
-  const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_);
+  const size_t key = FN(HashBytes)(&data[ix & mask], self->hash_mul_);
   const size_t minor_ix = num[key] & self->block_mask_;
   const size_t offset = minor_ix + (key << self->block_bits_);
   ++num[key];
@@ -212,7 +212,7 @@
     }
   }
   {
-    const uint32_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
+    const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
     uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
     const size_t down =
         (num[key] > self->block_size_) ?
diff --git a/c/enc/quality.h b/c/enc/quality.h
index 4415a54..ffdfd72 100644
--- a/c/enc/quality.h
+++ b/c/enc/quality.h
@@ -119,6 +119,41 @@
   return params->quality < 9 ? 64 : 512;
 }
 
+/* Quality to hasher mapping:
+
+   - q02: h02 (longest_match_quickly), b16, l5
+
+   - q03: h03 (longest_match_quickly), b17, l5
+
+   - q04: h04 (longest_match_quickly), b17, l5
+   - q04: h54 (longest_match_quickly), b20, l7 | for large files
+
+   - q05: h05 (longest_match        ), b14, l4
+   - q05: h06 (longest_match64      ), b15, l5 | for large files
+   - q05: h40 (forgetful_chain      ), b15, l4 | for small window
+
+   - q06: h05 (longest_match        ), b14, l4
+   - q06: h06 (longest_match64      ), b15, l5 | for large files
+   - q06: h40 (forgetful_chain      ), b15, l4 | for small window
+
+   - q07: h05 (longest_match        ), b15, l4
+   - q07: h06 (longest_match64      ), b15, l5 | for large files
+   - q07: h41 (forgetful_chain      ), b15, l4 | for small window
+
+   - q08: h05 (longest_match        ), b15, l4
+   - q08: h06 (longest_match64      ), b15, l5 | for large files
+   - q08: h41 (forgetful_chain      ), b15, l4 | for small window
+
+   - q09: h05 (longest_match        ), b15, l4
+   - q09: h06 (longest_match64      ), b15, l5 | for large files
+   - q09: h42 (forgetful_chain      ), b15, l4 | for small window
+
+   - q10: t10 (to_binary_tree       ), b17, l128
+
+   - q11: t10 (to_binary_tree       ), b17, l128
+
+  Where "q" is quality, "h" is hasher type, "b" is bucket bits,
+  "l" is source len. */
 static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
                                        BrotliHasherParams* hparams) {
   if (params->quality > 9) {
@@ -136,6 +171,8 @@
     hparams->num_last_distances_to_check =
         params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
   } else {
+    /* TODO(eustas): the previous setting (H6) is often faster and denser;
+                     consider adding an option to use it. */
     hparams->type = 5;
     hparams->block_bits = params->quality - 1;
     hparams->bucket_bits = params->quality < 7 ? 14 : 15;