Record insert misses in hashtable profiling.

By comparing to the total number of objects, we can better determine the hit/miss ratio of various call sites and suitable container reservation sizes based on typical inputs.

PiperOrigin-RevId: 833469575
Change-Id: I2583676cefaf42b416adf172328a824b630e24b4
diff --git a/absl/container/internal/hashtablez_sampler.cc b/absl/container/internal/hashtablez_sampler.cc
index 1b8204b..4adf591 100644
--- a/absl/container/internal/hashtablez_sampler.cc
+++ b/absl/container/internal/hashtablez_sampler.cc
@@ -81,6 +81,7 @@
   capacity.store(0, std::memory_order_relaxed);
   size.store(0, std::memory_order_relaxed);
   num_erases.store(0, std::memory_order_relaxed);
+  num_insert_hits.store(0, std::memory_order_relaxed);
   num_rehashes.store(0, std::memory_order_relaxed);
   max_probe_length.store(0, std::memory_order_relaxed);
   total_probe_length.store(0, std::memory_order_relaxed);
diff --git a/absl/container/internal/hashtablez_sampler.h b/absl/container/internal/hashtablez_sampler.h
index 5c59a9e..163b18a 100644
--- a/absl/container/internal/hashtablez_sampler.h
+++ b/absl/container/internal/hashtablez_sampler.h
@@ -82,6 +82,7 @@
   std::atomic<size_t> capacity;
   std::atomic<size_t> size;
   std::atomic<size_t> num_erases;
+  std::atomic<size_t> num_insert_hits;
   std::atomic<size_t> num_rehashes;
   std::atomic<size_t> max_probe_length;
   std::atomic<size_t> total_probe_length;
@@ -111,6 +112,16 @@
 
 void RecordRehashSlow(HashtablezInfo* info, size_t total_probe_length);
 
+// This is inline to avoid calling convention overhead for an otherwise
+// lightweight operation.
+inline void RecordInsertHitSlow(HashtablezInfo* info) {
+  // We avoid fetch_add since no other thread should be mutating the table
+  // simultaneously without synchronization.
+  info->num_insert_hits.store(
+      info->num_insert_hits.load(std::memory_order_relaxed) + 1,
+      std::memory_order_relaxed);
+}
+
 void RecordReservationSlow(HashtablezInfo* info, size_t target_capacity);
 
 void RecordClearedReservationSlow(HashtablezInfo* info);
@@ -184,6 +195,11 @@
     RecordEraseSlow(info_);
   }
 
+  inline void RecordInsertHit() {
+    if (ABSL_PREDICT_TRUE(info_ == nullptr)) return;
+    RecordInsertHitSlow(info_);
+  }
+
   friend inline void swap(HashtablezInfoHandle& lhs,
                           HashtablezInfoHandle& rhs) {
     std::swap(lhs.info_, rhs.info_);
@@ -210,6 +226,7 @@
   inline void RecordInsertMiss(size_t /*hash*/,
                                size_t /*distance_from_desired*/) {}
   inline void RecordErase() {}
+  inline void RecordInsertHit() {}
 
   friend inline void swap(HashtablezInfoHandle& /*lhs*/,
                           HashtablezInfoHandle& /*rhs*/) {}
diff --git a/absl/container/internal/hashtablez_sampler_test.cc b/absl/container/internal/hashtablez_sampler_test.cc
index 80fe3cf..cbd8edc 100644
--- a/absl/container/internal/hashtablez_sampler_test.cc
+++ b/absl/container/internal/hashtablez_sampler_test.cc
@@ -99,6 +99,7 @@
   EXPECT_EQ(info.capacity.load(), 0);
   EXPECT_EQ(info.size.load(), 0);
   EXPECT_EQ(info.num_erases.load(), 0);
+  EXPECT_EQ(info.num_insert_hits.load(), 0);
   EXPECT_EQ(info.num_rehashes.load(), 0);
   EXPECT_EQ(info.max_probe_length.load(), 0);
   EXPECT_EQ(info.total_probe_length.load(), 0);
@@ -116,6 +117,7 @@
   info.capacity.store(1, std::memory_order_relaxed);
   info.size.store(1, std::memory_order_relaxed);
   info.num_erases.store(1, std::memory_order_relaxed);
+  info.num_insert_hits.store(1, std::memory_order_relaxed);
   info.max_probe_length.store(1, std::memory_order_relaxed);
   info.total_probe_length.store(1, std::memory_order_relaxed);
   info.hashes_bitwise_or.store(1, std::memory_order_relaxed);
@@ -131,6 +133,7 @@
   EXPECT_EQ(info.capacity.load(), 0);
   EXPECT_EQ(info.size.load(), 0);
   EXPECT_EQ(info.num_erases.load(), 0);
+  EXPECT_EQ(info.num_insert_hits.load(), 0);
   EXPECT_EQ(info.num_rehashes.load(), 0);
   EXPECT_EQ(info.max_probe_length.load(), 0);
   EXPECT_EQ(info.total_probe_length.load(), 0);
@@ -221,6 +224,25 @@
   EXPECT_EQ(info.soo_capacity, 1);
 }
 
+TEST(HashtablezInfoTest, RecordInsertHit) {
+  const int64_t test_stride = 31;
+  const size_t test_element_size = 29;
+  const size_t test_key_size = 27;
+  const size_t test_value_size = 25;
+
+  HashtablezInfo info;
+  absl::MutexLock l(info.init_mu);
+  info.PrepareForSampling(test_stride, test_element_size,
+                          /*key_size=*/test_key_size,
+                          /*value_size=*/test_value_size,
+                          /*soo_capacity_value=*/1);
+  EXPECT_EQ(info.num_insert_hits.load(), 0);
+  RecordInsertHitSlow(&info);
+  EXPECT_EQ(info.num_insert_hits.load(), 1);
+  RecordInsertHitSlow(&info);
+  EXPECT_EQ(info.num_insert_hits.load(), 2);
+}
+
 TEST(HashtablezInfoTest, RecordRehash) {
   const int64_t test_stride = 33;
   const size_t test_element_size = 31;
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index faa7880..31b117e 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -3173,6 +3173,7 @@
     }
     if (!empty()) {
       if (equal_to(key, single_slot())) {
+        common().infoz().RecordInsertHit();
         return {single_iterator(), false};
       }
     }
@@ -3204,6 +3205,7 @@
           if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) {
             index = seq.offset(i);
             inserted = false;
+            common().infoz().RecordInsertHit();
             return;
           }
         }
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index 499e966..8c80463 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -2790,6 +2790,7 @@
   absl::flat_hash_set<const HashtablezInfo*> preexisting_info(10);
   absl::flat_hash_map<size_t, int> observed_checksums(10);
   absl::flat_hash_map<ssize_t, int> reservations(10);
+  absl::flat_hash_map<std::pair<size_t, size_t>, int> hit_misses(10);
 
   start_size += sampler.Iterate([&](const HashtablezInfo& info) {
     preexisting_info.insert(&info);
@@ -2802,6 +2803,8 @@
 
     const bool do_reserve = (i % 10 > 5);
     const bool do_rehash = !do_reserve && (i % 10 > 0);
+    const bool do_first_insert_hit = i % 2 == 0;
+    const bool do_second_insert_hit = i % 4 == 0;
 
     if (do_reserve) {
       // Don't reserve on all tables.
@@ -2809,7 +2812,14 @@
     }
 
     tables.back().insert(1);
+    if (do_first_insert_hit) {
+      tables.back().insert(1);
+      tables.back().insert(1);
+    }
     tables.back().insert(i % 5);
+    if (do_second_insert_hit) {
+      tables.back().insert(i % 5);
+    }
 
     if (do_rehash) {
       // Rehash some other tables.
@@ -2823,6 +2833,10 @@
     observed_checksums[info.hashes_bitwise_xor.load(
         std::memory_order_relaxed)]++;
     reservations[info.max_reserve.load(std::memory_order_relaxed)]++;
+    hit_misses[std::make_pair(
+        info.num_insert_hits.load(std::memory_order_relaxed),
+        info.size.load(std::memory_order_relaxed))]++;
+
     EXPECT_EQ(info.inline_element_size, sizeof(typename TypeParam::value_type));
     EXPECT_EQ(info.key_size, sizeof(typename TypeParam::key_type));
     EXPECT_EQ(info.value_size, sizeof(typename TypeParam::value_type));
@@ -2850,6 +2864,21 @@
     EXPECT_NEAR((100 * count) / static_cast<double>(tables.size()), 0.1, 0.05)
         << reservation;
   }
+
+  EXPECT_THAT(hit_misses, testing::SizeIs(6));
+  const double sampled_tables = end_size - start_size;
+  // i % 20: { 1, 11 }
+  EXPECT_NEAR((hit_misses[{1, 1}] / sampled_tables), 0.10, 0.02);
+  // i % 20: { 6 }
+  EXPECT_NEAR((hit_misses[{3, 1}] / sampled_tables), 0.05, 0.02);
+  // i % 20: { 0, 4, 8, 12 }
+  EXPECT_NEAR((hit_misses[{3, 2}] / sampled_tables), 0.20, 0.02);
+  // i % 20: { 2, 10, 14, 18 }
+  EXPECT_NEAR((hit_misses[{2, 2}] / sampled_tables), 0.20, 0.02);
+  // i % 20: { 16 }
+  EXPECT_NEAR((hit_misses[{4, 1}] / sampled_tables), 0.05, 0.02);
+  // i % 20: { 3, 5, 7, 9, 13, 15, 17, 19 }
+  EXPECT_NEAR((hit_misses[{0, 2}] / sampled_tables), 0.40, 0.02);
 }
 
 std::vector<const HashtablezInfo*> SampleSooMutation(
diff --git a/absl/profiling/hashtable.cc b/absl/profiling/hashtable.cc
index 407c6b4..17148d1 100644
--- a/absl/profiling/hashtable.cc
+++ b/absl/profiling/hashtable.cc
@@ -60,6 +60,7 @@
   const auto capacity_id = builder.InternString("capacity");
   const auto size_id = builder.InternString("size");
   const auto num_erases_id = builder.InternString("num_erases");
+  const auto num_insert_hits_id = builder.InternString("num_insert_hits");
   const auto num_rehashes_id = builder.InternString("num_rehashes");
   const auto max_probe_length_id = builder.InternString("max_probe_length");
   const auto total_probe_length_id = builder.InternString("total_probe_length");
@@ -89,6 +90,9 @@
         add_label(size_id, info.size.load(std::memory_order_relaxed));
         add_label(num_erases_id,
                   info.num_erases.load(std::memory_order_relaxed));
+        // TODO(b/436909492): Revisit whether this value is useful.
+        add_label(num_insert_hits_id,
+                  info.num_insert_hits.load(std::memory_order_relaxed));
         add_label(num_rehashes_id,
                   info.num_rehashes.load(std::memory_order_relaxed));
         add_label(max_probe_length_id,