Record insert misses in hashtable profiling.
By comparing to the total number of objects, we can better determine the hit/miss ratio of various call sites and suitable container reservation sizes based on typical inputs.
PiperOrigin-RevId: 833469575
Change-Id: I2583676cefaf42b416adf172328a824b630e24b4
diff --git a/absl/container/internal/hashtablez_sampler.cc b/absl/container/internal/hashtablez_sampler.cc
index 1b8204b..4adf591 100644
--- a/absl/container/internal/hashtablez_sampler.cc
+++ b/absl/container/internal/hashtablez_sampler.cc
@@ -81,6 +81,7 @@
capacity.store(0, std::memory_order_relaxed);
size.store(0, std::memory_order_relaxed);
num_erases.store(0, std::memory_order_relaxed);
+ num_insert_hits.store(0, std::memory_order_relaxed);
num_rehashes.store(0, std::memory_order_relaxed);
max_probe_length.store(0, std::memory_order_relaxed);
total_probe_length.store(0, std::memory_order_relaxed);
diff --git a/absl/container/internal/hashtablez_sampler.h b/absl/container/internal/hashtablez_sampler.h
index 5c59a9e..163b18a 100644
--- a/absl/container/internal/hashtablez_sampler.h
+++ b/absl/container/internal/hashtablez_sampler.h
@@ -82,6 +82,7 @@
std::atomic<size_t> capacity;
std::atomic<size_t> size;
std::atomic<size_t> num_erases;
+ std::atomic<size_t> num_insert_hits;
std::atomic<size_t> num_rehashes;
std::atomic<size_t> max_probe_length;
std::atomic<size_t> total_probe_length;
@@ -111,6 +112,16 @@
void RecordRehashSlow(HashtablezInfo* info, size_t total_probe_length);
+// This is inline to avoid calling convention overhead for an otherwise
+// lightweight operation.
+inline void RecordInsertHitSlow(HashtablezInfo* info) {
+ // We avoid fetch_add since no other thread should be mutating the table
+ // simultaneously without synchronization.
+ info->num_insert_hits.store(
+ info->num_insert_hits.load(std::memory_order_relaxed) + 1,
+ std::memory_order_relaxed);
+}
+
void RecordReservationSlow(HashtablezInfo* info, size_t target_capacity);
void RecordClearedReservationSlow(HashtablezInfo* info);
@@ -184,6 +195,11 @@
RecordEraseSlow(info_);
}
+ inline void RecordInsertHit() {
+ if (ABSL_PREDICT_TRUE(info_ == nullptr)) return;
+ RecordInsertHitSlow(info_);
+ }
+
friend inline void swap(HashtablezInfoHandle& lhs,
HashtablezInfoHandle& rhs) {
std::swap(lhs.info_, rhs.info_);
@@ -210,6 +226,7 @@
inline void RecordInsertMiss(size_t /*hash*/,
size_t /*distance_from_desired*/) {}
inline void RecordErase() {}
+ inline void RecordInsertHit() {}
friend inline void swap(HashtablezInfoHandle& /*lhs*/,
HashtablezInfoHandle& /*rhs*/) {}
diff --git a/absl/container/internal/hashtablez_sampler_test.cc b/absl/container/internal/hashtablez_sampler_test.cc
index 80fe3cf..cbd8edc 100644
--- a/absl/container/internal/hashtablez_sampler_test.cc
+++ b/absl/container/internal/hashtablez_sampler_test.cc
@@ -99,6 +99,7 @@
EXPECT_EQ(info.capacity.load(), 0);
EXPECT_EQ(info.size.load(), 0);
EXPECT_EQ(info.num_erases.load(), 0);
+ EXPECT_EQ(info.num_insert_hits.load(), 0);
EXPECT_EQ(info.num_rehashes.load(), 0);
EXPECT_EQ(info.max_probe_length.load(), 0);
EXPECT_EQ(info.total_probe_length.load(), 0);
@@ -116,6 +117,7 @@
info.capacity.store(1, std::memory_order_relaxed);
info.size.store(1, std::memory_order_relaxed);
info.num_erases.store(1, std::memory_order_relaxed);
+ info.num_insert_hits.store(1, std::memory_order_relaxed);
info.max_probe_length.store(1, std::memory_order_relaxed);
info.total_probe_length.store(1, std::memory_order_relaxed);
info.hashes_bitwise_or.store(1, std::memory_order_relaxed);
@@ -131,6 +133,7 @@
EXPECT_EQ(info.capacity.load(), 0);
EXPECT_EQ(info.size.load(), 0);
EXPECT_EQ(info.num_erases.load(), 0);
+ EXPECT_EQ(info.num_insert_hits.load(), 0);
EXPECT_EQ(info.num_rehashes.load(), 0);
EXPECT_EQ(info.max_probe_length.load(), 0);
EXPECT_EQ(info.total_probe_length.load(), 0);
@@ -221,6 +224,25 @@
EXPECT_EQ(info.soo_capacity, 1);
}
+TEST(HashtablezInfoTest, RecordInsertHit) {
+ const int64_t test_stride = 31;
+ const size_t test_element_size = 29;
+ const size_t test_key_size = 27;
+ const size_t test_value_size = 25;
+
+ HashtablezInfo info;
+ absl::MutexLock l(info.init_mu);
+ info.PrepareForSampling(test_stride, test_element_size,
+ /*key_size=*/test_key_size,
+ /*value_size=*/test_value_size,
+ /*soo_capacity_value=*/1);
+ EXPECT_EQ(info.num_insert_hits.load(), 0);
+ RecordInsertHitSlow(&info);
+ EXPECT_EQ(info.num_insert_hits.load(), 1);
+ RecordInsertHitSlow(&info);
+ EXPECT_EQ(info.num_insert_hits.load(), 2);
+}
+
TEST(HashtablezInfoTest, RecordRehash) {
const int64_t test_stride = 33;
const size_t test_element_size = 31;
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index faa7880..31b117e 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -3173,6 +3173,7 @@
}
if (!empty()) {
if (equal_to(key, single_slot())) {
+ common().infoz().RecordInsertHit();
return {single_iterator(), false};
}
}
@@ -3204,6 +3205,7 @@
if (ABSL_PREDICT_TRUE(equal_to(key, slot_array() + seq.offset(i)))) {
index = seq.offset(i);
inserted = false;
+ common().infoz().RecordInsertHit();
return;
}
}
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index 499e966..8c80463 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -2790,6 +2790,7 @@
absl::flat_hash_set<const HashtablezInfo*> preexisting_info(10);
absl::flat_hash_map<size_t, int> observed_checksums(10);
absl::flat_hash_map<ssize_t, int> reservations(10);
+ absl::flat_hash_map<std::pair<size_t, size_t>, int> hit_misses(10);
start_size += sampler.Iterate([&](const HashtablezInfo& info) {
preexisting_info.insert(&info);
@@ -2802,6 +2803,8 @@
const bool do_reserve = (i % 10 > 5);
const bool do_rehash = !do_reserve && (i % 10 > 0);
+ const bool do_first_insert_hit = i % 2 == 0;
+ const bool do_second_insert_hit = i % 4 == 0;
if (do_reserve) {
// Don't reserve on all tables.
@@ -2809,7 +2812,14 @@
}
tables.back().insert(1);
+ if (do_first_insert_hit) {
+ tables.back().insert(1);
+ tables.back().insert(1);
+ }
tables.back().insert(i % 5);
+ if (do_second_insert_hit) {
+ tables.back().insert(i % 5);
+ }
if (do_rehash) {
// Rehash some other tables.
@@ -2823,6 +2833,10 @@
observed_checksums[info.hashes_bitwise_xor.load(
std::memory_order_relaxed)]++;
reservations[info.max_reserve.load(std::memory_order_relaxed)]++;
+ hit_misses[std::make_pair(
+ info.num_insert_hits.load(std::memory_order_relaxed),
+ info.size.load(std::memory_order_relaxed))]++;
+
EXPECT_EQ(info.inline_element_size, sizeof(typename TypeParam::value_type));
EXPECT_EQ(info.key_size, sizeof(typename TypeParam::key_type));
EXPECT_EQ(info.value_size, sizeof(typename TypeParam::value_type));
@@ -2850,6 +2864,21 @@
EXPECT_NEAR((100 * count) / static_cast<double>(tables.size()), 0.1, 0.05)
<< reservation;
}
+
+ EXPECT_THAT(hit_misses, testing::SizeIs(6));
+ const double sampled_tables = end_size - start_size;
+ // i % 20: { 1, 11 }
+ EXPECT_NEAR((hit_misses[{1, 1}] / sampled_tables), 0.10, 0.02);
+ // i % 20: { 6 }
+ EXPECT_NEAR((hit_misses[{3, 1}] / sampled_tables), 0.05, 0.02);
+ // i % 20: { 0, 4, 8, 12 }
+ EXPECT_NEAR((hit_misses[{3, 2}] / sampled_tables), 0.20, 0.02);
+ // i % 20: { 2, 10, 14, 18 }
+ EXPECT_NEAR((hit_misses[{2, 2}] / sampled_tables), 0.20, 0.02);
+ // i % 20: { 16 }
+ EXPECT_NEAR((hit_misses[{4, 1}] / sampled_tables), 0.05, 0.02);
+ // i % 20: { 3, 5, 7, 9, 13, 15, 17, 19 }
+ EXPECT_NEAR((hit_misses[{0, 2}] / sampled_tables), 0.40, 0.02);
}
std::vector<const HashtablezInfo*> SampleSooMutation(
diff --git a/absl/profiling/hashtable.cc b/absl/profiling/hashtable.cc
index 407c6b4..17148d1 100644
--- a/absl/profiling/hashtable.cc
+++ b/absl/profiling/hashtable.cc
@@ -60,6 +60,7 @@
const auto capacity_id = builder.InternString("capacity");
const auto size_id = builder.InternString("size");
const auto num_erases_id = builder.InternString("num_erases");
+ const auto num_insert_hits_id = builder.InternString("num_insert_hits");
const auto num_rehashes_id = builder.InternString("num_rehashes");
const auto max_probe_length_id = builder.InternString("max_probe_length");
const auto total_probe_length_id = builder.InternString("total_probe_length");
@@ -89,6 +90,9 @@
add_label(size_id, info.size.load(std::memory_order_relaxed));
add_label(num_erases_id,
info.num_erases.load(std::memory_order_relaxed));
+ // TODO(b/436909492): Revisit whether this value is useful.
+ add_label(num_insert_hits_id,
+ info.num_insert_hits.load(std::memory_order_relaxed));
add_label(num_rehashes_id,
info.num_rehashes.load(std::memory_order_relaxed));
add_label(max_probe_length_id,