absl: fix use-after-free in Mutex/CondVar

Both Mutex and CondVar signal PerThreadSem/Waiter after satisfying the wait condition,
as the result the waiting thread may return w/o waiting on the
PerThreadSem/Waiter at all. If the waiting thread then exits, it currently
destroys Waiter object. As the result Waiter::Post can be called on
already destroyed object.
PerThreadSem/Waiter must be type-stable after creation and must not be destroyed.
The futex-based implementation is the only one that is not affected by the bug
since there is effectively nothing to destroy (maybe only UBSan/ASan
could complain about calling methods on a destroyed object).

Here is the problematic sequence of events:

1: void Mutex::Block(PerThreadSynch *s) {
2:   while (s->state.load(std::memory_order_acquire) == PerThreadSynch::kQueued) {
3:     if (!DecrementSynchSem(this, s, s->waitp->timeout)) {

4: PerThreadSynch *Mutex::Wakeup(PerThreadSynch *w) {
5:   ...
6:   w->state.store(PerThreadSynch::kAvailable, std::memory_order_release);
7:   IncrementSynchSem(this, w);
8:   ...
9: }

Consider line 6 is executed, then line 2 observes kAvailable and
line 3 is not called. The thread executing Mutex::Block returns from
the method, acquires the mutex, releases the mutex, exits and destroys
PerThreadSem/Waiter.
Now Mutex::Wakeup resumes and executes line 7 on the destroyed object. Boom!

CondVar uses a similar pattern.

Moreover the semaphore-based Waiter implementation is not even destruction-safe
(the Waiter cannot be used to signal own destruction). So even if Mutex/CondVar
would always pair Waiter::Post with Waiter::Wait before destroying PerThreadSem/Waiter,
it would still be subject to use-after-free bug on the semaphore.

PiperOrigin-RevId: 449159939
Change-Id: I497134fa8b6ce1294a422827c5f0de0e897cea31
diff --git a/absl/synchronization/internal/create_thread_identity.cc b/absl/synchronization/internal/create_thread_identity.cc
index 53a71b3..44e6129 100644
--- a/absl/synchronization/internal/create_thread_identity.cc
+++ b/absl/synchronization/internal/create_thread_identity.cc
@@ -38,7 +38,7 @@
 
 // A per-thread destructor for reclaiming associated ThreadIdentity objects.
 // Since we must preserve their storage we cache them for re-use.
-void ReclaimThreadIdentity(void* v) {
+static void ReclaimThreadIdentity(void* v) {
   base_internal::ThreadIdentity* identity =
       static_cast<base_internal::ThreadIdentity*>(v);
 
@@ -48,8 +48,6 @@
     base_internal::LowLevelAlloc::Free(identity->per_thread_synch.all_locks);
   }
 
-  PerThreadSem::Destroy(identity);
-
   // We must explicitly clear the current thread's identity:
   // (a) Subsequent (unrelated) per-thread destructors may require an identity.
   //     We must guarantee a new identity is used in this case (this instructor
@@ -71,7 +69,12 @@
   return (addr + align - 1) & ~(align - 1);
 }
 
-static void ResetThreadIdentity(base_internal::ThreadIdentity* identity) {
+void OneTimeInitThreadIdentity(base_internal::ThreadIdentity* identity) {
+  PerThreadSem::Init(identity);
+}
+
+static void ResetThreadIdentityBetweenReuse(
+    base_internal::ThreadIdentity* identity) {
   base_internal::PerThreadSynch* pts = &identity->per_thread_synch;
   pts->next = nullptr;
   pts->skip = nullptr;
@@ -116,8 +119,9 @@
     identity = reinterpret_cast<base_internal::ThreadIdentity*>(
         RoundUp(reinterpret_cast<intptr_t>(allocation),
                 base_internal::PerThreadSynch::kAlignment));
+    OneTimeInitThreadIdentity(identity);
   }
-  ResetThreadIdentity(identity);
+  ResetThreadIdentityBetweenReuse(identity);
 
   return identity;
 }
@@ -127,7 +131,6 @@
 // REQUIRES: CurrentThreadIdentity(false) == nullptr
 base_internal::ThreadIdentity* CreateThreadIdentity() {
   base_internal::ThreadIdentity* identity = NewThreadIdentity();
-  PerThreadSem::Init(identity);
   // Associate the value with the current thread, and attach our destructor.
   base_internal::SetCurrentThreadIdentity(identity, ReclaimThreadIdentity);
   return identity;
diff --git a/absl/synchronization/internal/create_thread_identity.h b/absl/synchronization/internal/create_thread_identity.h
index e121f68..4cfde09 100644
--- a/absl/synchronization/internal/create_thread_identity.h
+++ b/absl/synchronization/internal/create_thread_identity.h
@@ -36,10 +36,6 @@
 // For private use only.
 base_internal::ThreadIdentity* CreateThreadIdentity();
 
-// A per-thread destructor for reclaiming associated ThreadIdentity objects.
-// For private use only.
-void ReclaimThreadIdentity(void* v);
-
 // Returns the ThreadIdentity object representing the calling thread; guaranteed
 // to be unique for its lifetime.  The returned object will remain valid for the
 // program's lifetime; although it may be re-assigned to a subsequent thread.
diff --git a/absl/synchronization/internal/per_thread_sem.cc b/absl/synchronization/internal/per_thread_sem.cc
index a603178..469e8f3 100644
--- a/absl/synchronization/internal/per_thread_sem.cc
+++ b/absl/synchronization/internal/per_thread_sem.cc
@@ -47,10 +47,6 @@
   identity->is_idle.store(false, std::memory_order_relaxed);
 }
 
-void PerThreadSem::Destroy(base_internal::ThreadIdentity *identity) {
-  Waiter::GetWaiter(identity)->~Waiter();
-}
-
 void PerThreadSem::Tick(base_internal::ThreadIdentity *identity) {
   const int ticker =
       identity->ticker.fetch_add(1, std::memory_order_relaxed) + 1;
diff --git a/absl/synchronization/internal/per_thread_sem.h b/absl/synchronization/internal/per_thread_sem.h
index 7beae8e..90a8880 100644
--- a/absl/synchronization/internal/per_thread_sem.h
+++ b/absl/synchronization/internal/per_thread_sem.h
@@ -66,10 +66,6 @@
   // REQUIRES: May only be called by ThreadIdentity.
   static void Init(base_internal::ThreadIdentity* identity);
 
-  // Destroy the PerThreadSem associated with "identity".
-  // REQUIRES: May only be called by ThreadIdentity.
-  static void Destroy(base_internal::ThreadIdentity* identity);
-
   // Increments "identity"'s count.
   static inline void Post(base_internal::ThreadIdentity* identity);
 
@@ -81,8 +77,7 @@
   // Permitted callers.
   friend class PerThreadSemTest;
   friend class absl::Mutex;
-  friend absl::base_internal::ThreadIdentity* CreateThreadIdentity();
-  friend void ReclaimThreadIdentity(void* v);
+  friend void OneTimeInitThreadIdentity(absl::base_internal::ThreadIdentity*);
 };
 
 }  // namespace synchronization_internal
diff --git a/absl/synchronization/internal/waiter.cc b/absl/synchronization/internal/waiter.cc
index 28ef311..f2051d6 100644
--- a/absl/synchronization/internal/waiter.cc
+++ b/absl/synchronization/internal/waiter.cc
@@ -71,8 +71,6 @@
   futex_.store(0, std::memory_order_relaxed);
 }
 
-Waiter::~Waiter() = default;
-
 bool Waiter::Wait(KernelTimeout t) {
   // Loop until we can atomically decrement futex from a positive
   // value, waiting on a futex while we believe it is zero.
@@ -161,18 +159,6 @@
   wakeup_count_ = 0;
 }
 
-Waiter::~Waiter() {
-  const int err = pthread_mutex_destroy(&mu_);
-  if (err != 0) {
-    ABSL_RAW_LOG(FATAL, "pthread_mutex_destroy failed: %d", err);
-  }
-
-  const int err2 = pthread_cond_destroy(&cv_);
-  if (err2 != 0) {
-    ABSL_RAW_LOG(FATAL, "pthread_cond_destroy failed: %d", err2);
-  }
-}
-
 bool Waiter::Wait(KernelTimeout t) {
   struct timespec abs_timeout;
   if (t.has_timeout()) {
@@ -240,12 +226,6 @@
   wakeups_.store(0, std::memory_order_relaxed);
 }
 
-Waiter::~Waiter() {
-  if (sem_destroy(&sem_) != 0) {
-    ABSL_RAW_LOG(FATAL, "sem_destroy failed with errno %d\n", errno);
-  }
-}
-
 bool Waiter::Wait(KernelTimeout t) {
   struct timespec abs_timeout;
   if (t.has_timeout()) {
@@ -363,11 +343,6 @@
   wakeup_count_ = 0;
 }
 
-// SRW locks and condition variables do not need to be explicitly destroyed.
-// https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-initializesrwlock
-// https://stackoverflow.com/questions/28975958/why-does-windows-have-no-deleteconditionvariable-function-to-go-together-with
-Waiter::~Waiter() = default;
-
 bool Waiter::Wait(KernelTimeout t) {
   SRWLOCK *mu = WinHelper::GetLock(this);
   CONDITION_VARIABLE *cv = WinHelper::GetCond(this);
diff --git a/absl/synchronization/internal/waiter.h b/absl/synchronization/internal/waiter.h
index 4f4f692..b8adfeb 100644
--- a/absl/synchronization/internal/waiter.h
+++ b/absl/synchronization/internal/waiter.h
@@ -71,9 +71,6 @@
   Waiter(const Waiter&) = delete;
   Waiter& operator=(const Waiter&) = delete;
 
-  // Destroy any data to track waits.
-  ~Waiter();
-
   // Blocks the calling thread until a matching call to `Post()` or
   // `t` has passed. Returns `true` if woken (`Post()` called),
   // `false` on timeout.
@@ -106,6 +103,12 @@
 #endif
 
  private:
+  // The destructor must not be called since Mutex/CondVar
+  // can use PerThreadSem/Waiter after the thread exits.
+  // Waiter objects are embedded in ThreadIdentity objects,
+  // which are reused via a freelist and are never destroyed.
+  ~Waiter() = delete;
+
 #if ABSL_WAITER_MODE == ABSL_WAITER_MODE_FUTEX
   // Futexes are defined by specification to be 32-bits.
   // Thus std::atomic<int32_t> must be just an int32_t with lockfree methods.
@@ -138,6 +141,9 @@
 
   // We can't include Windows.h in our headers, so we use aligned character
   // buffers to define the storage of SRWLOCK and CONDITION_VARIABLE.
+  // SRW locks and condition variables do not need to be explicitly destroyed.
+  // https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-initializesrwlock
+  // https://stackoverflow.com/questions/28975958/why-does-windows-have-no-deleteconditionvariable-function-to-go-together-with
   alignas(void*) unsigned char mu_storage_[sizeof(void*)];
   alignas(void*) unsigned char cv_storage_[sizeof(void*)];
   int waiter_count_;
diff --git a/absl/synchronization/mutex_test.cc b/absl/synchronization/mutex_test.cc
index 4f40317..99bb017 100644
--- a/absl/synchronization/mutex_test.cc
+++ b/absl/synchronization/mutex_test.cc
@@ -1704,4 +1704,30 @@
   EXPECT_EQ(RunTest(&TestMuTime, threads, iterations, 1), threads * iterations);
 }
 
+TEST(Mutex, SignalExitedThread) {
+  // The test may expose a race when Mutex::Unlock signals a thread
+  // that has already exited.
+#if defined(__wasm__) || defined(__asmjs__)
+  constexpr int kThreads = 1;  // OOMs under WASM
+#else
+  constexpr int kThreads = 100;
+#endif
+  std::vector<std::thread> top;
+  for (unsigned i = 0; i < 2 * std::thread::hardware_concurrency(); i++) {
+    top.emplace_back([&]() {
+      for (int i = 0; i < kThreads; i++) {
+        absl::Mutex mu;
+        std::thread t([&]() {
+          mu.Lock();
+          mu.Unlock();
+        });
+        mu.Lock();
+        mu.Unlock();
+        t.join();
+      }
+    });
+  }
+  for (auto &th : top) th.join();
+}
+
 }  // namespace