absl/time: Use "memory order acquire" for the shared-data loads, which allows the data memory barrier (acquire fence) to be removed safely.

PiperOrigin-RevId: 825322666
Change-Id: Ic47304c6914f3e5dae45682b6c2304f3dc7ad1b1
diff --git a/absl/time/clock.cc b/absl/time/clock.cc
index bf6a10b..2a5f41b 100644
--- a/absl/time/clock.cc
+++ b/absl/time/clock.cc
@@ -338,18 +338,20 @@
   // to the same shared data.
   seq_read0 = time_state.seq.load(std::memory_order_acquire);
 
-  base_ns = time_state.last_sample.base_ns.load(std::memory_order_relaxed);
+  // The algorithm does not require that the following four loads be ordered
+  // with respect to one another; it requires only that they precede the load of
+  // time_state.seq below them. Nevertheless, we mark each of them as an
+  // acquire-load, rather than using a barrier immediately before the
+  // time_state.seq load, because the former is likely faster on most CPUs of
+  // interest. Architectures that may see a regression because of this approach
+  // include PowerPC and MIPS.
+  base_ns = time_state.last_sample.base_ns.load(std::memory_order_acquire);
   base_cycles =
-      time_state.last_sample.base_cycles.load(std::memory_order_relaxed);
+      time_state.last_sample.base_cycles.load(std::memory_order_acquire);
   nsscaled_per_cycle =
-      time_state.last_sample.nsscaled_per_cycle.load(std::memory_order_relaxed);
+      time_state.last_sample.nsscaled_per_cycle.load(std::memory_order_acquire);
   min_cycles_per_sample = time_state.last_sample.min_cycles_per_sample.load(
-      std::memory_order_relaxed);
-
-  // This acquire fence pairs with the release fence in SeqAcquire.  Since it
-  // is sequenced between reads of shared data and seq_read1, the reads of
-  // shared data are effectively acquiring.
-  std::atomic_thread_fence(std::memory_order_acquire);
+      std::memory_order_acquire);
 
   // The shared-data reads are effectively acquire ordered, and the
   // shared-data writes are effectively release ordered. Therefore if our