Relocate and enhance CPU microarchitecture detection in Abseil

- Move cpu_detect from absl/crc/internal to absl/base/internal (namespace absl::base_internal), add the kNvidiaGrace enumerator, and rename kIntelGraniterapidsap to kIntelGraniterapids.
- On aarch64 Linux, implement custom ISAR0.RNDR checking logic to distinguish kArmNeoverseV2 from kNvidiaGrace.
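- Add IsSMTEnabled() and NumContextsPerCPU(). Detection uses CPUID leaf 0xb on x86-64; all other architectures report a single context (SMT disabled).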

PiperOrigin-RevId: 922862384
Change-Id: I038b7732714400d6c63ff309bf20af3f436afcf5
diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake
index 66a1747..b603864 100644
--- a/CMake/AbseilDll.cmake
+++ b/CMake/AbseilDll.cmake
@@ -13,6 +13,8 @@
   "base/dynamic_annotations.h"
   "base/fast_type_id.h"
   "base/internal/atomic_hook.h"
+  "base/internal/cpu_detect.cc"
+  "base/internal/cpu_detect.h"
   "base/internal/cycleclock.cc"
   "base/internal/cycleclock.h"
   "base/internal/cycleclock_config.h"
@@ -105,8 +107,6 @@
   "container/node_hash_set.h"
   "crc/crc32c.cc"
   "crc/crc32c.h"
-  "crc/internal/cpu_detect.cc"
-  "crc/internal/cpu_detect.h"
   "crc/internal/crc.cc"
   "crc/internal/crc.h"
   "crc/internal/crc32_x86_arm_combined_simd.h"
diff --git a/absl/base/BUILD.bazel b/absl/base/BUILD.bazel
index 867f27d..abbf099 100644
--- a/absl/base/BUILD.bazel
+++ b/absl/base/BUILD.bazel
@@ -62,6 +62,23 @@
 )
 
 cc_library(
+    name = "cpu_detect",  # Host CPU type and SMT detection (base_internal).
+    srcs = [
+        "internal/cpu_detect.cc",
+    ],
+    hdrs = ["internal/cpu_detect.h"],
+    copts = ABSL_DEFAULT_COPTS,
+    linkopts = ABSL_DEFAULT_LINKOPTS,
+    visibility = [
+        "//absl:__subpackages__",
+        "//absl:friends",
+    ],
+    deps = [
+        ":config",
+    ],
+)
+
+cc_library(
     name = "hardening",
     hdrs = [
         "internal/hardening.h",
diff --git a/absl/base/CMakeLists.txt b/absl/base/CMakeLists.txt
index 9608061..f3875b0 100644
--- a/absl/base/CMakeLists.txt
+++ b/absl/base/CMakeLists.txt
@@ -32,6 +32,21 @@
 # Internal-only target, do not depend on directly.
 absl_cc_library(
   NAME
+    base_cpu_detect
+  HDRS
+    "internal/cpu_detect.h"
+  SRCS
+    "internal/cpu_detect.cc"
+  DEPS
+    absl::base  # NOTE(review): the Bazel target omits this dep; confirm.
+    absl::config
+  COPTS
+    ${ABSL_DEFAULT_COPTS}
+)
+
+# Internal-only target, do not depend on directly.
+absl_cc_library(
+  NAME
     errno_saver
   HDRS
     "internal/errno_saver.h"
diff --git a/absl/crc/internal/cpu_detect.cc b/absl/base/internal/cpu_detect.cc
similarity index 82%
rename from absl/crc/internal/cpu_detect.cc
rename to absl/base/internal/cpu_detect.cc
index 86f55d0..c08637c 100644
--- a/absl/crc/internal/cpu_detect.cc
+++ b/absl/base/internal/cpu_detect.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "absl/crc/internal/cpu_detect.h"
+#include "absl/base/internal/cpu_detect.h"
 
 #include <cstdint>
 #include <optional>  // IWYU pragma: keep
@@ -42,6 +42,7 @@
 // MSVC-equivalent __cpuid intrinsic declaration for clang-like compilers
 // for non-Windows build environments.
 extern void __cpuid(int[4], int);
+extern void __cpuidex(int[4], int, int);
 #elif !defined(_WIN32) && !defined(_WIN64)
 // MSVC defines this function for us.
 // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex
@@ -51,12 +52,18 @@
                      "=d"(cpu_info[3])
                    : "a"(info_type), "c"(0));
 }
+static void __cpuidex(int cpu_info[4], int info_type, int ecx) {
+  __asm__ volatile("cpuid \n\t"  // EAX selects the leaf, ECX the sub-leaf.
+                   : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]),
+                     "=d"(cpu_info[3])  // Results: EAX, EBX, ECX, EDX.
+                   : "a"(info_type), "c"(ecx));
+}
 #endif  // !defined(_WIN32) && !defined(_WIN64)
 #endif  // defined(__x86_64__) || defined(_M_X64)
 
 namespace absl {
 ABSL_NAMESPACE_BEGIN
-namespace crc_internal {
+namespace base_internal {
 
 #if defined(__x86_64__) || defined(_M_X64)
 
@@ -137,7 +144,7 @@
             case 0x4f:  // Broadwell
             case 0x56:  // BroadwellDE
               return CpuType::kIntelBroadwell;
-            case 0x55:                 // Skylake Xeon
+            case 0x55:                         // Skylake Xeon
               if ((cpu_info[0] & 0x0f) < 5) {  // stepping < 5 is skylake
                 return CpuType::kIntelSkylakeXeon;
               } else {  // stepping >= 5 is cascadelake
@@ -152,7 +159,7 @@
             case 0xcf:  // Emerald Rapids
               return CpuType::kIntelEmeraldrapids;
             case 0xad:  // Granite Rapids
-              return CpuType::kIntelGraniterapidsap;
+              return CpuType::kIntelGraniterapids;
             default:
               return CpuType::kUnknown;
           }
@@ -271,18 +278,30 @@
     switch (implementer) {
       case 0x41:
         switch (part_number) {
-          case 0xd0c: return CpuType::kArmNeoverseN1;
-          case 0xd40: return CpuType::kArmNeoverseV1;
-          case 0xd49: return CpuType::kArmNeoverseN2;
-          case 0xd4f: return CpuType::kArmNeoverseV2;
-          case 0xd8e: return CpuType::kArmNeoverseN3;
+          case 0xd0c:
+            return CpuType::kArmNeoverseN1;
+          case 0xd40:
+            return CpuType::kArmNeoverseV1;
+          case 0xd49:
+            return CpuType::kArmNeoverseN2;
+          case 0xd4f: {  // Neoverse V2 or NVIDIA Grace; told apart by RNDR.
+            uint64_t isar0 = 0;
+            ABSL_INTERNAL_AARCH64_ID_REG_READ(ID_AA64ISAR0_EL1, isar0);
+            if (((isar0 >> 60) & 0xf) == 0x0) {  // RNDR field, bits [63:60].
+              return CpuType::kNvidiaGrace;
+            }
+            return CpuType::kArmNeoverseV2;
+          }
+          case 0xd8e:
+            return CpuType::kArmNeoverseN3;
           default:
             return CpuType::kUnknown;
         }
         break;
       case 0xc0:
         switch (part_number) {
-          case 0xac3: return CpuType::kAmpereSiryn;
+          case 0xac3:
+            return CpuType::kAmpereSiryn;
           default:
             return CpuType::kUnknown;
         }
@@ -354,6 +373,47 @@
 
 #endif
 
-}  // namespace crc_internal
+// Returns how many hardware contexts per CPU exist. Note: AMD CPUs prior to Zen
+// 2 (Rome, 2019) do not support CPUID leaf 0xb. We intentionally avoid falling
+// back to leaf 1 ebx[23:16] because it reports total logical processors per
+// package (not threads per core), which risks false positives on older
+// multi-core non-SMT chips. Pre-Zen 2 AMD safely defaults to 1.
+int NumContextsPerCPU() {
+#if defined(__x86_64__) || defined(_M_X64)
+  int info[4];
+  __cpuid(info, 0);  // info[0] (EAX) = highest supported basic leaf.
+  if (info[0] < 0xb) {
+    return 1;  // Leaf 0xb is unavailable; assume a single context.
+  }
+
+  __cpuid(info, 1);
+  bool has_ht = (info[3] & (1 << 28)) != 0;  // Leaf 1, EDX bit 28 (HTT).
+  if (!has_ht) {
+    return 1;
+  }
+
+  for (int sub_leaf = 0; sub_leaf < 4; ++sub_leaf) {  // Sub-leaves 0..3.
+    __cpuidex(info, 0xb, sub_leaf);
+    int level_type = (info[2] >> 8) & 0xff;  // ECX[15:8].
+    if (level_type == 0) {  // 0 = invalid: no further levels.
+      break;
+    }
+    if (level_type == 1) {  // 1 = SMT level.
+      int num_threads = info[1] & 0x0ffff;  // EBX[15:0].
+      if (num_threads >= 1) {
+        return num_threads;
+      }
+    }
+  }
+
+  return 1;
+#else
+  return 1;  // No detection is implemented for non-x86-64 targets.
+#endif
+}
+
+bool IsSMTEnabled() { return NumContextsPerCPU() > 1; }  // >1 context => SMT.
+
+}  // namespace base_internal
 ABSL_NAMESPACE_END
 }  // namespace absl
diff --git a/absl/crc/internal/cpu_detect.h b/absl/base/internal/cpu_detect.h
similarity index 80%
rename from absl/crc/internal/cpu_detect.h
rename to absl/base/internal/cpu_detect.h
index e76a802..5ea76ec 100644
--- a/absl/crc/internal/cpu_detect.h
+++ b/absl/base/internal/cpu_detect.h
@@ -12,14 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#ifndef ABSL_CRC_INTERNAL_CPU_DETECT_H_
-#define ABSL_CRC_INTERNAL_CPU_DETECT_H_
+#ifndef ABSL_BASE_INTERNAL_CPU_DETECT_H_
+#define ABSL_BASE_INTERNAL_CPU_DETECT_H_
 
 #include "absl/base/config.h"
 
 namespace absl {
 ABSL_NAMESPACE_BEGIN
-namespace crc_internal {
+namespace base_internal {
 
 // Enumeration of architectures that we have special-case tuning parameters for.
 // This set may change over time.
@@ -38,7 +38,7 @@
   kIntelIcelake,
   kIntelSapphirerapids,
   kIntelEmeraldrapids,
-  kIntelGraniterapidsap,
+  kIntelGraniterapids,
   kIntelSkylake,
   kIntelIvybridge,
   kIntelSandybridge,
@@ -49,6 +49,7 @@
   kArmNeoverseN2,
   kArmNeoverseV2,
   kArmNeoverseN3,
+  kNvidiaGrace,
 };
 
 // Returns the type of host CPU this code is running on.  Returns kUnknown if
@@ -62,8 +63,15 @@
 // tuning.
 bool SupportsArmCRC32PMULL();
 
-}  // namespace crc_internal
+// Returns true if NumContextsPerCPU() reports more than one hardware context,
+// i.e. simultaneous multithreading (SMT) is supported and enabled.
+bool IsSMTEnabled();
+
+// Returns the number of hardware contexts per CPU (1 on non-x86-64 builds).
+int NumContextsPerCPU();
+
+}  // namespace base_internal
 ABSL_NAMESPACE_END
 }  // namespace absl
 
-#endif  // ABSL_CRC_INTERNAL_CPU_DETECT_H_
+#endif  // ABSL_BASE_INTERNAL_CPU_DETECT_H_
diff --git a/absl/crc/BUILD.bazel b/absl/crc/BUILD.bazel
index 49e916c..88b9ea2 100644
--- a/absl/crc/BUILD.bazel
+++ b/absl/crc/BUILD.bazel
@@ -34,21 +34,6 @@
 licenses(["notice"])
 
 cc_library(
-    name = "cpu_detect",
-    srcs = [
-        "internal/cpu_detect.cc",
-    ],
-    hdrs = ["internal/cpu_detect.h"],
-    copts = ABSL_DEFAULT_COPTS,
-    linkopts = ABSL_DEFAULT_LINKOPTS,
-    visibility = ["//visibility:private"],
-    deps = [
-        "//absl/base",
-        "//absl/base:config",
-    ],
-)
-
-cc_library(
     name = "crc_internal",
     srcs = [
         "internal/crc.cc",
@@ -63,9 +48,9 @@
     linkopts = ABSL_DEFAULT_LINKOPTS,
     visibility = ["//visibility:private"],
     deps = [
-        ":cpu_detect",
         "//absl/base:config",
         "//absl/base:core_headers",
+        "//absl/base:cpu_detect",
         "//absl/base:endian",
         "//absl/base:prefetch",
         "//absl/base:raw_logging_internal",
@@ -92,11 +77,11 @@
     linkopts = ABSL_DEFAULT_LINKOPTS,
     visibility = ["//visibility:public"],
     deps = [
-        ":cpu_detect",
         ":crc_internal",
         ":non_temporal_memcpy",
         "//absl/base:config",
         "//absl/base:core_headers",
+        "//absl/base:cpu_detect",
         "//absl/base:endian",
         "//absl/base:prefetch",
         "//absl/strings",
diff --git a/absl/crc/CMakeLists.txt b/absl/crc/CMakeLists.txt
index 034d0d0..28f0bab 100644
--- a/absl/crc/CMakeLists.txt
+++ b/absl/crc/CMakeLists.txt
@@ -15,21 +15,6 @@
 # Internal-only target, do not depend on directly.
 absl_cc_library(
   NAME
-    crc_cpu_detect
-  HDRS
-    "internal/cpu_detect.h"
-  SRCS
-    "internal/cpu_detect.cc"
-  COPTS
-    ${ABSL_DEFAULT_COPTS}
-  DEPS
-    absl::base
-    absl::config
-)
-
-# Internal-only target, do not depend on directly.
-absl_cc_library(
-  NAME
     crc_internal
   HDRS
     "internal/crc.h"
@@ -41,7 +26,7 @@
   COPTS
     ${ABSL_DEFAULT_COPTS}
   DEPS
-    absl::crc_cpu_detect
+    absl::base_cpu_detect
     absl::bits
     absl::config
     absl::core_headers
@@ -67,7 +52,7 @@
   COPTS
     ${ABSL_DEFAULT_COPTS}
   DEPS
-    absl::crc_cpu_detect
+    absl::base_cpu_detect
     absl::crc_internal
     absl::non_temporal_memcpy
     absl::config
diff --git a/absl/crc/internal/crc_memcpy_x86_arm_combined.cc b/absl/crc/internal/crc_memcpy_x86_arm_combined.cc
index 247b3aa..fd3ce60 100644
--- a/absl/crc/internal/crc_memcpy_x86_arm_combined.cc
+++ b/absl/crc/internal/crc_memcpy_x86_arm_combined.cc
@@ -54,10 +54,10 @@
 
 #include "absl/base/attributes.h"
 #include "absl/base/config.h"
+#include "absl/base/internal/cpu_detect.h"
 #include "absl/base/optimization.h"
 #include "absl/base/prefetch.h"
 #include "absl/crc/crc32c.h"
-#include "absl/crc/internal/cpu_detect.h"
 #include "absl/crc/internal/crc32_x86_arm_combined_simd.h"
 #include "absl/crc/internal/crc_memcpy.h"
 #include "absl/strings/string_view.h"
@@ -69,6 +69,9 @@
 ABSL_NAMESPACE_BEGIN
 namespace crc_internal {
 
+using ::absl::base_internal::CpuType;
+using ::absl::base_internal::GetCpuType;
+
 namespace {
 
 inline crc32c_t ShortCrcCopy(char* dst, const char* src, std::size_t length,
@@ -427,6 +430,7 @@
     case CpuType::kArmNeoverseN2:
     case CpuType::kArmNeoverseV1:
     case CpuType::kArmNeoverseV2:
+    case CpuType::kNvidiaGrace:
       return {
           /*.temporal=*/new AcceleratedCrcMemcpyEngine<3, 0>(),
           /*.non_temporal=*/new CrcNonTemporalMemcpyEngine(),
diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc
index 8140378..e44a009 100644
--- a/absl/crc/internal/crc_x86_arm_combined.cc
+++ b/absl/crc/internal/crc_x86_arm_combined.cc
@@ -21,9 +21,9 @@
 
 #include "absl/base/attributes.h"
 #include "absl/base/config.h"
+#include "absl/base/internal/cpu_detect.h"
 #include "absl/base/internal/endian.h"
 #include "absl/base/prefetch.h"
-#include "absl/crc/internal/cpu_detect.h"
 #include "absl/crc/internal/crc32_x86_arm_combined_simd.h"
 #include "absl/crc/internal/crc_internal.h"
 #include "absl/memory/memory.h"
@@ -38,6 +38,10 @@
 ABSL_NAMESPACE_BEGIN
 namespace crc_internal {
 
+using ::absl::base_internal::CpuType;
+using ::absl::base_internal::GetCpuType;
+using ::absl::base_internal::SupportsArmCRC32PMULL;
+
 #if defined(ABSL_INTERNAL_CAN_USE_SIMD_CRC32C)
 
 // Implementation details not exported outside of file
@@ -772,7 +776,7 @@
     case CpuType::kIntelIcelake:
     case CpuType::kIntelSapphirerapids:
     case CpuType::kIntelEmeraldrapids:
-    case CpuType::kIntelGraniterapidsap:
+    case CpuType::kIntelGraniterapids:
       return new CRC32AcceleratedX86ARMCombinedMultipleStreams<
           3, 2, 0, CutoffStrategy::Fold3>();
     // PCLMULQDQ is slow, don't use it.
@@ -785,6 +789,7 @@
     case CpuType::kArmNeoverseN2:
     case CpuType::kArmNeoverseV1:
     case CpuType::kArmNeoverseN3:
+    case CpuType::kNvidiaGrace:
       return new CRC32AcceleratedX86ARMCombinedMultipleStreams<
           1, 1, 0, CutoffStrategy::Unroll64CRC>();
     case CpuType::kAmpereSiryn: