Relocate and enhance CPU microarchitecture detection in Abseil - Move cpu_detect from absl::crc_internal to absl::base_internal, adding the kNvidiaGrace enum and renaming kIntelGraniterapidsap to kIntelGraniterapids. - On aarch64 Linux, implement custom ISAR0.RNDR checking logic to distinguish kArmNeoverseV2 from kNvidiaGrace. - Add IsSMTEnabled and NumContextsPerCPU, which query CPUID leaf 0xb on x86-64 and report a single context per CPU on other architectures. PiperOrigin-RevId: 922862384 Change-Id: I038b7732714400d6c63ff309bf20af3f436afcf5
diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake index 66a1747..b603864 100644 --- a/CMake/AbseilDll.cmake +++ b/CMake/AbseilDll.cmake
@@ -13,6 +13,8 @@ "base/dynamic_annotations.h" "base/fast_type_id.h" "base/internal/atomic_hook.h" + "base/internal/cpu_detect.cc" + "base/internal/cpu_detect.h" "base/internal/cycleclock.cc" "base/internal/cycleclock.h" "base/internal/cycleclock_config.h" @@ -105,8 +107,6 @@ "container/node_hash_set.h" "crc/crc32c.cc" "crc/crc32c.h" - "crc/internal/cpu_detect.cc" - "crc/internal/cpu_detect.h" "crc/internal/crc.cc" "crc/internal/crc.h" "crc/internal/crc32_x86_arm_combined_simd.h"
diff --git a/absl/base/BUILD.bazel b/absl/base/BUILD.bazel index 867f27d..abbf099 100644 --- a/absl/base/BUILD.bazel +++ b/absl/base/BUILD.bazel
@@ -62,6 +62,23 @@ ) cc_library( + name = "cpu_detect", + srcs = [ + "internal/cpu_detect.cc", + ], + hdrs = ["internal/cpu_detect.h"], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + visibility = [ + "//absl:__subpackages__", + "//absl:friends", + ], + deps = [ + ":config", + ], +) + +cc_library( name = "hardening", hdrs = [ "internal/hardening.h",
diff --git a/absl/base/CMakeLists.txt b/absl/base/CMakeLists.txt index 9608061..f3875b0 100644 --- a/absl/base/CMakeLists.txt +++ b/absl/base/CMakeLists.txt
@@ -32,6 +32,21 @@ # Internal-only target, do not depend on directly. absl_cc_library( NAME + base_cpu_detect + HDRS + "internal/cpu_detect.h" + SRCS + "internal/cpu_detect.cc" + DEPS + absl::base + absl::config + COPTS + ${ABSL_DEFAULT_COPTS} +) + +# Internal-only target, do not depend on directly. +absl_cc_library( + NAME errno_saver HDRS "internal/errno_saver.h"
diff --git a/absl/crc/internal/cpu_detect.cc b/absl/base/internal/cpu_detect.cc similarity index 82% rename from absl/crc/internal/cpu_detect.cc rename to absl/base/internal/cpu_detect.cc index 86f55d0..c08637c 100644 --- a/absl/crc/internal/cpu_detect.cc +++ b/absl/base/internal/cpu_detect.cc
@@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "absl/crc/internal/cpu_detect.h" +#include "absl/base/internal/cpu_detect.h" #include <cstdint> #include <optional> // IWYU pragma: keep @@ -42,6 +42,7 @@ // MSVC-equivalent __cpuid intrinsic declaration for clang-like compilers // for non-Windows build environments. extern void __cpuid(int[4], int); +extern void __cpuidex(int[4], int, int); #elif !defined(_WIN32) && !defined(_WIN64) // MSVC defines this function for us. // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex @@ -51,12 +52,18 @@ "=d"(cpu_info[3]) : "a"(info_type), "c"(0)); } +static void __cpuidex(int cpu_info[4], int info_type, int ecx) { + __asm__ volatile("cpuid \n\t" + : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), + "=d"(cpu_info[3]) + : "a"(info_type), "c"(ecx)); +} #endif // !defined(_WIN32) && !defined(_WIN64) #endif // defined(__x86_64__) || defined(_M_X64) namespace absl { ABSL_NAMESPACE_BEGIN -namespace crc_internal { +namespace base_internal { #if defined(__x86_64__) || defined(_M_X64) @@ -137,7 +144,7 @@ case 0x4f: // Broadwell case 0x56: // BroadwellDE return CpuType::kIntelBroadwell; - case 0x55: // Skylake Xeon + case 0x55: // Skylake Xeon if ((cpu_info[0] & 0x0f) < 5) { // stepping < 5 is skylake return CpuType::kIntelSkylakeXeon; } else { // stepping >= 5 is cascadelake @@ -152,7 +159,7 @@ case 0xcf: // Emerald Rapids return CpuType::kIntelEmeraldrapids; case 0xad: // Granite Rapids - return CpuType::kIntelGraniterapidsap; + return CpuType::kIntelGraniterapids; default: return CpuType::kUnknown; } @@ -271,18 +278,30 @@ switch (implementer) { case 0x41: switch (part_number) { - case 0xd0c: return CpuType::kArmNeoverseN1; - case 0xd40: return CpuType::kArmNeoverseV1; - case 0xd49: return CpuType::kArmNeoverseN2; - case 0xd4f: return CpuType::kArmNeoverseV2; - case 0xd8e: return CpuType::kArmNeoverseN3; + case 0xd0c: + return 
CpuType::kArmNeoverseN1; + case 0xd40: + return CpuType::kArmNeoverseV1; + case 0xd49: + return CpuType::kArmNeoverseN2; + case 0xd4f: { + uint64_t isar0 = 0; + ABSL_INTERNAL_AARCH64_ID_REG_READ(ID_AA64ISAR0_EL1, isar0); + if (((isar0 >> 60) & 0xf) == 0x0) { + return CpuType::kNvidiaGrace; + } + return CpuType::kArmNeoverseV2; + } + case 0xd8e: + return CpuType::kArmNeoverseN3; default: return CpuType::kUnknown; } break; case 0xc0: switch (part_number) { - case 0xac3: return CpuType::kAmpereSiryn; + case 0xac3: + return CpuType::kAmpereSiryn; default: return CpuType::kUnknown; } @@ -354,6 +373,47 @@ #endif -} // namespace crc_internal +// Returns how many hardware contexts per CPU exist. Note: AMD CPUs prior to Zen +// 2 (Rome, 2019) do not support CPUID leaf 0xb. We intentionally avoid falling +// back to leaf 1 ebx[23:16] because it reports total logical processors per +// package (not threads per core), which risks false positives on older +// multi-core non-SMT chips. Pre-Zen 2 AMD safely defaults to 1. +int NumContextsPerCPU() { +#if defined(__x86_64__) || defined(_M_X64) + int info[4]; + __cpuid(info, 0); + if (info[0] < 0xb) { + return 1; + } + + __cpuid(info, 1); + bool has_ht = (info[3] & (1 << 28)) != 0; + if (!has_ht) { + return 1; + } + + for (int sub_leaf = 0; sub_leaf < 4; ++sub_leaf) { + __cpuidex(info, 0xb, sub_leaf); + int level_type = (info[2] >> 8) & 0xff; + if (level_type == 0) { + break; + } + if (level_type == 1) { + int num_threads = info[1] & 0x0ffff; + if (num_threads >= 1) { + return num_threads; + } + } + } + + return 1; +#else + return 1; +#endif +} + +bool IsSMTEnabled() { return NumContextsPerCPU() > 1; } + +} // namespace base_internal ABSL_NAMESPACE_END } // namespace absl
diff --git a/absl/crc/internal/cpu_detect.h b/absl/base/internal/cpu_detect.h similarity index 80% rename from absl/crc/internal/cpu_detect.h rename to absl/base/internal/cpu_detect.h index e76a802..5ea76ec 100644 --- a/absl/crc/internal/cpu_detect.h +++ b/absl/base/internal/cpu_detect.h
@@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef ABSL_CRC_INTERNAL_CPU_DETECT_H_ -#define ABSL_CRC_INTERNAL_CPU_DETECT_H_ +#ifndef ABSL_BASE_INTERNAL_CPU_DETECT_H_ +#define ABSL_BASE_INTERNAL_CPU_DETECT_H_ #include "absl/base/config.h" namespace absl { ABSL_NAMESPACE_BEGIN -namespace crc_internal { +namespace base_internal { // Enumeration of architectures that we have special-case tuning parameters for. // This set may change over time. @@ -38,7 +38,7 @@ kIntelIcelake, kIntelSapphirerapids, kIntelEmeraldrapids, - kIntelGraniterapidsap, + kIntelGraniterapids, kIntelSkylake, kIntelIvybridge, kIntelSandybridge, @@ -49,6 +49,7 @@ kArmNeoverseN2, kArmNeoverseV2, kArmNeoverseN3, + kNvidiaGrace, }; // Returns the type of host CPU this code is running on. Returns kUnknown if @@ -62,8 +63,15 @@ // tuning. bool SupportsArmCRC32PMULL(); -} // namespace crc_internal +// Returns whether the host CPU supports simultaneous multithreading (SMT) and +// if it is enabled. +bool IsSMTEnabled(); + +// Returns how many hardware contexts per CPU exist. +int NumContextsPerCPU(); + +} // namespace base_internal ABSL_NAMESPACE_END } // namespace absl -#endif // ABSL_CRC_INTERNAL_CPU_DETECT_H_ +#endif // ABSL_BASE_INTERNAL_CPU_DETECT_H_
diff --git a/absl/crc/BUILD.bazel b/absl/crc/BUILD.bazel index 49e916c..88b9ea2 100644 --- a/absl/crc/BUILD.bazel +++ b/absl/crc/BUILD.bazel
@@ -34,21 +34,6 @@ licenses(["notice"]) cc_library( - name = "cpu_detect", - srcs = [ - "internal/cpu_detect.cc", - ], - hdrs = ["internal/cpu_detect.h"], - copts = ABSL_DEFAULT_COPTS, - linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = ["//visibility:private"], - deps = [ - "//absl/base", - "//absl/base:config", - ], -) - -cc_library( name = "crc_internal", srcs = [ "internal/crc.cc", @@ -63,9 +48,9 @@ linkopts = ABSL_DEFAULT_LINKOPTS, visibility = ["//visibility:private"], deps = [ - ":cpu_detect", "//absl/base:config", "//absl/base:core_headers", + "//absl/base:cpu_detect", "//absl/base:endian", "//absl/base:prefetch", "//absl/base:raw_logging_internal", @@ -92,11 +77,11 @@ linkopts = ABSL_DEFAULT_LINKOPTS, visibility = ["//visibility:public"], deps = [ - ":cpu_detect", ":crc_internal", ":non_temporal_memcpy", "//absl/base:config", "//absl/base:core_headers", + "//absl/base:cpu_detect", "//absl/base:endian", "//absl/base:prefetch", "//absl/strings",
diff --git a/absl/crc/CMakeLists.txt b/absl/crc/CMakeLists.txt index 034d0d0..28f0bab 100644 --- a/absl/crc/CMakeLists.txt +++ b/absl/crc/CMakeLists.txt
@@ -15,21 +15,6 @@ # Internal-only target, do not depend on directly. absl_cc_library( NAME - crc_cpu_detect - HDRS - "internal/cpu_detect.h" - SRCS - "internal/cpu_detect.cc" - COPTS - ${ABSL_DEFAULT_COPTS} - DEPS - absl::base - absl::config -) - -# Internal-only target, do not depend on directly. -absl_cc_library( - NAME crc_internal HDRS "internal/crc.h" @@ -41,7 +26,7 @@ COPTS ${ABSL_DEFAULT_COPTS} DEPS - absl::crc_cpu_detect + absl::base_cpu_detect absl::bits absl::config absl::core_headers @@ -67,7 +52,7 @@ COPTS ${ABSL_DEFAULT_COPTS} DEPS - absl::crc_cpu_detect + absl::base_cpu_detect absl::crc_internal absl::non_temporal_memcpy absl::config
diff --git a/absl/crc/internal/crc_memcpy_x86_arm_combined.cc b/absl/crc/internal/crc_memcpy_x86_arm_combined.cc index 247b3aa..fd3ce60 100644 --- a/absl/crc/internal/crc_memcpy_x86_arm_combined.cc +++ b/absl/crc/internal/crc_memcpy_x86_arm_combined.cc
@@ -54,10 +54,10 @@ #include "absl/base/attributes.h" #include "absl/base/config.h" +#include "absl/base/internal/cpu_detect.h" #include "absl/base/optimization.h" #include "absl/base/prefetch.h" #include "absl/crc/crc32c.h" -#include "absl/crc/internal/cpu_detect.h" #include "absl/crc/internal/crc32_x86_arm_combined_simd.h" #include "absl/crc/internal/crc_memcpy.h" #include "absl/strings/string_view.h" @@ -69,6 +69,9 @@ ABSL_NAMESPACE_BEGIN namespace crc_internal { +using ::absl::base_internal::CpuType; +using ::absl::base_internal::GetCpuType; + namespace { inline crc32c_t ShortCrcCopy(char* dst, const char* src, std::size_t length, @@ -427,6 +430,7 @@ case CpuType::kArmNeoverseN2: case CpuType::kArmNeoverseV1: case CpuType::kArmNeoverseV2: + case CpuType::kNvidiaGrace: return { /*.temporal=*/new AcceleratedCrcMemcpyEngine<3, 0>(), /*.non_temporal=*/new CrcNonTemporalMemcpyEngine(),
diff --git a/absl/crc/internal/crc_x86_arm_combined.cc b/absl/crc/internal/crc_x86_arm_combined.cc index 8140378..e44a009 100644 --- a/absl/crc/internal/crc_x86_arm_combined.cc +++ b/absl/crc/internal/crc_x86_arm_combined.cc
@@ -21,9 +21,9 @@ #include "absl/base/attributes.h" #include "absl/base/config.h" +#include "absl/base/internal/cpu_detect.h" #include "absl/base/internal/endian.h" #include "absl/base/prefetch.h" -#include "absl/crc/internal/cpu_detect.h" #include "absl/crc/internal/crc32_x86_arm_combined_simd.h" #include "absl/crc/internal/crc_internal.h" #include "absl/memory/memory.h" @@ -38,6 +38,10 @@ ABSL_NAMESPACE_BEGIN namespace crc_internal { +using ::absl::base_internal::CpuType; +using ::absl::base_internal::GetCpuType; +using ::absl::base_internal::SupportsArmCRC32PMULL; + #if defined(ABSL_INTERNAL_CAN_USE_SIMD_CRC32C) // Implementation details not exported outside of file @@ -772,7 +776,7 @@ case CpuType::kIntelIcelake: case CpuType::kIntelSapphirerapids: case CpuType::kIntelEmeraldrapids: - case CpuType::kIntelGraniterapidsap: + case CpuType::kIntelGraniterapids: return new CRC32AcceleratedX86ARMCombinedMultipleStreams< 3, 2, 0, CutoffStrategy::Fold3>(); // PCLMULQDQ is slow, don't use it. @@ -785,6 +789,7 @@ case CpuType::kArmNeoverseN2: case CpuType::kArmNeoverseV1: case CpuType::kArmNeoverseN3: + case CpuType::kNvidiaGrace: return new CRC32AcceleratedX86ARMCombinedMultipleStreams< 1, 1, 0, CutoffStrategy::Unroll64CRC>(); case CpuType::kAmpereSiryn: