Add functionality to get the number of cores using the x2APIC ID
diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h
index bcfeb34..38fae73 100644
--- a/xbyak/xbyak.h
+++ b/xbyak/xbyak.h
@@ -186,7 +186,8 @@
ERR_INVALID_ZERO,
ERR_INVALID_RIP_IN_AUTO_GROW,
ERR_INVALID_MIB_ADDRESS,
- ERR_INTERNAL
+ ERR_INTERNAL,
+ ERR_x2APIC_NOT_SUPPORTED_CANT_GET_NCORES
};
class Error : public std::exception {
@@ -248,6 +249,7 @@
"invalid rip in AutoGrow",
"invalid mib address",
"internal error",
+ "cannot determine num of cores because x2APIC not supported"
};
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
return errTbl[err_];
diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h
index 0154450..d5d9346 100644
--- a/xbyak/xbyak_util.h
+++ b/xbyak/xbyak_util.h
@@ -50,6 +50,11 @@
namespace Xbyak { namespace util {
+typedef enum intel_cpu_topology_level { // level-type codes that setNumCores() matches against CPUID leaf 0xB output
+ smt_level = 1, // SMT level; its count is stored in n_cores[smt_level - 1]
+ core_level = 2 // core level; its count is stored in n_cores[core_level - 1]
+}intel_cpu_topology_level_t;
+
/**
CPU detection class
*/
@@ -88,6 +93,39 @@
{
return (val >> base) & ((1u << (end - base)) - 1);
}
+	void setNumCores()
+	{
+		// Fill n_cores[] from CPUID leaf 0xB (Intel only); an entry of 0 means "not reported".
+		n_cores[smt_level - 1] = 0;
+		n_cores[core_level - 1] = 0;
+		if ((type_ & tINTEL) == 0) return;
+
+		unsigned int data[4];
+		/* CAUTION: These numbers are configuration as shipped by Intel. */
+		getCpuidEx(0x0, 0, data);
+		if (data[0] < 0xB) {
+			/* Failed to determine num of cores without x2APIC support.
+			   TODO: USE initial APIC ID to determine ncores. */
+			return;
+		}
+		/*
+			leaf 11 exists (x2APIC is supported), so use it to get the number of smt cores and cores on socket.
+			leaf 0xB can be zeroed-out by a hypervisor; any level it does not report keeps the 0 assigned above.
+		*/
+		x2APIC_supported = true;
+		for (unsigned int i = 0; i < maxTopologyLevels; i++) {
+			getCpuidEx(0xB, i, data);
+			// data[2] carries the level type of sub-leaf i, data[1] the count reported for that level
+			intel_cpu_topology_level_t level_type =
+				(intel_cpu_topology_level_t)extractBit(data[2], 8, 15);
+			if (level_type == smt_level || level_type == core_level)
+				n_cores[level_type - 1] = extractBit(data[1], 0, 15);
+		}
+		// Divide the core-level count by the SMT width (skipped when the SMT width is unknown).
+		// NOTE(review): setCacheHierarchy() reads n_cores[1] as logical_cores; confirm it expects the divided value.
+		if (n_cores[smt_level - 1] != 0)
+			n_cores[core_level - 1] /= n_cores[smt_level - 1];
+	}
void setCacheHierarchy()
{
if ((type_ & tINTEL) == 0) return;
@@ -96,21 +134,12 @@
// const unsigned int INSTRUCTION_CACHE = 2;
const unsigned int UNIFIED_CACHE = 3;
unsigned int smt_width = 0;
- unsigned int n_cores = 0;
+ unsigned int logical_cores = 0;
unsigned int data[4];
- /*
- if leaf 11 exists, we use it to get the number of smt cores and cores on socket
- If x2APIC is supported, these are the only correct numbers.
-
- leaf 0xB can be zeroed-out by a hypervisor
- */
- getCpuidEx(0x0, 0, data);
- if (data[0] >= 0xB) {
- getCpuidEx(0xB, 0, data); // CPUID for SMT Level
- smt_width = data[1] & 0x7FFF;
- getCpuidEx(0xB, 1, data); // CPUID for CORE Level
- n_cores = data[1] & 0x7FFF;
+ if (x2APIC_supported) {
+ smt_width = n_cores[0];
+ logical_cores = n_cores[1];
}
/*
@@ -127,24 +156,27 @@
unsigned int cacheType = extractBit(data[0], 0, 4);
if (cacheType == NO_CACHE) break;
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
- unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1;
- if (n_cores != 0) { // true only if leaf 0xB is supported and valid
- nb_logical_cores = (std::min)(nb_logical_cores, n_cores);
- }
- assert(nb_logical_cores != 0);
+ unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1;
+ if (logical_cores != 0) // true only if leaf 0xB is supported and valid
+ actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
+ assert(actual_logical_cores != 0);
data_cache_size[data_cache_levels] =
(extractBit(data[1], 22, 31) + 1)
* (extractBit(data[1], 12, 21) + 1)
* (extractBit(data[1], 0, 11) + 1)
* (data[2] + 1);
- if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
+ if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
assert(smt_width != 0);
- cores_sharing_data_cache[data_cache_levels] = (std::max)(nb_logical_cores / smt_width, 1u);
+ cores_sharing_data_cache[data_cache_levels] = (std::max)(actual_logical_cores / smt_width, 1u);
data_cache_levels++;
}
}
}
+ //system topology, filled in by setNumCores()
+ bool x2APIC_supported; // set to true by setNumCores() when it finds that CPUID leaf 0xB exists
+ static const unsigned int maxTopologyLevels = 2; // size of n_cores[] and number of leaf 0xB sub-leaves queried
+ unsigned int n_cores[maxTopologyLevels]; // indexed by (topology level - 1): [0] for smt_level, [1] for core_level
public:
int model;
int family;
@@ -160,6 +192,14 @@
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
unsigned int data_cache_levels;
+
+	/** count recorded by setNumCores() for topology_level; throws Error(ERR_BAD_PARAMETER) for an unknown level and Error(ERR_x2APIC_NOT_SUPPORTED_CANT_GET_NCORES) when leaf 0xB was not found */
+	unsigned int getNumCores(intel_cpu_topology_level_t topology_level) const {
+		if (topology_level != smt_level && topology_level != core_level) throw Error(ERR_BAD_PARAMETER);
+		if (!x2APIC_supported) throw Error(ERR_x2APIC_NOT_SUPPORTED_CANT_GET_NCORES);
+		return n_cores[topology_level - 1]; // level codes are 1-based, the array is 0-based
+	}
+
unsigned int getDataCacheLevels() const { return data_cache_levels; }
unsigned int getCoresSharingDataCache(unsigned int i) const
{
@@ -271,6 +311,7 @@
Cpu()
: type_(NONE)
+ , x2APIC_supported(false)
, data_cache_levels(0)
{
unsigned int data[4];
@@ -363,6 +404,7 @@
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
}
setFamily();
+ setNumCores();
setCacheHierarchy();
}
void putFamily() const