blob: 1376a4e72cd99124b1f8be7c6683425b6a6950ff [file] [edit]
/*
* Test program for Xbyak CPU Cache Topology API
* Demonstrates the CpuTopology, CpuCache, LogicalCpu, and CpuMask classes
*/
#include <stdio.h>
#include <string.h>
#include <map>
#include <vector>
#include "xbyak/xbyak_util.h"
using namespace Xbyak::util;
void printSeparator()
{
printf("========================================\n");
}
void printCpuMaskTest()
{
printSeparator();
printf("CpuMask Class - CPU affinity mask operations\n");
printSeparator();
CpuMask mask;
mask.append(0);
mask.append(2);
mask.append(5);
mask.append(7);
printf("Created CpuMask with CPUs: 0, 2, 5, 7\n");
printf("Iterating through set CPUs:\n");
for (CpuMask::iterator it = mask.begin(); it != mask.end(); ++it) {
printf(" CPU %u is set\n", *it);
}
printf("Total CPUs in mask: %zu\n", mask.size());
printf("\n");
}
void printSystemTopology(const CpuTopology& cpuTopo)
{
printSeparator();
printf("CpuTopology Class - System CPU topology\n");
printSeparator();
printf("System Configuration:\n");
printf(" Logical CPUs: %zu\n", cpuTopo.getLogicalCpuNum());
printf(" Physical Cores: %zu\n", cpuTopo.getPhysicalCoreNum());
printf(" Cache Line Size:%u bytes\n", cpuTopo.getLineSize());
printf(" Hybrid System: %s\n", cpuTopo.isHybrid() ? "Yes (P-cores + E-cores)" : "No");
printf("\n");
}
const char *type2str(int coreType)
{
switch (coreType) {
case Performance: return "P-core";
case Efficient: return "E-core";
case Standard: return "Standard";
default: return "Unknown";
}
}
void printLogicalCpuDetails(const CpuTopology& cpuTopo)
{
printSeparator();
printf("LogicalCpu Class - Per-CPU topology information\n");
printSeparator();
printf("Detailed CPU Topology (showing upto 32 Logical CPUs):\n");
size_t maxCpusToPrint = 32;
size_t numCpus = cpuTopo.getLogicalCpuNum();
for (size_t i = 0; i < numCpus && i < maxCpusToPrint; i++) {
const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(i);
printf(" CPU %3zu: Core=%u Type=%s Siblings=", i, logCpu.coreId, type2str(logCpu.coreType));
logCpu.getSiblings().put();
}
if (numCpus > maxCpusToPrint) {
printf(" ... (%zu more CPUs not shown)\n", numCpus - maxCpusToPrint);
}
printf("\n");
}
void printCacheHierarchy(const CpuTopology& cpuTopo)
{
printSeparator();
printf("CpuCache Class - Cache hierarchy and sharing\n");
printSeparator();
const char* cacheNames[] = {"L1i", "L1d", "L2", "L3"};
// Create a map to group CPUs by their cache topology
// Key: cache topology signature, Value: list of CPUs with that topology
std::map<std::string, std::vector<size_t> > topologyGroups;
// Build cache topology signatures for each CPU
for (size_t cpuIdx = 0; cpuIdx < cpuTopo.getLogicalCpuNum(); cpuIdx++) {
const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(cpuIdx);
// Create a signature string that uniquely identifies the cache topology
char signature[512];
int offset = 0;
offset += snprintf(signature + offset, sizeof(signature) - offset, "%s:", type2str(logCpu.coreType));
// Add cache properties to signature
for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
const CpuCache& cache = cpuTopo.getCache(cpuIdx, (CacheType)cType);
offset += snprintf(signature + offset, sizeof(signature) - offset,
"%d-%u-%u-%zu;",
cType, cache.size, cache.associativity,
cache.getSharedCpuNum());
}
topologyGroups[signature].push_back(cpuIdx);
}
// Print each unique cache topology group
printf("Cache Hierarchy by Topology:\n");
for (std::map<std::string, std::vector<size_t> >::iterator groupIt = topologyGroups.begin();
groupIt != topologyGroups.end(); ++groupIt) {
const std::vector<size_t>& cpuList = groupIt->second;
if (cpuList.empty()) continue;
size_t firstCpu = cpuList[0];
const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(firstCpu);
// Print core type and CPU list
printf("\n%s (CPUs [", type2str(logCpu.coreType));
for (size_t i = 0; i < cpuList.size(); i++) {
if (i > 0) printf(", ");
printf("%zu", cpuList[i]);
// Limit output to ~20 CPUs for readability
if (i >= 19 && cpuList.size() > 20) {
printf(", ...");
break;
}
}
printf("]):\n");
// Print cache details for this topology
for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
const CpuCache& cache = cpuTopo.getCache(firstCpu, (CacheType)cType);
if (cache.size > 0) {
printf(" %s: ", cacheNames[cType]);
if (cache.size >= 1024 * 1024) {
printf("%6.2f MB", cache.size / (1024.0 * 1024.0));
} else if (cache.size >= 1024) {
printf("%6.2f KB", cache.size / 1024.0);
} else {
printf("%6u B ", cache.size);
}
printf(" | %2u-way", cache.associativity);
if (cache.isShared()) {
printf(" | Shared by %zu CPUs", cache.getSharedCpuNum());
}
printf("\n");
}
}
}
printf("\n");
}
void printCacheSharingDetails(const CpuTopology& cpuTopo)
{
printSeparator();
printf("Cache Sharing Analysis\n");
printSeparator();
const char* cacheNames[] = {"L1i", "L1d", "L2", "L3"};
// Create a map to group CPUs by their cache topology (same logic as printCacheHierarchy)
std::map<std::string, std::vector<size_t> > topologyGroups;
// Build cache topology signatures for each CPU
for (size_t cpuIdx = 0; cpuIdx < cpuTopo.getLogicalCpuNum(); cpuIdx++) {
const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(cpuIdx);
// Create a signature string that uniquely identifies the cache topology
char signature[512];
int offset = 0;
offset += snprintf(signature + offset, sizeof(signature) - offset, "%s:", type2str(logCpu.coreType));
// Add cache properties to signature
for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
const CpuCache& cache = cpuTopo.getCache(cpuIdx, (CacheType)cType);
offset += snprintf(signature + offset, sizeof(signature) - offset,
"%d-%u-%u-%zu;",
cType, cache.size, cache.associativity, cache.getSharedCpuNum());
}
topologyGroups[signature].push_back(cpuIdx);
}
// Print cache sharing analysis for each unique topology
for (std::map<std::string, std::vector<size_t> >::iterator groupIt = topologyGroups.begin();
groupIt != topologyGroups.end(); ++groupIt) {
const std::vector<size_t>& cpuList = groupIt->second;
if (cpuList.empty()) continue;
size_t firstCpu = cpuList[0];
const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(firstCpu);
printf("%s Topology (representative CPU %zu):\n", type2str(logCpu.coreType), firstCpu);
// Analyze each cache level
for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
const CpuCache& cache = cpuTopo.getCache(firstCpu, (CacheType)cType);
if (cache.size > 0) {
printf(" %s Cache:\n", cacheNames[cType]);
printf(" Size: ");
if (cache.size >= 1024 * 1024) {
printf("%.2f MB\n", cache.size / (1024.0 * 1024.0));
} else if (cache.size >= 1024) {
printf("%.2f KB\n", cache.size / 1024.0);
} else {
printf("%u B\n", cache.size);
}
if (cache.isShared()) {
printf(" Shared by %zu CPUs: ", cache.getSharedCpuNum());
int count = 0;
for (CpuMask::const_iterator i = cache.sharedCpuIndices.begin(), ie = cache.sharedCpuIndices.end(); i != ie; ++i) {
if (count > 0) printf(",");
printf("%u", *i);
count++;
if (count > 20) {
printf("...");
break;
}
}
printf("\n");
} else {
printf(" Private (not shared)\n");
}
}
}
printf("\n");
}
}
int main()
try
{
printf("\n");
printf("Xbyak CPU Cache Topology API Test\n");
printf("==================================\n");
printf("\n");
Cpu cpu;
CpuTopology cpuTopo(cpu);
printCpuMaskTest();
printSystemTopology(cpuTopo);
printLogicalCpuDetails(cpuTopo);
printCacheHierarchy(cpuTopo);
printCacheSharingDetails(cpuTopo);
printSeparator();
printf("All tests completed successfully!\n");
printSeparator();
printf("\n");
} catch (std::exception& e) {
printf("Error: %s\n", e.what());
return 1;
}