sample/cputopology.cpp - third_party/xbyak - Git at Google

 /*
  * Test program for Xbyak CPU Cache Topology API
  * Demonstrates the CpuTopology, CpuCache, LogicalCpu, and CpuMask classes
  */

 #include <stdio.h>
 #include <string.h>
 #include <map>
 #include <vector>
 #include "xbyak/xbyak_util.h"

 using namespace Xbyak::util;

 void printSeparator()
 {
 	printf("========================================\n");
 }

 void printCpuMaskTest()
 {
 	printSeparator();
 	printf("CpuMask Class - CPU affinity mask operations\n");
 	printSeparator();

 	CpuMask mask;
 	mask.append(0);
 	mask.append(2);
 	mask.append(5);
 	mask.append(7);

 	printf("Created CpuMask with CPUs: 0, 2, 5, 7\n");
 	printf("Iterating through set CPUs:\n");
 	for (CpuMask::iterator it = mask.begin(); it != mask.end(); ++it) {
 		printf("  CPU %u is set\n", *it);
 	}
 	printf("Total CPUs in mask: %zu\n", mask.size());
 	printf("\n");
 }

 void printSystemTopology(const CpuTopology& cpuTopo)
 {
 	printSeparator();
 	printf("CpuTopology Class - System CPU topology\n");
 	printSeparator();

 	printf("System Configuration:\n");
 	printf("  Logical CPUs:   %zu\n", cpuTopo.getLogicalCpuNum());
 	printf("  Physical Cores: %zu\n", cpuTopo.getPhysicalCoreNum());
 	printf("  Cache Line Size:%u bytes\n", cpuTopo.getLineSize());
 	printf("  Hybrid System:  %s\n", cpuTopo.isHybrid() ? "Yes (P-cores + E-cores)" : "No");
 	printf("\n");
 }

 const char *type2str(int coreType)
 {
 	switch (coreType) {
 	case Performance: return "P-core";
 	case Efficient: return "E-core";
 	case Standard: return "Standard";
 	default: return "Unknown";
 	}
 }

 void printLogicalCpuDetails(const CpuTopology& cpuTopo)
 {
 	printSeparator();
 	printf("LogicalCpu Class - Per-CPU topology information\n");
 	printSeparator();

 	printf("Detailed CPU Topology (showing upto 32 Logical CPUs):\n");
 	size_t maxCpusToPrint = 32;
 	size_t numCpus = cpuTopo.getLogicalCpuNum();

 	for (size_t i = 0; i < numCpus && i < maxCpusToPrint; i++) {
 		const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(i);
 		printf("  CPU %3zu: Core=%u Type=%s Siblings=", i, logCpu.coreId, type2str(logCpu.coreType));
 		logCpu.getSiblings().put();
 	}

 	if (numCpus > maxCpusToPrint) {
 		printf("  ... (%zu more CPUs not shown)\n", numCpus - maxCpusToPrint);
 	}
 	printf("\n");
 }

 void printCacheHierarchy(const CpuTopology& cpuTopo)
 {
 	printSeparator();
 	printf("CpuCache Class - Cache hierarchy and sharing\n");
 	printSeparator();

 	const char* cacheNames[] = {"L1i", "L1d", "L2", "L3"};

 	// Create a map to group CPUs by their cache topology
 	// Key: cache topology signature, Value: list of CPUs with that topology
 	std::map<std::string, std::vector<size_t> > topologyGroups;

 	// Build cache topology signatures for each CPU
 	for (size_t cpuIdx = 0; cpuIdx < cpuTopo.getLogicalCpuNum(); cpuIdx++) {
 		const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(cpuIdx);

 		// Create a signature string that uniquely identifies the cache topology
 		char signature[512];
 		int offset = 0;
 		offset += snprintf(signature + offset, sizeof(signature) - offset, "%s:", type2str(logCpu.coreType));

 		// Add cache properties to signature
 		for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
 			const CpuCache& cache = cpuTopo.getCache(cpuIdx, (CacheType)cType);
 			offset += snprintf(signature + offset, sizeof(signature) - offset,
 				"%d-%u-%u-%zu;",
 				cType, cache.size, cache.associativity,
 				cache.getSharedCpuNum());
 		}

 		topologyGroups[signature].push_back(cpuIdx);
 	}

 	// Print each unique cache topology group
 	printf("Cache Hierarchy by Topology:\n");

 	for (std::map<std::string, std::vector<size_t> >::iterator groupIt = topologyGroups.begin();
 	     groupIt != topologyGroups.end(); ++groupIt) {

 		const std::vector<size_t>& cpuList = groupIt->second;
 		if (cpuList.empty()) continue;

 		size_t firstCpu = cpuList[0];
 		const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(firstCpu);

 		// Print core type and CPU list
 		printf("\n%s (CPUs [", type2str(logCpu.coreType));
 		for (size_t i = 0; i < cpuList.size(); i++) {
 			if (i > 0) printf(", ");
 			printf("%zu", cpuList[i]);
 			// Limit output to ~20 CPUs for readability
 			if (i >= 19 && cpuList.size() > 20) {
 				printf(", ...");
 				break;
 			}
 		}
 		printf("]):\n");

 		// Print cache details for this topology
 		for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
 			const CpuCache& cache = cpuTopo.getCache(firstCpu, (CacheType)cType);
 			if (cache.size > 0) {
 				printf("  %s: ", cacheNames[cType]);
 				if (cache.size >= 1024 * 1024) {
 					printf("%6.2f MB", cache.size / (1024.0 * 1024.0));
 				} else if (cache.size >= 1024) {
 					printf("%6.2f KB", cache.size / 1024.0);
 				} else {
 					printf("%6u B ", cache.size);
 				}
 				printf(" | %2u-way", cache.associativity);
 				if (cache.isShared()) {
 					printf(" | Shared by %zu CPUs", cache.getSharedCpuNum());
 				}
 				printf("\n");
 			}
 		}
 	}
 	printf("\n");
 }

 void printCacheSharingDetails(const CpuTopology& cpuTopo)
 {
 	printSeparator();
 	printf("Cache Sharing Analysis\n");
 	printSeparator();

 	const char* cacheNames[] = {"L1i", "L1d", "L2", "L3"};

 	// Create a map to group CPUs by their cache topology (same logic as printCacheHierarchy)
 	std::map<std::string, std::vector<size_t> > topologyGroups;

 	// Build cache topology signatures for each CPU
 	for (size_t cpuIdx = 0; cpuIdx < cpuTopo.getLogicalCpuNum(); cpuIdx++) {
 		const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(cpuIdx);

 		// Create a signature string that uniquely identifies the cache topology
 		char signature[512];
 		int offset = 0;
 		offset += snprintf(signature + offset, sizeof(signature) - offset, "%s:", type2str(logCpu.coreType));

 		// Add cache properties to signature
 		for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
 			const CpuCache& cache = cpuTopo.getCache(cpuIdx, (CacheType)cType);
 			offset += snprintf(signature + offset, sizeof(signature) - offset,
 				"%d-%u-%u-%zu;",
 				cType, cache.size, cache.associativity, cache.getSharedCpuNum());
 		}

 		topologyGroups[signature].push_back(cpuIdx);
 	}

 	// Print cache sharing analysis for each unique topology
 	for (std::map<std::string, std::vector<size_t> >::iterator groupIt = topologyGroups.begin();
 	     groupIt != topologyGroups.end(); ++groupIt) {

 		const std::vector<size_t>& cpuList = groupIt->second;
 		if (cpuList.empty()) continue;

 		size_t firstCpu = cpuList[0];
 		const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(firstCpu);

 		printf("%s Topology (representative CPU %zu):\n", type2str(logCpu.coreType), firstCpu);

 		// Analyze each cache level
 		for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
 			const CpuCache& cache = cpuTopo.getCache(firstCpu, (CacheType)cType);
 			if (cache.size > 0) {
 				printf("  %s Cache:\n", cacheNames[cType]);
 				printf("    Size: ");
 				if (cache.size >= 1024 * 1024) {
 					printf("%.2f MB\n", cache.size / (1024.0 * 1024.0));
 				} else if (cache.size >= 1024) {
 					printf("%.2f KB\n", cache.size / 1024.0);
 				} else {
 					printf("%u B\n", cache.size);
 				}

 				if (cache.isShared()) {
 					printf("    Shared by %zu CPUs: ", cache.getSharedCpuNum());
 					int count = 0;
 					for (CpuMask::const_iterator i = cache.sharedCpuIndices.begin(), ie = cache.sharedCpuIndices.end(); i != ie; ++i) {
 						if (count > 0) printf(",");
 						printf("%u", *i);
 						count++;
 						if (count > 20) {
 							printf("...");
 							break;
 						}
 					}
 					printf("\n");
 				} else {
 					printf("    Private (not shared)\n");
 				}
 			}
 		}
 		printf("\n");
 	}
 }

 int main()
 	try
 {
 	printf("\n");
 	printf("Xbyak CPU Cache Topology API Test\n");
 	printf("==================================\n");
 	printf("\n");

 	Cpu cpu;
 	CpuTopology cpuTopo(cpu);

 	printCpuMaskTest();
 	printSystemTopology(cpuTopo);
 	printLogicalCpuDetails(cpuTopo);
 	printCacheHierarchy(cpuTopo);
 	printCacheSharingDetails(cpuTopo);

 	printSeparator();
 	printf("All tests completed successfully!\n");
 	printSeparator();
 	printf("\n");
 } catch (std::exception& e) {
 	printf("Error: %s\n", e.what());
 	return 1;
 }
	/*
	* Test program for Xbyak CPU Cache Topology API
	* Demonstrates the CpuTopology, CpuCache, LogicalCpu, and CpuMask classes
	*/

	#include <stdio.h>
	#include <string.h>
	#include <map>
	#include <vector>
	#include "xbyak/xbyak_util.h"

	using namespace Xbyak::util;

	void printSeparator()
	{
	printf("========================================\n");
	}

	void printCpuMaskTest()
	{
	printSeparator();
	printf("CpuMask Class - CPU affinity mask operations\n");
	printSeparator();

	CpuMask mask;
	mask.append(0);
	mask.append(2);
	mask.append(5);
	mask.append(7);

	printf("Created CpuMask with CPUs: 0, 2, 5, 7\n");
	printf("Iterating through set CPUs:\n");
	for (CpuMask::iterator it = mask.begin(); it != mask.end(); ++it) {
	printf(" CPU %u is set\n", *it);
	}
	printf("Total CPUs in mask: %zu\n", mask.size());
	printf("\n");
	}

	void printSystemTopology(const CpuTopology& cpuTopo)
	{
	printSeparator();
	printf("CpuTopology Class - System CPU topology\n");
	printSeparator();

	printf("System Configuration:\n");
	printf(" Logical CPUs: %zu\n", cpuTopo.getLogicalCpuNum());
	printf(" Physical Cores: %zu\n", cpuTopo.getPhysicalCoreNum());
	printf(" Cache Line Size:%u bytes\n", cpuTopo.getLineSize());
	printf(" Hybrid System: %s\n", cpuTopo.isHybrid() ? "Yes (P-cores + E-cores)" : "No");
	printf("\n");
	}

	const char *type2str(int coreType)
	{
	switch (coreType) {
	case Performance: return "P-core";
	case Efficient: return "E-core";
	case Standard: return "Standard";
	default: return "Unknown";
	}
	}

	void printLogicalCpuDetails(const CpuTopology& cpuTopo)
	{
	printSeparator();
	printf("LogicalCpu Class - Per-CPU topology information\n");
	printSeparator();

	printf("Detailed CPU Topology (showing upto 32 Logical CPUs):\n");
	size_t maxCpusToPrint = 32;
	size_t numCpus = cpuTopo.getLogicalCpuNum();

	for (size_t i = 0; i < numCpus && i < maxCpusToPrint; i++) {
	const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(i);
	printf(" CPU %3zu: Core=%u Type=%s Siblings=", i, logCpu.coreId, type2str(logCpu.coreType));
	logCpu.getSiblings().put();
	}

	if (numCpus > maxCpusToPrint) {
	printf(" ... (%zu more CPUs not shown)\n", numCpus - maxCpusToPrint);
	}
	printf("\n");
	}

	void printCacheHierarchy(const CpuTopology& cpuTopo)
	{
	printSeparator();
	printf("CpuCache Class - Cache hierarchy and sharing\n");
	printSeparator();

	const char* cacheNames[] = {"L1i", "L1d", "L2", "L3"};

	// Create a map to group CPUs by their cache topology
	// Key: cache topology signature, Value: list of CPUs with that topology
	std::map<std::string, std::vector<size_t> > topologyGroups;

	// Build cache topology signatures for each CPU
	for (size_t cpuIdx = 0; cpuIdx < cpuTopo.getLogicalCpuNum(); cpuIdx++) {
	const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(cpuIdx);

	// Create a signature string that uniquely identifies the cache topology
	char signature[512];
	int offset = 0;
	offset += snprintf(signature + offset, sizeof(signature) - offset, "%s:", type2str(logCpu.coreType));

	// Add cache properties to signature
	for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
	const CpuCache& cache = cpuTopo.getCache(cpuIdx, (CacheType)cType);
	offset += snprintf(signature + offset, sizeof(signature) - offset,
	"%d-%u-%u-%zu;",
	cType, cache.size, cache.associativity,
	cache.getSharedCpuNum());
	}

	topologyGroups[signature].push_back(cpuIdx);
	}

	// Print each unique cache topology group
	printf("Cache Hierarchy by Topology:\n");

	for (std::map<std::string, std::vector<size_t> >::iterator groupIt = topologyGroups.begin();
	groupIt != topologyGroups.end(); ++groupIt) {

	const std::vector<size_t>& cpuList = groupIt->second;
	if (cpuList.empty()) continue;

	size_t firstCpu = cpuList[0];
	const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(firstCpu);

	// Print core type and CPU list
	printf("\n%s (CPUs [", type2str(logCpu.coreType));
	for (size_t i = 0; i < cpuList.size(); i++) {
	if (i > 0) printf(", ");
	printf("%zu", cpuList[i]);
	// Limit output to ~20 CPUs for readability
	if (i >= 19 && cpuList.size() > 20) {
	printf(", ...");
	break;
	}
	}
	printf("]):\n");

	// Print cache details for this topology
	for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
	const CpuCache& cache = cpuTopo.getCache(firstCpu, (CacheType)cType);
	if (cache.size > 0) {
	printf(" %s: ", cacheNames[cType]);
	if (cache.size >= 1024 * 1024) {
	printf("%6.2f MB", cache.size / (1024.0 * 1024.0));
	} else if (cache.size >= 1024) {
	printf("%6.2f KB", cache.size / 1024.0);
	} else {
	printf("%6u B ", cache.size);
	}
	printf(" \| %2u-way", cache.associativity);
	if (cache.isShared()) {
	printf(" \| Shared by %zu CPUs", cache.getSharedCpuNum());
	}
	printf("\n");
	}
	}
	}
	printf("\n");
	}

	void printCacheSharingDetails(const CpuTopology& cpuTopo)
	{
	printSeparator();
	printf("Cache Sharing Analysis\n");
	printSeparator();

	const char* cacheNames[] = {"L1i", "L1d", "L2", "L3"};

	// Create a map to group CPUs by their cache topology (same logic as printCacheHierarchy)
	std::map<std::string, std::vector<size_t> > topologyGroups;

	// Build cache topology signatures for each CPU
	for (size_t cpuIdx = 0; cpuIdx < cpuTopo.getLogicalCpuNum(); cpuIdx++) {
	const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(cpuIdx);

	// Create a signature string that uniquely identifies the cache topology
	char signature[512];
	int offset = 0;
	offset += snprintf(signature + offset, sizeof(signature) - offset, "%s:", type2str(logCpu.coreType));

	// Add cache properties to signature
	for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
	const CpuCache& cache = cpuTopo.getCache(cpuIdx, (CacheType)cType);
	offset += snprintf(signature + offset, sizeof(signature) - offset,
	"%d-%u-%u-%zu;",
	cType, cache.size, cache.associativity, cache.getSharedCpuNum());
	}

	topologyGroups[signature].push_back(cpuIdx);
	}

	// Print cache sharing analysis for each unique topology
	for (std::map<std::string, std::vector<size_t> >::iterator groupIt = topologyGroups.begin();
	groupIt != topologyGroups.end(); ++groupIt) {

	const std::vector<size_t>& cpuList = groupIt->second;
	if (cpuList.empty()) continue;

	size_t firstCpu = cpuList[0];
	const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(firstCpu);

	printf("%s Topology (representative CPU %zu):\n", type2str(logCpu.coreType), firstCpu);

	// Analyze each cache level
	for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
	const CpuCache& cache = cpuTopo.getCache(firstCpu, (CacheType)cType);
	if (cache.size > 0) {
	printf(" %s Cache:\n", cacheNames[cType]);
	printf(" Size: ");
	if (cache.size >= 1024 * 1024) {
	printf("%.2f MB\n", cache.size / (1024.0 * 1024.0));
	} else if (cache.size >= 1024) {
	printf("%.2f KB\n", cache.size / 1024.0);
	} else {
	printf("%u B\n", cache.size);
	}

	if (cache.isShared()) {
	printf(" Shared by %zu CPUs: ", cache.getSharedCpuNum());
	int count = 0;
	for (CpuMask::const_iterator i = cache.sharedCpuIndices.begin(), ie = cache.sharedCpuIndices.end(); i != ie; ++i) {
	if (count > 0) printf(",");
	printf("%u", *i);
	count++;
	if (count > 20) {
	printf("...");
	break;
	}
	}
	printf("\n");
	} else {
	printf(" Private (not shared)\n");
	}
	}
	}
	printf("\n");
	}
	}

	int main()
	try
	{
	printf("\n");
	printf("Xbyak CPU Cache Topology API Test\n");
	printf("==================================\n");
	printf("\n");

	Cpu cpu;
	CpuTopology cpuTopo(cpu);

	printCpuMaskTest();
	printSystemTopology(cpuTopo);
	printLogicalCpuDetails(cpuTopo);
	printCacheHierarchy(cpuTopo);
	printCacheSharingDetails(cpuTopo);

	printSeparator();
	printf("All tests completed successfully!\n");
	printSeparator();
	printf("\n");
	} catch (std::exception& e) {
	printf("Error: %s\n", e.what());
	return 1;
	}