Merge pull request #713 from cdavis5e/metal3-numa-heaps
MVKDevice: Report a second heap for non-UMA devices.
diff --git a/Common/MVKOSExtensions.h b/Common/MVKOSExtensions.h
index eefa957..a801503 100644
--- a/Common/MVKOSExtensions.h
+++ b/Common/MVKOSExtensions.h
@@ -111,3 +111,17 @@
int64_t val = wasFound ? ev : EV; \
cfgVal = (int32_t)std::min(std::max(val, (int64_t)INT32_MIN), (int64_t)INT32_MAX); \
} while(false)
+
+
+#pragma mark -
+#pragma mark System memory
+
+/**
+ * Returns the total amount of physical RAM in the system,
+ * or zero if the host information cannot be retrieved.
+ */
+uint64_t mvkGetSystemMemorySize();
+
+/**
+ * Returns the amount of memory available to this process, or zero if it cannot be determined.
+ *
+ * On iOS 13.0 and later, this is the value reported by os_proc_available_memory().
+ * Otherwise, it is estimated from the host's free page count multiplied by the host page size.
+ */
+uint64_t mvkGetAvailableMemorySize();
+
+/**
+ * Returns the amount of memory currently used by this process, taken from the
+ * physical footprint reported for the current task, or zero if that query fails.
+ */
+uint64_t mvkGetUsedMemorySize();
+
diff --git a/Common/MVKOSExtensions.mm b/Common/MVKOSExtensions.mm
index 39ef03b..0922bc2 100644
--- a/Common/MVKOSExtensions.mm
+++ b/Common/MVKOSExtensions.mm
@@ -18,7 +18,9 @@
#include "MVKOSExtensions.h"
+#include <mach/mach_host.h>
#include <mach/mach_time.h>
+#include <mach/task.h>
#import <Foundation/Foundation.h>
@@ -91,3 +93,43 @@
bool mvkGetEnvVarBool(std::string varName, bool* pWasFound) {
return mvkGetEnvVarInt64(varName, pWasFound) != 0;
}
+
+
+#pragma mark -
+#pragma mark System memory
+
+/** Reports the max_mem value from the host's HOST_BASIC_INFO, or zero if the query fails. */
+uint64_t mvkGetSystemMemorySize() {
+	host_basic_info_data_t hostInfo;
+	mach_msg_type_number_t hostInfoCount = HOST_BASIC_INFO_COUNT;
+	kern_return_t kr = host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, &hostInfoCount);
+	return (kr == KERN_SUCCESS) ? hostInfo.max_mem : 0;
+}
+
+/**
+ * Returns an estimate of the memory still available, or zero if it cannot be determined.
+ *
+ * On iOS 13.0 and later, this is the value reported by os_proc_available_memory().
+ * Otherwise, it is the host's free page count multiplied by the host page size.
+ */
+uint64_t mvkGetAvailableMemorySize() {
+#if MVK_IOS
+	if (mvkOSVersion() >= 13.0) { return os_proc_available_memory(); }
+#endif
+	mach_port_t hostPort = mach_host_self();
+
+	// Without a valid page size, the free page count cannot be converted to a size.
+	vm_size_t pageSize = 0;
+	if (host_page_size(hostPort, &pageSize) != KERN_SUCCESS) { return 0; }
+
+	vm_statistics_data_t vmStats;
+	mach_msg_type_number_t vmStatsCount = HOST_VM_INFO_COUNT;
+	if (host_statistics(hostPort, HOST_VM_INFO, (host_info_t)&vmStats, &vmStatsCount) != KERN_SUCCESS) { return 0; }
+
+	// Widen both operands first, so the product is always computed in 64 bits,
+	// even on targets where vm_size_t is narrower than that.
+	return (uint64_t)vmStats.free_count * (uint64_t)pageSize;
+}
+
+/** Reports the phys_footprint from the current task's TASK_VM_INFO, or zero if the query fails. */
+uint64_t mvkGetUsedMemorySize() {
+	task_vm_info_data_t vmInfo;
+	mach_msg_type_number_t vmInfoCount = TASK_VM_INFO_COUNT;
+	kern_return_t kr = task_info(mach_task_self(), TASK_VM_INFO, (task_info_t)&vmInfo, &vmInfoCount);
+	if (kr != KERN_SUCCESS) { return 0; }
+	return vmInfo.phys_footprint;
+}
+
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 90fcd2e..ce9e2f1 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -278,6 +278,9 @@
*/
inline uint32_t getLazilyAllocatedMemoryTypes() { return _lazilyAllocatedMemoryTypes; }
+ /** Returns whether this is a unified memory device. */
+ bool getHasUnifiedMemory();
+
#pragma mark Metal
@@ -330,7 +333,9 @@
void initProperties();
void initGPUInfoProperties();
void initMemoryProperties();
+ uint64_t getVRAMSize();
uint64_t getRecommendedMaxWorkingSetSize();
+ uint64_t getCurrentAllocatedSize();
void initExtensions();
MVKExtensionList* getSupportedExtensions(const char* pLayerName = nullptr);
std::vector<MVKQueueFamily*>& getQueueFamilies();
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 8873d69..331b01c 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -36,7 +36,6 @@
#include "MVKOSExtensions.h"
#include <MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h>
#include "vk_mvk_moltenvk.h"
-#include <mach/mach_host.h>
#import "CAMetalLayer+MoltenVK.h"
@@ -728,22 +727,21 @@
if (pMemoryProperties) {
pMemoryProperties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
pMemoryProperties->memoryProperties = _memoryProperties;
- auto* next = (MVKVkAPIStructHeader*)pMemoryProperties->pNext;
- while (next) {
- switch ((uint32_t)next->sType) {
+ for (auto* next = (VkBaseOutStructure*)pMemoryProperties->pNext; next; next = next->pNext) {
+ switch (next->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
auto* budgetProps = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)next;
memset(budgetProps->heapBudget, 0, sizeof(budgetProps->heapBudget));
memset(budgetProps->heapUsage, 0, sizeof(budgetProps->heapUsage));
budgetProps->heapBudget[0] = (VkDeviceSize)getRecommendedMaxWorkingSetSize();
- if ( [_mtlDevice respondsToSelector: @selector(currentAllocatedSize)] ) {
- budgetProps->heapUsage[0] = (VkDeviceSize)_mtlDevice.currentAllocatedSize;
+ budgetProps->heapUsage[0] = (VkDeviceSize)getCurrentAllocatedSize();
+ if (!getHasUnifiedMemory()) {
+ budgetProps->heapBudget[1] = (VkDeviceSize)mvkGetAvailableMemorySize();
+ budgetProps->heapUsage[1] = (VkDeviceSize)mvkGetUsedMemorySize();
}
- next = (MVKVkAPIStructHeader*)budgetProps->pNext;
break;
}
default:
- next = (MVKVkAPIStructHeader*)next->pNext;
break;
}
}
@@ -1758,7 +1756,7 @@
.memoryHeaps = {
{
.flags = (VK_MEMORY_HEAP_DEVICE_LOCAL_BIT),
- .size = (VkDeviceSize)getRecommendedMaxWorkingSetSize(),
+ .size = (VkDeviceSize)getVRAMSize(),
},
},
// NB this list needs to stay sorted by propertyFlags (as bit sets)
@@ -1808,6 +1806,37 @@
_allMemoryTypes = 0x7; // Private, shared & memoryless
}
#endif
+#if MVK_MACOS
+ if (!getHasUnifiedMemory()) {
+ // This means we really have two heaps. The second heap is system memory.
+ _memoryProperties.memoryHeapCount = 2;
+ _memoryProperties.memoryHeaps[1].size = mvkGetSystemMemorySize();
+ _memoryProperties.memoryHeaps[1].flags = 0;
+ _memoryProperties.memoryTypes[2].heapIndex = 1; // Shared memory in the shared heap
+ }
+#endif
+}
+
+/**
+ * Returns whether this device shares a single pool of memory with the CPU.
+ *
+ * On iOS, this is always true. On macOS, the Metal device is asked directly when it
+ * implements hasUnifiedMemory. When it does not, a low-power device is treated as having
+ * unified memory, and any other device is treated as having its own dedicated memory.
+ */
+bool MVKPhysicalDevice::getHasUnifiedMemory() {
+#if MVK_IOS
+	return true;
+#endif
+#if MVK_MACOS
+	if ([_mtlDevice respondsToSelector: @selector(hasUnifiedMemory)]) { return _mtlDevice.hasUnifiedMemory; }
+
+	// The device cannot answer directly. Rather than reporting every such device as
+	// non-unified, fall back to the low-power flag, which Metal sets for integrated GPUs.
+	return [_mtlDevice isLowPower];
+#endif
+}
+
+/**
+ * Returns the size to report as the memory of this device.
+ *
+ * Unified memory devices report the system memory size. Other devices
+ * report the recommended max working set size as a stand-in.
+ */
+uint64_t MVKPhysicalDevice::getVRAMSize() {
+#if MVK_IOS
+	// Every iOS device is UMA, so the system memory size is the answer.
+	return mvkGetSystemMemorySize();
+#endif
+#if MVK_MACOS
+	// Metal offers no query for the total physical VRAM on the device, so devices without
+	// unified memory fall back to the recommended max working set size (i.e. the budget).
+	return getHasUnifiedMemory() ? mvkGetSystemMemorySize() : getRecommendedMaxWorkingSetSize();
+#endif
}
uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() {
@@ -1818,21 +1847,26 @@
#endif
#if MVK_IOS
// GPU and CPU use shared memory. Estimate the current free memory in the system.
- mach_port_t host_port;
- mach_msg_type_number_t host_size;
- vm_size_t pagesize;
- host_port = mach_host_self();
- host_size = sizeof(vm_statistics_data_t) / sizeof(integer_t);
- host_page_size(host_port, &pagesize);
- vm_statistics_data_t vm_stat;
- if (host_statistics(host_port, HOST_VM_INFO, (host_info_t)&vm_stat, &host_size) == KERN_SUCCESS ) {
- return vm_stat.free_count * pagesize;
- }
+ uint64_t freeMem = mvkGetAvailableMemorySize();
+ if (freeMem) { return freeMem; }
#endif
return 128 * MEBI; // Conservative minimum for macOS GPU's & iOS shared memory
}
+/**
+ * Returns the current memory usage to report for this device.
+ *
+ * The Metal device's currentAllocatedSize is used whenever the device implements it.
+ * Failing that, iOS reports the memory used by this process, and macOS reports zero.
+ */
+uint64_t MVKPhysicalDevice::getCurrentAllocatedSize() {
+	bool canQueryDevice = [_mtlDevice respondsToSelector: @selector(currentAllocatedSize)];
+	if (canQueryDevice) { return _mtlDevice.currentAllocatedSize; }
+#if MVK_IOS
+	// The memory currently used by this process serves as a reasonable approximation.
+	return mvkGetUsedMemorySize();
+#endif
+#if MVK_MACOS
+	return 0;
+#endif
+}
+
void MVKPhysicalDevice::initExtensions() {
if (!_metalFeatures.postDepthCoverage) {
_supportedExtensions.vk_EXT_post_depth_coverage.enabled = false;