Merge pull request #713 from cdavis5e/metal3-numa-heaps

MVKDevice: Report a second heap for non-UMA devices.
diff --git a/Common/MVKOSExtensions.h b/Common/MVKOSExtensions.h
index eefa957..a801503 100644
--- a/Common/MVKOSExtensions.h
+++ b/Common/MVKOSExtensions.h
@@ -111,3 +111,17 @@
 		int64_t val = wasFound ? ev : EV;						\
 		cfgVal = (int32_t)std::min(std::max(val, (int64_t)INT32_MIN), (int64_t)INT32_MAX);	\
 	} while(false)
+
+
+#pragma mark -
+#pragma mark System memory
+
+/** Returns the total amount of physical RAM in the system, or zero if it cannot be determined. */
+uint64_t mvkGetSystemMemorySize();
+
+/** Returns the amount of memory available to this process, or zero if it cannot be determined. */
+uint64_t mvkGetAvailableMemorySize();
+
+/** Returns the amount of memory currently used by this process, or zero if it cannot be determined. */
+uint64_t mvkGetUsedMemorySize();
+
diff --git a/Common/MVKOSExtensions.mm b/Common/MVKOSExtensions.mm
index 39ef03b..0922bc2 100644
--- a/Common/MVKOSExtensions.mm
+++ b/Common/MVKOSExtensions.mm
@@ -18,7 +18,9 @@
 
 
 #include "MVKOSExtensions.h"
+#include <mach/mach_host.h>
 #include <mach/mach_time.h>
+#include <mach/task.h>
 
 #import <Foundation/Foundation.h>
 
@@ -91,3 +93,43 @@
 bool mvkGetEnvVarBool(std::string varName, bool* pWasFound) {
 	return mvkGetEnvVarInt64(varName, pWasFound) != 0;
 }
+
+
+#pragma mark -
+#pragma mark System memory
+
+uint64_t mvkGetSystemMemorySize() {
+	// Query the Mach host for its basic info, and report the max_mem value it contains.
+	host_basic_info_data_t basicInfo;
+	mach_msg_type_number_t infoCount = HOST_BASIC_INFO_COUNT;
+	kern_return_t kr = host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&basicInfo, &infoCount);
+	// Zero is returned if the query fails.
+	return (kr == KERN_SUCCESS) ? basicInfo.max_mem : 0;
+}
+
+uint64_t mvkGetAvailableMemorySize() {
+#if MVK_IOS
+	if (mvkOSVersion() >= 13.0) { return os_proc_available_memory(); }
+#endif
+	// Otherwise, estimate from the count of free pages in the host VM statistics.
+	// Returns zero if either the page size or the VM statistics cannot be retrieved.
+	mach_port_t host_port = mach_host_self();
+	vm_size_t pagesize = 0;
+	if (host_page_size(host_port, &pagesize) != KERN_SUCCESS) { return 0; }
+	vm_statistics_data_t vm_stat;
+	mach_msg_type_number_t host_size = HOST_VM_INFO_COUNT;
+	if (host_statistics(host_port, HOST_VM_INFO, (host_info_t)&vm_stat, &host_size) == KERN_SUCCESS) {
+		return (uint64_t)vm_stat.free_count * pagesize;	// Widen first, so the product cannot wrap in 32 bits
+	}
+	return 0;
+}
+
+uint64_t mvkGetUsedMemorySize() {
+	// Ask the kernel for the VM info of this task, and report its physical footprint.
+	mach_msg_type_number_t infoCount = TASK_VM_INFO_COUNT;
+	task_vm_info_data_t vmInfo;
+	kern_return_t kr = task_info(mach_task_self(), TASK_VM_INFO, (task_info_t)&vmInfo, &infoCount);
+	// Zero is returned if the query fails.
+	return (kr == KERN_SUCCESS) ? vmInfo.phys_footprint : 0;
+}
+
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 90fcd2e..ce9e2f1 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -278,6 +278,9 @@
 	 */
 	inline uint32_t getLazilyAllocatedMemoryTypes() { return _lazilyAllocatedMemoryTypes; }
 
+	/** Returns whether this is a unified memory device. */
+	bool getHasUnifiedMemory();
+
 	
 #pragma mark Metal
 
@@ -330,7 +333,9 @@
 	void initProperties();
 	void initGPUInfoProperties();
 	void initMemoryProperties();
+	uint64_t getVRAMSize();
 	uint64_t getRecommendedMaxWorkingSetSize();
+	uint64_t getCurrentAllocatedSize();
 	void initExtensions();
 	MVKExtensionList* getSupportedExtensions(const char* pLayerName = nullptr);
 	std::vector<MVKQueueFamily*>& getQueueFamilies();
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 8873d69..331b01c 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -36,7 +36,6 @@
 #include "MVKOSExtensions.h"
 #include <MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h>
 #include "vk_mvk_moltenvk.h"
-#include <mach/mach_host.h>
 
 #import "CAMetalLayer+MoltenVK.h"
 
@@ -728,22 +727,21 @@
 	if (pMemoryProperties) {
 		pMemoryProperties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
 		pMemoryProperties->memoryProperties = _memoryProperties;
-		auto* next = (MVKVkAPIStructHeader*)pMemoryProperties->pNext;
-		while (next) {
-			switch ((uint32_t)next->sType) {
+		for (auto* next = (VkBaseOutStructure*)pMemoryProperties->pNext; next; next = next->pNext) {
+			switch (next->sType) {
 			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
 				auto* budgetProps = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)next;
 				memset(budgetProps->heapBudget, 0, sizeof(budgetProps->heapBudget));
 				memset(budgetProps->heapUsage, 0, sizeof(budgetProps->heapUsage));
 				budgetProps->heapBudget[0] = (VkDeviceSize)getRecommendedMaxWorkingSetSize();
-				if ( [_mtlDevice respondsToSelector: @selector(currentAllocatedSize)] ) {
-					budgetProps->heapUsage[0] = (VkDeviceSize)_mtlDevice.currentAllocatedSize;
+				budgetProps->heapUsage[0] = (VkDeviceSize)getCurrentAllocatedSize();
+				if (!getHasUnifiedMemory()) {
+					budgetProps->heapBudget[1] = (VkDeviceSize)mvkGetAvailableMemorySize();
+					budgetProps->heapUsage[1] = (VkDeviceSize)mvkGetUsedMemorySize();
 				}
-				next = (MVKVkAPIStructHeader*)budgetProps->pNext;
 				break;
 			}
 			default:
-				next = (MVKVkAPIStructHeader*)next->pNext;
 				break;
 			}
 		}
@@ -1758,7 +1756,7 @@
         .memoryHeaps = {
             {
                 .flags = (VK_MEMORY_HEAP_DEVICE_LOCAL_BIT),
-                .size = (VkDeviceSize)getRecommendedMaxWorkingSetSize(),
+                .size = (VkDeviceSize)getVRAMSize(),
             },
         },
         // NB this list needs to stay sorted by propertyFlags (as bit sets)
@@ -1808,6 +1806,37 @@
 		_allMemoryTypes				= 0x7;		// Private, shared & memoryless
 	}
 #endif
+#if MVK_MACOS
+	if (!getHasUnifiedMemory()) {
+		// This means we really have two heaps. The second heap is system memory.
+		_memoryProperties.memoryHeapCount = 2;
+		_memoryProperties.memoryHeaps[1].size = mvkGetSystemMemorySize();
+		_memoryProperties.memoryHeaps[1].flags = 0;
+		_memoryProperties.memoryTypes[2].heapIndex = 1;	// Shared memory in the shared heap
+	}
+#endif
+}
+
+bool MVKPhysicalDevice::getHasUnifiedMemory() {
+#if MVK_IOS
+	return true;	// Always reported as unified memory on iOS
+#endif
+#if MVK_MACOS
+	return [_mtlDevice respondsToSelector: @selector(hasUnifiedMemory)] ? _mtlDevice.hasUnifiedMemory : _mtlDevice.isLowPower;	// Without hasUnifiedMemory, assume low-power (integrated) GPUs share system memory
+#endif
+}
+
+uint64_t MVKPhysicalDevice::getVRAMSize() {
+#if MVK_IOS
+	// Every iOS device is UMA, so the VRAM size is simply the system memory size.
+	return mvkGetSystemMemorySize();
+#endif
+#if MVK_MACOS
+	// A unified memory device reports the system memory size. For any other device,
+	// Metal provides no way to query the total physical VRAM, so the recommended
+	// max working set size (i.e. the budget) is reported in its place.
+	return getHasUnifiedMemory() ? mvkGetSystemMemorySize() : getRecommendedMaxWorkingSetSize();
+#endif
+}
 
 uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() {
@@ -1818,21 +1847,26 @@
 #endif
 #if MVK_IOS
 	// GPU and CPU use shared memory. Estimate the current free memory in the system.
-	mach_port_t host_port;
-	mach_msg_type_number_t host_size;
-	vm_size_t pagesize;
-	host_port = mach_host_self();
-	host_size = sizeof(vm_statistics_data_t) / sizeof(integer_t);
-	host_page_size(host_port, &pagesize);
-	vm_statistics_data_t vm_stat;
-	if (host_statistics(host_port, HOST_VM_INFO, (host_info_t)&vm_stat, &host_size) == KERN_SUCCESS ) {
-		return vm_stat.free_count * pagesize;
-	}
+	uint64_t freeMem = mvkGetAvailableMemorySize();
+	if (freeMem) { return freeMem; }
 #endif
 
 	return 128 * MEBI;		// Conservative minimum for macOS GPU's & iOS shared memory
 }
 
+uint64_t MVKPhysicalDevice::getCurrentAllocatedSize() {
+	// Prefer the value reported by the Metal device, when the device offers it.
+	bool canQueryDevice = [_mtlDevice respondsToSelector: @selector(currentAllocatedSize)];
+	if (canQueryDevice) { return _mtlDevice.currentAllocatedSize; }
+#if MVK_IOS
+	// Otherwise, the memory currently used by this process is a reasonable approximation.
+	return mvkGetUsedMemorySize();
+#endif
+#if MVK_MACOS
+	return 0;
+#endif
+}
+
 void MVKPhysicalDevice::initExtensions() {
 	if (!_metalFeatures.postDepthCoverage) {
 		_supportedExtensions.vk_EXT_post_depth_coverage.enabled = false;