Support Metal 3.0.
Merge Metal-3.0 branch into master branch.
Update What's New document with merged features.
diff --git a/Common/MVKOSExtensions.h b/Common/MVKOSExtensions.h
index eefa957..a801503 100644
--- a/Common/MVKOSExtensions.h
+++ b/Common/MVKOSExtensions.h
@@ -111,3 +111,17 @@
int64_t val = wasFound ? ev : EV; \
cfgVal = (int32_t)std::min(std::max(val, (int64_t)INT32_MIN), (int64_t)INT32_MAX); \
} while(false)
+
+
+#pragma mark -
+#pragma mark System memory
+
+/** Returns the total amount of physical RAM in the system, in bytes, or zero if it cannot be determined. */
+uint64_t mvkGetSystemMemorySize();
+
+/** Returns an estimate of the amount of memory, in bytes, available to this process, or zero if it cannot be determined. */
+uint64_t mvkGetAvailableMemorySize();
+
+/** Returns the amount of memory, in bytes, currently used by this process (its physical footprint), or zero if it cannot be determined. */
+uint64_t mvkGetUsedMemorySize();
+
diff --git a/Common/MVKOSExtensions.mm b/Common/MVKOSExtensions.mm
index 43bc3c7..d195cb7 100644
--- a/Common/MVKOSExtensions.mm
+++ b/Common/MVKOSExtensions.mm
@@ -18,7 +18,9 @@
#include "MVKOSExtensions.h"
+#include <mach/mach_host.h>
#include <mach/mach_time.h>
+#include <mach/task.h>
#import <Foundation/Foundation.h>
@@ -91,3 +93,43 @@
bool mvkGetEnvVarBool(std::string varName, bool* pWasFound) {
return mvkGetEnvVarInt64(varName, pWasFound) != 0;
}
+
+
+#pragma mark -
+#pragma mark System memory
+
+// Queries the Mach host for its basic info and reports the max_mem field.
+// Returns zero if the host_info() query fails.
+uint64_t mvkGetSystemMemorySize() {
+	host_basic_info_data_t hostInfo;
+	mach_msg_type_number_t infoCnt = HOST_BASIC_INFO_COUNT;
+	kern_return_t kr = host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, &infoCnt);
+	return (kr == KERN_SUCCESS) ? hostInfo.max_mem : 0;
+}
+
+// Returns an estimate of the memory available to this process, in bytes, or zero on failure.
+// On iOS 13.0 and later this is the value reported by os_proc_available_memory(). Otherwise
+// it is the number of free pages reported by the Mach host, multiplied by the host page size.
+// NOTE(review): os_proc_available_memory() is declared in <os/proc.h> - confirm that header
+// is reachable from this file's includes when building for iOS.
+uint64_t mvkGetAvailableMemorySize() {
+#if MVK_IOS
+	if (mvkOSVersion() >= 13.0) { return os_proc_available_memory(); }
+#endif
+	mach_port_t host_port = mach_host_self();
+
+	// Without a valid page size, the free page count cannot be converted to bytes.
+	vm_size_t pagesize = 0;
+	if (host_page_size(host_port, &pagesize) != KERN_SUCCESS) { return 0; }
+
+	vm_statistics_data_t vm_stat;
+	mach_msg_type_number_t host_size = HOST_VM_INFO_COUNT;
+	if (host_statistics(host_port, HOST_VM_INFO, (host_info_t)&vm_stat, &host_size) == KERN_SUCCESS) {
+		// Widen before multiplying so the product cannot wrap where vm_size_t is 32 bits wide.
+		return (uint64_t)vm_stat.free_count * (uint64_t)pagesize;
+	}
+	return 0;
+}
+
+// Reports the phys_footprint of the current task, as returned by task_info(TASK_VM_INFO),
+// or zero if the query fails.
+uint64_t mvkGetUsedMemorySize() {
+	task_vm_info_data_t vmInfo;
+	mach_msg_type_number_t infoCnt = TASK_VM_INFO_COUNT;
+	kern_return_t kr = task_info(mach_task_self(), TASK_VM_INFO, (task_info_t)&vmInfo, &infoCnt);
+	if (kr != KERN_SUCCESS) { return 0; }
+	return vmInfo.phys_footprint;
+}
+
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index d433658..faf9ea1 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -17,8 +17,18 @@
MoltenVK 1.0.38
---------------
-Released TBD
+Released 2019/10/31
+- Add support for Metal 3.0 capabilities.
+- Add support for extensions:
+ - `VK_EXT_swapchain_colorspace` (*iOS*, already supported on *macOS*).
+ - `VK_EXT_hdr_metadata` (*macOS*)
+- Use native texture swizzling when available.
+- Use placement `MTLHeaps` for `VkDeviceMemory` when possible.
+- Report heap sizes accurately when possible.
+- Add support for additional colorspace options.
+- Add support for the `VkPhysicalDeviceFeatures::shaderResourceMinLod` feature.
+- Add support for compressed 3D images on *macOS*.
- Update `MoltenVK_Runtime_UserGuide.md` about embedding `libMoltenVK.dylib` in an application.
- Clarify static linking as the recommended linking approach for *iOS* app store distribution.
- Add request for feedback from people who reject **MoltenVK** to `README.md` document.
@@ -27,6 +37,7 @@
- Fix memory leaks when compiling shaders and pipelines without default OS autorelease pool.
- Reduce memory usage by adjusting default memory allocs for many `MVKVectorInline` uses and
replacing use of `MVKVectorDefault` with `std::vector` in descriptor set bindings.
+
@@ -112,7 +123,7 @@
- `VK_EXT_post_depth_coverage`
- `VK_EXT_scalar_block_layout`
- `VK_EXT_shader_stencil_export`
- - `VK_EXT_swapchain_colorspace`
+ - `VK_EXT_swapchain_colorspace` (*macOS*)
- `VK_EXT_texel_buffer_alignment`
- `VK_AMD_shader_image_load_store_lod`
- `VK_AMD_shader_trinary_minmax`
diff --git a/MoltenVK/MoltenVK/API/mvk_datatypes.h b/MoltenVK/MoltenVK/API/mvk_datatypes.h
index a3e9792..d6ca91e 100644
--- a/MoltenVK/MoltenVK/API/mvk_datatypes.h
+++ b/MoltenVK/MoltenVK/API/mvk_datatypes.h
@@ -234,6 +234,12 @@
/** Returns the Vulkan bit flags corresponding to the numeric sample count, which must be a PoT value. */
VkSampleCountFlagBits mvkVkSampleCountFlagBitsFromSampleCount(NSUInteger sampleCount);
+/** Returns the Metal texture swizzle from the Vulkan component swizzle. */
+MTLTextureSwizzle mvkMTLTextureSwizzleFromVkComponentSwizzle(VkComponentSwizzle vkSwizzle);
+
+/** Returns all four Metal texture swizzles from the Vulkan component mapping. */
+MTLTextureSwizzleChannels mvkMTLTextureSwizzleChannelsFromVkComponentMapping(VkComponentMapping vkMapping);
+
#pragma mark Mipmaps
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index 06fbc12..4ec230f 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -549,6 +549,9 @@
VkBool32 postDepthCoverage; /**< If true, coverage masks in fragment shaders post-depth-test are supported. */
VkBool32 fences; /**< If true, Metal synchronization fences (MTLFence) are supported. */
VkBool32 rasterOrderGroups; /**< If true, Raster order groups in fragment shaders are supported. */
+ VkBool32 native3DCompressedTextures; /**< If true, 3D compressed images are supported natively, without manual decompression. */
+ VkBool32 nativeTextureSwizzle; /**< If true, component swizzle is supported natively, without manual swizzling in shaders. */
+ VkBool32 placementHeaps; /**< If true, MTLHeap objects support placement of resources. */
} MVKPhysicalDeviceMetalFeatures;
/**
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
index 1c8e882..71b5fb6 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
@@ -762,7 +762,8 @@
// If we're copying to a compressed 3D image, the image data need to be decompressed.
// If we're copying to mip level 0, we can skip the copy and just decode
// directly into the image. Otherwise, we need to use an intermediate buffer.
- if (_toImage && _image->getIsCompressed() && mtlTexture.textureType == MTLTextureType3D) {
+ if (_toImage && _image->getIsCompressed() && mtlTexture.textureType == MTLTextureType3D &&
+ !getDevice()->_pMetalFeatures->native3DCompressedTextures) {
MVKCmdCopyBufferToImageInfo info;
info.srcRowStride = bytesPerRow & 0xffffffff;
info.srcRowStrideHigh = bytesPerRow >> 32;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index 4d44d4c..42ff1a1 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -591,7 +591,7 @@
void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
MVKPipeline* pipeline = _cmdEncoder->_graphicsPipelineState.getPipeline();
- bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle();
+ bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || _cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle;
bool forTessellation = ((MVKGraphicsPipeline*)pipeline)->isTessellationPipeline();
if (stage == (forTessellation ? kMVKGraphicsStageVertex : kMVKGraphicsStageRasterization)) {
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
index 82815ca..67d8844 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
@@ -69,10 +69,10 @@
#pragma mark Metal
/** Returns the Metal buffer underlying this memory allocation. */
- inline id<MTLBuffer> getMTLBuffer() { return _deviceMemory ? _deviceMemory->getMTLBuffer() : nullptr; }
+ id<MTLBuffer> getMTLBuffer();
/** Returns the offset at which the contents of this instance starts within the underlying Metal buffer. */
- inline NSUInteger getMTLBufferOffset() { return _deviceMemoryOffset; }
+ inline NSUInteger getMTLBufferOffset() { return _deviceMemory && _deviceMemory->getMTLHeap() ? 0 : _deviceMemoryOffset; }
#pragma mark Construction
@@ -90,6 +90,7 @@
VkBufferMemoryBarrier* pBufferMemoryBarrier);
VkBufferUsageFlags _usage;
+ id<MTLBuffer> _mtlBuffer = nil;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
index adbf683..b2d18e3 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
@@ -29,21 +29,28 @@
#pragma mark MVKBuffer
void MVKBuffer::propogateDebugName() {
- if (_debugName &&
- _deviceMemory &&
+ if (!_debugName) { return; }
+ if (_deviceMemory &&
_deviceMemory->isDedicatedAllocation() &&
_deviceMemory->_debugName.length == 0) {
_deviceMemory->setDebugName(_debugName.UTF8String);
}
+ setLabelIfNotNil(_mtlBuffer, _debugName);
}
#pragma mark Resource memory
VkResult MVKBuffer::getMemoryRequirements(VkMemoryRequirements* pMemoryRequirements) {
- pMemoryRequirements->size = getByteCount();
- pMemoryRequirements->alignment = _byteAlignment;
+ if (_device->_pMetalFeatures->placementHeaps) {
+ MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapBufferSizeAndAlignWithLength: getByteCount() options: MTLResourceStorageModePrivate];
+ pMemoryRequirements->size = sizeAndAlign.size;
+ pMemoryRequirements->alignment = sizeAndAlign.align;
+ } else {
+ pMemoryRequirements->size = getByteCount();
+ pMemoryRequirements->alignment = _byteAlignment;
+ }
pMemoryRequirements->memoryTypeBits = _device->getPhysicalDevice()->getAllMemoryTypes();
#if MVK_MACOS
// Textures must not use shared memory
@@ -126,6 +133,25 @@
}
+#pragma mark Metal
+
+// Returns the MTLBuffer backing this buffer, or nil if no device memory is bound.
+// When the bound device memory exposes an MTLHeap, a buffer is created from that heap on
+// first request and cached in _mtlBuffer. Otherwise, the MTLBuffer of the device memory
+// itself is returned.
+id<MTLBuffer> MVKBuffer::getMTLBuffer() {
+	if (_mtlBuffer) { return _mtlBuffer; }
+	if ( !_deviceMemory ) { return nil; }
+	if ( !_deviceMemory->getMTLHeap() ) { return _deviceMemory->getMTLBuffer(); }
+
+	// Heap-backed memory: place a buffer in the heap at this buffer's memory offset.
+	_mtlBuffer = [_deviceMemory->getMTLHeap() newBufferWithLength: getByteCount()
+														  options: _deviceMemory->getMTLResourceOptions()
+														   offset: _deviceMemoryOffset];	// retained
+	propogateDebugName();
+	return _mtlBuffer;
+}
+
+
#pragma mark Construction
MVKBuffer::MVKBuffer(MVKDevice* device, const VkBufferCreateInfo* pCreateInfo) : MVKResource(device), _usage(pCreateInfo->usage) {
@@ -135,6 +161,7 @@
MVKBuffer::~MVKBuffer() {
if (_deviceMemory) { _deviceMemory->removeBuffer(this); }
+ if (_mtlBuffer) { [_mtlBuffer release]; }
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index adbc3b2..800d5c1 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -283,9 +283,15 @@
*/
inline uint32_t getLazilyAllocatedMemoryTypes() { return _lazilyAllocatedMemoryTypes; }
+ /** Returns whether this is a unified memory device. */
+ bool getHasUnifiedMemory();
+
#pragma mark Metal
+ /** Returns whether the underlying MTLDevice supports the GPU family. */
+ bool getSupportsGPUFamily(MTLGPUFamily gpuFamily);
+
/** Populates the specified structure with the Metal-specific features of this device. */
inline const MVKPhysicalDeviceMetalFeatures* getMetalFeatures() { return &_metalFeatures; }
@@ -330,11 +336,15 @@
void initMetalFeatures();
void initFeatures();
void initProperties();
+ void initGPUInfoProperties();
void initMemoryProperties();
+ uint64_t getVRAMSize();
+ uint64_t getRecommendedMaxWorkingSetSize();
+ uint64_t getCurrentAllocatedSize();
void initExtensions();
MVKVector<MVKQueueFamily*>& getQueueFamilies();
void initPipelineCacheUUID();
- MTLFeatureSet getHighestMTLFeatureSet();
+ uint32_t getHighestMTLFeatureSet();
uint64_t getSpirvCrossRevision();
bool getImageViewIsSupported(const VkPhysicalDeviceImageFormatInfo2KHR *pImageFormatInfo);
void logGPUInfo();
@@ -831,18 +841,5 @@
#pragma mark -
#pragma mark Support functions
-/** Returns an approximation of how much memory, in bytes, the device can use with good performance. */
-uint64_t mvkRecommendedMaxWorkingSetSize(id<MTLDevice> mtlDevice);
-
-/** Populate the propertes with info about the GPU represented by the MTLDevice. */
-void mvkPopulateGPUInfo(VkPhysicalDeviceProperties& devProps, id<MTLDevice> mtlDevice);
-
/** Returns the registry ID of the specified device, or zero if the device does not have a registry ID. */
uint64_t mvkGetRegistryID(id<MTLDevice> mtlDevice);
-
-/**
- * If the MTLDevice defines a texture memory alignment for the format, it is retrieved from
- * the MTLDevice and returned, or returns zero if the MTLDevice does not define an alignment.
- * The format must support linear texture memory (must not be depth, stencil, or compressed).
- */
-VkDeviceSize mvkMTLPixelFormatLinearTextureAlignment(MTLPixelFormat mtlPixelFormat, id<MTLDevice> mtlDevice);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 19c8355..c52d1fe 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -36,7 +36,6 @@
#include "MVKOSExtensions.h"
#include <MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h>
#include "vk_mvk_moltenvk.h"
-#include <mach/mach_host.h>
#import "CAMetalLayer+MoltenVK.h"
@@ -134,8 +133,8 @@
auto* portabilityFeatures = (VkPhysicalDevicePortabilitySubsetFeaturesEXTX*)next;
portabilityFeatures->triangleFans = false;
portabilityFeatures->separateStencilMaskRef = true;
- portabilityFeatures->events = true;
- portabilityFeatures->standardImageViews = _mvkInstance->getMoltenVKConfiguration()->fullImageViewSwizzle;
+ portabilityFeatures->events = true;
+ portabilityFeatures->standardImageViews = _mvkInstance->getMoltenVKConfiguration()->fullImageViewSwizzle || _metalFeatures.nativeTextureSwizzle;
portabilityFeatures->samplerMipLodBias = false;
break;
}
@@ -335,7 +334,7 @@
}
#if MVK_MACOS
// If this is a compressed format and there's no codec, it isn't supported.
- if ((mvkFmt == kMVKFormatCompressed) && !mvkCanDecodeFormat(format)) {
+ if ((mvkFmt == kMVKFormatCompressed) && !mvkCanDecodeFormat(format) && !_metalFeatures.native3DCompressedTextures) {
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
#endif
@@ -491,8 +490,8 @@
MVKVectorInline<VkColorSpaceKHR, 16> colorSpaces;
colorSpaces.push_back(VK_COLOR_SPACE_SRGB_NONLINEAR_KHR);
-#if MVK_MACOS
if (getInstance()->_enabledExtensions.vk_EXT_swapchain_colorspace.enabled) {
+#if MVK_MACOS
// 10.11 supports some but not all of the color spaces specified by VK_EXT_swapchain_colorspace.
colorSpaces.push_back(VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT);
@@ -503,8 +502,38 @@
colorSpaces.push_back(VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT);
}
- }
+ if (mvkOSVersion() >= 10.14) {
+ colorSpaces.push_back(VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT);
+ colorSpaces.push_back(VK_COLOR_SPACE_BT2020_LINEAR_EXT);
+ colorSpaces.push_back(VK_COLOR_SPACE_HDR10_ST2084_EXT);
+ colorSpaces.push_back(VK_COLOR_SPACE_HDR10_HLG_EXT);
+ }
#endif
+#if MVK_IOS
+ // iOS 8 doesn't support anything but sRGB.
+ if (mvkOSVersion() >= 9.0) {
+ colorSpaces.push_back(VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT);
+ colorSpaces.push_back(VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT);
+ colorSpaces.push_back(VK_COLOR_SPACE_BT709_NONLINEAR_EXT);
+ colorSpaces.push_back(VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT);
+ colorSpaces.push_back(VK_COLOR_SPACE_PASS_THROUGH_EXT);
+ }
+ if (mvkOSVersion() >= 10.0) {
+ colorSpaces.push_back(VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT);
+ colorSpaces.push_back(VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT);
+ }
+ if (mvkOSVersion() >= 12.0) {
+ colorSpaces.push_back(VK_COLOR_SPACE_HDR10_ST2084_EXT);
+ }
+ if (mvkOSVersion() >= 12.3) {
+ colorSpaces.push_back(VK_COLOR_SPACE_DCI_P3_LINEAR_EXT);
+ colorSpaces.push_back(VK_COLOR_SPACE_BT2020_LINEAR_EXT);
+ }
+ if (mvkOSVersion() >= 13.0) {
+ colorSpaces.push_back(VK_COLOR_SPACE_HDR10_HLG_EXT);
+ }
+#endif
+ }
uint mtlFmtsCnt = sizeof(mtlFormats) / sizeof(MTLPixelFormat);
if (!mvkMTLPixelFormatIsSupported(MTLPixelFormatBGR10A2Unorm)) { mtlFmtsCnt--; }
@@ -710,22 +739,21 @@
if (pMemoryProperties) {
pMemoryProperties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
pMemoryProperties->memoryProperties = _memoryProperties;
- auto* next = (MVKVkAPIStructHeader*)pMemoryProperties->pNext;
- while (next) {
- switch ((uint32_t)next->sType) {
+ for (auto* next = (VkBaseOutStructure*)pMemoryProperties->pNext; next; next = next->pNext) {
+ switch (next->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
auto* budgetProps = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)next;
memset(budgetProps->heapBudget, 0, sizeof(budgetProps->heapBudget));
memset(budgetProps->heapUsage, 0, sizeof(budgetProps->heapUsage));
- budgetProps->heapBudget[0] = (VkDeviceSize)mvkRecommendedMaxWorkingSetSize(_mtlDevice);
- if ( [_mtlDevice respondsToSelector: @selector(currentAllocatedSize)] ) {
- budgetProps->heapUsage[0] = (VkDeviceSize)_mtlDevice.currentAllocatedSize;
+ budgetProps->heapBudget[0] = (VkDeviceSize)getRecommendedMaxWorkingSetSize();
+ budgetProps->heapUsage[0] = (VkDeviceSize)getCurrentAllocatedSize();
+ if (!getHasUnifiedMemory()) {
+ budgetProps->heapBudget[1] = (VkDeviceSize)mvkGetAvailableMemorySize();
+ budgetProps->heapUsage[1] = (VkDeviceSize)mvkGetUsedMemorySize();
}
- next = (MVKVkAPIStructHeader*)budgetProps->pNext;
break;
}
default:
- next = (MVKVkAPIStructHeader*)next->pNext;
break;
}
}
@@ -821,6 +849,14 @@
_metalFeatures.stencilFeedback = true;
}
+ if ( mvkOSVersion() >= 13.0 ) {
+ _metalFeatures.mslVersionEnum = MTLLanguageVersion2_2;
+ _metalFeatures.placementHeaps = true;
+ if ( getSupportsGPUFamily(MTLGPUFamilyApple4) ) {
+ _metalFeatures.nativeTextureSwizzle = true;
+ }
+ }
+
#endif
#if MVK_MACOS
@@ -866,6 +902,15 @@
_metalFeatures.stencilFeedback = true;
}
+ if ( mvkOSVersion() >= 10.15 ) {
+ _metalFeatures.mslVersionEnum = MTLLanguageVersion2_2;
+ _metalFeatures.native3DCompressedTextures = true;
+ if ( getSupportsGPUFamily(MTLGPUFamilyMac2) ) {
+ _metalFeatures.nativeTextureSwizzle = true;
+ _metalFeatures.placementHeaps = true;
+ }
+ }
+
#endif
// Note the selector name, which is different from the property name.
@@ -887,6 +932,9 @@
_metalFeatures.mslVersion = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::make_msl_version(maj, min);
switch (_metalFeatures.mslVersionEnum) {
+ case MTLLanguageVersion2_2:
+ setMSLVersion(2, 2);
+ break;
case MTLLanguageVersion2_1:
setMSLVersion(2, 1);
break;
@@ -915,7 +963,12 @@
}
-/** Initializes the physical device features of this instance. */
+// Returns whether the MTLDevice reports support for the GPU family.
+// Returns false if the MTLDevice does not implement supportsFamily: at all.
+bool MVKPhysicalDevice::getSupportsGPUFamily(MTLGPUFamily gpuFamily) {
+	if ( ![_mtlDevice respondsToSelector: @selector(supportsFamily:)] ) { return false; }
+	return [_mtlDevice supportsFamily: gpuFamily];
+}
+
+// Initializes the physical device features of this instance.
void MVKPhysicalDevice::initFeatures() {
memset(&_features, 0, sizeof(_features)); // Start with everything cleared
@@ -999,6 +1052,9 @@
_features.multiViewport = true;
}
+ if ( mvkOSVersion() >= 10.15 ) {
+ _features.shaderResourceMinLod = true;
+ }
#endif
}
@@ -1049,7 +1105,7 @@
// VkBool32 shaderInt64;
// VkBool32 shaderInt16; // done
// VkBool32 shaderResourceResidency;
-// VkBool32 shaderResourceMinLod;
+// VkBool32 shaderResourceMinLod; // done
// VkBool32 sparseBinding;
// VkBool32 sparseResidencyBuffer;
// VkBool32 sparseResidencyImage2D;
@@ -1070,7 +1126,7 @@
_properties.apiVersion = MVK_VULKAN_API_VERSION;
_properties.driverVersion = MVK_VERSION;
- mvkPopulateGPUInfo(_properties, _mtlDevice);
+ initGPUInfoProperties();
initPipelineCacheUUID();
// Limits
@@ -1381,6 +1437,104 @@
_properties.limits.maxGeometryTotalOutputComponents = 0;
}
+#if MVK_MACOS
+
+// Searches the registry entry, and recursively its parents in the IOService plane, for the
+// named property, and returns the leading four bytes of its data as a uint32_t.
+// Returns zero if the property is not found, is not a CFData, or holds fewer than four bytes.
+static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef propertyName) {
+
+	uint32_t value = 0;
+
+	CFTypeRef cfProp = IORegistryEntrySearchCFProperty(entry,
+													   kIOServicePlane,
+													   propertyName,
+													   kCFAllocatorDefault,
+													   kIORegistryIterateRecursively |
+													   kIORegistryIterateParents);
+	if (cfProp) {
+		// The search can return any CF type. Only treat it as raw bytes if it really is a
+		// CFData, and only read from it if it is large enough to hold the value.
+		if (CFGetTypeID(cfProp) == CFDataGetTypeID()) {
+			CFDataRef cfData = (CFDataRef)cfProp;
+			const UInt8* pBytes = CFDataGetBytePtr(cfData);
+			if (pBytes && CFDataGetLength(cfData) >= (CFIndex)sizeof(value)) {
+				memcpy(&value, pBytes, sizeof(value));	// Avoids assuming the bytes are aligned
+			}
+		}
+		CFRelease(cfProp);
+	}
+
+	return value;
+}
+
+// Populates the device type, device name, vendor ID and device ID of _properties.
+// The device type is derived from MTLDevice.isLowPower. The vendor and device IDs are read
+// from the IOKit registry: first through the registry ID of the MTLDevice, where it has one,
+// and otherwise by scanning the PCI display devices for one whose vendor is consistent with
+// the device type. If neither lookup finds a match, the IDs are left untouched.
+void MVKPhysicalDevice::initGPUInfoProperties() {
+
+	static const uint32_t kIntelVendorId = 0x8086;
+	bool isFound = false;
+
+	bool isIntegrated = _mtlDevice.isLowPower;
+	_properties.deviceType = isIntegrated ? VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU : VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
+	strlcpy(_properties.deviceName, _mtlDevice.name.UTF8String, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE);
+
+	// If the device has an associated registry ID, we can use that to get the associated IOKit node.
+	// The match dictionary is consumed by IOServiceGetMatchingService and does not need to be released.
+	io_registry_entry_t entry;
+	uint64_t regID = mvkGetRegistryID(_mtlDevice);
+	if (regID) {
+		entry = IOServiceGetMatchingService(kIOMasterPortDefault, IORegistryEntryIDMatching(regID));
+		if (entry) {
+			// That returned the IOGraphicsAccelerator nub. Its parent, then, is the actual
+			// PCI device.
+			io_registry_entry_t parent;
+			if (IORegistryEntryGetParentEntry(entry, kIOServicePlane, &parent) == kIOReturnSuccess) {
+				isFound = true;
+				_properties.vendorID = mvkGetEntryProperty(parent, CFSTR("vendor-id"));
+				_properties.deviceID = mvkGetEntryProperty(parent, CFSTR("device-id"));
+				IOObjectRelease(parent);
+			}
+			IOObjectRelease(entry);
+		}
+	}
+	// Iterate all GPUs, looking for a match.
+	// The match dictionary is consumed by IOServiceGetMatchingServices and does not need to be released.
+	io_iterator_t entryIterator;
+	if (!isFound && IOServiceGetMatchingServices(kIOMasterPortDefault,
+												 IOServiceMatching("IOPCIDevice"),
+												 &entryIterator) == kIOReturnSuccess) {
+		while ( !isFound && (entry = IOIteratorNext(entryIterator)) ) {
+			if (mvkGetEntryProperty(entry, CFSTR("class-code")) == 0x30000) {	// 0x30000 : DISPLAY_VGA
+
+				// The Intel GPU will always be marked as integrated.
+				// Return on a match of either Intel && low power, or non-Intel and non-low-power.
+				uint32_t vendorID = mvkGetEntryProperty(entry, CFSTR("vendor-id"));
+				if ( (vendorID == kIntelVendorId) == isIntegrated) {
+					isFound = true;
+					_properties.vendorID = vendorID;
+					_properties.deviceID = mvkGetEntryProperty(entry, CFSTR("device-id"));
+				}
+			}
+			// Each object handed out by IOIteratorNext is owned by the caller and must be
+			// released, whether or not it turned out to be the device we were looking for.
+			IOObjectRelease(entry);
+		}
+		IOObjectRelease(entryIterator);
+	}
+}
+
+#endif //MVK_MACOS
+
+#if MVK_IOS
+
+// Populates the vendor ID, device ID, device type and device name of _properties.
+// For iOS devices, the Device ID is the SoC model (A8, A10X...), in the hex form 0xaMMX, where
+// "a" is the Apple brand, MM is the SoC model number (8, 10...) and X is 1 for X version, 0 for other.
+// The SoC model is selected from the highest iOS GPU family feature set the MTLDevice supports,
+// and a processor count above two selects the X version of the family 2 and family 3 models.
+void MVKPhysicalDevice::initGPUInfoProperties() {
+	bool hasMoreThanTwoCores = NSProcessInfo.processInfo.processorCount > 2;
+
+	uint32_t socID;
+	if ([_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily5_v1]) {
+		socID = 0xa120;
+	} else if ([_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily4_v1]) {
+		socID = 0xa110;
+	} else if ([_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily3_v1]) {
+		socID = hasMoreThanTwoCores ? 0xa101 : 0xa100;
+	} else if ([_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily2_v1]) {
+		socID = hasMoreThanTwoCores ? 0xa081 : 0xa080;
+	} else {
+		socID = 0xa070;		// None of the feature sets above is supported
+	}
+
+	strlcpy(_properties.deviceName, _mtlDevice.name.UTF8String, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE);
+	_properties.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
+	_properties.deviceID = socID;
+	_properties.vendorID = 0x0000106b;	// Apple's PCI ID
+}
+#endif //MVK_IOS
+
#pragma mark VkPhysicalDeviceLimits - List of feature limits available on the device
@@ -1516,7 +1670,7 @@
uuidComponentOffset += sizeof(mvkVersion);
// Next 4 bytes contains hightest Metal feature set supported by this device
- uint32_t mtlFeatSet = (uint32_t)getHighestMTLFeatureSet();
+ uint32_t mtlFeatSet = getHighestMTLFeatureSet();
*(uint32_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatSet);
uuidComponentOffset += sizeof(mtlFeatSet);
@@ -1526,24 +1680,46 @@
uuidComponentOffset += sizeof(spvxRev);
}
-MTLFeatureSet MVKPhysicalDevice::getHighestMTLFeatureSet() {
+uint32_t MVKPhysicalDevice::getHighestMTLFeatureSet() {
+
+ // On newer OS's, combine highest Metal version with highest GPU family
+ // (Mac & Apple GPU lists should be mutex on platform)
+ uint32_t mtlVer = 0;
#if MVK_IOS
- MTLFeatureSet maxFS = MTLFeatureSet_iOS_GPUFamily5_v1;
- MTLFeatureSet minFS = MTLFeatureSet_iOS_GPUFamily1_v1;
+ if (mvkOSVersion() >= 13.0) { mtlVer = 0x30000; }
+#endif
+#if MVK_MACOS
+ if (mvkOSVersion() >= 10.15) { mtlVer = 0x30000; }
+#endif
+
+ MTLGPUFamily mtlFam = MTLGPUFamily(0);
+ if (getSupportsGPUFamily(MTLGPUFamilyMac1)) { mtlFam = MTLGPUFamilyMac1; }
+ if (getSupportsGPUFamily(MTLGPUFamilyMac2)) { mtlFam = MTLGPUFamilyMac2; }
+
+ if (getSupportsGPUFamily(MTLGPUFamilyApple1)) { mtlFam = MTLGPUFamilyApple1; }
+ if (getSupportsGPUFamily(MTLGPUFamilyApple2)) { mtlFam = MTLGPUFamilyApple2; }
+ if (getSupportsGPUFamily(MTLGPUFamilyApple3)) { mtlFam = MTLGPUFamilyApple3; }
+ if (getSupportsGPUFamily(MTLGPUFamilyApple4)) { mtlFam = MTLGPUFamilyApple4; }
+ if (getSupportsGPUFamily(MTLGPUFamilyApple5)) { mtlFam = MTLGPUFamilyApple5; }
+
+ // Not explicitly guaranteed to be unique...but close enough without spilling over
+ uint32_t mtlFS = (mtlVer << 8) + (uint32_t)mtlFam;
+ if (mtlFS) { return mtlFS; }
+
+ // Fall back to legacy feature sets on older OS's
+#if MVK_IOS
+ uint32_t maxFS = (uint32_t)MTLFeatureSet_iOS_GPUFamily5_v1;
+ uint32_t minFS = (uint32_t)MTLFeatureSet_iOS_GPUFamily1_v1;
#endif
#if MVK_MACOS
- MTLFeatureSet maxFS = MTLFeatureSet_macOS_GPUFamily2_v1;
- MTLFeatureSet minFS = MTLFeatureSet_macOS_GPUFamily1_v1;
+ uint32_t maxFS = (uint32_t)MTLFeatureSet_macOS_GPUFamily2_v1;
+ uint32_t minFS = (uint32_t)MTLFeatureSet_macOS_GPUFamily1_v1;
#endif
- for (NSUInteger fs = maxFS; fs > minFS; fs--) {
- MTLFeatureSet mtlFS = (MTLFeatureSet)fs;
- if ( [_mtlDevice supportsFeatureSet: mtlFS] ) {
- return mtlFS;
- }
+ for (uint32_t fs = maxFS; fs > minFS; fs--) {
+ if ( [_mtlDevice supportsFeatureSet: (MTLFeatureSet)fs] ) { return fs; }
}
-
return minFS;
}
@@ -1606,7 +1782,7 @@
.memoryHeaps = {
{
.flags = (VK_MEMORY_HEAP_DEVICE_LOCAL_BIT),
- .size = (VkDeviceSize)mvkRecommendedMaxWorkingSetSize(_mtlDevice),
+ .size = (VkDeviceSize)getVRAMSize(),
},
},
// NB this list needs to stay sorted by propertyFlags (as bit sets)
@@ -1656,6 +1832,65 @@
_allMemoryTypes = 0x7; // Private, shared & memoryless
}
#endif
+#if MVK_MACOS
+ if (!getHasUnifiedMemory()) {
+ // This means we really have two heaps. The second heap is system memory.
+ _memoryProperties.memoryHeapCount = 2;
+ _memoryProperties.memoryHeaps[1].size = mvkGetSystemMemorySize();
+ _memoryProperties.memoryHeaps[1].flags = 0;
+ _memoryProperties.memoryTypes[2].heapIndex = 1; // Shared memory in the shared heap
+ }
+#endif
+}
+
+// Returns whether the GPU shares system memory with the CPU.
+// iOS devices always do. On macOS, the answer comes from MTLDevice.hasUnifiedMemory where the
+// device implements it. Where it does not, a low-power (integrated) GPU is taken to be unified,
+// instead of reporting every GPU as having its own separate memory.
+bool MVKPhysicalDevice::getHasUnifiedMemory() {
+#if MVK_IOS
+	return true;
+#endif
+#if MVK_MACOS
+	if ([_mtlDevice respondsToSelector: @selector(hasUnifiedMemory)]) { return _mtlDevice.hasUnifiedMemory; }
+	return _mtlDevice.isLowPower;
+#endif
+}
+
+// Returns the size to report for the device-local memory heap.
+uint64_t MVKPhysicalDevice::getVRAMSize() {
+#if MVK_IOS
+	// All iOS devices are UMA, so return the system memory size.
+	return mvkGetSystemMemorySize();
+#endif
+#if MVK_MACOS
+	// A unified memory device uses system memory. Otherwise, there's actually no way to query
+	// the total physical VRAM on the device in Metal, so just default to using the recommended
+	// max working set size (i.e. the budget).
+	return getHasUnifiedMemory() ? mvkGetSystemMemorySize() : getRecommendedMaxWorkingSetSize();
+#endif
+}
+
+// Returns the memory budget to report for the device. On macOS, this is the value reported
+// by the MTLDevice, where it implements recommendedMaxWorkingSetSize. On iOS, it is the
+// memory currently available, where that is non-zero. Otherwise a fixed minimum is returned.
+uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() {
+#if MVK_IOS
+	// GPU and CPU use shared memory. Estimate the current free memory in the system.
+	uint64_t availMem = mvkGetAvailableMemorySize();
+	if (availMem != 0) { return availMem; }
+#endif
+#if MVK_MACOS
+	if ( [_mtlDevice respondsToSelector: @selector(recommendedMaxWorkingSetSize)]) {
+		return _mtlDevice.recommendedMaxWorkingSetSize;
+	}
+#endif
+
+	return 128 * MEBI;		// Conservative minimum for macOS GPU's & iOS shared memory
+}
+
+// Returns the amount of memory currently allocated, as reported by the MTLDevice where it
+// implements currentAllocatedSize, or a platform-specific substitute where it does not.
+uint64_t MVKPhysicalDevice::getCurrentAllocatedSize() {
+	if ( ![_mtlDevice respondsToSelector: @selector(currentAllocatedSize)] ) {
+#if MVK_IOS
+		// We can use the current memory used by this process as a reasonable approximation.
+		return mvkGetUsedMemorySize();
+#endif
+#if MVK_MACOS
+		return 0;
+#endif
+	}
+	return _mtlDevice.currentAllocatedSize;
}
void MVKPhysicalDevice::initExtensions() {
@@ -1699,7 +1934,24 @@
logMsg += "\n\t\tvendorID: %#06x";
logMsg += "\n\t\tdeviceID: %#06x";
logMsg += "\n\t\tpipelineCacheUUID: %s";
- logMsg += "\n\tsupports Metal Shading Language version %s and the following Metal Feature Sets:";
+ logMsg += "\n\tsupports the following Metal Versions, GPU's and Feature Sets:";
+ logMsg += "\n\t\tMetal Shading Language %s";
+
+ if (getSupportsGPUFamily(MTLGPUFamilyApple5)) { logMsg += "\n\t\tGPU Family Apple 5"; }
+ if (getSupportsGPUFamily(MTLGPUFamilyApple4)) { logMsg += "\n\t\tGPU Family Apple 4"; }
+ if (getSupportsGPUFamily(MTLGPUFamilyApple3)) { logMsg += "\n\t\tGPU Family Apple 3"; }
+ if (getSupportsGPUFamily(MTLGPUFamilyApple2)) { logMsg += "\n\t\tGPU Family Apple 2"; }
+ if (getSupportsGPUFamily(MTLGPUFamilyApple1)) { logMsg += "\n\t\tGPU Family Apple 1"; }
+
+ if (getSupportsGPUFamily(MTLGPUFamilyMac2)) { logMsg += "\n\t\tGPU Family Mac 2"; }
+ if (getSupportsGPUFamily(MTLGPUFamilyMac1)) { logMsg += "\n\t\tGPU Family Mac 1"; }
+
+ if (getSupportsGPUFamily(MTLGPUFamilyCommon3)) { logMsg += "\n\t\tGPU Family Common 3"; }
+ if (getSupportsGPUFamily(MTLGPUFamilyCommon2)) { logMsg += "\n\t\tGPU Family Common 2"; }
+ if (getSupportsGPUFamily(MTLGPUFamilyCommon1)) { logMsg += "\n\t\tGPU Family Common 1"; }
+
+ if (getSupportsGPUFamily(MTLGPUFamilyMacCatalyst2)) { logMsg += "\n\t\tGPU Family Mac Catalyst 2"; }
+ if (getSupportsGPUFamily(MTLGPUFamilyMacCatalyst1)) { logMsg += "\n\t\tGPU Family Mac Catalyst 1"; }
#if MVK_IOS
if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily5_v1] ) { logMsg += "\n\t\tiOS GPU Family 5 v1"; }
@@ -2267,7 +2519,11 @@
}
VkDeviceSize MVKDevice::getVkFormatTexelBufferAlignment(VkFormat format, MVKBaseObject* mvkObj) {
- VkDeviceSize deviceAlignment = mvkMTLPixelFormatLinearTextureAlignment(getMTLPixelFormatFromVkFormat(format, mvkObj), getMTLDevice());
+ VkDeviceSize deviceAlignment = 0;
+ id<MTLDevice> mtlDev = getMTLDevice();
+ if ([mtlDev respondsToSelector: @selector(minimumLinearTextureAlignmentForPixelFormat:)]) {
+ deviceAlignment = [mtlDev minimumLinearTextureAlignmentForPixelFormat: getMTLPixelFormatFromVkFormat(format, mvkObj)];
+ }
return deviceAlignment ? deviceAlignment : _pProperties->limits.minTexelBufferOffsetAlignment;
}
@@ -2645,139 +2901,6 @@
#pragma mark -
#pragma mark Support functions
-uint64_t mvkRecommendedMaxWorkingSetSize(id<MTLDevice> mtlDevice) {
-
-#if MVK_MACOS
- if ( [mtlDevice respondsToSelector: @selector(recommendedMaxWorkingSetSize)]) {
- return mtlDevice.recommendedMaxWorkingSetSize;
- }
-#endif
-#if MVK_IOS
- // GPU and CPU use shared memory. Estimate the current free memory in the system.
- mach_port_t host_port;
- mach_msg_type_number_t host_size;
- vm_size_t pagesize;
- host_port = mach_host_self();
- host_size = sizeof(vm_statistics_data_t) / sizeof(integer_t);
- host_page_size(host_port, &pagesize);
- vm_statistics_data_t vm_stat;
- if (host_statistics(host_port, HOST_VM_INFO, (host_info_t)&vm_stat, &host_size) == KERN_SUCCESS ) {
- return vm_stat.free_count * pagesize;
- }
-#endif
-
- return 128 * MEBI; // Conservative minimum for macOS GPU's & iOS shared memory
-}
-
-#if MVK_MACOS
-
-static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef propertyName) {
-
- uint32_t value = 0;
-
- CFTypeRef cfProp = IORegistryEntrySearchCFProperty(entry,
- kIOServicePlane,
- propertyName,
- kCFAllocatorDefault,
- kIORegistryIterateRecursively |
- kIORegistryIterateParents);
- if (cfProp) {
- const uint32_t* pValue = reinterpret_cast<const uint32_t*>(CFDataGetBytePtr((CFDataRef)cfProp));
- if (pValue) { value = *pValue; }
- CFRelease(cfProp);
- }
-
- return value;
-}
-
-void mvkPopulateGPUInfo(VkPhysicalDeviceProperties& devProps, id<MTLDevice> mtlDevice) {
-
- static const uint32_t kIntelVendorId = 0x8086;
- bool isFound = false;
-
- bool isIntegrated = mtlDevice.isLowPower;
- devProps.deviceType = isIntegrated ? VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU : VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
- strlcpy(devProps.deviceName, mtlDevice.name.UTF8String, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE);
-
- // If the device has an associated registry ID, we can use that to get the associated IOKit node.
- // The match dictionary is consumed by IOServiceGetMatchingServices and does not need to be released.
- io_registry_entry_t entry;
- uint64_t regID = mvkGetRegistryID(mtlDevice);
- if (regID) {
- entry = IOServiceGetMatchingService(kIOMasterPortDefault, IORegistryEntryIDMatching(regID));
- if (entry) {
- // That returned the IOGraphicsAccelerator nub. Its parent, then, is the actual
- // PCI device.
- io_registry_entry_t parent;
- if (IORegistryEntryGetParentEntry(entry, kIOServicePlane, &parent) == kIOReturnSuccess) {
- isFound = true;
- devProps.vendorID = mvkGetEntryProperty(parent, CFSTR("vendor-id"));
- devProps.deviceID = mvkGetEntryProperty(parent, CFSTR("device-id"));
- IOObjectRelease(parent);
- }
- IOObjectRelease(entry);
- }
- }
- // Iterate all GPU's, looking for a match.
- // The match dictionary is consumed by IOServiceGetMatchingServices and does not need to be released.
- io_iterator_t entryIterator;
- if (!isFound && IOServiceGetMatchingServices(kIOMasterPortDefault,
- IOServiceMatching("IOPCIDevice"),
- &entryIterator) == kIOReturnSuccess) {
- while ( !isFound && (entry = IOIteratorNext(entryIterator)) ) {
- if (mvkGetEntryProperty(entry, CFSTR("class-code")) == 0x30000) { // 0x30000 : DISPLAY_VGA
-
- // The Intel GPU will always be marked as integrated.
- // Return on a match of either Intel && low power, or non-Intel and non-low-power.
- uint32_t vendorID = mvkGetEntryProperty(entry, CFSTR("vendor-id"));
- if ( (vendorID == kIntelVendorId) == isIntegrated) {
- isFound = true;
- devProps.vendorID = vendorID;
- devProps.deviceID = mvkGetEntryProperty(entry, CFSTR("device-id"));
- }
- }
- }
- IOObjectRelease(entryIterator);
- }
-}
-
-#endif //MVK_MACOS
-
-#if MVK_IOS
-
-void mvkPopulateGPUInfo(VkPhysicalDeviceProperties& devProps, id<MTLDevice> mtlDevice) {
- // For iOS devices, the Device ID is the SoC model (A8, A10X...), in the hex form 0xaMMX, where
- //"a" is the Apple brand, MM is the SoC model number (8, 10...) and X is 1 for X version, 0 for other.
- NSUInteger coreCnt = NSProcessInfo.processInfo.processorCount;
- uint32_t devID = 0xa070;
- if ([mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily5_v1]) {
- devID = 0xa120;
- } else if ([mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily4_v1]) {
- devID = 0xa110;
- } else if ([mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily3_v1]) {
- devID = coreCnt > 2 ? 0xa101 : 0xa100;
- } else if ([mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily2_v1]) {
- devID = coreCnt > 2 ? 0xa081 : 0xa080;
- }
-
- devProps.vendorID = 0x0000106b; // Apple's PCI ID
- devProps.deviceID = devID;
- devProps.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
- strlcpy(devProps.deviceName, mtlDevice.name.UTF8String, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE);
-}
-#endif //MVK_IOS
-
uint64_t mvkGetRegistryID(id<MTLDevice> mtlDevice) {
return [mtlDevice respondsToSelector: @selector(registryID)] ? mtlDevice.registryID : 0;
}
-
-VkDeviceSize mvkMTLPixelFormatLinearTextureAlignment(MTLPixelFormat mtlPixelFormat,
- id<MTLDevice> mtlDevice) {
- if ([mtlDevice respondsToSelector: @selector(minimumLinearTextureAlignmentForPixelFormat:)]) {
- return [mtlDevice minimumLinearTextureAlignmentForPixelFormat: mtlPixelFormat];
- } else {
- return 0;
- }
-}
-
-
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
index 557c51c..d2ba33f 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
@@ -104,6 +104,9 @@
/** Returns the Metal buffer underlying this memory allocation. */
inline id<MTLBuffer> getMTLBuffer() { return _mtlBuffer; }
+ /** Returns the Metal heap underlying this memory allocation. */
+ inline id<MTLHeap> getMTLHeap() { return _mtlHeap; }
+
/** Returns the Metal storage mode used by this memory allocation. */
inline MTLStorageMode getMTLStorageMode() { return _mtlStorageMode; }
@@ -133,6 +136,7 @@
void removeBuffer(MVKBuffer* mvkBuff);
VkResult addImage(MVKImage* mvkImg);
void removeImage(MVKImage* mvkImg);
+ bool ensureMTLHeap();
bool ensureMTLBuffer();
bool ensureHostMemory();
void freeHostMemory();
@@ -145,6 +149,7 @@
VkDeviceSize _mapOffset = 0;
VkDeviceSize _mapSize = 0;
id<MTLBuffer> _mtlBuffer = nil;
+ id<MTLHeap> _mtlHeap = nil;
void* _pMemory = nullptr;
void* _pHostMemory = nullptr;
bool _isMapped = false;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
index 5d5dd09..c191737 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
@@ -32,7 +32,10 @@
#pragma mark MVKDeviceMemory
-void MVKDeviceMemory::propogateDebugName() { setLabelIfNotNil(_mtlBuffer, _debugName); }
+void MVKDeviceMemory::propogateDebugName() {
+ setLabelIfNotNil(_mtlHeap, _debugName);
+ setLabelIfNotNil(_mtlBuffer, _debugName);
+}
VkResult MVKDeviceMemory::map(VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void** ppData) {
@@ -86,8 +89,11 @@
}
#endif
- lock_guard<mutex> lock(_rezLock);
- for (auto& img : _images) { img->flushToDevice(offset, memSize); }
+ // If we have an MTLHeap object, there's no need to sync memory manually between images and the buffer.
+ if (!_mtlHeap) {
+ lock_guard<mutex> lock(_rezLock);
+ for (auto& img : _images) { img->flushToDevice(offset, memSize); }
+ }
}
return VK_SUCCESS;
}
@@ -98,7 +104,7 @@
MVKMTLBlitEncoder* pBlitEnc) {
// Coherent memory is flushed on unmap(), so it is only flushed if forced
VkDeviceSize memSize = adjustMemorySize(size, offset);
- if (memSize > 0 && isMemoryHostAccessible() && (evenIfCoherent || !isMemoryHostCoherent()) ) {
+ if (memSize > 0 && isMemoryHostAccessible() && (evenIfCoherent || !isMemoryHostCoherent()) && !_mtlHeap) {
lock_guard<mutex> lock(_rezLock);
for (auto& img : _images) { img->pullFromDevice(offset, memSize); }
@@ -153,8 +159,7 @@
return reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not bind VkImage %p to a VkDeviceMemory dedicated to resource %p. A dedicated allocation may only be used with the resource it was dedicated to.", mvkImg, getDedicatedResource() );
}
- if (!_isDedicated)
- _images.push_back(mvkImg);
+ if (!_isDedicated) { _images.push_back(mvkImg); }
return VK_SUCCESS;
}
@@ -164,6 +169,36 @@
mvkRemoveAllOccurances(_images, mvkImg);
}
+// Ensures that this instance is backed by a MTLHeap object where one can be used,
+// creating the MTLHeap if needed. Returns false only if creating the MTLHeap failed.
+// Returns true without creating a MTLHeap if placement heaps are not available or,
+// on macOS, if this memory does not use private storage, so a true result does
+// not by itself mean that _mtlHeap is non-nil.
+bool MVKDeviceMemory::ensureMTLHeap() {
+
+ if (_mtlHeap) { return true; }
+
+ // Don't bother if we don't have placement heaps.
+ if (!getDevice()->_pMetalFeatures->placementHeaps) { return true; }
+
+#if MVK_MACOS
+ // MTLHeaps on Mac must use private storage for now.
+ if (_mtlStorageMode != MTLStorageModePrivate) { return true; }
+#endif
+
+ // The descriptor is created with +new, so it is owned here and released below.
+ MTLHeapDescriptor* heapDesc = [MTLHeapDescriptor new];
+ heapDesc.type = MTLHeapTypePlacement;
+ heapDesc.resourceOptions = getMTLResourceOptions();
+ // For now, use tracked resources. Later, we should probably default
+ // to untracked, since Vulkan uses explicit barriers anyway.
+ heapDesc.hazardTrackingMode = MTLHazardTrackingModeTracked;
+ heapDesc.size = _allocationSize;
+ _mtlHeap = [_device->getMTLDevice() newHeapWithDescriptor: heapDesc]; // retained
+ [heapDesc release];
+ if (!_mtlHeap) { return false; }
+
+ // Apply any debug name to the newly created heap.
+ propogateDebugName();
+
+ return true;
+}
+
// Ensures that this instance is backed by a MTLBuffer object,
// creating the MTLBuffer if needed, and returns whether it was successful.
bool MVKDeviceMemory::ensureMTLBuffer() {
@@ -175,12 +210,20 @@
if (memLen > _device->_pMetalFeatures->maxMTLBufferSize) { return false; }
// If host memory was already allocated, it is copied into the new MTLBuffer, and then released.
- if (_pHostMemory) {
+ if (_mtlHeap) {
+ _mtlBuffer = [_mtlHeap newBufferWithLength: memLen options: getMTLResourceOptions() offset: 0]; // retained
+ if (_pHostMemory) {
+ memcpy(_mtlBuffer.contents, _pHostMemory, memLen);
+ freeHostMemory();
+ }
+ [_mtlBuffer makeAliasable];
+ } else if (_pHostMemory) {
_mtlBuffer = [getMTLDevice() newBufferWithBytes: _pHostMemory length: memLen options: getMTLResourceOptions()]; // retained
freeHostMemory();
} else {
_mtlBuffer = [getMTLDevice() newBufferWithLength: memLen options: getMTLResourceOptions()]; // retained
}
+ if (!_mtlBuffer) { return false; }
_pMemory = isMemoryHostAccessible() ? _mtlBuffer.contents : nullptr;
propogateDebugName();
@@ -267,6 +310,15 @@
return;
}
+ // If we can, create a MTLHeap. This should happen before creating the buffer
+ // allowing us to map its contents.
+ if (!dedicatedImage && !dedicatedBuffer) {
+ if (!ensureMTLHeap()) {
+ setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not allocate VkDeviceMemory of size %llu bytes.", _allocationSize));
+ return;
+ }
+ }
+
// If memory needs to be coherent it must reside in an MTLBuffer, since an open-ended map() must work.
if (isMemoryHostCoherent() && !ensureMTLBuffer() ) {
setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "Could not allocate a host-coherent VkDeviceMemory of size %llu bytes. The maximum memory-aligned size of a host-coherent VkDeviceMemory is %llu bytes.", _allocationSize, _device->_pMetalFeatures->maxMTLBufferSize));
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
index 616b124..f210a13 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
@@ -275,6 +275,7 @@
bool _usesTexelBuffer;
bool _isLinear;
bool _is3DCompressed;
+ bool _isAliasable = false;	// Defaulted because the constructor assigns it only for non-linear images on devices with placement heaps.
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index e2c827a..827754a 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -197,10 +197,8 @@
switch (next->sType) {
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
auto* dedicatedReqs = (VkMemoryDedicatedRequirements*)next;
- // TODO: Maybe someday we could do something with MTLHeaps
- // and allocate non-dedicated memory from them. For now, we
- // always prefer dedicated allocations for non-buffer-backed images.
- dedicatedReqs->prefersDedicatedAllocation = !_usesTexelBuffer;
+ bool writable = mvkIsAnyFlagEnabled(_usage, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
+ dedicatedReqs->prefersDedicatedAllocation = !_usesTexelBuffer && (writable || !_device->_pMetalFeatures->placementHeaps);
dedicatedReqs->requiresDedicatedAllocation = VK_FALSE;
break;
}
@@ -229,7 +227,7 @@
bool isUncompressed = blockExt.width == 1 && blockExt.height == 1;
bool useTexelBuffer = _device->_pMetalFeatures->texelBuffers; // Texel buffers available
- useTexelBuffer = useTexelBuffer && isMemoryHostAccessible() && _isLinear && isUncompressed; // Applicable memory layout
+ useTexelBuffer = useTexelBuffer && (isMemoryHostAccessible() || _device->_pMetalFeatures->placementHeaps) && _isLinear && isUncompressed; // Applicable memory layout
useTexelBuffer = useTexelBuffer && _deviceMemory && _deviceMemory->_mtlBuffer; // Buffer is available to overlay
#if MVK_MACOS
@@ -350,6 +348,10 @@
mtlTex = [_deviceMemory->_mtlBuffer newTextureWithDescriptor: mtlTexDesc
offset: getDeviceMemoryOffset()
bytesPerRow: _subresources[0].layout.rowPitch];
+ } else if (_deviceMemory->_mtlHeap) {
+ mtlTex = [_deviceMemory->_mtlHeap newTextureWithDescriptor: mtlTexDesc
+ offset: getDeviceMemoryOffset()];
+ if (_isAliasable) [mtlTex makeAliasable];
} else {
mtlTex = [getMTLDevice() newTextureWithDescriptor: mtlTexDesc];
}
@@ -448,7 +450,7 @@
MTLTextureDescriptor* mtlTexDesc = [MTLTextureDescriptor new]; // retained
#if MVK_MACOS
if (_is3DCompressed) {
- // Metal doesn't yet support 3D compressed textures, so we'll decompress
+ // Metal before 3.0 doesn't support 3D compressed textures, so we'll decompress
// the texture ourselves. This, then, is the *uncompressed* format.
mtlTexDesc.pixelFormat = MTLPixelFormatBGRA8Unorm;
} else {
@@ -620,17 +622,26 @@
_mtlTextureType = mvkMTLTextureTypeFromVkImageType(pCreateInfo->imageType, _arrayLayers, _samples > VK_SAMPLE_COUNT_1_BIT);
_usage = pCreateInfo->usage;
- _is3DCompressed = (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) && (mvkFormatTypeFromVkFormat(pCreateInfo->format) == kMVKFormatCompressed);
+ _is3DCompressed = (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) && (mvkFormatTypeFromVkFormat(pCreateInfo->format) == kMVKFormatCompressed) && !getDevice()->_pMetalFeatures->native3DCompressedTextures;
_isDepthStencilAttachment = (mvkAreAllFlagsEnabled(pCreateInfo->usage, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ||
mvkAreAllFlagsEnabled(mvkVkFormatProperties(pCreateInfo->format).optimalTilingFeatures, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT));
_canSupportMTLTextureView = !_isDepthStencilAttachment || _device->_pMetalFeatures->stencilViews;
_hasExpectedTexelSize = (mvkMTLPixelFormatBytesPerBlock(_mtlPixelFormat) == mvkVkFormatBytesPerBlock(pCreateInfo->format));
- // Calc _byteCount after _byteAlignment
- _byteAlignment = _isLinear ? _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this) : mvkEnsurePowerOfTwo(mvkVkFormatBytesPerBlock(pCreateInfo->format));
- for (uint32_t mipLvl = 0; mipLvl < _mipLevels; mipLvl++) {
- _byteCount += getBytesPerLayer(mipLvl) * _extent.depth * _arrayLayers;
- }
+ if (!_isLinear && _device->_pMetalFeatures->placementHeaps) {
+ MTLTextureDescriptor *mtlTexDesc = newMTLTextureDescriptor(); // temp retain
+ MTLSizeAndAlign sizeAndAlign = [_device->getMTLDevice() heapTextureSizeAndAlignWithDescriptor: mtlTexDesc];
+ [mtlTexDesc release];
+ _byteCount = sizeAndAlign.size;
+ _byteAlignment = sizeAndAlign.align;
+ _isAliasable = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_ALIAS_BIT);
+ } else {
+ // Calc _byteCount after _byteAlignment
+ _byteAlignment = _isLinear ? _device->getVkFormatTexelBufferAlignment(pCreateInfo->format, this) : mvkEnsurePowerOfTwo(mvkVkFormatBytesPerBlock(pCreateInfo->format));
+ for (uint32_t mipLvl = 0; mipLvl < _mipLevels; mipLvl++) {
+ _byteCount += getBytesPerLayer(mipLvl) * _extent.depth * _arrayLayers;
+ }
+ }
initSubresources(pCreateInfo);
}
@@ -646,8 +657,12 @@
}
#endif
#if MVK_MACOS
- if (isCompressed && !is2D && !mvkCanDecodeFormat(pCreateInfo->format)) {
- setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Under Metal, the %s compressed format may only be used with 2D images.", mvkVkFormatName(pCreateInfo->format)));
+ if (isCompressed && !is2D) {
+ if (pCreateInfo->imageType != VK_IMAGE_TYPE_3D) {
+ setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Under Metal, compressed formats may only be used with 2D or 3D images."));
+ } else if (!getDevice()->_pMetalFeatures->native3DCompressedTextures && !mvkCanDecodeFormat(pCreateInfo->format)) {
+ setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Under Metal, the %s compressed format may only be used with 2D images.", mvkVkFormatName(pCreateInfo->format)));
+ }
}
#endif
@@ -859,15 +874,25 @@
// overlay on the Metal texture of the underlying image.
id<MTLTexture> MVKImageView::newMTLTexture() {
MTLTextureType mtlTextureType = _mtlTextureType;
+ NSRange sliceRange = NSMakeRange(_subresourceRange.baseArrayLayer, _subresourceRange.layerCount);
// Fake support for 2D views of 3D textures.
if (_image->getImageType() == VK_IMAGE_TYPE_3D &&
- (mtlTextureType == MTLTextureType2D || mtlTextureType == MTLTextureType2DArray)) {
+ (mtlTextureType == MTLTextureType2D || mtlTextureType == MTLTextureType2DArray)) {
mtlTextureType = MTLTextureType3D;
- }
- return [_image->getMTLTexture() newTextureViewWithPixelFormat: _mtlPixelFormat
- textureType: mtlTextureType
- levels: NSMakeRange(_subresourceRange.baseMipLevel, _subresourceRange.levelCount)
- slices: NSMakeRange(_subresourceRange.baseArrayLayer, _subresourceRange.layerCount)]; // retained
+ sliceRange = NSMakeRange(0, 1);
+ }
+ if (getDevice()->_pMetalFeatures->nativeTextureSwizzle && _packedSwizzle) {
+ return [_image->getMTLTexture() newTextureViewWithPixelFormat: _mtlPixelFormat
+ textureType: mtlTextureType
+ levels: NSMakeRange(_subresourceRange.baseMipLevel, _subresourceRange.levelCount)
+ slices: sliceRange
+ swizzle: mvkMTLTextureSwizzleChannelsFromVkComponentMapping(mvkUnpackSwizzle(_packedSwizzle))]; // retained
+ } else {
+ return [_image->getMTLTexture() newTextureViewWithPixelFormat: _mtlPixelFormat
+ textureType: mtlTextureType
+ levels: NSMakeRange(_subresourceRange.baseMipLevel, _subresourceRange.levelCount)
+ slices: sliceRange]; // retained
+ }
}
@@ -908,12 +933,12 @@
_subresourceRange.layerCount = _image ? (_image->getLayerCount() - _subresourceRange.baseArrayLayer) : 1;
}
- bool useShaderSwizzle;
+ bool useSwizzle;
bool isMultisample = _image ? _image->getSampleCount() != VK_SAMPLE_COUNT_1_BIT : false;
_mtlTexture = nil;
- _mtlPixelFormat = getSwizzledMTLPixelFormat(pCreateInfo->format, pCreateInfo->components, useShaderSwizzle,
+ _mtlPixelFormat = getSwizzledMTLPixelFormat(pCreateInfo->format, pCreateInfo->components, useSwizzle,
(_device ? _device->_pMVKConfig : pAltMVKConfig));
- _packedSwizzle = useShaderSwizzle ? mvkPackSwizzle(pCreateInfo->components) : 0;
+ _packedSwizzle = useSwizzle ? mvkPackSwizzle(pCreateInfo->components) : 0;
_mtlTextureType = mvkMTLTextureTypeFromVkImageViewType(pCreateInfo->viewType, isMultisample);
initMTLTextureViewSupport();
@@ -944,20 +969,22 @@
// Returns a MTLPixelFormat, based on the MTLPixelFormat converted from the VkFormat, but possibly
// modified by the swizzles defined in the VkComponentMapping of the VkImageViewCreateInfo.
-// Metal does not support general per-texture swizzles, so if the swizzle is not an identity swizzle, this
-// function attempts to find an alternate MTLPixelFormat that coincidentally matches the swizzled format.
-// If a replacement MTLFormat was found, it is returned and useShaderSwizzle is set to false.
+// Metal prior to version 3.0 does not support general per-texture swizzles, so if the swizzle is not an
+// identity swizzle, this function attempts to find an alternate MTLPixelFormat that coincidentally
+// matches the swizzled format.
+// If a replacement MTLFormat was found, it is returned and useSwizzle is set to false.
// If a replacement MTLFormat could not be found, the original MTLPixelFormat is returned, and the
-// useShaderSwizzle is set to true, indicating that shader swizzling should be used for this image view.
+// useSwizzle is set to true, indicating that either native or shader swizzling should be used for
+// this image view.
// The config is used to test whether full shader swizzle support is available, and to report an error if not.
MTLPixelFormat MVKImageView::getSwizzledMTLPixelFormat(VkFormat format,
VkComponentMapping components,
- bool& useShaderSwizzle,
+ bool& useSwizzle,
const MVKConfiguration* pMVKConfig) {
// Attempt to find a valid format transformation swizzle first.
MTLPixelFormat mtlPF = getMTLPixelFormatFromVkFormat(format);
- useShaderSwizzle = false;
+ useSwizzle = false;
#define SWIZZLE_MATCHES(R, G, B, A) mvkVkComponentMappingsMatch(components, {VK_COMPONENT_SWIZZLE_ ##R, VK_COMPONENT_SWIZZLE_ ##G, VK_COMPONENT_SWIZZLE_ ##B, VK_COMPONENT_SWIZZLE_ ##A} )
#define VK_COMPONENT_SWIZZLE_ANY VK_COMPONENT_SWIZZLE_MAX_ENUM
@@ -1027,9 +1054,9 @@
// No format transformation swizzles were found, so unless we have an identity swizzle, we'll need to use shader swizzling.
if ( !SWIZZLE_MATCHES(R, G, B, A)) {
- useShaderSwizzle = true;
+ useSwizzle = true;
- if ( !pMVKConfig->fullImageViewSwizzle ) {
+ if ( !pMVKConfig->fullImageViewSwizzle && !getDevice()->_pMetalFeatures->nativeTextureSwizzle ) {
const char* vkCmd = _image ? "vkCreateImageView(VkImageViewCreateInfo" : "vkGetPhysicalDeviceImageFormatProperties2KHR(VkPhysicalDeviceImageViewSupportEXTX";
const char* errMsg = ("The value of %s::components) (%s, %s, %s, %s), when applied to a VkImageView, requires full component swizzling to be enabled both at the"
" time when the VkImageView is created and at the time any pipeline that uses that VkImageView is compiled. Full component swizzling can"
@@ -1061,12 +1088,8 @@
(_mtlTextureType == _image->_mtlTextureType ||
((_mtlTextureType == MTLTextureType2D || _mtlTextureType == MTLTextureType2DArray) && is3D)) &&
_subresourceRange.levelCount == _image->_mipLevels &&
- _subresourceRange.layerCount == (is3D ? _image->_extent.depth : _image->_arrayLayers)) {
- _useMTLTextureView = false;
- }
-
- // Never use views for subsets of 3D textures. Metal doesn't support them yet.
- if (is3D && _subresourceRange.layerCount != _image->_extent.depth) {
+ (is3D || _subresourceRange.layerCount == _image->_arrayLayers) &&
+ (!getDevice()->_pMetalFeatures->nativeTextureSwizzle || !_packedSwizzle)) {
_useMTLTextureView = false;
}
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
index bdbb862..c5a7ae8 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
@@ -628,6 +628,7 @@
ADD_DVC_EXT2_ENTRY_POINT(vkGetDeviceGroupSurfacePresentModesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
ADD_DVC_EXT2_ENTRY_POINT(vkGetPhysicalDevicePresentRectanglesKHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
ADD_DVC_EXT2_ENTRY_POINT(vkAcquireNextImage2KHR, KHR_SWAPCHAIN, KHR_DEVICE_GROUP);
+ ADD_DVC_EXT_ENTRY_POINT(vkSetHdrMetadataEXT, EXT_HDR_METADATA);
ADD_DVC_EXT_ENTRY_POINT(vkResetQueryPoolEXT, EXT_HOST_QUERY_RESET);
ADD_DVC_EXT_ENTRY_POINT(vkDebugMarkerSetObjectTagEXT, EXT_DEBUG_MARKER);
ADD_DVC_EXT_ENTRY_POINT(vkDebugMarkerSetObjectNameEXT, EXT_DEBUG_MARKER);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index 1d1ae2d..cd0a2dc 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -1159,7 +1159,7 @@
shaderContext.options.mslOptions.enable_point_size_builtin = isRenderingPoints(pCreateInfo, reflectData);
shaderContext.options.shouldFlipVertexY = _device->_pMVKConfig->shaderConversionFlipVertexY;
- shaderContext.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle;
+ shaderContext.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle && !getDevice()->_pMetalFeatures->nativeTextureSwizzle;
shaderContext.options.mslOptions.tess_domain_origin_lower_left = pTessDomainOriginState && pTessDomainOriginState->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT;
shaderContext.options.tessPatchKind = reflectData.patchKind;
@@ -1346,7 +1346,7 @@
shaderContext.options.entryPointStage = spv::ExecutionModelGLCompute;
shaderContext.options.mslOptions.msl_version = _device->_pMetalFeatures->mslVersion;
shaderContext.options.mslOptions.texel_buffer_texture_width = _device->_pMetalFeatures->maxTextureDimension;
- shaderContext.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle;
+ shaderContext.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle && !getDevice()->_pMetalFeatures->nativeTextureSwizzle;
shaderContext.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers;
shaderContext.options.mslOptions.dispatch_base = _allowsDispatchBase;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
index 6211f0d..107d42a 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
@@ -80,6 +80,9 @@
/** Returns the specified performance stats structure. */
const MVKSwapchainPerformance* getPerformanceStatistics() { return &_performanceStatistics; }
+ /** Adds HDR metadata to this swapchain. */
+ void setHDRMetadataEXT(const VkHdrMetadataEXT& metadata);
+
/**
* Registers a semaphore and/or fence that will be signaled when the image at the given index becomes available.
* This function accepts both a semaphore and a fence, and either none, one, or both may be provided.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
index a4716f0..0c396e3 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
@@ -30,6 +30,8 @@
#import "CAMetalLayer+MoltenVK.h"
#import "MVKBlockObserver.h"
+#include <libkern/OSByteOrder.h>
+
using namespace std;
@@ -276,6 +278,70 @@
}
}
+#if MVK_MACOS
+// A pair of 16-bit chromaticity coordinates, laid out with no padding so that it
+// can be embedded directly in the packed SEI structures built from it.
+struct CIE1931XY {
+ uint16_t x;
+ uint16_t y;
+} __attribute__((packed));
+
+// According to D.3.28:
+// "[x and y] specify the normalized x and y chromaticity coordinates, respectively...
+// in normalized increments of 0.00002."
+// Scales the coordinate into those increments (x * 100000 / 2 is x / 0.00002),
+// converts it to a 16-bit unsigned integer, and returns it in big-endian byte order.
+static inline uint16_t FloatToCIE1931Unorm(float x) { return OSSwapHostToBigInt16((uint16_t)(x * 100000 / 2)); }
+// Converts both coordinates of a Vulkan chromaticity value with FloatToCIE1931Unorm().
+static inline CIE1931XY VkXYColorEXTToCIE1931XY(VkXYColorEXT xy) {
+ return { FloatToCIE1931Unorm(xy.x), FloatToCIE1931Unorm(xy.y) };
+}
+#endif
+
+// Converts the Vulkan HDR metadata into the H.265 SEI payloads that CAEDRMetadata
+// expects, attaches the result to the CAMetalLayer of this swapchain, and turns on
+// extended dynamic range content for that layer.
+// On platforms other than macOS, this function does nothing.
+void MVKSwapchain::setHDRMetadataEXT(const VkHdrMetadataEXT& metadata) {
+#if MVK_MACOS
+	// We were given metadata as floats, but CA wants it as specified in H.265.
+	// More specifically, it wants "Mastering display colour volume" (D.2.28) and
+	// "Content light level information" (D.2.35) SEI messages, with big-endian
+	// integers. We have to convert.
+	struct ColorVolumeSEI {
+		CIE1931XY display_primaries[3];  // Green, blue, red
+		CIE1931XY white_point;
+		uint32_t max_display_mastering_luminance;
+		uint32_t min_display_mastering_luminance;
+	} __attribute__((packed));
+	struct LightLevelSEI {
+		uint16_t max_content_light_level;
+		uint16_t max_pic_average_light_level;
+	} __attribute__((packed));
+	ColorVolumeSEI colorVol;
+	LightLevelSEI lightLevel;
+	// According to D.3.28:
+	//   "For describing mastering displays that use red, green, and blue colour
+	//    primaries, it is suggested that index value c equal to 0 should correspond
+	//    to the green primary, c equal to 1 should correspond to the blue primary
+	//    and c equal to 2 should correspond to the red colour primary."
+	colorVol.display_primaries[0] = VkXYColorEXTToCIE1931XY(metadata.displayPrimaryGreen);
+	colorVol.display_primaries[1] = VkXYColorEXTToCIE1931XY(metadata.displayPrimaryBlue);
+	colorVol.display_primaries[2] = VkXYColorEXTToCIE1931XY(metadata.displayPrimaryRed);
+	colorVol.white_point = VkXYColorEXTToCIE1931XY(metadata.whitePoint);
+	// Later in D.3.28:
+	//   "max_display_mastering_luminance and min_display_mastering_luminance specify
+	//    the nominal maximum and minimum display luminance, respectively, of the mastering
+	//    display in units of 0.0001 candelas [sic] per square metre."
+	// N.B. 1 nit = 1 cd/m^2
+	colorVol.max_display_mastering_luminance = OSSwapHostToBigInt32((uint32_t)(metadata.maxLuminance * 10000));
+	colorVol.min_display_mastering_luminance = OSSwapHostToBigInt32((uint32_t)(metadata.minLuminance * 10000));
+	lightLevel.max_content_light_level = OSSwapHostToBigInt16((uint16_t)metadata.maxContentLightLevel);
+	lightLevel.max_pic_average_light_level = OSSwapHostToBigInt16((uint16_t)metadata.maxFrameAverageLightLevel);
+
+	// The NSData and CAEDRMetadata objects below come from convenience class
+	// constructors, so they are autoreleased and not owned by this function.
+	// They must not be sent release here, or they will be over-released
+	// when the enclosing autorelease pool drains.
+	NSData* colorVolData = [NSData dataWithBytes: &colorVol length: sizeof(colorVol)];
+	NSData* lightLevelData = [NSData dataWithBytes: &lightLevel length: sizeof(lightLevel)];
+	CAEDRMetadata* caMetadata = [CAEDRMetadata HDR10MetadataWithDisplayInfo: colorVolData
+	                                                            contentInfo: lightLevelData
+	                                                     opticalOutputScale: 1];
+	_mtlLayer.EDRMetadata = caMetadata;
+	_mtlLayer.wantsExtendedDynamicRangeContent = YES;
+#endif
+}
+
#pragma mark Metal
@@ -342,41 +408,59 @@
if (pCreateInfo->compositeAlpha != VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) {
_mtlLayer.opaque = pCreateInfo->compositeAlpha == VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
}
-#if MVK_MACOS
+
switch (pCreateInfo->imageColorSpace) {
case VK_COLOR_SPACE_SRGB_NONLINEAR_KHR:
_mtlLayer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceSRGB);
break;
case VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT:
_mtlLayer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceDisplayP3);
+ _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
break;
case VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT:
_mtlLayer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearSRGB);
+ _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
+ break;
+ case VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT:
+ _mtlLayer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedSRGB);
+ _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
+ break;
+ case VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT:
+ _mtlLayer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearDisplayP3);
+ _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
break;
case VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT:
_mtlLayer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceDCIP3);
+ _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
break;
case VK_COLOR_SPACE_BT709_NONLINEAR_EXT:
_mtlLayer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceITUR_709);
break;
+ case VK_COLOR_SPACE_BT2020_LINEAR_EXT:
+ _mtlLayer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearITUR_2020);
+ _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
+ break;
+ case VK_COLOR_SPACE_HDR10_ST2084_EXT:
+ _mtlLayer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceITUR_2020_PQ_EOTF);
+ _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
+ break;
+ case VK_COLOR_SPACE_HDR10_HLG_EXT:
+ _mtlLayer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceITUR_2020_HLG);
+ _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES;
+ break;
case VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT:
_mtlLayer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceAdobeRGB1998);
break;
- case VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT:
- _mtlLayer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedSRGB);
- break;
case VK_COLOR_SPACE_PASS_THROUGH_EXT:
default:
// Nothing - the default is not to do color matching.
break;
}
-#endif
_mtlLayerOrigDrawSize = _mtlLayer.updatedDrawableSizeMVK;
// TODO: set additional CAMetalLayer properties before extracting drawables:
// - presentsWithTransaction
// - drawsAsynchronously
- // - wantsExtendedDynamicRangeContent (macOS only)
if ( [_mtlLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]] ) {
// Sometimes, the owning view can replace its CAMetalLayer. In that case, the client
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
index 52543e8..29a821d 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
@@ -68,6 +68,7 @@
MVK_EXTENSION(EXT_debug_report, EXT_DEBUG_REPORT, MVK_EXTENSION_INSTANCE)
MVK_EXTENSION(EXT_debug_utils, EXT_DEBUG_UTILS, MVK_EXTENSION_INSTANCE)
MVK_EXTENSION(EXT_fragment_shader_interlock, EXT_FRAGMENT_SHADER_INTERLOCK, MVK_EXTENSION_DEVICE)
+MVK_EXTENSION(EXT_hdr_metadata, EXT_HDR_METADATA, MVK_EXTENSION_DEVICE)
MVK_EXTENSION(EXT_host_query_reset, EXT_HOST_QUERY_RESET, MVK_EXTENSION_DEVICE)
MVK_EXTENSION(EXT_memory_budget, EXT_MEMORY_BUDGET, MVK_EXTENSION_DEVICE)
MVK_EXTENSION(EXT_metal_surface, EXT_METAL_SURFACE, MVK_EXTENSION_INSTANCE)
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
index 7d069b8..31d4a69 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
@@ -47,6 +47,9 @@
// Returns whether the specified properties are valid for this platform
static bool mvkIsSupportedOnPlatform(VkExtensionProperties* pProperties) {
#if MVK_MACOS
+ if (pProperties == &kVkExtProps_EXT_HDR_METADATA) {
+ return mvkOSVersion() >= 10.15;
+ }
if (pProperties == &kVkExtProps_EXT_FRAGMENT_SHADER_INTERLOCK) {
return mvkOSVersion() >= 10.13;
}
@@ -69,6 +72,7 @@
#endif
#if MVK_IOS
if (pProperties == &kVkExtProps_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE) { return false; }
+ if (pProperties == &kVkExtProps_EXT_HDR_METADATA) { return false; }
if (pProperties == &kVkExtProps_EXT_FRAGMENT_SHADER_INTERLOCK) {
return mvkOSVersion() >= 11.0;
}
@@ -81,7 +85,9 @@
if (pProperties == &kVkExtProps_EXT_SHADER_STENCIL_EXPORT) {
return mvkOSVersion() >= 12.0;
}
- if (pProperties == &kVkExtProps_EXT_SWAPCHAIN_COLOR_SPACE) { return false; }
+ if (pProperties == &kVkExtProps_EXT_SWAPCHAIN_COLOR_SPACE) {
+ return mvkOSVersion() >= 9.0;
+ }
if (pProperties == &kVkExtProps_EXT_TEXEL_BUFFER_ALIGNMENT) {
return mvkOSVersion() >= 11.0;
}
diff --git a/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h b/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h
index 0b762ee..0586545 100644
--- a/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h
+++ b/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.h
@@ -56,4 +56,12 @@
*/
@property(nonatomic, readwrite) NSUInteger maximumDrawableCountMVK;
+/**
+ * Replacement for the wantsExtendedDynamicRangeContent property.
+ *
+ * This property allows support under all OS versions. Delegates to the wantsExtendedDynamicRangeContent
+ * property if it is available. Otherwise, returns NO when read and does nothing when set.
+ */
+@property(nonatomic, readwrite) BOOL wantsExtendedDynamicRangeContentMVK;
+
@end
diff --git a/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.m b/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.m
index 6316b91..323efaf 100644
--- a/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.m
+++ b/MoltenVK/MoltenVK/OS/CAMetalLayer+MoltenVK.m
@@ -62,4 +62,18 @@
if ( [self respondsToSelector: @selector(setMaximumDrawableCount:)] ) { self.maximumDrawableCount = count; }
}
+-(BOOL) wantsExtendedDynamicRangeContentMVK { // Getter: reports this layer's extended-dynamic-range setting.
+#if MVK_MACOS
+ return self.wantsExtendedDynamicRangeContent; // macOS builds forward to the layer's own property.
+#else
+ return NO; // Builds without MVK_MACOS always report NO.
+#endif
+}
+
+-(void) setWantsExtendedDynamicRangeContentMVK: (BOOL) edr { // Setter: applies edr on macOS builds; the body is empty otherwise.
+#if MVK_MACOS
+ self.wantsExtendedDynamicRangeContent = edr; // Forward the requested value to the layer's own property.
+#endif
+}
+
@end
diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
index 8fce1c7..950ffb5 100644
--- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
+++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
@@ -932,6 +932,25 @@
return VkSampleCountFlagBits(sampleCount);
}
+MVK_PUBLIC_SYMBOL MTLTextureSwizzle mvkMTLTextureSwizzleFromVkComponentSwizzle(VkComponentSwizzle vkSwizzle) { // Maps a single Vulkan component swizzle to the matching Metal texture swizzle.
+ switch (vkSwizzle) {
+ case VK_COMPONENT_SWIZZLE_ZERO: return MTLTextureSwizzleZero;
+ case VK_COMPONENT_SWIZZLE_ONE: return MTLTextureSwizzleOne;
+ case VK_COMPONENT_SWIZZLE_R: return MTLTextureSwizzleRed;
+ case VK_COMPONENT_SWIZZLE_G: return MTLTextureSwizzleGreen;
+ case VK_COMPONENT_SWIZZLE_B: return MTLTextureSwizzleBlue;
+ case VK_COMPONENT_SWIZZLE_A: return MTLTextureSwizzleAlpha;
+ default: return MTLTextureSwizzleRed; // Any other value, including VK_COMPONENT_SWIZZLE_IDENTITY, yields red; this function has no channel position to resolve identity with.
+ }
+}
+
+MVK_PUBLIC_SYMBOL MTLTextureSwizzleChannels mvkMTLTextureSwizzleChannelsFromVkComponentMapping(VkComponentMapping vkMapping) { // Converts all four components of a Vulkan mapping into Metal swizzle channels.
+#define convert(v, d) \
+ v == VK_COMPONENT_SWIZZLE_IDENTITY ? MTLTextureSwizzle##d : mvkMTLTextureSwizzleFromVkComponentSwizzle(v)
+ return MTLTextureSwizzleChannelsMake(convert(vkMapping.r, Red), convert(vkMapping.g, Green), convert(vkMapping.b, Blue), convert(vkMapping.a, Alpha)); // Identity resolves to the component's own channel (r->Red, g->Green, b->Blue, a->Alpha); other values go through the per-component conversion.
+#undef convert
+}
+
#pragma mark Mipmaps
diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
index ec368a6..0170dca 100644
--- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm
+++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
@@ -2651,6 +2651,24 @@
#pragma mark -
+#pragma mark VK_EXT_hdr_metadata extension
+
+MVK_PUBLIC_SYMBOL void vkSetHdrMetadataEXT( // Passes HDR metadata to each of the listed swapchains.
+ VkDevice device, // Not referenced in this function body.
+ uint32_t swapchainCount, // Number of entries read from both pSwapchains and pMetadata.
+ const VkSwapchainKHR* pSwapchains,
+ const VkHdrMetadataEXT* pMetadata) {
+
+ MVKTraceVulkanCallStart();
+ for (uint32_t i = 0; i < swapchainCount; i++) {
+ auto* mvkSwpChn = (MVKSwapchain*)pSwapchains[i]; // The Vulkan handle is cast directly to the MVKSwapchain object.
+ mvkSwpChn->setHDRMetadataEXT(pMetadata[i]); // Entry i of pMetadata is applied to entry i of pSwapchains.
+ }
+ MVKTraceVulkanCallEnd();
+}
+
+
+#pragma mark -
#pragma mark VK_EXT_host_query_reset extension
MVK_PUBLIC_SYMBOL void vkResetQueryPoolEXT(
diff --git a/Scripts/create_dylib.sh b/Scripts/create_dylib.sh
index 6731d13..6fbc7b6 100755
--- a/Scripts/create_dylib.sh
+++ b/Scripts/create_dylib.sh
@@ -35,7 +35,7 @@
${MVK_LINK_WARN} \
-isysroot ${SDK_DIR} \
-iframework ${MVK_SYS_FWK_DIR} \
--framework Metal ${MVK_IOSURFACE_FWK} -framework ${MVK_UX_FWK} -framework QuartzCore -framework IOKit -framework Foundation \
+-framework Metal ${MVK_IOSURFACE_FWK} -framework ${MVK_UX_FWK} -framework QuartzCore -framework CoreGraphics -framework IOKit -framework Foundation \
--library-directory ${MVK_USR_LIB_DIR} \
-o "${BUILT_PRODUCTS_DIR}/dynamic/${MVK_DYLIB_NAME}" \
-force_load "${BUILT_PRODUCTS_DIR}/lib${PRODUCT_NAME}.a"