blob: 763e639993e394d82ff82b348e9a599c520b44d8 [file] [log] [blame]
/*
* MVKDevice.mm
*
* Copyright (c) 2015-2022 The Brenwill Workshop Ltd. (http://www.brenwill.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "MVKInstance.h"
#include "MVKDevice.h"
#include "MVKQueue.h"
#include "MVKSurface.h"
#include "MVKBuffer.h"
#include "MVKImage.h"
#include "MVKSwapchain.h"
#include "MVKQueryPool.h"
#include "MVKShaderModule.h"
#include "MVKPipeline.h"
#include "MVKFramebuffer.h"
#include "MVKRenderPass.h"
#include "MVKSync.h"
#include "MVKCommandPool.h"
#include "MVKFoundation.h"
#include "MVKCodec.h"
#include "MVKEnvironment.h"
#include <MoltenVKShaderConverter/SPIRVToMSLConverter.h>
#import "CAMetalLayer+MoltenVK.h"
#include <cmath>
using namespace std;
// Select the platform view class and pull in its framework header:
// MVKViewClass is UIView when MVK_IOS_OR_TVOS is set, and NSView when MVK_MACOS is set.
#if MVK_IOS_OR_TVOS
# include <UIKit/UIKit.h>
# define MVKViewClass UIView
#endif
#if MVK_MACOS
# include <AppKit/AppKit.h>
# define MVKViewClass NSView
#endif
// Mac Catalyst does not support feature sets, so we redefine them to GPU families in MVKDevice.h.
//
// supportsMTLFeatureSet(MFS) asks _mtlDevice whether it supports MTLFeatureSet_<MFS>; the argument
// is token-pasted onto the MTLFeatureSet_ prefix. When MVK_MACCAT is set, the query is sent as
// supportsFamily: instead of supportsFeatureSet:. An _mtlDevice must be in scope wherever it is used.
#if MVK_MACCAT
#define supportsMTLFeatureSet(MFS) [_mtlDevice supportsFamily: MTLFeatureSet_ ##MFS]
#else
#define supportsMTLFeatureSet(MFS) [_mtlDevice supportsFeatureSet: MTLFeatureSet_ ##MFS]
#endif
// supportsMTLGPUFamily(GPUF) asks _mtlDevice whether it supports MTLGPUFamily<GPUF>. The
// respondsToSelector: check comes first, so the expression evaluates to false (without sending
// supportsFamily:) when the device object does not implement that selector.
#define supportsMTLGPUFamily(GPUF) ([_mtlDevice respondsToSelector: @selector(supportsFamily:)] && [_mtlDevice supportsFamily: MTLGPUFamily ##GPUF])
// Suppress unused variable warnings to allow us to define these all in one place,
// but use them in platform-conditional code blocks.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-variable"
// GPU vendor identifiers, one per vendor named in the constant.
static const uint32_t kAMDVendorId = 0x1002;
static const uint32_t kAppleVendorId = 0x106b;
static const uint32_t kIntelVendorId = 0x8086;
static const uint32_t kNVVendorId = 0x10de;
// Device identifiers for the specific AMD Radeon RX models named in each constant.
static const uint32_t kAMDRadeonRX5700DeviceId = 0x731f;
static const uint32_t kAMDRadeonRX5500DeviceId = 0x7340;
static const uint32_t kAMDRadeonRX6800DeviceId = 0x73bf;
static const uint32_t kAMDRadeonRX6700DeviceId = 0x73df;
// Sample-position grid size (1 x 1) reported as maxSampleLocationGridSize in the
// sample-locations properties.
static const VkExtent2D kMetalSamplePositionGridSize = { 1, 1 };
// Zero-sized grid; presumably reported when programmable sample positions are unavailable
// (its use is not visible in this section of the file).
static const VkExtent2D kMetalSamplePositionGridSizeNotSupported = { 0, 0 };
#pragma clang diagnostic pop
#pragma mark -
#pragma mark MVKPhysicalDevice
/**
 * Retrieves the extension properties for this physical device by forwarding
 * pCount and pProperties to _supportedExtensions.getProperties(), and returns
 * whatever result that call produces.
 *
 * pLayerName is accepted for interface compatibility but is not consulted.
 */
VkResult MVKPhysicalDevice::getExtensionProperties(const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties) {
	VkResult queryResult = _supportedExtensions.getProperties(pCount, pProperties);
	return queryResult;
}
/** Copies this device's cached core feature set (_features) into the structure pointed to by features. */
void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures* features) {
	VkPhysicalDeviceFeatures& dstFeatures = *features;
	dstFeatures = _features;
}
/**
 * Populates the specified VkPhysicalDeviceFeatures2 structure, plus every recognized
 * feature structure chained through its pNext pointer.
 *
 * The sType of the head structure is set, its core features are copied from _features,
 * and then the pNext chain is walked. Each chained structure whose sType is handled
 * below has its feature members filled in; its pNext link is always preserved.
 * Chained structures with an unhandled sType are left untouched.
 *
 * Features that can be queried both through the aggregate Vulkan 1.1 / 1.2 structures
 * and through their individual per-extension structures are defined once, in
 * supportedFeats11 / supportedFeats12, and every per-extension case copies from the
 * same-named member there so the two query paths cannot disagree.
 */
void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) {

	// Create a SSOT (single source of truth) for these Vulkan 1.1 features, which can be queried via two mechanisms here.
	VkPhysicalDeviceVulkan11Features supportedFeats11 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
		.pNext = nullptr,
		.storageBuffer16BitAccess = true,
		.uniformAndStorageBuffer16BitAccess = true,
		.storagePushConstant16 = true,
		.storageInputOutput16 = true,
		.multiview = true,
		.multiviewGeometryShader = false,
		.multiviewTessellationShader = false, // FIXME
		.variablePointersStorageBuffer = true,
		.variablePointers = true,
		.protectedMemory = false,
		.samplerYcbcrConversion = true,
		.shaderDrawParameters = true,
	};

	// Create a SSOT (single source of truth) for these Vulkan 1.2 features, which can be queried via two mechanisms here.
	VkPhysicalDeviceVulkan12Features supportedFeats12 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
		.pNext = nullptr,
		.samplerMirrorClampToEdge = _metalFeatures.samplerMirrorClampToEdge,
		.drawIndirectCount = false, // VK_KHR_draw_indirect_count
		.storageBuffer8BitAccess = true,
		.uniformAndStorageBuffer8BitAccess = true,
		.storagePushConstant8 = true,
		.shaderBufferInt64Atomics = false, // VK_KHR_shader_atomic_int64
		.shaderSharedInt64Atomics = false, // VK_KHR_shader_atomic_int64
		.shaderFloat16 = true,
		.shaderInt8 = true,
		.descriptorIndexing = false, // Requires _metalFeatures.arrayOfTextures && _metalFeatures.arrayOfSamplers && shaderStorageBufferArrayNonUniformIndexing
		.shaderInputAttachmentArrayDynamicIndexing = _metalFeatures.arrayOfTextures,
		.shaderUniformTexelBufferArrayDynamicIndexing = _metalFeatures.arrayOfTextures,
		.shaderStorageTexelBufferArrayDynamicIndexing = _metalFeatures.arrayOfTextures,
		.shaderUniformBufferArrayNonUniformIndexing = false,
		.shaderSampledImageArrayNonUniformIndexing = _metalFeatures.arrayOfTextures && _metalFeatures.arrayOfSamplers,
		.shaderStorageBufferArrayNonUniformIndexing = false,
		.shaderStorageImageArrayNonUniformIndexing = _metalFeatures.arrayOfTextures,
		.shaderInputAttachmentArrayNonUniformIndexing = _metalFeatures.arrayOfTextures,
		.shaderUniformTexelBufferArrayNonUniformIndexing = _metalFeatures.arrayOfTextures,
		.shaderStorageTexelBufferArrayNonUniformIndexing = _metalFeatures.arrayOfTextures,
		.descriptorBindingUniformBufferUpdateAfterBind = true,
		.descriptorBindingSampledImageUpdateAfterBind = true,
		.descriptorBindingStorageImageUpdateAfterBind = true,
		.descriptorBindingStorageBufferUpdateAfterBind = true,
		.descriptorBindingUniformTexelBufferUpdateAfterBind = true,
		.descriptorBindingStorageTexelBufferUpdateAfterBind = true,
		.descriptorBindingUpdateUnusedWhilePending = true,
		.descriptorBindingPartiallyBound = true,
		.descriptorBindingVariableDescriptorCount = true,
		.runtimeDescriptorArray = true,
		.samplerFilterMinmax = false, // VK_EXT_sampler_filter_minmax
		.scalarBlockLayout = true,
		.imagelessFramebuffer = true,
		.uniformBufferStandardLayout = true,
		.shaderSubgroupExtendedTypes = _metalFeatures.simdPermute || _metalFeatures.quadPermute,
		.separateDepthStencilLayouts = true,
		.hostQueryReset = true,
		.timelineSemaphore = true,
		.bufferDeviceAddress = mvkOSVersionIsAtLeast(12.05, 16.0),
		.bufferDeviceAddressCaptureReplay = false,
		.bufferDeviceAddressMultiDevice = false,
		.vulkanMemoryModel = false, // VK_KHR_vulkan_memory_model
		.vulkanMemoryModelDeviceScope = false, // VK_KHR_vulkan_memory_model
		.vulkanMemoryModelAvailabilityVisibilityChains = false, // VK_KHR_vulkan_memory_model
		.shaderOutputViewportIndex = true,
		.shaderOutputLayer = true,
		.subgroupBroadcastDynamicId = true,
	};

	features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
	features->features = _features;

	// Walk the caller-supplied pNext chain, filling in each structure type we recognize.
	for (auto* next = (VkBaseOutStructure*)features->pNext; next; next = next->pNext) {
		switch ((uint32_t)next->sType) {
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
				// Copy from supportedFeats11, but keep pNext as is.
				auto* pFeats11 = (VkPhysicalDeviceVulkan11Features*)next;
				supportedFeats11.pNext = pFeats11->pNext;
				*pFeats11 = supportedFeats11;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
				// Copy from supportedFeats12, but keep pNext as is.
				auto* pFeats12 = (VkPhysicalDeviceVulkan12Features*)next;
				supportedFeats12.pNext = pFeats12->pNext;
				*pFeats12 = supportedFeats12;
				break;
			}

			// For consistency and ease of admin, keep the following list in the same order as in MVKDeviceFeatureStructs.def
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
				auto* storageFeatures = (VkPhysicalDevice16BitStorageFeatures*)next;
				storageFeatures->storageBuffer16BitAccess = supportedFeats11.storageBuffer16BitAccess;
				storageFeatures->uniformAndStorageBuffer16BitAccess = supportedFeats11.uniformAndStorageBuffer16BitAccess;
				storageFeatures->storagePushConstant16 = supportedFeats11.storagePushConstant16;
				storageFeatures->storageInputOutput16 = supportedFeats11.storageInputOutput16;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES: {
				// Each member reads its own same-named counterpart in supportedFeats12, so this
				// structure always agrees with what the Vulkan 1.2 aggregate structure reports.
				auto* storageFeatures = (VkPhysicalDevice8BitStorageFeatures*)next;
				storageFeatures->storageBuffer8BitAccess = supportedFeats12.storageBuffer8BitAccess;
				storageFeatures->uniformAndStorageBuffer8BitAccess = supportedFeats12.uniformAndStorageBuffer8BitAccess;
				storageFeatures->storagePushConstant8 = supportedFeats12.storagePushConstant8;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES:
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
				auto* bufferDeviceAddressFeatures = (VkPhysicalDeviceBufferDeviceAddressFeatures*)next;
				bufferDeviceAddressFeatures->bufferDeviceAddress = supportedFeats12.bufferDeviceAddress;
				bufferDeviceAddressFeatures->bufferDeviceAddressCaptureReplay = supportedFeats12.bufferDeviceAddressCaptureReplay;
				bufferDeviceAddressFeatures->bufferDeviceAddressMultiDevice = supportedFeats12.bufferDeviceAddressMultiDevice;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES: {
				auto* pDescIdxFeatures = (VkPhysicalDeviceDescriptorIndexingFeatures*)next;
				pDescIdxFeatures->shaderInputAttachmentArrayDynamicIndexing = supportedFeats12.shaderInputAttachmentArrayDynamicIndexing;
				pDescIdxFeatures->shaderUniformTexelBufferArrayDynamicIndexing = supportedFeats12.shaderUniformTexelBufferArrayDynamicIndexing;
				pDescIdxFeatures->shaderStorageTexelBufferArrayDynamicIndexing = supportedFeats12.shaderStorageTexelBufferArrayDynamicIndexing;
				pDescIdxFeatures->shaderUniformBufferArrayNonUniformIndexing = supportedFeats12.shaderUniformBufferArrayNonUniformIndexing;
				pDescIdxFeatures->shaderSampledImageArrayNonUniformIndexing = supportedFeats12.shaderSampledImageArrayNonUniformIndexing;
				pDescIdxFeatures->shaderStorageBufferArrayNonUniformIndexing = supportedFeats12.shaderStorageBufferArrayNonUniformIndexing;
				pDescIdxFeatures->shaderStorageImageArrayNonUniformIndexing = supportedFeats12.shaderStorageImageArrayNonUniformIndexing;
				pDescIdxFeatures->shaderInputAttachmentArrayNonUniformIndexing = supportedFeats12.shaderInputAttachmentArrayNonUniformIndexing;
				pDescIdxFeatures->shaderUniformTexelBufferArrayNonUniformIndexing = supportedFeats12.shaderUniformTexelBufferArrayNonUniformIndexing;
				pDescIdxFeatures->shaderStorageTexelBufferArrayNonUniformIndexing = supportedFeats12.shaderStorageTexelBufferArrayNonUniformIndexing;
				pDescIdxFeatures->descriptorBindingUniformBufferUpdateAfterBind = supportedFeats12.descriptorBindingUniformBufferUpdateAfterBind;
				pDescIdxFeatures->descriptorBindingSampledImageUpdateAfterBind = supportedFeats12.descriptorBindingSampledImageUpdateAfterBind;
				pDescIdxFeatures->descriptorBindingStorageImageUpdateAfterBind = supportedFeats12.descriptorBindingStorageImageUpdateAfterBind;
				pDescIdxFeatures->descriptorBindingStorageBufferUpdateAfterBind = supportedFeats12.descriptorBindingStorageBufferUpdateAfterBind;
				pDescIdxFeatures->descriptorBindingUniformTexelBufferUpdateAfterBind = supportedFeats12.descriptorBindingUniformTexelBufferUpdateAfterBind;
				pDescIdxFeatures->descriptorBindingStorageTexelBufferUpdateAfterBind = supportedFeats12.descriptorBindingStorageTexelBufferUpdateAfterBind;
				pDescIdxFeatures->descriptorBindingUpdateUnusedWhilePending = supportedFeats12.descriptorBindingUpdateUnusedWhilePending;
				pDescIdxFeatures->descriptorBindingPartiallyBound = supportedFeats12.descriptorBindingPartiallyBound;
				pDescIdxFeatures->descriptorBindingVariableDescriptorCount = supportedFeats12.descriptorBindingVariableDescriptorCount;
				pDescIdxFeatures->runtimeDescriptorArray = supportedFeats12.runtimeDescriptorArray;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES: {
				auto* dynamicRenderingFeatures = (VkPhysicalDeviceDynamicRenderingFeatures*)next;
				dynamicRenderingFeatures->dynamicRendering = true;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES: {
				auto* hostQueryResetFeatures = (VkPhysicalDeviceHostQueryResetFeatures*)next;
				hostQueryResetFeatures->hostQueryReset = supportedFeats12.hostQueryReset;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES: {
				auto* imagelessFramebufferFeatures = (VkPhysicalDeviceImagelessFramebufferFeatures*)next;
				imagelessFramebufferFeatures->imagelessFramebuffer = supportedFeats12.imagelessFramebuffer;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES: {
				auto* imageRobustnessFeatures = (VkPhysicalDeviceImageRobustnessFeatures*)next;
				imageRobustnessFeatures->robustImageAccess = true;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES: {
				auto* inlineUniformBlockFeatures = (VkPhysicalDeviceInlineUniformBlockFeatures*)next;
				inlineUniformBlockFeatures->inlineUniformBlock = true;
				inlineUniformBlockFeatures->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
				auto* multiviewFeatures = (VkPhysicalDeviceMultiviewFeatures*)next;
				multiviewFeatures->multiview = supportedFeats11.multiview;
				multiviewFeatures->multiviewGeometryShader = supportedFeats11.multiviewGeometryShader;
				multiviewFeatures->multiviewTessellationShader = supportedFeats11.multiviewTessellationShader;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES: {
				auto* privateDataFeatures = (VkPhysicalDevicePrivateDataFeatures*)next;
				privateDataFeatures->privateData = true;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
				auto* protectedMemFeatures = (VkPhysicalDeviceProtectedMemoryFeatures*)next;
				protectedMemFeatures->protectedMemory = supportedFeats11.protectedMemory;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
				auto* samplerYcbcrConvFeatures = (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)next;
				samplerYcbcrConvFeatures->samplerYcbcrConversion = supportedFeats11.samplerYcbcrConversion;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
				auto* scalarLayoutFeatures = (VkPhysicalDeviceScalarBlockLayoutFeatures*)next;
				scalarLayoutFeatures->scalarBlockLayout = supportedFeats12.scalarBlockLayout;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES: {
				auto* separateDepthStencilLayoutsFeatures = (VkPhysicalDeviceSeparateDepthStencilLayoutsFeatures*)next;
				separateDepthStencilLayoutsFeatures->separateDepthStencilLayouts = supportedFeats12.separateDepthStencilLayouts;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
				auto* shaderDrawParamsFeatures = (VkPhysicalDeviceShaderDrawParametersFeatures*)next;
				shaderDrawParamsFeatures->shaderDrawParameters = supportedFeats11.shaderDrawParameters;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
				auto* f16Features = (VkPhysicalDeviceShaderFloat16Int8Features*)next;
				f16Features->shaderFloat16 = supportedFeats12.shaderFloat16;
				f16Features->shaderInt8 = supportedFeats12.shaderInt8;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
				auto* shaderSGTypesFeatures = (VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures*)next;
				shaderSGTypesFeatures->shaderSubgroupExtendedTypes = supportedFeats12.shaderSubgroupExtendedTypes;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES: {
				auto* subgroupSizeFeatures = (VkPhysicalDeviceSubgroupSizeControlFeatures*)next;
				subgroupSizeFeatures->subgroupSizeControl = _metalFeatures.simdPermute || _metalFeatures.quadPermute;
				subgroupSizeFeatures->computeFullSubgroups = _metalFeatures.simdPermute || _metalFeatures.quadPermute;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXTURE_COMPRESSION_ASTC_HDR_FEATURES: {
				auto* astcHDRFeatures = (VkPhysicalDeviceTextureCompressionASTCHDRFeatures*)next;
				astcHDRFeatures->textureCompressionASTC_HDR = _metalFeatures.astcHDRTextures;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
				auto* timelineSem4Features = (VkPhysicalDeviceTimelineSemaphoreFeatures*)next;
				timelineSem4Features->timelineSemaphore = supportedFeats12.timelineSemaphore;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES: {
				auto* uboLayoutFeatures = (VkPhysicalDeviceUniformBufferStandardLayoutFeatures*)next;
				uboLayoutFeatures->uniformBufferStandardLayout = supportedFeats12.uniformBufferStandardLayout;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES: {
				auto* varPtrFeatures = (VkPhysicalDeviceVariablePointerFeatures*)next;
				varPtrFeatures->variablePointersStorageBuffer = supportedFeats11.variablePointersStorageBuffer;
				varPtrFeatures->variablePointers = supportedFeats11.variablePointers;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR: {
				auto* barycentricFeatures = (VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR*)next;
				barycentricFeatures->fragmentShaderBarycentric = true;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_FEATURES_KHR: {
				auto* portabilityFeatures = (VkPhysicalDevicePortabilitySubsetFeaturesKHR*)next;
				portabilityFeatures->constantAlphaColorBlendFactors = true;
				portabilityFeatures->events = true;
				portabilityFeatures->imageViewFormatReinterpretation = true;
				portabilityFeatures->imageViewFormatSwizzle = (_metalFeatures.nativeTextureSwizzle ||
															   mvkConfig().fullImageViewSwizzle);
				portabilityFeatures->imageView2DOn3DImage = false;
				portabilityFeatures->multisampleArrayImage = _metalFeatures.multisampleArrayTextures;
				portabilityFeatures->mutableComparisonSamplers = _metalFeatures.depthSampleCompare;
				portabilityFeatures->pointPolygons = false;
				portabilityFeatures->samplerMipLodBias = false;
				portabilityFeatures->separateStencilMaskRef = true;
				portabilityFeatures->shaderSampleRateInterpolationFunctions = _metalFeatures.pullModelInterpolation;
				portabilityFeatures->tessellationIsolines = false;
				portabilityFeatures->tessellationPointMode = false;
				portabilityFeatures->triangleFans = false;
				portabilityFeatures->vertexAttributeAccessBeyondStride = true; // Costs additional buffers. Should make configuration switch.
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: {
				auto* interlockFeatures = (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT*)next;
				interlockFeatures->fragmentShaderSampleInterlock = _metalFeatures.rasterOrderGroups;
				interlockFeatures->fragmentShaderPixelInterlock = _metalFeatures.rasterOrderGroups;
				interlockFeatures->fragmentShaderShadingRateInterlock = false; // Requires variable rate shading; not supported yet in Metal
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
				auto* robustness2Features = (VkPhysicalDeviceRobustness2FeaturesEXT*)next;
				robustness2Features->robustBufferAccess2 = false;
				robustness2Features->robustImageAccess2 = true;
				robustness2Features->nullDescriptor = false;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
				auto* texelBuffAlignFeatures = (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT*)next;
				texelBuffAlignFeatures->texelBufferAlignment = _metalFeatures.texelBuffers && [_mtlDevice respondsToSelector: @selector(minimumLinearTextureAlignmentForPixelFormat:)];
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
				auto* divisorFeatures = (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT*)next;
				divisorFeatures->vertexAttributeInstanceRateDivisor = true;
				divisorFeatures->vertexAttributeInstanceRateZeroDivisor = true;
				break;
			}
			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL: {
				auto* shaderIntFuncsFeatures = (VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL*)next;
				shaderIntFuncsFeatures->shaderIntegerFunctions2 = true;
				break;
			}
			default:
				// Unrecognized structure type: leave it untouched and move on down the chain.
				break;
		}
	}
}
/** Copies this device's cached core properties (_properties) into the structure pointed to by properties. */
void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties* properties) {
	VkPhysicalDeviceProperties& dstProperties = *properties;
	dstProperties = _properties;
}
void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) {
uint32_t uintMax = std::numeric_limits<uint32_t>::max();
uint32_t maxSamplerCnt = getMaxSamplerCount();
bool isTier2 = isUsingMetalArgumentBuffers() && (_metalFeatures.argumentBuffersTier >= MTLArgumentBuffersTier2);
// Create a SSOT for these Vulkan 1.1 properties, which can be queried via two mechanisms here.
VkPhysicalDeviceVulkan11Properties supportedProps11;
supportedProps11.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES;
supportedProps11.pNext = nullptr;
populateDeviceIDProperties(&supportedProps11);
populateSubgroupProperties(&supportedProps11);
supportedProps11.pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
supportedProps11.maxMultiviewViewCount = 32;
supportedProps11.maxMultiviewInstanceIndex = canUseInstancingForMultiview() ? uintMax / 32 : uintMax;
supportedProps11.protectedNoFault = false;
supportedProps11.maxPerSetDescriptors = 4 * (_metalFeatures.maxPerStageBufferCount +
_metalFeatures.maxPerStageTextureCount +
_metalFeatures.maxPerStageSamplerCount);
supportedProps11.maxMemoryAllocationSize = _metalFeatures.maxMTLBufferSize;
// Create a SSOT for these Vulkan 1.2 properties, which can be queried via two mechanisms here.
VkPhysicalDeviceVulkan12Properties supportedProps12;
supportedProps12.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES;
supportedProps12.pNext = nullptr;
supportedProps12.driverID = VK_DRIVER_ID_MOLTENVK;
strcpy(supportedProps12.driverName, "MoltenVK");
strcpy(supportedProps12.driverInfo, mvkGetMoltenVKVersionString(MVK_VERSION).c_str());
supportedProps12.conformanceVersion.major = 0;
supportedProps12.conformanceVersion.minor = 0;
supportedProps12.conformanceVersion.subminor = 0;
supportedProps12.conformanceVersion.patch = 0;
supportedProps12.denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY; // VK_KHR_shader_float_controls
supportedProps12.roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY; // VK_KHR_shader_float_controls
supportedProps12.shaderSignedZeroInfNanPreserveFloat16 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderSignedZeroInfNanPreserveFloat32 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderSignedZeroInfNanPreserveFloat64 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderDenormPreserveFloat16 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderDenormPreserveFloat32 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderDenormPreserveFloat64 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderDenormFlushToZeroFloat16 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderDenormFlushToZeroFloat32 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderDenormFlushToZeroFloat64 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderRoundingModeRTEFloat16 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderRoundingModeRTEFloat32 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderRoundingModeRTEFloat64 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderRoundingModeRTZFloat16 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderRoundingModeRTZFloat32 = false; // VK_KHR_shader_float_controls
supportedProps12.shaderRoundingModeRTZFloat64 = false; // VK_KHR_shader_float_controls
supportedProps12.maxUpdateAfterBindDescriptorsInAllPools = kMVKUndefinedLargeUInt32;
supportedProps12.shaderUniformBufferArrayNonUniformIndexingNative = false;
supportedProps12.shaderSampledImageArrayNonUniformIndexingNative = _metalFeatures.arrayOfTextures && _metalFeatures.arrayOfSamplers;
supportedProps12.shaderStorageBufferArrayNonUniformIndexingNative = false;
supportedProps12.shaderStorageImageArrayNonUniformIndexingNative = _metalFeatures.arrayOfTextures;
supportedProps12.shaderInputAttachmentArrayNonUniformIndexingNative = _metalFeatures.arrayOfTextures;
supportedProps12.robustBufferAccessUpdateAfterBind = _features.robustBufferAccess;
supportedProps12.quadDivergentImplicitLod = false;
supportedProps12.maxPerStageDescriptorUpdateAfterBindSamplers = isTier2 ? maxSamplerCnt : _properties.limits.maxPerStageDescriptorSamplers;
supportedProps12.maxPerStageDescriptorUpdateAfterBindUniformBuffers = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorUniformBuffers;
supportedProps12.maxPerStageDescriptorUpdateAfterBindStorageBuffers = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageBuffers;
supportedProps12.maxPerStageDescriptorUpdateAfterBindSampledImages = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorSampledImages;
supportedProps12.maxPerStageDescriptorUpdateAfterBindStorageImages = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageImages;
supportedProps12.maxPerStageDescriptorUpdateAfterBindInputAttachments = _properties.limits.maxPerStageDescriptorInputAttachments;
supportedProps12.maxPerStageUpdateAfterBindResources = isTier2 ? 500000 : _properties.limits.maxPerStageResources;
supportedProps12.maxDescriptorSetUpdateAfterBindSamplers = isTier2 ? maxSamplerCnt : _properties.limits.maxDescriptorSetSamplers;
supportedProps12.maxDescriptorSetUpdateAfterBindUniformBuffers = isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffers;
supportedProps12.maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffersDynamic;
supportedProps12.maxDescriptorSetUpdateAfterBindStorageBuffers = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffers;
supportedProps12.maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffersDynamic;
supportedProps12.maxDescriptorSetUpdateAfterBindSampledImages = isTier2 ? 500000 : _properties.limits.maxDescriptorSetSampledImages;
supportedProps12.maxDescriptorSetUpdateAfterBindStorageImages = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageImages;
supportedProps12.maxDescriptorSetUpdateAfterBindInputAttachments = _properties.limits.maxDescriptorSetInputAttachments;
supportedProps12.supportedDepthResolveModes = (_metalFeatures.depthResolve
? VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT
: VK_RESOLVE_MODE_SAMPLE_ZERO_BIT);
supportedProps12.supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; // Metal allows you to set the stencil resolve filter to either Sample0 or the same sample used for depth resolve. This is impossible to express in Vulkan.
supportedProps12.independentResolveNone = true;
supportedProps12.independentResolve = true;
supportedProps12.filterMinmaxSingleComponentFormats = false; // VK_EXT_sampler_filter_minmax;
supportedProps12.filterMinmaxImageComponentMapping = false; // VK_EXT_sampler_filter_minmax;
supportedProps12.maxTimelineSemaphoreValueDifference = std::numeric_limits<uint64_t>::max();
supportedProps12.framebufferIntegerColorSampleCounts = _metalFeatures.supportedSampleCounts;
properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
properties->properties = _properties;
for (auto* next = (VkBaseOutStructure*)properties->pNext; next; next = next->pNext) {
switch ((uint32_t)next->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES: {
// Copy from supportedProps11, but keep pNext as is.
auto* pProps11 = (VkPhysicalDeviceVulkan11Properties*)next;
supportedProps11.pNext = pProps11->pNext;
*pProps11 = supportedProps11;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES: {
// Copy from supportedProps12, but keep pNext as is.
auto* pProps12 = (VkPhysicalDeviceVulkan12Properties*)next;
supportedProps12.pNext = pProps12->pNext;
*pProps12 = supportedProps12;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
auto* dvcIDProps = (VkPhysicalDeviceIDProperties*)next;
mvkCopy(dvcIDProps->deviceUUID, supportedProps11.deviceUUID, VK_UUID_SIZE);
mvkCopy(dvcIDProps->driverUUID, supportedProps11.driverUUID, VK_UUID_SIZE);
mvkCopy(dvcIDProps->deviceLUID, supportedProps11.deviceLUID, VK_LUID_SIZE);
dvcIDProps->deviceNodeMask = supportedProps11.deviceNodeMask;
dvcIDProps->deviceLUIDValid = supportedProps11.deviceLUIDValid;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
auto* subgroupProps = (VkPhysicalDeviceSubgroupProperties*)next;
subgroupProps->subgroupSize = supportedProps11.subgroupSize;
subgroupProps->supportedStages = supportedProps11.subgroupSupportedStages;
subgroupProps->supportedOperations = supportedProps11.subgroupSupportedOperations;
subgroupProps->quadOperationsInAllStages = supportedProps11.subgroupQuadOperationsInAllStages;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
auto* pointClipProps = (VkPhysicalDevicePointClippingProperties*)next;
pointClipProps->pointClippingBehavior = supportedProps11.pointClippingBehavior;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
auto* multiviewProps = (VkPhysicalDeviceMultiviewProperties*)next;
multiviewProps->maxMultiviewViewCount = supportedProps11.maxMultiviewViewCount;
multiviewProps->maxMultiviewInstanceIndex = supportedProps11.maxMultiviewInstanceIndex;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
auto* protectedMemProps = (VkPhysicalDeviceProtectedMemoryProperties*)next;
protectedMemProps->protectedNoFault = supportedProps11.protectedNoFault;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
auto* maint3Props = (VkPhysicalDeviceMaintenance3Properties*)next;
maint3Props->maxPerSetDescriptors = supportedProps11.maxPerSetDescriptors;
maint3Props->maxMemoryAllocationSize = supportedProps11.maxMemoryAllocationSize;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {
auto* depthStencilResolveProps = (VkPhysicalDeviceDepthStencilResolveProperties*)next;
depthStencilResolveProps->supportedDepthResolveModes = supportedProps12.supportedDepthResolveModes;
depthStencilResolveProps->supportedStencilResolveModes = supportedProps12.supportedStencilResolveModes;
depthStencilResolveProps->independentResolveNone = supportedProps12.independentResolveNone;
depthStencilResolveProps->independentResolve = supportedProps12.independentResolve;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: {
auto* physicalDeviceDriverProps = (VkPhysicalDeviceDriverProperties*)next;
physicalDeviceDriverProps->driverID = supportedProps12.driverID;
mvkCopy(physicalDeviceDriverProps->driverName, supportedProps12.driverName, VK_MAX_DRIVER_NAME_SIZE);
mvkCopy(physicalDeviceDriverProps->driverInfo, supportedProps12.driverInfo, VK_MAX_DRIVER_INFO_SIZE);
physicalDeviceDriverProps->conformanceVersion = supportedProps12.conformanceVersion;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
auto* timelineSem4Props = (VkPhysicalDeviceTimelineSemaphoreProperties*)next;
timelineSem4Props->maxTimelineSemaphoreValueDifference = supportedProps12.maxTimelineSemaphoreValueDifference;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES: {
auto* pDescIdxProps = (VkPhysicalDeviceDescriptorIndexingProperties*)next;
pDescIdxProps->maxUpdateAfterBindDescriptorsInAllPools = supportedProps12.maxUpdateAfterBindDescriptorsInAllPools;
pDescIdxProps->shaderUniformBufferArrayNonUniformIndexingNative = supportedProps12.shaderUniformBufferArrayNonUniformIndexingNative;
pDescIdxProps->shaderSampledImageArrayNonUniformIndexingNative = supportedProps12.shaderSampledImageArrayNonUniformIndexingNative;
pDescIdxProps->shaderStorageBufferArrayNonUniformIndexingNative = supportedProps12.shaderStorageBufferArrayNonUniformIndexingNative;
pDescIdxProps->shaderStorageImageArrayNonUniformIndexingNative = supportedProps12.shaderStorageImageArrayNonUniformIndexingNative;
pDescIdxProps->shaderInputAttachmentArrayNonUniformIndexingNative = supportedProps12.shaderInputAttachmentArrayNonUniformIndexingNative;
pDescIdxProps->robustBufferAccessUpdateAfterBind = supportedProps12.robustBufferAccessUpdateAfterBind;
pDescIdxProps->quadDivergentImplicitLod = supportedProps12.quadDivergentImplicitLod;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSamplers = supportedProps12.maxPerStageDescriptorUpdateAfterBindSamplers;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindUniformBuffers = supportedProps12.maxPerStageDescriptorUpdateAfterBindUniformBuffers;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageBuffers = supportedProps12.maxPerStageDescriptorUpdateAfterBindStorageBuffers;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSampledImages = supportedProps12.maxPerStageDescriptorUpdateAfterBindSampledImages;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageImages = supportedProps12.maxPerStageDescriptorUpdateAfterBindStorageImages;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindInputAttachments = supportedProps12.maxPerStageDescriptorUpdateAfterBindInputAttachments;
pDescIdxProps->maxPerStageUpdateAfterBindResources = supportedProps12.maxPerStageUpdateAfterBindResources;
pDescIdxProps->maxDescriptorSetUpdateAfterBindSamplers = supportedProps12.maxDescriptorSetUpdateAfterBindSamplers;
pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffers = supportedProps12.maxDescriptorSetUpdateAfterBindUniformBuffers;
pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = supportedProps12.maxDescriptorSetUpdateAfterBindUniformBuffersDynamic;
pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffers = supportedProps12.maxDescriptorSetUpdateAfterBindStorageBuffers;
pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = supportedProps12.maxDescriptorSetUpdateAfterBindStorageBuffersDynamic;
pDescIdxProps->maxDescriptorSetUpdateAfterBindSampledImages = supportedProps12.maxDescriptorSetUpdateAfterBindSampledImages;
pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageImages = supportedProps12.maxDescriptorSetUpdateAfterBindStorageImages;
pDescIdxProps->maxDescriptorSetUpdateAfterBindInputAttachments = supportedProps12.maxDescriptorSetUpdateAfterBindInputAttachments;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES: {
auto* inlineUniformBlockProps = (VkPhysicalDeviceInlineUniformBlockProperties*)next;
inlineUniformBlockProps->maxInlineUniformBlockSize = _metalFeatures.dynamicMTLBufferSize;
inlineUniformBlockProps->maxPerStageDescriptorInlineUniformBlocks = _metalFeatures.dynamicMTLBufferSize ? _metalFeatures.maxPerStageDynamicMTLBufferCount - 1 : 0; // Less one for push constants
inlineUniformBlockProps->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = inlineUniformBlockProps->maxPerStageDescriptorInlineUniformBlocks;
inlineUniformBlockProps->maxDescriptorSetInlineUniformBlocks = (inlineUniformBlockProps->maxPerStageDescriptorInlineUniformBlocks * 4);
inlineUniformBlockProps->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = (inlineUniformBlockProps->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks * 4);
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES: {
auto* subgroupSizeProps = (VkPhysicalDeviceSubgroupSizeControlProperties*)next;
subgroupSizeProps->minSubgroupSize = _metalFeatures.minSubgroupSize;
subgroupSizeProps->maxSubgroupSize = _metalFeatures.maxSubgroupSize;
subgroupSizeProps->maxComputeWorkgroupSubgroups = _properties.limits.maxComputeWorkGroupInvocations / _metalFeatures.minSubgroupSize;
subgroupSizeProps->requiredSubgroupSizeStages = 0;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES: {
// Save the 'next' pointer; we'll unintentionally overwrite it
// on the next line. Put it back when we're done.
auto* texelBuffAlignProps = (VkPhysicalDeviceTexelBufferAlignmentProperties*)next;
void* pNext = texelBuffAlignProps->pNext;
*texelBuffAlignProps = _texelBuffAlignProperties;
texelBuffAlignProps->pNext = pNext;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_PROPERTIES_KHR: {
auto* barycentricProperties = (VkPhysicalDeviceFragmentShaderBarycentricPropertiesKHR*)next;
barycentricProperties->triStripVertexOrderIndependentOfProvokingVertex = false;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
auto* pushDescProps = (VkPhysicalDevicePushDescriptorPropertiesKHR*)next;
pushDescProps->maxPushDescriptors = _properties.limits.maxPerStageResources;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_PROPERTIES_KHR: {
auto* portabilityProps = (VkPhysicalDevicePortabilitySubsetPropertiesKHR*)next;
portabilityProps->minVertexInputBindingStrideAlignment = (uint32_t)_metalFeatures.vertexStrideAlignment;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
auto* divisorProps = (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT*)next;
divisorProps->maxVertexAttribDivisor = kMVKUndefinedLargeUInt32;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
auto* sampLocnProps = (VkPhysicalDeviceSampleLocationsPropertiesEXT*)next;
sampLocnProps->sampleLocationSampleCounts = _metalFeatures.supportedSampleCounts;
sampLocnProps->maxSampleLocationGridSize = kMetalSamplePositionGridSize;
sampLocnProps->sampleLocationCoordinateRange[0] = 0.0;
sampLocnProps->sampleLocationCoordinateRange[1] = (15.0 / 16.0);
sampLocnProps->sampleLocationSubPixelBits = 4;
sampLocnProps->variableSampleLocations = VK_FALSE;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
// This isn't implemented yet, but when it is, it is expected that we'll wind up doing it manually.
auto* robustness2Props = (VkPhysicalDeviceRobustness2PropertiesEXT*)next;
robustness2Props->robustStorageBufferAccessSizeAlignment = 1;
robustness2Props->robustUniformBufferAccessSizeAlignment = 1;
break;
}
default:
break;
}
}
}
// Fills in the device UUID, driver UUID and LUID members of the Vulkan 1.1 properties.
//
// Device UUID layout: bytes 0-3 GPU vendor ID, bytes 4-7 GPU device ID,
//                     bytes 8-15 GPU registry ID (each stored big-endian).
// Driver UUID layout: bytes 0-3 "MVK" prefix (zero padded), bytes 4-7 MoltenVK version,
//                     bytes 8-11 highest GPU capability (both big-endian), remainder zero.
void MVKPhysicalDevice::populateDeviceIDProperties(VkPhysicalDeviceVulkan11Properties* pVk11Props) {

    // ---- Device ID ----------------------------------------------
    uint8_t* pDevUUID = pVk11Props->deviceUUID;
    mvkClear(pDevUUID, VK_UUID_SIZE);
    uint8_t* pDevCursor = pDevUUID;

    // GPU vendor ID
    uint32_t gpuVendorID = _properties.vendorID;
    *(uint32_t*)pDevCursor = NSSwapHostIntToBig(gpuVendorID);
    pDevCursor += sizeof(gpuVendorID);

    // GPU device ID
    uint32_t gpuDeviceID = _properties.deviceID;
    *(uint32_t*)pDevCursor = NSSwapHostIntToBig(gpuDeviceID);
    pDevCursor += sizeof(gpuDeviceID);

    // GPU registry ID fills the remaining 8 bytes
    uint64_t gpuRegistryID = mvkGetRegistryID(_mtlDevice);
    *(uint64_t*)pDevCursor = NSSwapHostLongLongToBig(gpuRegistryID);

    // ---- Driver ID ----------------------------------------------
    uint8_t* pDrvUUID = pVk11Props->driverUUID;
    mvkClear(pDrvUUID, VK_UUID_SIZE);
    uint8_t* pDrvCursor = pDrvUUID;

    // MoltenVK prefix. The cursor skips one extra byte so the prefix occupies a full 4 bytes.
    const char* prefix = "MVK";
    size_t prefixLen = strlen(prefix);
    mvkCopy(pDrvCursor, (uint8_t*)prefix, prefixLen);
    pDrvCursor += prefixLen + 1;

    // MoltenVK version
    uint32_t driverVersion = MVK_VERSION;
    *(uint32_t*)pDrvCursor = NSSwapHostIntToBig(driverVersion);
    pDrvCursor += sizeof(driverVersion);

    // Highest GPU capability supported by this device
    uint32_t highestGPUCap = getHighestGPUCapability();
    *(uint32_t*)pDrvCursor = NSSwapHostIntToBig(highestGPUCap);

    // ---- LUID ignored for Metal devices ------------------------
    mvkClear(pVk11Props->deviceLUID, VK_LUID_SIZE);
    pVk11Props->deviceNodeMask = 0;
    pVk11Props->deviceLUIDValid = VK_FALSE;
}
// Fills in the subgroup-related members of the Vulkan 1.1 properties
// from the Metal feature flags of this device.
void MVKPhysicalDevice::populateSubgroupProperties(VkPhysicalDeviceVulkan11Properties* pVk11Props) {
    pVk11Props->subgroupSize = _metalFeatures.maxSubgroupSize;

    // Shader stages: compute is always reported; the others are conditional.
    VkShaderStageFlags sgStages = VK_SHADER_STAGE_COMPUTE_BIT;
    if (_features.tessellationShader) { sgStages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; }
    if (mvkOSVersionIsAtLeast(10.15, 13.0)) { sgStages |= VK_SHADER_STAGE_FRAGMENT_BIT; }
    pVk11Props->subgroupSupportedStages = sgStages;

    // Operations: basic is always reported; the others depend on SIMD/quad support.
    const bool hasQuadPermute = _metalFeatures.quadPermute;
    VkSubgroupFeatureFlags sgOps = VK_SUBGROUP_FEATURE_BASIC_BIT;
    if (_metalFeatures.simdPermute || hasQuadPermute) {
        sgOps |= (VK_SUBGROUP_FEATURE_VOTE_BIT |
                  VK_SUBGROUP_FEATURE_BALLOT_BIT |
                  VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                  VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT);
    }
    if (_metalFeatures.simdReduction) { sgOps |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT; }
    if (hasQuadPermute) { sgOps |= VK_SUBGROUP_FEATURE_QUAD_BIT; }
    pVk11Props->subgroupSupportedOperations = sgOps;

    pVk11Props->subgroupQuadOperationsInAllStages = _metalFeatures.quadPermute;
}
// Copies the format properties that the pixel-format table holds for the given format into the output struct.
void MVKPhysicalDevice::getFormatProperties(VkFormat format, VkFormatProperties* pFormatProperties) {
    const auto& fmtProps = _pixelFormats.getVkFormatProperties(format);
    *pFormatProperties = fmtProps;
}
// Extended-struct variant: stamps the structure type, then delegates to the
// basic overload to fill in the embedded VkFormatProperties.
void MVKPhysicalDevice::getFormatProperties(VkFormat format, VkFormatProperties2KHR* pFormatProperties) {
    pFormatProperties->sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2_KHR;
    VkFormatProperties* pBasicProps = &pFormatProperties->formatProperties;
    getFormatProperties(format, pBasicProps);
}
// Reports the sample-location grid size for the given sample count.
// Does nothing if pMultisampleProperties is null.
void MVKPhysicalDevice::getMultisampleProperties(VkSampleCountFlagBits samples,
                                                 VkMultisamplePropertiesEXT* pMultisampleProperties) {
    if ( !pMultisampleProperties ) { return; }

    if (mvkIsOnlyAnyFlagEnabled(samples, _metalFeatures.supportedSampleCounts)) {
        pMultisampleProperties->maxSampleLocationGridSize = kMetalSamplePositionGridSize;
    } else {
        pMultisampleProperties->maxSampleLocationGridSize = kMetalSamplePositionGridSizeNotSupported;
    }
}
// Reports the maximum extent, mip level count, array layer count, supported sample counts
// and maximum resource size for an image with the given format, type, tiling, usage and
// creation flags.
//
// Returns VK_ERROR_FORMAT_NOT_SUPPORTED if the format, or the requested combination of
// parameters, is not supported, and VK_SUCCESS otherwise. If pImageFormatProperties is
// null, only the basic format-support check is performed and nothing is written.
VkResult MVKPhysicalDevice::getImageFormatProperties(VkFormat format,
VkImageType type,
VkImageTiling tiling,
VkImageUsageFlags usage,
VkImageCreateFlags flags,
VkImageFormatProperties* pImageFormatProperties) {
if ( !_pixelFormats.isSupported(format) ) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
if ( !pImageFormatProperties ) { return VK_SUCCESS; }
// Zero the output up front, so it is cleared even on the error returns below.
mvkClear(pImageFormatProperties);
// Metal does not support creating uncompressed views of compressed formats.
// Metal does not support split-instance images.
if (mvkIsAnyFlagEnabled(flags, VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT | VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT)) {
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
// Classify the format once; these flags drive most of the decisions below.
MVKFormatType mvkFmt = _pixelFormats.getFormatType(format);
bool isChromaSubsampled = _pixelFormats.getChromaSubsamplingPlaneCount(format) > 0;
bool isMultiPlanar = _pixelFormats.getChromaSubsamplingPlaneCount(format) > 1;
// Single-plane chroma-subsampled format whose texel block is wider than one texel.
bool isBGRG = isChromaSubsampled && !isMultiPlanar && _pixelFormats.getBlockTexelSize(format).width > 1;
bool hasAttachmentUsage = mvkIsAnyFlagEnabled(usage, (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT |
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT));
// Disjoint memory requires a multiplanar format.
if (!isMultiPlanar && mvkIsAnyFlagEnabled(flags, VK_IMAGE_CREATE_DISJOINT_BIT)) {
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
VkPhysicalDeviceLimits* pLimits = &_properties.limits;
// Start from minimal values; each image type below sets the values it supports.
VkExtent3D maxExt = { 1, 1, 1};
uint32_t maxLevels = 1;
// Images with attachment usage are limited by the framebuffer layer limit instead of the image array limit.
uint32_t maxLayers = hasAttachmentUsage ? pLimits->maxFramebufferLayers : pLimits->maxImageArrayLayers;
bool supportsMSAA = mvkAreAllFlagsEnabled(_pixelFormats.getCapabilities(format), kMVKMTLFmtCapsMSAA);
VkSampleCountFlags sampleCounts = supportsMSAA ? _metalFeatures.supportedSampleCounts : VK_SAMPLE_COUNT_1_BIT;
switch (type) {
case VK_IMAGE_TYPE_1D:
maxExt.height = 1;
maxExt.depth = 1;
// When the texture1DAs2D config option is off, the 1D image is handled here as a native 1D texture.
// Otherwise control falls through to the 2D case below.
if (!mvkConfig().texture1DAs2D) {
maxExt.width = pLimits->maxImageDimension1D;
maxLevels = 1;
sampleCounts = VK_SAMPLE_COUNT_1_BIT;
// Metal does not allow native 1D textures to be used as attachments
if (hasAttachmentUsage ) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
// Metal does not allow linear tiling on native 1D textures
if (tiling == VK_IMAGE_TILING_LINEAR) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
// Metal does not allow compressed or depth/stencil formats on native 1D textures
if (mvkFmt == kMVKFormatDepthStencil) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
if (mvkFmt == kMVKFormatCompressed) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
if (isChromaSubsampled) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
break;
}
// A 420 1D image doesn't make much sense.
if (isChromaSubsampled && _pixelFormats.getBlockTexelSize(format).height > 1) {
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
// Vulkan doesn't allow 1D multisampled images.
sampleCounts = VK_SAMPLE_COUNT_1_BIT;
/* fallthrough */
case VK_IMAGE_TYPE_2D:
if (mvkIsAnyFlagEnabled(flags, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) ) {
// Chroma-subsampled cube images aren't supported.
if (isChromaSubsampled) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
// 1D cube images aren't supported.
if (type == VK_IMAGE_TYPE_1D) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
maxExt.width = pLimits->maxImageDimensionCube;
maxExt.height = pLimits->maxImageDimensionCube;
} else {
maxExt.width = pLimits->maxImageDimension2D;
// A 1D image that fell through from the case above keeps a height of 1.
maxExt.height = (type == VK_IMAGE_TYPE_1D ? 1 : pLimits->maxImageDimension2D);
}
maxExt.depth = 1;
if (tiling == VK_IMAGE_TILING_LINEAR) {
// Linear textures have additional restrictions under Metal:
// - They may not be depth/stencil, compressed, or chroma subsampled textures.
// We allow multi-planar formats because those internally use non-subsampled formats.
if (mvkFmt == kMVKFormatDepthStencil || mvkFmt == kMVKFormatCompressed || isBGRG) {
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
#if !MVK_APPLE_SILICON
// - On macOS IMR GPUs, Linear textures may not be used as framebuffer attachments.
if (hasAttachmentUsage) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
#endif
// Linear textures may only have one mip level, layer & sample.
maxLevels = 1;
maxLayers = 1;
sampleCounts = VK_SAMPLE_COUNT_1_BIT;
} else {
VkFormatProperties fmtProps;
getFormatProperties(format, &fmtProps);
// Compressed multisampled textures aren't supported.
// Chroma-subsampled multisampled textures aren't supported.
// Multisampled cube textures aren't supported.
// Non-renderable multisampled textures aren't supported.
if (mvkFmt == kMVKFormatCompressed || isChromaSubsampled ||
mvkIsAnyFlagEnabled(flags, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) ||
!mvkIsAnyFlagEnabled(fmtProps.optimalTilingFeatures, VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT|VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) ) {
sampleCounts = VK_SAMPLE_COUNT_1_BIT;
}
// BGRG and GBGR images may only have one mip level and one layer.
// Other chroma subsampled formats may have multiple mip levels, but still only one layer.
if (isChromaSubsampled) {
maxLevels = isBGRG ? 1 : mvkMipmapLevels3D(maxExt);
maxLayers = 1;
} else {
maxLevels = mvkMipmapLevels3D(maxExt);
}
}
break;
case VK_IMAGE_TYPE_3D:
// Metal does not allow linear tiling on 3D textures
if (tiling == VK_IMAGE_TILING_LINEAR) {
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
// Depth/stencil and chroma-subsampled formats are rejected for 3D textures. On iOS and tvOS,
// compressed formats are also rejected unless the device has native 3D compressed texture support.
if (mvkFmt == kMVKFormatDepthStencil ||
isChromaSubsampled
#if MVK_IOS_OR_TVOS
|| (mvkFmt == kMVKFormatCompressed && !_metalFeatures.native3DCompressedTextures)
#endif
) {
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
#if MVK_MACOS
// If this is a compressed format and there's no codec, it isn't supported.
if ((mvkFmt == kMVKFormatCompressed) && !mvkCanDecodeFormat(format) && !_metalFeatures.native3DCompressedTextures) {
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
#endif
#if MVK_APPLE_SILICON
// ETC2 and EAC formats aren't supported for 3D textures.
switch (format) {
case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
case VK_FORMAT_EAC_R11_UNORM_BLOCK:
case VK_FORMAT_EAC_R11_SNORM_BLOCK:
case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
return VK_ERROR_FORMAT_NOT_SUPPORTED;
default:
break;
}
#endif
// 3D textures: mipmapped, but always a single layer and a single sample.
maxExt.width = pLimits->maxImageDimension3D;
maxExt.height = pLimits->maxImageDimension3D;
maxExt.depth = pLimits->maxImageDimension3D;
maxLevels = mvkMipmapLevels3D(maxExt);
maxLayers = 1;
sampleCounts = VK_SAMPLE_COUNT_1_BIT;
break;
default:
return VK_ERROR_FORMAT_NOT_SUPPORTED; // Illegal VkImageType
}
// Publish the limits computed above.
pImageFormatProperties->maxExtent = maxExt;
pImageFormatProperties->maxMipLevels = maxLevels;
pImageFormatProperties->maxArrayLayers = maxLayers;
pImageFormatProperties->sampleCounts = sampleCounts;
pImageFormatProperties->maxResourceSize = kMVKUndefinedLargeUInt64;
return VK_SUCCESS;
}
// Extended-struct variant of the image format query.
//
// Walks the pNext chain of the input info to pick up external-memory and separate
// stencil-usage requests, fills in any recognised structures on the output pNext chain,
// and then delegates to the basic overload for the core image format properties.
VkResult MVKPhysicalDevice::getImageFormatProperties(const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
                                                     VkImageFormatProperties2* pImageFormatProperties) {
    VkImageUsageFlags combinedUsage = pImageFormatInfo->usage;

    // ---- Input chain ----
    for (const auto* pIn = (VkBaseInStructure*)pImageFormatInfo->pNext; pIn; pIn = pIn->pNext) {
        if (pIn->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO) {
            // External memory support for MTLTexture was requested. Populate every
            // VkExternalImageFormatProperties found on the output chain.
            auto* pExtInfo = (VkPhysicalDeviceExternalImageFormatInfo*)pIn;
            for (auto* pOut = (VkBaseOutStructure*)pImageFormatProperties->pNext; pOut; pOut = pOut->pNext) {
                if (pOut->sType != VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES) { continue; }
                auto* pExtProps = (VkExternalImageFormatProperties*)pOut;
                pExtProps->externalMemoryProperties = getExternalImageProperties(pExtInfo->handleType);
            }
        } else if (pIn->sType == VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO) {
            // For formats with a stencil component, fold the separate stencil usage into the overall usage.
            MTLPixelFormat mtlPixFmt = _pixelFormats.getMTLPixelFormat(pImageFormatInfo->format);
            if (_pixelFormats.isStencilFormat(mtlPixFmt)) {
                combinedUsage |= ((VkImageStencilUsageCreateInfo*)pIn)->stencilUsage;
            }
        }
    }

    // ---- Output chain ----
    for (auto* pOut = (VkBaseOutStructure*)pImageFormatProperties->pNext; pOut; pOut = pOut->pNext) {
        if (pOut->sType != VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES) { continue; }
        // One descriptor per chroma-subsampling plane, with a minimum of one.
        auto* pYcbcrProps = (VkSamplerYcbcrConversionImageFormatProperties*)pOut;
        pYcbcrProps->combinedImageSamplerDescriptorCount = std::max(_pixelFormats.getChromaSubsamplingPlaneCount(pImageFormatInfo->format), (uint8_t)1u);
    }

    if ( !_pixelFormats.isSupported(pImageFormatInfo->format) ) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }

    return getImageFormatProperties(pImageFormatInfo->format,
                                    pImageFormatInfo->type,
                                    pImageFormatInfo->tiling,
                                    combinedUsage,
                                    pImageFormatInfo->flags,
                                    &pImageFormatProperties->imageFormatProperties);
}
// Fills in the external memory properties for the handle type named in the query info.
void MVKPhysicalDevice::getExternalBufferProperties(const VkPhysicalDeviceExternalBufferInfo* pExternalBufferInfo,
                                                    VkExternalBufferProperties* pExternalBufferProperties) {
    VkExternalMemoryHandleTypeFlagBits extHandleType = pExternalBufferInfo->handleType;
    pExternalBufferProperties->externalMemoryProperties = getExternalBufferProperties(extHandleType);
}
// Zero-initialised properties returned for external memory handle types that are not recognised.
static VkExternalMemoryProperties _emptyExtMemProps = {};

// Returns the external memory properties for buffers of the given handle type.
// Only the MTLBuffer handle type has dedicated properties; every other type gets the empty set.
VkExternalMemoryProperties& MVKPhysicalDevice::getExternalBufferProperties(VkExternalMemoryHandleTypeFlagBits handleType) {
    if (handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_KHR) {
        return _mtlBufferExternalMemoryProperties;
    }
    return _emptyExtMemProps;
}
// Returns the external memory properties for images of the given handle type.
// Only the MTLTexture handle type has dedicated properties; every other type gets the empty set.
VkExternalMemoryProperties& MVKPhysicalDevice::getExternalImageProperties(VkExternalMemoryHandleTypeFlagBits handleType) {
    if (handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_KHR) {
        return _mtlTextureExternalMemoryProperties;
    }
    return _emptyExtMemProps;
}
// Template for the fence properties reported below: no handle types and no features.
static const VkExternalFenceProperties _emptyExtFenceProps = {VK_STRUCTURE_TYPE_EXTERNAL_FENCE_PROPERTIES, nullptr, 0, 0, 0};

// Reports empty external fence properties, regardless of the requested handle type.
// The caller's pNext chain is left attached to the output struct.
void MVKPhysicalDevice::getExternalFenceProperties(const VkPhysicalDeviceExternalFenceInfo* pExternalFenceInfo,
                                                   VkExternalFenceProperties* pExternalFenceProperties) {
    VkExternalFenceProperties fenceProps = _emptyExtFenceProps;
    fenceProps.pNext = pExternalFenceProperties->pNext;
    *pExternalFenceProperties = fenceProps;
}
// Template for the semaphore properties reported below: no handle types and no features.
static const VkExternalSemaphoreProperties _emptyExtSemProps = {VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES, nullptr, 0, 0, 0};

// Reports empty external semaphore properties, regardless of the requested handle type.
// The caller's pNext chain is left attached to the output struct.
void MVKPhysicalDevice::getExternalSemaphoreProperties(const VkPhysicalDeviceExternalSemaphoreInfo* pExternalSemaphoreInfo,
                                                       VkExternalSemaphoreProperties* pExternalSemaphoreProperties) {
    VkExternalSemaphoreProperties semProps = _emptyExtSemProps;
    semProps.pNext = pExternalSemaphoreProperties->pNext;
    *pExternalSemaphoreProperties = semProps;
}
#pragma mark Surfaces
// Reports whether this device can present to the surface.
//
// Presentation is unsupported if the device is headless (macOS only), or if the surface
// has no CAMetalLayer. When unsupported, the surface's configuration result is returned.
VkResult MVKPhysicalDevice::getSurfaceSupport(uint32_t queueFamilyIndex,
                                              MVKSurface* surface,
                                              VkBool32* pSupported) {
#if MVK_MACOS
    const bool headless = getMTLDevice().isHeadless;
#else
    const bool headless = false;
#endif

    // The layer is only consulted when the device is not headless.
    const bool canPresent = !headless && (surface->getCAMetalLayer() != nil);
    *pSupported = canPresent;

    if (canPresent) { return VK_SUCCESS; }
    return surface->getConfigurationResult();
}
// Fills in the presentation capabilities of the surface.
// Returns the surface's configuration result if the surface has no CAMetalLayer.
VkResult MVKPhysicalDevice::getSurfaceCapabilities(MVKSurface* surface,
                                                   VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) {

    // The surface must be backed by a CAMetalLayer.
    CAMetalLayer* caLayer = surface->getCAMetalLayer();
    if (caLayer == nil) { return surface->getConfigurationResult(); }

    const VkExtent2D drawableExtent = mvkVkExtent2DFromCGSize(caLayer.naturalDrawableSizeMVK);
    VkSurfaceCapabilitiesKHR& caps = *pSurfaceCapabilities;

    // Swapchain image counts
    caps.minImageCount = _metalFeatures.minSwapchainImageCount;
    caps.maxImageCount = _metalFeatures.maxSwapchainImageCount;

    // Extents
    caps.currentExtent = drawableExtent;
    caps.minImageExtent = { 1, 1 };
    caps.maxImageExtent = { _properties.limits.maxImageDimension2D, _properties.limits.maxImageDimension2D };
    caps.maxImageArrayLayers = 1;

    // Only the identity transform is available.
    caps.supportedTransforms = (VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR);
    caps.currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;

    caps.supportedCompositeAlpha = (VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR |
                                    VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR |
                                    VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR);

    caps.supportedUsageFlags = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
                                VK_IMAGE_USAGE_STORAGE_BIT |
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT |
                                VK_IMAGE_USAGE_SAMPLED_BIT);
    return VK_SUCCESS;
}
// Enumerates the format / color space pairs available for presenting to the surface.
//
// Follows the usual Vulkan two-call pattern: if pSurfaceFormats is null, *pCount receives
// the number of available pairs. Otherwise, up to *pCount pairs are written, *pCount is set
// to the number written, and VK_INCOMPLETE is returned if not all pairs fit.
// Every supported format is paired with every supported color space.
// Returns the surface's configuration result if the surface has no CAMetalLayer.
VkResult MVKPhysicalDevice::getSurfaceFormats(MVKSurface* surface,
uint32_t* pCount,
VkSurfaceFormatKHR* pSurfaceFormats) {
// The layer underlying the surface view must be a CAMetalLayer.
CAMetalLayer* mtlLayer = surface->getCAMetalLayer();
if ( !mtlLayer ) { return surface->getConfigurationResult(); }
// Appends the VkFormat that corresponds to MTLPixelFormat<MTL_FMT> to vkFormats, but only if
// the Metal format is supported and maps to a non-zero VkFormat.
#define addSurfFmt(MTL_FMT) \
do { \
if (_pixelFormats.isSupported(MTLPixelFormat ##MTL_FMT)) { \
VkFormat vkFmt = _pixelFormats.getVkFormat(MTLPixelFormat ##MTL_FMT); \
if (vkFmt) { vkFormats.push_back(vkFmt); } \
} \
} while(false)
// Candidate surface formats. The order here is the order formats are reported in.
MVKSmallVector<VkFormat, 16> vkFormats;
addSurfFmt(BGRA8Unorm);
addSurfFmt(BGRA8Unorm_sRGB);
addSurfFmt(RGBA16Float);
#if MVK_MACOS
addSurfFmt(RGB10A2Unorm);
addSurfFmt(BGR10A2Unorm);
#endif
#if MVK_APPLE_SILICON
addSurfFmt(BGRA10_XR);
addSurfFmt(BGRA10_XR_sRGB);
addSurfFmt(BGR10_XR);
addSurfFmt(BGR10_XR_sRGB);
#endif
// Candidate color spaces, gated by platform and OS version. sRGB non-linear is always included.
MVKSmallVector<VkColorSpaceKHR, 16> colorSpaces;
colorSpaces.push_back(VK_COLOR_SPACE_SRGB_NONLINEAR_KHR);
#if MVK_MACOS
// 10.11 supports some but not all of the color spaces specified by VK_EXT_swapchain_colorspace.
colorSpaces.push_back(VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_BT709_NONLINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_PASS_THROUGH_EXT);
if (mvkOSVersionIsAtLeast(10.12)) {
colorSpaces.push_back(VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT);
}
if (mvkOSVersionIsAtLeast(10.14)) {
colorSpaces.push_back(VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_BT2020_LINEAR_EXT);
}
#if MVK_XCODE_12
if (mvkOSVersionIsAtLeast(11.0)) {
colorSpaces.push_back(VK_COLOR_SPACE_HDR10_HLG_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_HDR10_ST2084_EXT);
}
#endif
#endif
#if MVK_IOS_OR_TVOS
// iOS 8 doesn't support anything but sRGB.
if (mvkOSVersionIsAtLeast(9.0)) {
colorSpaces.push_back(VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_BT709_NONLINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_PASS_THROUGH_EXT);
}
if (mvkOSVersionIsAtLeast(10.0)) {
colorSpaces.push_back(VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT);
}
if (mvkOSVersionIsAtLeast(12.3)) {
colorSpaces.push_back(VK_COLOR_SPACE_DCI_P3_LINEAR_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_BT2020_LINEAR_EXT);
}
#if MVK_XCODE_12
if (mvkOSVersionIsAtLeast(14.0)) {
colorSpaces.push_back(VK_COLOR_SPACE_HDR10_HLG_EXT);
colorSpaces.push_back(VK_COLOR_SPACE_HDR10_ST2084_EXT);
}
#endif
#endif
// Total number of reportable entries is the full cross product of formats and color spaces.
size_t vkFmtsCnt = vkFormats.size();
size_t vkColSpcFmtsCnt = vkFmtsCnt * colorSpaces.size();
// If properties aren't actually being requested yet, simply update the returned count
if ( !pSurfaceFormats ) {
*pCount = (uint32_t)vkColSpcFmtsCnt;
return VK_SUCCESS;
}
// Determine how many results we'll return, and return that number
VkResult result = (*pCount >= vkColSpcFmtsCnt) ? VK_SUCCESS : VK_INCOMPLETE;
*pCount = min(*pCount, (uint32_t)vkColSpcFmtsCnt);
// Now populate the supplied array
// Entries are ordered color-space-major: all formats for the first color space, then all
// formats for the next, and so on, stopping once *pCount entries have been written.
for (uint csIdx = 0, idx = 0; idx < *pCount && csIdx < colorSpaces.size(); csIdx++) {
for (uint fmtIdx = 0; idx < *pCount && fmtIdx < vkFmtsCnt; fmtIdx++, idx++) {
pSurfaceFormats[idx].format = vkFormats[fmtIdx];
pSurfaceFormats[idx].colorSpace = colorSpaces[csIdx];
}
}
return result;
}
// Extended-struct variant of the surface format query.
//
// If pSurfaceFormats is null, only the count is returned in *pCount. Otherwise the basic
// overload populates a temporary array of VkSurfaceFormatKHR, which is then copied into the
// caller's array of VkSurfaceFormat2KHR. On return, *pCount holds the number of entries
// written, and the result is whatever the basic overload returned.
VkResult MVKPhysicalDevice::getSurfaceFormats(MVKSurface* surface,
                                              uint32_t* pCount,
                                              VkSurfaceFormat2KHR* pSurfaceFormats) {
    // Count-only query.
    if ( !pSurfaceFormats ) {
        return getSurfaceFormats(surface, pCount, (VkSurfaceFormatKHR*)nullptr);
    }

    // Temp storage sized from the caller-supplied capacity. A growable container is used
    // instead of a variable-length stack array, because the capacity is app-controlled and
    // may be zero or arbitrarily large. At least one zero-initialised element is always
    // allocated, so a valid non-null pointer is passed even when *pCount is zero
    // (a null pointer would be treated as a count-only query).
    MVKSmallVector<VkSurfaceFormatKHR, 32> surfFmts;
    surfFmts.resize(std::max(*pCount, (uint32_t)1u));

    // The value of *pCount may be reduced during this call, but never increased,
    // so it is always <= the size of the temp array.
    VkResult rslt = getSurfaceFormats(surface, pCount, surfFmts.data());

    // On failure (e.g. the surface has no CAMetalLayer) nothing was retrieved and *pCount
    // was not updated, so there is nothing meaningful to copy to the caller's array.
    if (rslt != VK_SUCCESS && rslt != VK_INCOMPLETE) { return rslt; }

    for (uint32_t fmtIdx = 0; fmtIdx < *pCount; fmtIdx++) {
        auto pSF = &pSurfaceFormats[fmtIdx];
        pSF->sType = VK_STRUCTURE_TYPE_SURFACE_FORMAT_2_KHR;
        pSF->pNext = nullptr;
        pSF->surfaceFormat = surfFmts[fmtIdx];
    }
    return rslt;
}
// Enumerates the presentation modes available for the surface.
//
// FIFO is always reported; IMMEDIATE is added when the device supports it. If pPresentModes
// is non-null and too small for all modes, as many modes as fit are written, *pCount is left
// unchanged, and VK_INCOMPLETE is returned. Otherwise *pCount receives the number of modes.
// Returns the surface's configuration result if the surface has no CAMetalLayer.
VkResult MVKPhysicalDevice::getSurfacePresentModes(MVKSurface* surface,
                                                   uint32_t* pCount,
                                                   VkPresentModeKHR* pPresentModes) {

    // The surface must be backed by a CAMetalLayer.
    if (surface->getCAMetalLayer() == nil) { return surface->getConfigurationResult(); }

// Writes the mode into the next output slot if there is room, and always counts it.
#define ADD_VK_PRESENT_MODE(VK_PM)  \
    do {  \
        if (pPresentModes && presentModesCnt < *pCount) { pPresentModes[presentModesCnt] = VK_PM; }  \
        presentModesCnt++;  \
    } while(false)

    uint32_t presentModesCnt = 0;

    ADD_VK_PRESENT_MODE(VK_PRESENT_MODE_FIFO_KHR);
    if (_metalFeatures.presentModeImmediate) {
        ADD_VK_PRESENT_MODE(VK_PRESENT_MODE_IMMEDIATE_KHR);
    }

    // The caller's array was too small when more modes were counted than it could hold.
    const bool wasTruncated = pPresentModes && (presentModesCnt > *pCount);
    if ( !wasTruncated ) { *pCount = presentModesCnt; }
    return wasTruncated ? VK_INCOMPLETE : VK_SUCCESS;
}
// Reports the single present rectangle of the surface, which covers the
// natural drawable size of the underlying CAMetalLayer, starting at the origin.
//
// If pRects is null, only the count (1) is returned. If pRects is non-null but
// *pRectCount is zero, VK_INCOMPLETE is returned and nothing is written.
// Returns the surface's configuration result if the surface has no CAMetalLayer.
VkResult MVKPhysicalDevice::getPresentRectangles(MVKSurface* surface,
                                                 uint32_t* pRectCount,
                                                 VkRect2D* pRects) {

    // The surface must be backed by a CAMetalLayer.
    CAMetalLayer* caLayer = surface->getCAMetalLayer();
    if (caLayer == nil) { return surface->getConfigurationResult(); }

    if (pRects) {
        // No room for the one rectangle we have.
        if (*pRectCount == 0) { return VK_INCOMPLETE; }

        VkRect2D& rect = pRects[0];
        rect.offset = { 0, 0 };
        rect.extent = mvkVkExtent2DFromCGSize(caLayer.naturalDrawableSizeMVK);
    }

    *pRectCount = 1;
    return VK_SUCCESS;
}
#pragma mark Queues
// Returns the queue families supported by this instance, lazily creating them if necessary.
// Metal does not distinguish functionality between queues, which would normally lead us
// to create only one general-purpose queue family. However, Vulkan associates command
// buffers with a queue family, whereas Metal associates command buffers with a Metal queue.
// In order to allow a Metal command buffer to be prefilled before it is formally submitted to
// a Vulkan queue, we need to enforce that each Vulkan queue family can have only one Metal queue.
// In order to provide parallel queue operations, we therefore provide multiple queue families.
// In addition, Metal queues are always general purpose, so the default behaviour is for all
// queue families to support graphics + compute + transfer, unless the app indicates it
// requires queue family specialization.
MVKArrayRef<MVKQueueFamily*> MVKPhysicalDevice::getQueueFamilies() {

    // Already created on an earlier call.
    if ( !_queueFamilies.empty() ) { return _queueFamilies.contents(); }

    const bool useSpecialized = mvkConfig().specializedQueueFamilies;
    const VkQueueFlags generalFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT);

    // Capabilities of each queue family, in queue family index order. Without specialization,
    // every family is general-purpose.
    const VkQueueFlags familyFlags[] = {
        generalFlags,                                                                         // General-purpose
        useSpecialized ? (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT) : generalFlags,      // Dedicated graphics
        useSpecialized ? (VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT) : generalFlags,       // Dedicated compute
        useSpecialized ? VK_QUEUE_TRANSFER_BIT : generalFlags,                                // Dedicated transfer
    };

    // Properties shared by all queue families.
    VkQueueFamilyProperties familyProps;
    familyProps.queueCount = kMVKQueueCountPerQueueFamily;
    familyProps.timestampValidBits = 64;
    familyProps.minImageTransferGranularity = { 1, 1, 1};

    uint32_t familyIdx = 0;
    for (VkQueueFlags flags : familyFlags) {
        familyProps.queueFlags = flags;
        _queueFamilies.push_back(new MVKQueueFamily(this, familyIdx++, &familyProps));
    }

    MVKAssert(kMVKQueueFamilyCount >= _queueFamilies.size(), "Adjust value of kMVKQueueFamilyCount.");

    return _queueFamilies.contents();
}
// Enumerates the properties of the queue families of this device.
//
// If pQueueFamilyProperties is null, *pCount receives the number of queue families.
// Otherwise up to *pCount entries are written, *pCount is set to the number written,
// and VK_INCOMPLETE is returned if the array could not hold every family.
VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount,
                                                     VkQueueFamilyProperties* pQueueFamilyProperties) {
    auto families = getQueueFamilies();
    const uint32_t availableCnt = uint32_t(families.size);

    // Count-only query.
    if ( !pQueueFamilyProperties ) {
        *pCount = availableCnt;
        return VK_SUCCESS;
    }

    // Clamp to what the caller's array can hold, and flag truncation.
    const VkResult rslt = (*pCount < availableCnt) ? VK_INCOMPLETE : VK_SUCCESS;
    const uint32_t returnedCnt = min(*pCount, availableCnt);
    *pCount = returnedCnt;

    for (uint32_t idx = 0; idx < returnedCnt; idx++) {
        families[idx]->getProperties(&pQueueFamilyProperties[idx]);
    }
    return rslt;
}
// Extended-struct variant of the queue family query.
//
// If pQueueFamilyProperties is null, only the count is returned in *pCount. Otherwise the
// basic overload populates a temporary array of VkQueueFamilyProperties, which is then
// copied into the caller's array of VkQueueFamilyProperties2KHR. On return, *pCount holds
// the number of entries written, and the result is whatever the basic overload returned.
VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount,
                                                     VkQueueFamilyProperties2KHR* pQueueFamilyProperties) {
    // Count-only query.
    if ( !pQueueFamilyProperties ) {
        return getQueueFamilyProperties(pCount, (VkQueueFamilyProperties*)nullptr);
    }

    // Temp storage sized from the caller-supplied capacity. A growable container is used
    // instead of a variable-length stack array, because the capacity is app-controlled and
    // may be zero or arbitrarily large. At least one zero-initialised element is always
    // allocated, so a valid non-null pointer is passed even when *pCount is zero
    // (a null pointer would be treated as a count-only query).
    MVKSmallVector<VkQueueFamilyProperties, 8> qProps;
    qProps.resize(std::max(*pCount, (uint32_t)1u));

    // The value of *pCount may be reduced during this call, but never increased,
    // so it is always <= the size of the temp array.
    VkResult rslt = getQueueFamilyProperties(pCount, qProps.data());

    for (uint32_t qpIdx = 0; qpIdx < *pCount; qpIdx++) {
        auto pQP = &pQueueFamilyProperties[qpIdx];
        pQP->sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2_KHR;
        pQP->pNext = nullptr;
        pQP->queueFamilyProperties = qProps[qpIdx];
    }
    return rslt;
}
// Samples the current CPU and GPU timestamps into cpuStart and gpuStart, as the starting
// point of a correlation interval. The outputs are left untouched on Apple GPUs, where the
// GPU and CPU timestamps are the same, or when GPU timestamp counters are not in use.
void MVKPhysicalDevice::startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart) {
    const bool isAppleGPU = (_properties.vendorID == kAppleVendorId);
    if ( !isAppleGPU && _timestampMTLCounterSet ) {
        [_mtlDevice sampleTimestamps: &cpuStart gpuTimestamp: &gpuStart];
    }
}
// Recomputes the timestamp period limit as the ratio of elapsed CPU time to elapsed GPU
// ticks, measured from the supplied starting timestamps to a fresh sample taken now.
//
// Don't need to do this for Apple GPUs, where the GPU and CPU timestamps
// are the same, or if we're not using GPU timestamp counters.
//
// The existing period is also left unchanged if either timestamp has not advanced since
// the starting sample. Otherwise the division below would produce a zero, infinite or NaN
// period, or, since the timestamps are unsigned, a wrapped-around difference.
void MVKPhysicalDevice::updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart) {
    if (_properties.vendorID == kAppleVendorId || !_timestampMTLCounterSet) { return; }

    MTLTimestamp cpuEnd;
    MTLTimestamp gpuEnd;
    [_mtlDevice sampleTimestamps: &cpuEnd gpuTimestamp: &gpuEnd];

    // Require a strictly positive interval on both clocks.
    if (cpuEnd <= cpuStart || gpuEnd <= gpuStart) { return; }

    _properties.limits.timestampPeriod = (double)(cpuEnd - cpuStart) / (double)(gpuEnd - gpuStart);
}
#pragma mark Memory models
/** Copies the memory characteristics of this device into the specified memory properties. Always returns VK_SUCCESS. */
VkResult MVKPhysicalDevice::getMemoryProperties(VkPhysicalDeviceMemoryProperties* pMemoryProperties) {
    VkPhysicalDeviceMemoryProperties& dstProps = *pMemoryProperties;
    dstProps = _memoryProperties;
    return VK_SUCCESS;
}
// Extended-struct variant of the memory properties query. Copies the device memory
// properties, and fills in any memory budget structure found on the pNext chain.
// Always returns VK_SUCCESS.
VkResult MVKPhysicalDevice::getMemoryProperties(VkPhysicalDeviceMemoryProperties2* pMemoryProperties) {
    pMemoryProperties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
    pMemoryProperties->memoryProperties = _memoryProperties;

    for (auto* pChained = (VkBaseOutStructure*)pMemoryProperties->pNext; pChained; pChained = pChained->pNext) {
        if (pChained->sType != VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT) { continue; }

        auto* pBudget = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)pChained;

        // Every heap defaults to zero budget and usage.
        mvkClear(pBudget->heapBudget, VK_MAX_MEMORY_HEAPS);
        mvkClear(pBudget->heapUsage, VK_MAX_MEMORY_HEAPS);

        // Heap 0 is reported from the device's recommended working set and current allocation.
        pBudget->heapBudget[0] = (VkDeviceSize)getRecommendedMaxWorkingSetSize();
        pBudget->heapUsage[0] = (VkDeviceSize)getCurrentAllocatedSize();

        // Heap 1 is only reported when the device does not have unified memory.
        if ( !getHasUnifiedMemory() ) {
            pBudget->heapBudget[1] = (VkDeviceSize)mvkGetAvailableMemorySize();
            pBudget->heapUsage[1] = (VkDeviceSize)mvkGetUsedMemorySize();
        }
    }
    return VK_SUCCESS;
}
#pragma mark Construction
// Constructs a physical device for the given Metal device, which is retained.
// The initialization steps in the body run in a fixed order, as marked on each call.
MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtlDevice) :
_mtlDevice([mtlDevice retain]), // Set first
_mvkInstance(mvkInstance),
_supportedExtensions(this, true),
_pixelFormats(this) { // Set after _mtlDevice
initProperties(); // Call first.
initMetalFeatures(); // Call second.
initFeatures(); // Call third.
initLimits(); // Call fourth.
// The remaining steps carry no explicit ordering markers.
initExtensions();
initMemoryProperties();
initExternalMemoryProperties();
initCounterSets();
logGPUInfo();
}
// Initializes the physical device properties (except limits).
// Clears the whole properties struct, then sets the advertised API version and the
// driver version, before populating the GPU info and the pipeline cache UUID.
void MVKPhysicalDevice::initProperties() {
mvkClear(&_properties); // Start with everything cleared
_properties.apiVersion = mvkConfig().apiVersionToAdvertise; // Taken from the MoltenVK configuration
_properties.driverVersion = MVK_VERSION;
initGPUInfoProperties();
initPipelineCacheUUID();
}
// Initializes the Metal-specific physical device features of this instance.
//
// Fills in _metalFeatures in several passes:
//   1. Clears the struct and sets baseline values shared by all platforms.
//   2. Applies exactly one platform section (tvOS, iOS or macOS, selected at compile time),
//      each of which starts from a platform baseline and then layers on capabilities
//      according to Metal feature sets, GPU families and OS versions.
//   3. Queries optional MTLDevice properties, guarded by respondsToSelector:.
//   4. Derives supported sample counts, subgroup sizes, the SPIRV-Cross MSL version,
//      argument-buffer support and counter sampling points.
//
// Many fields (notably mslVersionEnum) are assigned several times, with each later
// assignment that executes replacing the earlier one. The statement order is therefore
// significant and must be preserved. Reads _properties.vendorID and _properties.deviceID,
// so _properties must already be populated when this runs.
void MVKPhysicalDevice::initMetalFeatures() {
// Start with all Metal features cleared
mvkClear(&_metalFeatures);
// Baseline values shared by every platform. Several of these are overwritten further down.
_metalFeatures.maxPerStageBufferCount = 31;
_metalFeatures.maxMTLBufferSize = (256 * MEBI);
_metalFeatures.dynamicMTLBufferSize = 0;
_metalFeatures.maxPerStageDynamicMTLBufferCount = 0;
_metalFeatures.maxPerStageSamplerCount = 16;
_metalFeatures.maxQueryBufferSize = (64 * KIBI);
_metalFeatures.pushConstantSizeAlignment = 16; // Min float4 alignment for typical uniform structs.
_metalFeatures.maxTextureLayers = (2 * KIBI);
_metalFeatures.ioSurfaces = MVK_SUPPORT_IOSURFACE_BOOL;
// Metal supports 2 or 3 concurrent CAMetalLayer drawables.
_metalFeatures.minSwapchainImageCount = kMVKMinSwapchainImageCount;
_metalFeatures.maxSwapchainImageCount = kMVKMaxSwapchainImageCount;
_metalFeatures.vertexStrideAlignment = 4;
_metalFeatures.maxPerStageStorageTextureCount = 8;
// GPU-specific features
// Only AMD rounds clear colors down; every other vendor (including unknown ones) rounds to nearest.
switch (_properties.vendorID) {
case kAMDVendorId:
_metalFeatures.clearColorFloatRounding = MVK_FLOAT_ROUNDING_DOWN;
break;
case kAppleVendorId:
case kIntelVendorId:
case kNVVendorId:
default:
_metalFeatures.clearColorFloatRounding = MVK_FLOAT_ROUNDING_NEAREST;
break;
}
// ---- tvOS ----
// Platform baseline first, then one check per feature set / OS version.
// Each check that passes may overwrite mslVersionEnum set by an earlier check.
#if MVK_TVOS
_metalFeatures.mslVersionEnum = MTLLanguageVersion1_1;
_metalFeatures.mtlBufferAlignment = 64;
_metalFeatures.mtlCopyBufferAlignment = 1;
_metalFeatures.texelBuffers = true;
_metalFeatures.maxTextureDimension = (8 * KIBI);
_metalFeatures.dynamicMTLBufferSize = (4 * KIBI);
_metalFeatures.sharedLinearTextures = true;
_metalFeatures.maxPerStageDynamicMTLBufferCount = _metalFeatures.maxPerStageBufferCount;
_metalFeatures.renderLinearTextures = true;
_metalFeatures.tileBasedDeferredRendering = true;
if (supportsMTLFeatureSet(tvOS_GPUFamily1_v2)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion1_2;
_metalFeatures.shaderSpecialization = true;
_metalFeatures.stencilViews = true;
_metalFeatures.fences = true;
_metalFeatures.deferredStoreActions = true;
}
if (supportsMTLFeatureSet(tvOS_GPUFamily1_v3)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_0;
_metalFeatures.renderWithoutAttachments = true;
_metalFeatures.argumentBuffers = true;
}
if (supportsMTLFeatureSet(tvOS_GPUFamily1_v4)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_1;
_metalFeatures.events = true;
_metalFeatures.textureBuffers = true;
}
if (supportsMTLFeatureSet(tvOS_GPUFamily2_v1)) {
_metalFeatures.indirectDrawing = true;
_metalFeatures.baseVertexInstanceDrawing = true;
_metalFeatures.combinedStoreResolveAction = true;
_metalFeatures.mtlBufferAlignment = 16; // Min float4 alignment for typical vertex buffers. MTLBuffer may go down to 4 bytes for other data.
_metalFeatures.maxTextureDimension = (16 * KIBI);
_metalFeatures.depthSampleCompare = true;
_metalFeatures.arrayOfTextures = true;
_metalFeatures.arrayOfSamplers = true;
_metalFeatures.depthResolve = true;
}
// From this OS version on, capabilities are additionally keyed on MTLGPUFamily.
if ( mvkOSVersionIsAtLeast(13.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_2;
_metalFeatures.placementHeaps = mvkConfig().useMTLHeap;
_metalFeatures.nativeTextureSwizzle = true;
if (supportsMTLGPUFamily(Apple3)) {
_metalFeatures.native3DCompressedTextures = true;
}
if (supportsMTLGPUFamily(Apple4)) {
_metalFeatures.quadPermute = true;
}
}
if (supportsMTLGPUFamily(Apple4)) {
_metalFeatures.maxPerStageTextureCount = 96;
} else {
_metalFeatures.maxPerStageTextureCount = 31;
}
// Newer MSL versions are only referenced when the building Xcode defines their enum values.
#if MVK_XCODE_12
if ( mvkOSVersionIsAtLeast(14.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_3;
}
#endif
#if MVK_XCODE_13
if ( mvkOSVersionIsAtLeast(15.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_4;
}
#endif
#if MVK_XCODE_14
if ( mvkOSVersionIsAtLeast(16.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion3_0;
}
#endif
#endif
// ---- iOS ----
// Same layering scheme as tvOS, with a lower starting baseline and more feature-set steps.
#if MVK_IOS
_metalFeatures.mslVersionEnum = MTLLanguageVersion1_0;
_metalFeatures.mtlBufferAlignment = 64;
_metalFeatures.mtlCopyBufferAlignment = 1;
_metalFeatures.texelBuffers = true;
_metalFeatures.maxTextureDimension = (4 * KIBI);
_metalFeatures.sharedLinearTextures = true;
_metalFeatures.renderLinearTextures = true;
_metalFeatures.tileBasedDeferredRendering = true;
if (supportsMTLFeatureSet(iOS_GPUFamily1_v2)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion1_1;
_metalFeatures.dynamicMTLBufferSize = (4 * KIBI);
_metalFeatures.maxTextureDimension = (8 * KIBI);
_metalFeatures.maxPerStageDynamicMTLBufferCount = _metalFeatures.maxPerStageBufferCount;
}
if (supportsMTLFeatureSet(iOS_GPUFamily1_v3)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion1_2;
_metalFeatures.shaderSpecialization = true;
_metalFeatures.stencilViews = true;
_metalFeatures.fences = true;
_metalFeatures.deferredStoreActions = true;
}
if (supportsMTLFeatureSet(iOS_GPUFamily1_v4)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_0;
_metalFeatures.renderWithoutAttachments = true;
_metalFeatures.argumentBuffers = true;
}
if (supportsMTLFeatureSet(iOS_GPUFamily1_v5)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_1;
_metalFeatures.events = true;
_metalFeatures.textureBuffers = true;
}
if (supportsMTLFeatureSet(iOS_GPUFamily3_v1)) {
_metalFeatures.indirectDrawing = true;
_metalFeatures.baseVertexInstanceDrawing = true;
_metalFeatures.combinedStoreResolveAction = true;
_metalFeatures.mtlBufferAlignment = 16; // Min float4 alignment for typical vertex buffers. MTLBuffer may go down to 4 bytes for other data.
_metalFeatures.maxTextureDimension = (16 * KIBI);
_metalFeatures.depthSampleCompare = true;
_metalFeatures.depthResolve = true;
}
if (supportsMTLFeatureSet(iOS_GPUFamily3_v2)) {
_metalFeatures.arrayOfTextures = true;
}
if (supportsMTLFeatureSet(iOS_GPUFamily3_v3)) {
_metalFeatures.arrayOfSamplers = true;
}
if (supportsMTLFeatureSet(iOS_GPUFamily4_v1)) {
_metalFeatures.postDepthCoverage = true;
_metalFeatures.nonUniformThreadgroups = true;
}
if (supportsMTLFeatureSet(iOS_GPUFamily5_v1)) {
_metalFeatures.layeredRendering = true;
_metalFeatures.stencilFeedback = true;
_metalFeatures.indirectTessellationDrawing = true;
_metalFeatures.stencilResolve = true;
}
// From this OS version on, capabilities are additionally keyed on MTLGPUFamily.
if ( mvkOSVersionIsAtLeast(13.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_2;
_metalFeatures.placementHeaps = mvkConfig().useMTLHeap;
_metalFeatures.nativeTextureSwizzle = true;
if (supportsMTLGPUFamily(Apple3)) {
_metalFeatures.native3DCompressedTextures = true;
}
if (supportsMTLGPUFamily(Apple4)) {
_metalFeatures.quadPermute = true;
}
if (supportsMTLGPUFamily(Apple6) ) {
_metalFeatures.astcHDRTextures = true;
_metalFeatures.simdPermute = true;
}
}
if (supportsMTLGPUFamily(Apple4)) {
_metalFeatures.maxPerStageTextureCount = 96;
} else {
_metalFeatures.maxPerStageTextureCount = 31;
}
#if MVK_XCODE_12
if ( mvkOSVersionIsAtLeast(14.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_3;
_metalFeatures.multisampleArrayTextures = true;
if ( supportsMTLGPUFamily(Apple7) ) {
_metalFeatures.maxQueryBufferSize = (256 * KIBI);
// Multisample layered rendering is only enabled if layered rendering was enabled above.
_metalFeatures.multisampleLayeredRendering = _metalFeatures.layeredRendering;
_metalFeatures.samplerClampToBorder = true;
_metalFeatures.samplerMirrorClampToEdge = true;
_metalFeatures.simdReduction = true;
}
}
#endif
#if MVK_XCODE_13
if ( mvkOSVersionIsAtLeast(15.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_4;
}
#endif
#if MVK_XCODE_14
if ( mvkOSVersionIsAtLeast(16.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion3_0;
}
#endif
#endif
// ---- macOS ----
// Starts from a desktop-GPU baseline; an Apple-GPU adjustment block near the end of
// this section overrides several of these values when the device reports GPU family Apple1.
#if MVK_MACOS
_metalFeatures.mslVersionEnum = MTLLanguageVersion1_1;
_metalFeatures.maxPerStageTextureCount = 128;
_metalFeatures.mtlBufferAlignment = 256;
_metalFeatures.mtlCopyBufferAlignment = 4;
_metalFeatures.baseVertexInstanceDrawing = true;
_metalFeatures.layeredRendering = true;
_metalFeatures.maxTextureDimension = (16 * KIBI);
_metalFeatures.depthSampleCompare = true;
_metalFeatures.samplerMirrorClampToEdge = true;
if (supportsMTLFeatureSet(macOS_GPUFamily1_v2)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion1_2;
_metalFeatures.indirectDrawing = true;
_metalFeatures.indirectTessellationDrawing = true;
_metalFeatures.dynamicMTLBufferSize = (4 * KIBI);
_metalFeatures.shaderSpecialization = true;
_metalFeatures.stencilViews = true;
_metalFeatures.samplerClampToBorder = true;
_metalFeatures.combinedStoreResolveAction = true;
_metalFeatures.deferredStoreActions = true;
_metalFeatures.maxMTLBufferSize = (1 * GIBI);
_metalFeatures.maxPerStageDynamicMTLBufferCount = 14;
}
if (supportsMTLFeatureSet(macOS_GPUFamily1_v3)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_0;
_metalFeatures.texelBuffers = true;
_metalFeatures.arrayOfTextures = true;
_metalFeatures.arrayOfSamplers = true;
_metalFeatures.presentModeImmediate = true;
_metalFeatures.fences = true;
_metalFeatures.nonUniformThreadgroups = true;
_metalFeatures.argumentBuffers = true;
}
if (supportsMTLFeatureSet(macOS_GPUFamily1_v4)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_1;
_metalFeatures.multisampleArrayTextures = true;
_metalFeatures.events = true;
_metalFeatures.textureBuffers = true;
}
if (supportsMTLFeatureSet(macOS_GPUFamily2_v1)) {
// layeredRendering was set unconditionally in the macOS baseline above.
_metalFeatures.multisampleLayeredRendering = _metalFeatures.layeredRendering;
_metalFeatures.stencilFeedback = true;
_metalFeatures.depthResolve = true;
_metalFeatures.stencilResolve = true;
_metalFeatures.simdPermute = true;
_metalFeatures.quadPermute = true;
_metalFeatures.simdReduction = true;
}
if ( mvkOSVersionIsAtLeast(10.15) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_2;
_metalFeatures.maxQueryBufferSize = (256 * KIBI);
_metalFeatures.native3DCompressedTextures = true;
// Shared linear textures additionally require the 10.15.6 point release.
if ( mvkOSVersionIsAtLeast(mvkMakeOSVersion(10, 15, 6)) ) {
_metalFeatures.sharedLinearTextures = true;
}
if (supportsMTLGPUFamily(Mac2)) {
_metalFeatures.nativeTextureSwizzle = true;
_metalFeatures.placementHeaps = mvkConfig().useMTLHeap;
_metalFeatures.renderWithoutAttachments = true;
}
}
#if MVK_XCODE_12
if ( mvkOSVersionIsAtLeast(11.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_3;
}
#endif
#if MVK_XCODE_13
if ( mvkOSVersionIsAtLeast(12.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_4;
}
#endif
#if MVK_XCODE_14
if ( mvkOSVersionIsAtLeast(13.0) ) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion3_0;
}
#endif
// This is an Apple GPU--treat it accordingly.
// Note that maxQueryBufferSize is reset to 64 KiB here (undoing the 10.15 bump above)
// and only raised again to 256 KiB for GPU family Apple7.
if (supportsMTLGPUFamily(Apple1)) {
_metalFeatures.mtlCopyBufferAlignment = 1;
_metalFeatures.mtlBufferAlignment = 16; // Min float4 alignment for typical vertex buffers. MTLBuffer may go down to 4 bytes for other data.
_metalFeatures.maxQueryBufferSize = (64 * KIBI);
_metalFeatures.maxPerStageDynamicMTLBufferCount = _metalFeatures.maxPerStageBufferCount;
_metalFeatures.postDepthCoverage = true;
_metalFeatures.renderLinearTextures = true;
_metalFeatures.tileBasedDeferredRendering = true;
#if MVK_XCODE_12
if (supportsMTLGPUFamily(Apple6)) {
_metalFeatures.astcHDRTextures = true;
}
if (supportsMTLGPUFamily(Apple7)) {
_metalFeatures.maxQueryBufferSize = (256 * KIBI);
}
#endif
}
// Don't use barriers in render passes on Apple GPUs. Apple GPUs don't support them,
// and in fact Metal's validation layer will complain if you try to use them.
if ( !supportsMTLGPUFamily(Apple1) ) {
if (supportsMTLFeatureSet(macOS_GPUFamily1_v4)) {
_metalFeatures.memoryBarriers = true;
}
_metalFeatures.textureBarriers = true;
}
#endif
// ---- Optional MTLDevice queries (all platforms) ----
// Each property is read only if the device object responds to its selector;
// otherwise the field keeps whatever value it was given above.
if ( [_mtlDevice respondsToSelector: @selector(areProgrammableSamplePositionsSupported)] ) {
_metalFeatures.programmableSamplePositions = _mtlDevice.areProgrammableSamplePositionsSupported;
}
if ( [_mtlDevice respondsToSelector: @selector(areRasterOrderGroupsSupported)] ) {
_metalFeatures.rasterOrderGroups = _mtlDevice.areRasterOrderGroupsSupported;
}
#if MVK_XCODE_12
if ( [_mtlDevice respondsToSelector: @selector(supportsPullModelInterpolation)] ) {
_metalFeatures.pullModelInterpolation = _mtlDevice.supportsPullModelInterpolation;
}
#endif
#if (MVK_MACOS && !MVK_MACCAT) || (MVK_MACCAT && MVK_XCODE_14) || (MVK_IOS && MVK_XCODE_12)
if ( [_mtlDevice respondsToSelector: @selector(supportsShaderBarycentricCoordinates)] ) {
_metalFeatures.shaderBarycentricCoordinates = _mtlDevice.supportsShaderBarycentricCoordinates;
}
#endif
// When available, the device-reported maximum replaces the hard-coded buffer size set earlier.
if ( [_mtlDevice respondsToSelector: @selector(maxBufferLength)] ) {
_metalFeatures.maxMTLBufferSize = _mtlDevice.maxBufferLength;
}
// Build the supported-sample-count bitmask by testing each single-bit Vulkan sample-count
// flag, from VK_SAMPLE_COUNT_1_BIT through VK_SAMPLE_COUNT_64_BIT, against the device.
for (uint32_t sc = VK_SAMPLE_COUNT_1_BIT; sc <= VK_SAMPLE_COUNT_64_BIT; sc <<= 1) {
if ([_mtlDevice supportsTextureSampleCount: mvkSampleCountFromVkSampleCountFlagBits((VkSampleCountFlagBits)sc)]) {
_metalFeatures.supportedSampleCounts |= sc;
}
}
// ---- Subgroup sizes ----
// Default to a subgroup size of 1; the platform sections below widen it when
// SIMD-group (or, on iOS, quad-group) permute is available.
_metalFeatures.minSubgroupSize = _metalFeatures.maxSubgroupSize = 1;
#if MVK_MACOS
if (_metalFeatures.simdPermute) {
// Based on data from Sascha Willems' Vulkan Hardware Database.
// This would be a lot easier and less painful if MTLDevice had properties for this...
_metalFeatures.maxSubgroupSize = (_properties.vendorID == kAMDVendorId) ? 64 : 32;
switch (_properties.vendorID) {
case kIntelVendorId:
_metalFeatures.minSubgroupSize = 8;
break;
case kAMDVendorId:
// The listed AMD device IDs get a minimum of 32; all other AMD devices use the maximum (64).
switch (_properties.deviceID) {
case kAMDRadeonRX5700DeviceId:
case kAMDRadeonRX5500DeviceId:
case kAMDRadeonRX6800DeviceId:
case kAMDRadeonRX6700DeviceId:
_metalFeatures.minSubgroupSize = 32;
break;
default:
_metalFeatures.minSubgroupSize = _metalFeatures.maxSubgroupSize;
break;
}
break;
case kAppleVendorId:
// XXX Minimum thread execution width for Apple GPUs is unknown, but assumed to be 4. May be greater.
_metalFeatures.minSubgroupSize = 4;
break;
default:
_metalFeatures.minSubgroupSize = _metalFeatures.maxSubgroupSize;
break;
}
}
#endif
#if MVK_IOS
if (_metalFeatures.simdPermute) {
_metalFeatures.minSubgroupSize = 4;
_metalFeatures.maxSubgroupSize = 32;
} else if (_metalFeatures.quadPermute) {
_metalFeatures.minSubgroupSize = _metalFeatures.maxSubgroupSize = 4;
}
#endif
// ---- MSL version ----
// Translate the MTLLanguageVersion enum chosen above into the packed version value
// produced by SPIRV-Cross' make_msl_version(). The switch has no default case, so an
// enum value without a matching case leaves mslVersion at its cleared value.
// NOTE(review): the macro is not #undef'd after use and so remains defined for the rest of the file.
#define setMSLVersion(maj, min) \
_metalFeatures.mslVersion = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::make_msl_version(maj, min);
switch (_metalFeatures.mslVersionEnum) {
#if MVK_XCODE_14
case MTLLanguageVersion3_0:
setMSLVersion(3, 0);
break;
#endif
#if MVK_XCODE_13
case MTLLanguageVersion2_4:
setMSLVersion(2, 4);
break;
#endif
#if MVK_XCODE_12
case MTLLanguageVersion2_3:
setMSLVersion(2, 3);
break;
#endif
case MTLLanguageVersion2_2:
setMSLVersion(2, 2);
break;
case MTLLanguageVersion2_1:
setMSLVersion(2, 1);
break;
case MTLLanguageVersion2_0:
setMSLVersion(2, 0);
break;
case MTLLanguageVersion1_2:
setMSLVersion(1, 2);
break;
case MTLLanguageVersion1_1:
setMSLVersion(1, 1);
break;
#if MVK_IOS_OR_TVOS
case MTLLanguageVersion1_0:
setMSLVersion(1, 0);
break;
#endif
}
// iOS and tvOS adjustments necessary when running in the simulator on non-Apple GPUs.
#if MVK_OS_SIMULATOR && !MVK_APPLE_SILICON
_metalFeatures.mtlBufferAlignment = 256;
#endif
// ---- Argument buffers ----
// Currently, Metal argument buffer support is in beta stage, and is only supported
// on macOS 11.0 (Big Sur) or later, or on older versions of macOS using an Intel GPU.
// Metal argument buffers support is not available on iOS. Development to support iOS
// and a wider combination of GPU's on older macOS versions is under way.
#if MVK_MACOS
_metalFeatures.descriptorSetArgumentBuffers = (_metalFeatures.argumentBuffers &&
(mvkOSVersionIsAtLeast(10.16) ||
_properties.vendorID == kIntelVendorId));
#endif
// Currently, if we don't support descriptor set argument buffers, we can't support argument buffers.
// On non-macOS builds descriptorSetArgumentBuffers is never set in this function, so this
// assignment resets argumentBuffers to the cleared value even if a platform section enabled it.
_metalFeatures.argumentBuffers = _metalFeatures.descriptorSetArgumentBuffers;
if ([_mtlDevice respondsToSelector: @selector(argumentBuffersSupport)]) {
_metalFeatures.argumentBuffersTier = _mtlDevice.argumentBuffersSupport;
}
// ---- Counter sampling points ----
// The macro ORs MVK_COUNTER_SAMPLING_AT_<mvkSP> into counterSamplingPoints when the device
// responds to supportsCounterSampling: and reports support for the matching
// MTLCounterSamplingPointAt<mtlSP>Boundary.
#define checkSupportsMTLCounterSamplingPoint(mtlSP, mvkSP) \
if ([_mtlDevice respondsToSelector: @selector(supportsCounterSampling:)] && \
[_mtlDevice supportsCounterSampling: MTLCounterSamplingPointAt ##mtlSP ##Boundary]) { \
_metalFeatures.counterSamplingPoints |= MVK_COUNTER_SAMPLING_AT_ ##mvkSP; \
}
#if MVK_XCODE_12
checkSupportsMTLCounterSamplingPoint(Draw, DRAW);
checkSupportsMTLCounterSamplingPoint(Dispatch, DISPATCH);
checkSupportsMTLCounterSamplingPoint(Blit, BLIT);
checkSupportsMTLCounterSamplingPoint(Stage, PIPELINE_STAGE);
#endif
#if MVK_MACOS
// On macOS, if we couldn't query supported sample points (on macOS 11),
// but the platform can support immediate-mode sample points, indicate that here.
// NOTE(review): the trailing backslashes on the next two lines are line continuations outside
// any macro; they only splice the statement onto one logical line. Do not insert a // comment
// between those lines, as it would swallow the spliced remainder.
if (!_metalFeatures.counterSamplingPoints && mvkOSVersionIsAtLeast(10.15) && !supportsMTLGPUFamily(Apple1)) { \
_metalFeatures.counterSamplingPoints = MVK_COUNTER_SAMPLING_AT_DRAW | MVK_COUNTER_SAMPLING_AT_DISPATCH | MVK_COUNTER_SAMPLING_AT_BLIT; \
}
#endif
}
// Populates the core Vulkan feature flags (_features) of this physical device.
//
// The struct is zeroed first, so any feature not explicitly enabled below is reported
// as unsupported. Features are then enabled in three groups: those that are always on,
// those derived from the already-computed _metalFeatures, and those that depend on the
// compile-time platform plus the Metal feature sets / GPU families of the device.
void MVKPhysicalDevice::initFeatures() {
	mvkClear(&_features);

	// --- Always available, regardless of platform or GPU ---
	_features.robustBufferAccess = true;		// XXX Required by Vulkan spec
	_features.fullDrawIndexUint32 = true;
	_features.multiDrawIndirect = true;
	_features.inheritedQueries = true;

	_features.independentBlend = true;
	_features.sampleRateShading = true;
	_features.depthBiasClamp = true;
	_features.fillModeNonSolid = true;
	_features.largePoints = true;
	_features.alphaToOne = true;

	_features.samplerAnisotropy = true;

	_features.shaderImageGatherExtended = true;
	_features.shaderStorageImageExtendedFormats = true;
	_features.shaderStorageImageReadWithoutFormat = true;
	_features.shaderStorageImageWriteWithoutFormat = true;
	_features.shaderUniformBufferArrayDynamicIndexing = true;
	_features.shaderStorageBufferArrayDynamicIndexing = true;
	_features.shaderClipDistance = true;
	_features.shaderInt16 = true;

	// --- Derived from Metal capabilities determined earlier ---
	_features.textureCompressionBC = mvkSupportsBCTextureCompression(_mtlDevice);
	_features.shaderSampledImageArrayDynamicIndexing = _metalFeatures.arrayOfTextures;

	// A non-zero firstInstance in indirect draws needs both indirect drawing
	// and base-vertex/base-instance drawing.
	const bool canOffsetIndirectDraws = (_metalFeatures.indirectDrawing &&
										 _metalFeatures.baseVertexInstanceDrawing);
	if (canOffsetIndirectDraws) {
		_features.drawIndirectFirstInstance = true;
	}

#if MVK_TVOS
	// --- tvOS ---
	const bool tvFamily1v3 = supportsMTLFeatureSet(tvOS_GPUFamily1_v3);
	const bool tvFamily2v1 = supportsMTLFeatureSet(tvOS_GPUFamily2_v1);

	_features.textureCompressionETC2 = true;
	_features.textureCompressionASTC_LDR = true;

#if MVK_XCODE_12
	_features.shaderInt64 = mslVersionIsAtLeast(MTLLanguageVersion2_3) && supportsMTLGPUFamily(Apple3);
#else
	_features.shaderInt64 = false;
#endif

	if (tvFamily1v3) {
		_features.dualSrcBlend = true;
	}
	if (tvFamily2v1) {
		_features.occlusionQueryPrecise = true;
		_features.tessellationShader = true;
	}
#endif

#if MVK_IOS
	// --- iOS ---
	_features.textureCompressionETC2 = true;

#if MVK_XCODE_12
	_features.shaderInt64 = mslVersionIsAtLeast(MTLLanguageVersion2_3) && supportsMTLGPUFamily(Apple3);
#else
	_features.shaderInt64 = false;
#endif

	// Feature-set gated capabilities, listed in ascending family/version order.
	if (supportsMTLFeatureSet(iOS_GPUFamily1_v4)) {
		_features.dualSrcBlend = true;
	}
	if (supportsMTLFeatureSet(iOS_GPUFamily2_v1)) {
		_features.textureCompressionASTC_LDR = true;
	}
	if (supportsMTLFeatureSet(iOS_GPUFamily2_v4)) {
		_features.depthClamp = true;
	}
	if (supportsMTLFeatureSet(iOS_GPUFamily3_v1)) {
		_features.occlusionQueryPrecise = true;
	}
	if (supportsMTLFeatureSet(iOS_GPUFamily3_v2)) {
		_features.tessellationShader = true;
		_features.shaderTessellationAndGeometryPointSize = true;
	}
	if (supportsMTLFeatureSet(iOS_GPUFamily4_v1)) {
		_features.imageCubeArray = true;
	}
	if (supportsMTLFeatureSet(iOS_GPUFamily5_v1)) {
		_features.multiViewport = true;
	}

	// GPU-family gated capability.
	if (supportsMTLGPUFamily(Apple6)) {
		_features.shaderResourceMinLod = true;
	}
#endif

#if MVK_MACOS
	// --- macOS ---
	// Unconditional on this platform.
	_features.depthClamp = true;
	_features.imageCubeArray = true;
	_features.occlusionQueryPrecise = true;
	_features.vertexPipelineStoresAndAtomics = true;
	_features.fragmentStoresAndAtomics = true;
	_features.shaderStorageImageArrayDynamicIndexing = _metalFeatures.arrayOfTextures;

#if MVK_XCODE_12
	_features.shaderInt64 = mslVersionIsAtLeast(MTLLanguageVersion2_3);
#else
	_features.shaderInt64 = false;
#endif

	// Feature-set gated capabilities.
	if (supportsMTLFeatureSet(macOS_GPUFamily1_v2)) {
		_features.dualSrcBlend = true;
		_features.tessellationShader = true;
		_features.shaderTessellationAndGeometryPointSize = true;
	}
	if (supportsMTLFeatureSet(macOS_GPUFamily1_v3)) {
		_features.multiViewport = true;
	}

	// OS-version gated capability.
	if (mvkOSVersionIsAtLeast(10.15)) {
		_features.shaderResourceMinLod = true;
	}

	// On macOS, ETC2 and ASTC are only enabled for devices in GPU family Apple5 or later.
	if (supportsMTLGPUFamily(Apple5)) {
		_features.textureCompressionETC2 = true;
		_features.textureCompressionASTC_LDR = true;
	}
#endif
}
#pragma mark VkPhysicalDeviceFeatures - List of features available on the device
//typedef struct VkPhysicalDeviceFeatures {
// VkBool32 robustBufferAccess; // done
// VkBool32 fullDrawIndexUint32; // done
// VkBool32 imageCubeArray; // done
// VkBool32 independentBlend; // done
// VkBool32 geometryShader;
// VkBool32 tessellationShader; // done
// VkBool32 sampleRateShading; // done
// VkBool32 dualSrcBlend; // done
// VkBool32 logicOp;
// VkBool32 multiDrawIndirect; // done
// VkBool32 drawIndirectFirstInstance; // done
// VkBool32 depthClamp; // done
// VkBool32 depthBiasClamp; // done
// VkBool32 fillModeNonSolid; // done
// VkBool32 depthBounds;
// VkBool32 wideLines;
// VkBool32 largePoints; // done
// VkBool32 alphaToOne; // done
// VkBool32 multiViewport; // done
// VkBool32 samplerAnisotropy; // done
// VkBool32 textureCompressionETC2; // done
// VkBool32 textureCompressionASTC_LDR; // done
// VkBool32 textureCompressionBC; // done
// VkBool32 occlusionQueryPrecise; // done
// VkBool32 pipelineStatisticsQuery;
// VkBool32 vertexPipelineStoresAndAtomics; // done
// VkBool32 fragmentStoresAndAtomics; // done
// VkBool32 shaderTessellationAndGeometryPointSize; // done
// VkBool32 shaderImageGatherExtended; // done
// VkBool32 shaderStorageImageExtendedFormats; // done
// VkBool32 shaderStorageImageMultisample;
// VkBool32 shaderStorageImageReadWithoutFormat; // done
// VkBool32 shaderStorageImageWriteWithoutFormat; // done
// VkBool32 shaderUniformBufferArrayDynamicIndexing; // done
// VkBool32 shaderSampledImageArrayDynamicIndexing; // done
// VkBool32 shaderStorageBufferArrayDynamicIndexing; // done
// VkBool32 shaderStorageImageArrayDynamicIndexing; // done
// VkBool32 shaderClipDistance; // done
// VkBool32 shaderCullDistance;
// VkBool32 shaderFloat64;
// VkBool32 shaderInt64; // done
// VkBool32 shaderInt16; // done
// VkBool32 shaderResourceResidency;
// VkBool32 shaderResourceMinLod; // done
// VkBool32 sparseBinding;
// VkBool32 sparseResidencyBuffer;
// VkBool32 sparseResidencyImage2D;
// VkBool32 sparseResidencyImage3D;
// VkBool32 sparseResidency2Samples;
// VkBool32 sparseResidency4Samples;
// VkBool32 sparseResidency8Samples;
// VkBool32 sparseResidency16Samples;
// VkBool32 sparseResidencyAliased;
// VkBool32 variableMultisampleRate;
// VkBool32 inheritedQueries; // done
//} VkPhysicalDeviceFeatures;
// Initializes the physical device property limits.
void MVKPhysicalDevice::initLimits() {
#if MVK_TVOS
_properties.limits.maxColorAttachments = kMVKCachedColorAttachmentCount;
#endif
#if MVK_IOS
if (supportsMTLFeatureSet(iOS_GPUFamily2_v1)) {
_properties.limits.maxColorAttachments = kMVKCachedColorAttachmentCount;
} else {
_properties.limits.maxColorAttachments = 4; // < kMVKCachedColorAttachmentCount
}
#endif
#if MVK_MACOS
_properties.limits.maxColorAttachments = kMVKCachedColorAttachmentCount;
#endif
_properties.limits.maxFragmentOutputAttachments = _properties.limits.maxColorAttachments;
_properties.limits.maxFragmentDualSrcAttachments = _features.dualSrcBlend ? 1 : 0;
_properties.limits.framebufferColorSampleCounts = _metalFeatures.supportedSampleCounts;
_properties.limits.framebufferDepthSampleCounts = _metalFeatures.supportedSampleCounts;
_properties.limits.framebufferStencilSampleCounts = _metalFeatures.supportedSampleCounts;
_properties.limits.framebufferNoAttachmentsSampleCounts = _metalFeatures.supportedSampleCounts;
_properties.limits.sampledImageColorSampleCounts = _metalFeatures.supportedSampleCounts;
_properties.limits.sampledImageIntegerSampleCounts = _metalFeatures.supportedSampleCounts;
_properties.limits.sampledImageDepthSampleCounts = _metalFeatures.supportedSampleCounts;
_properties.limits.sampledImageStencilSampleCounts = _metalFeatures.supportedSampleCounts;
_properties.limits.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT;
_properties.limits.maxSampleMaskWords = 1;
_properties.limits.maxImageDimension1D = _metalFeatures.maxTextureDimension;
_properties.limits.maxImageDimension2D = _metalFeatures.maxTextureDimension;
_properties.limits.maxImageDimensionCube = _metalFeatures.maxTextureDimension;
_properties.limits.maxFramebufferWidth = _metalFeatures.maxTextureDimension;
_properties.limits.maxFramebufferHeight = _metalFeatures.maxTextureDimension;
_properties.limits.maxFramebufferLayers = _metalFeatures.layeredRendering ? _metalFeatures.maxTextureLayers : 1;
_properties.limits.maxViewportDimensions[0] = _metalFeatures.maxTextureDimension;
_properties.limits.maxViewportDimensions[1] = _metalFeatures.maxTextureDimension;
float maxVPDim = max(_properties.limits.maxViewportDimensions[0], _properties.limits.maxViewportDimensions[1]);
_properties.limits.viewportBoundsRange[0] = (-2.0 * maxVPDim);
_properties.limits.viewportBoundsRange[1] = (2.0 * maxVPDim) - 1;
_properties.limits.maxViewports = _features.multiViewport ? kMVKCachedViewportScissorCount : 1;
_properties.limits.maxImageDimension3D = _metalFeatures.maxTextureLayers;
_properties.limits.maxImageArrayLayers = _metalFeatures.maxTextureLayers;
_properties.limits.maxSamplerAnisotropy = 16;
_properties.limits.maxVertexInputAttributes = 31;
_properties.limits.maxVertexInputBindings = 31;
_properties.limits.maxVertexInputBindingStride = (2 * KIBI);
_properties.limits.maxVertexInputAttributeOffset = _properties.limits.maxVertexInputBindingStride - 1;
_properties.limits.maxPerStageDescriptorSamplers = _metalFeatures.maxPerStageSamplerCount;
_properties.limits.maxPerStageDescriptorUniformBuffers = _metalFeatures.maxPerStageBufferCount;
_properties.limits.maxPerStageDescriptorStorageBuffers = _metalFeatures.maxPerStageBufferCount;
_properties.limits.maxPerStageDescriptorSampledImages = _metalFeatures.maxPerStageTextureCount;
_properties.limits.maxPerStageDescriptorStorageImages = _metalFeatures.maxPerStageStorageTextureCount;
_properties.limits.maxPerStageDescriptorInputAttachments = _metalFeatures.maxPerStageTextureCount;
_properties.limits.maxPerStageResources = (_metalFeatures.maxPerStageBufferCount + _metalFeatures.maxPerStageTextureCount);
_properties.limits.maxFragmentCombinedOutputResources = _properties.limits.maxPerStageResources;
_properties.limits.maxDescriptorSetSamplers = (_properties.limits.maxPerStageDescriptorSamplers * 5);
_properties.limits.maxDescriptorSetUniformBuffers = (_properties.limits.maxPerStageDescriptorUniformBuffers * 5);
_properties.limits.maxDescriptorSetUniformBuffersDynamic = (_properties.limits.maxPerStageDescriptorUniformBuffers * 5);
_properties.limits.maxDescriptorSetStorageBuffers = (_properties.limits.maxPerStageDescriptorStorageBuffers * 5);
_properties.limits.maxDescriptorSetStorageBuffersDynamic = (_properties.limits.maxPerStageDescriptorStorageBuffers * 5);
_properties.limits.maxDescriptorSetSampledImages = (_properties.limits.maxPerStageDescriptorSampledImages * 5);
_properties.limits.maxDescriptorSetStorageImages = (_properties.limits.maxPerStageDescriptorStorageImages * 5);
_properties.limits.maxDescriptorSetInputAttachments = (_properties.limits.maxPerStageDescriptorInputAttachments * 5);
// Whether handled as a real texture buffer or a 2D texture, this value is likely nowhere near the size of a buffer,
// needs to fit in 32 bits, and some apps (I'm looking at you, CTS), assume it is low when doing 32-bit math.
_properties.limits.maxTexelBufferElements = _properties.limits.maxImageDimension2D * (4 * KIBI);
#if MVK_MACOS
_properties.limits.maxUniformBufferRange = (64 * KIBI);
if (supportsMTLGPUFamily(Apple5)) {
_properties.limits.maxUniformBufferRange = (uint32_t)min(_metalFeatures.maxMTLBufferSize, (VkDeviceSize)std::numeric_limits<uint32_t>::max());
}
#endif
#if MVK_IOS_OR_TVOS
_properties.limits.maxUniformBufferRange = (uint32_t)min(_metalFeatures.maxMTLBufferSize, (VkDeviceSize)std::numeric_limits<uint32_t>::max());
#endif
_properties.limits.maxStorageBufferRange = (uint32_t)min(_metalFeatures.maxMTLBufferSize, (VkDeviceSize)std::numeric_limits<uint32_t>::max());
_properties.limits.maxPushConstantsSize = (4 * KIBI);
_properties.limits.minMemoryMapAlignment = max(_metalFeatures.mtlBufferAlignment, (VkDeviceSize)64); // Vulkan spec requires MIN of 64
_properties.limits.minUniformBufferOffsetAlignment = _metalFeatures.mtlBufferAlignment;
_properties.limits.minStorageBufferOffsetAlignment = 16;
_properties.limits.bufferImageGranularity = _metalFeatures.mtlBufferAlignment;
_properties.limits.nonCoherentAtomSize = _metalFeatures.mtlBufferAlignment;
if ([_mtlDevice respondsToSelector: @selector(minimumLinearTextureAlignmentForPixelFormat:)]) {
// Figure out the greatest alignment required by all supported formats, and whether
// or not they only require alignment to a single texel. We'll use this information
// to fill out the VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT struct.
uint32_t maxStorage = 0, maxUniform = 0;
bool singleTexelStorage = true, singleTexelUniform = true;
// Visit the formats selected by the uniform/storage texel-buffer feature mask and accumulate,
// separately for each of the two usages, the largest offset alignment reported by Metal and
// whether aligning to a single texel is sufficient for every format with that usage.
// Writes the captured locals maxUniform/singleTexelUniform and maxStorage/singleTexelStorage.
_pixelFormats.enumerateSupportedFormats({0, 0, VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT}, true, [&](VkFormat vk) {
	MTLPixelFormat mtlFmt = _pixelFormats.getMTLPixelFormat(vk);
	if ( !mtlFmt ) { return false; }	// If format is invalid, avoid validation errors on MTLDevice format alignment calls

	// Prefer the texture-buffer alignment query when the device provides it,
	// falling back to the linear-texture alignment query otherwise.
	NSUInteger alignment;
	if ([_mtlDevice respondsToSelector: @selector(minimumTextureBufferAlignmentForPixelFormat:)]) {
		alignment = [_mtlDevice minimumTextureBufferAlignmentForPixelFormat: mtlFmt];
	} else {
		alignment = [_mtlDevice minimumLinearTextureAlignmentForPixelFormat: mtlFmt];
	}
	VkFormatProperties& props = _pixelFormats.getVkFormatProperties(vk);

	// For uncompressed formats, this is the size of a single texel.
	// Note that no implementations of Metal support compressed formats
	// in a linear texture (including texture buffers). It's likely that even
	// if they did, this would be the absolute minimum alignment.
	uint32_t texelSize = _pixelFormats.getBytesPerBlock(vk);

	// From the spec:
	//   "If the size of a single texel is a multiple of three bytes, then
	//    the size of a single component of the format is used instead."
	if (texelSize % 3 == 0) {
		switch (_pixelFormats.getFormatType(vk)) {
			case kMVKFormatColorInt8:
			case kMVKFormatColorUInt8:
				texelSize = 1;
				break;
			case kMVKFormatColorHalf:
			case kMVKFormatColorInt16:
			case kMVKFormatColorUInt16:
				texelSize = 2;
				break;
			case kMVKFormatColorFloat:
			case kMVKFormatColorInt32:
			case kMVKFormatColorUInt32:
			default:
				texelSize = 4;
				break;
		}
	}

	// Formats usable as UNIFORM texel buffers feed the uniform accumulators...
	if (mvkAreAllFlagsEnabled(props.bufferFeatures, VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) {
		maxUniform = max(maxUniform, uint32_t(alignment));
		if (alignment > texelSize) { singleTexelUniform = false; }
	}
	// ...and formats usable as STORAGE texel buffers feed the storage accumulators.
	// (These two were previously crossed, which exchanged the reported uniform and
	// storage alignment properties whenever the two format sets differed.)
	if (mvkAreAllFlagsEnabled(props.bufferFeatures, VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT)) {
		maxStorage = max(maxStorage, uint32_t(alignment));
		if (alignment > texelSize) { singleTexelStorage = false; }
	}
	return true;
});
_texelBuffAlignProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT;
_texelBuffAlignProperties.storageTexelBufferOffsetAlignmentBytes = maxStorage;
_texelBuffAlignProperties.storageTexelBufferOffsetSingleTexelAlignment = singleTexelStorage;
_texelBuffAlignProperties.uniformTexelBufferOffsetAlignmentBytes = maxUniform;
_texelBuffAlignProperties.uniformTexelBufferOffsetSingleTexelAlignment = singleTexelUniform;
_properties.limits.minTexelBufferOffsetAlignment = max(maxStorage, maxUniform);
} else {
#if MVK_TVOS
_properties.limits.minTexelBufferOffsetAlignment = 64;
#endif
#if MVK_IOS
if (supportsMTLFeatureSet(iOS_GPUFamily3_v1)) {
_properties.limits.minTexelBufferOffsetAlignment = 16;
} else {
_properties.limits.minTexelBufferOffsetAlignment = 64;
}
#endif
#if MVK_MACOS
_properties.limits.minTexelBufferOffsetAlignment = 256;
if (supportsMTLGPUFamily(Apple5)) {
_properties.limits.minTexelBufferOffsetAlignment = 16;
}
#endif
_texelBuffAlignProperties.storageTexelBufferOffsetAlignmentBytes = _properties.limits.minTexelBufferOffsetAlignment;
_texelBuffAlignProperties.storageTexelBufferOffsetSingleTexelAlignment = VK_FALSE;
_texelBuffAlignProperties.uniformTexelBufferOffsetAlignmentBytes = _properties.limits.minTexelBufferOffsetAlignment;
_texelBuffAlignProperties.uniformTexelBufferOffsetSingleTexelAlignment = VK_FALSE;
}
#if MVK_TVOS
if (mvkOSVersionIsAtLeast(13.0) && supportsMTLGPUFamily(Apple4)) {
_properties.limits.maxFragmentInputComponents = 124;
} else {
_properties.limits.maxFragmentInputComponents = 60;
}
if (supportsMTLFeatureSet(tvOS_GPUFamily2_v1)) {
_properties.limits.optimalBufferCopyOffsetAlignment = 16;
} else {
_properties.limits.optimalBufferCopyOffsetAlignment = 64;
}
_properties.limits.maxTessellationGenerationLevel = 16;
_properties.limits.maxTessellationPatchSize = 32;
#endif
#if MVK_IOS
if (mvkOSVersionIsAtLeast(13.0) && supportsMTLGPUFamily(Apple4)) {
_properties.limits.maxFragmentInputComponents = 124;
} else {
_properties.limits.maxFragmentInputComponents = 60;
}
if (supportsMTLFeatureSet(iOS_GPUFamily3_v1)) {
_properties.limits.optimalBufferCopyOffsetAlignment = 16;
} else {
_properties.limits.optimalBufferCopyOffsetAlignment = 64;
}
if (supportsMTLFeatureSet(iOS_GPUFamily5_v1)) {
_properties.limits.maxTessellationGenerationLevel = 64;
_properties.limits.maxTessellationPatchSize = 32;
} else if (supportsMTLFeatureSet(iOS_GPUFamily3_v2)) {
_properties.limits.maxTessellationGenerationLevel = 16;
_properties.limits.maxTessellationPatchSize = 32;
} else {
_properties.limits.maxTessellationGenerationLevel = 0;
_properties.limits.maxTessellationPatchSize = 0;
}
#endif
#if MVK_MACOS
_properties.limits.maxFragmentInputComponents = 124;
_properties.limits.optimalBufferCopyOffsetAlignment = 256;
if (supportsMTLGPUFamily(Apple5)) {
_properties.limits.optimalBufferCopyOffsetAlignment = 16;
}
if (supportsMTLFeatureSet(macOS_GPUFamily1_v2)) {
_properties.limits.maxTessellationGenerationLevel = 64;
_properties.limits.maxTessellationPatchSize = 32;
} else {
_properties.limits.maxTessellationGenerationLevel = 0;
_properties.limits.maxTessellationPatchSize = 0;
}
#endif
_properties.limits.maxVertexOutputComponents = _properties.limits.maxFragmentInputComponents;
if (_features.tessellationShader) {
_properties.limits.maxTessellationControlPerVertexInputComponents = _properties.limits.maxVertexOutputComponents;
_properties.limits.maxTessellationControlPerVertexOutputComponents = _properties.limits.maxTessellationControlPerVertexInputComponents;
// Reserve a few components for the tessellation levels, but never report fewer than 120.
_properties.limits.maxTessellationControlPerPatchOutputComponents = std::max(_properties.limits.maxFragmentInputComponents - 8, 120u);
_properties.limits.maxTessellationControlTotalOutputComponents = _properties.limits.maxTessellationPatchSize * _properties.limits.maxTessellationControlPerVertexOutputComponents + _properties.limits.maxTessellationControlPerPatchOutputComponents;
_properties.limits.maxTessellationEvaluationInputComponents = _properties.limits.maxTessellationControlPerVertexInputComponents;
_properties.limits.maxTessellationEvaluationOutputComponents = _properties.limits.maxTessellationEvaluationInputComponents;
} else {
_properties.limits.maxTessellationControlPerVertexInputComponents = 0;
_properties.limits.maxTessellationControlPerVertexOutputComponents = 0;
_properties.limits.maxTessellationControlPerPatchOutputComponents = 0;
_properties.limits.maxTessellationControlTotalOutputComponents = 0;
_properties.limits.maxTessellationEvaluationInputComponents = 0;
_properties.limits.maxTessellationEvaluationOutputComponents = 0;
}
_properties.limits.optimalBufferCopyRowPitchAlignment = 1;
_properties.limits.timestampComputeAndGraphics = VK_TRUE;
_properties.limits.timestampPeriod = _metalFeatures.counterSamplingPoints ? 1.0 : mvkGetTimestampPeriod();
_properties.limits.pointSizeRange[0] = 1;
switch (_properties.vendorID) {
case kAppleVendorId:
_properties.limits.pointSizeRange[1] = 511;
break;
case kIntelVendorId:
_properties.limits.pointSizeRange[1] = 256;
break;
case kAMDVendorId:
case kNVVendorId:
default:
_properties.limits.pointSizeRange[1] = 64;
break;
}
_properties.limits.pointSizeGranularity = 1;
_properties.limits.lineWidthRange[0] = 1;
_properties.limits.lineWidthRange[1] = 1;
_properties.limits.lineWidthGranularity = 0;
_properties.limits.standardSampleLocations = VK_TRUE;
_properties.limits.strictLines = _properties.vendorID == kIntelVendorId || _properties.vendorID == kNVVendorId;
VkExtent3D wgSize = mvkVkExtent3DFromMTLSize(_mtlDevice.maxThreadsPerThreadgroup);
_properties.limits.maxComputeWorkGroupSize[0] = wgSize.width;
_properties.limits.maxComputeWorkGroupSize[1] = wgSize.height;
_properties.limits.maxComputeWorkGroupSize[2] = wgSize.depth;
_properties.limits.maxComputeWorkGroupInvocations = max({wgSize.width, wgSize.height, wgSize.depth});
if ( [_mtlDevice respondsToSelector: @selector(maxThreadgroupMemoryLength)] ) {
_properties.limits.maxComputeSharedMemorySize = (uint32_t)_mtlDevice.maxThreadgroupMemoryLength;
} else {
#if MVK_TVOS
if (supportsMTLFeatureSet(tvOS_GPUFamily2_v1)) {
_properties.limits.maxComputeSharedMemorySize = (16 * KIBI);
} else {
_properties.limits.maxComputeSharedMemorySize = ((16 * KIBI) - 32);
}
#endif
#if MVK_IOS
if (supportsMTLFeatureSet(iOS_GPUFamily4_v1)) {
_properties.limits.maxComputeSharedMemorySize = (32 * KIBI);
} else if (supportsMTLFeatureSet(iOS_GPUFamily3_v1)) {
_properties.limits.maxComputeSharedMemorySize = (16 * KIBI);
} else {
_properties.limits.maxComputeSharedMemorySize = ((16 * KIBI) - 32);
}
#endif
#if MVK_MACOS
_properties.limits.maxComputeSharedMemorySize = (32 * KIBI);
#endif
}
// Max sum of API and shader values. Bias is not supported in the API, but can be applied in the shader directly.
// The lack of an API value is covered by VkPhysicalDevicePortabilitySubsetFeaturesKHR::samplerMipLodBias.
// Metal does not specify a limit for the shader value, so choose something reasonable.
_properties.limits.maxSamplerLodBias = 4;
_properties.limits.minTexelOffset = -8;
_properties.limits.maxTexelOffset = 7;
_properties.limits.minTexelGatherOffset = _properties.limits.minTexelOffset;
_properties.limits.maxTexelGatherOffset = _properties.limits.maxTexelOffset;
// Features with no specific limits - default to unlimited int values
_properties.limits.maxMemoryAllocationCount = kMVKUndefinedLargeUInt32;
_properties.limits.maxSamplerAllocationCount = getMaxSamplerCount();
_properties.limits.maxBoundDescriptorSets = kMVKMaxDescriptorSetCount;
_properties.limits.maxComputeWorkGroupCount[0] = kMVKUndefinedLargeUInt32;
_properties.limits.maxComputeWorkGroupCount[1] = kMVKUndefinedLargeUInt32;
_properties.limits.maxComputeWorkGroupCount[2] = kMVKUndefinedLargeUInt32;
_properties.limits.maxDrawIndexedIndexValue = numeric_limits<uint32_t>::max(); // Must be (2^32 - 1) to support fullDrawIndexUint32
_properties.limits.maxDrawIndirectCount = kMVKUndefinedLargeUInt32;
_properties.limits.maxClipDistances = kMVKUndefinedLargeUInt32;
_properties.limits.maxCullDistances = 0; // unsupported
_properties.limits.maxCombinedClipAndCullDistances = _properties.limits.maxClipDistances +
_properties.limits.maxCullDistances;
// Features with unknown limits - default to Vulkan required limits
_properties.limits.subPixelPrecisionBits = 4;
_properties.limits.subTexelPrecisionBits = 4;
_properties.limits.mipmapPrecisionBits = 4;
_properties.limits.viewportSubPixelBits = 0;
_properties.limits.discreteQueuePriorities = 2;
_properties.limits.minInterpolationOffset = -0.5;
_properties.limits.maxInterpolationOffset = 0.5;
_properties.limits.subPixelInterpolationOffsetBits = 4;
// Unsupported features - set to zeros generally
_properties.limits.sparseAddressSpaceSize = 0;
_properties.limits.maxGeometryShaderInvocations = 0;
_properties.limits.maxGeometryInputComponents = 0;
_properties.limits.maxGeometryOutputComponents = 0;
_properties.limits.maxGeometryOutputVertices = 0;
_properties.limits.maxGeometryTotalOutputComponents = 0;
}
#if MVK_MACOS
static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef propertyName) {
uint32_t value = 0;
CFTypeRef cfProp = IORegistryEntrySearchCFProperty(entry,
kIOServicePlane,