| /* |
| * MVKDevice.mm |
| * |
| * Copyright (c) 2015-2022 The Brenwill Workshop Ltd. (http://www.brenwill.com) |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "MVKInstance.h" |
| #include "MVKDevice.h" |
| #include "MVKQueue.h" |
| #include "MVKSurface.h" |
| #include "MVKBuffer.h" |
| #include "MVKImage.h" |
| #include "MVKSwapchain.h" |
| #include "MVKQueryPool.h" |
| #include "MVKShaderModule.h" |
| #include "MVKPipeline.h" |
| #include "MVKFramebuffer.h" |
| #include "MVKRenderPass.h" |
| #include "MVKSync.h" |
| #include "MVKCommandPool.h" |
| #include "MVKFoundation.h" |
| #include "MVKCodec.h" |
| #include "MVKEnvironment.h" |
| #include <MoltenVKShaderConverter/SPIRVToMSLConverter.h> |
| |
| #import "CAMetalLayer+MoltenVK.h" |
| |
| #include <cmath> |
| |
| using namespace std; |
| |
| |
| #if MVK_IOS_OR_TVOS |
| # include <UIKit/UIKit.h> |
| # define MVKViewClass UIView |
| #endif |
| #if MVK_MACOS |
| # include <AppKit/AppKit.h> |
| # define MVKViewClass NSView |
| #endif |
| |
| // Mac Catalyst does not support feature sets, so we redefine them to GPU families in MVKDevice.h. |
| #if MVK_MACCAT |
| #define supportsMTLFeatureSet(MFS) [_mtlDevice supportsFamily: MTLFeatureSet_ ##MFS] |
| #else |
| #define supportsMTLFeatureSet(MFS) [_mtlDevice supportsFeatureSet: MTLFeatureSet_ ##MFS] |
| #endif |
| |
| #define supportsMTLGPUFamily(GPUF) ([_mtlDevice respondsToSelector: @selector(supportsFamily:)] && [_mtlDevice supportsFamily: MTLGPUFamily ##GPUF]) |
| |
| // Suppress unused variable warnings to allow us to define these all in one place, |
| // but use them in platform-conditional code blocks. |
| #pragma clang diagnostic push |
| #pragma clang diagnostic ignored "-Wunused-variable" |
| |
| static const uint32_t kAMDVendorId = 0x1002; |
| static const uint32_t kAppleVendorId = 0x106b; |
| static const uint32_t kIntelVendorId = 0x8086; |
| static const uint32_t kNVVendorId = 0x10de; |
| |
| static const uint32_t kAMDRadeonRX5700DeviceId = 0x731f; |
| static const uint32_t kAMDRadeonRX5500DeviceId = 0x7340; |
| static const uint32_t kAMDRadeonRX6800DeviceId = 0x73bf; |
| static const uint32_t kAMDRadeonRX6700DeviceId = 0x73df; |
| |
| static const VkExtent2D kMetalSamplePositionGridSize = { 1, 1 }; |
| static const VkExtent2D kMetalSamplePositionGridSizeNotSupported = { 0, 0 }; |
| |
| #pragma clang diagnostic pop |
| |
| |
| #pragma mark - |
| #pragma mark MVKPhysicalDevice |
| |
| VkResult MVKPhysicalDevice::getExtensionProperties(const char* pLayerName, uint32_t* pCount, VkExtensionProperties* pProperties) { |
| return _supportedExtensions.getProperties(pCount, pProperties); |
| } |
| |
| void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures* features) { |
| *features = _features; |
| } |
| |
| void MVKPhysicalDevice::getFeatures(VkPhysicalDeviceFeatures2* features) { |
| features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; |
| features->features = _features; |
| for (auto* next = (VkBaseOutStructure*)features->pNext; next; next = next->pNext) { |
| switch ((uint32_t)next->sType) { |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { |
| auto* storageFeatures = (VkPhysicalDevice16BitStorageFeatures*)next; |
| storageFeatures->storageBuffer16BitAccess = true; |
| storageFeatures->uniformAndStorageBuffer16BitAccess = true; |
| storageFeatures->storagePushConstant16 = true; |
| storageFeatures->storageInputOutput16 = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: { |
| auto* storageFeatures = (VkPhysicalDevice8BitStorageFeaturesKHR*)next; |
| storageFeatures->storageBuffer8BitAccess = true; |
| storageFeatures->uniformAndStorageBuffer8BitAccess = true; |
| storageFeatures->storagePushConstant8 = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: { |
| auto* f16Features = (VkPhysicalDeviceFloat16Int8FeaturesKHR*)next; |
| f16Features->shaderFloat16 = true; |
| f16Features->shaderInt8 = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: { |
| auto* multiviewFeatures = (VkPhysicalDeviceMultiviewFeatures*)next; |
| multiviewFeatures->multiview = true; |
| multiviewFeatures->multiviewGeometryShader = false; |
| multiviewFeatures->multiviewTessellationShader = false; // FIXME |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { |
| auto* protectedMemFeatures = (VkPhysicalDeviceProtectedMemoryFeatures*)next; |
| protectedMemFeatures->protectedMemory = false; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { |
| auto* samplerYcbcrConvFeatures = (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)next; |
| samplerYcbcrConvFeatures->samplerYcbcrConversion = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: { |
| auto* shaderDrawParamsFeatures = (VkPhysicalDeviceShaderDrawParametersFeatures*)next; |
| shaderDrawParamsFeatures->shaderDrawParameters = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: { |
| auto* shaderSGTypesFeatures = (VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures*)next; |
| shaderSGTypesFeatures->shaderSubgroupExtendedTypes = _metalFeatures.simdPermute || _metalFeatures.quadPermute; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: { |
| auto* timelineSem4Features = (VkPhysicalDeviceTimelineSemaphoreFeatures*)next; |
| timelineSem4Features->timelineSemaphore = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: { |
| auto* uboLayoutFeatures = (VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR*)next; |
| uboLayoutFeatures->uniformBufferStandardLayout = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES: { |
| auto* varPtrFeatures = (VkPhysicalDeviceVariablePointerFeatures*)next; |
| varPtrFeatures->variablePointersStorageBuffer = true; |
| varPtrFeatures->variablePointers = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: { |
| auto* pDescIdxFeatures = (VkPhysicalDeviceDescriptorIndexingFeaturesEXT*)next; |
| pDescIdxFeatures->shaderInputAttachmentArrayDynamicIndexing = _metalFeatures.arrayOfTextures; |
| pDescIdxFeatures->shaderUniformTexelBufferArrayDynamicIndexing = _metalFeatures.arrayOfTextures; |
| pDescIdxFeatures->shaderStorageTexelBufferArrayDynamicIndexing = _metalFeatures.arrayOfTextures; |
| pDescIdxFeatures->shaderUniformBufferArrayNonUniformIndexing = false; |
| pDescIdxFeatures->shaderSampledImageArrayNonUniformIndexing = _metalFeatures.arrayOfTextures && _metalFeatures.arrayOfSamplers; |
| pDescIdxFeatures->shaderStorageBufferArrayNonUniformIndexing = false; |
| pDescIdxFeatures->shaderStorageImageArrayNonUniformIndexing = _metalFeatures.arrayOfTextures; |
| pDescIdxFeatures->shaderInputAttachmentArrayNonUniformIndexing = _metalFeatures.arrayOfTextures; |
| pDescIdxFeatures->shaderUniformTexelBufferArrayNonUniformIndexing = _metalFeatures.arrayOfTextures; |
| pDescIdxFeatures->shaderStorageTexelBufferArrayNonUniformIndexing = _metalFeatures.arrayOfTextures; |
| pDescIdxFeatures->descriptorBindingUniformBufferUpdateAfterBind = true; |
| pDescIdxFeatures->descriptorBindingSampledImageUpdateAfterBind = true; |
| pDescIdxFeatures->descriptorBindingStorageImageUpdateAfterBind = true; |
| pDescIdxFeatures->descriptorBindingStorageBufferUpdateAfterBind = true; |
| pDescIdxFeatures->descriptorBindingUniformTexelBufferUpdateAfterBind = true; |
| pDescIdxFeatures->descriptorBindingStorageTexelBufferUpdateAfterBind = true; |
| pDescIdxFeatures->descriptorBindingUpdateUnusedWhilePending = true; |
| pDescIdxFeatures->descriptorBindingPartiallyBound = true; |
| pDescIdxFeatures->descriptorBindingVariableDescriptorCount = true; |
| pDescIdxFeatures->runtimeDescriptorArray = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: { |
| auto* interlockFeatures = (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT*)next; |
| interlockFeatures->fragmentShaderSampleInterlock = _metalFeatures.rasterOrderGroups; |
| interlockFeatures->fragmentShaderPixelInterlock = _metalFeatures.rasterOrderGroups; |
| interlockFeatures->fragmentShaderShadingRateInterlock = false; // Requires variable rate shading; not supported yet in Metal |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT: { |
| auto* hostQueryResetFeatures = (VkPhysicalDeviceHostQueryResetFeaturesEXT*)next; |
| hostQueryResetFeatures->hostQueryReset = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: { |
| auto *imageRobustnessFeatures = (VkPhysicalDeviceImageRobustnessFeaturesEXT*)next; |
| imageRobustnessFeatures->robustImageAccess = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: { |
| auto* privateDataFeatures = (VkPhysicalDevicePrivateDataFeaturesEXT*)next; |
| privateDataFeatures->privateData = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: { |
| auto* robustness2Features = (VkPhysicalDeviceRobustness2FeaturesEXT*)next; |
| robustness2Features->robustBufferAccess2 = false; |
| robustness2Features->robustImageAccess2 = true; |
| robustness2Features->nullDescriptor = false; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: { |
| auto* scalarLayoutFeatures = (VkPhysicalDeviceScalarBlockLayoutFeaturesEXT*)next; |
| scalarLayoutFeatures->scalarBlockLayout = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: { |
| auto* subgroupSizeFeatures = (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT*)next; |
| subgroupSizeFeatures->subgroupSizeControl = _metalFeatures.simdPermute || _metalFeatures.quadPermute; |
| subgroupSizeFeatures->computeFullSubgroups = _metalFeatures.simdPermute || _metalFeatures.quadPermute; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: { |
| auto* texelBuffAlignFeatures = (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT*)next; |
| texelBuffAlignFeatures->texelBufferAlignment = _metalFeatures.texelBuffers && [_mtlDevice respondsToSelector: @selector(minimumLinearTextureAlignmentForPixelFormat:)]; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXTURE_COMPRESSION_ASTC_HDR_FEATURES_EXT: { |
| auto* astcHDRFeatures = (VkPhysicalDeviceTextureCompressionASTCHDRFeaturesEXT*)next; |
| astcHDRFeatures->textureCompressionASTC_HDR = _metalFeatures.astcHDRTextures; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: { |
| auto* divisorFeatures = (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT*)next; |
| divisorFeatures->vertexAttributeInstanceRateDivisor = true; |
| divisorFeatures->vertexAttributeInstanceRateZeroDivisor = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_FEATURES_KHR: { |
| auto* portabilityFeatures = (VkPhysicalDevicePortabilitySubsetFeaturesKHR*)next; |
| portabilityFeatures->constantAlphaColorBlendFactors = true; |
| portabilityFeatures->events = true; |
| portabilityFeatures->imageViewFormatReinterpretation = true; |
| portabilityFeatures->imageViewFormatSwizzle = (_metalFeatures.nativeTextureSwizzle || |
| mvkConfig().fullImageViewSwizzle); |
| portabilityFeatures->imageView2DOn3DImage = false; |
| portabilityFeatures->multisampleArrayImage = _metalFeatures.multisampleArrayTextures; |
| portabilityFeatures->mutableComparisonSamplers = _metalFeatures.depthSampleCompare; |
| portabilityFeatures->pointPolygons = false; |
| portabilityFeatures->samplerMipLodBias = false; |
| portabilityFeatures->separateStencilMaskRef = true; |
| portabilityFeatures->shaderSampleRateInterpolationFunctions = _metalFeatures.pullModelInterpolation; |
| portabilityFeatures->tessellationIsolines = false; |
| portabilityFeatures->tessellationPointMode = false; |
| portabilityFeatures->triangleFans = false; |
| portabilityFeatures->vertexAttributeAccessBeyondStride = true; // Costs additional buffers. Should make configuration switch. |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL: { |
| auto* shaderIntFuncsFeatures = (VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL*)next; |
| shaderIntFuncsFeatures->shaderIntegerFunctions2 = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: { |
| auto* inlineUniformBlockFeatures = (VkPhysicalDeviceInlineUniformBlockFeaturesEXT*)next; |
| inlineUniformBlockFeatures->inlineUniformBlock = true; |
| inlineUniformBlockFeatures->descriptorBindingInlineUniformBlockUpdateAfterBind = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES: { |
| auto* imagelessFramebufferFeatures = (VkPhysicalDeviceImagelessFramebufferFeaturesKHR*)next; |
| imagelessFramebufferFeatures->imagelessFramebuffer = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES: { |
| auto* dynamicRenderingFeatures = (VkPhysicalDeviceDynamicRenderingFeatures*)next; |
| dynamicRenderingFeatures->dynamicRendering = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES: { |
| auto* separateDepthStencilLayoutsFeatures = (VkPhysicalDeviceSeparateDepthStencilLayoutsFeatures*)next; |
| separateDepthStencilLayoutsFeatures->separateDepthStencilLayouts = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR: { |
| auto* barycentricProperties = (VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR*)next; |
| barycentricProperties->fragmentShaderBarycentric = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES: { |
| auto* bufferDeviceAddressFeatures = (VkPhysicalDeviceBufferDeviceAddressFeatures*)next; |
| bufferDeviceAddressFeatures->bufferDeviceAddress = true; |
| bufferDeviceAddressFeatures->bufferDeviceAddressCaptureReplay = false; |
| bufferDeviceAddressFeatures->bufferDeviceAddressMultiDevice = false; |
| } |
| default: |
| break; |
| } |
| } |
| } |
| |
| void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties* properties) { |
| *properties = _properties; |
| } |
| |
| void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) { |
| properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; |
| properties->properties = _properties; |
| for (auto* next = (VkBaseOutStructure*)properties->pNext; next; next = next->pNext) { |
| switch ((uint32_t)next->sType) { |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: { |
| auto* depthStencilResolveProps = (VkPhysicalDeviceDepthStencilResolveProperties*)next; |
| |
| // We can always support resolve from sample zero. Other modes require additional capabilities. |
| depthStencilResolveProps->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; |
| if (_metalFeatures.depthResolve) { |
| depthStencilResolveProps->supportedDepthResolveModes |= VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT; |
| } |
| // Metal allows you to set the stencil resolve filter to either |
| // Sample0 or DepthResolvedSample--in other words, you can always use sample 0, |
| // but you can also use the sample chosen for depth resolve. This is impossible |
| // to express in Vulkan. |
| depthStencilResolveProps->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; |
| depthStencilResolveProps->independentResolveNone = true; |
| depthStencilResolveProps->independentResolve = true; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: { |
| auto* physicalDeviceDriverProps = (VkPhysicalDeviceDriverPropertiesKHR*)next; |
| strcpy(physicalDeviceDriverProps->driverName, "MoltenVK"); |
| strcpy(physicalDeviceDriverProps->driverInfo, mvkGetMoltenVKVersionString(MVK_VERSION).c_str()); |
| physicalDeviceDriverProps->driverID = VK_DRIVER_ID_MOLTENVK; |
| physicalDeviceDriverProps->conformanceVersion.major = 0; |
| physicalDeviceDriverProps->conformanceVersion.minor = 0; |
| physicalDeviceDriverProps->conformanceVersion.subminor = 0; |
| physicalDeviceDriverProps->conformanceVersion.patch = 0; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { |
| populate((VkPhysicalDeviceIDProperties*)next); |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: { |
| auto* maint3Props = (VkPhysicalDeviceMaintenance3Properties*)next; |
| maint3Props->maxPerSetDescriptors = (_metalFeatures.maxPerStageBufferCount + _metalFeatures.maxPerStageTextureCount + _metalFeatures.maxPerStageSamplerCount) * 4; |
| maint3Props->maxMemoryAllocationSize = _metalFeatures.maxMTLBufferSize; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: { |
| auto* multiviewProps = (VkPhysicalDeviceMultiviewProperties*)next; |
| multiviewProps->maxMultiviewViewCount = 32; |
| if (canUseInstancingForMultiview()) { |
| multiviewProps->maxMultiviewInstanceIndex = std::numeric_limits<uint32_t>::max() / 32; |
| } else { |
| multiviewProps->maxMultiviewInstanceIndex = std::numeric_limits<uint32_t>::max(); |
| } |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { |
| auto* pointClipProps = (VkPhysicalDevicePointClippingProperties*)next; |
| pointClipProps->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { |
| auto* protectedMemProps = (VkPhysicalDeviceProtectedMemoryProperties*)next; |
| protectedMemProps->protectedNoFault = false; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { |
| auto* pushDescProps = (VkPhysicalDevicePushDescriptorPropertiesKHR*)next; |
| pushDescProps->maxPushDescriptors = _properties.limits.maxPerStageResources; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: { |
| auto* subgroupProps = (VkPhysicalDeviceSubgroupProperties*)next; |
| subgroupProps->subgroupSize = _metalFeatures.maxSubgroupSize; |
| subgroupProps->supportedStages = VK_SHADER_STAGE_COMPUTE_BIT; |
| if (_features.tessellationShader) { |
| subgroupProps->supportedStages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; |
| } |
| if (mvkOSVersionIsAtLeast(10.15, 13.0)) { |
| subgroupProps->supportedStages |= VK_SHADER_STAGE_FRAGMENT_BIT; |
| } |
| subgroupProps->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT; |
| if (_metalFeatures.simdPermute || _metalFeatures.quadPermute) { |
| subgroupProps->supportedOperations |= VK_SUBGROUP_FEATURE_VOTE_BIT | |
| VK_SUBGROUP_FEATURE_BALLOT_BIT | |
| VK_SUBGROUP_FEATURE_SHUFFLE_BIT | |
| VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT; |
| } |
| if (_metalFeatures.simdReduction) { |
| subgroupProps->supportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT; |
| } |
| if (_metalFeatures.quadPermute) { |
| subgroupProps->supportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT; |
| } |
| subgroupProps->quadOperationsInAllStages = _metalFeatures.quadPermute; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: { |
| auto* timelineSem4Props = (VkPhysicalDeviceTimelineSemaphoreProperties*)next; |
| timelineSem4Props->maxTimelineSemaphoreValueDifference = std::numeric_limits<uint64_t>::max(); |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: { |
| bool isTier2 = isUsingMetalArgumentBuffers() && (_mtlDevice.argumentBuffersSupport >= MTLArgumentBuffersTier2); |
| uint32_t maxSampCnt = getMaxSamplerCount(); |
| |
| auto* pDescIdxProps = (VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)next; |
| pDescIdxProps->maxUpdateAfterBindDescriptorsInAllPools = kMVKUndefinedLargeUInt32; |
| pDescIdxProps->shaderUniformBufferArrayNonUniformIndexingNative = false; |
| pDescIdxProps->shaderSampledImageArrayNonUniformIndexingNative = _metalFeatures.arrayOfTextures && _metalFeatures.arrayOfSamplers; |
| pDescIdxProps->shaderStorageBufferArrayNonUniformIndexingNative = false; |
| pDescIdxProps->shaderStorageImageArrayNonUniformIndexingNative = _metalFeatures.arrayOfTextures; |
| pDescIdxProps->shaderInputAttachmentArrayNonUniformIndexingNative = _metalFeatures.arrayOfTextures; |
| pDescIdxProps->robustBufferAccessUpdateAfterBind = _features.robustBufferAccess; |
| pDescIdxProps->quadDivergentImplicitLod = false; |
| pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSamplers = isTier2 ? maxSampCnt : _properties.limits.maxPerStageDescriptorSamplers; |
| pDescIdxProps->maxPerStageDescriptorUpdateAfterBindUniformBuffers = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorUniformBuffers; |
| pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageBuffers = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageBuffers; |
| pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSampledImages = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorSampledImages; |
| pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageImages = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageImages; |
| pDescIdxProps->maxPerStageDescriptorUpdateAfterBindInputAttachments = _properties.limits.maxPerStageDescriptorInputAttachments; |
| pDescIdxProps->maxPerStageUpdateAfterBindResources = isTier2 ? 500000 : _properties.limits.maxPerStageResources; |
| pDescIdxProps->maxDescriptorSetUpdateAfterBindSamplers = isTier2 ? maxSampCnt : _properties.limits.maxDescriptorSetSamplers; |
| pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffers = isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffers; |
| pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffersDynamic; |
| pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffers = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffers; |
| pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffersDynamic; |
| pDescIdxProps->maxDescriptorSetUpdateAfterBindSampledImages = isTier2 ? 500000 : _properties.limits.maxDescriptorSetSampledImages; |
| pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageImages = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageImages; |
| pDescIdxProps->maxDescriptorSetUpdateAfterBindInputAttachments = _properties.limits.maxDescriptorSetInputAttachments; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: { |
| auto* inlineUniformBlockProps = (VkPhysicalDeviceInlineUniformBlockPropertiesEXT*)next; |
| inlineUniformBlockProps->maxInlineUniformBlockSize = _metalFeatures.dynamicMTLBufferSize; |
| inlineUniformBlockProps->maxPerStageDescriptorInlineUniformBlocks = _metalFeatures.dynamicMTLBufferSize ? _metalFeatures.maxPerStageDynamicMTLBufferCount - 1 : 0; // Less one for push constants |
| inlineUniformBlockProps->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = inlineUniformBlockProps->maxPerStageDescriptorInlineUniformBlocks; |
| inlineUniformBlockProps->maxDescriptorSetInlineUniformBlocks = (inlineUniformBlockProps->maxPerStageDescriptorInlineUniformBlocks * 4); |
| inlineUniformBlockProps->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = (inlineUniformBlockProps->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks * 4); |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: { |
| auto* robustness2Props = (VkPhysicalDeviceRobustness2PropertiesEXT*)next; |
| // This isn't implemented yet, but when it is, I expect that we'll wind up |
| // doing it manually. |
| robustness2Props->robustStorageBufferAccessSizeAlignment = 1; |
| robustness2Props->robustUniformBufferAccessSizeAlignment = 1; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: { |
| auto* subgroupSizeProps = (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT*)next; |
| subgroupSizeProps->minSubgroupSize = _metalFeatures.minSubgroupSize; |
| subgroupSizeProps->maxSubgroupSize = _metalFeatures.maxSubgroupSize; |
| subgroupSizeProps->maxComputeWorkgroupSubgroups = _properties.limits.maxComputeWorkGroupInvocations / _metalFeatures.minSubgroupSize; |
| subgroupSizeProps->requiredSubgroupSizeStages = 0; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: { |
| auto* texelBuffAlignProps = (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT*)next; |
| // Save the 'next' pointer; we'll unintentionally overwrite it |
| // on the next line. Put it back when we're done. |
| void* savedNext = texelBuffAlignProps->pNext; |
| *texelBuffAlignProps = _texelBuffAlignProperties; |
| texelBuffAlignProps->pNext = savedNext; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { |
| auto* divisorProps = (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT*)next; |
| divisorProps->maxVertexAttribDivisor = kMVKUndefinedLargeUInt32; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_PROPERTIES_KHR: { |
| auto* portabilityProps = (VkPhysicalDevicePortabilitySubsetPropertiesKHR*)next; |
| portabilityProps->minVertexInputBindingStrideAlignment = (uint32_t)_metalFeatures.vertexStrideAlignment; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: { |
| auto* sampLocnProps = (VkPhysicalDeviceSampleLocationsPropertiesEXT*)next; |
| sampLocnProps->sampleLocationSampleCounts = _metalFeatures.supportedSampleCounts; |
| sampLocnProps->maxSampleLocationGridSize = kMetalSamplePositionGridSize; |
| sampLocnProps->sampleLocationCoordinateRange[0] = 0.0; |
| sampLocnProps->sampleLocationCoordinateRange[1] = (15.0 / 16.0); |
| sampLocnProps->sampleLocationSubPixelBits = 4; |
| sampLocnProps->variableSampleLocations = VK_FALSE; |
| break; |
| } |
| case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_PROPERTIES_KHR: { |
| auto* barycentricProperties = (VkPhysicalDeviceFragmentShaderBarycentricPropertiesKHR*)next; |
| barycentricProperties->triStripVertexOrderIndependentOfProvokingVertex = false; |
| break; |
| } |
| default: |
| break; |
| } |
| } |
| } |
| |
| // Populates the device ID properties structure |
| void MVKPhysicalDevice::populate(VkPhysicalDeviceIDProperties* pDevIdProps) { |
| |
| uint8_t* uuid; |
| size_t uuidComponentOffset; |
| |
| // ---- Device ID ---------------------------------------------- |
| uuid = pDevIdProps->deviceUUID; |
| uuidComponentOffset = 0; |
| mvkClear(uuid, VK_UUID_SIZE); |
| |
| // First 4 bytes contains GPU vendor ID |
| uint32_t vendorID = _properties.vendorID; |
| *(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(vendorID); |
| uuidComponentOffset += sizeof(vendorID); |
| |
| // Next 4 bytes contains GPU device ID |
| uint32_t deviceID = _properties.deviceID; |
| *(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(deviceID); |
| uuidComponentOffset += sizeof(deviceID); |
| |
| // Last 8 bytes contain the GPU registry ID |
| uint64_t regID = mvkGetRegistryID(_mtlDevice); |
| *(uint64_t*)&uuid[uuidComponentOffset] = NSSwapHostLongLongToBig(regID); |
| uuidComponentOffset += sizeof(regID); |
| |
| |
| // ---- Driver ID ---------------------------------------------- |
| uuid = pDevIdProps->driverUUID; |
| uuidComponentOffset = 0; |
| mvkClear(uuid, VK_UUID_SIZE); |
| |
| // First 4 bytes contains MoltenVK prefix |
| const char* mvkPfx = "MVK"; |
| size_t mvkPfxLen = strlen(mvkPfx); |
| mvkCopy(&uuid[uuidComponentOffset], (uint8_t*)mvkPfx, mvkPfxLen); |
| uuidComponentOffset += mvkPfxLen + 1; |
| |
| // Next 4 bytes contains MoltenVK version |
| uint32_t mvkVersion = MVK_VERSION; |
| *(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(mvkVersion); |
| uuidComponentOffset += sizeof(mvkVersion); |
| |
| // Next 4 bytes contains highest GPU capability supported by this device |
| uint32_t gpuCap = getHighestGPUCapability(); |
| *(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(gpuCap); |
| uuidComponentOffset += sizeof(gpuCap); |
| |
| // ---- LUID ignored for Metal devices ------------------------ |
| mvkClear(pDevIdProps->deviceLUID, VK_LUID_SIZE); |
| pDevIdProps->deviceNodeMask = 0; |
| pDevIdProps->deviceLUIDValid = VK_FALSE; |
| } |
| |
| void MVKPhysicalDevice::getFormatProperties(VkFormat format, VkFormatProperties* pFormatProperties) { |
| *pFormatProperties = _pixelFormats.getVkFormatProperties(format); |
| } |
| |
| void MVKPhysicalDevice::getFormatProperties(VkFormat format, VkFormatProperties2KHR* pFormatProperties) { |
| pFormatProperties->sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2_KHR; |
| getFormatProperties(format, &pFormatProperties->formatProperties); |
| } |
| |
| void MVKPhysicalDevice::getMultisampleProperties(VkSampleCountFlagBits samples, |
| VkMultisamplePropertiesEXT* pMultisampleProperties) { |
| if (pMultisampleProperties) { |
| pMultisampleProperties->maxSampleLocationGridSize = (mvkIsOnlyAnyFlagEnabled(samples, _metalFeatures.supportedSampleCounts) |
| ? kMetalSamplePositionGridSize |
| : kMetalSamplePositionGridSizeNotSupported); |
| } |
| } |
| |
// Reports the maximum extent, mip level count, array layer count and sample counts
// available to an image created with the given format, type, tiling, usage and flags.
// Returns VK_ERROR_FORMAT_NOT_SUPPORTED when the combination cannot be used, otherwise
// VK_SUCCESS. When pImageFormatProperties is null, only the format itself is validated.
VkResult MVKPhysicalDevice::getImageFormatProperties(VkFormat format,
													  VkImageType type,
													  VkImageTiling tiling,
													  VkImageUsageFlags usage,
													  VkImageCreateFlags flags,
													  VkImageFormatProperties* pImageFormatProperties) {

	if ( !_pixelFormats.isSupported(format) ) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
	if ( !pImageFormatProperties ) { return VK_SUCCESS; }

	// Clear the output up front, so it remains cleared on every rejection path below.
	mvkClear(pImageFormatProperties);

	// Metal can create neither uncompressed views of compressed formats, nor split-instance images.
	if (mvkIsAnyFlagEnabled(flags, (VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT |
									VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT))) {
		return VK_ERROR_FORMAT_NOT_SUPPORTED;
	}

	// Characteristics of the format and of the request, named once for use below.
	const MVKFormatType fmtType = _pixelFormats.getFormatType(format);
	const auto planeCnt = _pixelFormats.getChromaSubsamplingPlaneCount(format);
	const bool isChromaSubsampled = planeCnt > 0;
	const bool isMultiPlanar = planeCnt > 1;
	const bool isBGRG = isChromaSubsampled && !isMultiPlanar && _pixelFormats.getBlockTexelSize(format).width > 1;
	const bool isDepthStencil = (fmtType == kMVKFormatDepthStencil);
	const bool isCompressed = (fmtType == kMVKFormatCompressed);
	const bool isLinear = (tiling == VK_IMAGE_TILING_LINEAR);
	const bool isCubeCompatible = mvkIsAnyFlagEnabled(flags, VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT);
	const bool hasAttachmentUsage = mvkIsAnyFlagEnabled(usage, (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
																 VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
																 VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT |
																 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT));

	// Only a multi-planar format can use disjoint memory.
	if (mvkIsAnyFlagEnabled(flags, VK_IMAGE_CREATE_DISJOINT_BIT) && !isMultiPlanar) {
		return VK_ERROR_FORMAT_NOT_SUPPORTED;
	}

	const VkPhysicalDeviceLimits& limits = _properties.limits;
	VkExtent3D maxExt = { 1, 1, 1 };
	uint32_t maxLevels = 1;
	uint32_t maxLayers = hasAttachmentUsage ? limits.maxFramebufferLayers : limits.maxImageArrayLayers;

	// Multisampling is on offer only if the format has the MSAA capability.
	VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
	if (mvkAreAllFlagsEnabled(_pixelFormats.getCapabilities(format), kMVKMTLFmtCapsMSAA)) {
		sampleCounts = _metalFeatures.supportedSampleCounts;
	}

	switch (type) {
		case VK_IMAGE_TYPE_1D:
			maxExt.height = 1;
			maxExt.depth = 1;
			if ( !mvkConfig().texture1DAs2D ) {
				// Native Metal 1D textures cannot be attachments, cannot use linear tiling,
				// and cannot hold depth/stencil, compressed or chroma-subsampled formats.
				if (hasAttachmentUsage || isLinear || isDepthStencil || isCompressed || isChromaSubsampled) {
					return VK_ERROR_FORMAT_NOT_SUPPORTED;
				}
				maxExt.width = limits.maxImageDimension1D;
				maxLevels = 1;
				sampleCounts = VK_SAMPLE_COUNT_1_BIT;
				break;
			}

			// Otherwise the 1D image is handled by the 2D rules that follow.
			// A 1D image whose chroma is subsampled vertically (e.g. 4:2:0) doesn't make much sense.
			if (isChromaSubsampled && _pixelFormats.getBlockTexelSize(format).height > 1) {
				return VK_ERROR_FORMAT_NOT_SUPPORTED;
			}
			// Vulkan doesn't allow 1D multisampled images.
			sampleCounts = VK_SAMPLE_COUNT_1_BIT;
			/* fallthrough */
		case VK_IMAGE_TYPE_2D:
			if (isCubeCompatible) {
				// Neither chroma-subsampled cube images nor 1D cube images are supported.
				if (isChromaSubsampled || type == VK_IMAGE_TYPE_1D) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
				maxExt.width = limits.maxImageDimensionCube;
				maxExt.height = limits.maxImageDimensionCube;
			} else {
				maxExt.width = limits.maxImageDimension2D;
				maxExt.height = (type == VK_IMAGE_TYPE_1D) ? 1 : limits.maxImageDimension2D;
			}
			maxExt.depth = 1;

			if (isLinear) {
				// Metal places extra restrictions on linear textures:
				//   - They may not be depth/stencil, compressed, or (single-plane) chroma-subsampled.
				//     Multi-planar formats are allowed, because they internally use non-subsampled formats.
				if (isDepthStencil || isCompressed || isBGRG) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
#if !MVK_APPLE_SILICON
				//   - On macOS IMR GPUs, they may not be used as framebuffer attachments.
				if (hasAttachmentUsage) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
#endif
				//   - They are limited to a single mip level, layer and sample.
				maxLevels = 1;
				maxLayers = 1;
				sampleCounts = VK_SAMPLE_COUNT_1_BIT;
			} else {
				VkFormatProperties fmtProps;
				getFormatProperties(format, &fmtProps);
				const bool isRenderable = mvkIsAnyFlagEnabled(fmtProps.optimalTilingFeatures,
															  VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT|VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT);

				// Multisampling is unavailable for compressed, chroma-subsampled,
				// cube-compatible, and non-renderable textures.
				if (isCompressed || isChromaSubsampled || isCubeCompatible || !isRenderable) {
					sampleCounts = VK_SAMPLE_COUNT_1_BIT;
				}

				// BGRG and GBGR images get a single mip level. Everything else, including the
				// other chroma-subsampled formats, gets a full mip chain. (isBGRG is only ever
				// true for a chroma-subsampled format.)
				maxLevels = isBGRG ? 1 : mvkMipmapLevels3D(maxExt);

				// Any chroma-subsampled image is restricted to one layer.
				if (isChromaSubsampled) { maxLayers = 1; }
			}
			break;

		case VK_IMAGE_TYPE_3D:
			// Metal does not allow linear tiling on 3D textures.
			if (isLinear) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }

			// Depth/stencil and chroma-subsampled formats are rejected for 3D textures.
			if (isDepthStencil || isChromaSubsampled) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }
#if MVK_IOS_OR_TVOS
			// Compressed 3D textures need native support.
			if (isCompressed && !_metalFeatures.native3DCompressedTextures) {
				return VK_ERROR_FORMAT_NOT_SUPPORTED;
			}
#endif
#if MVK_MACOS
			// A compressed format is unusable if there is neither a codec nor native support for it.
			if (isCompressed && !mvkCanDecodeFormat(format) && !_metalFeatures.native3DCompressedTextures) {
				return VK_ERROR_FORMAT_NOT_SUPPORTED;
			}
#endif
#if MVK_APPLE_SILICON
			// ETC2 and EAC formats aren't supported for 3D textures.
			switch (format) {
				case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
				case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
				case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
				case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
				case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
				case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
				case VK_FORMAT_EAC_R11_UNORM_BLOCK:
				case VK_FORMAT_EAC_R11_SNORM_BLOCK:
				case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
				case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
					return VK_ERROR_FORMAT_NOT_SUPPORTED;
				default:
					break;
			}
#endif
			maxExt.width = limits.maxImageDimension3D;
			maxExt.height = limits.maxImageDimension3D;
			maxExt.depth = limits.maxImageDimension3D;
			maxLevels = mvkMipmapLevels3D(maxExt);
			maxLayers = 1;
			sampleCounts = VK_SAMPLE_COUNT_1_BIT;
			break;

		default:
			return VK_ERROR_FORMAT_NOT_SUPPORTED;	// Illegal VkImageType
	}

	pImageFormatProperties->maxExtent = maxExt;
	pImageFormatProperties->maxMipLevels = maxLevels;
	pImageFormatProperties->maxArrayLayers = maxLayers;
	pImageFormatProperties->sampleCounts = sampleCounts;
	pImageFormatProperties->maxResourceSize = kMVKUndefinedLargeUInt64;

	return VK_SUCCESS;
}
| |
// Extended form of the image format query. Processes the structures chained onto the
// input info and onto the output properties, then delegates to the basic overload,
// passing a usage that may have been widened by a separate stencil usage.
VkResult MVKPhysicalDevice::getImageFormatProperties(const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
													  VkImageFormatProperties2* pImageFormatProperties) {

	const VkFormat vkFmt = pImageFormatInfo->format;
	VkImageUsageFlags combinedUsage = pImageFormatInfo->usage;

	// Pass 1: structures chained onto the input info.
	const auto* pInNext = (const VkBaseInStructure*)pImageFormatInfo->pNext;
	while (pInNext) {
		if (pInNext->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO) {
			// External memory support for MTLTexture is being queried. Fill in every
			// VkExternalImageFormatProperties found on the output chain.
			auto* pExtImgFmtInfo = (const VkPhysicalDeviceExternalImageFormatInfo*)pInNext;
			auto* pOutNext = (VkBaseOutStructure*)pImageFormatProperties->pNext;
			while (pOutNext) {
				if (pOutNext->sType == VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES) {
					auto* pExtImgFmtProps = (VkExternalImageFormatProperties*)pOutNext;
					pExtImgFmtProps->externalMemoryProperties = getExternalImageProperties(pExtImgFmtInfo->handleType);
				}
				pOutNext = pOutNext->pNext;
			}
		} else if (pInNext->sType == VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO) {
			// A separate stencil usage is merged into the overall usage,
			// but only when the format actually has a stencil component.
			if (_pixelFormats.isStencilFormat(_pixelFormats.getMTLPixelFormat(vkFmt))) {
				combinedUsage |= ((const VkImageStencilUsageCreateInfo*)pInNext)->stencilUsage;
			}
		}
		pInNext = pInNext->pNext;
	}

	// Pass 2: structures chained onto the output properties.
	auto* pPropsNext = (VkBaseOutStructure*)pImageFormatProperties->pNext;
	while (pPropsNext) {
		if (pPropsNext->sType == VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES) {
			// One combined image sampler descriptor per plane, and never fewer than one.
			auto* samplerYcbcrConvProps = (VkSamplerYcbcrConversionImageFormatProperties*)pPropsNext;
			samplerYcbcrConvProps->combinedImageSamplerDescriptorCount = std::max(_pixelFormats.getChromaSubsamplingPlaneCount(vkFmt), (uint8_t)1u);
		}
		pPropsNext = pPropsNext->pNext;
	}

	if ( !_pixelFormats.isSupported(vkFmt) ) { return VK_ERROR_FORMAT_NOT_SUPPORTED; }

	return getImageFormatProperties(vkFmt,
									pImageFormatInfo->type,
									pImageFormatInfo->tiling,
									combinedUsage,
									pImageFormatInfo->flags,
									&pImageFormatProperties->imageFormatProperties);
}
| |
// Fills in the external memory properties of the output structure, using
// the properties associated with the handle type named in the query info.
void MVKPhysicalDevice::getExternalBufferProperties(const VkPhysicalDeviceExternalBufferInfo* pExternalBufferInfo,
													VkExternalBufferProperties* pExternalBufferProperties) {
	const VkExternalMemoryProperties& extMemProps = getExternalBufferProperties(pExternalBufferInfo->handleType);
	pExternalBufferProperties->externalMemoryProperties = extMemProps;
}
| |
// Zero-initialized external memory properties, returned for any handle type that is not recognized.
// It is handed out by non-const reference, so callers are expected not to modify it.
static VkExternalMemoryProperties _emptyExtMemProps = {};

// Returns the external memory properties of buffers for the handle type.
// Only the MTLBuffer handle type has properties of its own.
VkExternalMemoryProperties& MVKPhysicalDevice::getExternalBufferProperties(VkExternalMemoryHandleTypeFlagBits handleType) {
	if (handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_KHR) {
		return _mtlBufferExternalMemoryProperties;
	}
	return _emptyExtMemProps;
}
| |
// Returns the external memory properties of images for the handle type.
// Only the MTLTexture handle type has properties of its own; any other
// handle type receives the shared empty properties.
VkExternalMemoryProperties& MVKPhysicalDevice::getExternalImageProperties(VkExternalMemoryHandleTypeFlagBits handleType) {
	if (handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_KHR) {
		return _mtlTextureExternalMemoryProperties;
	}
	return _emptyExtMemProps;
}
| |
// Template for the reply to every external fence query: all capability fields are zero.
static const VkExternalFenceProperties _emptyExtFenceProps = {VK_STRUCTURE_TYPE_EXTERNAL_FENCE_PROPERTIES, nullptr, 0, 0, 0};

// Replies to an external fence query with the empty template, whatever the query info contains.
// The pNext chain supplied by the caller on the output structure is left in place.
void MVKPhysicalDevice::getExternalFenceProperties(const VkPhysicalDeviceExternalFenceInfo* pExternalFenceInfo,
												   VkExternalFenceProperties* pExternalFenceProperties) {
	VkExternalFenceProperties noSupport = _emptyExtFenceProps;
	noSupport.pNext = pExternalFenceProperties->pNext;
	*pExternalFenceProperties = noSupport;
}
| |
// Template for the reply to every external semaphore query: all capability fields are zero.
static const VkExternalSemaphoreProperties _emptyExtSemProps = {VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES, nullptr, 0, 0, 0};

// Replies to an external semaphore query with the empty template, whatever the query info contains.
// The pNext chain supplied by the caller on the output structure is left in place.
void MVKPhysicalDevice::getExternalSemaphoreProperties(const VkPhysicalDeviceExternalSemaphoreInfo* pExternalSemaphoreInfo,
													   VkExternalSemaphoreProperties* pExternalSemaphoreProperties) {
	VkExternalSemaphoreProperties noSupport = _emptyExtSemProps;
	noSupport.pNext = pExternalSemaphoreProperties->pNext;
	*pExternalSemaphoreProperties = noSupport;
}
| |
| |
| #pragma mark Surfaces |
| |
// Indicates, through pSupported, whether this device can present to the surface.
// Presentation needs a device that is not headless and a surface backed by a CAMetalLayer.
// The queue family index does not influence the answer. When unsupported, the result
// returned is the configuration result of the surface.
VkResult MVKPhysicalDevice::getSurfaceSupport(uint32_t queueFamilyIndex,
											  MVKSurface* surface,
											  VkBool32* pSupported) {
	// Only macOS devices are asked whether they are headless.
#if MVK_MACOS
	const bool isHeadless = getMTLDevice().isHeadless;
#else
	const bool isHeadless = false;
#endif

	const bool canPresent = !isHeadless && (surface->getCAMetalLayer() != nil);
	*pSupported = canPresent;
	if (canPresent) { return VK_SUCCESS; }
	return surface->getConfigurationResult();
}
| |
// Populates the capabilities of the surface. If the surface has no CAMetalLayer,
// nothing is populated, and the configuration result of the surface is returned.
VkResult MVKPhysicalDevice::getSurfaceCapabilities(MVKSurface* surface,
												   VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) {

	// The surface view must be backed by a CAMetalLayer.
	CAMetalLayer* mtlLayer = surface->getCAMetalLayer();
	if ( !mtlLayer ) { return surface->getConfigurationResult(); }

	VkSurfaceCapabilitiesKHR& caps = *pSurfaceCapabilities;
	const uint32_t maxDim2D = _properties.limits.maxImageDimension2D;

	// Swapchain image counts come from the Metal features of this device.
	caps.minImageCount = _metalFeatures.minSwapchainImageCount;
	caps.maxImageCount = _metalFeatures.maxSwapchainImageCount;

	// The current extent is the natural drawable size of the layer.
	caps.currentExtent = mvkVkExtent2DFromCGSize(mtlLayer.naturalDrawableSizeMVK);
	caps.minImageExtent = { 1, 1 };
	caps.maxImageExtent = { maxDim2D, maxDim2D };
	caps.maxImageArrayLayers = 1;

	// The identity transform is the only one reported.
	caps.supportedTransforms = (VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR);
	caps.currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;

	caps.supportedCompositeAlpha = (VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR |
									VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR |
									VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR);

	caps.supportedUsageFlags = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
								VK_IMAGE_USAGE_STORAGE_BIT |
								VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
								VK_IMAGE_USAGE_TRANSFER_DST_BIT |
								VK_IMAGE_USAGE_SAMPLED_BIT);
	return VK_SUCCESS;
}
| |
// Enumerates the surface formats available for the surface, as every pairing of a
// supported pixel format with an available color space.
//
// Follows the usual Vulkan two-call convention: when pSurfaceFormats is null, only the
// number of pairings is returned in *pCount. Otherwise at most *pCount entries are written,
// *pCount is set to the number written, and VK_INCOMPLETE is returned if some were left out.
// If the surface has no CAMetalLayer, the configuration result of the surface is returned.
VkResult MVKPhysicalDevice::getSurfaceFormats(MVKSurface* surface,
											  uint32_t* pCount,
											  VkSurfaceFormatKHR* pSurfaceFormats) {

	// The surface view must be backed by a CAMetalLayer.
	if ( !surface->getCAMetalLayer() ) { return surface->getConfigurationResult(); }

// Appends the VkFormat matching a Metal pixel format, provided the pixel
// format is supported on this device and maps to a valid VkFormat.
#define addSurfFmt(MTL_FMT) \
	do { \
		if (_pixelFormats.isSupported(MTLPixelFormat ##MTL_FMT)) { \
			VkFormat vkFmt = _pixelFormats.getVkFormat(MTLPixelFormat ##MTL_FMT); \
			if (vkFmt) { vkFormats.push_back(vkFmt); } \
		} \
	} while(false)

	// Candidate pixel formats, some of which are platform-specific.
	MVKSmallVector<VkFormat, 16> vkFormats;
	addSurfFmt(BGRA8Unorm);
	addSurfFmt(BGRA8Unorm_sRGB);
	addSurfFmt(RGBA16Float);
#if MVK_MACOS
	addSurfFmt(RGB10A2Unorm);
	addSurfFmt(BGR10A2Unorm);
#endif
#if MVK_APPLE_SILICON
	addSurfFmt(BGRA10_XR);
	addSurfFmt(BGRA10_XR_sRGB);
	addSurfFmt(BGR10_XR);
	addSurfFmt(BGR10_XR_sRGB);
#endif

	// Color spaces. sRGB is always available; the rest depend on platform and OS version.
	MVKSmallVector<VkColorSpaceKHR, 16> colorSpaces;
	colorSpaces.push_back(VK_COLOR_SPACE_SRGB_NONLINEAR_KHR);
#if MVK_MACOS
	// macOS 10.11 supports some, but not all, of the color spaces of VK_EXT_swapchain_colorspace.
	colorSpaces.push_back(VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT);
	colorSpaces.push_back(VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT);
	colorSpaces.push_back(VK_COLOR_SPACE_BT709_NONLINEAR_EXT);
	colorSpaces.push_back(VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT);
	colorSpaces.push_back(VK_COLOR_SPACE_PASS_THROUGH_EXT);
	if (mvkOSVersionIsAtLeast(10.12)) {
		colorSpaces.push_back(VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT);
		colorSpaces.push_back(VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT);
	}
	if (mvkOSVersionIsAtLeast(10.14)) {
		colorSpaces.push_back(VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT);
		colorSpaces.push_back(VK_COLOR_SPACE_BT2020_LINEAR_EXT);
	}
#if MVK_XCODE_12
	if (mvkOSVersionIsAtLeast(11.0)) {
		colorSpaces.push_back(VK_COLOR_SPACE_HDR10_HLG_EXT);
		colorSpaces.push_back(VK_COLOR_SPACE_HDR10_ST2084_EXT);
	}
#endif
#endif
#if MVK_IOS_OR_TVOS
	// iOS 8 supports nothing beyond sRGB.
	if (mvkOSVersionIsAtLeast(9.0)) {
		colorSpaces.push_back(VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT);
		colorSpaces.push_back(VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT);
		colorSpaces.push_back(VK_COLOR_SPACE_BT709_NONLINEAR_EXT);
		colorSpaces.push_back(VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT);
		colorSpaces.push_back(VK_COLOR_SPACE_PASS_THROUGH_EXT);
	}
	if (mvkOSVersionIsAtLeast(10.0)) {
		colorSpaces.push_back(VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT);
		colorSpaces.push_back(VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT);
	}
	if (mvkOSVersionIsAtLeast(12.3)) {
		colorSpaces.push_back(VK_COLOR_SPACE_DCI_P3_LINEAR_EXT);
		colorSpaces.push_back(VK_COLOR_SPACE_BT2020_LINEAR_EXT);
	}
#if MVK_XCODE_12
	if (mvkOSVersionIsAtLeast(14.0)) {
		colorSpaces.push_back(VK_COLOR_SPACE_HDR10_HLG_EXT);
		colorSpaces.push_back(VK_COLOR_SPACE_HDR10_ST2084_EXT);
	}
#endif
#endif

	const size_t fmtCnt = vkFormats.size();
	const uint32_t pairCnt = (uint32_t)(fmtCnt * colorSpaces.size());

	// Count-only query.
	if ( !pSurfaceFormats ) {
		*pCount = pairCnt;
		return VK_SUCCESS;
	}

	// Settle on how many pairings will be returned, and report that number.
	VkResult result = VK_SUCCESS;
	if (*pCount < pairCnt) {
		result = VK_INCOMPLETE;
	} else {
		*pCount = pairCnt;
	}

	// Pairings are laid out color space by color space, each one running through all the
	// formats. The loop cannot execute when fmtCnt is zero, because pairCnt is then zero too.
	for (uint32_t idx = 0; idx < *pCount; idx++) {
		pSurfaceFormats[idx].format = vkFormats[idx % fmtCnt];
		pSurfaceFormats[idx].colorSpace = colorSpaces[idx / fmtCnt];
	}

	return result;
}
| |
// VkSurfaceFormat2KHR variant of the surface format query. It wraps the VkSurfaceFormatKHR
// variant, and has the same contract: when pSurfaceFormats is null only the count is returned
// in *pCount, otherwise at most *pCount entries are written, *pCount is set to the number
// written, and VK_INCOMPLETE is returned if some entries were left out.
//
// The temporary array is sized from the number of entries that will really be written,
// never directly from the caller's *pCount. That keeps the stack allocation bounded when
// the caller passes a very large count, and avoids a zero-length array when it passes zero.
// If the underlying query fails, the output array is left untouched.
VkResult MVKPhysicalDevice::getSurfaceFormats(MVKSurface* surface,
											  uint32_t* pCount,
											  VkSurfaceFormat2KHR* pSurfaceFormats) {

	// Count-only query.
	if ( !pSurfaceFormats ) {
		return getSurfaceFormats(surface, pCount, (VkSurfaceFormatKHR*)nullptr);
	}

	// Find out how many formats exist, so the temporary array can be bounded.
	uint32_t availCnt = 0;
	VkResult rslt = getSurfaceFormats(surface, &availCnt, (VkSurfaceFormatKHR*)nullptr);
	if (rslt != VK_SUCCESS) { return rslt; }

	// Nothing can be written. Report it the same way the wrapped query would.
	uint32_t fillCnt = min(*pCount, availCnt);
	if (fillCnt == 0) {
		*pCount = 0;
		return (availCnt == 0) ? VK_SUCCESS : VK_INCOMPLETE;
	}

	// Populate a temp array of VkSurfaceFormatKHR. Passing fillCnt yields the same result
	// code as passing *pCount would, since fillCnt is below availCnt exactly when *pCount is.
	VkSurfaceFormatKHR surfFmts[fillCnt];
	rslt = getSurfaceFormats(surface, &fillCnt, surfFmts);
	if (rslt != VK_SUCCESS && rslt != VK_INCOMPLETE) { return rslt; }

	// Copy into the array of VkSurfaceFormat2KHR.
	// NOTE(review): pNext is reset to null here, as it always has been, which discards any
	// chain the caller attached to an element. Confirm whether the chain should be preserved.
	for (uint32_t fmtIdx = 0; fmtIdx < fillCnt; fmtIdx++) {
		auto pSF = &pSurfaceFormats[fmtIdx];
		pSF->sType = VK_STRUCTURE_TYPE_SURFACE_FORMAT_2_KHR;
		pSF->pNext = nullptr;
		pSF->surfaceFormat = surfFmts[fmtIdx];
	}

	*pCount = fillCnt;
	return rslt;
}
| |
// Enumerates the presentation modes available for the surface. FIFO is always available,
// and immediate mode is added when the Metal features of this device allow it.
//
// When pPresentModes is null, only the number of modes is returned in *pCount. When the
// array is too small to hold every mode, it is filled to capacity, *pCount is left as the
// caller set it, and VK_INCOMPLETE is returned. If the surface has no CAMetalLayer, the
// configuration result of the surface is returned.
VkResult MVKPhysicalDevice::getSurfacePresentModes(MVKSurface* surface,
												   uint32_t* pCount,
												   VkPresentModeKHR* pPresentModes) {

	// The surface view must be backed by a CAMetalLayer.
	if ( !surface->getCAMetalLayer() ) { return surface->getConfigurationResult(); }

// Counts a present mode, and also stores it if there is an output array with room left in it.
#define ADD_VK_PRESENT_MODE(VK_PM) \
	do { \
		if (pPresentModes && presentModesCnt < *pCount) { pPresentModes[presentModesCnt] = VK_PM; } \
		presentModesCnt++; \
	} while(false)

	uint32_t presentModesCnt = 0;

	ADD_VK_PRESENT_MODE(VK_PRESENT_MODE_FIFO_KHR);
	if (_metalFeatures.presentModeImmediate) { ADD_VK_PRESENT_MODE(VK_PRESENT_MODE_IMMEDIATE_KHR); }

	// An output array that could not hold every mode was filled to capacity.
	const bool wasTruncated = pPresentModes && (*pCount < presentModesCnt);
	if (wasTruncated) { return VK_INCOMPLETE; }

	*pCount = presentModesCnt;
	return VK_SUCCESS;
}
| |
// Enumerates the presentation rectangles of the surface. There is always exactly one,
// located at the origin and sized to the natural drawable size of the layer.
//
// When pRects is null, only the count is returned in *pRectCount. When pRects is supplied
// with a *pRectCount of zero, nothing is written and VK_INCOMPLETE is returned. If the
// surface has no CAMetalLayer, the configuration result of the surface is returned.
VkResult MVKPhysicalDevice::getPresentRectangles(MVKSurface* surface,
												 uint32_t* pRectCount,
												 VkRect2D* pRects) {

	// The surface view must be backed by a CAMetalLayer.
	CAMetalLayer* mtlLayer = surface->getCAMetalLayer();
	if ( !mtlLayer ) { return surface->getConfigurationResult(); }

	if (pRects) {
		// No room for the single rectangle.
		if (*pRectCount == 0) { return VK_INCOMPLETE; }

		pRects[0].offset = { 0, 0 };
		pRects[0].extent = mvkVkExtent2DFromCGSize(mtlLayer.naturalDrawableSizeMVK);
	}

	*pRectCount = 1;
	return VK_SUCCESS;
}
| |
| |
| #pragma mark Queues |
| |
// Returns the queue families supported by this instance, creating them on first use.
//
// Metal does not distinguish functionality between queues, which would normally lead us to
// create only one general-purpose queue family. However, Vulkan associates command buffers
// with a queue family, whereas Metal associates command buffers with a Metal queue. To allow
// a Metal command buffer to be prefilled before it is formally submitted to a Vulkan queue,
// each Vulkan queue family is restricted to a single Metal queue, and several queue families
// are provided so that queue operations can still run in parallel.
//
// Since Metal queues are always general purpose, every queue family supports
// graphics + compute + transfer by default, unless the app indicates, through
// the configuration, that it requires queue family specialization.
MVKArrayRef<MVKQueueFamily*> MVKPhysicalDevice::getQueueFamilies() {
	if (_queueFamilies.empty()) {
		const bool specialize = mvkConfig().specializedQueueFamilies;
		const VkQueueFlags generalFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT);

		// Capabilities of each queue family, in queue family index order.
		const VkQueueFlags qfFlags[] = {
			generalFlags,																	// General-purpose
			specialize ? (VkQueueFlags)(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT) : generalFlags,	// Dedicated graphics...or general-purpose
			specialize ? (VkQueueFlags)(VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT) : generalFlags,	// Dedicated compute...or general-purpose
			specialize ? (VkQueueFlags)VK_QUEUE_TRANSFER_BIT : generalFlags,							// Dedicated transfer...or general-purpose
		};

		// Everything other than the flags is common to all of the queue families.
		VkQueueFamilyProperties qfProps;
		qfProps.queueCount = kMVKQueueCountPerQueueFamily;
		qfProps.timestampValidBits = 64;
		qfProps.minImageTransferGranularity = { 1, 1, 1};

		uint32_t qfIdx = 0;
		for (VkQueueFlags flags : qfFlags) {
			qfProps.queueFlags = flags;
			_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
		}

		MVKAssert(kMVKQueueFamilyCount >= _queueFamilies.size(), "Adjust value of kMVKQueueFamilyCount.");
	}
	return _queueFamilies.contents();
}
| |
// Enumerates the properties of the queue families of this device.
//
// When pQueueFamilyProperties is null, only the number of queue families is returned in
// *pCount. Otherwise at most *pCount entries are written, *pCount is set to the number
// written, and VK_INCOMPLETE is returned if some queue families were left out.
VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount,
													 VkQueueFamilyProperties* pQueueFamilyProperties) {
	auto qFams = getQueueFamilies();
	const uint32_t availCnt = uint32_t(qFams.size);

	// Count-only query.
	if ( !pQueueFamilyProperties ) {
		*pCount = availCnt;
		return VK_SUCCESS;
	}

	// Never report more queue families than exist.
	const bool isComplete = (*pCount >= availCnt);
	if (isComplete) { *pCount = availCnt; }

	for (uint32_t qfIdx = 0; qfIdx < *pCount; qfIdx++) {
		qFams[qfIdx]->getProperties(&pQueueFamilyProperties[qfIdx]);
	}

	return isComplete ? VK_SUCCESS : VK_INCOMPLETE;
}
| |
// VkQueueFamilyProperties2KHR variant of the queue family query. It wraps the
// VkQueueFamilyProperties variant, and has the same contract: when pQueueFamilyProperties
// is null only the count is returned in *pCount, otherwise at most *pCount entries are
// written, *pCount is set to the number written, and VK_INCOMPLETE is returned if some
// queue families were left out.
//
// The temporary array is sized from the number of entries that will really be written,
// never directly from the caller's *pCount. That keeps the stack allocation bounded when
// the caller passes a very large count, and avoids a zero-length array when it passes zero.
VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount,
													 VkQueueFamilyProperties2KHR* pQueueFamilyProperties) {

	// Count-only query.
	if ( !pQueueFamilyProperties ) {
		return getQueueFamilyProperties(pCount, (VkQueueFamilyProperties*)nullptr);
	}

	// Find out how many queue families exist, so the temporary array can be bounded.
	uint32_t availCnt = 0;
	VkResult rslt = getQueueFamilyProperties(&availCnt, (VkQueueFamilyProperties*)nullptr);
	if (rslt != VK_SUCCESS) { return rslt; }

	// Nothing can be written. Report it the same way the wrapped query would.
	uint32_t fillCnt = min(*pCount, availCnt);
	if (fillCnt == 0) {
		*pCount = 0;
		return (availCnt == 0) ? VK_SUCCESS : VK_INCOMPLETE;
	}

	// Populate a temp array of VkQueueFamilyProperties. Passing fillCnt yields the same result
	// code as passing *pCount would, since fillCnt is below availCnt exactly when *pCount is.
	VkQueueFamilyProperties qProps[fillCnt];
	rslt = getQueueFamilyProperties(&fillCnt, qProps);
	if (rslt != VK_SUCCESS && rslt != VK_INCOMPLETE) { return rslt; }

	// Copy into the array of VkQueueFamilyProperties2KHR.
	// NOTE(review): pNext is reset to null here, as it always has been, which discards any
	// chain the caller attached to an element. Confirm whether the chain should be preserved.
	for (uint32_t qpIdx = 0; qpIdx < fillCnt; qpIdx++) {
		auto pQP = &pQueueFamilyProperties[qpIdx];
		pQP->sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2_KHR;
		pQP->pNext = nullptr;
		pQP->queueFamilyProperties = qProps[qpIdx];
	}

	*pCount = fillCnt;
	return rslt;
}
| |
// Samples the CPU and GPU timestamps that open a timestamp correlation interval.
// Neither argument is touched on Apple GPUs, where the GPU and CPU timestamps
// are the same, or when GPU timestamp counters are not in use.
void MVKPhysicalDevice::startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart) {
	const bool needsCorrelation = (_properties.vendorID != kAppleVendorId) && _timestampMTLCounterSet;
	if (needsCorrelation) {
		[_mtlDevice sampleTimestamps: &cpuStart gpuTimestamp: &gpuStart];
	}
}
| |
// Closes a timestamp correlation interval opened by startTimestampCorrelation(), and
// updates limits.timestampPeriod to the ratio of elapsed CPU time to elapsed GPU time.
//
// Nothing is done on Apple GPUs, where the GPU and CPU timestamps are the same,
// or when GPU timestamp counters are not in use.
//
// The previous timestampPeriod is retained if either timestamp failed to advance during
// the interval. Without that check, an unchanged GPU timestamp would divide by zero and
// store inf or NaN, and because the timestamps are unsigned, an end value below its
// start value would wrap around to a huge, meaningless elapsed time.
void MVKPhysicalDevice::updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart) {
	if (_properties.vendorID == kAppleVendorId || !_timestampMTLCounterSet) { return; }

	MTLTimestamp cpuEnd;
	MTLTimestamp gpuEnd;
	[_mtlDevice sampleTimestamps: &cpuEnd gpuTimestamp: &gpuEnd];

	// Only a strictly positive interval on both clocks gives a usable ratio.
	if (cpuEnd <= cpuStart || gpuEnd <= gpuStart) { return; }

	_properties.limits.timestampPeriod = (double)(cpuEnd - cpuStart) / (double)(gpuEnd - gpuStart);
}
| |
| |
| #pragma mark Memory models |
| |
/** Copies the memory characteristics of this device into the specified structure. Always returns VK_SUCCESS. */
VkResult MVKPhysicalDevice::getMemoryProperties(VkPhysicalDeviceMemoryProperties* pMemoryProperties) {
	VkPhysicalDeviceMemoryProperties& memProps = *pMemoryProperties;
	memProps = _memoryProperties;
	return VK_SUCCESS;
}
| |
// Extended form of the memory properties query. Copies the memory characteristics of
// this device into the structure, and populates any memory budget structure found on
// its pNext chain. Other chained structures are ignored. Always returns VK_SUCCESS.
VkResult MVKPhysicalDevice::getMemoryProperties(VkPhysicalDeviceMemoryProperties2* pMemoryProperties) {
	pMemoryProperties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
	pMemoryProperties->memoryProperties = _memoryProperties;

	auto* next = (VkBaseOutStructure*)pMemoryProperties->pNext;
	while (next) {
		if (next->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT) {
			auto* budgetProps = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)next;

			// Every heap entry starts out at zero.
			mvkClear(budgetProps->heapBudget, VK_MAX_MEMORY_HEAPS);
			mvkClear(budgetProps->heapUsage, VK_MAX_MEMORY_HEAPS);

			// Heap 0 is reported from the recommended working set size and current allocation.
			budgetProps->heapBudget[0] = (VkDeviceSize)getRecommendedMaxWorkingSetSize();
			budgetProps->heapUsage[0] = (VkDeviceSize)getCurrentAllocatedSize();

			// Heap 1 is reported only when memory is not unified, from the available and used memory sizes.
			if ( !getHasUnifiedMemory() ) {
				budgetProps->heapBudget[1] = (VkDeviceSize)mvkGetAvailableMemorySize();
				budgetProps->heapUsage[1] = (VkDeviceSize)mvkGetUsedMemorySize();
			}
		}
		next = next->pNext;
	}
	return VK_SUCCESS;
}
| |
| |
| #pragma mark Construction |
| |
// Constructs a physical device around a Metal device, which is retained.
// The first four initialization steps must run in the order shown.
MVKPhysicalDevice::MVKPhysicalDevice(MVKInstance* mvkInstance, id<MTLDevice> mtlDevice)
	: _mtlDevice([mtlDevice retain]),		// Must be set first
	  _mvkInstance(mvkInstance),
	  _supportedExtensions(this, true),
	  _pixelFormats(this)					// Must be set after _mtlDevice
{
	// Order-sensitive steps.
	initProperties();		// 1st
	initMetalFeatures();	// 2nd
	initFeatures();			// 3rd
	initLimits();			// 4th

	// Remaining steps.
	initExtensions();
	initMemoryProperties();
	initExternalMemoryProperties();
	initCounterSets();
	logGPUInfo();
}
| |
// Sets up the physical device properties, leaving the limits to be populated separately.
void MVKPhysicalDevice::initProperties() {
	// Begin from a fully cleared structure.
	mvkClear(&_properties);

	// Version information: the driver version is the MoltenVK version,
	// and the advertised Vulkan API version comes from the configuration.
	_properties.driverVersion = MVK_VERSION;
	_properties.apiVersion = mvkConfig().apiVersionToAdvertise;

	initGPUInfoProperties();
	initPipelineCacheUUID();
}
| |
| // Initializes the Metal-specific physical device features of this instance. |
| void MVKPhysicalDevice::initMetalFeatures() { |
| |
| // Start with all Metal features cleared |
| mvkClear(&_metalFeatures); |
| |
| _metalFeatures.maxPerStageBufferCount = 31; |
| _metalFeatures.maxMTLBufferSize = (256 * MEBI); |
| _metalFeatures.dynamicMTLBufferSize = 0; |
| _metalFeatures.maxPerStageDynamicMTLBufferCount = 0; |
| |
| _metalFeatures.maxPerStageSamplerCount = 16; |
| _metalFeatures.maxQueryBufferSize = (64 * KIBI); |
| |
| _metalFeatures.pushConstantSizeAlignment = 16; // Min float4 alignment for typical uniform structs. |
| |
| _metalFeatures.maxTextureLayers = (2 * KIBI); |
| |
| _metalFeatures.ioSurfaces = MVK_SUPPORT_IOSURFACE_BOOL; |
| |
| // Metal supports 2 or 3 concurrent CAMetalLayer drawables. |
| _metalFeatures.minSwapchainImageCount = kMVKMinSwapchainImageCount; |
| _metalFeatures.maxSwapchainImageCount = kMVKMaxSwapchainImageCount; |
| |
| _metalFeatures.vertexStrideAlignment = 4; |
| |
| _metalFeatures.maxPerStageStorageTextureCount = 8; |
| |
| // GPU-specific features |
| switch (_properties.vendorID) { |
| case kAMDVendorId: |
| _metalFeatures.clearColorFloatRounding = MVK_FLOAT_ROUNDING_DOWN; |
| break; |
| case kAppleVendorId: |
| case kIntelVendorId: |
| case kNVVendorId: |
| default: |
| _metalFeatures.clearColorFloatRounding = MVK_FLOAT_ROUNDING_NEAREST; |
| break; |
| } |
| |
| #if MVK_TVOS |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion1_1; |
| _metalFeatures.mtlBufferAlignment = 64; |
| _metalFeatures.mtlCopyBufferAlignment = 1; |
| _metalFeatures.texelBuffers = true; |
| _metalFeatures.maxTextureDimension = (8 * KIBI); |
| _metalFeatures.dynamicMTLBufferSize = (4 * KIBI); |
| _metalFeatures.sharedLinearTextures = true; |
| _metalFeatures.maxPerStageDynamicMTLBufferCount = _metalFeatures.maxPerStageBufferCount; |
| _metalFeatures.renderLinearTextures = true; |
| _metalFeatures.tileBasedDeferredRendering = true; |
| |
| if (supportsMTLFeatureSet(tvOS_GPUFamily1_v2)) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion1_2; |
| _metalFeatures.shaderSpecialization = true; |
| _metalFeatures.stencilViews = true; |
| _metalFeatures.fences = true; |
| _metalFeatures.deferredStoreActions = true; |
| } |
| |
| if (supportsMTLFeatureSet(tvOS_GPUFamily1_v3)) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_0; |
| _metalFeatures.renderWithoutAttachments = true; |
| _metalFeatures.argumentBuffers = true; |
| } |
| |
| if (supportsMTLFeatureSet(tvOS_GPUFamily1_v4)) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_1; |
| _metalFeatures.events = true; |
| _metalFeatures.textureBuffers = true; |
| } |
| |
| if (supportsMTLFeatureSet(tvOS_GPUFamily2_v1)) { |
| _metalFeatures.indirectDrawing = true; |
| _metalFeatures.baseVertexInstanceDrawing = true; |
| _metalFeatures.combinedStoreResolveAction = true; |
| _metalFeatures.mtlBufferAlignment = 16; // Min float4 alignment for typical vertex buffers. MTLBuffer may go down to 4 bytes for other data. |
| _metalFeatures.maxTextureDimension = (16 * KIBI); |
| _metalFeatures.depthSampleCompare = true; |
| _metalFeatures.arrayOfTextures = true; |
| _metalFeatures.arrayOfSamplers = true; |
| _metalFeatures.depthResolve = true; |
| } |
| |
| if ( mvkOSVersionIsAtLeast(13.0) ) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_2; |
| _metalFeatures.placementHeaps = mvkConfig().useMTLHeap; |
| _metalFeatures.nativeTextureSwizzle = true; |
| if (supportsMTLGPUFamily(Apple3)) { |
| _metalFeatures.native3DCompressedTextures = true; |
| } |
| if (supportsMTLGPUFamily(Apple4)) { |
| _metalFeatures.quadPermute = true; |
| } |
| } |
| |
| if (supportsMTLGPUFamily(Apple4)) { |
| _metalFeatures.maxPerStageTextureCount = 96; |
| } else { |
| _metalFeatures.maxPerStageTextureCount = 31; |
| } |
| |
| #if MVK_XCODE_12 |
| if ( mvkOSVersionIsAtLeast(14.0) ) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_3; |
| } |
| #endif |
| #if MVK_XCODE_13 |
| if ( mvkOSVersionIsAtLeast(15.0) ) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_4; |
| } |
| #endif |
| #if MVK_XCODE_14 |
| if ( mvkOSVersionIsAtLeast(16.0) ) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion3_0; |
| } |
| #endif |
| |
| #endif |
| |
| #if MVK_IOS |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion1_0; |
| _metalFeatures.mtlBufferAlignment = 64; |
| _metalFeatures.mtlCopyBufferAlignment = 1; |
| _metalFeatures.texelBuffers = true; |
| _metalFeatures.maxTextureDimension = (4 * KIBI); |
| _metalFeatures.sharedLinearTextures = true; |
| _metalFeatures.renderLinearTextures = true; |
| _metalFeatures.tileBasedDeferredRendering = true; |
| |
| if (supportsMTLFeatureSet(iOS_GPUFamily1_v2)) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion1_1; |
| _metalFeatures.dynamicMTLBufferSize = (4 * KIBI); |
| _metalFeatures.maxTextureDimension = (8 * KIBI); |
| _metalFeatures.maxPerStageDynamicMTLBufferCount = _metalFeatures.maxPerStageBufferCount; |
| } |
| |
| if (supportsMTLFeatureSet(iOS_GPUFamily1_v3)) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion1_2; |
| _metalFeatures.shaderSpecialization = true; |
| _metalFeatures.stencilViews = true; |
| _metalFeatures.fences = true; |
| _metalFeatures.deferredStoreActions = true; |
| } |
| |
| if (supportsMTLFeatureSet(iOS_GPUFamily1_v4)) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_0; |
| _metalFeatures.renderWithoutAttachments = true; |
| _metalFeatures.argumentBuffers = true; |
| } |
| |
| if (supportsMTLFeatureSet(iOS_GPUFamily1_v5)) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_1; |
| _metalFeatures.events = true; |
| _metalFeatures.textureBuffers = true; |
| } |
| |
| if (supportsMTLFeatureSet(iOS_GPUFamily3_v1)) { |
| _metalFeatures.indirectDrawing = true; |
| _metalFeatures.baseVertexInstanceDrawing = true; |
| _metalFeatures.combinedStoreResolveAction = true; |
| _metalFeatures.mtlBufferAlignment = 16; // Min float4 alignment for typical vertex buffers. MTLBuffer may go down to 4 bytes for other data. |
| _metalFeatures.maxTextureDimension = (16 * KIBI); |
| _metalFeatures.depthSampleCompare = true; |
| _metalFeatures.depthResolve = true; |
| } |
| |
| if (supportsMTLFeatureSet(iOS_GPUFamily3_v2)) { |
| _metalFeatures.arrayOfTextures = true; |
| } |
| if (supportsMTLFeatureSet(iOS_GPUFamily3_v3)) { |
| _metalFeatures.arrayOfSamplers = true; |
| } |
| |
| if (supportsMTLFeatureSet(iOS_GPUFamily4_v1)) { |
| _metalFeatures.postDepthCoverage = true; |
| _metalFeatures.nonUniformThreadgroups = true; |
| } |
| |
| if (supportsMTLFeatureSet(iOS_GPUFamily5_v1)) { |
| _metalFeatures.layeredRendering = true; |
| _metalFeatures.stencilFeedback = true; |
| _metalFeatures.indirectTessellationDrawing = true; |
| _metalFeatures.stencilResolve = true; |
| } |
| |
| if ( mvkOSVersionIsAtLeast(13.0) ) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_2; |
| _metalFeatures.placementHeaps = mvkConfig().useMTLHeap; |
| _metalFeatures.nativeTextureSwizzle = true; |
| if (supportsMTLGPUFamily(Apple3)) { |
| _metalFeatures.native3DCompressedTextures = true; |
| } |
| if (supportsMTLGPUFamily(Apple4)) { |
| _metalFeatures.quadPermute = true; |
| } |
| if (supportsMTLGPUFamily(Apple6) ) { |
| _metalFeatures.astcHDRTextures = true; |
| _metalFeatures.simdPermute = true; |
| } |
| } |
| |
| if (supportsMTLGPUFamily(Apple4)) { |
| _metalFeatures.maxPerStageTextureCount = 96; |
| } else { |
| _metalFeatures.maxPerStageTextureCount = 31; |
| } |
| |
| #if MVK_XCODE_12 |
| if ( mvkOSVersionIsAtLeast(14.0) ) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_3; |
| _metalFeatures.multisampleArrayTextures = true; |
| if ( supportsMTLGPUFamily(Apple7) ) { |
| _metalFeatures.maxQueryBufferSize = (256 * KIBI); |
| _metalFeatures.multisampleLayeredRendering = _metalFeatures.layeredRendering; |
| _metalFeatures.samplerClampToBorder = true; |
| _metalFeatures.samplerMirrorClampToEdge = true; |
| _metalFeatures.simdReduction = true; |
| } |
| } |
| #endif |
| #if MVK_XCODE_13 |
| if ( mvkOSVersionIsAtLeast(15.0) ) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_4; |
| } |
| #endif |
| #if MVK_XCODE_14 |
| if ( mvkOSVersionIsAtLeast(16.0) ) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion3_0; |
| } |
| #endif |
| |
| #endif |
| |
| #if MVK_MACOS |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion1_1; |
| _metalFeatures.maxPerStageTextureCount = 128; |
| _metalFeatures.mtlBufferAlignment = 256; |
| _metalFeatures.mtlCopyBufferAlignment = 4; |
| _metalFeatures.baseVertexInstanceDrawing = true; |
| _metalFeatures.layeredRendering = true; |
| _metalFeatures.maxTextureDimension = (16 * KIBI); |
| _metalFeatures.depthSampleCompare = true; |
| _metalFeatures.samplerMirrorClampToEdge = true; |
| |
| if (supportsMTLFeatureSet(macOS_GPUFamily1_v2)) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion1_2; |
| _metalFeatures.indirectDrawing = true; |
| _metalFeatures.indirectTessellationDrawing = true; |
| _metalFeatures.dynamicMTLBufferSize = (4 * KIBI); |
| _metalFeatures.shaderSpecialization = true; |
| _metalFeatures.stencilViews = true; |
| _metalFeatures.samplerClampToBorder = true; |
| _metalFeatures.combinedStoreResolveAction = true; |
| _metalFeatures.deferredStoreActions = true; |
| _metalFeatures.maxMTLBufferSize = (1 * GIBI); |
| _metalFeatures.maxPerStageDynamicMTLBufferCount = 14; |
| } |
| |
| if (supportsMTLFeatureSet(macOS_GPUFamily1_v3)) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_0; |
| _metalFeatures.texelBuffers = true; |
| _metalFeatures.arrayOfTextures = true; |
| _metalFeatures.arrayOfSamplers = true; |
| _metalFeatures.presentModeImmediate = true; |
| _metalFeatures.fences = true; |
| _metalFeatures.nonUniformThreadgroups = true; |
| _metalFeatures.argumentBuffers = true; |
| } |
| |
| if (supportsMTLFeatureSet(macOS_GPUFamily1_v4)) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_1; |
| _metalFeatures.multisampleArrayTextures = true; |
| _metalFeatures.events = true; |
| _metalFeatures.textureBuffers = true; |
| } |
| |
| if (supportsMTLFeatureSet(macOS_GPUFamily2_v1)) { |
| _metalFeatures.multisampleLayeredRendering = _metalFeatures.layeredRendering; |
| _metalFeatures.stencilFeedback = true; |
| _metalFeatures.depthResolve = true; |
| _metalFeatures.stencilResolve = true; |
| _metalFeatures.simdPermute = true; |
| _metalFeatures.quadPermute = true; |
| _metalFeatures.simdReduction = true; |
| } |
| |
| if ( mvkOSVersionIsAtLeast(10.15) ) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_2; |
| _metalFeatures.maxQueryBufferSize = (256 * KIBI); |
| _metalFeatures.native3DCompressedTextures = true; |
| if ( mvkOSVersionIsAtLeast(mvkMakeOSVersion(10, 15, 6)) ) { |
| _metalFeatures.sharedLinearTextures = true; |
| } |
| if (supportsMTLGPUFamily(Mac2)) { |
| _metalFeatures.nativeTextureSwizzle = true; |
| _metalFeatures.placementHeaps = mvkConfig().useMTLHeap; |
| _metalFeatures.renderWithoutAttachments = true; |
| } |
| } |
| |
| #if MVK_XCODE_12 |
| if ( mvkOSVersionIsAtLeast(11.0) ) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_3; |
| } |
| #endif |
| #if MVK_XCODE_13 |
| if ( mvkOSVersionIsAtLeast(12.0) ) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion2_4; |
| } |
| #endif |
| #if MVK_XCODE_14 |
| if ( mvkOSVersionIsAtLeast(13.0) ) { |
| _metalFeatures.mslVersionEnum = MTLLanguageVersion3_0; |
| } |
| #endif |
| |
| // This is an Apple GPU--treat it accordingly. |
| if (supportsMTLGPUFamily(Apple1)) { |
| _metalFeatures.mtlCopyBufferAlignment = 1; |
| _metalFeatures.mtlBufferAlignment = 16; // Min float4 alignment for typical vertex buffers. MTLBuffer may go down to 4 bytes for other data. |
| _metalFeatures.maxQueryBufferSize = (64 * KIBI); |
| _metalFeatures.maxPerStageDynamicMTLBufferCount = _metalFeatures.maxPerStageBufferCount; |
| _metalFeatures.postDepthCoverage = true; |
| _metalFeatures.renderLinearTextures = true; |
| _metalFeatures.tileBasedDeferredRendering = true; |
| |
| #if MVK_XCODE_12 |
| if (supportsMTLGPUFamily(Apple6)) { |
| _metalFeatures.astcHDRTextures = true; |
| } |
| if (supportsMTLGPUFamily(Apple7)) { |
| _metalFeatures.maxQueryBufferSize = (256 * KIBI); |
| } |
| #endif |
| } |
| |
| // Don't use barriers in render passes on Apple GPUs. Apple GPUs don't support them, |
| // and in fact Metal's validation layer will complain if you try to use them. |
| if ( !supportsMTLGPUFamily(Apple1) ) { |
| if (supportsMTLFeatureSet(macOS_GPUFamily1_v4)) { |
| _metalFeatures.memoryBarriers = true; |
| } |
| _metalFeatures.textureBarriers = true; |
| } |
| |
| #endif |
| |
| if ( [_mtlDevice respondsToSelector: @selector(areProgrammableSamplePositionsSupported)] ) { |
| _metalFeatures.programmableSamplePositions = _mtlDevice.areProgrammableSamplePositionsSupported; |
| } |
| |
| if ( [_mtlDevice respondsToSelector: @selector(areRasterOrderGroupsSupported)] ) { |
| _metalFeatures.rasterOrderGroups = _mtlDevice.areRasterOrderGroupsSupported; |
| } |
| #if MVK_XCODE_12 |
| if ( [_mtlDevice respondsToSelector: @selector(supportsPullModelInterpolation)] ) { |
| _metalFeatures.pullModelInterpolation = _mtlDevice.supportsPullModelInterpolation; |
| } |
| #endif |
| if ( [_mtlDevice respondsToSelector: @selector(maxBufferLength)] ) { |
| _metalFeatures.maxMTLBufferSize = _mtlDevice.maxBufferLength; |
| } |
| |
| for (uint32_t sc = VK_SAMPLE_COUNT_1_BIT; sc <= VK_SAMPLE_COUNT_64_BIT; sc <<= 1) { |
| if ([_mtlDevice supportsTextureSampleCount: mvkSampleCountFromVkSampleCountFlagBits((VkSampleCountFlagBits)sc)]) { |
| _metalFeatures.supportedSampleCounts |= sc; |
| } |
| } |
| |
| _metalFeatures.minSubgroupSize = _metalFeatures.maxSubgroupSize = 1; |
| #if MVK_MACOS |
| if (_metalFeatures.simdPermute) { |
| // Based on data from Sascha Willems' Vulkan Hardware Database. |
| // This would be a lot easier and less painful if MTLDevice had properties for this... |
| _metalFeatures.maxSubgroupSize = (_properties.vendorID == kAMDVendorId) ? 64 : 32; |
| switch (_properties.vendorID) { |
| case kIntelVendorId: |
| _metalFeatures.minSubgroupSize = 8; |
| break; |
| case kAMDVendorId: |
| switch (_properties.deviceID) { |
| case kAMDRadeonRX5700DeviceId: |
| case kAMDRadeonRX5500DeviceId: |
| case kAMDRadeonRX6800DeviceId: |
| case kAMDRadeonRX6700DeviceId: |
| _metalFeatures.minSubgroupSize = 32; |
| break; |
| default: |
| _metalFeatures.minSubgroupSize = _metalFeatures.maxSubgroupSize; |
| break; |
| } |
| break; |
| case kAppleVendorId: |
| // XXX Minimum thread execution width for Apple GPUs is unknown, but assumed to be 4. May be greater. |
| _metalFeatures.minSubgroupSize = 4; |
| break; |
| default: |
| _metalFeatures.minSubgroupSize = _metalFeatures.maxSubgroupSize; |
| break; |
| } |
| } |
| #endif |
| #if MVK_IOS |
| if (_metalFeatures.simdPermute) { |
| _metalFeatures.minSubgroupSize = 4; |
| _metalFeatures.maxSubgroupSize = 32; |
| } else if (_metalFeatures.quadPermute) { |
| _metalFeatures.minSubgroupSize = _metalFeatures.maxSubgroupSize = 4; |
| } |
| #endif |
| |
| #define setMSLVersion(maj, min) \ |
| _metalFeatures.mslVersion = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::make_msl_version(maj, min); |
| |
| switch (_metalFeatures.mslVersionEnum) { |
| #if MVK_XCODE_14 |
| case MTLLanguageVersion3_0: |
| setMSLVersion(3, 0); |
| break; |
| #endif |
| #if MVK_XCODE_13 |
| case MTLLanguageVersion2_4: |
| setMSLVersion(2, 4); |
| break; |
| #endif |
| #if MVK_XCODE_12 |
| case MTLLanguageVersion2_3: |
| setMSLVersion(2, 3); |
| break; |
| #endif |
| case MTLLanguageVersion2_2: |
| setMSLVersion(2, 2); |
| break; |
| case MTLLanguageVersion2_1: |
| setMSLVersion(2, 1); |
| break; |
| case MTLLanguageVersion2_0: |
| setMSLVersion(2, 0); |
| break; |
| case MTLLanguageVersion1_2: |
| setMSLVersion(1, 2); |
| break; |
| case MTLLanguageVersion1_1: |
| setMSLVersion(1, 1); |
| break; |
| #if MVK_IOS_OR_TVOS || MVK_XCODE_14 |
| case MTLLanguageVersion1_0: |
| setMSLVersion(1, 0); |
| break; |
| #endif |
| } |
| |
| // iOS and tvOS adjustments necessary when running in the simulator on non-Apple GPUs. |
| #if MVK_OS_SIMULATOR && !MVK_APPLE_SILICON |
| _metalFeatures.mtlBufferAlignment = 256; |
| #endif |
| |
| // Currently, Metal argument buffer support is in beta stage, and is only supported |
| // on macOS 11.0 (Big Sur) or later, or on older versions of macOS using an Intel GPU. |
| // Metal argument buffers support is not available on iOS. Development to support iOS |
| // and a wider combination of GPU's on older macOS versions is under way. |
| #if MVK_MACOS |
| _metalFeatures.descriptorSetArgumentBuffers = (_metalFeatures.argumentBuffers && |
| (mvkOSVersionIsAtLeast(10.16) || |
| _properties.vendorID == kIntelVendorId)); |
| #endif |
| // Currently, if we don't support descriptor set argument buffers, we can't support argument buffers. |
| _metalFeatures.argumentBuffers = _metalFeatures.descriptorSetArgumentBuffers; |
| |
| #define checkSupportsMTLCounterSamplingPoint(mtlSP, mvkSP) \ |
| if ([_mtlDevice respondsToSelector: @selector(supportsCounterSampling:)] && \ |
| [_mtlDevice supportsCounterSampling: MTLCounterSamplingPointAt ##mtlSP ##Boundary]) { \ |
| _metalFeatures.counterSamplingPoints |= MVK_COUNTER_SAMPLING_AT_ ##mvkSP; \ |
| } |
| |
| #if MVK_XCODE_12 |
| checkSupportsMTLCounterSamplingPoint(Draw, DRAW); |
| checkSupportsMTLCounterSamplingPoint(Dispatch, DISPATCH); |
| checkSupportsMTLCounterSamplingPoint(Blit, BLIT); |
| checkSupportsMTLCounterSamplingPoint(Stage, PIPELINE_STAGE); |
| #endif |
| |
| #if MVK_MACOS |
| // On macOS, if we couldn't query supported sample points (on macOS 11), |
| // but the platform can support immediate-mode sample points, indicate that here. |
| if (!_metalFeatures.counterSamplingPoints && mvkOSVersionIsAtLeast(10.15) && !supportsMTLGPUFamily(Apple1)) { \ |
| _metalFeatures.counterSamplingPoints = MVK_COUNTER_SAMPLING_AT_DRAW | MVK_COUNTER_SAMPLING_AT_DISPATCH | MVK_COUNTER_SAMPLING_AT_BLIT; \ |
| } |
| #endif |
| |
| } |
| |
| // Initializes the physical device features of this instance. |
| // |
| // Clears _features, turns on the features that are enabled unconditionally, and then |
| // turns on the remaining features per platform (tvOS, iOS, macOS), based on the Metal |
| // feature sets and GPU families reported by the MTLDevice and on values in _metalFeatures. |
| // Because _metalFeatures is read here, this presumably must run after it has been populated. |
| // Any feature not assigned below stays cleared. |
| void MVKPhysicalDevice::initFeatures() { |
|     mvkClear(&_features);	// Start with everything cleared |
| |
|     // Features enabled unconditionally on every platform. |
|     _features.robustBufferAccess = true;  // XXX Required by Vulkan spec |
|     _features.fullDrawIndexUint32 = true; |
|     _features.independentBlend = true; |
|     _features.sampleRateShading = true; |
|     _features.depthBiasClamp = true; |
|     _features.fillModeNonSolid = true; |
|     _features.largePoints = true; |
|     _features.alphaToOne = true; |
|     _features.samplerAnisotropy = true; |
|     _features.shaderImageGatherExtended = true; |
|     _features.shaderStorageImageExtendedFormats = true; |
|     _features.shaderStorageImageReadWithoutFormat = true; |
|     _features.shaderStorageImageWriteWithoutFormat = true; |
|     _features.shaderUniformBufferArrayDynamicIndexing = true; |
|     _features.shaderStorageBufferArrayDynamicIndexing = true; |
|     _features.shaderClipDistance = true; |
|     _features.shaderInt16 = true; |
|     _features.multiDrawIndirect = true; |
|     _features.inheritedQueries = true; |
| |
|     // Features that mirror a Metal capability or a device query. |
|     _features.shaderSampledImageArrayDynamicIndexing = _metalFeatures.arrayOfTextures; |
|     _features.textureCompressionBC = mvkSupportsBCTextureCompression(_mtlDevice); |
| |
|     // firstInstance in indirect draws needs both indirect drawing and base vertex/instance drawing. |
|     if (_metalFeatures.indirectDrawing && _metalFeatures.baseVertexInstanceDrawing) { |
|         _features.drawIndirectFirstInstance = true; |
|     } |
| |
| #if MVK_TVOS |
|     _features.textureCompressionETC2 = true; |
|     _features.textureCompressionASTC_LDR = true; |
| #if MVK_XCODE_12 |
|     _features.shaderInt64 = mslVersionIsAtLeast(MTLLanguageVersion2_3) && supportsMTLGPUFamily(Apple3); |
| #else |
|     _features.shaderInt64 = false; |
| #endif |
| |
|     if (supportsMTLFeatureSet(tvOS_GPUFamily1_v3)) { |
|         _features.dualSrcBlend = true; |
|     } |
| |
|     if (supportsMTLFeatureSet(tvOS_GPUFamily2_v1)) { |
|         _features.occlusionQueryPrecise = true; |
|         _features.tessellationShader = true; |
|         // Keep in step with the iOS and macOS branches, which always enable |
|         // tessellation point-size output together with tessellationShader. |
|         _features.shaderTessellationAndGeometryPointSize = true; |
|     } |
| #endif |
| |
| #if MVK_IOS |
|     _features.textureCompressionETC2 = true; |
| #if MVK_XCODE_12 |
|     _features.shaderInt64 = mslVersionIsAtLeast(MTLLanguageVersion2_3) && supportsMTLGPUFamily(Apple3); |
| #else |
|     _features.shaderInt64 = false; |
| #endif |
| |
|     if (supportsMTLFeatureSet(iOS_GPUFamily2_v1)) { |
|         _features.textureCompressionASTC_LDR = true; |
|     } |
| |
|     if (supportsMTLFeatureSet(iOS_GPUFamily3_v1)) { |
|         _features.occlusionQueryPrecise = true; |
|     } |
| |
|     if (supportsMTLFeatureSet(iOS_GPUFamily1_v4)) { |
|         _features.dualSrcBlend = true; |
|     } |
| |
|     if (supportsMTLFeatureSet(iOS_GPUFamily2_v4)) { |
|         _features.depthClamp = true; |
|     } |
| |
|     if (supportsMTLFeatureSet(iOS_GPUFamily3_v2)) { |
|         _features.tessellationShader = true; |
|         _features.shaderTessellationAndGeometryPointSize = true; |
|     } |
| |
|     if (supportsMTLFeatureSet(iOS_GPUFamily4_v1)) { |
|         _features.imageCubeArray = true; |
|     } |
| |
|     if (supportsMTLFeatureSet(iOS_GPUFamily5_v1)) { |
|         _features.multiViewport = true; |
|     } |
| |
|     if (supportsMTLGPUFamily(Apple6)) { |
|         _features.shaderResourceMinLod = true; |
|     } |
| #endif |
| |
| #if MVK_MACOS |
|     _features.occlusionQueryPrecise = true; |
|     _features.imageCubeArray = true; |
|     _features.depthClamp = true; |
|     _features.vertexPipelineStoresAndAtomics = true; |
|     _features.fragmentStoresAndAtomics = true; |
| #if MVK_XCODE_12 |
|     _features.shaderInt64 = mslVersionIsAtLeast(MTLLanguageVersion2_3); |
| #else |
|     _features.shaderInt64 = false; |
| #endif |
| |
|     _features.shaderStorageImageArrayDynamicIndexing = _metalFeatures.arrayOfTextures; |
| |
|     if (supportsMTLFeatureSet(macOS_GPUFamily1_v2)) { |
|         _features.tessellationShader = true; |
|         _features.dualSrcBlend = true; |
|         _features.shaderTessellationAndGeometryPointSize = true; |
|     } |
| |
|     if (supportsMTLFeatureSet(macOS_GPUFamily1_v3)) { |
|         _features.multiViewport = true; |
|     } |
| |
|     if ( mvkOSVersionIsAtLeast(10.15) ) { |
|         _features.shaderResourceMinLod = true; |
|     } |
| |
|     // Apple GPUs on macOS (family Apple5 and up) also get the ETC2 and ASTC LDR formats. |
|     if ( supportsMTLGPUFamily(Apple5) ) { |
|         _features.textureCompressionETC2 = true; |
|         _features.textureCompressionASTC_LDR = true; |
|     } |
| #endif |
| } |
| |
| |
| #pragma mark VkPhysicalDeviceFeatures - List of features available on the device |
| |
| //typedef struct VkPhysicalDeviceFeatures { |
| // VkBool32 robustBufferAccess; // done |
| // VkBool32 fullDrawIndexUint32; // done |
| // VkBool32 imageCubeArray; // done |
| // VkBool32 independentBlend; // done |
| // VkBool32 geometryShader; |
| // VkBool32 tessellationShader; // done |
| // VkBool32 sampleRateShading; // done |
| // VkBool32 dualSrcBlend; // done |
| // VkBool32 logicOp; |
| // VkBool32 multiDrawIndirect; // done |
| // VkBool32 drawIndirectFirstInstance; // done |
| // VkBool32 depthClamp; // done |
| // VkBool32 depthBiasClamp; // done |
| // VkBool32 fillModeNonSolid; // done |
| // VkBool32 depthBounds; |
| // VkBool32 wideLines; |
| // VkBool32 largePoints; // done |
| // VkBool32 alphaToOne; // done |
| // VkBool32 multiViewport; // done |
| // VkBool32 samplerAnisotropy; // done |
| // VkBool32 textureCompressionETC2; // done |
| // VkBool32 textureCompressionASTC_LDR; // done |
| // VkBool32 textureCompressionBC; // done |
| // VkBool32 occlusionQueryPrecise; // done |
| // VkBool32 pipelineStatisticsQuery; |
| // VkBool32 vertexPipelineStoresAndAtomics; // done |
| // VkBool32 fragmentStoresAndAtomics; // done |
| // VkBool32 shaderTessellationAndGeometryPointSize; // done |
| // VkBool32 shaderImageGatherExtended; // done |
| // VkBool32 shaderStorageImageExtendedFormats; // done |
| // VkBool32 shaderStorageImageMultisample; |
| // VkBool32 shaderStorageImageReadWithoutFormat; // done |
| // VkBool32 shaderStorageImageWriteWithoutFormat; // done |
| // VkBool32 shaderUniformBufferArrayDynamicIndexing; // done |
| // VkBool32 shaderSampledImageArrayDynamicIndexing; // done |
| // VkBool32 shaderStorageBufferArrayDynamicIndexing; // done |
| // VkBool32 shaderStorageImageArrayDynamicIndexing; // done |
| // VkBool32 shaderClipDistance; // done |
| // VkBool32 shaderCullDistance; |
| // VkBool32 shaderFloat64; |
| // VkBool32 shaderInt64; // done |
| // VkBool32 shaderInt16; // done |
| // VkBool32 shaderResourceResidency; |
| // VkBool32 shaderResourceMinLod; // done |
| // VkBool32 sparseBinding; |
| // VkBool32 sparseResidencyBuffer; |
| // VkBool32 sparseResidencyImage2D; |
| // VkBool32 sparseResidencyImage3D; |
| // VkBool32 sparseResidency2Samples; |
| // VkBool32 sparseResidency4Samples; |
| // VkBool32 sparseResidency8Samples; |
| // VkBool32 sparseResidency16Samples; |
| // VkBool32 sparseResidencyAliased; |
| // VkBool32 variableMultisampleRate; |
| // VkBool32 inheritedQueries; // done |
| //} VkPhysicalDeviceFeatures; |
| |
| // Initializes the physical device property limits. |
| void MVKPhysicalDevice::initLimits() { |
| |
| #if MVK_TVOS |
| _properties.limits.maxColorAttachments = kMVKCachedColorAttachmentCount; |
| #endif |
| #if MVK_IOS |
| if (supportsMTLFeatureSet(iOS_GPUFamily2_v1)) { |
| _properties.limits.maxColorAttachments = kMVKCachedColorAttachmentCount; |
| } else { |
| _properties.limits.maxColorAttachments = 4; // < kMVKCachedColorAttachmentCount |
| } |
| #endif |
| #if MVK_MACOS |
| _properties.limits.maxColorAttachments = kMVKCachedColorAttachmentCount; |
| #endif |
| |
| _properties.limits.maxFragmentOutputAttachments = _properties.limits.maxColorAttachments; |
| _properties.limits.maxFragmentDualSrcAttachments = _features.dualSrcBlend ? 1 : 0; |
| |
| _properties.limits.framebufferColorSampleCounts = _metalFeatures.supportedSampleCounts; |
| _properties.limits.framebufferDepthSampleCounts = _metalFeatures.supportedSampleCounts; |
| _properties.limits.framebufferStencilSampleCounts = _metalFeatures.supportedSampleCounts; |
| _properties.limits.framebufferNoAttachmentsSampleCounts = _metalFeatures.supportedSampleCounts; |
| _properties.limits.sampledImageColorSampleCounts = _metalFeatures.supportedSampleCounts; |
| _properties.limits.sampledImageIntegerSampleCounts = _metalFeatures.supportedSampleCounts; |
| _properties.limits.sampledImageDepthSampleCounts = _metalFeatures.supportedSampleCounts; |
| _properties.limits.sampledImageStencilSampleCounts = _metalFeatures.supportedSampleCounts; |
| _properties.limits.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT; |
| |
| _properties.limits.maxSampleMaskWords = 1; |
| |
| _properties.limits.maxImageDimension1D = _metalFeatures.maxTextureDimension; |
| _properties.limits.maxImageDimension2D = _metalFeatures.maxTextureDimension; |
| _properties.limits.maxImageDimensionCube = _metalFeatures.maxTextureDimension; |
| _properties.limits.maxFramebufferWidth = _metalFeatures.maxTextureDimension; |
| _properties.limits.maxFramebufferHeight = _metalFeatures.maxTextureDimension; |
| _properties.limits.maxFramebufferLayers = _metalFeatures.layeredRendering ? _metalFeatures.maxTextureLayers : 1; |
| |
| _properties.limits.maxViewportDimensions[0] = _metalFeatures.maxTextureDimension; |
| _properties.limits.maxViewportDimensions[1] = _metalFeatures.maxTextureDimension; |
| float maxVPDim = max(_properties.limits.maxViewportDimensions[0], _properties.limits.maxViewportDimensions[1]); |
| _properties.limits.viewportBoundsRange[0] = (-2.0 * maxVPDim); |
| _properties.limits.viewportBoundsRange[1] = (2.0 * maxVPDim) - 1; |
| _properties.limits.maxViewports = _features.multiViewport ? kMVKCachedViewportScissorCount : 1; |
| |
| _properties.limits.maxImageDimension3D = _metalFeatures.maxTextureLayers; |
| _properties.limits.maxImageArrayLayers = _metalFeatures.maxTextureLayers; |
| _properties.limits.maxSamplerAnisotropy = 16; |
| |
| _properties.limits.maxVertexInputAttributes = 31; |
| _properties.limits.maxVertexInputBindings = 31; |
| |
| _properties.limits.maxVertexInputBindingStride = (2 * KIBI); |
| _properties.limits.maxVertexInputAttributeOffset = _properties.limits.maxVertexInputBindingStride - 1; |
| |
| _properties.limits.maxPerStageDescriptorSamplers = _metalFeatures.maxPerStageSamplerCount; |
| _properties.limits.maxPerStageDescriptorUniformBuffers = _metalFeatures.maxPerStageBufferCount; |
| _properties.limits.maxPerStageDescriptorStorageBuffers = _metalFeatures.maxPerStageBufferCount; |
| _properties.limits.maxPerStageDescriptorSampledImages = _metalFeatures.maxPerStageTextureCount; |
| _properties.limits.maxPerStageDescriptorStorageImages = _metalFeatures.maxPerStageStorageTextureCount; |
| _properties.limits.maxPerStageDescriptorInputAttachments = _metalFeatures.maxPerStageTextureCount; |
| |
| _properties.limits.maxPerStageResources = (_metalFeatures.maxPerStageBufferCount + _metalFeatures.maxPerStageTextureCount); |
| _properties.limits.maxFragmentCombinedOutputResources = _properties.limits.maxPerStageResources; |
| |
| _properties.limits.maxDescriptorSetSamplers = (_properties.limits.maxPerStageDescriptorSamplers * 5); |
| _properties.limits.maxDescriptorSetUniformBuffers = (_properties.limits.maxPerStageDescriptorUniformBuffers * 5); |
| _properties.limits.maxDescriptorSetUniformBuffersDynamic = (_properties.limits.maxPerStageDescriptorUniformBuffers * 5); |
| _properties.limits.maxDescriptorSetStorageBuffers = (_properties.limits.maxPerStageDescriptorStorageBuffers * 5); |
| _properties.limits.maxDescriptorSetStorageBuffersDynamic = (_properties.limits.maxPerStageDescriptorStorageBuffers * 5); |
| _properties.limits.maxDescriptorSetSampledImages = (_properties.limits.maxPerStageDescriptorSampledImages * 5); |
| _properties.limits.maxDescriptorSetStorageImages = (_properties.limits.maxPerStageDescriptorStorageImages * 5); |
| _properties.limits.maxDescriptorSetInputAttachments = (_properties.limits.maxPerStageDescriptorInputAttachments * 5); |
| |
| // Whether handled as a real texture buffer or a 2D texture, this value is likely nowhere near the size of a buffer, |
| // needs to fit in 32 bits, and some apps (I'm looking at you, CTS), assume it is low when doing 32-bit math. |
| _properties.limits.maxTexelBufferElements = _properties.limits.maxImageDimension2D * (4 * KIBI); |
| #if MVK_MACOS |
| _properties.limits.maxUniformBufferRange = (64 * KIBI); |
| if (supportsMTLGPUFamily(Apple5)) { |
| _properties.limits.maxUniformBufferRange = (uint32_t)min(_metalFeatures.maxMTLBufferSize, (VkDeviceSize)std::numeric_limits<uint32_t>::max()); |
| } |
| #endif |
| #if MVK_IOS_OR_TVOS |
| _properties.limits.maxUniformBufferRange = (uint32_t)min(_metalFeatures.maxMTLBufferSize, (VkDeviceSize)std::numeric_limits<uint32_t>::max()); |
| #endif |
| _properties.limits.maxStorageBufferRange = (uint32_t)min(_metalFeatures.maxMTLBufferSize, (VkDeviceSize)std::numeric_limits<uint32_t>::max()); |
| _properties.limits.maxPushConstantsSize = (4 * KIBI); |
| |
| _properties.limits.minMemoryMapAlignment = max(_metalFeatures.mtlBufferAlignment, (VkDeviceSize)64); // Vulkan spec requires MIN of 64 |
| _properties.limits.minUniformBufferOffsetAlignment = _metalFeatures.mtlBufferAlignment; |
| _properties.limits.minStorageBufferOffsetAlignment = 16; |
| _properties.limits.bufferImageGranularity = _metalFeatures.mtlBufferAlignment; |
| _properties.limits.nonCoherentAtomSize = _metalFeatures.mtlBufferAlignment; |
| |
| if ([_mtlDevice respondsToSelector: @selector(minimumLinearTextureAlignmentForPixelFormat:)]) { |
| // Figure out the greatest alignment required by all supported formats, and whether |
| // or not they only require alignment to a single texel. We'll use this information |
| // to fill out the VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT struct. |
| uint32_t maxStorage = 0, maxUniform = 0; |
| bool singleTexelStorage = true, singleTexelUniform = true; |
| _pixelFormats.enumerateSupportedFormats({0, 0, VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT}, true, [&](VkFormat vk) { |
| MTLPixelFormat mtlFmt = _pixelFormats.getMTLPixelFormat(vk); |
| if ( !mtlFmt ) { return false; } // If format is invalid, avoid validation errors on MTLDevice format alignment calls |
| |
| NSUInteger alignment; |
| if ([_mtlDevice respondsToSelector: @selector(minimumTextureBufferAlignmentForPixelFormat:)]) { |
| alignment = [_mtlDevice minimumTextureBufferAlignmentForPixelFormat: mtlFmt]; |
| } else { |
| alignment = [_mtlDevice minimumLinearTextureAlignmentForPixelFormat: mtlFmt]; |
| } |
| VkFormatProperties& props = _pixelFormats.getVkFormatProperties(vk); |
| // For uncompressed formats, this is the size of a single texel. |
| // Note that no implementations of Metal support compressed formats |
| // in a linear texture (including texture buffers). It's likely that even |
| // if they did, this would be the absolute minimum alignment. |
| uint32_t texelSize = _pixelFormats.getBytesPerBlock(vk); |
| // From the spec: |
| // "If the size of a single texel is a multiple of three bytes, then |
| // the size of a single component of the format is used instead." |
| if (texelSize % 3 == 0) { |
| switch (_pixelFormats.getFormatType(vk)) { |
| case kMVKFormatColorInt8: |
| case kMVKFormatColorUInt8: |
| texelSize = 1; |
| break; |
| case kMVKFormatColorHalf: |
| case kMVKFormatColorInt16: |
| case kMVKFormatColorUInt16: |
| texelSize = 2; |
| break; |
| case kMVKFormatColorFloat: |
| case kMVKFormatColorInt32: |
| case kMVKFormatColorUInt32: |
| default: |
| texelSize = 4; |
| break; |
| } |
| } |
| if (mvkAreAllFlagsEnabled(props.bufferFeatures, VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) { |
| maxStorage = max(maxStorage, uint32_t(alignment)); |
| if (alignment > texelSize) { singleTexelStorage = false; } |
| } |
| if (mvkAreAllFlagsEnabled(props.bufferFeatures, VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT)) { |
| maxUniform = max(maxUniform, uint32_t(alignment)); |
| if (alignment > texelSize) { singleTexelUniform = false; } |
| } |
| return true; |
| }); |
| _texelBuffAlignProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; |
| _texelBuffAlignProperties.storageTexelBufferOffsetAlignmentBytes = maxStorage; |
| _texelBuffAlignProperties.storageTexelBufferOffsetSingleTexelAlignment = singleTexelStorage; |
| _texelBuffAlignProperties.uniformTexelBufferOffsetAlignmentBytes = maxUniform; |
| _texelBuffAlignProperties.uniformTexelBufferOffsetSingleTexelAlignment = singleTexelUniform; |
| _properties.limits.minTexelBufferOffsetAlignment = max(maxStorage, maxUniform); |
| } else { |
| #if MVK_TVOS |
| _properties.limits.minTexelBufferOffsetAlignment = 64; |
| #endif |
| #if MVK_IOS |
| if (supportsMTLFeatureSet(iOS_GPUFamily3_v1)) { |
| _properties.limits.minTexelBufferOffsetAlignment = 16; |
| } else { |
| _properties.limits.minTexelBufferOffsetAlignment = 64; |
| } |
| #endif |
| #if MVK_MACOS |
| _properties.limits.minTexelBufferOffsetAlignment = 256; |
| if (supportsMTLGPUFamily(Apple5)) { |
| _properties.limits.minTexelBufferOffsetAlignment = 16; |
| } |
| #endif |
| _texelBuffAlignProperties.storageTexelBufferOffsetAlignmentBytes = _properties.limits.minTexelBufferOffsetAlignment; |
| _texelBuffAlignProperties.storageTexelBufferOffsetSingleTexelAlignment = VK_FALSE; |
| _texelBuffAlignProperties.uniformTexelBufferOffsetAlignmentBytes = _properties.limits.minTexelBufferOffsetAlignment; |
| _texelBuffAlignProperties.uniformTexelBufferOffsetSingleTexelAlignment = VK_FALSE; |
| } |
| |
| #if MVK_TVOS |
| if (mvkOSVersionIsAtLeast(13.0) && supportsMTLGPUFamily(Apple4)) { |
| _properties.limits.maxFragmentInputComponents = 124; |
| } else { |
| _properties.limits.maxFragmentInputComponents = 60; |
| } |
| |
| if (supportsMTLFeatureSet(tvOS_GPUFamily2_v1)) { |
| _properties.limits.optimalBufferCopyOffsetAlignment = 16; |
| } else { |
| _properties.limits.optimalBufferCopyOffsetAlignment = 64; |
| } |
| |
| _properties.limits.maxTessellationGenerationLevel = 16; |
| _properties.limits.maxTessellationPatchSize = 32; |
| #endif |
| #if MVK_IOS |
| if (mvkOSVersionIsAtLeast(13.0) && supportsMTLGPUFamily(Apple4)) { |
| _properties.limits.maxFragmentInputComponents = 124; |
| } else { |
| _properties.limits.maxFragmentInputComponents = 60; |
| } |
| |
| if (supportsMTLFeatureSet(iOS_GPUFamily3_v1)) { |
| _properties.limits.optimalBufferCopyOffsetAlignment = 16; |
| } else { |
| _properties.limits.optimalBufferCopyOffsetAlignment = 64; |
| } |
| |
| if (supportsMTLFeatureSet(iOS_GPUFamily5_v1)) { |
| _properties.limits.maxTessellationGenerationLevel = 64; |
| _properties.limits.maxTessellationPatchSize = 32; |
| } else if (supportsMTLFeatureSet(iOS_GPUFamily3_v2)) { |
| _properties.limits.maxTessellationGenerationLevel = 16; |
| _properties.limits.maxTessellationPatchSize = 32; |
| } else { |
| _properties.limits.maxTessellationGenerationLevel = 0; |
| _properties.limits.maxTessellationPatchSize = 0; |
| } |
| #endif |
| #if MVK_MACOS |
| _properties.limits.maxFragmentInputComponents = 124; |
| _properties.limits.optimalBufferCopyOffsetAlignment = 256; |
| if (supportsMTLGPUFamily(Apple5)) { |
| _properties.limits.optimalBufferCopyOffsetAlignment = 16; |
| } |
| |
| if (supportsMTLFeatureSet(macOS_GPUFamily1_v2)) { |
| _properties.limits.maxTessellationGenerationLevel = 64; |
| _properties.limits.maxTessellationPatchSize = 32; |
| } else { |
| _properties.limits.maxTessellationGenerationLevel = 0; |
| _properties.limits.maxTessellationPatchSize = 0; |
| } |
| #endif |
| |
| _properties.limits.maxVertexOutputComponents = _properties.limits.maxFragmentInputComponents; |
| |
| if (_features.tessellationShader) { |
| _properties.limits.maxTessellationControlPerVertexInputComponents = _properties.limits.maxVertexOutputComponents; |
| _properties.limits.maxTessellationControlPerVertexOutputComponents = _properties.limits.maxTessellationControlPerVertexInputComponents; |
| // Reserve a few for the tessellation levels. |
| _properties.limits.maxTessellationControlPerPatchOutputComponents = std::max(_properties.limits.maxFragmentInputComponents - 8, 120u); |
| _properties.limits.maxTessellationControlTotalOutputComponents = _properties.limits.maxTessellationPatchSize * _properties.limits.maxTessellationControlPerVertexOutputComponents + _properties.limits.maxTessellationControlPerPatchOutputComponents; |
| _properties.limits.maxTessellationEvaluationInputComponents = _properties.limits.maxTessellationControlPerVertexInputComponents; |
| _properties.limits.maxTessellationEvaluationOutputComponents = _properties.limits.maxTessellationEvaluationInputComponents; |
| } else { |
| _properties.limits.maxTessellationControlPerVertexInputComponents = 0; |
| _properties.limits.maxTessellationControlPerVertexOutputComponents = 0; |
| _properties.limits.maxTessellationControlPerPatchOutputComponents = 0; |
| _properties.limits.maxTessellationControlTotalOutputComponents = 0; |
| _properties.limits.maxTessellationEvaluationInputComponents = 0; |
| _properties.limits.maxTessellationEvaluationOutputComponents = 0; |
| } |
| |
| _properties.limits.optimalBufferCopyRowPitchAlignment = 1; |
| |
| _properties.limits.timestampComputeAndGraphics = VK_TRUE; |
| _properties.limits.timestampPeriod = _metalFeatures.counterSamplingPoints ? 1.0 : mvkGetTimestampPeriod(); |
| |
| _properties.limits.pointSizeRange[0] = 1; |
| switch (_properties.vendorID) { |
| case kAppleVendorId: |
| _properties.limits.pointSizeRange[1] = 511; |
| break; |
| case kIntelVendorId: |
| _properties.limits.pointSizeRange[1] = 256; |
| break; |
| case kAMDVendorId: |
| case kNVVendorId: |
| default: |
| _properties.limits.pointSizeRange[1] = 64; |
| break; |
| } |
| |
| _properties.limits.pointSizeGranularity = 1; |
| _properties.limits.lineWidthRange[0] = 1; |
| _properties.limits.lineWidthRange[1] = 1; |
| _properties.limits.lineWidthGranularity = 0; |
| |
| _properties.limits.standardSampleLocations = VK_TRUE; |
| _properties.limits.strictLines = _properties.vendorID == kIntelVendorId || _properties.vendorID == kNVVendorId; |
| |
| VkExtent3D wgSize = mvkVkExtent3DFromMTLSize(_mtlDevice.maxThreadsPerThreadgroup); |
| _properties.limits.maxComputeWorkGroupSize[0] = wgSize.width; |
| _properties.limits.maxComputeWorkGroupSize[1] = wgSize.height; |
| _properties.limits.maxComputeWorkGroupSize[2] = wgSize.depth; |
| _properties.limits.maxComputeWorkGroupInvocations = max({wgSize.width, wgSize.height, wgSize.depth}); |
| |
| if ( [_mtlDevice respondsToSelector: @selector(maxThreadgroupMemoryLength)] ) { |
| _properties.limits.maxComputeSharedMemorySize = (uint32_t)_mtlDevice.maxThreadgroupMemoryLength; |
| } else { |
| #if MVK_TVOS |
| if (supportsMTLFeatureSet(tvOS_GPUFamily2_v1)) { |
| _properties.limits.maxComputeSharedMemorySize = (16 * KIBI); |
| } else { |
| _properties.limits.maxComputeSharedMemorySize = ((16 * KIBI) - 32); |
| } |
| #endif |
| #if MVK_IOS |
| if (supportsMTLFeatureSet(iOS_GPUFamily4_v1)) { |
| _properties.limits.maxComputeSharedMemorySize = (32 * KIBI); |
| } else if (supportsMTLFeatureSet(iOS_GPUFamily3_v1)) { |
| _properties.limits.maxComputeSharedMemorySize = (16 * KIBI); |
| } else { |
| _properties.limits.maxComputeSharedMemorySize = ((16 * KIBI) - 32); |
| } |
| #endif |
| #if MVK_MACOS |
| _properties.limits.maxComputeSharedMemorySize = (32 * KIBI); |
| #endif |
| } |
| |
| // Max sum of API and shader values. Bias not supported in API, but can be applied in shader directly. |
| // The lack of API value is covered by VkPhysicalDevicePortabilitySubsetFeaturesKHR::samplerMipLodBias. |
| // Metal does not specify limit for shader value, so choose something reasonable. |
| _properties.limits.maxSamplerLodBias = 4; |
| |
| _properties.limits.minTexelOffset = -8; |
| _properties.limits.maxTexelOffset = 7; |
| _properties.limits.minTexelGatherOffset = _properties.limits.minTexelOffset; |
| _properties.limits.maxTexelGatherOffset = _properties.limits.maxTexelOffset; |
| |
| // Features with no specific limits - default to unlimited int values |
| |
| _properties.limits.maxMemoryAllocationCount = kMVKUndefinedLargeUInt32; |
| _properties.limits.maxSamplerAllocationCount = getMaxSamplerCount(); |
| _properties.limits.maxBoundDescriptorSets = kMVKMaxDescriptorSetCount; |
| |
| _properties.limits.maxComputeWorkGroupCount[0] = kMVKUndefinedLargeUInt32; |
| _properties.limits.maxComputeWorkGroupCount[1] = kMVKUndefinedLargeUInt32; |
| _properties.limits.maxComputeWorkGroupCount[2] = kMVKUndefinedLargeUInt32; |
| |
| _properties.limits.maxDrawIndexedIndexValue = numeric_limits<uint32_t>::max(); // Must be (2^32 - 1) to support fullDrawIndexUint32 |
| _properties.limits.maxDrawIndirectCount = kMVKUndefinedLargeUInt32; |
| |
| _properties.limits.maxClipDistances = kMVKUndefinedLargeUInt32; |
| _properties.limits.maxCullDistances = 0; // unsupported |
| _properties.limits.maxCombinedClipAndCullDistances = _properties.limits.maxClipDistances + |
| _properties.limits.maxCullDistances; |
| |
| |
| // Features with unknown limits - default to Vulkan required limits |
| |
| _properties.limits.subPixelPrecisionBits = 4; |
| _properties.limits.subTexelPrecisionBits = 4; |
| _properties.limits.mipmapPrecisionBits = 4; |
| _properties.limits.viewportSubPixelBits = 0; |
| |
| _properties.limits.discreteQueuePriorities = 2; |
| |
| _properties.limits.minInterpolationOffset = -0.5; |
| _properties.limits.maxInterpolationOffset = 0.5; |
| _properties.limits.subPixelInterpolationOffsetBits = 4; |
| |
| |
| // Unsupported features - set to zeros generally |
| |
| _properties.limits.sparseAddressSpaceSize = 0; |
| |
| _properties.limits.maxGeometryShaderInvocations = 0; |
| _properties.limits.maxGeometryInputComponents = 0; |
| _properties.limits.maxGeometryOutputComponents = 0; |
| _properties.limits.maxGeometryOutputVertices = 0; |
| _properties.limits.maxGeometryTotalOutputComponents = 0; |
| } |
| |
| #if MVK_MACOS |
| |
// Returns, as a uint32_t, the value of the named property of the specified IORegistry entry.
// The search covers the IOService plane, and is run with both the
// kIORegistryIterateRecursively and kIORegistryIterateParents options.
//
// Returns zero if the property cannot be found, or if it is not held as a CFData.
// If the property holds fewer bytes than a uint32_t, only the bytes that exist
// are copied, and the remaining bytes of the returned value are left as zero.
static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef propertyName) {

    uint32_t value = 0;

    CFTypeRef cfProp = IORegistryEntrySearchCFProperty(entry,
                                                       kIOServicePlane,
                                                       propertyName,
                                                       kCFAllocatorDefault,
                                                       kIORegistryIterateRecursively |
                                                       kIORegistryIterateParents);
    if ( !cfProp ) { return value; }

    // The search can return any CF type. Only a CFData can be read as raw bytes,
    // so any other type is treated the same as a missing property.
    if (CFGetTypeID(cfProp) == CFDataGetTypeID()) {
        CFDataRef cfData = (CFDataRef)cfProp;

        // Never copy more bytes than the property actually holds.
        CFIndex byteCnt = CFDataGetLength(cfData);
        if (byteCnt > (CFIndex)sizeof(value)) { byteCnt = (CFIndex)sizeof(value); }

        // Copying the bytes out, instead of dereferencing the CFData storage as a
        // uint32_t, also avoids relying on the alignment of that storage.
        if (byteCnt > 0) {
            CFDataGetBytes(cfData, CFRangeMake(0, byteCnt), reinterpret_cast<UInt8*>(&value));
        }
    }
    CFRelease(cfProp);

    return value;
}
| |
// Populates the device type, device name, vendor ID, and device ID of this physical device.
//
// Apple GPUs are identified directly from the Metal device. For any other GPU, the vendor
// and device IDs are read from the IORegistry: first via the registry ID of the Metal
// device and, if that does not locate the GPU, by scanning the PCI display devices.
// If the GPU cannot be located at all, the vendor and device IDs are left untouched.
void MVKPhysicalDevice::initGPUInfoProperties() {

    bool isIntegrated = getHasUnifiedMemory();
    _properties.deviceType = isIntegrated ? VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU : VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
    strlcpy(_properties.deviceName, _mtlDevice.name.UTF8String, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE);

    // For Apple Silicon, the Device ID is determined by the highest
    // GPU capability, which is a combination of OS version and GPU type.
    // We determine Apple Silicon directly from the GPU, instead
    // of from the build, in case we are running Rosetta2.
    if (supportsMTLGPUFamily(Apple1)) {
        _properties.vendorID = kAppleVendorId;
        _properties.deviceID = getHighestGPUCapability();
        return;
    }

    // If the device has an associated registry ID, we can use that to get the associated IOKit node.
    // The match dictionary is consumed by IOServiceGetMatchingService and does not need to be released.
    bool isFound = false;
    uint64_t regID = mvkGetRegistryID(_mtlDevice);
    if (regID) {
        io_registry_entry_t entry = IOServiceGetMatchingService(MACH_PORT_NULL, IORegistryEntryIDMatching(regID));
        if (entry) {
            // That returned the IOGraphicsAccelerator nub. Its parent, then, is the actual PCI device.
            io_registry_entry_t parent;
            if (IORegistryEntryGetParentEntry(entry, kIOServicePlane, &parent) == kIOReturnSuccess) {
                isFound = true;
                _properties.vendorID = mvkGetEntryProperty(parent, CFSTR("vendor-id"));
                _properties.deviceID = mvkGetEntryProperty(parent, CFSTR("device-id"));
                IOObjectRelease(parent);
            }
            IOObjectRelease(entry);
        }
    }

    // Otherwise, iterate all PCI devices, looking for a GPU that matches.
    // The match dictionary is consumed by IOServiceGetMatchingServices and does not need to be released.
    io_iterator_t entryIterator;
    if (!isFound && IOServiceGetMatchingServices(MACH_PORT_NULL,
                                                 IOServiceMatching("IOPCIDevice"),
                                                 &entryIterator) == kIOReturnSuccess) {
        io_registry_entry_t entry;
        while ( !isFound && (entry = IOIteratorNext(entryIterator)) ) {
            if (mvkGetEntryProperty(entry, CFSTR("class-code")) == 0x30000) {    // 0x30000 : DISPLAY_VGA

                // The Intel GPU will always be marked as integrated.
                // Accept a match of either Intel and integrated, or non-Intel and non-integrated.
                uint32_t vendorID = mvkGetEntryProperty(entry, CFSTR("vendor-id"));
                if ( (vendorID == kIntelVendorId) == isIntegrated) {
                    isFound = true;
                    _properties.vendorID = vendorID;
                    _properties.deviceID = mvkGetEntryProperty(entry, CFSTR("device-id"));
                }
            }
            // Each object returned by IOIteratorNext must be released by the caller,
            // whether or not it turned out to be the GPU we were looking for.
            IOObjectRelease(entry);
        }
        IOObjectRelease(entryIterator);
    }
}
| |
| #endif //MVK_MACOS |
| |
| #if MVK_IOS_OR_TVOS |
// Populates the device type, device name, vendor ID, and device ID of this physical device.
// On these platforms the GPU is always reported as an integrated GPU from Apple, and the
// Device ID is the highest GPU capability, which is a combination of OS version and GPU type.
void MVKPhysicalDevice::initGPUInfoProperties() {
    auto& props = _properties;
    strlcpy(props.deviceName, _mtlDevice.name.UTF8String, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE);
    props.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
    props.deviceID = getHighestGPUCapability();
    props.vendorID = kAppleVendorId;
}
| #endif //MVK_IOS_OR_TVOS |
| |
| #pragma mark VkPhysicalDeviceLimits - List of feature limits available on the device |
| |
| //typedef struct VkPhysicalDeviceLimits { |
| // uint32_t maxImageDimension1D; // done |
| // uint32_t maxImageDimension2D; // done |
| // uint32_t maxImageDimension3D; // done |
| // uint32_t maxImageDimensionCube; // done |
| // uint32_t maxImageArrayLayers; // done |
| // uint32_t maxTexelBufferElements; // done |
| // uint32_t maxUniformBufferRange; // done |
| // uint32_t maxStorageBufferRange; // done |
| // uint32_t maxPushConstantsSize; // done |
| // uint32_t maxMemoryAllocationCount; // done |
| // uint32_t maxSamplerAllocationCount; // done |
| // VkDeviceSize bufferImageGranularity; // done |
| // VkDeviceSize sparseAddressSpaceSize; // done |
| // uint32_t maxBoundDescriptorSets; // done |
| // uint32_t maxPerStageDescriptorSamplers; // done |
| // uint32_t maxPerStageDescriptorUniformBuffers; // done |
| // uint32_t maxPerStageDescriptorStorageBuffers; // done |
| // uint32_t maxPerStageDescriptorSampledImages; // done |
| // uint32_t maxPerStageDescriptorStorageImages; // done |
| // uint32_t maxPerStageDescriptorInputAttachments; // done |
| // uint32_t maxPerStageResources; // done |
| // uint32_t maxDescriptorSetSamplers; // done |
| // uint32_t maxDescriptorSetUniformBuffers; // done |
| // uint32_t maxDescriptorSetUniformBuffersDynamic; // done |
| // uint32_t maxDescriptorSetStorageBuffers; // done |
| // uint32_t maxDescriptorSetStorageBuffersDynamic; // done |
| // uint32_t maxDescriptorSetSampledImages; // done |
| // uint32_t maxDescriptorSetStorageImages; // done |
| // uint32_t maxDescriptorSetInputAttachments; // done |
| // uint32_t maxVertexInputAttributes; // done |
| // uint32_t maxVertexInputBindings; // done |
| // uint32_t maxVertexInputAttributeOffset; // done |
| // uint32_t maxVertexInputBindingStride; // done |
| // uint32_t maxVertexOutputComponents; // done |
| // uint32_t maxTessellationGenerationLevel; // done |
| // uint32_t maxTessellationPatchSize; // done |
| // uint32_t maxTessellationControlPerVertexInputComponents; // done |
| // uint32_t maxTessellationControlPerVertexOutputComponents; // done |
| // uint32_t maxTessellationControlPerPatchOutputComponents; // done |
| // uint32_t maxTessellationControlTotalOutputComponents; // done |
| // uint32_t maxTessellationEvaluationInputComponents; // done |
| // uint32_t maxTessellationEvaluationOutputComponents; // done |
| // uint32_t maxGeometryShaderInvocations; // done |
| // uint32_t maxGeometryInputComponents; // done |
| // uint32_t maxGeometryOutputComponents; // done |
| // uint32_t maxGeometryOutputVertices; // done |
| // uint32_t maxGeometryTotalOutputComponents; // done |
| // uint32_t maxFragmentInputComponents; // done |
| // uint32_t maxFragmentOutputAttachments; // done |
| // uint32_t maxFragmentDualSrcAttachments; // done |
| // uint32_t maxFragmentCombinedOutputResources; // done |
| // uint32_t maxComputeSharedMemorySize; // done |
| // uint32_t maxComputeWorkGroupCount[3]; // done |
| // uint32_t maxComputeWorkGroupInvocations; // done |
| // uint32_t maxComputeWorkGroupSize[3]; // done |
| // uint32_t subPixelPrecisionBits; // done |
| // uint32_t subTexelPrecisionBits; // done |
| // uint32_t mipmapPrecisionBits; // done |
| // uint32_t maxDrawIndexedIndexValue; // done |
| // uint32_t maxDrawIndirectCount; // done |
| // float maxSamplerLodBias; // done |
| // float maxSamplerAnisotropy; // done |
| // uint32_t maxViewports; // done |
| // uint32_t maxViewportDimensions[2]; // done |
| // float viewportBoundsRange[2]; // done |
| // uint32_t viewportSubPixelBits; // done |
| // size_t minMemoryMapAlignment; // done |
| // VkDeviceSize minTexelBufferOffsetAlignment; // done |
| // VkDeviceSize minUniformBufferOffsetAlignment; // done |
| // VkDeviceSize minStorageBufferOffsetAlignment; // done |
| // int32_t minTexelOffset; // done |
| // uint32_t maxTexelOffset; // done |
| // int32_t minTexelGatherOffset; // done |
| // uint32_t maxTexelGatherOffset; // done |
| // float minInterpolationOffset; // done |
| // float maxInterpolationOffset; // done |
| // uint32_t subPixelInterpolationOffsetBits; // done |
| // uint32_t maxFramebufferWidth; // done |
| // uint32_t maxFramebufferHeight; // done |
| // uint32_t maxFramebufferLayers; // done |
| // VkSampleCountFlags framebufferColorSampleCounts; // done |
| // VkSampleCountFlags framebufferDepthSampleCounts; // done |
| // VkSampleCountFlags framebufferStencilSampleCounts; // done |
| // VkSampleCountFlags framebufferNoAttachmentsSampleCounts; // done |
| // uint32_t maxColorAttachments; // done |
| // VkSampleCountFlags sampledImageColorSampleCounts; // done |
| // VkSampleCountFlags sampledImageIntegerSampleCounts; // done |
| // VkSampleCountFlags sampledImageDepthSampleCounts; // done |
| // VkSampleCountFlags sampledImageStencilSampleCounts; // done |
| // VkSampleCountFlags storageImageSampleCounts; // done |
| // uint32_t maxSampleMaskWords; // done |
| // VkBool32 timestampComputeAndGraphics; // done |
| // float timestampPeriod; // done |
| // uint32_t maxClipDistances; // done |
| // uint32_t maxCullDistances; // done |
| // uint32_t maxCombinedClipAndCullDistances; // done |
| // uint32_t discreteQueuePriorities; // done |
| // float pointSizeRange[2]; // done |
| // float lineWidthRange[2]; // done |
| // float pointSizeGranularity; // done |
| // float lineWidthGranularity; // done |
| // VkBool32 strictLines; // done |
| // VkBool32 standardSampleLocations; // done |
| // VkDeviceSize optimalBufferCopyOffsetAlignment; // done |
| // VkDeviceSize optimalBufferCopyRowPitchAlignment; // done |
| // VkDeviceSize nonCoherentAtomSize; // done |
| //} VkPhysicalDeviceLimits; |
| |
| //typedef struct { |
| // VkBool32 residencyStandard2DBlockShape; |
| // VkBool32 residencyStandard2DMSBlockShape; |
| // VkBool32 residencyStandard3DBlockShape; |
| // VkBool32 residencyAlignedMipSize; |
| // VkBool32 residencyNonResident; |
| // VkBool32 residencyNonResidentStrict; |
| //} VkPhysicalDeviceSparseProperties; |
| |
| |
// Populates the pipeline cache UUID of this physical device.
//
// The UUID is built from three 32-bit components, each written most-significant byte first:
//   - the MoltenVK revision (the Git revision or, if that is not available, the MoltenVK version)
//   - the highest GPU capability supported by this device
//   - flags for enabled Metal features that might affect the contents of the pipeline cache
// Any bytes of the UUID beyond these components are left as zero.
void MVKPhysicalDevice::initPipelineCacheUUID() {

    uint8_t* pUUID = _properties.pipelineCacheUUID;

    // Clear the UUID. The array is passed directly, so that it decays to a pointer to its
    // first byte and the count is a count of bytes. Passing the address of the array would
    // make the count a count of whole arrays, if mvkClear() scales the count by the size
    // of the pointed-to type (NOTE(review): it appears to; confirm against its declaration).
    mvkClear(pUUID, VK_UUID_SIZE);

    size_t uuidComponentOffset = 0;

    // Appends a 32-bit component to the UUID in big-endian byte order. Writing the bytes
    // individually gives the same layout on any host, and avoids storing a uint32_t
    // through a pointer into a byte array.
    auto appendComponent = [&](uint32_t component) {
        for (int shift = 24; shift >= 0; shift -= 8) {
            pUUID[uuidComponentOffset++] = (uint8_t)(component >> shift);
        }
    };

    // First 4 bytes contains MoltenVK revision.
    // This is captured either as the MoltenVK Git revision, or if that's not available, as the MoltenVK version.
    uint32_t mvkRev = getMoltenVKGitRevision();
    if ( !mvkRev ) { mvkRev = MVK_VERSION; }
    appendComponent(mvkRev);

    // Next 4 bytes contains highest GPU capability supported by this device
    appendComponent(getHighestGPUCapability());

    // Next 4 bytes contains flags based on enabled Metal features that
    // might affect the contents of the pipeline cache (mostly MSL content).
    uint32_t mtlFeatures = 0;
    mtlFeatures |= isUsingMetalArgumentBuffers() << 0;
    appendComponent(mtlFeatures);
}
| |
// Returns a 32-bit value summarizing the highest GPU capability supported by this device.
//
// If the device reports support for any Metal GPU family, the value combines the OS major
// version (top 8 bits), the OS minor version (next 8 bits), and the highest supported GPU
// family (low 16 bits). Otherwise, the value is the highest legacy Metal feature set that
// the device supports, or the lowest feature set of the platform if no higher one matches.
uint32_t MVKPhysicalDevice::getHighestGPUCapability() {

    // On newer OS's, combine OS version with highest GPU family.
    // On macOS, Apple GPU fam takes precedence over others.
    MTLGPUFamily gpuFam = MTLGPUFamily(0);
    if (supportsMTLGPUFamily(Mac1)) { gpuFam = MTLGPUFamilyMac1; }
    if (supportsMTLGPUFamily(Mac2)) { gpuFam = MTLGPUFamilyMac2; }

    if (supportsMTLGPUFamily(Apple1)) { gpuFam = MTLGPUFamilyApple1; }
    if (supportsMTLGPUFamily(Apple2)) { gpuFam = MTLGPUFamilyApple2; }
    if (supportsMTLGPUFamily(Apple3)) { gpuFam = MTLGPUFamilyApple3; }
    if (supportsMTLGPUFamily(Apple4)) { gpuFam = MTLGPUFamilyApple4; }
    if (supportsMTLGPUFamily(Apple5)) { gpuFam = MTLGPUFamilyApple5; }
#if MVK_IOS || (MVK_MACOS && MVK_XCODE_12)
    if (supportsMTLGPUFamily(Apple6)) { gpuFam = MTLGPUFamilyApple6; }
#endif
#if (MVK_IOS || MVK_MACOS) && MVK_XCODE_12
    if (supportsMTLGPUFamily(Apple7)) { gpuFam = MTLGPUFamilyApple7; }
#endif
#if MVK_IOS && MVK_XCODE_13
    if (supportsMTLGPUFamily(Apple8)) { gpuFam = MTLGPUFamilyApple8; }
#endif

    // Combine OS major (8 bits), OS minor (8 bits), and GPU family (16 bits)
    // into one 32-bit value summarizing highest GPU capability.
    if (gpuFam) {
        float fosMaj, fosMin;
        fosMin = modf(mvkOSVersion(), &fosMaj);

        // The minor version is the first two decimal digits of the fraction. A float cannot
        // hold most decimal fractions exactly (.03 is stored as slightly less than .03), so
        // truncating (fraction * 100) can land one below the real minor version. Rounding at
        // four decimal digits first, and then dropping the last two, avoids that.
        // NOTE(review): assumes the fraction carries at most four decimal digits - confirm
        // against mvkOSVersion().
        uint32_t osMinAndBelow = (uint32_t)lround(fosMin * 10000);

        // Widen to unsigned before shifting, so the shifts are not made on signed ints.
        uint32_t osMaj = (uint8_t)fosMaj;
        uint32_t osMin = (uint8_t)(osMinAndBelow / 100);
        return (osMaj << 24) | (osMin << 16) | (uint16_t)gpuFam;
    }

    // Fall back to legacy feature sets on older OS's
#if MVK_IOS
    uint32_t maxFS = (uint32_t)MTLFeatureSet_iOS_GPUFamily5_v1;
    uint32_t minFS = (uint32_t)MTLFeatureSet_iOS_GPUFamily1_v1;
#endif

#if MVK_TVOS
    uint32_t maxFS = (uint32_t)MTLFeatureSet_tvOS_GPUFamily2_v2;
    uint32_t minFS = (uint32_t)MTLFeatureSet_tvOS_GPUFamily1_v1;
#endif

#if MVK_MACOS
    uint32_t maxFS = (uint32_t)MTLFeatureSet_macOS_GPUFamily2_v1;
    uint32_t minFS = (uint32_t)MTLFeatureSet_macOS_GPUFamily1_v1;
#endif

    // Search downwards from the highest feature set, so the first match is the highest supported.
    for (uint32_t fs = maxFS; fs > minFS; fs--) {
        if ( [_mtlDevice supportsFeatureSet: (MTLFeatureSet)fs] ) { return fs; }
    }
    return minFS;
}
| |
// Returns the leading characters of the MoltenVK Git revision hash as a 32-bit value.
//
// The revision hash is retrieved from a derived header file, which is generated
// in advance, either statically, or more typically in an early build phase script,
// and contains a line similar to the following:
//     static const char* mvkRevString = "fc0750d67cfe825b887dd2cf25a42e9d9a013eb2";
//
// Only the first 8 characters of the hash are used. Each one that is a hex digit
// (in either case) contributes 4 bits to the result, and any other character is
// skipped. Returns zero if none of those characters is a hex digit.
uint32_t MVKPhysicalDevice::getMoltenVKGitRevision() {

#include "mvkGitRevDerived.h"

    static const string revStr(mvkRevString, 0, 8);    // We just need the first 8 chars

    uint32_t revVal = 0;
    for (char c : revStr) {
        // Decode the digit with plain range tests, rather than toupper(), which is
        // locale-dependent and undefined for a char that holds a negative value.
        uint32_t digit;
        if (c >= '0' && c <= '9') {
            digit = (uint32_t)(c - '0');
        } else if (c >= 'a' && c <= 'f') {
            digit = (uint32_t)(c - 'a') + 10;
        } else if (c >= 'A' && c <= 'F') {
            digit = (uint32_t)(c - 'A') + 10;
        } else {
            continue;    // Not a hex digit
        }
        revVal = (revVal << 4) | digit;
    }
    return revVal;
}
| |
// Sets the size and flags of the memory heap at the specified index in the memory properties.
void MVKPhysicalDevice::setMemoryHeap(uint32_t heapIndex, VkDeviceSize heapSize, VkMemoryHeapFlags heapFlags) {
    auto& heap = _memoryProperties.memoryHeaps[heapIndex];
    heap.flags = heapFlags;
    heap.size = heapSize;
}
| |
// Sets the heap index and property flags of the memory type at the specified index in the memory properties.
void MVKPhysicalDevice::setMemoryType(uint32_t typeIndex, uint32_t heapIndex, VkMemoryPropertyFlags propertyFlags) {
    auto& memType = _memoryProperties.memoryTypes[typeIndex];
    memType.propertyFlags = propertyFlags;
    memType.heapIndex = heapIndex;
}
| |
// Initializes the memory heaps and memory types of this instance, along with
// the bit masks that identify which memory types fall into each category.
//
// The memory types are derived from the Metal storage modes:
//   Metal Shared:
//     - applies to both buffers and textures
//     - default mode for buffers on both iOS & macOS
//     - default mode for textures on iOS
//     - one copy of memory visible to both CPU & GPU
//     - coherent at command buffer boundaries
//   Metal Private:
//     - applies to both buffers and textures
//     - accessed only by GPU through render, compute, or BLIT operations
//     - no access by CPU
//     - always use for framebuffers and renderable textures
//   Metal Managed:
//     - applies to both buffers and textures
//     - default mode for textures on macOS
//     - two copies of each buffer or texture when discrete memory available
//     - convenience of shared mode, performance of private mode
//     - on unified systems behaves like shared memory and has only one copy of content
//     - when writing, use:
//         - buffer didModifyRange:
//         - texture replaceRegion:
//     - when reading, use:
//         - encoder synchronizeResource: followed by
//         - cmdbuff waitUntilCompleted (or completion handler)
//         - buffer/texture getBytes:
//   Metal Memoryless:
//     - applies only to textures used as transient render targets
//     - only available with TBDR devices (see the per-platform checks below)
//     - no device memory is reserved at all
//     - storage comes from tile memory
//     - contents are undefined after rendering
//     - use for temporary renderable textures
void MVKPhysicalDevice::initMemoryProperties() {

    mvkClear(&_memoryProperties);    // Start with everything cleared

    // The main heap always exists, and is device-local.
    const uint32_t mainHeapIdx = 0;
    setMemoryHeap(mainHeapIdx, getVRAMSize(), VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);

    // With unified memory, shared memory lives in the single main heap and, per Vulkan spec,
    // must be marked local. Otherwise, a second heap is defined to mark it as non-local.
    uint32_t sharedHeapIdx = mainHeapIdx;
    VkMemoryPropertyFlags sharedTypePropFlags = MVK_VK_MEMORY_TYPE_METAL_SHARED;
    if (getHasUnifiedMemory()) {
        sharedTypePropFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    } else {
        sharedHeapIdx = mainHeapIdx + 1;
        setMemoryHeap(sharedHeapIdx, mvkGetSystemMemorySize(), 0);
    }

    _memoryProperties.memoryHeapCount = sharedHeapIdx + 1;

    // Defines the next memory type, and returns the bit that identifies it in a memory type mask.
    uint32_t typeIdx = 0;
    auto addMemoryType = [&](uint32_t heapIdx, VkMemoryPropertyFlags propFlags) -> uint32_t {
        uint32_t typeBit = 1 << typeIdx;
        setMemoryType(typeIdx, heapIdx, propFlags);
        typeIdx++;
        return typeBit;
    };

    // Private and shared storage are always available
    uint32_t privateBit = addMemoryType(mainHeapIdx, MVK_VK_MEMORY_TYPE_METAL_PRIVATE);
    uint32_t sharedBit = addMemoryType(sharedHeapIdx, sharedTypePropFlags);

    // Managed storage is only available on macOS
    uint32_t managedBit = 0;
#if MVK_MACOS
    managedBit = addMemoryType(mainHeapIdx, MVK_VK_MEMORY_TYPE_METAL_MANAGED);
#endif

    // Memoryless storage depends on the GPU, and is tested differently on each platform
    uint32_t memlessBit = 0;
#if MVK_MACOS
    if (supportsMTLGPUFamily(Apple5)) {
        memlessBit = addMemoryType(mainHeapIdx, MVK_VK_MEMORY_TYPE_METAL_MEMORYLESS);
    }
#endif
#if MVK_IOS
    if (supportsMTLFeatureSet(iOS_GPUFamily1_v3)) {
        memlessBit = addMemoryType(mainHeapIdx, MVK_VK_MEMORY_TYPE_METAL_MEMORYLESS);
    }
#endif
#if MVK_TVOS
    if (supportsMTLFeatureSet(tvOS_GPUFamily1_v2)) {
        memlessBit = addMemoryType(mainHeapIdx, MVK_VK_MEMORY_TYPE_METAL_MEMORYLESS);
    }
#endif

    _memoryProperties.memoryTypeCount = typeIdx;

    // Record which memory types belong to each category
    _allMemoryTypes             = privateBit | sharedBit | managedBit | memlessBit;
    _lazilyAllocatedMemoryTypes = memlessBit;
    _hostCoherentMemoryTypes    = sharedBit;
    _hostVisibleMemoryTypes     = sharedBit | managedBit;
    _privateMemoryTypes         = privateBit | memlessBit;
}
| |
// Returns whether the GPU shares its memory with the CPU. This is always true on iOS and tvOS.
// On macOS, the Metal device is asked directly if it can answer, and is otherwise
// treated as having unified memory if it is a low-power device.
bool MVKPhysicalDevice::getHasUnifiedMemory() {
#if MVK_IOS_OR_TVOS
    return true;
#endif
#if MVK_MACOS
    if ([_mtlDevice respondsToSelector: @selector(hasUnifiedMemory)]) {
        return _mtlDevice.hasUnifiedMemory;
    }
    return _mtlDevice.isLowPower;
#endif
}
| |
// Returns the size of the memory available to the GPU. With unified memory, this is
// the size of system memory. Otherwise, it is the recommended max working set size.
uint64_t MVKPhysicalDevice::getVRAMSize() {
    if ( !getHasUnifiedMemory() ) {
        // There's actually no way to query the total physical VRAM on the device in Metal.
        // Just default to using the recommended max working set size (i.e. the budget).
        return getRecommendedMaxWorkingSetSize();
    }
    return mvkGetSystemMemorySize();
}
| |
// Returns the recommended maximum amount of memory to keep in use by the GPU.
// On macOS, this is the value reported by the Metal device, if it reports one.
// On iOS and tvOS, it is the memory currently available in the system, if that is known.
// In all other cases, a conservative fixed minimum is returned.
uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() {
#if MVK_MACOS
    BOOL deviceReportsSize = [_mtlDevice respondsToSelector: @selector(recommendedMaxWorkingSetSize)];
    if (deviceReportsSize) { return _mtlDevice.recommendedMaxWorkingSetSize; }
#endif
#if MVK_IOS_OR_TVOS
    // GPU and CPU use shared memory. Estimate the current free memory in the system.
    uint64_t availMem = mvkGetAvailableMemorySize();
    if (availMem != 0) { return availMem; }
#endif

    // Conservative minimum for macOS GPU's & iOS shared memory
    return 128 * MEBI;
}
| |
// Returns the amount of memory currently allocated, as reported by the Metal device if it
// can report it. Otherwise, iOS and tvOS fall back to the memory used by this process,
// and macOS falls back to zero.
uint64_t MVKPhysicalDevice::getCurrentAllocatedSize() {
    BOOL deviceReportsSize = [_mtlDevice respondsToSelector: @selector(currentAllocatedSize)];
    if (deviceReportsSize) { return _mtlDevice.currentAllocatedSize; }

#if MVK_IOS_OR_TVOS
    // We can use the current memory used by this process as a reasonable approximation.
    return mvkGetUsedMemorySize();
#endif
#if MVK_MACOS
    return 0;
#endif
}
| |
// Returns the maximum number of samplers that can be created.
//
// When using argument buffers, Metal imposes a hard limit on the number of MTLSamplerState
// objects that can be created within the app. That limit is taken from the Metal device if
// it reports one, and otherwise defaults to 1024. When not using argument buffers, no such
// limit is imposed. This has been verified with testing up to 1M MTLSamplerStates.
uint32_t MVKPhysicalDevice::getMaxSamplerCount() {
    if ( !isUsingMetalArgumentBuffers() ) { return kMVKUndefinedLargeUInt32; }

    if ([_mtlDevice respondsToSelector: @selector(maxArgumentBufferSamplerCount)]) {
        return (uint32_t)_mtlDevice.maxArgumentBufferSamplerCount;
    }
    return 1024;
}
| |
// Initializes the external memory properties used for MTLBuffer and MTLTexture handles.
// Both are marked exportable and importable, and are compatible only with their own
// handle type. Textures are additionally marked as dedicated-only.
void MVKPhysicalDevice::initExternalMemoryProperties() {

    // Buffers
    auto& bufferProps = _mtlBufferExternalMemoryProperties;
    bufferProps.compatibleHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_KHR;
    bufferProps.exportFromImportedHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_KHR;
    bufferProps.externalMemoryFeatures = (VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
                                          VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT);

    // Images
    auto& textureProps = _mtlTextureExternalMemoryProperties;
    textureProps.compatibleHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_KHR;
    textureProps.exportFromImportedHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_KHR;
    textureProps.externalMemoryFeatures = (VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
                                           VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT |
                                           VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT);
}
| |
| void MVKPhysicalDevice::initExtensions() { |
| MVKExtensionList* pWritableExtns = (MVKExtensionList*)&_supportedExtensions; |
| pWritableExtns->disableAllButEnabledDeviceExtensions(); |
| |
| #if MVK_IOS_OR_TVOS |
| if (!_metalFeatures.depthResolve) { |
| pWritableExtns->vk_KHR_depth_stencil_resolve.enabled = false; |
| } |
| #endif |
| if (!_metalFeatures.samplerMirrorClampToEdge) { |
| pWritableExtns->vk_KHR_sampler_mirror_clamp_to_edge.enabled = false; |
| } |
| if (!_metalFeatures.programmableSamplePositions) { |
| pWritableExtns->vk_EXT_sample_locations.enabled = false; |
| } |
| if (!_metalFeatures.rasterOrderGroups) { |
| pWritableExtns->vk_EXT_fragment_shader_interlock.enabled = false; |
| } |
| if (!_metalFeatures.postDepthCoverage) { |
| pWritableExtns->vk_EXT_post_depth_coverage.enabled = false; |
| }
|