Metal argument buffers code review fixes.
Use `[MTLDevice maxArgumentBufferSamplerCount]` to determine maximum sampler counts.
When checking `[MTLDevice argumentBuffersSupport]` tiers, compare
using `>=` to automatically handle evolution of this value in future.
Remove suspect use of early breaks in
MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets().
Revert to using void* when padding for buffers in shader desc set structs.
Fix several typos.
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index f0ab0ea..1ff5a9c 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -472,7 +472,7 @@
}
// Update dynamic buffer offsets
- uint32_t baseDynOfstIdx = dslMTLRezIdxOffsets.getMetalResourceIndexs().dynamicOffsetBufferIndex;
+ uint32_t baseDynOfstIdx = dslMTLRezIdxOffsets.getMetalResourceIndexes().dynamicOffsetBufferIndex;
uint32_t doCnt = descSet->getDynamicOffsetDescriptorCount();
for (uint32_t doIdx = 0; doIdx < doCnt && dynamicOffsetIndex < dynamicOffsets.size; doIdx++) {
updateImplicitBuffer(_dynamicOffsets, baseDynOfstIdx + doIdx, dynamicOffsets[dynamicOffsetIndex++]);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
index e401e89..f862b61 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
@@ -58,7 +58,7 @@
MVKShaderResourceBinding operator+ (const MVKShaderResourceBinding& rhs);
MVKShaderResourceBinding& operator+= (const MVKShaderResourceBinding& rhs);
- MVKShaderStageResourceBinding& getMetalResourceIndexs(MVKShaderStage stage = kMVKShaderStageVertex) { return stages[stage]; }
+ MVKShaderStageResourceBinding& getMetalResourceIndexes(MVKShaderStage stage = kMVKShaderStageVertex) { return stages[stage]; }
void clearArgumentBufferResources();
void addArgumentBuffers(uint32_t count);
@@ -142,7 +142,7 @@
* in that case the stage can be withheld and a default stage will be used.
*/
MVKShaderStageResourceBinding& getMetalResourceIndexOffsets(MVKShaderStage stage = kMVKShaderStageVertex) {
- return _mtlResourceIndexOffsets.getMetalResourceIndexs(stage);
+ return _mtlResourceIndexOffsets.getMetalResourceIndexes(stage);
}
/** Returns a bitwise OR of Metal render stages. */
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
index a7f53a8..1f50b62 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
@@ -108,7 +108,7 @@
do { \
mvk::MSLResourceBinding rb; \
auto& rbb = rb.resourceBinding; \
- rbb.stage = svpExecModels[stage]; \
+ rbb.stage = spvExecModels[stage]; \
rbb.basetype = SPIRV_CROSS_NAMESPACE_OVERRIDE::SPIRType::spvRezType; \
rbb.desc_set = descriptorSetIndex; \
rbb.binding = bindingIndex; \
@@ -120,7 +120,7 @@
context.resourceBindings.push_back(rb); \
} while(false)
- static const spv::ExecutionModel svpExecModels[] = {
+ static const spv::ExecutionModel spvExecModels[] = {
spv::ExecutionModelVertex,
spv::ExecutionModelTessellationControl,
spv::ExecutionModelTessellationEvaluation,
@@ -132,7 +132,7 @@
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
- addResourceBinding(Float);
+ addResourceBinding(Void);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -140,7 +140,7 @@
addResourceBinding(Float);
mvk::DescriptorBinding db;
- db.stage = svpExecModels[stage];
+ db.stage = spvExecModels[stage];
db.descriptorSet = descriptorSetIndex;
db.binding = bindingIndex;
db.index = ssRB.dynamicOffsetBufferIndex;
@@ -640,9 +640,6 @@
} \
} while(false)
- // Only perform validation if this binding is used by the stage
-# define breakIfUnused() if ( !_applyToStage[stage] ) break
-
MVKShaderStageResourceBinding& bindIdxs = _mtlResourceIndexOffsets.stages[stage];
MVKShaderStageResourceBinding& dslCnts = _layout->_mtlResourceCounts.stages[stage];
@@ -650,7 +647,6 @@
switch (pBinding->descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
setResourceIndexOffset(samplerIndex);
- breakIfUnused();
if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfSamplers) {
_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of samplers.", _device->getName()));
@@ -660,7 +656,6 @@
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
setResourceIndexOffset(textureIndex);
setResourceIndexOffset(samplerIndex);
- breakIfUnused();
if (pBinding->descriptorCount > 1) {
if ( !_device->_pMetalFeatures->arrayOfTextures ) {
@@ -671,7 +666,7 @@
}
}
- if ( pBinding->pImmutableSamplers ) {
+ if (pBinding->pImmutableSamplers && _applyToStage[stage]) {
for (uint32_t i = 0; i < pBinding->descriptorCount; i++) {
uint8_t planeCount = ((MVKSampler*)pBinding->pImmutableSamplers[i])->getPlaneCount();
if (planeCount > 1) {
@@ -685,7 +680,6 @@
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
setResourceIndexOffset(textureIndex);
- breakIfUnused();
if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) {
_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
@@ -696,7 +690,6 @@
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
setResourceIndexOffset(textureIndex);
setResourceIndexOffset(bufferIndex);
- breakIfUnused();
if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) {
_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
index 929c5a4..36c1861 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
@@ -187,7 +187,7 @@
SPIRVToMSLConversionConfiguration& context,
MVKShaderStage stage,
uint32_t descSetIndex) {
- static const spv::ExecutionModel svpExecModels[] = {
+ static const spv::ExecutionModel spvExecModels[] = {
spv::ExecutionModelVertex,
spv::ExecutionModelTessellationControl,
spv::ExecutionModelTessellationEvaluation,
@@ -200,7 +200,7 @@
bindingUse.resize(bindCnt);
for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
auto& dslBind = _bindings[bindIdx];
- if (context.isResourceUsed(svpExecModels[stage], descSetIndex, dslBind.getBinding())) {
+ if (context.isResourceUsed(spvExecModels[stage], descSetIndex, dslBind.getBinding())) {
bindingUse.setBit(bindIdx);
descSetIsUsed = true;
}
@@ -797,7 +797,7 @@
// a Metal argument buffer may have a fixed overhead storage, in addition to the storage required
// to hold the resources. This overhead per descriptor set is conservatively calculated by measuring
// the size of a Metal argument buffer containing one of each type of resource (S1), and the size
- // of a Metal argument buffer containing one of each type of resource (S2), and then calculating
+ // of a Metal argument buffer containing two of each type of resource (S2), and then calculating
// the fixed overhead per argument buffer as (2 * S1 - S2). To this is added the overhead due to
// the alignment of each descriptor set Metal argument buffer offset.
NSUInteger overheadPerDescSet = (2 * getMetalArgumentBufferResourceStorageSize(1, 1, 1) -
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 4af98bf..c389deb 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -368,6 +368,7 @@
uint64_t getVRAMSize();
uint64_t getRecommendedMaxWorkingSetSize();
uint64_t getCurrentAllocatedSize();
+ uint32_t getMaxSamplerCount();
void initExternalMemoryProperties();
void initExtensions();
MVKArrayRef<MVKQueueFamily*> getQueueFamilies();
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 1bc568e..d4e1a4b 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -375,7 +375,9 @@
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
- bool isTier2 = isUsingMetalArgumentBuffers() && (_mtlDevice.argumentBuffersSupport == MTLArgumentBuffersTier2);
+ bool isTier2 = isUsingMetalArgumentBuffers() && (_mtlDevice.argumentBuffersSupport >= MTLArgumentBuffersTier2);
+ uint32_t maxSampCnt = getMaxSamplerCount();
+
auto* pDescIdxProps = (VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)next;
pDescIdxProps->maxUpdateAfterBindDescriptorsInAllPools = kMVKUndefinedLargeUInt32;
pDescIdxProps->shaderUniformBufferArrayNonUniformIndexingNative = false;
@@ -385,14 +387,14 @@
pDescIdxProps->shaderInputAttachmentArrayNonUniformIndexingNative = _metalFeatures.arrayOfTextures;
pDescIdxProps->robustBufferAccessUpdateAfterBind = _features.robustBufferAccess;
pDescIdxProps->quadDivergentImplicitLod = false;
- pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSamplers = isTier2 ? 2048 : _properties.limits.maxPerStageDescriptorSamplers;
+ pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSamplers = isTier2 ? maxSampCnt : _properties.limits.maxPerStageDescriptorSamplers;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindUniformBuffers = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorUniformBuffers;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageBuffers = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageBuffers;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSampledImages = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorSampledImages;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageImages = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageImages;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindInputAttachments = _properties.limits.maxPerStageDescriptorInputAttachments;
pDescIdxProps->maxPerStageUpdateAfterBindResources = isTier2 ? 500000 : _properties.limits.maxPerStageResources;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindSamplers = isTier2 ? 2048 : _properties.limits.maxDescriptorSetSamplers;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindSamplers = isTier2 ? maxSampCnt : _properties.limits.maxDescriptorSetSamplers;
pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffers = isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffers;
pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffersDynamic;
pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffers = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffers;
@@ -2072,7 +2074,7 @@
// Features with no specific limits - default to unlimited int values
_properties.limits.maxMemoryAllocationCount = kMVKUndefinedLargeUInt32;
- _properties.limits.maxSamplerAllocationCount = kMVKUndefinedLargeUInt32;
+ _properties.limits.maxSamplerAllocationCount = getMaxSamplerCount();
_properties.limits.maxBoundDescriptorSets = kMVKMaxDescriptorSetCount;
_properties.limits.maxComputeWorkGroupCount[0] = kMVKUndefinedLargeUInt32;
@@ -2643,6 +2645,11 @@
#endif
}
+uint32_t MVKPhysicalDevice::getMaxSamplerCount() {
+ return ([_mtlDevice respondsToSelector: @selector(maxArgumentBufferSamplerCount)]
+ ? (uint32_t)_mtlDevice.maxArgumentBufferSamplerCount : 1024);
+}
+
void MVKPhysicalDevice::initExternalMemoryProperties() {
// Buffers