Merge pull request #1335 from billhollings/argument-buffers
Initial Metal argument buffer support
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index a5532c7..f1641c4 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -801,6 +801,30 @@
*/
VkBool32 resumeLostDevice;
+ /**
+ * Controls whether MoltenVK should use Metal argument buffers for resources defined in
+ * descriptor sets, if Metal argument buffers are supported on the platform. Using Metal
+ * argument buffers dramatically increases the number of buffers, textures and samplers
+ * that can be bound to a pipeline shader, and in most cases improves performance. If this
+ * setting is enabled, MoltenVK will use Metal argument buffers to bind resources to the
+ * shaders. If this setting is disabled, MoltenVK will bind resources to shaders discretely.
+ *
+ * NOTE: Currently, Metal argument buffer support is in beta stage, and is only supported
+ * on macOS 10.16 (Big Sur) or later, or on older versions of macOS using an Intel GPU.
+ * Metal argument buffer support is not available on iOS. Development to support iOS
+ * and a wider range of GPUs on older macOS versions is underway.
+ *
+ * The value of this parameter must be changed before creating a VkInstance,
+ * for the change to take effect.
+ *
+ * The initial value of this parameter is set by the
+ * MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS
+ * runtime environment variable or MoltenVK compile-time build setting.
+ * If neither is set, this setting is disabled by default, and MoltenVK will not
+ * use Metal argument buffers, and will bind resources to shaders discretely.
+ */
+ VkBool32 useMetalArgumentBuffers;
+
} MVKConfiguration;
/**
@@ -880,6 +904,8 @@
uint32_t minSubgroupSize; /**< The minimum number of threads in a SIMD-group. */
VkBool32 textureBarriers; /**< If true, texture barriers are supported within Metal render passes. */
VkBool32 tileBasedDeferredRendering; /**< If true, this device uses tile-based deferred rendering. */
+ VkBool32 argumentBuffers; /**< If true, Metal argument buffers are supported. */
+ VkBool32 descriptorSetArgumentBuffers; /**< If true, a Metal argument buffer can be assigned to a descriptor set, and used on any pipeline and pipeline stage. If false, a different Metal argument buffer must be used for each pipeline-stage/descriptor-set combination. */
} MVKPhysicalDeviceMetalFeatures;
/** MoltenVK performance of a particular type of activity. */
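For context only (not part of this diff), a minimal sketch of how an application could opt in to this beta feature before creating its VkInstance, using the MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS environment variable documented above. The helper function name is hypothetical.

#include <stdlib.h>

// Hypothetical helper: opt this process in to Metal argument buffers.
// Must be called before the VkInstance is created for the setting to take effect.
static void enableMetalArgumentBuffers() {
	setenv("MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS", "1", 1 /*overwrite*/);
}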
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
index e1919da..b691dfc 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
@@ -143,6 +143,7 @@
protected:
MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
+ void encode(MVKCommandEncoder* cmdEncoder, MVKArrayRef<uint32_t> dynamicOffsets);
MVKSmallVector<MVKDescriptorSet*, N> _descriptorSets;
MVKPipelineLayout* _pipelineLayout = nullptr;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
index 57c2179..bb5124d 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
@@ -215,7 +215,12 @@
template <size_t N>
void MVKCmdBindDescriptorSetsStatic<N>::encode(MVKCommandEncoder* cmdEncoder) {
- _pipelineLayout->bindDescriptorSets(cmdEncoder, _descriptorSets.contents(), _firstSet, MVKArrayRef<uint32_t>());
+ encode(cmdEncoder, MVKArrayRef<uint32_t>());
+}
+
+template <size_t N>
+void MVKCmdBindDescriptorSetsStatic<N>::encode(MVKCommandEncoder* cmdEncoder, MVKArrayRef<uint32_t> dynamicOffsets) {
+ _pipelineLayout->bindDescriptorSets(cmdEncoder, _pipelineBindPoint, _descriptorSets.contents(), _firstSet, dynamicOffsets);
}
template <size_t N>
@@ -256,7 +261,7 @@
template <size_t N>
void MVKCmdBindDescriptorSetsDynamic<N>::encode(MVKCommandEncoder* cmdEncoder) {
- MVKCmdBindDescriptorSetsStatic<N>::_pipelineLayout->bindDescriptorSets(cmdEncoder, MVKCmdBindDescriptorSetsStatic<N>::_descriptorSets.contents(), MVKCmdBindDescriptorSetsStatic<N>::_firstSet, _dynamicOffsets.contents());
+ MVKCmdBindDescriptorSetsStatic<N>::encode(cmdEncoder, _dynamicOffsets.contents());
}
template class MVKCmdBindDescriptorSetsDynamic<4>;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index 43b45a5..6bc4f4e 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -298,9 +298,20 @@
/** Returns the index of the currently active multiview subpass, or zero if the current render pass is not multiview. */
uint32_t getMultiviewPassIndex();
+ /** Begins a Metal compute encoding. */
+ void beginMetalComputeEncoding(MVKCommandUse cmdUse);
+
/** Binds a pipeline to a bind point. */
void bindPipeline(VkPipelineBindPoint pipelineBindPoint, MVKPipeline* pipeline);
+ /** Binds the descriptor set to the index at the bind point. */
+ void bindDescriptorSet(VkPipelineBindPoint pipelineBindPoint,
+ uint32_t descSetIndex,
+ MVKDescriptorSet* descSet,
+ MVKShaderResourceBinding& dslMTLRezIdxOffsets,
+ MVKArrayRef<uint32_t> dynamicOffsets,
+ uint32_t& dynamicOffsetIndex);
+
/** Encodes an operation to signal an event to a status. */
void signalEvent(MVKEvent* mvkEvent, bool status);
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index b7866f7..2de0f53 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -415,11 +415,11 @@
void MVKCommandEncoder::bindPipeline(VkPipelineBindPoint pipelineBindPoint, MVKPipeline* pipeline) {
switch (pipelineBindPoint) {
case VK_PIPELINE_BIND_POINT_GRAPHICS:
- _graphicsPipelineState.setPipeline(pipeline);
+ _graphicsPipelineState.bindPipeline(pipeline);
break;
case VK_PIPELINE_BIND_POINT_COMPUTE:
- _computePipelineState.setPipeline(pipeline);
+ _computePipelineState.bindPipeline(pipeline);
break;
default:
@@ -427,6 +427,28 @@
}
}
+void MVKCommandEncoder::bindDescriptorSet(VkPipelineBindPoint pipelineBindPoint,
+ uint32_t descSetIndex,
+ MVKDescriptorSet* descSet,
+ MVKShaderResourceBinding& dslMTLRezIdxOffsets,
+ MVKArrayRef<uint32_t> dynamicOffsets,
+ uint32_t& dynamicOffsetIndex) {
+ switch (pipelineBindPoint) {
+ case VK_PIPELINE_BIND_POINT_GRAPHICS:
+ _graphicsResourcesState.bindDescriptorSet(descSetIndex, descSet, dslMTLRezIdxOffsets,
+ dynamicOffsets, dynamicOffsetIndex);
+ break;
+
+ case VK_PIPELINE_BIND_POINT_COMPUTE:
+ _computeResourcesState.bindDescriptorSet(descSetIndex, descSet, dslMTLRezIdxOffsets,
+ dynamicOffsets, dynamicOffsetIndex);
+ break;
+
+ default:
+ break;
+ }
+}
+
void MVKCommandEncoder::signalEvent(MVKEvent* mvkEvent, bool status) {
endCurrentMetalEncoding();
mvkEvent->encodeSignal(_mtlCmdBuffer, status);
@@ -514,6 +536,14 @@
}
}
+void MVKCommandEncoder::beginMetalComputeEncoding(MVKCommandUse cmdUse) {
+ if (cmdUse == kMVKCommandUseTessellationVertexTessCtl) {
+ _graphicsResourcesState.beginMetalComputeEncoding();
+ } else {
+ _computeResourcesState.beginMetalComputeEncoding();
+ }
+}
+
void MVKCommandEncoder::finalizeDispatchState() {
_computePipelineState.encode(); // Must do first..it sets others
_computeResourcesState.encode();
@@ -571,6 +601,7 @@
if ( !_mtlComputeEncoder ) {
endCurrentMetalEncoding();
_mtlComputeEncoder = [_mtlCmdBuffer computeCommandEncoder]; // not retained
+ beginMetalComputeEncoding(cmdUse);
}
if (_mtlComputeEncoderUse != cmdUse) {
_mtlComputeEncoderUse = cmdUse;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
index 41e4544..d18fa87 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
@@ -21,11 +21,14 @@
#include "MVKMTLResourceBindings.h"
#include "MVKCommandResourceFactory.h"
#include "MVKDevice.h"
+#include "MVKDescriptor.h"
#include "MVKSmallVector.h"
+#include "MVKBitArray.h"
#include <unordered_map>
class MVKCommandEncoder;
class MVKGraphicsPipeline;
+class MVKDescriptorSet;
class MVKOcclusionQueryPool;
struct MVKShaderImplicitRezBinding;
@@ -66,11 +69,17 @@
*/
virtual void beginMetalRenderPass() { if (_isModified) { markDirty(); } }
- /**
- * Called automatically when a Metal render pass ends.
- */
+ /** Called automatically when a Metal render pass ends. */
virtual void endMetalRenderPass() { }
+ /**
+ * Called automatically when a Metal compute pass begins. If the contents have been
+ * modified from the default values, this instance is marked as dirty, so the contents
+ * will be encoded to Metal, otherwise it is marked as clean, so the contents will NOT
+ * be encoded. Default state can be left unencoded on a new Metal encoder.
+ */
+ virtual void beginMetalComputeEncoding() { if (_isModified) { markDirty(); } }
+
/**
* If the content of this instance is dirty, marks this instance as no longer dirty
* and calls the encodeImpl() function to encode the content onto the Metal encoder.
@@ -88,6 +97,7 @@
protected:
virtual void encodeImpl(uint32_t stage) = 0;
+ MVKDevice* getDevice();
MVKCommandEncoder* _cmdEncoder;
bool _isDirty = false;
@@ -103,8 +113,8 @@
public:
- /** Sets the pipeline during pipeline binding. */
- void setPipeline(MVKPipeline* pipeline);
+ /** Binds the pipeline. */
+ void bindPipeline(MVKPipeline* pipeline);
/** Returns the currently bound pipeline. */
MVKPipeline* getPipeline();
@@ -337,10 +347,27 @@
public:
- /** Constructs this instance for the specified command encoder. */
- MVKResourcesCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {}
+ /** Returns the currently bound pipeline for this bind point. */
+ virtual MVKPipeline* getPipeline() = 0;
+
+ /** Binds the specified descriptor set to the specified index. */
+ void bindDescriptorSet(uint32_t descSetIndex,
+ MVKDescriptorSet* descSet,
+ MVKShaderResourceBinding& dslMTLRezIdxOffsets,
+ MVKArrayRef<uint32_t> dynamicOffsets,
+ uint32_t& dynamicOffsetIndex);
+
+ /** Encodes the Metal resource to the Metal command encoder. */
+ virtual void encodeArgumentBufferResourceUsage(MVKShaderStage stage,
+ id<MTLResource> mtlResource,
+ MTLResourceUsage mtlUsage,
+ MTLRenderStages mtlStages) = 0;
+
+ MVKResourcesCommandEncoderState(MVKCommandEncoder* cmdEncoder) :
+ MVKCommandEncoderState(cmdEncoder), _boundDescriptorSets{} {}
protected:
+ void markDirty() override;
// Template function that marks both the vector and all binding elements in the vector as dirty.
template<class T>
@@ -402,6 +429,8 @@
}
void assertMissingSwizzles(bool needsSwizzle, const char* stageName, const MVKArrayRef<MVKMTLTextureBinding>& texBindings);
+ void encodeMetalArgumentBuffer(MVKShaderStage stage);
+ virtual void bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) = 0;
template<size_t N>
struct ResourceBindings {
@@ -413,6 +442,7 @@
MVKMTLBufferBinding swizzleBufferBinding;
MVKMTLBufferBinding bufferSizeBufferBinding;
+ MVKMTLBufferBinding dynamicOffsetBufferBinding;
MVKMTLBufferBinding viewRangeBufferBinding;
bool areBufferBindingsDirty = false;
@@ -422,6 +452,11 @@
bool needsSwizzle = false;
};
+ MVKDescriptorSet* _boundDescriptorSets[kMVKMaxDescriptorSetCount];
+ MVKBitArray _metalUsageDirtyDescriptors[kMVKMaxDescriptorSetCount];
+
+ MVKSmallVector<uint32_t, 8> _dynamicOffsets;
+
};
@@ -433,6 +468,9 @@
public:
+ /** Returns the currently bound pipeline for this bind point. */
+ MVKPipeline* getPipeline() override;
+
/** Binds the specified buffer for the specified shader stage. */
void bindBuffer(MVKShaderStage stage, const MVKMTLBufferBinding& binding);
@@ -464,6 +502,13 @@
bool needTessEvalSizeBuffer,
bool needFragmentSizeBuffer);
+ /** Sets the current dynamic offset buffer state. */
+ void bindDynamicOffsetBuffer(const MVKShaderImplicitRezBinding& binding,
+ bool needVertexDynamicOffsetBuffer,
+ bool needTessCtlDynamicOffsetBuffer,
+ bool needTessEvalDynamicOffsetBuffer,
+ bool needFragmentDynamicOffsetBuffer);
+
/** Sets the current view range buffer state. */
void bindViewRangeBuffer(const MVKShaderImplicitRezBinding& binding,
bool needVertexViewBuffer,
@@ -477,6 +522,11 @@
std::function<void(MVKCommandEncoder*, MVKMTLTextureBinding&)> bindTexture,
std::function<void(MVKCommandEncoder*, MVKMTLSamplerStateBinding&)> bindSampler);
+ void encodeArgumentBufferResourceUsage(MVKShaderStage stage,
+ id<MTLResource> mtlResource,
+ MTLResourceUsage mtlUsage,
+ MTLRenderStages mtlStages) override;
+
/** Offset all buffers for vertex attribute bindings with zero divisors by the given number of strides. */
void offsetZeroDivisorVertexBuffers(MVKGraphicsStage stage, MVKGraphicsPipeline* pipeline, uint32_t firstInstance);
@@ -488,6 +538,7 @@
protected:
void encodeImpl(uint32_t stage) override;
void markDirty() override;
+ void bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) override;
ResourceBindings<8> _shaderStageResourceBindings[4];
};
@@ -501,6 +552,9 @@
public:
+ /** Returns the currently bound pipeline for this bind point. */
+ MVKPipeline* getPipeline() override;
+
/** Binds the specified buffer. */
void bindBuffer(const MVKMTLBufferBinding& binding);
@@ -516,6 +570,14 @@
/** Sets the current buffer size buffer state. */
void bindBufferSizeBuffer(const MVKShaderImplicitRezBinding& binding, bool needSizeBuffer);
+ /** Sets the current dynamic offset buffer state. */
+ void bindDynamicOffsetBuffer(const MVKShaderImplicitRezBinding& binding, bool needDynamicOffsetBuffer);
+
+ void encodeArgumentBufferResourceUsage(MVKShaderStage stage,
+ id<MTLResource> mtlResource,
+ MTLResourceUsage mtlUsage,
+ MTLRenderStages mtlStages) override;
+
void markDirty() override;
#pragma mark Construction
@@ -525,6 +587,7 @@
protected:
void encodeImpl(uint32_t) override;
+ void bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) override;
ResourceBindings<4> _resourceBindings;
};
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index 968d087..1ff5a9c 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -30,12 +30,13 @@
#pragma mark MVKCommandEncoderState
MVKVulkanAPIObject* MVKCommandEncoderState::getVulkanAPIObject() { return _cmdEncoder->getVulkanAPIObject(); };
+MVKDevice* MVKCommandEncoderState::getDevice() { return _cmdEncoder->getDevice(); }
#pragma mark -
#pragma mark MVKPipelineCommandEncoderState
-void MVKPipelineCommandEncoderState::setPipeline(MVKPipeline* pipeline) {
+void MVKPipelineCommandEncoderState::bindPipeline(MVKPipeline* pipeline) {
_pipeline = pipeline;
markDirty();
}
@@ -58,7 +59,7 @@
bool isSettingDynamically) {
size_t vpCnt = viewports.size;
- uint32_t maxViewports = _cmdEncoder->getDevice()->_pProperties->limits.maxViewports;
+ uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports;
if ((firstViewport + vpCnt > maxViewports) ||
(firstViewport >= maxViewports) ||
(isSettingDynamically && vpCnt == 0))
@@ -108,7 +109,7 @@
bool isSettingDynamically) {
size_t sCnt = scissors.size;
- uint32_t maxScissors = _cmdEncoder->getDevice()->_pProperties->limits.maxViewports;
+ uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports;
if ((firstScissor + sCnt > maxScissors) ||
(firstScissor >= maxScissors) ||
(isSettingDynamically && sCnt == 0))
@@ -157,7 +158,7 @@
// MSL structs can have a larger size than the equivalent C struct due to MSL alignment needs.
// Typically any MSL struct that contains a float4 will also have a size that is rounded up to a multiple of a float4 size.
// Ensure that we pass along enough content to cover this extra space even if it is never actually accessed by the shader.
- size_t pcSizeAlign = _cmdEncoder->getDevice()->_pMetalFeatures->pushConstantSizeAlignment;
+ size_t pcSizeAlign = getDevice()->_pMetalFeatures->pushConstantSizeAlignment;
size_t pcSize = pushConstants.size;
size_t pcBuffSize = mvkAlignByteCount(offset + pcSize, pcSizeAlign);
mvkEnsureSize(_pushConstants, pcBuffSize);
@@ -452,6 +453,131 @@
#pragma mark -
#pragma mark MVKResourcesCommandEncoderState
+void MVKResourcesCommandEncoderState::bindDescriptorSet(uint32_t descSetIndex,
+ MVKDescriptorSet* descSet,
+ MVKShaderResourceBinding& dslMTLRezIdxOffsets,
+ MVKArrayRef<uint32_t> dynamicOffsets,
+ uint32_t& dynamicOffsetIndex) {
+
+ bool dsChanged = (descSet != _boundDescriptorSets[descSetIndex]);
+
+ _boundDescriptorSets[descSetIndex] = descSet;
+
+ if (descSet->isUsingMetalArgumentBuffers()) {
+ // If the descriptor set has changed, track new resource usage.
+ if (dsChanged) {
+ auto& usageDirty = _metalUsageDirtyDescriptors[descSetIndex];
+ usageDirty.resize(descSet->getDescriptorCount());
+ usageDirty.setAllBits();
+ }
+
+ // Update dynamic buffer offsets
+ uint32_t baseDynOfstIdx = dslMTLRezIdxOffsets.getMetalResourceIndexes().dynamicOffsetBufferIndex;
+ uint32_t doCnt = descSet->getDynamicOffsetDescriptorCount();
+ for (uint32_t doIdx = 0; doIdx < doCnt && dynamicOffsetIndex < dynamicOffsets.size; doIdx++) {
+ updateImplicitBuffer(_dynamicOffsets, baseDynOfstIdx + doIdx, dynamicOffsets[dynamicOffsetIndex++]);
+ }
+
+ // If something changed, mark dirty
+ if (dsChanged || doCnt > 0) { MVKCommandEncoderState::markDirty(); }
+ }
+}
+
+// Encode the dirty descriptors to the Metal argument buffer, set the Metal command encoder
+// usage for each resource, and bind the Metal argument buffer to the command encoder.
+void MVKResourcesCommandEncoderState::encodeMetalArgumentBuffer(MVKShaderStage stage) {
+ if ( !_cmdEncoder->isUsingMetalArgumentBuffers() ) { return; }
+
+ // The Metal arg encoder can only write to one arg buffer at a time (it holds the arg buffer),
+ // so we need to lock out other access to it while we are writing to it.
+ MVKPipeline* pipeline = getPipeline();
+ lock_guard<mutex> lock(pipeline->_mtlArgumentEncodingLock);
+
+ uint32_t dsCnt = pipeline->getDescriptorSetCount();
+ for (uint32_t dsIdx = 0; dsIdx < dsCnt; dsIdx++) {
+ auto* descSet = _boundDescriptorSets[dsIdx];
+ if ( !descSet ) { continue; }
+
+ id<MTLArgumentEncoder> mtlArgEncoder = nil;
+ id<MTLBuffer> mtlArgBuffer = nil;
+ NSUInteger metalArgBufferOffset = 0;
+
+ auto* dsLayout = descSet->getLayout();
+ if (dsLayout->isUsingDescriptorSetMetalArgumentBuffers()) {
+ mtlArgEncoder = dsLayout->getMTLArgumentEncoder().getMTLArgumentEncoder();
+ mtlArgBuffer = descSet->getMetalArgumentBuffer();
+ metalArgBufferOffset = descSet->getMetalArgumentBufferOffset();
+ } else {
+ mtlArgEncoder = pipeline->getMTLArgumentEncoder(dsIdx, stage).getMTLArgumentEncoder();
+ // TODO: Source a different arg buffer & offset for each pipeline-stage/descriptor-set combination.
+ // Also need to only encode the descriptors that are referenced in the shader.
+ // MVKMTLArgumentEncoder could include an MVKBitArray to track that and have it checked below.
+ }
+
+ if ( !(mtlArgEncoder && mtlArgBuffer) ) { continue; }
+
+ auto& argBuffDirtyDescs = descSet->getMetalArgumentBufferDirtyDescriptors();
+ auto& resourceUsageDirtyDescs = _metalUsageDirtyDescriptors[dsIdx];
+ auto& shaderBindingUsage = pipeline->getDescriptorBindingUse(dsIdx, stage);
+
+ bool mtlArgEncAttached = false;
+ bool shouldBindArgBuffToStage = false;
+ uint32_t dslBindCnt = dsLayout->getBindingCount();
+ for (uint32_t dslBindIdx = 0; dslBindIdx < dslBindCnt; dslBindIdx++) {
+ auto* dslBind = dsLayout->getBindingAt(dslBindIdx);
+ if (dslBind->getApplyToStage(stage) && shaderBindingUsage.getBit(dslBindIdx)) {
+ shouldBindArgBuffToStage = true;
+ uint32_t elemCnt = dslBind->getDescriptorCount(descSet);
+ for (uint32_t elemIdx = 0; elemIdx < elemCnt; elemIdx++) {
+ uint32_t descIdx = dslBind->getDescriptorIndex(elemIdx);
+ bool argBuffDirty = argBuffDirtyDescs.getBit(descIdx, true);
+ bool resourceUsageDirty = resourceUsageDirtyDescs.getBit(descIdx, true);
+ if (argBuffDirty || resourceUsageDirty) {
+ // Don't attach the arg buffer to the arg encoder unless something actually needs
+ // to be written to it. We often might only be updating command encoder resource usage.
+ if (!mtlArgEncAttached && argBuffDirty) {
+ [mtlArgEncoder setArgumentBuffer: mtlArgBuffer offset: metalArgBufferOffset];
+ mtlArgEncAttached = true;
+ }
+ auto* mvkDesc = descSet->getDescriptorAt(descIdx);
+ mvkDesc->encodeToMetalArgumentBuffer(this, mtlArgEncoder,
+ dsIdx, dslBind, elemIdx,
+ stage, argBuffDirty, true);
+ }
+ }
+ }
+ }
+
+ // If the arg buffer was attached to the arg encoder, detach it now.
+ if (mtlArgEncAttached) { [mtlArgEncoder setArgumentBuffer: nil offset: 0]; }
+
+ // If needed, bind the Metal argument buffer itself to the command encoder.
+ if (shouldBindArgBuffToStage) {
+ MVKMTLBufferBinding bb;
+ bb.mtlBuffer = descSet->getMetalArgumentBuffer();
+ bb.offset = descSet->getMetalArgumentBufferOffset();
+ bb.index = dsIdx;
+ bindMetalArgumentBuffer(stage, bb);
+ }
+
+ // For some unexpected reason, GPU capture on Xcode 12 doesn't always correctly expose
+ // the contents of Metal argument buffers. Triggering an extraction of the arg buffer
+ // contents here, after filling it, seems to correct that.
+ // Sigh. A bug report has been filed with Apple.
+ if (getDevice()->isCurrentlyAutoGPUCapturing()) { [descSet->getMetalArgumentBuffer() contents]; }
+ }
+}
+
+// Mark the resource usage as needing an update for each Metal render encoder.
+void MVKResourcesCommandEncoderState::markDirty() {
+ MVKCommandEncoderState::markDirty();
+ if (_cmdEncoder->isUsingMetalArgumentBuffers()) {
+ for (uint32_t dsIdx = 0; dsIdx < kMVKMaxDescriptorSetCount; dsIdx++) {
+ _metalUsageDirtyDescriptors[dsIdx].setAllBits();
+ }
+ }
+}
+
// If a swizzle is needed for this stage, iterates all the bindings and logs errors for those that need texture swizzling.
void MVKResourcesCommandEncoderState::assertMissingSwizzles(bool needsSwizzle, const char* stageName, const MVKArrayRef<MVKMTLTextureBinding>& texBindings) {
if (needsSwizzle) {
@@ -513,6 +639,20 @@
_shaderStageResourceBindings[kMVKShaderStageFragment].bufferSizeBufferBinding.isDirty = needFragmentSizeBuffer;
}
+void MVKGraphicsResourcesCommandEncoderState::bindDynamicOffsetBuffer(const MVKShaderImplicitRezBinding& binding,
+ bool needVertexDynamicOffsetBuffer,
+ bool needTessCtlDynamicOffsetBuffer,
+ bool needTessEvalDynamicOffsetBuffer,
+ bool needFragmentDynamicOffsetBuffer) {
+ for (uint32_t i = kMVKShaderStageVertex; i <= kMVKShaderStageFragment; i++) {
+ _shaderStageResourceBindings[i].dynamicOffsetBufferBinding.index = binding.stages[i];
+ }
+ _shaderStageResourceBindings[kMVKShaderStageVertex].dynamicOffsetBufferBinding.isDirty = needVertexDynamicOffsetBuffer;
+ _shaderStageResourceBindings[kMVKShaderStageTessCtl].dynamicOffsetBufferBinding.isDirty = needTessCtlDynamicOffsetBuffer;
+ _shaderStageResourceBindings[kMVKShaderStageTessEval].dynamicOffsetBufferBinding.isDirty = needTessEvalDynamicOffsetBuffer;
+ _shaderStageResourceBindings[kMVKShaderStageFragment].dynamicOffsetBufferBinding.isDirty = needFragmentDynamicOffsetBuffer;
+}
+
void MVKGraphicsResourcesCommandEncoderState::bindViewRangeBuffer(const MVKShaderImplicitRezBinding& binding,
bool needVertexViewBuffer,
bool needFragmentViewBuffer) {
@@ -532,6 +672,9 @@
std::function<void(MVKCommandEncoder*, MVKMTLBufferBinding&, const MVKArrayRef<uint32_t>&)> bindImplicitBuffer,
std::function<void(MVKCommandEncoder*, MVKMTLTextureBinding&)> bindTexture,
std::function<void(MVKCommandEncoder*, MVKMTLSamplerStateBinding&)> bindSampler) {
+
+ encodeMetalArgumentBuffer(stage);
+
auto& shaderStage = _shaderStageResourceBindings[stage];
encodeBinding<MVKMTLBufferBinding>(shaderStage.bufferBindings, shaderStage.areBufferBindingsDirty, bindBuffer);
@@ -555,6 +698,10 @@
bindImplicitBuffer(_cmdEncoder, shaderStage.bufferSizeBufferBinding, shaderStage.bufferSizes.contents());
}
+ if (shaderStage.dynamicOffsetBufferBinding.isDirty) {
+ bindImplicitBuffer(_cmdEncoder, shaderStage.dynamicOffsetBufferBinding, _dynamicOffsets.contents());
+ }
+
if (shaderStage.viewRangeBufferBinding.isDirty) {
MVKSmallVector<uint32_t, 2> viewRange;
viewRange.push_back(_cmdEncoder->getSubpass()->getFirstViewIndexInMetalPass(_cmdEncoder->getMultiviewPassIndex()));
@@ -592,7 +739,7 @@
// Mark everything as dirty
void MVKGraphicsResourcesCommandEncoderState::markDirty() {
- MVKCommandEncoderState::markDirty();
+ MVKResourcesCommandEncoderState::markDirty();
for (uint32_t i = kMVKShaderStageVertex; i <= kMVKShaderStageFragment; i++) {
MVKResourcesCommandEncoderState::markDirty(_shaderStageResourceBindings[i].bufferBindings, _shaderStageResourceBindings[i].areBufferBindingsDirty);
MVKResourcesCommandEncoderState::markDirty(_shaderStageResourceBindings[i].textureBindings, _shaderStageResourceBindings[i].areTextureBindingsDirty);
@@ -603,7 +750,7 @@
void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
MVKGraphicsPipeline* pipeline = (MVKGraphicsPipeline*)_cmdEncoder->_graphicsPipelineState.getPipeline();
- bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || _cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle;
+ bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || getDevice()->_pMetalFeatures->nativeTextureSwizzle;
bool forTessellation = pipeline->isTessellationPipeline();
if (stage == kMVKGraphicsStageVertex) {
@@ -765,6 +912,33 @@
}
}
+MVKPipeline* MVKGraphicsResourcesCommandEncoderState::getPipeline() {
+ return _cmdEncoder->_graphicsPipelineState.getPipeline();
+}
+
+void MVKGraphicsResourcesCommandEncoderState::bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) {
+ bindBuffer(stage, buffBind);
+}
+
+void MVKGraphicsResourcesCommandEncoderState::encodeArgumentBufferResourceUsage(MVKShaderStage stage,
+ id<MTLResource> mtlResource,
+ MTLResourceUsage mtlUsage,
+ MTLRenderStages mtlStages) {
+ if (mtlResource && mtlStages) {
+ if (stage == kMVKShaderStageTessCtl) {
+ auto* mtlCompEnc = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
+ [mtlCompEnc useResource: mtlResource usage: mtlUsage];
+ } else {
+ auto* mtlRendEnc = _cmdEncoder->_mtlRenderEncoder;
+ if ([mtlRendEnc respondsToSelector: @selector(useResource:usage:stages:)]) {
+ [mtlRendEnc useResource: mtlResource usage: mtlUsage stages: mtlStages];
+ } else {
+ [mtlRendEnc useResource: mtlResource usage: mtlUsage];
+ }
+ }
+ }
+}
+
#pragma mark -
#pragma mark MVKComputeResourcesCommandEncoderState
@@ -793,9 +967,15 @@
_resourceBindings.bufferSizeBufferBinding.isDirty = needBufferSizeBuffer;
}
+void MVKComputeResourcesCommandEncoderState::bindDynamicOffsetBuffer(const MVKShaderImplicitRezBinding& binding,
+ bool needDynamicOffsetBuffer) {
+ _resourceBindings.dynamicOffsetBufferBinding.index = binding.stages[kMVKShaderStageCompute];
+ _resourceBindings.dynamicOffsetBufferBinding.isDirty = needDynamicOffsetBuffer;
+}
+
// Mark everything as dirty
void MVKComputeResourcesCommandEncoderState::markDirty() {
- MVKCommandEncoderState::markDirty();
+ MVKResourcesCommandEncoderState::markDirty();
MVKResourcesCommandEncoderState::markDirty(_resourceBindings.bufferBindings, _resourceBindings.areBufferBindingsDirty);
MVKResourcesCommandEncoderState::markDirty(_resourceBindings.textureBindings, _resourceBindings.areTextureBindingsDirty);
MVKResourcesCommandEncoderState::markDirty(_resourceBindings.samplerStateBindings, _resourceBindings.areSamplerStateBindingsDirty);
@@ -803,6 +983,8 @@
void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) {
+ encodeMetalArgumentBuffer(kMVKShaderStageCompute);
+
MVKPipeline* pipeline = _cmdEncoder->_computePipelineState.getPipeline();
bool fullImageViewSwizzle = pipeline ? pipeline->fullImageViewSwizzle() : false;
@@ -832,6 +1014,14 @@
}
+ if (_resourceBindings.dynamicOffsetBufferBinding.isDirty) {
+ _cmdEncoder->setComputeBytes(_cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch),
+ _dynamicOffsets.data(),
+ _dynamicOffsets.size() * sizeof(uint32_t),
+ _resourceBindings.dynamicOffsetBufferBinding.index);
+
+ }
+
encodeBinding<MVKMTLBufferBinding>(_resourceBindings.bufferBindings, _resourceBindings.areBufferBindingsDirty,
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
if (b.isInline) {
@@ -859,6 +1049,24 @@
});
}
+MVKPipeline* MVKComputeResourcesCommandEncoderState::getPipeline() {
+ return _cmdEncoder->_computePipelineState.getPipeline();
+}
+
+void MVKComputeResourcesCommandEncoderState::bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) {
+ bindBuffer(buffBind);
+}
+
+void MVKComputeResourcesCommandEncoderState::encodeArgumentBufferResourceUsage(MVKShaderStage stage,
+ id<MTLResource> mtlResource,
+ MTLResourceUsage mtlUsage,
+ MTLRenderStages mtlStages) {
+ if (mtlResource) {
+ auto* mtlCompEnc = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch);
+ [mtlCompEnc useResource: mtlResource usage: mtlUsage];
+ }
+}
+
#pragma mark -
#pragma mark MVKOcclusionQueryCommandEncoderState
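For readers unfamiliar with Metal argument buffers: resources that are referenced only indirectly, through an argument buffer, are not implicitly made resident on the GPU, so the command encoder must be told about each of them. That is the Metal requirement the encodeArgumentBufferResourceUsage() overrides above satisfy. A standalone Metal sketch of that pattern (not MoltenVK code; the objects are assumed to have been created by the caller):

#import <Metal/Metal.h>

// Write a buffer reference into an argument buffer, bind the argument buffer to the
// fragment stage, and declare residency for the indirectly referenced resource.
static void bindViaArgumentBuffer(id<MTLRenderCommandEncoder> renderEnc,
                                  id<MTLArgumentEncoder> argEnc,
                                  id<MTLBuffer> argBuff,
                                  id<MTLBuffer> dataBuff) {
	[argEnc setArgumentBuffer: argBuff offset: 0];
	[argEnc setBuffer: dataBuff offset: 0 atIndex: 0];            // write the reference
	[renderEnc setFragmentBuffer: argBuff offset: 0 atIndex: 0];  // bind the arg buffer itself
	[renderEnc useResource: dataBuff
	                 usage: MTLResourceUsageRead
	                stages: MTLRenderStageFragment];              // declare residency
}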
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
index 21f1808..f862b61 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
@@ -20,22 +20,27 @@
#include "MVKImage.h"
#include "MVKSmallVector.h"
+#include "MVKMTLBufferAllocation.h"
class MVKDescriptorSet;
class MVKDescriptorSetLayout;
class MVKCommandEncoder;
+class MVKResourcesCommandEncoderState;
#pragma mark MVKShaderStageResourceBinding
/** Indicates the Metal resource indexes used by a single shader stage in a descriptor. */
typedef struct MVKShaderStageResourceBinding {
- uint16_t bufferIndex = 0;
- uint16_t textureIndex = 0;
- uint16_t samplerIndex = 0;
+ uint32_t bufferIndex = 0;
+ uint32_t textureIndex = 0;
+ uint32_t samplerIndex = 0;
+ uint32_t resourceIndex = 0;
+ uint32_t dynamicOffsetBufferIndex = 0;
MVKShaderStageResourceBinding operator+ (const MVKShaderStageResourceBinding& rhs);
MVKShaderStageResourceBinding& operator+= (const MVKShaderStageResourceBinding& rhs);
+ void clearArgumentBufferResources();
} MVKShaderStageResourceBinding;
@@ -46,15 +51,32 @@
typedef struct MVKShaderResourceBinding {
MVKShaderStageResourceBinding stages[kMVKShaderStageMax];
+ uint16_t getMaxResourceIndex();
uint16_t getMaxBufferIndex();
uint16_t getMaxTextureIndex();
uint16_t getMaxSamplerIndex();
MVKShaderResourceBinding operator+ (const MVKShaderResourceBinding& rhs);
MVKShaderResourceBinding& operator+= (const MVKShaderResourceBinding& rhs);
+ MVKShaderStageResourceBinding& getMetalResourceIndexes(MVKShaderStage stage = kMVKShaderStageVertex) { return stages[stage]; }
+ void clearArgumentBufferResources();
+ void addArgumentBuffers(uint32_t count);
} MVKShaderResourceBinding;
+/**
+ * If the shader stage binding has a binding defined for the specified stage, populates
+ * the context at the descriptor set binding from the shader stage resource binding.
+ */
+void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
+ MVKShaderStageResourceBinding& ssRB,
+ MVKShaderStage stage,
+ uint32_t descriptorSetIndex,
+ uint32_t bindingIndex,
+ uint32_t count,
+ VkDescriptorType descType,
+ MVKSampler* immutableSampler);
+
#pragma mark -
#pragma mark MVKDescriptorSetLayoutBinding
@@ -71,7 +93,7 @@
inline uint32_t getBinding() { return _info.binding; }
/** Returns whether this binding has a variable descriptor count. */
- inline bool hasVariableDescriptorCount() {
+ inline bool hasVariableDescriptorCount() const {
return mvkIsAnyFlagEnabled(_flags, VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT);
}
@@ -83,11 +105,14 @@
* count provided to that descriptor set is returned. Otherwise returns the value
* defined in VkDescriptorSetLayoutBinding::descriptorCount.
*/
- uint32_t getDescriptorCount(MVKDescriptorSet* descSet);
+ uint32_t getDescriptorCount(MVKDescriptorSet* descSet = nullptr) const;
/** Returns the descriptor type of this layout. */
inline VkDescriptorType getDescriptorType() { return _info.descriptorType; }
+ /** Returns whether this binding uses immutable samplers. */
+ bool usesImmutableSamplers() { return !_immutableSamplers.empty(); }
+
/** Returns the immutable sampler at the index, or nullptr if immutable samplers are not used. */
MVKSampler* getImmutableSampler(uint32_t index);
@@ -109,7 +134,22 @@
MVKShaderResourceBinding& dslMTLRezIdxOffsets);
/** Returns the index of the descriptor within the descriptor set of the element at the index within this descriptor layout. */
- inline uint32_t getDescriptorIndex(uint32_t elementIndex = 0) { return _descriptorIndex + elementIndex; }
+ uint32_t getDescriptorIndex(uint32_t elementIndex = 0) const { return _descriptorIndex + elementIndex; }
+
+ /**
+ * Returns the indexes into the resources, relative to the descriptor set.
+ * When using Metal argument buffers, all stages have the same values, and
+ * in that case the stage can be withheld and a default stage will be used.
+ */
+ MVKShaderStageResourceBinding& getMetalResourceIndexOffsets(MVKShaderStage stage = kMVKShaderStageVertex) {
+ return _mtlResourceIndexOffsets.getMetalResourceIndexes(stage);
+ }
+
+ /** Returns a bitwise OR of Metal render stages. */
+ MTLRenderStages getMTLRenderStages();
+
+ /** Returns whether this binding should be applied to the shader stage. */
+ bool getApplyToStage(MVKShaderStage stage) { return _applyToStage[stage]; }
MVKDescriptorSetLayoutBinding(MVKDevice* device,
MVKDescriptorSetLayout* layout,
@@ -125,9 +165,13 @@
friend class MVKDescriptorSetLayout;
friend class MVKInlineUniformBlockDescriptor;
- void initMetalResourceIndexOffsets(MVKShaderStageResourceBinding* pBindingIndexes,
- MVKShaderStageResourceBinding* pDescSetCounts,
- const VkDescriptorSetLayoutBinding* pBinding);
+ void initMetalResourceIndexOffsets(const VkDescriptorSetLayoutBinding* pBinding, uint32_t stage);
+ void addMTLArgumentDescriptors(NSMutableArray<MTLArgumentDescriptor*>* args);
+ void addMTLArgumentDescriptor(NSMutableArray<MTLArgumentDescriptor*>* args,
+ uint32_t argIndex,
+ MTLDataType dataType,
+ MTLArgumentAccess access);
+ bool isUsingMetalArgumentBuffer();
void populateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
MVKShaderResourceBinding& dslMTLRezIdxOffsets,
uint32_t dslIndex);
@@ -156,6 +200,9 @@
virtual VkDescriptorType getDescriptorType() = 0;
+ /** Returns whether this descriptor type uses dynamic buffer offsets. */
+ virtual bool usesDynamicBufferOffsets() { return false; }
+
/** Encodes this descriptor (based on its layout binding index) on the command encoder. */
virtual void bind(MVKCommandEncoder* cmdEncoder,
MVKDescriptorSetLayoutBinding* mvkDSLBind,
@@ -165,6 +212,16 @@
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) = 0;
+ /** Encodes this descriptor to the Metal argument buffer. */
+ virtual void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) = 0;
+
/**
* Updates the internal binding from the specified content. The format of the content depends
* on the descriptor type, and is extracted from pData at the location given by index * stride.
@@ -200,6 +257,9 @@
~MVKDescriptor() { reset(); }
+protected:
+ MTLResourceUsage getMTLResourceUsage();
+
};
@@ -218,6 +278,15 @@
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) override;
+ void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) override;
+
void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t srcIndex,
@@ -258,6 +327,7 @@
class MVKUniformBufferDynamicDescriptor : public MVKBufferDescriptor {
public:
VkDescriptorType getDescriptorType() override { return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; }
+ bool usesDynamicBufferOffsets() override { return true; }
};
@@ -276,6 +346,7 @@
class MVKStorageBufferDynamicDescriptor : public MVKBufferDescriptor {
public:
VkDescriptorType getDescriptorType() override { return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC; }
+ bool usesDynamicBufferOffsets() override { return true; }
};
@@ -296,6 +367,15 @@
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) override;
+ void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) override;
+
void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t dstOffset, // For inline buffers we are using this parameter as dst offset not as src descIdx
@@ -315,7 +395,9 @@
~MVKInlineUniformBlockDescriptor() { reset(); }
protected:
- uint8_t* _buffer = nullptr;
+ inline uint8_t* getData() { return _mvkMTLBufferAllocation ? (uint8_t*)_mvkMTLBufferAllocation->getContents() : nullptr; }
+
+ const MVKMTLBufferAllocation* _mvkMTLBufferAllocation = nullptr;
};
@@ -334,6 +416,15 @@
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) override;
+ void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) override;
+
void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t srcIndex,
@@ -405,6 +496,14 @@
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex);
+ void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer);
+
void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t srcIndex,
@@ -444,6 +543,15 @@
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) override;
+ void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) override;
+
void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t srcIndex,
@@ -482,6 +590,15 @@
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) override;
+ void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) override;
+
void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t srcIndex,
@@ -518,6 +635,15 @@
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) override;
+ void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) override;
+
void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t srcIndex,
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
index 725752d..1f50b62 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
@@ -28,6 +28,8 @@
rslt.bufferIndex = this->bufferIndex + rhs.bufferIndex;
rslt.textureIndex = this->textureIndex + rhs.textureIndex;
rslt.samplerIndex = this->samplerIndex + rhs.samplerIndex;
+ rslt.resourceIndex = this->resourceIndex + rhs.resourceIndex;
+ rslt.dynamicOffsetBufferIndex = this->dynamicOffsetBufferIndex + rhs.dynamicOffsetBufferIndex;
return rslt;
}
@@ -35,12 +37,24 @@
this->bufferIndex += rhs.bufferIndex;
this->textureIndex += rhs.textureIndex;
this->samplerIndex += rhs.samplerIndex;
+ this->resourceIndex += rhs.resourceIndex;
+ this->dynamicOffsetBufferIndex += rhs.dynamicOffsetBufferIndex;
return *this;
}
+void MVKShaderStageResourceBinding::clearArgumentBufferResources() {
+ bufferIndex = 0;
+ textureIndex = 0;
+ samplerIndex = 0;
+}
+
#pragma mark MVKShaderResourceBinding
+uint16_t MVKShaderResourceBinding::getMaxResourceIndex() {
+ return std::max({stages[kMVKShaderStageVertex].resourceIndex, stages[kMVKShaderStageTessCtl].resourceIndex, stages[kMVKShaderStageTessEval].resourceIndex, stages[kMVKShaderStageFragment].resourceIndex, stages[kMVKShaderStageCompute].resourceIndex});
+}
+
uint16_t MVKShaderResourceBinding::getMaxBufferIndex() {
return std::max({stages[kMVKShaderStageVertex].bufferIndex, stages[kMVKShaderStageTessCtl].bufferIndex, stages[kMVKShaderStageTessEval].bufferIndex, stages[kMVKShaderStageFragment].bufferIndex, stages[kMVKShaderStageCompute].bufferIndex});
}
@@ -68,13 +82,105 @@
return *this;
}
+void MVKShaderResourceBinding::clearArgumentBufferResources() {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ stages[i].clearArgumentBufferResources();
+ }
+}
+
+void MVKShaderResourceBinding::addArgumentBuffers(uint32_t count) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ stages[i].bufferIndex += count;
+ stages[i].resourceIndex += count;
+ }
+}
+
+void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
+ MVKShaderStageResourceBinding& ssRB,
+ MVKShaderStage stage,
+ uint32_t descriptorSetIndex,
+ uint32_t bindingIndex,
+ uint32_t count,
+ VkDescriptorType descType,
+ MVKSampler* immutableSampler) {
+
+#define addResourceBinding(spvRezType) \
+ do { \
+ mvk::MSLResourceBinding rb; \
+ auto& rbb = rb.resourceBinding; \
+ rbb.stage = spvExecModels[stage]; \
+ rbb.basetype = SPIRV_CROSS_NAMESPACE_OVERRIDE::SPIRType::spvRezType; \
+ rbb.desc_set = descriptorSetIndex; \
+ rbb.binding = bindingIndex; \
+ rbb.count = count; \
+ rbb.msl_buffer = ssRB.bufferIndex; \
+ rbb.msl_texture = ssRB.textureIndex; \
+ rbb.msl_sampler = ssRB.samplerIndex; \
+ if (immutableSampler) { immutableSampler->getConstexprSampler(rb); } \
+ context.resourceBindings.push_back(rb); \
+ } while(false)
+
+ static const spv::ExecutionModel spvExecModels[] = {
+ spv::ExecutionModelVertex,
+ spv::ExecutionModelTessellationControl,
+ spv::ExecutionModelTessellationEvaluation,
+ spv::ExecutionModelFragment,
+ spv::ExecutionModelGLCompute
+ };
+
+ switch (descType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ addResourceBinding(Void);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+ addResourceBinding(Float);
+
+ mvk::DescriptorBinding db;
+ db.stage = spvExecModels[stage];
+ db.descriptorSet = descriptorSetIndex;
+ db.binding = bindingIndex;
+ db.index = ssRB.dynamicOffsetBufferIndex;
+ context.dynamicBufferDescriptors.push_back(db);
+ break;
+ }
+
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ addResourceBinding(Image);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ addResourceBinding(Image);
+ addResourceBinding(Void);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ addResourceBinding(Sampler);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ addResourceBinding(SampledImage);
+ break;
+
+ default:
+ addResourceBinding(Unknown);
+ break;
+ }
+}
+
#pragma mark -
#pragma mark MVKDescriptorSetLayoutBinding
MVKVulkanAPIObject* MVKDescriptorSetLayoutBinding::getVulkanAPIObject() { return _layout; };
-uint32_t MVKDescriptorSetLayoutBinding::getDescriptorCount(MVKDescriptorSet* descSet) {
+uint32_t MVKDescriptorSetLayoutBinding::getDescriptorCount(MVKDescriptorSet* descSet) const {
if (_info.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
return 1;
@@ -335,13 +441,71 @@
}
}
-// If depth compare is required, but unavailable on the device, the sampler can only be used as an immutable sampler
-bool MVKDescriptorSetLayoutBinding::validate(MVKSampler* mvkSampler) {
- if (mvkSampler->getRequiresConstExprSampler()) {
- mvkSampler->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdPushDescriptorSet/vkCmdPushDescriptorSetWithTemplate(): Tried to push an immutable sampler.");
- return false;
+bool MVKDescriptorSetLayoutBinding::isUsingMetalArgumentBuffer() { return _layout->isUsingMetalArgumentBuffer(); };
+
+// Adds MTLArgumentDescriptors to the array, and updates resource indexes consumed.
+void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(NSMutableArray<MTLArgumentDescriptor*>* args) {
+ switch (getDescriptorType()) {
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadOnly);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadOnly);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite);
+ addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite); // Needed for atomic operations
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadOnly);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite);
+ addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite); // Needed for atomic operations
+ break;
+
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().samplerIndex, MTLDataTypeSampler, MTLArgumentAccessReadOnly);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadOnly);
+ addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().samplerIndex, MTLDataTypeSampler, MTLArgumentAccessReadOnly);
+ break;
+
+ default:
+ break;
}
- return true;
+}
+
+void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptor(NSMutableArray<MTLArgumentDescriptor*>* args,
+ uint32_t argIndex,
+ MTLDataType dataType,
+ MTLArgumentAccess access) {
+ uint32_t descCnt = getDescriptorCount();
+ if (descCnt == 0) { return; }
+
+ auto* argDesc = [MTLArgumentDescriptor argumentDescriptor];
+ argDesc.dataType = dataType;
+ argDesc.access = access;
+ argDesc.index = argIndex;
+ argDesc.arrayLength = descCnt;
+ argDesc.textureType = MTLTextureType2D;
+
+ [args addObject: argDesc];
}
void MVKDescriptorSetLayoutBinding::populateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
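The MTLArgumentDescriptor array assembled by addMTLArgumentDescriptors() above is consumed elsewhere (outside this excerpt) to build the MTLArgumentEncoder for the layout. A standalone Metal sketch of that standard pattern, with hypothetical names, showing how such an array yields an encoder and how the encoder sizes its backing argument buffer:

#import <Metal/Metal.h>

// Turn a collected array of MTLArgumentDescriptors into an argument encoder,
// and allocate a backing buffer large enough to hold one encoded argument block.
static id<MTLArgumentEncoder> makeArgEncoder(id<MTLDevice> device,
                                             NSArray<MTLArgumentDescriptor*>* args,
                                             id<MTLBuffer>* pArgBuff) {
	id<MTLArgumentEncoder> argEnc = [device newArgumentEncoderWithArguments: args];
	*pArgBuff = [device newBufferWithLength: argEnc.encodedLength
	                                options: MTLResourceStorageModeShared];
	return argEnc;
}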
@@ -353,26 +517,54 @@
// Establish the resource indices to use, by combining the offsets of the DSL and this DSL binding.
MVKShaderResourceBinding mtlIdxs = _mtlResourceIndexOffsets + dslMTLRezIdxOffsets;
- static const spv::ExecutionModel models[] = {
- spv::ExecutionModelVertex,
- spv::ExecutionModelTessellationControl,
- spv::ExecutionModelTessellationEvaluation,
- spv::ExecutionModelFragment,
- spv::ExecutionModelGLCompute
- };
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
- if (_applyToStage[i]) {
+ uint32_t descCnt = getDescriptorCount();
+ bool isUsingMtlArgBuff = isUsingMetalArgumentBuffer();
+ for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageMax; stage++) {
+ if ((_applyToStage[stage] || isUsingMtlArgBuff) && descCnt > 0) {
mvkPopulateShaderConverterContext(context,
- mtlIdxs.stages[i],
- models[i],
+ mtlIdxs.stages[stage],
+ MVKShaderStage(stage),
dslIndex,
_info.binding,
- getDescriptorCount(nullptr),
+ descCnt,
+ getDescriptorType(),
mvkSamp);
}
}
}
+// If depth compare is required, but unavailable on the device, the sampler can only be used as an immutable sampler
+bool MVKDescriptorSetLayoutBinding::validate(MVKSampler* mvkSampler) {
+ if (mvkSampler->getRequiresConstExprSampler()) {
+ mvkSampler->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdPushDescriptorSet/vkCmdPushDescriptorSetWithTemplate(): Tried to push an immutable sampler.");
+ return false;
+ }
+ return true;
+}
+
+MTLRenderStages MVKDescriptorSetLayoutBinding::getMTLRenderStages() {
+ MTLRenderStages mtlStages = 0;
+ for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageMax; stage++) {
+ if (_applyToStage[stage]) {
+ switch (stage) {
+ case kMVKShaderStageVertex:
+ case kMVKShaderStageTessCtl:
+ case kMVKShaderStageTessEval:
+ mtlStages |= MTLRenderStageVertex;
+ break;
+
+ case kMVKShaderStageFragment:
+ mtlStages |= MTLRenderStageFragment;
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+ return mtlStages;
+}
+
MVKDescriptorSetLayoutBinding::MVKDescriptorSetLayoutBinding(MVKDevice* device,
MVKDescriptorSetLayout* layout,
const VkDescriptorSetLayoutBinding* pBinding,
@@ -386,27 +578,23 @@
_info.pImmutableSamplers = nullptr; // Remove dangling pointer
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
- // Determine if this binding is used by this shader stage
- _applyToStage[i] = mvkAreAllFlagsEnabled(pBinding->stageFlags, mvkVkShaderStageFlagBitsFromMVKShaderStage(MVKShaderStage(i)));
- // If this binding is used by the shader, set the Metal resource index
- if (_applyToStage[i]) {
- initMetalResourceIndexOffsets(&_mtlResourceIndexOffsets.stages[i],
- &layout->_mtlResourceCounts.stages[i], pBinding);
- }
- }
+ // Determine if this binding is used by this shader stage, and initialize resource indexes.
+ for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageMax; stage++) {
+ _applyToStage[stage] = mvkAreAllFlagsEnabled(pBinding->stageFlags, mvkVkShaderStageFlagBitsFromMVKShaderStage(MVKShaderStage(stage)));
+ initMetalResourceIndexOffsets(pBinding, stage);
+ }
- // If immutable samplers are defined, copy them in
- if ( pBinding->pImmutableSamplers &&
- (pBinding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER ||
- pBinding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ) {
- _immutableSamplers.reserve(pBinding->descriptorCount);
- for (uint32_t i = 0; i < pBinding->descriptorCount; i++) {
- _immutableSamplers.push_back((MVKSampler*)pBinding->pImmutableSamplers[i]);
- _immutableSamplers.back()->retain();
- }
- }
+ // If immutable samplers are defined, copy them in
+ if ( pBinding->pImmutableSamplers &&
+ (pBinding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ pBinding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ) {
+ _immutableSamplers.reserve(pBinding->descriptorCount);
+ for (uint32_t i = 0; i < pBinding->descriptorCount; i++) {
+ _immutableSamplers.push_back((MVKSampler*)pBinding->pImmutableSamplers[i]);
+ _immutableSamplers.back()->retain();
+ }
+ }
}
MVKDescriptorSetLayoutBinding::MVKDescriptorSetLayoutBinding(const MVKDescriptorSetLayoutBinding& binding) :
@@ -434,13 +622,31 @@
// Sets the appropriate Metal resource indexes within this binding from the
// specified descriptor set binding counts, and updates those counts accordingly.
-void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(MVKShaderStageResourceBinding* pBindingIndexes,
- MVKShaderStageResourceBinding* pDescSetCounts,
- const VkDescriptorSetLayoutBinding* pBinding) {
+void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescriptorSetLayoutBinding* pBinding, uint32_t stage) {
+
+ // Sets an index offset and updates both that index and the general resource index.
+ // Can be used multiple times for combined multi-resource descriptor types.
+ // When using Metal argument buffers, we accumulate the resource indexes cumulatively,
+ // across all resource types, and do not increase the individual resource counts
+ // consumed by the descriptor set layout.
+#define setResourceIndexOffset(rezIdx) \
+ do { \
+ bool isUsingMtlArgBuff = isUsingMetalArgumentBuffer(); \
+ if (_applyToStage[stage] || isUsingMtlArgBuff) { \
+ bindIdxs.rezIdx = isUsingMtlArgBuff ? dslCnts.resourceIndex : dslCnts.rezIdx; \
+ dslCnts.rezIdx += isUsingMtlArgBuff ? 0 : descCnt; \
+ bindIdxs.resourceIndex = dslCnts.resourceIndex; \
+ dslCnts.resourceIndex += descCnt; \
+ } \
+ } while(false)
+
+ MVKShaderStageResourceBinding& bindIdxs = _mtlResourceIndexOffsets.stages[stage];
+ MVKShaderStageResourceBinding& dslCnts = _layout->_mtlResourceCounts.stages[stage];
+
+ uint32_t descCnt = pBinding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT ? 1 : pBinding->descriptorCount;
switch (pBinding->descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
- pBindingIndexes->samplerIndex = pDescSetCounts->samplerIndex;
- pDescSetCounts->samplerIndex += pBinding->descriptorCount;
+ setResourceIndexOffset(samplerIndex);
if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfSamplers) {
_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of samplers.", _device->getName()));
@@ -448,10 +654,8 @@
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- pBindingIndexes->textureIndex = pDescSetCounts->textureIndex;
- pDescSetCounts->textureIndex += pBinding->descriptorCount;
- pBindingIndexes->samplerIndex = pDescSetCounts->samplerIndex;
- pDescSetCounts->samplerIndex += pBinding->descriptorCount;
+ setResourceIndexOffset(textureIndex);
+ setResourceIndexOffset(samplerIndex);
if (pBinding->descriptorCount > 1) {
if ( !_device->_pMetalFeatures->arrayOfTextures ) {
@@ -462,44 +666,49 @@
}
}
- if ( pBinding->pImmutableSamplers ) {
+ if (pBinding->pImmutableSamplers && _applyToStage[stage]) {
for (uint32_t i = 0; i < pBinding->descriptorCount; i++) {
uint8_t planeCount = ((MVKSampler*)pBinding->pImmutableSamplers[i])->getPlaneCount();
if (planeCount > 1) {
- pDescSetCounts->textureIndex += planeCount - 1;
+ dslCnts.textureIndex += planeCount - 1;
}
}
}
break;
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- pBindingIndexes->bufferIndex = pDescSetCounts->bufferIndex;
- pDescSetCounts->bufferIndex += pBinding->descriptorCount;
- // fallthrough
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- pBindingIndexes->textureIndex = pDescSetCounts->textureIndex;
- pDescSetCounts->textureIndex += pBinding->descriptorCount;
+ setResourceIndexOffset(textureIndex);
if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) {
_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
}
break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ setResourceIndexOffset(textureIndex);
+ setResourceIndexOffset(bufferIndex);
+
+ if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) {
+ _layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
+ }
+ break;
+
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- pBindingIndexes->bufferIndex = pDescSetCounts->bufferIndex;
- pDescSetCounts->bufferIndex += pBinding->descriptorCount;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ setResourceIndexOffset(bufferIndex);
break;
- case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
- pBindingIndexes->bufferIndex = pDescSetCounts->bufferIndex;
- pDescSetCounts->bufferIndex += 1;
- break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ setResourceIndexOffset(bufferIndex);
+ bindIdxs.dynamicOffsetBufferIndex = dslCnts.dynamicOffsetBufferIndex;
+ dslCnts.dynamicOffsetBufferIndex += descCnt;
+
+ break;
default:
break;
@@ -508,6 +717,31 @@
#pragma mark -
+#pragma mark MVKDescriptor
+
+MTLResourceUsage MVKDescriptor::getMTLResourceUsage() {
+ MTLResourceUsage mtlUsage = MTLResourceUsageRead;
+ switch (getDescriptorType()) {
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ mtlUsage |= MTLResourceUsageSample;
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ mtlUsage |= MTLResourceUsageWrite;
+ break;
+
+ default:
+ break;
+ }
+ return mtlUsage;
+}
+
+
+#pragma mark -
#pragma mark MVKBufferDescriptor
// A null cmdEncoder can be passed to perform a validation pass
@@ -519,14 +753,8 @@
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) {
MVKMTLBufferBinding bb;
- NSUInteger bufferDynamicOffset = 0;
- VkDescriptorType descType = getDescriptorType();
- if (descType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
- descType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
- if (dynamicOffsets.size > dynamicOffsetIndex) {
- bufferDynamicOffset = dynamicOffsets[dynamicOffsetIndex++];
- }
- }
+ NSUInteger bufferDynamicOffset = (usesDynamicBufferOffsets() && dynamicOffsets.size > dynamicOffsetIndex
+ ? dynamicOffsets[dynamicOffsetIndex++] : 0);
if (_mvkBuffer) {
bb.mtlBuffer = _mvkBuffer->getMTLBuffer();
bb.offset = _mvkBuffer->getMTLBufferOffset() + _buffOffset + bufferDynamicOffset;
@@ -547,6 +775,28 @@
}
}
+void MVKBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) {
+ if (encodeToArgBuffer) {
+ uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + elementIndex;
+ [mtlArgEncoder setBuffer: _mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil
+ offset: _mvkBuffer ? _mvkBuffer->getMTLBufferOffset() + _buffOffset : 0
+ atIndex: argIdx];
+ }
+ if (encodeUsage) {
+ rezEncState->encodeArgumentBufferResourceUsage(stage,
+ _mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil,
+ getMTLResourceUsage(),
+ mvkDSLBind->getMTLRenderStages());
+ }
+}
+
void MVKBufferDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t srcIndex,
@@ -597,9 +847,12 @@
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) {
MVKMTLBufferBinding bb;
- bb.mtlBytes = _buffer;
- bb.size = mvkDSLBind->_info.descriptorCount;
- bb.isInline = true;
+ if (_mvkMTLBufferAllocation) {
+ bb.mtlBuffer = _mvkMTLBufferAllocation->_mtlBuffer;
+ bb.offset = _mvkMTLBufferAllocation->_offset;
+ bb.size = mvkDSLBind->_info.descriptorCount;
+ }
+
for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
if (stages[i]) {
bb.index = mtlIndexes.stages[i].bufferIndex;
@@ -612,6 +865,28 @@
}
}
+void MVKInlineUniformBlockDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) {
+ if (encodeToArgBuffer) {
+ uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex;
+ [mtlArgEncoder setBuffer: _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil
+ offset: _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_offset : 0
+ atIndex: argIdx];
+ }
+ if (encodeUsage) {
+ rezEncState->encodeArgumentBufferResourceUsage(stage,
+ _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil,
+ getMTLResourceUsage(),
+ mvkDSLBind->getMTLRenderStages());
+ }
+}
+
void MVKInlineUniformBlockDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t dstOffset,
@@ -619,12 +894,13 @@
const void* pData) {
// Ensure there is a destination to write to
uint32_t buffSize = mvkDSLBind->_info.descriptorCount;
- if ( !_buffer ) { _buffer = (uint8_t*)malloc(buffSize); }
+ if ( !_mvkMTLBufferAllocation ) { _mvkMTLBufferAllocation = mvkDescSet->acquireMTLBufferRegion(buffSize); }
+ uint8_t* data = getData();
const auto& pInlineUniformBlock = *(VkWriteDescriptorSetInlineUniformBlockEXT*)pData;
- if (_buffer && pInlineUniformBlock.pData && dstOffset < buffSize) {
+ if (data && pInlineUniformBlock.pData && dstOffset < buffSize) {
uint32_t dataLen = std::min(pInlineUniformBlock.dataSize, buffSize - dstOffset);
- memcpy(_buffer + dstOffset, pInlineUniformBlock.pData, dataLen);
+ memcpy(data + dstOffset, pInlineUniformBlock.pData, dataLen);
}
}
@@ -635,16 +911,17 @@
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock) {
+ uint8_t* data = getData();
uint32_t buffSize = mvkDSLBind->_info.descriptorCount;
- if (_buffer && pInlineUniformBlock->pData && srcOffset < buffSize) {
+ if (data && pInlineUniformBlock->pData && srcOffset < buffSize) {
uint32_t dataLen = std::min(pInlineUniformBlock->dataSize, buffSize - srcOffset);
- memcpy((void*)pInlineUniformBlock->pData, _buffer + srcOffset, dataLen);
+ memcpy((void*)pInlineUniformBlock->pData, data + srcOffset, dataLen);
}
}
void MVKInlineUniformBlockDescriptor::reset() {
- free(_buffer);
- _buffer = nullptr;
+ if (_mvkMTLBufferAllocation) { _mvkMTLBufferAllocation->returnToPool(); }
+ _mvkMTLBufferAllocation = nullptr;
MVKDescriptor::reset();
}
@@ -701,6 +978,44 @@
}
}
+void MVKImageDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) {
+ VkDescriptorType descType = getDescriptorType();
+ uint8_t planeCount = (_mvkImageView) ? _mvkImageView->getPlaneCount() : 1;
+
+ for (uint8_t planeIndex = 0; planeIndex < planeCount; planeIndex++) {
+ uint32_t planeDescIdx = (elementIndex * planeCount) + planeIndex;
+
+ id<MTLTexture> mtlTexture = _mvkImageView ? _mvkImageView->getMTLTexture(planeIndex) : nil;
+ if (encodeToArgBuffer) {
+ uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().textureIndex + planeDescIdx;
+ [mtlArgEncoder setTexture: mtlTexture atIndex: argIdx];
+ }
+ if (encodeUsage) {
+ rezEncState->encodeArgumentBufferResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages());
+ }
+ if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
+ id<MTLTexture> mtlTex = mtlTexture.parentTexture ? mtlTexture.parentTexture : mtlTexture;
+ id<MTLBuffer> mtlBuff = mtlTex.buffer;
+ if (mtlBuff) {
+ if (encodeToArgBuffer) {
+ uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + planeDescIdx;
+ [mtlArgEncoder setBuffer: mtlBuff offset: mtlTex.bufferOffset atIndex: argIdx];
+ }
+ if (encodeUsage) {
+ rezEncState->encodeArgumentBufferResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages());
+ }
+ }
+ }
+ }
+}
+
void MVKImageDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t srcIndex,
@@ -767,6 +1082,26 @@
}
}
+// Metal validation requires each sampler in an array of samplers to be populated,
+// even if not used, so populate a default if one hasn't been set.
+void MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer) {
+ if (encodeToArgBuffer) {
+ MVKSampler* imutSamp = mvkDSLBind->getImmutableSampler(elementIndex);
+ MVKSampler* mvkSamp = imutSamp ? imutSamp : _mvkSampler;
+ id<MTLSamplerState> mtlSamp = (mvkSamp
+ ? mvkSamp->getMTLSamplerState()
+ : mvkDSLBind->getDevice()->getDefaultMTLSamplerState());
+ uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().samplerIndex + elementIndex;
+ [mtlArgEncoder setSamplerState: mtlSamp atIndex: argIdx];
+ }
+}
+
void MVKSamplerDescriptorMixin::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t srcIndex,
@@ -815,6 +1150,17 @@
MVKSamplerDescriptorMixin::bind(cmdEncoder, mvkDSLBind, elementIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
}
+void MVKSamplerDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) {
+ MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(rezEncState, mtlArgEncoder, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer);
+}
+
void MVKSamplerDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t srcIndex,
@@ -854,6 +1200,18 @@
MVKSamplerDescriptorMixin::bind(cmdEncoder, mvkDSLBind, elementIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
}
+void MVKCombinedImageSamplerDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) {
+ MVKImageDescriptor::encodeToMetalArgumentBuffer(rezEncState, mtlArgEncoder, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer, encodeUsage);
+ MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(rezEncState, mtlArgEncoder, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer);
+}
+
void MVKCombinedImageSamplerDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
uint32_t srcIndex,
@@ -922,6 +1280,37 @@
}
}
}
+void MVKTexelBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+ id<MTLArgumentEncoder> mtlArgEncoder,
+ uint32_t descSetIndex,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ uint32_t elementIndex,
+ MVKShaderStage stage,
+ bool encodeToArgBuffer,
+ bool encodeUsage) {
+ VkDescriptorType descType = getDescriptorType();
+ id<MTLTexture> mtlTexture = _mvkBufferView ? _mvkBufferView->getMTLTexture() : nil;
+ if (encodeToArgBuffer) {
+ uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().textureIndex + elementIndex;
+ [mtlArgEncoder setTexture: mtlTexture atIndex: argIdx];
+ }
+ if (encodeUsage) {
+ rezEncState->encodeArgumentBufferResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages());
+ }
+
+ if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
+ id<MTLBuffer> mtlBuff = mtlTexture.buffer;
+ if (mtlBuff) {
+ if (encodeToArgBuffer) {
+ uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + elementIndex;
+ [mtlArgEncoder setBuffer: mtlBuff offset: mtlTexture.bufferOffset atIndex: argIdx];
+ }
+ if (encodeUsage) {
+ rezEncState->encodeArgumentBufferResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages());
+ }
+ }
+ }
+}
void MVKTexelBufferDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
MVKDescriptorSet* mvkDescSet,
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
index 4d69c35..c8ec767 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
@@ -28,11 +28,28 @@
class MVKDescriptorPool;
class MVKPipelineLayout;
class MVKCommandEncoder;
+class MVKResourcesCommandEncoderState;
#pragma mark -
#pragma mark MVKDescriptorSetLayout
+/** Holds and manages the lifecycle of a MTLArgumentEncoder. The encoder can only be set once. */
+struct MVKMTLArgumentEncoder {
+ NSUInteger mtlArgumentEncoderSize = 0;
+
+ id<MTLArgumentEncoder> getMTLArgumentEncoder() { return _mtlArgumentEncoder; }
+ void init(id<MTLArgumentEncoder> mtlArgEnc) {
+ if (_mtlArgumentEncoder) { return; }
+ _mtlArgumentEncoder = mtlArgEnc; // takes ownership
+ mtlArgumentEncoderSize = mtlArgEnc.encodedLength;
+ }
+ ~MVKMTLArgumentEncoder() { [_mtlArgumentEncoder release]; }
+
+private:
+ id<MTLArgumentEncoder> _mtlArgumentEncoder = nil;
+};
+
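A minimal usage sketch of the set-once behaviour (mtlDevice and args are hypothetical here; MVKDescriptorSetLayout::initMTLArgumentEncoder() later in this diff populates the wrapper the same way):

MVKMTLArgumentEncoder argEnc;
argEnc.init([mtlDevice newArgumentEncoderWithArguments: args]);  // wrapper takes ownership and caches encodedLength
argEnc.init(nil);                                                // ignored; the encoder can only be set once
NSUInteger encodedLen = argEnc.mtlArgumentEncoderSize;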
/** Represents a Vulkan descriptor set layout. */
class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject {
@@ -46,6 +63,8 @@
/** Encodes this descriptor set layout and the specified descriptor set on the specified command encoder. */
void bindDescriptorSet(MVKCommandEncoder* cmdEncoder,
+ VkPipelineBindPoint pipelineBindPoint,
+ uint32_t descSetIndex,
MVKDescriptorSet* descSet,
MVKShaderResourceBinding& dslMTLRezIdxOffsets,
MVKArrayRef<uint32_t> dynamicOffsets,
@@ -67,11 +86,32 @@
/** Populates the specified shader converter context, at the specified DSL index. */
void populateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
MVKShaderResourceBinding& dslMTLRezIdxOffsets,
- uint32_t dslIndex);
+ uint32_t descSetIndex);
+
+ /**
+ * Populates bindingUse with the bindings in this descriptor set layout that are used by the shader.
+ * Returns false if the shader does not use the descriptor set at all.
+ */
+ bool populateBindingUse(MVKBitArray& bindingUse,
+ mvk::SPIRVToMSLConversionConfiguration& context,
+ MVKShaderStage stage,
+ uint32_t descSetIndex);
+
+ /** Returns the number of bindings. */
+ uint32_t getBindingCount() { return (uint32_t)_bindings.size(); }
+
+ /** Returns the binding at the index in a descriptor set layout. */
+ MVKDescriptorSetLayoutBinding* getBindingAt(uint32_t index) { return &_bindings[index]; }
/** Returns true if this layout is for push descriptors only. */
bool isPushDescriptorLayout() const { return _isPushDescriptorLayout; }
+ /** Returns true if this layout is using a Metal argument buffer. */
+ bool isUsingMetalArgumentBuffer() { return isUsingMetalArgumentBuffers() && !isPushDescriptorLayout(); };
+
+ /** Returns the MTLArgumentEncoder for the descriptor set. */
+ MVKMTLArgumentEncoder& getMTLArgumentEncoder() { return _mtlArgumentEncoder; }
+
MVKDescriptorSetLayout(MVKDevice* device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo);
protected:
@@ -79,16 +119,17 @@
friend class MVKDescriptorSetLayoutBinding;
friend class MVKPipelineLayout;
friend class MVKDescriptorSet;
- friend class MVKDescriptorPool;
void propagateDebugName() override {}
- inline uint32_t getDescriptorCount() { return _descriptorCount; }
- inline uint32_t getDescriptorIndex(uint32_t binding, uint32_t elementIndex = 0) { return getBinding(binding)->getDescriptorIndex(elementIndex); }
- inline MVKDescriptorSetLayoutBinding* getBinding(uint32_t binding) { return &_bindings[_bindingToIndex[binding]]; }
+ uint32_t getDescriptorCount() { return _descriptorCount; }
+ uint32_t getDescriptorIndex(uint32_t binding, uint32_t elementIndex = 0) { return getBinding(binding)->getDescriptorIndex(elementIndex); }
+ MVKDescriptorSetLayoutBinding* getBinding(uint32_t binding) { return &_bindings[_bindingToIndex[binding]]; }
const VkDescriptorBindingFlags* getBindingFlags(const VkDescriptorSetLayoutCreateInfo* pCreateInfo);
+ void initMTLArgumentEncoder();
MVKSmallVector<MVKDescriptorSetLayoutBinding> _bindings;
std::unordered_map<uint32_t, uint32_t> _bindingToIndex;
+ MVKMTLArgumentEncoder _mtlArgumentEncoder;
MVKShaderResourceBinding _mtlResourceCounts;
uint32_t _descriptorCount;
bool _isPushDescriptorLayout;
@@ -109,6 +150,9 @@
/** Returns the debug report object type of this object. */
VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT; }
+ /** Returns the layout that defines this descriptor set. */
+ MVKDescriptorSetLayout* getLayout() { return _layout; }
+
/** Returns the descriptor type for the specified binding number. */
VkDescriptorType getDescriptorType(uint32_t binding);
@@ -126,6 +170,29 @@
VkBufferView* pTexelBufferView,
VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock);
+ /** Returns an MTLBuffer region allocation. */
+ const MVKMTLBufferAllocation* acquireMTLBufferRegion(NSUInteger length);
+ /**
+ * Returns the Metal argument buffer to which resources are written,
+ * or nil if Metal argument buffers are not being used.
+ */
+ id<MTLBuffer> getMetalArgumentBuffer();
+
+ /** Returns the offset into the Metal argument buffer to which resources are written. */
+ NSUInteger getMetalArgumentBufferOffset() { return _metalArgumentBufferOffset; }
+
+ /** Returns an array indicating the descriptors that have changed since the Metal argument buffer was last updated. */
+ MVKBitArray& getMetalArgumentBufferDirtyDescriptors() { return _metalArgumentBufferDirtyDescriptors; }
+
+ /** Returns the descriptor at an index. */
+ MVKDescriptor* getDescriptorAt(uint32_t descIndex) { return _descriptors[descIndex]; }
+
+ /** Returns the number of descriptors in this descriptor set. */
+ uint32_t getDescriptorCount() { return (uint32_t)_descriptors.size(); }
+
+ /** Returns the number of descriptors in this descriptor set that use dynamic offsets. */
+ uint32_t getDynamicOffsetDescriptorCount() { return _dynamicOffsetDescriptorCount; }
+
MVKDescriptorSet(MVKDescriptorPool* pool);
protected:
@@ -134,12 +201,17 @@
void propagateDebugName() override {}
MVKDescriptor* getDescriptor(uint32_t binding, uint32_t elementIndex = 0);
- VkResult allocate(MVKDescriptorSetLayout* layout, uint32_t variableDescriptorCount);
+ VkResult allocate(MVKDescriptorSetLayout* layout,
+ uint32_t variableDescriptorCount,
+ NSUInteger mtlArgBufferOffset);
void free(bool isPoolReset);
MVKDescriptorPool* _pool;
MVKDescriptorSetLayout* _layout;
MVKSmallVector<MVKDescriptor*> _descriptors;
+ MVKBitArray _metalArgumentBufferDirtyDescriptors;
+ NSUInteger _metalArgumentBufferOffset;
+ uint32_t _dynamicOffsetDescriptorCount;
uint32_t _variableDescriptorCount;
};
@@ -207,9 +279,15 @@
void freeDescriptorSet(MVKDescriptorSet* mvkDS, bool isPoolReset);
VkResult allocateDescriptor(VkDescriptorType descriptorType, MVKDescriptor** pMVKDesc);
void freeDescriptor(MVKDescriptor* mvkDesc);
+ void initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo* pCreateInfo);
+ NSUInteger getMetalArgumentBufferResourceStorageSize(NSUInteger bufferCount, NSUInteger textureCount, NSUInteger samplerCount);
+ MTLArgumentDescriptor* getMTLArgumentDescriptor(MTLDataType resourceType, NSUInteger argIndex, NSUInteger count);
MVKSmallVector<MVKDescriptorSet> _descriptorSets;
MVKBitArray _descriptorSetAvailablility;
+ id<MTLBuffer> _metalArgumentBuffer;
+ NSUInteger _nextMetalArgumentBufferOffset;
+ MVKMTLBufferAllocator _inlineBlockMTLBufferAllocator;
MVKDescriptorTypePool<MVKUniformBufferDescriptor> _uniformBufferDescriptors;
MVKDescriptorTypePool<MVKStorageBufferDescriptor> _storageBufferDescriptors;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
index 7e233f1..36c1861 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
@@ -17,6 +17,9 @@
*/
#include "MVKDescriptorSet.h"
+#include "MVKCommandBuffer.h"
+#include "MVKCommandEncoderState.h"
+#include "MVKPipeline.h"
#include "MVKInstance.h"
#include "MVKOSExtensions.h"
@@ -26,12 +29,19 @@
// A null cmdEncoder can be passed to perform a validation pass
void MVKDescriptorSetLayout::bindDescriptorSet(MVKCommandEncoder* cmdEncoder,
+ VkPipelineBindPoint pipelineBindPoint,
+ uint32_t descSetIndex,
MVKDescriptorSet* descSet,
MVKShaderResourceBinding& dslMTLRezIdxOffsets,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) {
if (!cmdEncoder) { clearConfigurationResult(); }
- if ( !_isPushDescriptorLayout ) {
+ if (_isPushDescriptorLayout ) { return; }
+
+ if (cmdEncoder) { cmdEncoder->bindDescriptorSet(pipelineBindPoint, descSetIndex,
+ descSet, dslMTLRezIdxOffsets,
+ dynamicOffsets, dynamicOffsetIndex); }
+ if ( !isUsingMetalArgumentBuffers() ) {
for (auto& dslBind : _bindings) {
dslBind.bind(cmdEncoder, descSet, dslMTLRezIdxOffsets, dynamicOffsets, dynamicOffsetIndex);
}
@@ -161,11 +171,41 @@
void MVKDescriptorSetLayout::populateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
MVKShaderResourceBinding& dslMTLRezIdxOffsets,
- uint32_t dslIndex) {
+ uint32_t descSetIndex) {
uint32_t bindCnt = (uint32_t)_bindings.size();
for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
- _bindings[bindIdx].populateShaderConverterContext(context, dslMTLRezIdxOffsets, dslIndex);
+ _bindings[bindIdx].populateShaderConverterContext(context, dslMTLRezIdxOffsets, descSetIndex);
}
+
+ // Mark if Metal argument buffers are in use, but this descriptor set layout is not using them.
+ if (isUsingMetalArgumentBuffers() && !isUsingMetalArgumentBuffer()) {
+ context.discreteDescriptorSets.push_back(descSetIndex);
+ }
+}
+
+bool MVKDescriptorSetLayout::populateBindingUse(MVKBitArray& bindingUse,
+ SPIRVToMSLConversionConfiguration& context,
+ MVKShaderStage stage,
+ uint32_t descSetIndex) {
+ static const spv::ExecutionModel spvExecModels[] = {
+ spv::ExecutionModelVertex,
+ spv::ExecutionModelTessellationControl,
+ spv::ExecutionModelTessellationEvaluation,
+ spv::ExecutionModelFragment,
+ spv::ExecutionModelGLCompute
+ };
+
+ bool descSetIsUsed = false;
+ uint32_t bindCnt = (uint32_t)_bindings.size();
+ bindingUse.resize(bindCnt);
+ for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
+ auto& dslBind = _bindings[bindIdx];
+ if (context.isResourceUsed(spvExecModels[stage], descSetIndex, dslBind.getBinding())) {
+ bindingUse.setBit(bindIdx);
+ descSetIsUsed = true;
+ }
+ }
+ return descSetIsUsed;
}
MVKDescriptorSetLayout::MVKDescriptorSetLayout(MVKDevice* device,
@@ -190,15 +230,18 @@
return bindInfo1.pBinding->binding < bindInfo2.pBinding->binding;
});
- _isPushDescriptorLayout = (pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) != 0;
_descriptorCount = 0;
- _bindings.reserve(bindCnt);
+ _isPushDescriptorLayout = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
+
+ _bindings.reserve(bindCnt);
for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
BindInfo& bindInfo = sortedBindings[bindIdx];
_bindings.emplace_back(_device, this, bindInfo.pBinding, bindInfo.bindingFlags, _descriptorCount);
_bindingToIndex[bindInfo.pBinding->binding] = bindIdx;
- _descriptorCount += _bindings.back().getDescriptorCount(nullptr);
+ _descriptorCount += _bindings.back().getDescriptorCount();
}
+
+ initMTLArgumentEncoder();
}
// Find and return an array of binding flags from the pNext chain of pCreateInfo,
@@ -217,6 +260,16 @@
return nullptr;
}
+void MVKDescriptorSetLayout::initMTLArgumentEncoder() {
+ if (isUsingDescriptorSetMetalArgumentBuffers() && isUsingMetalArgumentBuffer()) {
+ @autoreleasepool {
+ NSMutableArray<MTLArgumentDescriptor*>* args = [NSMutableArray arrayWithCapacity: _bindings.size()];
+ for (auto& dslBind : _bindings) { dslBind.addMTLArgumentDescriptors(args); }
+ _mtlArgumentEncoder.init(args.count ? [getMTLDevice() newArgumentEncoderWithArguments: args] : nil);
+ }
+ }
+}
+
#pragma mark -
#pragma mark MVKDescriptorSet
@@ -229,6 +282,8 @@
return _descriptors[_layout->getDescriptorIndex(binding, elementIndex)];
}
+id<MTLBuffer> MVKDescriptorSet::getMetalArgumentBuffer() { return _pool->_metalArgumentBuffer; }
+
template<typename DescriptorAction>
void MVKDescriptorSet::write(const DescriptorAction* pDescriptorAction,
size_t stride,
@@ -238,6 +293,7 @@
MVKDescriptor* mvkDesc = _descriptors[descIdx]; \
if (mvkDesc->getDescriptorType() == descType) { \
mvkDesc->write(mvkDSLBind, this, IDX, stride, pData); \
+ _metalArgumentBufferDirtyDescriptors.setBit(descIdx); \
} \
} while(false)
@@ -283,21 +339,35 @@
}
}
-VkResult MVKDescriptorSet::allocate(MVKDescriptorSetLayout* layout, uint32_t variableDescriptorCount) {
+const MVKMTLBufferAllocation* MVKDescriptorSet::acquireMTLBufferRegion(NSUInteger length) {
+ return _pool->_inlineBlockMTLBufferAllocator.acquireMTLBufferRegion(length);
+}
+
+VkResult MVKDescriptorSet::allocate(MVKDescriptorSetLayout* layout,
+ uint32_t variableDescriptorCount,
+ NSUInteger mtlArgBufferOffset) {
_layout = layout;
_variableDescriptorCount = variableDescriptorCount;
+ // If the Metal argument buffer offset has not been set yet, set it now.
+ if ( !_metalArgumentBufferOffset ) { _metalArgumentBufferOffset = mtlArgBufferOffset; }
+
uint32_t descCnt = layout->getDescriptorCount();
_descriptors.reserve(descCnt);
+ _metalArgumentBufferDirtyDescriptors.resize(descCnt);
uint32_t bindCnt = (uint32_t)layout->_bindings.size();
for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
MVKDescriptorSetLayoutBinding* mvkDSLBind = &layout->_bindings[bindIdx];
uint32_t elemCnt = mvkDSLBind->getDescriptorCount(this);
for (uint32_t elemIdx = 0; elemIdx < elemCnt; elemIdx++) {
+ VkDescriptorType descType = mvkDSLBind->getDescriptorType();
+ uint32_t descIdx = (uint32_t)_descriptors.size();
MVKDescriptor* mvkDesc = nullptr;
- setConfigurationResult(_pool->allocateDescriptor(mvkDSLBind->getDescriptorType(), &mvkDesc));
+ setConfigurationResult(_pool->allocateDescriptor(descType, &mvkDesc));
if ( !wasConfigurationSuccessful() ) { return getConfigurationResult(); }
+ if (mvkDesc->usesDynamicBufferOffsets()) { _dynamicOffsetDescriptorCount++; }
+ if (mvkDSLBind->usesImmutableSamplers()) { _metalArgumentBufferDirtyDescriptors.setBit(descIdx); }
_descriptors.push_back(mvkDesc);
}
}
@@ -306,14 +376,19 @@
void MVKDescriptorSet::free(bool isPoolReset) {
_layout = nullptr;
+ _dynamicOffsetDescriptorCount = 0;
_variableDescriptorCount = 0;
+ // Only reset the Metal arg buffer offset if the entire pool is being reset
+ if (isPoolReset) { _metalArgumentBufferOffset = 0; }
+
// Pooled descriptors don't need to be individually freed under pool resets.
if ( !(_pool->_hasPooledDescriptors && isPoolReset) ) {
for (auto mvkDesc : _descriptors) { _pool->freeDescriptor(mvkDesc); }
}
_descriptors.clear();
_descriptors.shrink_to_fit();
+ _metalArgumentBufferDirtyDescriptors.resize(0);
clearConfigurationResult();
}
@@ -330,7 +405,7 @@
// If not preallocated, create one on the fly.
template<class DescriptorClass>
VkResult MVKDescriptorTypePool<DescriptorClass>::allocateDescriptor(MVKDescriptor** pMVKDesc,
- MVKDescriptorPool* pool) {
+ MVKDescriptorPool* pool) {
DescriptorClass* mvkDesc;
if (pool->_hasPooledDescriptors) {
size_t availDescIdx = _availability.getIndexOfFirstSetBit(true);
@@ -350,7 +425,7 @@
// If not preallocated, simply destroy returning descriptor.
template<typename DescriptorClass>
void MVKDescriptorTypePool<DescriptorClass>::freeDescriptor(MVKDescriptor* mvkDesc,
- MVKDescriptorPool* pool) {
+ MVKDescriptorPool* pool) {
if (pool->_hasPooledDescriptors) {
size_t descIdx = (DescriptorClass*)mvkDesc - _descriptors.data();
_availability.setBit(descIdx);
@@ -409,17 +484,50 @@
VkResult MVKDescriptorPool::allocateDescriptorSet(MVKDescriptorSetLayout* mvkDSL,
uint32_t variableDescriptorCount,
VkDescriptorSet* pVKDS) {
+ VkResult rslt = VK_ERROR_OUT_OF_POOL_MEMORY;
+ NSUInteger mtlArgBuffAllocSize = mvkDSL->getMTLArgumentEncoder().mtlArgumentEncoderSize;
+ NSUInteger mtlArgBuffAlignedSize = mvkAlignByteCount(mtlArgBuffAllocSize,
+ getDevice()->_pMetalFeatures->mtlBufferAlignment);
- size_t dsIdx = _descriptorSetAvailablility.getIndexOfFirstSetBit(true);
- if (dsIdx >= _descriptorSets.size()) { return VK_ERROR_OUT_OF_POOL_MEMORY; }
+ size_t dsCnt = _descriptorSetAvailablility.size();
+ _descriptorSetAvailablility.enumerateEnabledBits(true, [&](size_t dsIdx) {
+ bool isSpaceAvail = true; // If not using Metal arg buffers, space will always be available.
+ MVKDescriptorSet* mvkDS = &_descriptorSets[dsIdx];
+ NSUInteger mtlArgBuffOffset = mvkDS->_metalArgumentBufferOffset;
- MVKDescriptorSet* mvkDS = &_descriptorSets[dsIdx];
- VkResult rslt = mvkDS->allocate(mvkDSL, variableDescriptorCount);
- if (rslt) {
- freeDescriptorSet(mvkDS, false);
- } else {
- *pVKDS = (VkDescriptorSet)mvkDS;
- }
+ // If the desc set is using a Metal argument buffer, we also need to see if the desc set
+ // will fit in the slot that might already have been allocated for it in the Metal argument
+ // buffer from a previous allocation that was returned. If this pool has been reset recently,
+ // then the desc sets will not have had a Metal argument buffer allocation assigned yet.
+ if (isUsingDescriptorSetMetalArgumentBuffers() && mvkDSL->isUsingMetalArgumentBuffer()) {
+
+ // If the offset has not been set (and it's not the first desc set except
+ // on a reset pool), set the offset and update the next available offset value.
+ if ( !mtlArgBuffOffset && (dsIdx || !_nextMetalArgumentBufferOffset)) {
+ mtlArgBuffOffset = _nextMetalArgumentBufferOffset;
+ _nextMetalArgumentBufferOffset += mtlArgBuffAlignedSize;
+ }
+
+ // Get the offset of the next desc set, if one exists and
+ // its offset has been set, or the end of the arg buffer.
+ size_t nextDSIdx = dsIdx + 1;
+ NSUInteger nextOffset = (nextDSIdx < dsCnt ? _descriptorSets[nextDSIdx]._metalArgumentBufferOffset : 0);
+ if ( !nextOffset ) { nextOffset = _metalArgumentBuffer.length; }
+
+ isSpaceAvail = (mtlArgBuffOffset + mtlArgBuffAllocSize) <= nextOffset;
+ }
+
+ if (isSpaceAvail) {
+ rslt = mvkDS->allocate(mvkDSL, variableDescriptorCount, mtlArgBuffOffset);
+ if (rslt) {
+ freeDescriptorSet(mvkDS, false);
+ } else {
+ *pVKDS = (VkDescriptorSet)mvkDS;
+ }
+ return false;
+ }
+ return true;
+ });
return rslt;
}
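The space check above reduces to simple offset arithmetic; a sketch with hypothetical sizes, not part of the change:

// Suppose the layout's encoder reports encodedLength = 200 and mtlBufferAlignment = 256.
// Fresh slots are handed out at offsets 0, 256, 512, ..., and a descriptor set may only
// take (or reuse) a slot when its allocation still fits before the next assigned offset,
// or before the end of _metalArgumentBuffer.
static bool slotFits(NSUInteger offset, NSUInteger allocSize, NSUInteger nextOffset) {
    return (offset + allocSize) <= nextOffset;   // e.g. (256 + 200) <= 512 -> true
}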
@@ -465,6 +573,8 @@
_uniformTexelBufferDescriptors.reset();
_storageTexelBufferDescriptors.reset();
+ _nextMetalArgumentBufferOffset = 0;
+
return VK_SUCCESS;
}
@@ -573,6 +683,29 @@
return descCnt;
}
+// Return the size of the preallocated pool for descriptors of the
+// VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT type, or zero if we
+// are not preallocating descriptors in the pool.
+// For consistency with getPoolSize() behavior, we support more than one pNext entry
+// for inline blocks. Accumulate the descriptor count for inline blocks accordingly.
+static size_t getInlineBlockPoolSize(const VkDescriptorPoolCreateInfo* pCreateInfo, bool poolDescriptors) {
+ uint32_t descCnt = 0;
+ if (poolDescriptors) {
+ for (const auto* next = (VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) {
+ switch (next->sType) {
+ case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT: {
+ auto* pDescPoolInlineBlockCreateInfo = (VkDescriptorPoolInlineUniformBlockCreateInfoEXT*)next;
+ descCnt += pDescPoolInlineBlockCreateInfo->maxInlineUniformBlockBindings;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ }
+ return descCnt;
+}
+
// Although poolDescriptors is derived from MVKConfiguration, it is passed in here to ensure all components of this instance see a SVOT for this value.
+// An alternative might have been to force _hasPooledDescriptors to be set first by changing the member declaration order in the class declaration.
MVKDescriptorPool::MVKDescriptorPool(MVKDevice* device, const VkDescriptorPoolCreateInfo* pCreateInfo, bool poolDescriptors) :
@@ -583,7 +716,7 @@
_storageBufferDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, poolDescriptors)),
_uniformBufferDynamicDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, poolDescriptors)),
_storageBufferDynamicDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, poolDescriptors)),
- _inlineUniformBlockDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT, poolDescriptors)),
+ _inlineUniformBlockDescriptors(getInlineBlockPoolSize(pCreateInfo, poolDescriptors)),
_sampledImageDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, poolDescriptors)),
_storageImageDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, poolDescriptors)),
_inputAttachmentDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, poolDescriptors)),
@@ -591,10 +724,140 @@
_combinedImageSamplerDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, poolDescriptors)),
_uniformTexelBufferDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, poolDescriptors)),
_storageTexelBufferDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, poolDescriptors)),
- _hasPooledDescriptors(poolDescriptors) {}
+ _inlineBlockMTLBufferAllocator(device, device->_pMetalFeatures->dynamicMTLBufferSize, true),
+ _hasPooledDescriptors(poolDescriptors) {
+ initMetalArgumentBuffer(pCreateInfo);
+ }
+
+void MVKDescriptorPool::initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo* pCreateInfo) {
+ _metalArgumentBuffer = nil;
+ _nextMetalArgumentBufferOffset = 0;
+
+ if ( !isUsingDescriptorSetMetalArgumentBuffers() ) { return; }
+
+ @autoreleasepool {
+ NSUInteger mtlBuffCnt = 0;
+ NSUInteger mtlTexCnt = 0;
+ NSUInteger mtlSampCnt = 0;
+
+ uint32_t poolCnt = pCreateInfo->poolSizeCount;
+ for (uint32_t poolIdx = 0; poolIdx < poolCnt; poolIdx++) {
+ auto& poolSize = pCreateInfo->pPoolSizes[poolIdx];
+ switch (poolSize.type) {
+ // VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT counts handled separately below
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ mtlBuffCnt += poolSize.descriptorCount;
+ break;
+
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ mtlTexCnt += poolSize.descriptorCount;
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ mtlTexCnt += poolSize.descriptorCount;
+ mtlBuffCnt += poolSize.descriptorCount;
+ break;
+
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ mtlSampCnt += poolSize.descriptorCount;
+ break;
+
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ mtlTexCnt += poolSize.descriptorCount;
+ mtlSampCnt += poolSize.descriptorCount;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ // VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT counts pulled separately
+ for (const auto* next = (VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) {
+ switch (next->sType) {
+ case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT: {
+ auto* pDescPoolInlineBlockCreateInfo = (VkDescriptorPoolInlineUniformBlockCreateInfoEXT*)next;
+ mtlBuffCnt += pDescPoolInlineBlockCreateInfo->maxInlineUniformBlockBindings;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ // Each descriptor set uses a separate Metal argument buffer, but all of these descriptor set
+ // Metal argument buffers share a single MTLBuffer. This single MTLBuffer needs to be large enough
+ // to hold all of the Metal resources for the descriptors. In addition, depending on the platform,
+ // a Metal argument buffer may have a fixed overhead storage, in addition to the storage required
+ // to hold the resources. This overhead per descriptor set is conservatively calculated by measuring
+ // the size of a Metal argument buffer containing one of each type of resource (S1), and the size
+ // of a Metal argument buffer containing two of each type of resource (S2), and then calculating
+ // the fixed overhead per argument buffer as (2 * S1 - S2). To this is added the overhead due to
+ // the alignment of each descriptor set Metal argument buffer offset.
+ NSUInteger overheadPerDescSet = (2 * getMetalArgumentBufferResourceStorageSize(1, 1, 1) -
+ getMetalArgumentBufferResourceStorageSize(2, 2, 2) +
+ _device->_pMetalFeatures->mtlBufferAlignment);
+
+ // Measure the size of an argument buffer that would hold all of the resources
+ // managed in this pool, then add any overhead for all the descriptor sets.
+ NSUInteger metalArgBuffSize = getMetalArgumentBufferResourceStorageSize(mtlBuffCnt, mtlTexCnt, mtlSampCnt);
+ metalArgBuffSize += (overheadPerDescSet * (pCreateInfo->maxSets - 1)); // metalArgBuffSize already includes overhead for one descriptor set
+ if (metalArgBuffSize) {
+ NSUInteger maxMTLBuffSize = _device->_pMetalFeatures->maxMTLBufferSize;
+ if (metalArgBuffSize > maxMTLBuffSize) {
+ setConfigurationResult(reportError(VK_ERROR_FRAGMENTATION_EXT, "vkCreateDescriptorPool(): The requested descriptor storage of %d MB is larger than the maximum descriptor storage of %d MB per VkDescriptorPool.", (uint32_t)(metalArgBuffSize / MEBI), (uint32_t)(maxMTLBuffSize / MEBI)));
+ metalArgBuffSize = maxMTLBuffSize;
+ }
+ _metalArgumentBuffer = [getMTLDevice() newBufferWithLength: metalArgBuffSize options: MTLResourceStorageModeShared]; // retained
+ _metalArgumentBuffer.label = @"Argument buffer";
+ }
+ }
+}
+
+// Returns the size of a Metal argument buffer containing the specified number of buffers, textures, and samplers.
+// Make sure any call to this function is wrapped in @autoreleasepool.
+NSUInteger MVKDescriptorPool::getMetalArgumentBufferResourceStorageSize(NSUInteger bufferCount,
+ NSUInteger textureCount,
+ NSUInteger samplerCount) {
+ NSMutableArray<MTLArgumentDescriptor*>* args = [NSMutableArray arrayWithCapacity: 3];
+
+ NSUInteger argIdx = 0;
+ [args addObject: getMTLArgumentDescriptor(MTLDataTypePointer, argIdx, bufferCount)];
+ argIdx += bufferCount;
+ [args addObject: getMTLArgumentDescriptor(MTLDataTypeTexture, argIdx, textureCount)];
+ argIdx += textureCount;
+ [args addObject: getMTLArgumentDescriptor(MTLDataTypeSampler, argIdx, samplerCount)];
+ argIdx += samplerCount;
+
+ id<MTLArgumentEncoder> argEnc = [getMTLDevice() newArgumentEncoderWithArguments: args];
+ NSUInteger metalArgBuffSize = argEnc.encodedLength;
+ [argEnc release];
+
+ return metalArgBuffSize;
+}
+
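The fixed-overhead estimate used in initMetalArgumentBuffer() above is plain algebra over two measurements from this helper; a sketch with made-up sizes:

// If a one-of-each-resource argument buffer measures S1 = overhead + unit, and a
// two-of-each buffer measures S2 = overhead + 2 * unit, then overhead = 2 * S1 - S2.
static NSUInteger argBuffFixedOverhead(NSUInteger s1, NSUInteger s2) {
    return (2 * s1) - s2;   // e.g. s1 = 96, s2 = 160  ->  overhead = 32 (per-set unit = 64)
}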
+// Returns a MTLArgumentDescriptor of a particular type.
+// To be conservative, use some worst-case values, in case content makes a difference in argument size.
+MTLArgumentDescriptor* MVKDescriptorPool::getMTLArgumentDescriptor(MTLDataType resourceType, NSUInteger argIndex, NSUInteger count) {
+ auto* argDesc = [MTLArgumentDescriptor argumentDescriptor];
+ argDesc.dataType = resourceType;
+ argDesc.access = MTLArgumentAccessReadWrite;
+ argDesc.index = argIndex;
+ argDesc.arrayLength = count;
+ argDesc.textureType = MTLTextureTypeCubeArray;
+ return argDesc;
+}
MVKDescriptorPool::~MVKDescriptorPool() {
reset(0);
+ [_metalArgumentBuffer release];
+ _metalArgumentBuffer = nil;
}
@@ -701,26 +964,3 @@
dstSet->write(pEntry, pEntry->stride, pCurData);
}
}
-
-void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
- MVKShaderStageResourceBinding& ssRB,
- spv::ExecutionModel stage,
- uint32_t descriptorSetIndex,
- uint32_t bindingIndex,
- uint32_t count,
- MVKSampler* immutableSampler) {
- mvk::MSLResourceBinding rb;
-
- auto& rbb = rb.resourceBinding;
- rbb.stage = stage;
- rbb.desc_set = descriptorSetIndex;
- rbb.binding = bindingIndex;
- rbb.count = count;
- rbb.msl_buffer = ssRB.bufferIndex;
- rbb.msl_texture = ssRB.textureIndex;
- rbb.msl_sampler = ssRB.samplerIndex;
-
- if (immutableSampler) { immutableSampler->getConstexprSampler(rb); }
-
- context.resourceBindings.push_back(rb);
-}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 60cc8e7..c389deb 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -18,6 +18,7 @@
#pragma once
+#include "MVKEnvironment.h"
#include "MVKFoundation.h"
#include "MVKVulkanAPIObject.h"
#include "MVKMTLResourceBindings.h"
@@ -325,6 +326,9 @@
/** Returns whether the MSL version is supported on this device. */
inline bool mslVersionIsAtLeast(MTLLanguageVersion minVer) { return _metalFeatures.mslVersionEnum >= minVer; }
+ /** Returns whether this device is using Metal argument buffers. */
+ inline bool isUsingMetalArgumentBuffers() const { return _metalFeatures.argumentBuffers && mvkConfig()->useMetalArgumentBuffers; };
+
#pragma mark Construction
@@ -364,6 +368,7 @@
uint64_t getVRAMSize();
uint64_t getRecommendedMaxWorkingSetSize();
uint64_t getCurrentAllocatedSize();
+ uint32_t getMaxSamplerCount();
void initExternalMemoryProperties();
void initExtensions();
MVKArrayRef<MVKQueueFamily*> getQueueFamilies();
@@ -848,6 +853,15 @@
/** Returns info about the pixel format supported by the physical device. */
inline MVKPixelFormats* getPixelFormats() { return _device->getPixelFormats(); }
+ /** Returns whether this device is using Metal argument buffers. */
+ inline bool isUsingMetalArgumentBuffers() { return getPhysicalDevice()->isUsingMetalArgumentBuffers(); };
+
+ /** Returns whether this device is using one Metal argument buffer for each descriptor set, usable across multiple pipelines and pipeline stages. */
+ inline bool isUsingDescriptorSetMetalArgumentBuffers() { return isUsingMetalArgumentBuffers() && _device->_pMetalFeatures->descriptorSetArgumentBuffers; };
+
+ /** Returns whether this device is using one Metal argument buffer for each descriptor set-pipeline-stage combination. */
+ inline bool isUsingPipelineStageMetalArgumentBuffers() { return isUsingMetalArgumentBuffers() && !_device->_pMetalFeatures->descriptorSetArgumentBuffers; };
+
/** Constructs an instance for the specified device. */
MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); }
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index d304fa0..d4e1a4b 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -375,6 +375,9 @@
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
+ bool isTier2 = isUsingMetalArgumentBuffers() && (_mtlDevice.argumentBuffersSupport >= MTLArgumentBuffersTier2);
+ uint32_t maxSampCnt = getMaxSamplerCount();
+
auto* pDescIdxProps = (VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)next;
pDescIdxProps->maxUpdateAfterBindDescriptorsInAllPools = kMVKUndefinedLargeUInt32;
pDescIdxProps->shaderUniformBufferArrayNonUniformIndexingNative = false;
@@ -384,20 +387,20 @@
pDescIdxProps->shaderInputAttachmentArrayNonUniformIndexingNative = _metalFeatures.arrayOfTextures;
pDescIdxProps->robustBufferAccessUpdateAfterBind = _features.robustBufferAccess;
pDescIdxProps->quadDivergentImplicitLod = false;
- pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSamplers = _properties.limits.maxPerStageDescriptorSamplers;
- pDescIdxProps->maxPerStageDescriptorUpdateAfterBindUniformBuffers = _properties.limits.maxPerStageDescriptorUniformBuffers;
- pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageBuffers = _properties.limits.maxPerStageDescriptorStorageBuffers;
- pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSampledImages = _properties.limits.maxPerStageDescriptorSampledImages;
- pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageImages = _properties.limits.maxPerStageDescriptorStorageImages;
+ pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSamplers = isTier2 ? maxSampCnt : _properties.limits.maxPerStageDescriptorSamplers;
+ pDescIdxProps->maxPerStageDescriptorUpdateAfterBindUniformBuffers = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorUniformBuffers;
+ pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageBuffers = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageBuffers;
+ pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSampledImages = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorSampledImages;
+ pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageImages = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageImages;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindInputAttachments = _properties.limits.maxPerStageDescriptorInputAttachments;
- pDescIdxProps->maxPerStageUpdateAfterBindResources = _properties.limits.maxPerStageResources;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindSamplers = _properties.limits.maxDescriptorSetSamplers;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffers = _properties.limits.maxDescriptorSetUniformBuffers;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = _properties.limits.maxDescriptorSetUniformBuffersDynamic;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffers = _properties.limits.maxDescriptorSetStorageBuffers;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = _properties.limits.maxDescriptorSetStorageBuffersDynamic;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindSampledImages = _properties.limits.maxDescriptorSetSampledImages;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageImages = _properties.limits.maxDescriptorSetStorageImages;
+ pDescIdxProps->maxPerStageUpdateAfterBindResources = isTier2 ? 500000 : _properties.limits.maxPerStageResources;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindSamplers = isTier2 ? maxSampCnt : _properties.limits.maxDescriptorSetSamplers;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffers = isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffers;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffersDynamic;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffers = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffers;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffersDynamic;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindSampledImages = isTier2 ? 500000 : _properties.limits.maxDescriptorSetSampledImages;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageImages = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageImages;
pDescIdxProps->maxDescriptorSetUpdateAfterBindInputAttachments = _properties.limits.maxDescriptorSetInputAttachments;
break;
}
@@ -1205,6 +1208,7 @@
if (supportsMTLFeatureSet(tvOS_GPUFamily1_v3)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_0;
_metalFeatures.renderWithoutAttachments = true;
+ _metalFeatures.argumentBuffers = true;
}
if (supportsMTLFeatureSet(tvOS_GPUFamily1_v4)) {
@@ -1279,6 +1283,7 @@
if (supportsMTLFeatureSet(iOS_GPUFamily1_v4)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_0;
_metalFeatures.renderWithoutAttachments = true;
+ _metalFeatures.argumentBuffers = true;
}
if (supportsMTLFeatureSet(iOS_GPUFamily1_v5)) {
@@ -1387,6 +1392,7 @@
_metalFeatures.presentModeImmediate = true;
_metalFeatures.fences = true;
_metalFeatures.nonUniformThreadgroups = true;
+ _metalFeatures.argumentBuffers = true;
}
if (supportsMTLFeatureSet(macOS_GPUFamily1_v4)) {
@@ -1548,6 +1554,19 @@
#if MVK_OS_SIMULATOR && !MVK_APPLE_SILICON
_metalFeatures.mtlBufferAlignment = 256;
#endif
+
+ // Currently, Metal argument buffer support is in beta stage, and is only supported
+ // on macOS 10.16 (Big Sur) or later, or on older versions of macOS using an Intel GPU.
+ // Metal argument buffer support is not available on iOS. Development to support iOS
+ // and a wider combination of GPUs on older macOS versions is under way.
+#if MVK_MACOS
+ _metalFeatures.descriptorSetArgumentBuffers = (_metalFeatures.argumentBuffers &&
+ (mvkOSVersionIsAtLeast(10.16) ||
+ _properties.vendorID == kIntelVendorId));
+#endif
+ // Currently, if we don't support descriptor set argument buffers, we can't support argument buffers.
+ _metalFeatures.argumentBuffers = _metalFeatures.descriptorSetArgumentBuffers;
+
}
// Initializes the physical device features of this instance.
@@ -2055,7 +2074,7 @@
// Features with no specific limits - default to unlimited int values
_properties.limits.maxMemoryAllocationCount = kMVKUndefinedLargeUInt32;
- _properties.limits.maxSamplerAllocationCount = kMVKUndefinedLargeUInt32;
+ _properties.limits.maxSamplerAllocationCount = getMaxSamplerCount();
_properties.limits.maxBoundDescriptorSets = kMVKMaxDescriptorSetCount;
_properties.limits.maxComputeWorkGroupCount[0] = kMVKUndefinedLargeUInt32;
@@ -2377,6 +2396,7 @@
// Next 4 bytes contains flags based on enabled Metal features that
// might affect the contents of the pipeline cache (mostly MSL content).
uint32_t mtlFeatures = 0;
+ mtlFeatures |= isUsingMetalArgumentBuffers() << 0;
*(uint32_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatures);
uuidComponentOffset += sizeof(mtlFeatures);
}
@@ -2625,6 +2645,11 @@
#endif
}
+uint32_t MVKPhysicalDevice::getMaxSamplerCount() {
+ return ([_mtlDevice respondsToSelector: @selector(maxArgumentBufferSamplerCount)]
+ ? (uint32_t)_mtlDevice.maxArgumentBufferSamplerCount : 1024);
+}
+
void MVKPhysicalDevice::initExternalMemoryProperties() {
// Buffers
@@ -3650,6 +3675,7 @@
if ( !_defaultMTLSamplerState ) {
@autoreleasepool {
MTLSamplerDescriptor* mtlSampDesc = [[MTLSamplerDescriptor new] autorelease];
+ mtlSampDesc.supportArgumentBuffers = _physicalDevice->isUsingMetalArgumentBuffers();
_defaultMTLSamplerState = [getMTLDevice() newSamplerStateWithDescriptor: mtlSampDesc]; // retained
}
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index c2785f4..5029698 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -1929,6 +1929,7 @@
? mvkClamp(pCreateInfo->maxAnisotropy, 1.0f, _device->_pProperties->limits.maxSamplerAnisotropy)
: 1);
mtlSampDesc.normalizedCoordinates = !pCreateInfo->unnormalizedCoordinates;
+ mtlSampDesc.supportArgumentBuffers = isUsingMetalArgumentBuffers();
// If compareEnable is true, but dynamic samplers with depth compare are not available
// on this device, this sampler must only be used as an immutable sampler, and will
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
index cdef884..913ae01 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
@@ -23,6 +23,7 @@
#include "MVKShaderModule.h"
#include "MVKSync.h"
#include "MVKSmallVector.h"
+#include "MVKBitArray.h"
#include <MoltenVKShaderConverter/SPIRVReflection.h>
#include <MoltenVKShaderConverter/SPIRVToMSLConverter.h>
#include <unordered_map>
@@ -55,6 +56,7 @@
/** Binds descriptor sets to a command encoder. */
void bindDescriptorSets(MVKCommandEncoder* cmdEncoder,
+ VkPipelineBindPoint pipelineBindPoint,
MVKArrayRef<MVKDescriptorSet*> descriptorSets,
uint32_t firstSet,
MVKArrayRef<uint32_t> dynamicOffsets);
@@ -79,6 +81,9 @@
/** Returns the current buffer size buffer bindings. */
const MVKShaderImplicitRezBinding& getBufferSizeBufferIndex() { return _bufferSizeBufferIndex; }
+ /** Returns the current dynamic buffer offset buffer bindings. */
+ const MVKShaderImplicitRezBinding& getDynamicOffsetBufferIndex() { return _dynamicOffsetBufferIndex; }
+
/** Returns the current view range buffer binding for multiview draws. */
const MVKShaderImplicitRezBinding& getViewRangeBufferIndex() { return _viewRangeBufferIndex; }
@@ -100,6 +105,15 @@
/** Returns the number of buffers in this layout. This is used to calculate the size of the buffer size buffer. */
uint32_t getBufferCount() { return _pushConstantsMTLResourceIndexes.getMaxBufferIndex(); }
+ /** Returns the number of descriptor sets in this pipeline layout. */
+ uint32_t getDescriptorSetCount() { return (uint32_t)_descriptorSetLayouts.size(); }
+
+ /** Returns the number of descriptors in the descriptor set layout. */
+ uint32_t getDescriptorCount(uint32_t descSetIndex) { return getDescriptorSetLayout(descSetIndex)->getDescriptorCount(); }
+
+ /** Returns the descriptor set layout. */
+ MVKDescriptorSetLayout* getDescriptorSetLayout(uint32_t descSetIndex) { return _descriptorSetLayouts[descSetIndex]; }
+
/** Returns the push constant binding info. */
const MVKShaderResourceBinding& getPushConstantBindings() { return _pushConstantsMTLResourceIndexes; }
@@ -109,6 +123,8 @@
~MVKPipelineLayout() override;
protected:
+ friend class MVKPipeline;
+
void propagateDebugName() override {}
MVKSmallVector<MVKDescriptorSetLayout*, 1> _descriptorSetLayouts;
@@ -117,6 +133,7 @@
MVKShaderResourceBinding _pushConstantsMTLResourceIndexes;
MVKShaderImplicitRezBinding _swizzleBufferIndex;
MVKShaderImplicitRezBinding _bufferSizeBufferIndex;
+ MVKShaderImplicitRezBinding _dynamicOffsetBufferIndex;
MVKShaderImplicitRezBinding _viewRangeBufferIndex;
MVKShaderImplicitRezBinding _indirectParamsIndex;
MVKShaderImplicitRezBinding _outputBufferIndex;
@@ -153,12 +170,6 @@
/** Binds the push constants to a command encoder. */
void bindPushConstants(MVKCommandEncoder* cmdEncoder);
- /** Returns the current swizzle buffer bindings. */
- const MVKShaderImplicitRezBinding& getSwizzleBufferIndex() { return _swizzleBufferIndex; }
-
- /** Returns the current buffer size buffer bindings. */
- const MVKShaderImplicitRezBinding& getBufferSizeBufferIndex() { return _bufferSizeBufferIndex; }
-
/** Returns the current indirect parameter buffer bindings. */
const MVKShaderImplicitRezBinding& getIndirectParamsIndex() { return _indirectParamsIndex; }
@@ -168,17 +179,35 @@
/** Returns whether all internal Metal pipeline states are valid. */
bool hasValidMTLPipelineStates() { return _hasValidMTLPipelineStates; }
+ /** Returns the MTLArgumentEncoder for the descriptor set. */
+ virtual MVKMTLArgumentEncoder& getMTLArgumentEncoder(uint32_t descSetIndex, MVKShaderStage stage) = 0;
+
+ /** Returns the array of descriptor binding use for the descriptor set. */
+ virtual MVKBitArray& getDescriptorBindingUse(uint32_t descSetIndex, MVKShaderStage stage) = 0;
+
+ /** Returns the number of descriptor sets in this pipeline layout. */
+ uint32_t getDescriptorSetCount() { return _descriptorSetCount; }
+
+ /** A mutex lock to protect access to the Metal argument encoders. */
+ std::mutex _mtlArgumentEncodingLock;
+
/** Constructs an instance for the device, layout, and parent (which may be NULL). */
MVKPipeline(MVKDevice* device, MVKPipelineCache* pipelineCache, MVKPipelineLayout* layout, MVKPipeline* parent);
protected:
void propagateDebugName() override {}
+ template<typename CreateInfo> void addMTLArgumentEncoders(MVKMTLFunction& mvkMTLFunc,
+ const CreateInfo* pCreateInfo,
+ SPIRVToMSLConversionConfiguration& context,
+ MVKShaderStage stage);
MVKPipelineCache* _pipelineCache;
MVKShaderImplicitRezBinding _swizzleBufferIndex;
MVKShaderImplicitRezBinding _bufferSizeBufferIndex;
+ MVKShaderImplicitRezBinding _dynamicOffsetBufferIndex;
MVKShaderImplicitRezBinding _indirectParamsIndex;
MVKShaderResourceBinding _pushConstantsMTLResourceIndexes;
+ uint32_t _descriptorSetCount;
bool _fullImageViewSwizzle;
bool _hasValidMTLPipelineStates = true;
@@ -200,6 +229,14 @@
typedef MVKSmallVector<MVKGraphicsStage, 4> MVKPiplineStages;
+struct MVKStagedMTLArgumentEncoders {
+ MVKMTLArgumentEncoder stages[4] = {};
+};
+
+struct MVKStagedDescriptorBindingUse {
+ MVKBitArray stages[4] = {};
+};
+
/** The number of dynamic states possible in Vulkan. */
static const uint32_t kMVKVkDynamicStateCount = 32;
@@ -265,6 +302,12 @@
/** Returns the collection of instance-rate vertex bindings whose divisor is zero, along with their strides. */
MVKArrayRef<MVKZeroDivisorVertexBinding> getZeroDivisorVertexBindings() { return _zeroDivisorVertexBindings.contents(); }
+ /** Returns the MTLArgumentEncoder for the descriptor set. */
+ MVKMTLArgumentEncoder& getMTLArgumentEncoder(uint32_t descSetIndex, MVKShaderStage stage) override { return _mtlArgumentEncoders[descSetIndex].stages[stage]; }
+
+ /** Returns the array of descriptor binding use for the descriptor set. */
+ MVKBitArray& getDescriptorBindingUse(uint32_t descSetIndex, MVKShaderStage stage) override { return _descriptorBindingUse[descSetIndex].stages[stage]; }
+
/** Constructs an instance for the device and parent (which may be NULL). */
MVKGraphicsPipeline(MVKDevice* device,
MVKPipelineCache* pipelineCache,
@@ -314,6 +357,8 @@
MVKSmallVector<VkRect2D, kMVKCachedViewportScissorCount> _scissors;
MVKSmallVector<MVKTranslatedVertexBinding> _translatedVertexBindings;
MVKSmallVector<MVKZeroDivisorVertexBinding> _zeroDivisorVertexBindings;
+ MVKSmallVector<MVKStagedMTLArgumentEncoders> _mtlArgumentEncoders;
+ MVKSmallVector<MVKStagedDescriptorBindingUse> _descriptorBindingUse;
MTLComputePipelineDescriptor* _mtlTessVertexStageDesc = nil;
id<MTLFunction> _mtlTessVertexFunctions[3] = {nil, nil, nil};
@@ -340,17 +385,21 @@
bool _dynamicStateEnabled[kMVKVkDynamicStateCount];
bool _needsVertexSwizzleBuffer = false;
bool _needsVertexBufferSizeBuffer = false;
+ bool _needsVertexDynamicOffsetBuffer = false;
bool _needsVertexViewRangeBuffer = false;
bool _needsVertexOutputBuffer = false;
bool _needsTessCtlSwizzleBuffer = false;
bool _needsTessCtlBufferSizeBuffer = false;
+ bool _needsTessCtlDynamicOffsetBuffer = false;
bool _needsTessCtlOutputBuffer = false;
bool _needsTessCtlPatchOutputBuffer = false;
bool _needsTessCtlInputBuffer = false;
bool _needsTessEvalSwizzleBuffer = false;
bool _needsTessEvalBufferSizeBuffer = false;
+ bool _needsTessEvalDynamicOffsetBuffer = false;
bool _needsFragmentSwizzleBuffer = false;
bool _needsFragmentBufferSizeBuffer = false;
+ bool _needsFragmentDynamicOffsetBuffer = false;
bool _needsFragmentViewRangeBuffer = false;
};
@@ -369,6 +418,12 @@
/** Returns if this pipeline allows non-zero dispatch bases in vkCmdDispatchBase(). */
bool allowsDispatchBase() { return _allowsDispatchBase; }
+ /** Returns the MTLArgumentEncoder for the descriptor set. */
+ MVKMTLArgumentEncoder& getMTLArgumentEncoder(uint32_t descSetIndex, MVKShaderStage stage) override { return _mtlArgumentEncoders[descSetIndex]; }
+
+ /** Returns the bit array tracking descriptor binding use for the descriptor set. */
+ MVKBitArray& getDescriptorBindingUse(uint32_t descSetIndex, MVKShaderStage stage) override { return _descriptorBindingUse[descSetIndex]; }
+
/** Constructs an instance for the device and parent (which may be NULL). */
MVKComputePipeline(MVKDevice* device,
MVKPipelineCache* pipelineCache,
@@ -381,9 +436,12 @@
MVKMTLFunction getMTLFunction(const VkComputePipelineCreateInfo* pCreateInfo);
id<MTLComputePipelineState> _mtlPipelineState;
+ MVKSmallVector<MVKMTLArgumentEncoder> _mtlArgumentEncoders;
+ MVKSmallVector<MVKBitArray> _descriptorBindingUse;
MTLSize _mtlThreadgroupSize;
bool _needsSwizzleBuffer = false;
bool _needsBufferSizeBuffer = false;
+ bool _needsDynamicOffsetBuffer = false;
bool _needsDispatchBaseBuffer = false;
bool _allowsDispatchBase = false;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index 792d768..e9b8839 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -17,7 +17,6 @@
*/
#include "MVKPipeline.h"
-#include <MoltenVKShaderConverter/SPIRVToMSLConverter.h>
#include "MVKRenderPass.h"
#include "MVKCommandBuffer.h"
#include "MVKFoundation.h"
@@ -38,6 +37,7 @@
// A null cmdEncoder can be passed to perform a validation pass
void MVKPipelineLayout::bindDescriptorSets(MVKCommandEncoder* cmdEncoder,
+ VkPipelineBindPoint pipelineBindPoint,
MVKArrayRef<MVKDescriptorSet*> descriptorSets,
uint32_t firstSet,
MVKArrayRef<uint32_t> dynamicOffsets) {
@@ -48,7 +48,9 @@
MVKDescriptorSet* descSet = descriptorSets[dsIdx];
uint32_t dslIdx = firstSet + dsIdx;
MVKDescriptorSetLayout* dsl = _descriptorSetLayouts[dslIdx];
- dsl->bindDescriptorSet(cmdEncoder, descSet, _dslMTLResourceIndexOffsets[dslIdx],
+ dsl->bindDescriptorSet(cmdEncoder, pipelineBindPoint,
+ dslIdx, descSet,
+ _dslMTLResourceIndexOffsets[dslIdx],
dynamicOffsets, dynamicOffsetIndex);
if (!cmdEncoder) { setConfigurationResult(dsl->getConfigurationResult()); }
}
@@ -77,30 +79,27 @@
void MVKPipelineLayout::populateShaderConverterContext(SPIRVToMSLConversionConfiguration& context) {
context.resourceBindings.clear();
+ context.discreteDescriptorSets.clear();
+ context.dynamicBufferDescriptors.clear();
// Add resource bindings defined in the descriptor set layouts
- uint32_t dslCnt = (uint32_t)_descriptorSetLayouts.size();
+ uint32_t dslCnt = getDescriptorSetCount();
for (uint32_t dslIdx = 0; dslIdx < dslCnt; dslIdx++) {
_descriptorSetLayouts[dslIdx]->populateShaderConverterContext(context,
_dslMTLResourceIndexOffsets[dslIdx],
dslIdx);
}
- // Add any resource bindings used by push-constants
- static const spv::ExecutionModel models[] = {
- spv::ExecutionModelVertex,
- spv::ExecutionModelTessellationControl,
- spv::ExecutionModelTessellationEvaluation,
- spv::ExecutionModelFragment,
- spv::ExecutionModelGLCompute
- };
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ // Add any resource bindings used by push-constants.
+ // Use the VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT descriptor type, which is compatible with push constants in Metal.
+ for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageMax; stage++) {
mvkPopulateShaderConverterContext(context,
- _pushConstantsMTLResourceIndexes.stages[i],
- models[i],
+ _pushConstantsMTLResourceIndexes.stages[stage],
+ MVKShaderStage(stage),
kPushConstDescSet,
kPushConstBinding,
1,
+ VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT,
nullptr);
}
}
@@ -112,19 +111,32 @@
// corresponding DSL, and associating the current accumulated resource index offsets
// with each DSL as it is added. The final accumulation of resource index offsets
// becomes the resource index offsets that will be used for push constants.
+ // If we are using Metal argument buffers, reserve space for the Metal argument
+ // buffers themselves, and clear indexes of offsets used in Metal argument buffers,
+ // but still accumulate dynamic offset buffer indexes across descriptor sets.
// According to the Vulkan spec, VkDescriptorSetLayout is intended to be consumed when passed
// to any Vulkan function, and may be safely destroyed by app immediately after. In order for
// this pipeline layout to retain the VkDescriptorSetLayout, the MVKDescriptorSetLayout
// instance is retained, so that it will live on here after it has been destroyed by the API.
- _descriptorSetLayouts.reserve(pCreateInfo->setLayoutCount);
- for (uint32_t i = 0; i < pCreateInfo->setLayoutCount; i++) {
+ uint32_t dslCnt = pCreateInfo->setLayoutCount;
+ _pushConstantsMTLResourceIndexes.addArgumentBuffers(dslCnt);
+
+ _descriptorSetLayouts.reserve(dslCnt);
+ for (uint32_t i = 0; i < dslCnt; i++) {
MVKDescriptorSetLayout* pDescSetLayout = (MVKDescriptorSetLayout*)pCreateInfo->pSetLayouts[i];
pDescSetLayout->retain();
_descriptorSetLayouts.push_back(pDescSetLayout);
- _dslMTLResourceIndexOffsets.push_back(_pushConstantsMTLResourceIndexes);
- _pushConstantsMTLResourceIndexes += pDescSetLayout->_mtlResourceCounts;
+
+ MVKShaderResourceBinding adjstdDSLRezOfsts = _pushConstantsMTLResourceIndexes;
+ MVKShaderResourceBinding adjstdDSLRezCnts = pDescSetLayout->_mtlResourceCounts;
+ if (pDescSetLayout->isUsingMetalArgumentBuffer()) {
+ adjstdDSLRezOfsts.clearArgumentBufferResources();
+ adjstdDSLRezCnts.clearArgumentBufferResources();
+ }
+ _dslMTLResourceIndexOffsets.push_back(adjstdDSLRezOfsts);
+ _pushConstantsMTLResourceIndexes += adjstdDSLRezCnts;
}
// Add push constants
@@ -135,13 +147,13 @@
// Set implicit buffer indices
// FIXME: Many of these are optional. We shouldn't set the ones that aren't
- // present--or at least, we should move the ones that are down to avoid
- // running over the limit of available buffers. But we can't know that
- // until we compile the shaders.
+ // present--or at least, we should move the ones that are down to avoid running over
+ // the limit of available buffers. But we can't know that until we compile the shaders.
for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
- _swizzleBufferIndex.stages[i] = _pushConstantsMTLResourceIndexes.stages[i].bufferIndex + 1;
- _bufferSizeBufferIndex.stages[i] = _swizzleBufferIndex.stages[i] + 1;
- _indirectParamsIndex.stages[i] = _bufferSizeBufferIndex.stages[i] + 1;
+ _dynamicOffsetBufferIndex.stages[i] = _pushConstantsMTLResourceIndexes.stages[i].bufferIndex + 1;
+ _bufferSizeBufferIndex.stages[i] = _dynamicOffsetBufferIndex.stages[i] + 1;
+ _swizzleBufferIndex.stages[i] = _bufferSizeBufferIndex.stages[i] + 1;
+ _indirectParamsIndex.stages[i] = _swizzleBufferIndex.stages[i] + 1;
_outputBufferIndex.stages[i] = _indirectParamsIndex.stages[i] + 1;
if (i == kMVKShaderStageTessCtl) {
_tessCtlPatchOutputBufferIndex = _outputBufferIndex.stages[i] + 1;
@@ -170,11 +182,32 @@
}
}
+// For each descriptor set, populate the descriptor bindings used by the shader for this stage,
+// and if Metal argument encoders must be dedicated to a pipeline stage, create the encoder here.
+template<typename CreateInfo>
+void MVKPipeline::addMTLArgumentEncoders(MVKMTLFunction& mvkMTLFunc,
+ const CreateInfo* pCreateInfo,
+ SPIRVToMSLConversionConfiguration& context,
+ MVKShaderStage stage) {
+ if ( !isUsingMetalArgumentBuffers() ) { return; }
+
+ bool needMTLArgEnc = isUsingPipelineStageMetalArgumentBuffers();
+ auto mtlFunc = mvkMTLFunc.getMTLFunction();
+ for (uint32_t dsIdx = 0; dsIdx < _descriptorSetCount; dsIdx++) {
+ auto* dsLayout = ((MVKPipelineLayout*)pCreateInfo->layout)->getDescriptorSetLayout(dsIdx);
+ bool descSetIsUsed = dsLayout->populateBindingUse(getDescriptorBindingUse(dsIdx, stage), context, stage, dsIdx);
+ if (descSetIsUsed && needMTLArgEnc) {
+ getMTLArgumentEncoder(dsIdx, stage).init([mtlFunc newArgumentEncoderWithBufferIndex: dsIdx]);
+ }
+ }
+}
+
MVKPipeline::MVKPipeline(MVKDevice* device, MVKPipelineCache* pipelineCache, MVKPipelineLayout* layout, MVKPipeline* parent) :
MVKVulkanAPIDeviceObject(device),
_pipelineCache(pipelineCache),
_pushConstantsMTLResourceIndexes(layout->getPushConstantBindings()),
- _fullImageViewSwizzle(mvkConfig()->fullImageViewSwizzle) {}
+ _fullImageViewSwizzle(mvkConfig()->fullImageViewSwizzle),
+ _descriptorSetCount(layout->getDescriptorSetCount()) {}
#pragma mark -
@@ -265,6 +298,7 @@
}
cmdEncoder->_graphicsResourcesState.bindSwizzleBuffer(_swizzleBufferIndex, _needsVertexSwizzleBuffer, _needsTessCtlSwizzleBuffer, _needsTessEvalSwizzleBuffer, _needsFragmentSwizzleBuffer);
cmdEncoder->_graphicsResourcesState.bindBufferSizeBuffer(_bufferSizeBufferIndex, _needsVertexBufferSizeBuffer, _needsTessCtlBufferSizeBuffer, _needsTessEvalBufferSizeBuffer, _needsFragmentBufferSizeBuffer);
+ cmdEncoder->_graphicsResourcesState.bindDynamicOffsetBuffer(_dynamicOffsetBufferIndex, _needsVertexDynamicOffsetBuffer, _needsTessCtlDynamicOffsetBuffer, _needsTessEvalDynamicOffsetBuffer, _needsFragmentDynamicOffsetBuffer);
cmdEncoder->_graphicsResourcesState.bindViewRangeBuffer(_viewRangeBufferIndex, _needsVertexViewRangeBuffer, _needsFragmentViewRangeBuffer);
}
@@ -473,6 +507,10 @@
_mtlPipelineState = nil;
_mtlTessVertexStageDesc = nil;
for (uint32_t i = 0; i < 3; i++) { _mtlTessVertexFunctions[i] = nil; }
+
+ if (isUsingMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); }
+ if (isUsingPipelineStageMetalArgumentBuffers()) { _mtlArgumentEncoders.resize(_descriptorSetCount); }
+
if (!isTessellationPipeline()) {
MTLRenderPipelineDescriptor* plDesc = newMTLRenderPipelineDescriptor(pCreateInfo, reflectData); // temp retain
if (plDesc) {
@@ -879,6 +917,7 @@
shaderContext.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageVertex];
shaderContext.options.mslOptions.shader_output_buffer_index = _outputBufferIndex.stages[kMVKShaderStageVertex];
shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageVertex];
+ shaderContext.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageVertex];
shaderContext.options.mslOptions.view_mask_buffer_index = _viewRangeBufferIndex.stages[kMVKShaderStageVertex];
shaderContext.options.mslOptions.capture_output_to_buffer = false;
shaderContext.options.mslOptions.disable_rasterization = isRasterizationDisabled(pCreateInfo);
@@ -896,9 +935,12 @@
plDesc.rasterizationEnabled = !funcRslts.isRasterizationDisabled;
_needsVertexSwizzleBuffer = funcRslts.needsSwizzleBuffer;
_needsVertexBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+ _needsVertexDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer;
_needsVertexViewRangeBuffer = funcRslts.needsViewRangeBuffer;
_needsVertexOutputBuffer = funcRslts.needsOutputBuffer;
+ addMTLArgumentEncoders(func, pCreateInfo, shaderContext, kMVKShaderStageVertex);
+
if (funcRslts.isRasterizationDisabled) {
_pFragmentSS = nullptr;
}
@@ -911,6 +953,10 @@
if (!verifyImplicitBuffer(_needsVertexBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageVertex, "buffer size", vbCnt)) {
return false;
}
+ // Ditto dynamic offset buffer.
+ if (!verifyImplicitBuffer(_needsVertexDynamicOffsetBuffer, _dynamicOffsetBufferIndex, kMVKShaderStageVertex, "dynamic offset", vbCnt)) {
+ return false;
+ }
// Ditto captured output buffer.
if (!verifyImplicitBuffer(_needsVertexOutputBuffer, _outputBufferIndex, kMVKShaderStageVertex, "output", vbCnt)) {
return false;
@@ -935,6 +981,7 @@
shaderContext.options.mslOptions.shader_index_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageVertex];
shaderContext.options.mslOptions.shader_output_buffer_index = _outputBufferIndex.stages[kMVKShaderStageVertex];
shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageVertex];
+ shaderContext.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageVertex];
shaderContext.options.mslOptions.capture_output_to_buffer = true;
shaderContext.options.mslOptions.vertex_for_tessellation = true;
shaderContext.options.mslOptions.disable_rasterization = true;
@@ -946,9 +993,10 @@
CompilerMSL::Options::IndexType::UInt32,
};
// We need to compile this function three times, with no indexing, 16-bit indices, and 32-bit indices.
+ MVKMTLFunction func;
for (uint32_t i = 0; i < sizeof(indexTypes)/sizeof(indexTypes[0]); i++) {
shaderContext.options.mslOptions.vertex_index_type = indexTypes[i];
- MVKMTLFunction func = ((MVKShaderModule*)_pVertexSS->module)->getMTLFunction(&shaderContext, _pVertexSS->pSpecializationInfo, _pipelineCache);
+ func = ((MVKShaderModule*)_pVertexSS->module)->getMTLFunction(&shaderContext, _pVertexSS->pSpecializationInfo, _pipelineCache);
id<MTLFunction> mtlFunc = func.getMTLFunction();
if ( !mtlFunc ) {
setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Vertex shader function could not be compiled into pipeline. See previous logged error."));
@@ -959,9 +1007,12 @@
auto& funcRslts = func.shaderConversionResults;
_needsVertexSwizzleBuffer = funcRslts.needsSwizzleBuffer;
_needsVertexBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+ _needsVertexDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer;
_needsVertexOutputBuffer = funcRslts.needsOutputBuffer;
}
+ addMTLArgumentEncoders(func, pCreateInfo, shaderContext, kMVKShaderStageVertex);
+
// If we need the swizzle buffer and there's no place to put it, we're in serious trouble.
if (!verifyImplicitBuffer(_needsVertexSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageVertex, "swizzle", vbCnt)) {
return false;
@@ -970,6 +1021,10 @@
if (!verifyImplicitBuffer(_needsVertexBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageVertex, "buffer size", vbCnt)) {
return false;
}
+ // Ditto dynamic offset buffer.
+ if (!verifyImplicitBuffer(_needsVertexDynamicOffsetBuffer, _dynamicOffsetBufferIndex, kMVKShaderStageVertex, "dynamic offset", vbCnt)) {
+ return false;
+ }
// Ditto captured output buffer.
if (!verifyImplicitBuffer(_needsVertexOutputBuffer, _outputBufferIndex, kMVKShaderStageVertex, "output", vbCnt)) {
return false;
@@ -993,6 +1048,7 @@
shaderContext.options.mslOptions.shader_patch_output_buffer_index = _tessCtlPatchOutputBufferIndex;
shaderContext.options.mslOptions.shader_tess_factor_buffer_index = _tessCtlLevelBufferIndex;
shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageTessCtl];
+ shaderContext.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageTessCtl];
shaderContext.options.mslOptions.capture_output_to_buffer = true;
shaderContext.options.mslOptions.multi_patch_workgroup = true;
shaderContext.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(_pTessCtlSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize;
@@ -1009,16 +1065,22 @@
auto& funcRslts = func.shaderConversionResults;
_needsTessCtlSwizzleBuffer = funcRslts.needsSwizzleBuffer;
_needsTessCtlBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+ _needsTessCtlDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer;
_needsTessCtlOutputBuffer = funcRslts.needsOutputBuffer;
_needsTessCtlPatchOutputBuffer = funcRslts.needsPatchOutputBuffer;
_needsTessCtlInputBuffer = funcRslts.needsInputThreadgroupMem;
+ addMTLArgumentEncoders(func, pCreateInfo, shaderContext, kMVKShaderStageTessCtl);
+
if (!verifyImplicitBuffer(_needsTessCtlSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageTessCtl, "swizzle", kMVKTessCtlNumReservedBuffers)) {
return false;
}
if (!verifyImplicitBuffer(_needsTessCtlBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageTessCtl, "buffer size", kMVKTessCtlNumReservedBuffers)) {
return false;
}
+ if (!verifyImplicitBuffer(_needsTessCtlDynamicOffsetBuffer, _dynamicOffsetBufferIndex, kMVKShaderStageTessCtl, "dynamic offset", kMVKTessCtlNumReservedBuffers)) {
+ return false;
+ }
if (!verifyImplicitBuffer(true, _indirectParamsIndex, kMVKShaderStageTessCtl, "indirect parameters", kMVKTessCtlNumReservedBuffers)) {
return false;
}
@@ -1044,6 +1106,7 @@
shaderContext.options.entryPointName = _pTessEvalSS->pName;
shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageTessEval];
shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageTessEval];
+ shaderContext.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageTessEval];
shaderContext.options.mslOptions.capture_output_to_buffer = false;
shaderContext.options.mslOptions.disable_rasterization = isRasterizationDisabled(pCreateInfo);
addPrevStageOutputToShaderConverterContext(shaderContext, tcOutputs);
@@ -1061,6 +1124,9 @@
plDesc.rasterizationEnabled = !funcRslts.isRasterizationDisabled;
_needsTessEvalSwizzleBuffer = funcRslts.needsSwizzleBuffer;
_needsTessEvalBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+ _needsTessEvalDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer;
+
+ addMTLArgumentEncoders(func, pCreateInfo, shaderContext, kMVKShaderStageTessEval);
if (funcRslts.isRasterizationDisabled) {
_pFragmentSS = nullptr;
@@ -1072,6 +1138,9 @@
if (!verifyImplicitBuffer(_needsTessEvalBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageTessEval, "buffer size", kMVKTessEvalNumReservedBuffers)) {
return false;
}
+ if (!verifyImplicitBuffer(_needsTessEvalDynamicOffsetBuffer, _dynamicOffsetBufferIndex, kMVKShaderStageTessEval, "dynamic offset", kMVKTessEvalNumReservedBuffers)) {
+ return false;
+ }
return true;
}
@@ -1083,6 +1152,7 @@
shaderContext.options.entryPointStage = spv::ExecutionModelFragment;
shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageFragment];
shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageFragment];
+ shaderContext.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageFragment];
shaderContext.options.mslOptions.view_mask_buffer_index = _viewRangeBufferIndex.stages[kMVKShaderStageFragment];
shaderContext.options.entryPointName = _pFragmentSS->pName;
shaderContext.options.mslOptions.capture_output_to_buffer = false;
@@ -1109,13 +1179,20 @@
auto& funcRslts = func.shaderConversionResults;
_needsFragmentSwizzleBuffer = funcRslts.needsSwizzleBuffer;
_needsFragmentBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+ _needsFragmentDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer;
_needsFragmentViewRangeBuffer = funcRslts.needsViewRangeBuffer;
+
+ addMTLArgumentEncoders(func, pCreateInfo, shaderContext, kMVKShaderStageFragment);
+
if (!verifyImplicitBuffer(_needsFragmentSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageFragment, "swizzle", 0)) {
return false;
}
if (!verifyImplicitBuffer(_needsFragmentBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageFragment, "buffer size", 0)) {
return false;
}
+ if (!verifyImplicitBuffer(_needsFragmentDynamicOffsetBuffer, _dynamicOffsetBufferIndex, kMVKShaderStageFragment, "dynamic offset", 0)) {
+ return false;
+ }
if (!verifyImplicitBuffer(_needsFragmentViewRangeBuffer, _viewRangeBufferIndex, kMVKShaderStageFragment, "view range", 0)) {
return false;
}
@@ -1448,10 +1525,16 @@
shaderContext.options.mslOptions.r32ui_linear_texture_alignment = (uint32_t)_device->getVkFormatTexelBufferAlignment(VK_FORMAT_R32_UINT, this);
shaderContext.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers;
+ bool useMetalArgBuff = isUsingMetalArgumentBuffers();
+ shaderContext.options.mslOptions.argument_buffers = useMetalArgBuff;
+ shaderContext.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff;
+ shaderContext.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff;
+
MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout;
layout->populateShaderConverterContext(shaderContext);
_swizzleBufferIndex = layout->getSwizzleBufferIndex();
_bufferSizeBufferIndex = layout->getBufferSizeBufferIndex();
+ _dynamicOffsetBufferIndex = layout->getDynamicOffsetBufferIndex();
_indirectParamsIndex = layout->getIndirectParamsIndex();
_outputBufferIndex = layout->getOutputBufferIndex();
_tessCtlPatchOutputBufferIndex = layout->getTessCtlPatchOutputBufferIndex();
@@ -1627,6 +1710,7 @@
cmdEncoder->_mtlThreadgroupSize = _mtlThreadgroupSize;
cmdEncoder->_computeResourcesState.bindSwizzleBuffer(_swizzleBufferIndex, _needsSwizzleBuffer);
cmdEncoder->_computeResourcesState.bindBufferSizeBuffer(_bufferSizeBufferIndex, _needsBufferSizeBuffer);
+ cmdEncoder->_computeResourcesState.bindDynamicOffsetBuffer(_dynamicOffsetBufferIndex, _needsDynamicOffsetBuffer);
}
MVKComputePipeline::MVKComputePipeline(MVKDevice* device,
@@ -1637,6 +1721,9 @@
_allowsDispatchBase = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_DISPATCH_BASE_BIT);
+ if (isUsingMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); }
+ if (isUsingPipelineStageMetalArgumentBuffers()) { _mtlArgumentEncoders.resize(_descriptorSetCount); }
+
MVKMTLFunction func = getMTLFunction(pCreateInfo);
_mtlThreadgroupSize = func.threadGroupSize;
_mtlPipelineState = nil;
@@ -1669,6 +1756,9 @@
if (_needsBufferSizeBuffer && _bufferSizeBufferIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires buffer size buffer, but there is no free slot to pass it."));
}
+ if (_needsDynamicOffsetBuffer && _dynamicOffsetBufferIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
+ setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires dynamic offset buffer, but there is no free slot to pass it."));
+ }
if (_needsDispatchBaseBuffer && _indirectParamsIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires dispatch base buffer, but there is no free slot to pass it."));
}
@@ -1691,6 +1781,12 @@
shaderContext.options.mslOptions.dispatch_base = _allowsDispatchBase;
shaderContext.options.mslOptions.texture_1D_as_2D = mvkConfig()->texture1DAs2D;
shaderContext.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize;
+
+ bool useMetalArgBuff = isUsingMetalArgumentBuffers();
+ shaderContext.options.mslOptions.argument_buffers = useMetalArgBuff;
+ shaderContext.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff;
+ shaderContext.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff;
+
#if MVK_MACOS
shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
#endif
@@ -1703,8 +1799,10 @@
layout->populateShaderConverterContext(shaderContext);
_swizzleBufferIndex = layout->getSwizzleBufferIndex();
_bufferSizeBufferIndex = layout->getBufferSizeBufferIndex();
+ _dynamicOffsetBufferIndex = layout->getDynamicOffsetBufferIndex();
shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageCompute];
shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageCompute];
+ shaderContext.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageCompute];
shaderContext.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageCompute];
MVKMTLFunction func = ((MVKShaderModule*)pSS->module)->getMTLFunction(&shaderContext, pSS->pSpecializationInfo, _pipelineCache);
@@ -1712,8 +1810,11 @@
auto& funcRslts = func.shaderConversionResults;
_needsSwizzleBuffer = funcRslts.needsSwizzleBuffer;
_needsBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+ _needsDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer;
_needsDispatchBaseBuffer = funcRslts.needsDispatchBaseBuffer;
+ addMTLArgumentEncoders(func, pCreateInfo, shaderContext, kMVKShaderStageCompute);
+
return func;
}
@@ -1998,6 +2099,7 @@
opt.enable_decoration_binding,
opt.texture_buffer_native,
opt.force_active_argument_buffer_resources,
+ opt.pad_argument_buffer_resources,
opt.force_native_arrays,
opt.enable_clip_distance_user_varying,
opt.multi_patch_workgroup,
@@ -2020,6 +2122,7 @@
template<class Archive>
void serialize(Archive & archive, MSLResourceBinding& rb) {
archive(rb.stage,
+ rb.basetype,
rb.desc_set,
rb.binding,
rb.count,
@@ -2091,7 +2194,7 @@
void serialize(Archive & archive, MSLShaderInput& si) {
archive(si.shaderInput,
si.binding,
- si.isUsedByShader);
+ si.outIsUsedByShader);
}
template<class Archive>
@@ -2099,14 +2202,23 @@
archive(rb.resourceBinding,
rb.constExprSampler,
rb.requiresConstExprSampler,
- rb.isUsedByShader);
+ rb.outIsUsedByShader);
+ }
+
+ template<class Archive>
+ void serialize(Archive & archive, DescriptorBinding& db) {
+ archive(db.stage,
+ db.descriptorSet,
+ db.binding,
+ db.index);
}
template<class Archive>
void serialize(Archive & archive, SPIRVToMSLConversionConfiguration& ctx) {
archive(ctx.options,
ctx.shaderInputs,
- ctx.resourceBindings);
+ ctx.resourceBindings,
+ ctx.discreteDescriptorSets);
}
template<class Archive>
@@ -2118,6 +2230,7 @@
scr.needsOutputBuffer,
scr.needsPatchOutputBuffer,
scr.needsBufferSizeBuffer,
+ scr.needsDynamicOffsetBuffer,
scr.needsInputThreadgroupMem,
scr.needsDispatchBaseBuffer,
scr.needsViewRangeBuffer);
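A note on the Metal pattern behind addMTLArgumentEncoders() above: each descriptor set that a compiled MTLFunction actually uses gets its own MTLArgumentEncoder, created with newArgumentEncoderWithBufferIndex:. The following Objective-C++ sketch shows that raw Metal flow outside of the MoltenVK classes; mtlDevice, mtlFunc, renderEnc, tex and uniformBuf are assumed to already exist, and the indices are illustrative only.

    // Create an argument encoder for the argument buffer bound at buffer index 0
    // of the compiled function, and back it with a buffer of the encoded length.
    id<MTLArgumentEncoder> argEnc = [mtlFunc newArgumentEncoderWithBufferIndex: 0];
    id<MTLBuffer> argBuf = [mtlDevice newBufferWithLength: argEnc.encodedLength
                                                  options: MTLResourceStorageModeShared];
    [argEnc setArgumentBuffer: argBuf offset: 0];
    [argEnc setTexture: tex atIndex: 0];                    // [[id(0)]] in the MSL
    [argEnc setBuffer: uniformBuf offset: 0 atIndex: 1];    // [[id(1)]] in the MSL

    // The argument buffer is then bound like any other buffer, and the resources it
    // references indirectly must be made resident explicitly.
    [renderEnc setFragmentBuffer: argBuf offset: 0 atIndex: 0];
    [renderEnc useResource: tex usage: MTLResourceUsageRead];
    [renderEnc useResource: uniformBuf usage: MTLResourceUsageRead];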
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h
index 7b45d55..1fa007e 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h
@@ -46,10 +46,12 @@
MVKMTLFunction(id<MTLFunction> mtlFunc, const SPIRVToMSLConversionResults scRslts, MTLSize tgSize);
MVKMTLFunction(const MVKMTLFunction& other);
+ MVKMTLFunction& operator=(const MVKMTLFunction& other);
+ MVKMTLFunction() {}
~MVKMTLFunction();
private:
- id<MTLFunction> _mtlFunction;
+ id<MTLFunction> _mtlFunction = nil;
} MVKMTLFunction;
@@ -92,9 +94,10 @@
const void* mslCompiledCodeData,
size_t mslCompiledCodeLength);
- /** Copy constructor. */
MVKShaderLibrary(const MVKShaderLibrary& other);
+ MVKShaderLibrary& operator=(const MVKShaderLibrary& other);
+
~MVKShaderLibrary() override;
protected:
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
index dead568..8fa6594 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
@@ -36,6 +36,14 @@
threadGroupSize = other.threadGroupSize;
}
+MVKMTLFunction& MVKMTLFunction::operator=(const MVKMTLFunction& other) {
+ [_mtlFunction release];
+ _mtlFunction = [other._mtlFunction retain]; // retained
+ shaderConversionResults = other.shaderConversionResults;
+ threadGroupSize = other.threadGroupSize;
+ return *this;
+}
+
MVKMTLFunction::~MVKMTLFunction() {
[_mtlFunction release];
}
@@ -163,12 +171,22 @@
mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslLoad, startTime);
}
-MVKShaderLibrary::MVKShaderLibrary(const MVKShaderLibrary& other) : _owner(other._owner) {
+MVKShaderLibrary::MVKShaderLibrary(const MVKShaderLibrary& other) {
+ _owner = other._owner;
_mtlLibrary = [other._mtlLibrary retain];
_shaderConversionResults = other._shaderConversionResults;
_msl = other._msl;
}
+MVKShaderLibrary& MVKShaderLibrary::operator=(const MVKShaderLibrary& other) {
+ [_mtlLibrary release];
+ _owner = other._owner;
+ _mtlLibrary = [other._mtlLibrary retain];
+ _shaderConversionResults = other._shaderConversionResults;
+ _msl = other._msl;
+ return *this;
+}
+
// If err object is nil, the compilation succeeded without any warnings.
// If err object exists, and the MTLLibrary was created, the compilation succeeded, but with warnings.
// If err object exists, and the MTLLibrary was not created, the compilation failed.
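The MVKMTLFunction and MVKShaderLibrary assignment operators added above follow the manual retain/release idiom for Objective-C members under manual reference counting. Below is a minimal sketch of that idiom using a hypothetical wrapper class; retaining the source before releasing the destination is the variant of the idiom that also stays safe under self-assignment.

    // Hypothetical wrapper holding a retained Metal object under manual reference counting.
    struct MTLLibraryHolder {
        id<MTLLibrary> _mtlLibrary = nil;

        MTLLibraryHolder& operator=(const MTLLibraryHolder& other) {
            id<MTLLibrary> newLib = [other._mtlLibrary retain];  // retain the source first
            [_mtlLibrary release];                               // then release the old value
            _mtlLibrary = newLib;
            return *this;
        }
        ~MTLLibraryHolder() { [_mtlLibrary release]; }
    };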
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.h b/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.h
index 98d5d68..cbb0219 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.h
@@ -96,7 +96,10 @@
MVKVulkanAPIObject() : _refCount(1) {}
/** Default copy constructor disallowed due to mutex. Copy starts with fresh reference counts. */
- MVKVulkanAPIObject(const MVKVulkanAPIObject& other) : _refCount(1) {}
+ MVKVulkanAPIObject(const MVKVulkanAPIObject& other);
+
+ /** Default copy assignment disallowed due to mutex. Copy starts with fresh reference counts. */
+ MVKVulkanAPIObject& operator=(const MVKVulkanAPIObject& other);
~MVKVulkanAPIObject() override;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.mm b/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.mm
index 07971cd..5df32e1 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.mm
@@ -61,6 +61,18 @@
}
}
+MVKVulkanAPIObject::MVKVulkanAPIObject(const MVKVulkanAPIObject& other) {
+ _refCount = 1;
+ _debugName = [other._debugName retain];
+}
+
+MVKVulkanAPIObject& MVKVulkanAPIObject::operator=(const MVKVulkanAPIObject& other) {
+ [_debugName release];
+ _refCount = 1;
+ _debugName = [other._debugName retain];
+ return *this;
+}
+
MVKVulkanAPIObject::~MVKVulkanAPIObject() {
[_debugName release];
}
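The copy operations defined above exist because, as the comments note, the class contains a mutex, and std::mutex is non-copyable, which implicitly deletes the compiler-generated copy constructor and copy assignment. A small self-contained C++ sketch of that situation, with hypothetical names:

    #include <mutex>

    struct RefCounted {
        int        _refCount = 1;
        std::mutex _lock;            // non-copyable member: implicit copies are deleted

        RefCounted() = default;
        // Custom copies are written by hand: the new object gets a fresh mutex
        // and starts with a fresh reference count, mirroring the pattern above.
        RefCounted(const RefCounted&) : _refCount(1) {}
        RefCounted& operator=(const RefCounted&) { _refCount = 1; return *this; }
    };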
diff --git a/MoltenVK/MoltenVK/Utility/MVKBitArray.h b/MoltenVK/MoltenVK/Utility/MVKBitArray.h
index 4016547..719464e 100755
--- a/MoltenVK/MoltenVK/Utility/MVKBitArray.h
+++ b/MoltenVK/MoltenVK/Utility/MVKBitArray.h
@@ -28,37 +28,43 @@
class MVKBitArray {
static constexpr size_t SectionMaskSize = 6; // 64 bits
- static constexpr size_t SectionBitCount = 1U << SectionMaskSize;
+ static constexpr size_t SectionBitCount = (size_t)1U << SectionMaskSize;
static constexpr size_t SectionByteCount = SectionBitCount / 8;
static constexpr uint64_t SectionMask = SectionBitCount - 1;
public:
- /** Returns the value of the bit. */
- inline bool getBit(size_t bitIndex) {
- return mvkIsAnyFlagEnabled(_pSections[getIndexOfSection(bitIndex)], getSectionSetMask(bitIndex));
+ /**
+ * Returns the value of the bit, and optionally clears that bit if it was set.
+ * If the bitIndex is beyond the size of this array, returns false.
+ */
+ bool getBit(size_t bitIndex, bool shouldClear = false) {
+ if (bitIndex >= _bitCount) { return false; }
+ bool val = mvkIsAnyFlagEnabled(getSection(getIndexOfSection(bitIndex)), getSectionSetMask(bitIndex));
+ if (shouldClear && val) { clearBit(bitIndex); }
+ return val;
}
/** Sets the value of the bit to the val (or to 1 by default). */
- inline void setBit(size_t bitIndex, bool val = true) {
+ void setBit(size_t bitIndex, bool val = true) {
size_t secIdx = getIndexOfSection(bitIndex);
if (val) {
- mvkEnableFlags(_pSections[secIdx], getSectionSetMask(bitIndex));
+ mvkEnableFlags(getSection(secIdx), getSectionSetMask(bitIndex));
if (secIdx < _minUnclearedSectionIndex) { _minUnclearedSectionIndex = secIdx; }
} else {
- mvkDisableFlags(_pSections[secIdx], getSectionSetMask(bitIndex));
- if (secIdx == _minUnclearedSectionIndex && !_pSections[secIdx]) { _minUnclearedSectionIndex++; }
+ mvkDisableFlags(getSection(secIdx), getSectionSetMask(bitIndex));
+ if (secIdx == _minUnclearedSectionIndex && !getSection(secIdx)) { _minUnclearedSectionIndex++; }
}
}
/** Sets the value of the bit to 0. */
- inline void clearBit(size_t bitIndex) { setBit(bitIndex, false); }
+ void clearBit(size_t bitIndex) { setBit(bitIndex, false); }
/** Sets all bits in the array to 1. */
- inline void setAllBits() { setAllSections(~0); }
+ void setAllBits() { setAllSections(~0); }
/** Clears all bits in the array to 0. */
- inline void clearAllBits() { setAllSections(0); }
+ void clearAllBits() { setAllSections(0); }
/**
* Returns the index of the first bit that is set, at or after the specified index,
@@ -69,10 +75,10 @@
size_t bitIdx = startSecIdx << SectionMaskSize;
size_t secCnt = getSectionCount();
for (size_t secIdx = startSecIdx; secIdx < secCnt; secIdx++) {
- size_t lclBitIdx = getIndexOfFirstSetBitInSection(_pSections[secIdx], getBitIndexInSection(startIndex));
+ size_t lclBitIdx = getIndexOfFirstSetBitInSection(getSection(secIdx), getBitIndexInSection(startIndex));
bitIdx += lclBitIdx;
if (lclBitIdx < SectionBitCount) {
- if (startSecIdx == _minUnclearedSectionIndex && !_pSections[startSecIdx]) { _minUnclearedSectionIndex = secIdx; }
+ if (startSecIdx == _minUnclearedSectionIndex && !getSection(startSecIdx)) { _minUnclearedSectionIndex = secIdx; }
if (shouldClear) { clearBit(bitIdx); }
return bitIdx;
}
@@ -84,7 +90,7 @@
* Returns the index of the first bit that is set, at or after the specified index.
* If no bits are set, returns the size() of this bit array.
*/
- inline size_t getIndexOfFirstSetBit(size_t startIndex) {
+ size_t getIndexOfFirstSetBit(size_t startIndex) {
return getIndexOfFirstSetBit(startIndex, false);
}
@@ -92,7 +98,7 @@
* Returns the index of the first bit that is set and optionally clears that bit.
* If no bits are set, returns the size() of this bit array.
*/
- inline size_t getIndexOfFirstSetBit(bool shouldClear) {
+ size_t getIndexOfFirstSetBit(bool shouldClear) {
return getIndexOfFirstSetBit(0, shouldClear);
}
@@ -100,57 +106,139 @@
* Returns the index of the first bit that is set.
* If no bits are set, returns the size() of this bit array.
*/
- inline size_t getIndexOfFirstSetBit() {
+ size_t getIndexOfFirstSetBit() {
return getIndexOfFirstSetBit(0, false);
}
+ /**
+ * Enumerates the bits, executing a custom function on each bit that is enabled.
+ *
+ * The function to execute is passed a bitIndex parameter which indicates
+ * the index of the bit for which the function is executing.
+ *
+ * The custom function should return true to continue processing further bits, or false
+ * to stop processing further bits. This function returns false if any of the invocations
+ * of the custom function halted further invocations, and returns true otherwise.
+ *
+ * If shouldClear is true, each enabled bit is cleared before the custom function executes.
+ */
+ bool enumerateEnabledBits(bool shouldClear, std::function<bool(size_t bitIndex)> func) {
+ for (size_t bitIdx = getIndexOfFirstSetBit(shouldClear);
+ bitIdx < _bitCount;
+ bitIdx = getIndexOfFirstSetBit(++bitIdx, shouldClear)) {
+
+ if ( !func(bitIdx) ) { return false; }
+ }
+ return true;
+ }
+
/** Returns the number of bits in this array. */
- inline size_t size() { return _bitCount; }
+ size_t size() const { return _bitCount; }
/** Returns whether this array is empty. */
- inline bool empty() { return !_bitCount; }
+ bool empty() const { return !_bitCount; }
- /** Resize this array to the specified number of bits, and sets the initial value of all the bits. */
- inline void resize(size_t size = 0, bool val = false) {
- free(_pSections);
+ /**
+ * Resize this array to the specified number of bits.
+ *
+ * The values of existing bits that fit within the new size are retained, and any
+ * new bits that are added to accommodate the new size are set to the given value.
+ *
+ * If the new size is larger than the existing size, new memory may be allocated.
+ * If the new size is less than the existing size, consumed memory is retained
+ * unless the size is set to zero.
+ */
+ void resize(size_t size, bool val = false) {
+ size_t oldBitCnt = _bitCount;
+ size_t oldSecCnt = getSectionCount();
+ size_t oldEndBitCnt = oldSecCnt << SectionMaskSize;
+
+ // Some magic here. If we need only one section, _data is used as that section,
+ // and it will be stomped on if we reallocate, so we cache it here.
+ uint64_t* oldData = _data;
+ uint64_t* pOldData = oldSecCnt > 1 ? oldData : (uint64_t*)&oldData;
_bitCount = size;
- _pSections = _bitCount ? (uint64_t*)malloc(getSectionCount() * SectionByteCount) : nullptr;
- if (val) {
- setAllBits();
- } else {
- clearAllBits();
+
+ size_t newSecCnt = getSectionCount();
+ if (newSecCnt == 0) {
+ // Clear out the existing data
+ if (oldSecCnt > 1) { free(pOldData); }
+ _data = 0;
+ _minUnclearedSectionIndex = 0;
+ } else if (newSecCnt == oldSecCnt) {
+ // Keep the existing data, but fill any bits in the last section
+ // that were beyond the old bit count with the new initial value.
+ for (size_t bitIdx = oldBitCnt; bitIdx < oldEndBitCnt; bitIdx++) { setBit(bitIdx, val); }
+ } else if (newSecCnt > oldSecCnt) {
+ size_t oldByteCnt = oldSecCnt * SectionByteCount;
+ size_t newByteCnt = newSecCnt * SectionByteCount;
+
+ // If needed, allocate new memory.
+ if (newSecCnt > 1) { _data = (uint64_t*)malloc(newByteCnt); }
+
+ // Fill the new memory with the new initial value, copy the old contents to
+ // the new memory, fill any bits in the old last section that were beyond
+ // the old bit count with the new initial value, and remove the old memory.
+ uint64_t* pNewData = getData();
+ memset(pNewData, val ? ~0 : 0, newByteCnt);
+ memcpy(pNewData, pOldData, oldByteCnt);
+ for (size_t bitIdx = oldBitCnt; bitIdx < oldEndBitCnt; bitIdx++) { setBit(bitIdx, val); }
+ if (oldSecCnt > 1) { free(pOldData); }
+
+ // If the entire old array and the new array are cleared, move the uncleared indicator to the new end.
+ if (_minUnclearedSectionIndex == oldSecCnt && !val) { _minUnclearedSectionIndex = newSecCnt; }
}
}
/** Constructs an instance for the specified number of bits, and sets the initial value of all the bits. */
- MVKBitArray(size_t size = 0, bool val = false) {
- _pSections = nullptr;
- resize(size, val);
+ MVKBitArray(size_t size = 0, bool val = false) { resize(size, val); }
+
+ MVKBitArray(const MVKBitArray& other) {
+ resize(other._bitCount);
+ memcpy(getData(), other.getData(), getSectionCount() * SectionByteCount);
}
- ~MVKBitArray() { free(_pSections); }
+ MVKBitArray& operator=(const MVKBitArray& other) {
+ resize(0);
+ resize(other._bitCount);
+ memcpy(getData(), other.getData(), getSectionCount() * SectionByteCount);
+ return *this;
+ }
+
+ ~MVKBitArray() { resize(0); }
protected:
+ // Returns a pointer to the data.
+ // Some magic here. If we need only one section, _data is used as that section.
+ uint64_t* getData() const {
+ return getSectionCount() > 1 ? _data : (uint64_t*)&_data;
+ }
+
+ // Returns a reference to the section.
+ uint64_t& getSection(size_t secIdx) {
+ return getData()[secIdx];
+ }
+
// Returns the number of sections.
- inline size_t getSectionCount() {
+ size_t getSectionCount() const {
return _bitCount ? getIndexOfSection(_bitCount - 1) + 1 : 0;
}
// Returns the index of the section that contains the specified bit.
- static inline size_t getIndexOfSection(size_t bitIndex) {
+ static size_t getIndexOfSection(size_t bitIndex) {
return bitIndex >> SectionMaskSize;
}
// Converts the bit index to a local bit index within a section, and returns that local bit index.
- static inline size_t getBitIndexInSection(size_t bitIndex) {
+ static size_t getBitIndexInSection(size_t bitIndex) {
return bitIndex & SectionMask;
}
// Returns a section mask containing a single 1 value in the bit in the section that
// corresponds to the specified global bit index, and 0 values in all other bits.
- static inline uint64_t getSectionSetMask(size_t bitIndex) {
+ static uint64_t getSectionSetMask(size_t bitIndex) {
return (uint64_t)1U << ((SectionBitCount - 1) - getBitIndexInSection(bitIndex));
}
@@ -168,12 +256,12 @@
void setAllSections(uint64_t sectionValue) {
size_t secCnt = getSectionCount();
for (size_t secIdx = 0; secIdx < secCnt; secIdx++) {
- _pSections[secIdx] = sectionValue;
+ getSection(secIdx) = sectionValue;
}
_minUnclearedSectionIndex = sectionValue ? 0 : secCnt;
}
- uint64_t* _pSections;
- size_t _bitCount;
- size_t _minUnclearedSectionIndex; // Tracks where to start looking for bits that are set
+ uint64_t* _data = 0;
+ size_t _bitCount = 0;
+ size_t _minUnclearedSectionIndex = 0; // Tracks where to start looking for bits that are set
};
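A note on the "magic" called out in resize() and getData() above: when the array needs only a single 64-bit section, the _data pointer field itself is used as the storage for the bits, and heap memory is allocated only once a second section is required. The following is a simplified, self-contained C++ sketch of that small-storage trick, not the MoltenVK class itself (bit ordering and growth handling are intentionally reduced):

    #include <cstdint>
    #include <cstdlib>

    class TinyBitArray {
        uint64_t* _data = nullptr;   // either the bits themselves, or a pointer to heap sections
        size_t    _bitCount = 0;

        size_t sectionCount() const { return (_bitCount + 63) / 64; }
        // When only one section is needed, the pointer field doubles as that section.
        uint64_t* data() const { return sectionCount() > 1 ? _data : (uint64_t*)&_data; }

    public:
        explicit TinyBitArray(size_t bitCount) : _bitCount(bitCount) {
            if (sectionCount() > 1) { _data = (uint64_t*)calloc(sectionCount(), sizeof(uint64_t)); }
        }
        TinyBitArray(const TinyBitArray&) = delete;
        TinyBitArray& operator=(const TinyBitArray&) = delete;
        ~TinyBitArray() { if (sectionCount() > 1) { free(_data); } }

        void setBit(size_t i)       { data()[i / 64] |=  (1ULL << (i % 64)); }
        bool getBit(size_t i) const { return (data()[i / 64] >> (i % 64)) & 1; }
    };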
diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
index cd1f956..c3b892a 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
@@ -61,6 +61,7 @@
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.apiVersionToAdvertise, MVK_CONFIG_API_VERSION_TO_ADVERTISE);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.advertiseExtensions, MVK_CONFIG_ADVERTISE_EXTENSIONS);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.resumeLostDevice, MVK_CONFIG_RESUME_LOST_DEVICE);
+ MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.useMetalArgumentBuffers, MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS);
mvkSetConfig(&evCfg);
}
diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
index 036dbbd..5f6c821 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
@@ -277,3 +277,8 @@
#ifndef MVK_CONFIG_RESUME_LOST_DEVICE
# define MVK_CONFIG_RESUME_LOST_DEVICE 0
#endif
+
+/** Support Metal argument buffers. Disabled by default. */
+#ifndef MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS
+# define MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS 0
+#endif
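MVKEnvironment.cpp above picks up MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS from the runtime environment (or from this build setting) when the configuration is initialized, so the variable has to be in the environment before MoltenVK reads it, typically before the VkInstance is created. A minimal sketch of opting in from application code (hypothetical placement, shown in C++):

    #include <cstdlib>

    int main() {
        // Opt in to Metal argument buffers for this process before any Vulkan calls,
        // so MoltenVK sees the setting when it initializes its configuration.
        setenv("MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS", "1", 1 /*overwrite*/);

        // ... create the VkInstance and the rest of the Vulkan objects here ...
        return 0;
    }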
diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVReflection.h b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVReflection.h
index 5d851da..a9305ee 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVReflection.h
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVReflection.h
@@ -26,6 +26,7 @@
#include <string>
#include <vector>
+
namespace mvk {
#pragma mark -
diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
index 996374a..7142113 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
@@ -32,6 +32,13 @@
#pragma mark -
#pragma mark SPIRVToMSLConversionConfiguration
+// Returns whether the container contains an item equal to the value.
+template<class C, class T>
+bool contains(const C& container, const T& val) {
+ for (const T& cVal : container) { if (cVal == val) { return true; } }
+ return false;
+}
+
// Returns whether the vector contains the value (using a matches(T&) comparison member function).
template<class T>
bool containsMatching(const vector<T>& vec, const T& val) {
@@ -100,13 +107,13 @@
MVK_PUBLIC_SYMBOL bool mvk::MSLResourceBinding::matches(const MSLResourceBinding& other) const {
if (resourceBinding.stage != other.resourceBinding.stage) { return false; }
+ if (resourceBinding.basetype != other.resourceBinding.basetype) { return false; }
if (resourceBinding.desc_set != other.resourceBinding.desc_set) { return false; }
if (resourceBinding.binding != other.resourceBinding.binding) { return false; }
if (resourceBinding.count != other.resourceBinding.count) { return false; }
if (resourceBinding.msl_buffer != other.resourceBinding.msl_buffer) { return false; }
if (resourceBinding.msl_texture != other.resourceBinding.msl_texture) { return false; }
if (resourceBinding.msl_sampler != other.resourceBinding.msl_sampler) { return false; }
-
if (requiresConstExprSampler != other.requiresConstExprSampler) { return false; }
// If requiresConstExprSampler is false, constExprSampler can be ignored
@@ -144,6 +151,14 @@
return true;
}
+MVK_PUBLIC_SYMBOL bool mvk::DescriptorBinding::matches(const mvk::DescriptorBinding& other) const {
+ if (stage != other.stage) { return false; }
+ if (descriptorSet != other.descriptorSet) { return false; }
+ if (binding != other.binding) { return false; }
+ if (index != other.index) { return false; }
+ return true;
+}
+
MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::stageSupportsVertexAttributes() const {
return (options.entryPointStage == ExecutionModelVertex ||
options.entryPointStage == ExecutionModelTessellationControl ||
@@ -153,7 +168,7 @@
// Check them all in case inactive VA's duplicate locations used by active VA's.
MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::isShaderInputLocationUsed(uint32_t location) const {
for (auto& si : shaderInputs) {
- if ((si.shaderInput.location == location) && si.isUsedByShader) { return true; }
+ if ((si.shaderInput.location == location) && si.outIsUsedByShader) { return true; }
}
return false;
}
@@ -161,14 +176,24 @@
MVK_PUBLIC_SYMBOL uint32_t SPIRVToMSLConversionConfiguration::countShaderInputsAt(uint32_t binding) const {
uint32_t siCnt = 0;
for (auto& si : shaderInputs) {
- if ((si.binding == binding) && si.isUsedByShader) { siCnt++; }
+ if ((si.binding == binding) && si.outIsUsedByShader) { siCnt++; }
}
return siCnt;
}
+MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::isResourceUsed(ExecutionModel stage, uint32_t descSet, uint32_t binding) const {
+ for (auto& rb : resourceBindings) {
+ auto& rbb = rb.resourceBinding;
+ if (rbb.stage == stage && rbb.desc_set == descSet && rbb.binding == binding) {
+ return rb.outIsUsedByShader;
+ }
+ }
+ return false;
+}
+
MVK_PUBLIC_SYMBOL void SPIRVToMSLConversionConfiguration::markAllInputsAndResourcesUsed() {
- for (auto& si : shaderInputs) { si.isUsedByShader = true; }
- for (auto& rb : resourceBindings) { rb.isUsedByShader = true; }
+ for (auto& si : shaderInputs) { si.outIsUsedByShader = true; }
+ for (auto& rb : resourceBindings) { rb.outIsUsedByShader = true; }
}
MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::matches(const SPIRVToMSLConversionConfiguration& other) const {
@@ -176,13 +201,21 @@
if ( !options.matches(other.options) ) { return false; }
for (const auto& si : shaderInputs) {
- if (si.isUsedByShader && !containsMatching(other.shaderInputs, si)) { return false; }
+ if (si.outIsUsedByShader && !containsMatching(other.shaderInputs, si)) { return false; }
}
for (const auto& rb : resourceBindings) {
- if (rb.isUsedByShader && !containsMatching(other.resourceBindings, rb)) { return false; }
+ if (rb.outIsUsedByShader && !containsMatching(other.resourceBindings, rb)) { return false; }
}
+ for (uint32_t dsIdx : discreteDescriptorSets) {
+ if ( !contains(other.discreteDescriptorSets, dsIdx)) { return false; }
+ }
+
+ for (const auto& db : dynamicBufferDescriptors) {
+ if ( !containsMatching(other.dynamicBufferDescriptors, db)) { return false; }
+ }
+
return true;
}
@@ -190,16 +223,18 @@
MVK_PUBLIC_SYMBOL void SPIRVToMSLConversionConfiguration::alignWith(const SPIRVToMSLConversionConfiguration& srcContext) {
for (auto& si : shaderInputs) {
- si.isUsedByShader = false;
+ si.outIsUsedByShader = false;
for (auto& srcSI : srcContext.shaderInputs) {
- if (si.matches(srcSI)) { si.isUsedByShader = srcSI.isUsedByShader; }
+ if (si.matches(srcSI)) { si.outIsUsedByShader = srcSI.outIsUsedByShader; }
}
}
for (auto& rb : resourceBindings) {
- rb.isUsedByShader = false;
+ rb.outIsUsedByShader = false;
for (auto& srcRB : srcContext.resourceBindings) {
- if (rb.matches(srcRB)) { rb.isUsedByShader = srcRB.isUsedByShader; }
+ if (rb.matches(srcRB)) {
+ rb.outIsUsedByShader = srcRB.outIsUsedByShader;
+ }
}
}
}
@@ -278,6 +313,21 @@
}
}
+ // Add any descriptor sets that are not using Metal argument buffers.
+ // This only has an effect if SPIRVToMSLConversionConfiguration::options::mslOptions::argument_buffers is enabled.
+ for (uint32_t dsIdx : context.discreteDescriptorSets) {
+ pMSLCompiler->add_discrete_descriptor_set(dsIdx);
+ }
+
+ // Add any dynamic buffer bindings.
+ // This only applies if SPIRVToMSLConversionConfiguration::options::mslOptions::argument_buffers is enabled.
+ if (context.options.mslOptions.argument_buffers) {
+ for (auto& db : context.dynamicBufferDescriptors) {
+ if (db.stage == context.options.entryPointStage) {
+ pMSLCompiler->add_dynamic_buffer(db.descriptorSet, db.binding, db.index);
+ }
+ }
+ }
_msl = pMSLCompiler->compile();
if (shouldLogMSL) { logSource(_msl, "MSL", "Converted"); }
@@ -307,13 +357,26 @@
_shaderConversionResults.needsDispatchBaseBuffer = pMSLCompiler && pMSLCompiler->needs_dispatch_base_buffer();
_shaderConversionResults.needsViewRangeBuffer = pMSLCompiler && pMSLCompiler->needs_view_mask_buffer();
+ // When using Metal argument buffers, if the shader is provided with dynamic buffer offsets,
+ // then it needs a buffer to hold these dynamic offsets.
+ _shaderConversionResults.needsDynamicOffsetBuffer = false;
+ if (context.options.mslOptions.argument_buffers) {
+ for (auto& db : context.dynamicBufferDescriptors) {
+ if (db.stage == context.options.entryPointStage) {
+ _shaderConversionResults.needsDynamicOffsetBuffer = true;
+ }
+ }
+ }
+
for (auto& ctxSI : context.shaderInputs) {
- ctxSI.isUsedByShader = pMSLCompiler->is_msl_shader_input_used(ctxSI.shaderInput.location);
+ ctxSI.outIsUsedByShader = pMSLCompiler->is_msl_shader_input_used(ctxSI.shaderInput.location);
}
for (auto& ctxRB : context.resourceBindings) {
- ctxRB.isUsedByShader = pMSLCompiler->is_msl_resource_binding_used(ctxRB.resourceBinding.stage,
- ctxRB.resourceBinding.desc_set,
- ctxRB.resourceBinding.binding);
+ if (ctxRB.resourceBinding.stage == context.options.entryPointStage) {
+ ctxRB.outIsUsedByShader = pMSLCompiler->is_msl_resource_binding_used(ctxRB.resourceBinding.stage,
+ ctxRB.resourceBinding.desc_set,
+ ctxRB.resourceBinding.binding);
+ }
}
delete pMSLCompiler;
diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
index 0a360de..062510c 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
@@ -19,11 +19,12 @@
#ifndef __SPIRVToMSLConverter_h_
#define __SPIRVToMSLConverter_h_ 1
+#include "SPIRVReflection.h"
#include <spirv.hpp>
#include <spirv_msl.hpp>
#include <string>
#include <vector>
-#include <unordered_map>
+
namespace mvk {
@@ -63,7 +64,7 @@
/**
* Defines MSL characteristics of a vertex attribute at a particular location.
*
- * The isUsedByShader flag is set to true during conversion of SPIR-V to MSL if the shader
+ * The outIsUsedByShader flag is set to true during conversion of SPIR-V to MSL if the shader
* makes use of this vertex attribute. This allows a pipeline to be optimized, and for two
* shader conversion configurations to be compared only against the attributes that are
* actually used by the shader.
@@ -73,13 +74,12 @@
*/
typedef struct MSLShaderInput {
SPIRV_CROSS_NAMESPACE::MSLShaderInput shaderInput;
-
uint32_t binding = 0;
- bool isUsedByShader = false;
+ bool outIsUsedByShader = false;
/**
* Returns whether the specified vertex attribute matches this one.
- * It does if all corresponding elements except isUsedByShader are equal.
+ * It does if all corresponding elements except outIsUsedByShader are equal.
*/
bool matches(const MSLShaderInput& other) const;
@@ -89,17 +89,17 @@
* Matches the binding index of a MSL resource for a binding within a descriptor set.
* Taken together, the stage, desc_set and binding combine to form a reference to a resource
* descriptor used in a particular shading stage. Generally, only one of the buffer, texture,
- * or sampler elements will be populated. The isUsedByShader flag is set to true during
+ * or sampler elements will be populated. The outIsUsedByShader flag is set to true during
* compilation of SPIR-V to MSL if the shader makes use of this resource binding.
*
* If requiresConstExprSampler is true, the resource is a sampler whose content must be
* hardcoded into the MSL as a constexpr type, instead of passed in as a runtime-bound variable.
* The content of that constexpr sampler is defined in the constExprSampler parameter.
*
- * The isUsedByShader flag is set to true during conversion of SPIR-V to MSL if the shader
- * makes use of this resource binding. This allows a pipeline to be optimized, and for two
- * shader conversion configurations to be compared only against the resource bindings that
- * are actually used by the shader.
+ * The outIsUsedByShader value is set by the shader converter based on the content of the SPIR-V
+ * (and resulting MSL), and is set to true if the shader makes use of this resource binding.
+ * This allows a pipeline to be optimized, and for two shader conversion configurations to
+ * be compared only against the resource bindings that are actually used by the shader.
*
* THIS STRUCT IS STREAMED OUT AS PART OF THE PIPELINE CACHE.
* CHANGES TO THIS STRUCT SHOULD BE CAPTURED IN THE STREAMING LOGIC OF THE PIPELINE CACHE.
@@ -108,18 +108,34 @@
SPIRV_CROSS_NAMESPACE::MSLResourceBinding resourceBinding;
SPIRV_CROSS_NAMESPACE::MSLConstexprSampler constExprSampler;
bool requiresConstExprSampler = false;
-
- bool isUsedByShader = false;
+ bool outIsUsedByShader = false;
/**
* Returns whether the specified resource binding matches this one.
- * It does if all corresponding elements except isUsedByShader are equal.
+ * It does if all corresponding elements except outIsUsedByShader are equal.
*/
bool matches(const MSLResourceBinding& other) const;
} MSLResourceBinding;
/**
+ * Identifies a descriptor binding, and the index into a buffer that
+ * can be used for providing dynamic content like dynamic buffer offsets.
+ *
+ * THIS STRUCT IS STREAMED OUT AS PART OF THE PIPELINE CACHE.
+ * CHANGES TO THIS STRUCT SHOULD BE CAPTURED IN THE STREAMING LOGIC OF THE PIPELINE CACHE.
+ */
+ typedef struct DescriptorBinding {
+ spv::ExecutionModel stage = spv::ExecutionModelMax;
+ uint32_t descriptorSet = 0;
+ uint32_t binding = 0;
+ uint32_t index = 0;
+
+ bool matches(const DescriptorBinding& other) const;
+
+ } DescriptorBinding;
+
+ /**
* Configuration passed to the SPIRVToMSLConverter.
*
* THIS STRUCT IS STREAMED OUT AS PART OF THE PIPELINE CACHE.
@@ -129,6 +145,8 @@
SPIRVToMSLConversionOptions options;
std::vector<MSLShaderInput> shaderInputs;
std::vector<MSLResourceBinding> resourceBindings;
+ std::vector<uint32_t> discreteDescriptorSets;
+ std::vector<DescriptorBinding> dynamicBufferDescriptors;
/** Returns whether the pipeline stage being converted supports vertex attributes. */
bool stageSupportsVertexAttributes() const;
@@ -142,6 +160,9 @@
/** Returns whether the vertex buffer at the specified Vulkan binding is used by the shader. */
bool isVertexBufferUsed(uint32_t binding) const { return countShaderInputsAt(binding) > 0; }
+ /** Returns whether the resource at the specified descriptor set binding is used by the shader. */
+ bool isResourceUsed(spv::ExecutionModel stage, uint32_t descSet, uint32_t binding) const;
+
/** Marks all input variables and resources as being used by the shader. */
void markAllInputsAndResourcesUsed();
@@ -209,6 +230,7 @@
bool needsOutputBuffer = false;
bool needsPatchOutputBuffer = false;
bool needsBufferSizeBuffer = false;
+ bool needsDynamicOffsetBuffer = false;
bool needsInputThreadgroupMem = false;
bool needsDispatchBaseBuffer = false;
bool needsViewRangeBuffer = false;
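The new discreteDescriptorSets and dynamicBufferDescriptors vectors in SPIRVToMSLConversionConfiguration feed the add_discrete_descriptor_set() and add_dynamic_buffer() calls made in SPIRVToMSLConverter.cpp above. A hedged sketch of how a caller might populate them, assuming the converter headers are included; the set, binding and index values are purely illustrative:

    // Keep descriptor set 2 discretely bound even though argument buffers are enabled,
    // and declare the buffer at set 0 / binding 3 as dynamic, with its offset stored
    // at index 0 of the dynamic-offset buffer.
    mvk::SPIRVToMSLConversionConfiguration shaderContext;
    shaderContext.options.mslOptions.argument_buffers = true;

    shaderContext.discreteDescriptorSets.push_back(2);

    mvk::DescriptorBinding dynUBO;
    dynUBO.stage         = spv::ExecutionModelFragment;
    dynUBO.descriptorSet = 0;
    dynUBO.binding       = 3;
    dynUBO.index         = 0;
    shaderContext.dynamicBufferDescriptors.push_back(dynUBO);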