Merge pull request #1335 from billhollings/argument-buffers

Initial Metal argument buffer support
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index a5532c7..f1641c4 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -801,6 +801,30 @@
 	 */
 	VkBool32 resumeLostDevice;
 
+	/**
+	 * Controls whether MoltenVK should use Metal argument buffers for resources defined in
+	 * descriptor sets, if Metal argument buffers are supported on the platform. Using Metal
+	 * argument buffers dramatically increases the number of buffers, textures and samplers
+	 * that can be bound to a pipeline shader, and in most cases improves performance. If this
+	 * setting is enabled, MoltenVK will use Metal argument buffers to bind resources to the
+	 * shaders. If this setting is disabled, MoltenVK will bind resources to shaders discretely.
+	 *
+	 * NOTE: Currently, Metal argument buffer support is in beta stage, and is only supported
+	 * on macOS 10.16 (Big Sur) or later, or on older versions of macOS using an Intel GPU.
+	 * Metal argument buffer support is not available on iOS. Development to support iOS
+	 * and a wider combination of GPUs on older macOS versions is underway.
+	 *
+	 * The value of this parameter must be changed before creating a VkInstance,
+	 * for the change to take effect.
+	 *
+	 * The initial value of this parameter is set by the
+	 * MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS
+	 * runtime environment variable or MoltenVK compile-time build setting.
+	 * If neither is set, this setting is disabled by default, and MoltenVK will not
+	 * use Metal argument buffers, and will bind resources to shaders discretely.
+	 */
+	VkBool32 useMetalArgumentBuffers;
+
 } MVKConfiguration;
 
 /**
@@ -880,6 +904,8 @@
     uint32_t minSubgroupSize;			        /**< The minimum number of threads in a SIMD-group. */
     VkBool32 textureBarriers;                   /**< If true, texture barriers are supported within Metal render passes. */
     VkBool32 tileBasedDeferredRendering;        /**< If true, this device uses tile-based deferred rendering. */
+	VkBool32 argumentBuffers;					/**< If true, Metal argument buffers are supported. */
+	VkBool32 descriptorSetArgumentBuffers;		/**< If true, a Metal argument buffer can be assigned to a descriptor set, and used on any pipeline and pipeline stage. If false, a different Metal argument buffer must be used for each pipeline-stage/descriptor-set combination. */
 } MVKPhysicalDeviceMetalFeatures;
 
 /** MoltenVK performance of a particular type of activity. */
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
index e1919da..b691dfc 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
@@ -143,6 +143,7 @@
 
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
+	void encode(MVKCommandEncoder* cmdEncoder, MVKArrayRef<uint32_t> dynamicOffsets);
 
 	MVKSmallVector<MVKDescriptorSet*, N> _descriptorSets;
 	MVKPipelineLayout* _pipelineLayout = nullptr;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
index 57c2179..bb5124d 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
@@ -215,7 +215,12 @@
 
 template <size_t N>
 void MVKCmdBindDescriptorSetsStatic<N>::encode(MVKCommandEncoder* cmdEncoder) {
-	_pipelineLayout->bindDescriptorSets(cmdEncoder, _descriptorSets.contents(), _firstSet, MVKArrayRef<uint32_t>());
+	encode(cmdEncoder, MVKArrayRef<uint32_t>());
+}
+
+template <size_t N>
+void MVKCmdBindDescriptorSetsStatic<N>::encode(MVKCommandEncoder* cmdEncoder, MVKArrayRef<uint32_t> dynamicOffsets) {
+	_pipelineLayout->bindDescriptorSets(cmdEncoder, _pipelineBindPoint, _descriptorSets.contents(), _firstSet, dynamicOffsets);
 }
 
 template <size_t N>
@@ -256,7 +261,7 @@
 
 template <size_t N>
 void MVKCmdBindDescriptorSetsDynamic<N>::encode(MVKCommandEncoder* cmdEncoder) {
-	MVKCmdBindDescriptorSetsStatic<N>::_pipelineLayout->bindDescriptorSets(cmdEncoder, MVKCmdBindDescriptorSetsStatic<N>::_descriptorSets.contents(), MVKCmdBindDescriptorSetsStatic<N>::_firstSet, _dynamicOffsets.contents());
+	MVKCmdBindDescriptorSetsStatic<N>::encode(cmdEncoder, _dynamicOffsets.contents());
 }
 
 template class MVKCmdBindDescriptorSetsDynamic<4>;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index 43b45a5..6bc4f4e 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -298,9 +298,20 @@
 	/** Returns the index of the currently active multiview subpass, or zero if the current render pass is not multiview. */
 	uint32_t getMultiviewPassIndex();
 
+	/** Begins a Metal compute encoding. */
+	void beginMetalComputeEncoding(MVKCommandUse cmdUse);
+
     /** Binds a pipeline to a bind point. */
     void bindPipeline(VkPipelineBindPoint pipelineBindPoint, MVKPipeline* pipeline);
 
+	/** Binds the descriptor set at the given index to the pipeline bind point. */
+	void bindDescriptorSet(VkPipelineBindPoint pipelineBindPoint,
+						   uint32_t descSetIndex,
+						   MVKDescriptorSet* descSet,
+						   MVKShaderResourceBinding& dslMTLRezIdxOffsets,
+						   MVKArrayRef<uint32_t> dynamicOffsets,
+						   uint32_t& dynamicOffsetIndex);
+
 	/** Encodes an operation to signal an event to a status. */
 	void signalEvent(MVKEvent* mvkEvent, bool status);
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index b7866f7..2de0f53 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -415,11 +415,11 @@
 void MVKCommandEncoder::bindPipeline(VkPipelineBindPoint pipelineBindPoint, MVKPipeline* pipeline) {
     switch (pipelineBindPoint) {
         case VK_PIPELINE_BIND_POINT_GRAPHICS:
-            _graphicsPipelineState.setPipeline(pipeline);
+            _graphicsPipelineState.bindPipeline(pipeline);
             break;
 
         case VK_PIPELINE_BIND_POINT_COMPUTE:
-            _computePipelineState.setPipeline(pipeline);
+            _computePipelineState.bindPipeline(pipeline);
             break;
 
         default:
@@ -427,6 +427,28 @@
     }
 }
 
+void MVKCommandEncoder::bindDescriptorSet(VkPipelineBindPoint pipelineBindPoint,
+										  uint32_t descSetIndex,
+										  MVKDescriptorSet* descSet,
+										  MVKShaderResourceBinding& dslMTLRezIdxOffsets,
+										  MVKArrayRef<uint32_t> dynamicOffsets,
+										  uint32_t& dynamicOffsetIndex) {
+	switch (pipelineBindPoint) {
+		case VK_PIPELINE_BIND_POINT_GRAPHICS:
+			_graphicsResourcesState.bindDescriptorSet(descSetIndex, descSet, dslMTLRezIdxOffsets,
+													  dynamicOffsets, dynamicOffsetIndex);
+			break;
+
+		case VK_PIPELINE_BIND_POINT_COMPUTE:
+			_computeResourcesState.bindDescriptorSet(descSetIndex, descSet, dslMTLRezIdxOffsets,
+													 dynamicOffsets, dynamicOffsetIndex);
+			break;
+
+		default:
+			break;
+	}
+}
+
 void MVKCommandEncoder::signalEvent(MVKEvent* mvkEvent, bool status) {
 	endCurrentMetalEncoding();
 	mvkEvent->encodeSignal(_mtlCmdBuffer, status);
@@ -514,6 +536,14 @@
 	}
 }
 
+void MVKCommandEncoder::beginMetalComputeEncoding(MVKCommandUse cmdUse) {
+	if (cmdUse == kMVKCommandUseTessellationVertexTessCtl) {
+		_graphicsResourcesState.beginMetalComputeEncoding();
+	} else {
+		_computeResourcesState.beginMetalComputeEncoding();
+	}
+}
+
 void MVKCommandEncoder::finalizeDispatchState() {
     _computePipelineState.encode();    // Must do first..it sets others
     _computeResourcesState.encode();
@@ -571,6 +601,7 @@
 	if ( !_mtlComputeEncoder ) {
 		endCurrentMetalEncoding();
 		_mtlComputeEncoder = [_mtlCmdBuffer computeCommandEncoder];		// not retained
+		beginMetalComputeEncoding(cmdUse);
 	}
 	if (_mtlComputeEncoderUse != cmdUse) {
 		_mtlComputeEncoderUse = cmdUse;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
index 41e4544..d18fa87 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
@@ -21,11 +21,14 @@
 #include "MVKMTLResourceBindings.h"
 #include "MVKCommandResourceFactory.h"
 #include "MVKDevice.h"
+#include "MVKDescriptor.h"
 #include "MVKSmallVector.h"
+#include "MVKBitArray.h"
 #include <unordered_map>
 
 class MVKCommandEncoder;
 class MVKGraphicsPipeline;
+class MVKDescriptorSet;
 class MVKOcclusionQueryPool;
 
 struct MVKShaderImplicitRezBinding;
@@ -66,11 +69,17 @@
      */
 	virtual void beginMetalRenderPass() { if (_isModified) { markDirty(); } }
 
-	/**
-	 * Called automatically when a Metal render pass ends.
-	 */
+	/** Called automatically when a Metal render pass ends. */
 	virtual void endMetalRenderPass() { }
 
+	/**
+	 * Called automatically when a Metal compute pass begins. If the contents have been
+	 * modified from the default values, this instance is marked as dirty, so the contents
+	 * will be encoded to Metal. Otherwise, it is marked as clean, so the contents will NOT
+	 * be encoded. Default state can be left unencoded on a new Metal encoder.
+	 */
+	virtual void beginMetalComputeEncoding() { if (_isModified) { markDirty(); } }
+
     /**
      * If the content of this instance is dirty, marks this instance as no longer dirty
      * and calls the encodeImpl() function to encode the content onto the Metal encoder.
@@ -88,6 +97,7 @@
 
 protected:
     virtual void encodeImpl(uint32_t stage) = 0;
+	MVKDevice* getDevice();
 
     MVKCommandEncoder* _cmdEncoder;
 	bool _isDirty = false;
@@ -103,8 +113,8 @@
 
 public:
 
-    /** Sets the pipeline during pipeline binding. */
-    void setPipeline(MVKPipeline* pipeline);
+	/** Binds the pipeline. */
+    void bindPipeline(MVKPipeline* pipeline);
 
     /** Returns the currently bound pipeline. */
     MVKPipeline* getPipeline();
@@ -337,10 +347,27 @@
 
 public:
 
-    /** Constructs this instance for the specified command encoder. */
-    MVKResourcesCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {}
+	/** Returns the currently bound pipeline for this bind point. */
+	virtual MVKPipeline* getPipeline() = 0;
+
+	/** Binds the specified descriptor set to the specified index. */
+	void bindDescriptorSet(uint32_t descSetIndex,
+						   MVKDescriptorSet* descSet,
+						   MVKShaderResourceBinding& dslMTLRezIdxOffsets,
+						   MVKArrayRef<uint32_t> dynamicOffsets,
+						   uint32_t& dynamicOffsetIndex);
+
+	/** Encodes the usage of the Metal resource to the Metal command encoder. */
+	virtual void encodeArgumentBufferResourceUsage(MVKShaderStage stage,
+												   id<MTLResource> mtlResource,
+												   MTLResourceUsage mtlUsage,
+												   MTLRenderStages mtlStages) = 0;
+
+    MVKResourcesCommandEncoderState(MVKCommandEncoder* cmdEncoder) :
+		MVKCommandEncoderState(cmdEncoder), _boundDescriptorSets{} {}
 
 protected:
+	void markDirty() override;
 
     // Template function that marks both the vector and all binding elements in the vector as dirty.
     template<class T>
@@ -402,6 +429,8 @@
 	}
 
 	void assertMissingSwizzles(bool needsSwizzle, const char* stageName, const MVKArrayRef<MVKMTLTextureBinding>& texBindings);
+	void encodeMetalArgumentBuffer(MVKShaderStage stage);
+	virtual void bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) = 0;
 
 	template<size_t N>
 	struct ResourceBindings {
@@ -413,6 +442,7 @@
 
 		MVKMTLBufferBinding swizzleBufferBinding;
 		MVKMTLBufferBinding bufferSizeBufferBinding;
+		MVKMTLBufferBinding dynamicOffsetBufferBinding;
 		MVKMTLBufferBinding viewRangeBufferBinding;
 
 		bool areBufferBindingsDirty = false;
@@ -422,6 +452,11 @@
 		bool needsSwizzle = false;
 	};
 
+	MVKDescriptorSet* _boundDescriptorSets[kMVKMaxDescriptorSetCount];
+	MVKBitArray _metalUsageDirtyDescriptors[kMVKMaxDescriptorSetCount];
+
+	MVKSmallVector<uint32_t, 8> _dynamicOffsets;
+
 };
 
 
@@ -433,6 +468,9 @@
 
 public:
 
+	/** Returns the currently bound pipeline for this bind point. */
+	MVKPipeline* getPipeline() override;
+
     /** Binds the specified buffer for the specified shader stage. */
     void bindBuffer(MVKShaderStage stage, const MVKMTLBufferBinding& binding);
 
@@ -464,6 +502,13 @@
                               bool needTessEvalSizeBuffer,
                               bool needFragmentSizeBuffer);
 
+	/** Sets the current dynamic offset buffer state. */
+	void bindDynamicOffsetBuffer(const MVKShaderImplicitRezBinding& binding,
+								 bool needVertexDynamicOffsetBuffer,
+								 bool needTessCtlDynamicOffsetBuffer,
+								 bool needTessEvalDynamicOffsetBuffer,
+								 bool needFragmentDynamicOffsetBuffer);
+
     /** Sets the current view range buffer state. */
     void bindViewRangeBuffer(const MVKShaderImplicitRezBinding& binding,
                              bool needVertexViewBuffer,
@@ -477,6 +522,11 @@
                         std::function<void(MVKCommandEncoder*, MVKMTLTextureBinding&)> bindTexture,
                         std::function<void(MVKCommandEncoder*, MVKMTLSamplerStateBinding&)> bindSampler);
 
+	void encodeArgumentBufferResourceUsage(MVKShaderStage stage,
+										   id<MTLResource> mtlResource,
+										   MTLResourceUsage mtlUsage,
+										   MTLRenderStages mtlStages) override;
+
 	/** Offset all buffers for vertex attribute bindings with zero divisors by the given number of strides. */
 	void offsetZeroDivisorVertexBuffers(MVKGraphicsStage stage, MVKGraphicsPipeline* pipeline, uint32_t firstInstance);
 
@@ -488,6 +538,7 @@
 protected:
     void encodeImpl(uint32_t stage) override;
     void markDirty() override;
+	void bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) override;
 
     ResourceBindings<8> _shaderStageResourceBindings[4];
 };
@@ -501,6 +552,9 @@
 
 public:
 
+	/** Returns the currently bound pipeline for this bind point. */
+	MVKPipeline* getPipeline() override;
+
     /** Binds the specified buffer. */
     void bindBuffer(const MVKMTLBufferBinding& binding);
 
@@ -516,6 +570,14 @@
     /** Sets the current buffer size buffer state. */
     void bindBufferSizeBuffer(const MVKShaderImplicitRezBinding& binding, bool needSizeBuffer);
 
+	/** Sets the current dynamic offset buffer state. */
+	void bindDynamicOffsetBuffer(const MVKShaderImplicitRezBinding& binding, bool needDynamicOffsetBuffer);
+
+	void encodeArgumentBufferResourceUsage(MVKShaderStage stage,
+										   id<MTLResource> mtlResource,
+										   MTLResourceUsage mtlUsage,
+										   MTLRenderStages mtlStages) override;
+
     void markDirty() override;
 
 #pragma mark Construction
@@ -525,6 +587,7 @@
 
 protected:
     void encodeImpl(uint32_t) override;
+	void bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) override;
 
 	ResourceBindings<4> _resourceBindings;
 };
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index 968d087..1ff5a9c 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -30,12 +30,13 @@
 #pragma mark MVKCommandEncoderState
 
 MVKVulkanAPIObject* MVKCommandEncoderState::getVulkanAPIObject() { return _cmdEncoder->getVulkanAPIObject(); };
+MVKDevice* MVKCommandEncoderState::getDevice() { return _cmdEncoder->getDevice(); }
 
 
 #pragma mark -
 #pragma mark MVKPipelineCommandEncoderState
 
-void MVKPipelineCommandEncoderState::setPipeline(MVKPipeline* pipeline) {
+void MVKPipelineCommandEncoderState::bindPipeline(MVKPipeline* pipeline) {
     _pipeline = pipeline;
     markDirty();
 }
@@ -58,7 +59,7 @@
 												  bool isSettingDynamically) {
 
 	size_t vpCnt = viewports.size;
-	uint32_t maxViewports = _cmdEncoder->getDevice()->_pProperties->limits.maxViewports;
+	uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports;
 	if ((firstViewport + vpCnt > maxViewports) ||
 		(firstViewport >= maxViewports) ||
 		(isSettingDynamically && vpCnt == 0))
@@ -108,7 +109,7 @@
 												bool isSettingDynamically) {
 
 	size_t sCnt = scissors.size;
-	uint32_t maxScissors = _cmdEncoder->getDevice()->_pProperties->limits.maxViewports;
+	uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports;
 	if ((firstScissor + sCnt > maxScissors) ||
 		(firstScissor >= maxScissors) ||
 		(isSettingDynamically && sCnt == 0))
@@ -157,7 +158,7 @@
 	// MSL structs can have a larger size than the equivalent C struct due to MSL alignment needs.
 	// Typically any MSL struct that contains a float4 will also have a size that is rounded up to a multiple of a float4 size.
 	// Ensure that we pass along enough content to cover this extra space even if it is never actually accessed by the shader.
-	size_t pcSizeAlign = _cmdEncoder->getDevice()->_pMetalFeatures->pushConstantSizeAlignment;
+	size_t pcSizeAlign = getDevice()->_pMetalFeatures->pushConstantSizeAlignment;
     size_t pcSize = pushConstants.size;
 	size_t pcBuffSize = mvkAlignByteCount(offset + pcSize, pcSizeAlign);
     mvkEnsureSize(_pushConstants, pcBuffSize);
@@ -452,6 +453,131 @@
 #pragma mark -
 #pragma mark MVKResourcesCommandEncoderState
 
+void MVKResourcesCommandEncoderState::bindDescriptorSet(uint32_t descSetIndex,
+														MVKDescriptorSet* descSet,
+														MVKShaderResourceBinding& dslMTLRezIdxOffsets,
+														MVKArrayRef<uint32_t> dynamicOffsets,
+														uint32_t& dynamicOffsetIndex) {
+
+	bool dsChanged = (descSet != _boundDescriptorSets[descSetIndex]);
+
+	_boundDescriptorSets[descSetIndex] = descSet;
+
+	if (descSet->isUsingMetalArgumentBuffers()) {
+		// If the descriptor set has changed, track new resource usage.
+		if (dsChanged) {
+			auto& usageDirty = _metalUsageDirtyDescriptors[descSetIndex];
+			usageDirty.resize(descSet->getDescriptorCount());
+			usageDirty.setAllBits();
+		}
+
+		// Update dynamic buffer offsets
+		uint32_t baseDynOfstIdx = dslMTLRezIdxOffsets.getMetalResourceIndexes().dynamicOffsetBufferIndex;
+		uint32_t doCnt = descSet->getDynamicOffsetDescriptorCount();
+		for (uint32_t doIdx = 0; doIdx < doCnt && dynamicOffsetIndex < dynamicOffsets.size; doIdx++) {
+			updateImplicitBuffer(_dynamicOffsets, baseDynOfstIdx + doIdx, dynamicOffsets[dynamicOffsetIndex++]);
+		}
+
+		// If something changed, mark dirty
+		if (dsChanged || doCnt > 0) { MVKCommandEncoderState::markDirty(); }
+	}
+}
+
+// Encode the dirty descriptors to the Metal argument buffer, set the Metal command encoder
+// usage for each resource, and bind the Metal argument buffer to the command encoder.
+void MVKResourcesCommandEncoderState::encodeMetalArgumentBuffer(MVKShaderStage stage) {
+	if ( !_cmdEncoder->isUsingMetalArgumentBuffers() ) { return; }
+
+	// The Metal arg encoder can only write to one arg buffer at a time (it holds the arg buffer),
+	// so we need to lock out other access to it while we are writing to it.
+	MVKPipeline* pipeline = getPipeline();
+	lock_guard<mutex> lock(pipeline->_mtlArgumentEncodingLock);
+
+	uint32_t dsCnt = pipeline->getDescriptorSetCount();
+	for (uint32_t dsIdx = 0; dsIdx < dsCnt; dsIdx++) {
+		auto* descSet = _boundDescriptorSets[dsIdx];
+		if ( !descSet ) { continue; }
+
+		id<MTLArgumentEncoder> mtlArgEncoder = nil;
+		id<MTLBuffer> mtlArgBuffer = nil;
+		NSUInteger metalArgBufferOffset = 0;
+
+		auto* dsLayout = descSet->getLayout();
+		if (dsLayout->isUsingDescriptorSetMetalArgumentBuffers()) {
+			mtlArgEncoder = dsLayout->getMTLArgumentEncoder().getMTLArgumentEncoder();
+			mtlArgBuffer = descSet->getMetalArgumentBuffer();
+			metalArgBufferOffset = descSet->getMetalArgumentBufferOffset();
+		} else {
+			mtlArgEncoder = pipeline->getMTLArgumentEncoder(dsIdx, stage).getMTLArgumentEncoder();
+			// TODO: Source a different arg buffer & offset for each pipeline-stage/descriptor-set combination.
+			// Also need to only encode the descriptors that are referenced in the shader.
+			// MVKMTLArgumentEncoder could include an MVKBitArray to track that and have it checked below.
+		}
+
+		if ( !(mtlArgEncoder && mtlArgBuffer) ) { continue; }
+
+		auto& argBuffDirtyDescs = descSet->getMetalArgumentBufferDirtyDescriptors();
+		auto& resourceUsageDirtyDescs = _metalUsageDirtyDescriptors[dsIdx];
+		auto& shaderBindingUsage = pipeline->getDescriptorBindingUse(dsIdx, stage);
+
+		bool mtlArgEncAttached = false;
+		bool shouldBindArgBuffToStage = false;
+		uint32_t dslBindCnt = dsLayout->getBindingCount();
+		for (uint32_t dslBindIdx = 0; dslBindIdx < dslBindCnt; dslBindIdx++) {
+			auto* dslBind = dsLayout->getBindingAt(dslBindIdx);
+			if (dslBind->getApplyToStage(stage) && shaderBindingUsage.getBit(dslBindIdx)) {
+				shouldBindArgBuffToStage = true;
+				uint32_t elemCnt = dslBind->getDescriptorCount(descSet);
+				for (uint32_t elemIdx = 0; elemIdx < elemCnt; elemIdx++) {
+					uint32_t descIdx = dslBind->getDescriptorIndex(elemIdx);
+					bool argBuffDirty = argBuffDirtyDescs.getBit(descIdx, true);
+					bool resourceUsageDirty = resourceUsageDirtyDescs.getBit(descIdx, true);
+					if (argBuffDirty || resourceUsageDirty) {
+						// Don't attach the arg buffer to the arg encoder unless something actually needs
+						// to be written to it. We often might only be updating command encoder resource usage.
+						if (!mtlArgEncAttached && argBuffDirty) {
+							[mtlArgEncoder setArgumentBuffer: mtlArgBuffer offset: metalArgBufferOffset];
+							mtlArgEncAttached = true;
+						}
+						auto* mvkDesc = descSet->getDescriptorAt(descIdx);
+						mvkDesc->encodeToMetalArgumentBuffer(this, mtlArgEncoder,
+															 dsIdx, dslBind, elemIdx,
+															 stage, argBuffDirty, true);
+					}
+				}
+			}
+		}
+
+		// If the arg buffer was attached to the arg encoder, detach it now.
+		if (mtlArgEncAttached) { [mtlArgEncoder setArgumentBuffer: nil offset: 0]; }
+
+		// If needed, bind the Metal argument buffer itself to the command encoder.
+		if (shouldBindArgBuffToStage) {
+			MVKMTLBufferBinding bb;
+			bb.mtlBuffer = descSet->getMetalArgumentBuffer();
+			bb.offset = descSet->getMetalArgumentBufferOffset();
+			bb.index = dsIdx;
+			bindMetalArgumentBuffer(stage, bb);
+		}
+
+		// For some unexpected reason, GPU capture on Xcode 12 doesn't always correctly expose
+		// the contents of Metal argument buffers. Triggering an extraction of the arg buffer
+		// contents here, after filling it, seems to correct that.
+		// Sigh. A bug report has been filed with Apple.
+		if (getDevice()->isCurrentlyAutoGPUCapturing()) { [descSet->getMetalArgumentBuffer() contents]; }
+	}
+}
+
+// Mark the resource usage of all descriptor sets as needing to be re-declared on the Metal encoder.
+void MVKResourcesCommandEncoderState::markDirty() {
+	MVKCommandEncoderState::markDirty();
+	if (_cmdEncoder->isUsingMetalArgumentBuffers()) {
+		for (uint32_t dsIdx = 0; dsIdx < kMVKMaxDescriptorSetCount; dsIdx++) {
+			_metalUsageDirtyDescriptors[dsIdx].setAllBits();
+		}
+	}
+}
+
 // If a swizzle is needed for this stage, iterates all the bindings and logs errors for those that need texture swizzling.
 void MVKResourcesCommandEncoderState::assertMissingSwizzles(bool needsSwizzle, const char* stageName, const MVKArrayRef<MVKMTLTextureBinding>& texBindings) {
 	if (needsSwizzle) {
@@ -513,6 +639,20 @@
     _shaderStageResourceBindings[kMVKShaderStageFragment].bufferSizeBufferBinding.isDirty = needFragmentSizeBuffer;
 }
 
+void MVKGraphicsResourcesCommandEncoderState::bindDynamicOffsetBuffer(const MVKShaderImplicitRezBinding& binding,
+																	  bool needVertexDynamicOffsetBuffer,
+																	  bool needTessCtlDynamicOffsetBuffer,
+																	  bool needTessEvalDynamicOffsetBuffer,
+																	  bool needFragmentDynamicOffsetBuffer) {
+	for (uint32_t i = kMVKShaderStageVertex; i <= kMVKShaderStageFragment; i++) {
+		_shaderStageResourceBindings[i].dynamicOffsetBufferBinding.index = binding.stages[i];
+	}
+	_shaderStageResourceBindings[kMVKShaderStageVertex].dynamicOffsetBufferBinding.isDirty = needVertexDynamicOffsetBuffer;
+	_shaderStageResourceBindings[kMVKShaderStageTessCtl].dynamicOffsetBufferBinding.isDirty = needTessCtlDynamicOffsetBuffer;
+	_shaderStageResourceBindings[kMVKShaderStageTessEval].dynamicOffsetBufferBinding.isDirty = needTessEvalDynamicOffsetBuffer;
+	_shaderStageResourceBindings[kMVKShaderStageFragment].dynamicOffsetBufferBinding.isDirty = needFragmentDynamicOffsetBuffer;
+}
+
 void MVKGraphicsResourcesCommandEncoderState::bindViewRangeBuffer(const MVKShaderImplicitRezBinding& binding,
 																  bool needVertexViewBuffer,
 																  bool needFragmentViewBuffer) {
@@ -532,6 +672,9 @@
                                                              std::function<void(MVKCommandEncoder*, MVKMTLBufferBinding&, const MVKArrayRef<uint32_t>&)> bindImplicitBuffer,
                                                              std::function<void(MVKCommandEncoder*, MVKMTLTextureBinding&)> bindTexture,
                                                              std::function<void(MVKCommandEncoder*, MVKMTLSamplerStateBinding&)> bindSampler) {
+
+	encodeMetalArgumentBuffer(stage);
+
     auto& shaderStage = _shaderStageResourceBindings[stage];
     encodeBinding<MVKMTLBufferBinding>(shaderStage.bufferBindings, shaderStage.areBufferBindingsDirty, bindBuffer);
 
@@ -555,6 +698,10 @@
         bindImplicitBuffer(_cmdEncoder, shaderStage.bufferSizeBufferBinding, shaderStage.bufferSizes.contents());
     }
 
+	if (shaderStage.dynamicOffsetBufferBinding.isDirty) {
+		bindImplicitBuffer(_cmdEncoder, shaderStage.dynamicOffsetBufferBinding, _dynamicOffsets.contents());
+	}
+
     if (shaderStage.viewRangeBufferBinding.isDirty) {
         MVKSmallVector<uint32_t, 2> viewRange;
         viewRange.push_back(_cmdEncoder->getSubpass()->getFirstViewIndexInMetalPass(_cmdEncoder->getMultiviewPassIndex()));
@@ -592,7 +739,7 @@
 
 // Mark everything as dirty
 void MVKGraphicsResourcesCommandEncoderState::markDirty() {
-    MVKCommandEncoderState::markDirty();
+	MVKResourcesCommandEncoderState::markDirty();
     for (uint32_t i = kMVKShaderStageVertex; i <= kMVKShaderStageFragment; i++) {
         MVKResourcesCommandEncoderState::markDirty(_shaderStageResourceBindings[i].bufferBindings, _shaderStageResourceBindings[i].areBufferBindingsDirty);
         MVKResourcesCommandEncoderState::markDirty(_shaderStageResourceBindings[i].textureBindings, _shaderStageResourceBindings[i].areTextureBindingsDirty);
@@ -603,7 +750,7 @@
 void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
 
     MVKGraphicsPipeline* pipeline = (MVKGraphicsPipeline*)_cmdEncoder->_graphicsPipelineState.getPipeline();
-    bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || _cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle;
+    bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || getDevice()->_pMetalFeatures->nativeTextureSwizzle;
     bool forTessellation = pipeline->isTessellationPipeline();
 
 	if (stage == kMVKGraphicsStageVertex) {
@@ -765,6 +912,33 @@
     }
 }
 
+MVKPipeline* MVKGraphicsResourcesCommandEncoderState::getPipeline() {
+	return _cmdEncoder->_graphicsPipelineState.getPipeline();
+}
+
+void MVKGraphicsResourcesCommandEncoderState::bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) {
+	bindBuffer(stage, buffBind);
+}
+
+void MVKGraphicsResourcesCommandEncoderState::encodeArgumentBufferResourceUsage(MVKShaderStage stage,
+																				id<MTLResource> mtlResource,
+																				MTLResourceUsage mtlUsage,
+																				MTLRenderStages mtlStages) {
+	if (mtlResource && mtlStages) {
+		if (stage == kMVKShaderStageTessCtl) {
+			auto* mtlCompEnc = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
+			[mtlCompEnc useResource: mtlResource usage: mtlUsage];
+		} else {
+			auto* mtlRendEnc = _cmdEncoder->_mtlRenderEncoder;
+			if ([mtlRendEnc respondsToSelector: @selector(useResource:usage:stages:)]) {
+				[mtlRendEnc useResource: mtlResource usage: mtlUsage stages: mtlStages];
+			} else {
+				[mtlRendEnc useResource: mtlResource usage: mtlUsage];
+			}
+		}
+	}
+}
+
 
 #pragma mark -
 #pragma mark MVKComputeResourcesCommandEncoderState
@@ -793,9 +967,15 @@
     _resourceBindings.bufferSizeBufferBinding.isDirty = needBufferSizeBuffer;
 }
 
+void MVKComputeResourcesCommandEncoderState::bindDynamicOffsetBuffer(const MVKShaderImplicitRezBinding& binding,
+																	 bool needDynamicOffsetBuffer) {
+	_resourceBindings.dynamicOffsetBufferBinding.index = binding.stages[kMVKShaderStageCompute];
+	_resourceBindings.dynamicOffsetBufferBinding.isDirty = needDynamicOffsetBuffer;
+}
+
 // Mark everything as dirty
 void MVKComputeResourcesCommandEncoderState::markDirty() {
-    MVKCommandEncoderState::markDirty();
+    MVKResourcesCommandEncoderState::markDirty();
     MVKResourcesCommandEncoderState::markDirty(_resourceBindings.bufferBindings, _resourceBindings.areBufferBindingsDirty);
     MVKResourcesCommandEncoderState::markDirty(_resourceBindings.textureBindings, _resourceBindings.areTextureBindingsDirty);
     MVKResourcesCommandEncoderState::markDirty(_resourceBindings.samplerStateBindings, _resourceBindings.areSamplerStateBindingsDirty);
@@ -803,6 +983,8 @@
 
 void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) {
 
+	encodeMetalArgumentBuffer(kMVKShaderStageCompute);
+
     MVKPipeline* pipeline = _cmdEncoder->_computePipelineState.getPipeline();
 	bool fullImageViewSwizzle = pipeline ? pipeline->fullImageViewSwizzle() : false;
 
@@ -832,6 +1014,14 @@
 
     }
 
+	if (_resourceBindings.dynamicOffsetBufferBinding.isDirty) {
+		_cmdEncoder->setComputeBytes(_cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch),
+									 _dynamicOffsets.data(),
+									 _dynamicOffsets.size() * sizeof(uint32_t),
+									 _resourceBindings.dynamicOffsetBufferBinding.index);
+
+	}
+
 	encodeBinding<MVKMTLBufferBinding>(_resourceBindings.bufferBindings, _resourceBindings.areBufferBindingsDirty,
 									   [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
 		if (b.isInline) {
@@ -859,6 +1049,24 @@
                                              });
 }
 
+MVKPipeline* MVKComputeResourcesCommandEncoderState::getPipeline() {
+	return _cmdEncoder->_computePipelineState.getPipeline();
+}
+
+void MVKComputeResourcesCommandEncoderState::bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) {
+	bindBuffer(buffBind);
+}
+
+void MVKComputeResourcesCommandEncoderState::encodeArgumentBufferResourceUsage(MVKShaderStage stage,
+																			   id<MTLResource> mtlResource,
+																			   MTLResourceUsage mtlUsage,
+																			   MTLRenderStages mtlStages) {
+	if (mtlResource) {
+		auto* mtlCompEnc = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch);
+		[mtlCompEnc useResource: mtlResource usage: mtlUsage];
+	}
+}
+
 
 #pragma mark -
 #pragma mark MVKOcclusionQueryCommandEncoderState
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
index 21f1808..f862b61 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
@@ -20,22 +20,27 @@
 
 #include "MVKImage.h"
 #include "MVKSmallVector.h"
+#include "MVKMTLBufferAllocation.h"
 
 class MVKDescriptorSet;
 class MVKDescriptorSetLayout;
 class MVKCommandEncoder;
+class MVKResourcesCommandEncoderState;
 
 
 #pragma mark MVKShaderStageResourceBinding
 
 /** Indicates the Metal resource indexes used by a single shader stage in a descriptor. */
 typedef struct MVKShaderStageResourceBinding {
-	uint16_t bufferIndex = 0;
-	uint16_t textureIndex = 0;
-	uint16_t samplerIndex = 0;
+	uint32_t bufferIndex = 0;
+	uint32_t textureIndex = 0;
+	uint32_t samplerIndex = 0;
+	uint32_t resourceIndex = 0;
+	uint32_t dynamicOffsetBufferIndex = 0;
 
 	MVKShaderStageResourceBinding operator+ (const MVKShaderStageResourceBinding& rhs);
 	MVKShaderStageResourceBinding& operator+= (const MVKShaderStageResourceBinding& rhs);
+	void clearArgumentBufferResources();
 
 } MVKShaderStageResourceBinding;
 
@@ -46,15 +51,32 @@
 typedef struct MVKShaderResourceBinding {
 	MVKShaderStageResourceBinding stages[kMVKShaderStageMax];
 
+	uint32_t getMaxResourceIndex();
 	uint16_t getMaxBufferIndex();
 	uint16_t getMaxTextureIndex();
 	uint16_t getMaxSamplerIndex();
 
 	MVKShaderResourceBinding operator+ (const MVKShaderResourceBinding& rhs);
 	MVKShaderResourceBinding& operator+= (const MVKShaderResourceBinding& rhs);
+	MVKShaderStageResourceBinding& getMetalResourceIndexes(MVKShaderStage stage = kMVKShaderStageVertex) { return stages[stage]; }
+	void clearArgumentBufferResources();
+	void addArgumentBuffers(uint32_t count);
 
 } MVKShaderResourceBinding;
 
+/**
+ * Populates the shader converter context, at the descriptor set binding,
+ * from the shader stage resource binding for the specified shader stage.
+ */
+void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
+									   MVKShaderStageResourceBinding& ssRB,
+									   MVKShaderStage stage,
+									   uint32_t descriptorSetIndex,
+									   uint32_t bindingIndex,
+									   uint32_t count,
+									   VkDescriptorType descType,
+									   MVKSampler* immutableSampler);
+
 
 #pragma mark -
 #pragma mark MVKDescriptorSetLayoutBinding
@@ -71,7 +93,7 @@
 	inline uint32_t getBinding() { return _info.binding; }
 
 	/** Returns whether this binding has a variable descriptor count. */
-	inline bool hasVariableDescriptorCount() {
+	inline bool hasVariableDescriptorCount() const {
 		return mvkIsAnyFlagEnabled(_flags, VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT);
 	}
 
@@ -83,11 +105,14 @@
 	 * count provided to that descriptor set is returned. Otherwise returns the value
 	 * defined in VkDescriptorSetLayoutBinding::descriptorCount.
 	 */
-	uint32_t getDescriptorCount(MVKDescriptorSet* descSet);
+	uint32_t getDescriptorCount(MVKDescriptorSet* descSet = nullptr) const;
 
 	/** Returns the descriptor type of this layout. */
 	inline VkDescriptorType getDescriptorType() { return _info.descriptorType; }
 
+	/** Returns whether this binding uses immutable samplers. */
+	bool usesImmutableSamplers() { return !_immutableSamplers.empty(); }
+
 	/** Returns the immutable sampler at the index, or nullptr if immutable samplers are not used. */
 	MVKSampler* getImmutableSampler(uint32_t index);
 
@@ -109,7 +134,22 @@
               MVKShaderResourceBinding& dslMTLRezIdxOffsets);
 
 	/** Returns the index of the descriptor within the descriptor set of the element at the index within this descriptor layout. */
-	inline uint32_t getDescriptorIndex(uint32_t elementIndex = 0) { return _descriptorIndex + elementIndex; }
+	uint32_t getDescriptorIndex(uint32_t elementIndex = 0) const { return _descriptorIndex + elementIndex; }
+
+	/**
+	 * Returns the Metal resource indexes, relative to the descriptor set.
+	 * When using Metal argument buffers, all stages have the same values, and
+	 * in that case the stage can be omitted and a default stage will be used.
+	 */
+	MVKShaderStageResourceBinding& getMetalResourceIndexOffsets(MVKShaderStage stage = kMVKShaderStageVertex) {
+		return _mtlResourceIndexOffsets.getMetalResourceIndexes(stage);
+	}
+
+	/** Returns a bitwise OR of the Metal render stages in which this binding is used. */
+	MTLRenderStages getMTLRenderStages();
+
+	/** Returns whether this binding should be applied to the shader stage. */
+	bool getApplyToStage(MVKShaderStage stage) { return _applyToStage[stage]; }
 
 	MVKDescriptorSetLayoutBinding(MVKDevice* device,
 								  MVKDescriptorSetLayout* layout,
@@ -125,9 +165,13 @@
 	friend class MVKDescriptorSetLayout;
     friend class MVKInlineUniformBlockDescriptor;
 	
-	void initMetalResourceIndexOffsets(MVKShaderStageResourceBinding* pBindingIndexes,
-									   MVKShaderStageResourceBinding* pDescSetCounts,
-									   const VkDescriptorSetLayoutBinding* pBinding);
+	void initMetalResourceIndexOffsets(const VkDescriptorSetLayoutBinding* pBinding, uint32_t stage);
+	void addMTLArgumentDescriptors(NSMutableArray<MTLArgumentDescriptor*>* args);
+	void addMTLArgumentDescriptor(NSMutableArray<MTLArgumentDescriptor*>* args,
+								  uint32_t argIndex,
+								  MTLDataType dataType,
+								  MTLArgumentAccess access);
+	bool isUsingMetalArgumentBuffer();
 	void populateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
 										MVKShaderResourceBinding& dslMTLRezIdxOffsets,
 										uint32_t dslIndex);
@@ -156,6 +200,9 @@
 
 	virtual VkDescriptorType getDescriptorType() = 0;
 
+	/** Returns whether this descriptor type uses dynamic buffer offsets. */
+	virtual bool usesDynamicBufferOffsets() { return false; }
+
 	/** Encodes this descriptor (based on its layout binding index) on the command encoder. */
 	virtual void bind(MVKCommandEncoder* cmdEncoder,
 					  MVKDescriptorSetLayoutBinding* mvkDSLBind,
@@ -165,6 +212,16 @@
 					  MVKArrayRef<uint32_t> dynamicOffsets,
 					  uint32_t& dynamicOffsetIndex) = 0;
 
+	/** Encodes this descriptor to the Metal argument buffer. */
+	virtual void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+											 id<MTLArgumentEncoder> mtlArgEncoder,
+											 uint32_t descSetIndex,
+											 MVKDescriptorSetLayoutBinding* mvkDSLBind,
+											 uint32_t elementIndex,
+											 MVKShaderStage stage,
+											 bool encodeToArgBuffer,
+											 bool encodeUsage) = 0;
+
 	/**
 	 * Updates the internal binding from the specified content. The format of the content depends
 	 * on the descriptor type, and is extracted from pData at the location given by index * stride.
@@ -200,6 +257,9 @@
 
 	~MVKDescriptor() { reset(); }
 
+protected:
+	MTLResourceUsage getMTLResourceUsage();
+
 };
 
 
@@ -218,6 +278,15 @@
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex) override;
 
+	void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+									 id<MTLArgumentEncoder> mtlArgEncoder,
+									 uint32_t descSetIndex,
+									 MVKDescriptorSetLayoutBinding* mvkDSLBind,
+									 uint32_t elementIndex,
+									 MVKShaderStage stage,
+									 bool encodeToArgBuffer,
+									 bool encodeUsage) override;
+
 	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			   MVKDescriptorSet* mvkDescSet,
 			   uint32_t srcIndex,
@@ -258,6 +327,7 @@
 class MVKUniformBufferDynamicDescriptor : public MVKBufferDescriptor {
 public:
 	VkDescriptorType getDescriptorType() override { return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; }
+	bool usesDynamicBufferOffsets() override { return true; }
 };
 
 
@@ -276,6 +346,7 @@
 class MVKStorageBufferDynamicDescriptor : public MVKBufferDescriptor {
 public:
 	VkDescriptorType getDescriptorType() override { return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC; }
+	bool usesDynamicBufferOffsets() override { return true; }
 };
 
 
@@ -296,6 +367,15 @@
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex) override;
 
+	void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+									 id<MTLArgumentEncoder> mtlArgEncoder,
+									 uint32_t descSetIndex,
+									 MVKDescriptorSetLayoutBinding* mvkDSLBind,
+									 uint32_t elementIndex,
+									 MVKShaderStage stage,
+									 bool encodeToArgBuffer,
+									 bool encodeUsage) override;
+
 	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			   MVKDescriptorSet* mvkDescSet,
 			   uint32_t dstOffset, // For inline buffers we are using this parameter as dst offset not as src descIdx
@@ -315,7 +395,9 @@
 	~MVKInlineUniformBlockDescriptor() { reset(); }
 
 protected:
-	uint8_t* _buffer = nullptr;
+	inline uint8_t* getData() { return _mvkMTLBufferAllocation ? (uint8_t*)_mvkMTLBufferAllocation->getContents() : nullptr; }
+
+	const MVKMTLBufferAllocation* _mvkMTLBufferAllocation = nullptr;
 };
 
 
@@ -334,6 +416,15 @@
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex) override;
 
+	void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+									 id<MTLArgumentEncoder> mtlArgEncoder,
+									 uint32_t descSetIndex,
+									 MVKDescriptorSetLayoutBinding* mvkDSLBind,
+									 uint32_t elementIndex,
+									 MVKShaderStage stage,
+									 bool encodeToArgBuffer,
+									 bool encodeUsage) override;
+
 	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			   MVKDescriptorSet* mvkDescSet,
 			   uint32_t srcIndex,
@@ -405,6 +496,14 @@
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex);
 
+	void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+									 id<MTLArgumentEncoder> mtlArgEncoder,
+									 uint32_t descSetIndex,
+									 MVKDescriptorSetLayoutBinding* mvkDSLBind,
+									 uint32_t elementIndex,
+									 MVKShaderStage stage,
+									 bool encodeToArgBuffer);
+
 	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			   MVKDescriptorSet* mvkDescSet,
 			   uint32_t srcIndex,
@@ -444,6 +543,15 @@
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex) override;
 
+	void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+									 id<MTLArgumentEncoder> mtlArgEncoder,
+									 uint32_t descSetIndex,
+									 MVKDescriptorSetLayoutBinding* mvkDSLBind,
+									 uint32_t elementIndex,
+									 MVKShaderStage stage,
+									 bool encodeToArgBuffer,
+									 bool encodeUsage) override;
+
 	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			   MVKDescriptorSet* mvkDescSet,
 			   uint32_t srcIndex,
@@ -482,6 +590,15 @@
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex) override;
 
+	void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+									 id<MTLArgumentEncoder> mtlArgEncoder,
+									 uint32_t descSetIndex,
+									 MVKDescriptorSetLayoutBinding* mvkDSLBind,
+									 uint32_t elementIndex,
+									 MVKShaderStage stage,
+									 bool encodeToArgBuffer,
+									 bool encodeUsage) override;
+
 	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			   MVKDescriptorSet* mvkDescSet,
 			   uint32_t srcIndex,
@@ -518,6 +635,15 @@
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex) override;
 
+	void encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+									 id<MTLArgumentEncoder> mtlArgEncoder,
+									 uint32_t descSetIndex,
+									 MVKDescriptorSetLayoutBinding* mvkDSLBind,
+									 uint32_t elementIndex,
+									 MVKShaderStage stage,
+									 bool encodeToArgBuffer,
+									 bool encodeUsage) override;
+
 	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			   MVKDescriptorSet* mvkDescSet,
 			   uint32_t srcIndex,
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
index 725752d..1f50b62 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
@@ -28,6 +28,8 @@
 	rslt.bufferIndex = this->bufferIndex + rhs.bufferIndex;
 	rslt.textureIndex = this->textureIndex + rhs.textureIndex;
 	rslt.samplerIndex = this->samplerIndex + rhs.samplerIndex;
+	rslt.resourceIndex = this->resourceIndex + rhs.resourceIndex;
+	rslt.dynamicOffsetBufferIndex = this->dynamicOffsetBufferIndex + rhs.dynamicOffsetBufferIndex;
 	return rslt;
 }
 
@@ -35,12 +37,24 @@
 	this->bufferIndex += rhs.bufferIndex;
 	this->textureIndex += rhs.textureIndex;
 	this->samplerIndex += rhs.samplerIndex;
+	this->resourceIndex += rhs.resourceIndex;
+	this->dynamicOffsetBufferIndex += rhs.dynamicOffsetBufferIndex;
 	return *this;
 }
 
+void MVKShaderStageResourceBinding::clearArgumentBufferResources() {
+	bufferIndex = 0;
+	textureIndex = 0;
+	samplerIndex = 0;
+}
+
 
 #pragma mark MVKShaderResourceBinding
 
+uint32_t MVKShaderResourceBinding::getMaxResourceIndex() {
+	return std::max({stages[kMVKShaderStageVertex].resourceIndex, stages[kMVKShaderStageTessCtl].resourceIndex, stages[kMVKShaderStageTessEval].resourceIndex, stages[kMVKShaderStageFragment].resourceIndex, stages[kMVKShaderStageCompute].resourceIndex});
+}
+
 uint16_t MVKShaderResourceBinding::getMaxBufferIndex() {
 	return std::max({stages[kMVKShaderStageVertex].bufferIndex, stages[kMVKShaderStageTessCtl].bufferIndex, stages[kMVKShaderStageTessEval].bufferIndex, stages[kMVKShaderStageFragment].bufferIndex, stages[kMVKShaderStageCompute].bufferIndex});
 }
@@ -68,13 +82,105 @@
 	return *this;
 }
 
+void MVKShaderResourceBinding::clearArgumentBufferResources() {
+	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+		stages[i].clearArgumentBufferResources();
+	}
+}
+
+void MVKShaderResourceBinding::addArgumentBuffers(uint32_t count) {
+	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+		stages[i].bufferIndex += count;
+		stages[i].resourceIndex += count;
+	}
+}
+
+void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
+									   MVKShaderStageResourceBinding& ssRB,
+									   MVKShaderStage stage,
+									   uint32_t descriptorSetIndex,
+									   uint32_t bindingIndex,
+									   uint32_t count,
+									   VkDescriptorType descType,
+									   MVKSampler* immutableSampler) {
+
+#define addResourceBinding(spvRezType)												\
+	do {																			\
+		mvk::MSLResourceBinding rb;													\
+		auto& rbb = rb.resourceBinding;												\
+		rbb.stage = spvExecModels[stage];											\
+		rbb.basetype = SPIRV_CROSS_NAMESPACE_OVERRIDE::SPIRType::spvRezType;		\
+		rbb.desc_set = descriptorSetIndex;											\
+		rbb.binding = bindingIndex;													\
+		rbb.count = count;															\
+		rbb.msl_buffer = ssRB.bufferIndex;											\
+		rbb.msl_texture = ssRB.textureIndex;										\
+		rbb.msl_sampler = ssRB.samplerIndex;										\
+		if (immutableSampler) { immutableSampler->getConstexprSampler(rb); }		\
+		context.resourceBindings.push_back(rb);										\
+	} while(false)
+
+	static const spv::ExecutionModel spvExecModels[] = {
+		spv::ExecutionModelVertex,
+		spv::ExecutionModelTessellationControl,
+		spv::ExecutionModelTessellationEvaluation,
+		spv::ExecutionModelFragment,
+		spv::ExecutionModelGLCompute
+	};
+
+	switch (descType) {
+		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+		case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+			addResourceBinding(Void);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+			addResourceBinding(Float);
+
+			mvk::DescriptorBinding db;
+			db.stage = spvExecModels[stage];
+			db.descriptorSet = descriptorSetIndex;
+			db.binding = bindingIndex;
+			db.index = ssRB.dynamicOffsetBufferIndex;
+			context.dynamicBufferDescriptors.push_back(db);
+			break;
+		}
+
+		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+		case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+			addResourceBinding(Image);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+			addResourceBinding(Image);
+			addResourceBinding(Void);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_SAMPLER:
+			addResourceBinding(Sampler);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+			addResourceBinding(SampledImage);
+			break;
+
+		default:
+			addResourceBinding(Unknown);
+			break;
+	}
+}
+
 
 #pragma mark -
 #pragma mark MVKDescriptorSetLayoutBinding
 
 MVKVulkanAPIObject* MVKDescriptorSetLayoutBinding::getVulkanAPIObject() { return _layout; };
 
-uint32_t MVKDescriptorSetLayoutBinding::getDescriptorCount(MVKDescriptorSet* descSet) {
+uint32_t MVKDescriptorSetLayoutBinding::getDescriptorCount(MVKDescriptorSet* descSet) const {
 
 	if (_info.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
 		return 1;
@@ -335,13 +441,71 @@
     }
 }
 
-// If depth compare is required, but unavailable on the device, the sampler can only be used as an immutable sampler
-bool MVKDescriptorSetLayoutBinding::validate(MVKSampler* mvkSampler) {
-	if (mvkSampler->getRequiresConstExprSampler()) {
-		mvkSampler->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdPushDescriptorSet/vkCmdPushDescriptorSetWithTemplate(): Tried to push an immutable sampler.");
-		return false;
+bool MVKDescriptorSetLayoutBinding::isUsingMetalArgumentBuffer() { return _layout->isUsingMetalArgumentBuffer(); };
+
+// Adds MTLArgumentDescriptors to the array, and updates resource indexes consumed.
+void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(NSMutableArray<MTLArgumentDescriptor*>* args) {
+	switch (getDescriptorType()) {
+
+		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+		case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+			addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadOnly);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+			addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+		case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+			addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadOnly);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+			addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite);
+			addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite);		// Needed for atomic operations
+			break;
+
+		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+			addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadOnly);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+			addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadWrite);
+			addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().bufferIndex, MTLDataTypePointer, MTLArgumentAccessReadWrite);		// Needed for atomic operations
+			break;
+
+		case VK_DESCRIPTOR_TYPE_SAMPLER:
+			addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().samplerIndex, MTLDataTypeSampler, MTLArgumentAccessReadOnly);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+			addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().textureIndex, MTLDataTypeTexture, MTLArgumentAccessReadOnly);
+			addMTLArgumentDescriptor(args, getMetalResourceIndexOffsets().samplerIndex, MTLDataTypeSampler, MTLArgumentAccessReadOnly);
+			break;
+
+		default:
+			break;
 	}
-	return true;
+}
+
+void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptor(NSMutableArray<MTLArgumentDescriptor*>* args,
+															 uint32_t argIndex,
+															 MTLDataType dataType,
+															 MTLArgumentAccess access) {
+	uint32_t descCnt = getDescriptorCount();
+	if (descCnt == 0) { return; }
+	
+	auto* argDesc = [MTLArgumentDescriptor argumentDescriptor];
+	argDesc.dataType = dataType;
+	argDesc.access = access;
+	argDesc.index = argIndex;
+	argDesc.arrayLength = descCnt;
+	argDesc.textureType = MTLTextureType2D;
+
+	[args addObject: argDesc];
 }
 
 void MVKDescriptorSetLayoutBinding::populateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
@@ -353,26 +517,54 @@
     // Establish the resource indices to use, by combining the offsets of the DSL and this DSL binding.
     MVKShaderResourceBinding mtlIdxs = _mtlResourceIndexOffsets + dslMTLRezIdxOffsets;
 
-    static const spv::ExecutionModel models[] = {
-        spv::ExecutionModelVertex,
-        spv::ExecutionModelTessellationControl,
-        spv::ExecutionModelTessellationEvaluation,
-        spv::ExecutionModelFragment,
-        spv::ExecutionModelGLCompute
-    };
-    for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
-        if (_applyToStage[i]) {
+	uint32_t descCnt = getDescriptorCount();
+	bool isUsingMtlArgBuff = isUsingMetalArgumentBuffer();
+	for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageMax; stage++) {
+        if ((_applyToStage[stage] || isUsingMtlArgBuff) && descCnt > 0) {
             mvkPopulateShaderConverterContext(context,
-                                              mtlIdxs.stages[i],
-                                              models[i],
+                                              mtlIdxs.stages[stage],
+                                              MVKShaderStage(stage),
                                               dslIndex,
                                               _info.binding,
-											  getDescriptorCount(nullptr),
+											  descCnt,
+											  getDescriptorType(),
 											  mvkSamp);
         }
     }
 }
 
+// If depth compare is required, but unavailable on the device, the sampler can only be used as an immutable sampler
+bool MVKDescriptorSetLayoutBinding::validate(MVKSampler* mvkSampler) {
+	if (mvkSampler->getRequiresConstExprSampler()) {
+		mvkSampler->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdPushDescriptorSet/vkCmdPushDescriptorSetWithTemplate(): Tried to push an immutable sampler.");
+		return false;
+	}
+	return true;
+}
+
+MTLRenderStages MVKDescriptorSetLayoutBinding::getMTLRenderStages() {
+	MTLRenderStages mtlStages = 0;
+	for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageMax; stage++) {
+		if (_applyToStage[stage]) {
+			switch (stage) {
+				case kMVKShaderStageVertex:
+				case kMVKShaderStageTessCtl:
+				case kMVKShaderStageTessEval:
+					mtlStages |= MTLRenderStageVertex;
+					break;
+
+				case kMVKShaderStageFragment:
+					mtlStages |= MTLRenderStageFragment;
+					break;
+
+				default:
+					break;
+			}
+		}
+	}
+	return mtlStages;
+}
+
 MVKDescriptorSetLayoutBinding::MVKDescriptorSetLayoutBinding(MVKDevice* device,
 															 MVKDescriptorSetLayout* layout,
 															 const VkDescriptorSetLayoutBinding* pBinding,
@@ -386,27 +578,23 @@
 
 	_info.pImmutableSamplers = nullptr;     // Remove dangling pointer
 
-	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
-        // Determine if this binding is used by this shader stage
-        _applyToStage[i] = mvkAreAllFlagsEnabled(pBinding->stageFlags, mvkVkShaderStageFlagBitsFromMVKShaderStage(MVKShaderStage(i)));
-	    // If this binding is used by the shader, set the Metal resource index
-        if (_applyToStage[i]) {
-            initMetalResourceIndexOffsets(&_mtlResourceIndexOffsets.stages[i],
-                                          &layout->_mtlResourceCounts.stages[i], pBinding);
-        }
-    }
+	// Determine if this binding is used by this shader stage, and initialize resource indexes.
+	for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageMax; stage++) {
+		_applyToStage[stage] = mvkAreAllFlagsEnabled(pBinding->stageFlags, mvkVkShaderStageFlagBitsFromMVKShaderStage(MVKShaderStage(stage)));
+		initMetalResourceIndexOffsets(pBinding, stage);
+	}
 
-    // If immutable samplers are defined, copy them in
-    if ( pBinding->pImmutableSamplers &&
-        (pBinding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER ||
-         pBinding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ) {
-            _immutableSamplers.reserve(pBinding->descriptorCount);
-            for (uint32_t i = 0; i < pBinding->descriptorCount; i++) {
-                _immutableSamplers.push_back((MVKSampler*)pBinding->pImmutableSamplers[i]);
-                _immutableSamplers.back()->retain();
-            }
-        }
+	// If immutable samplers are defined, copy them in
+	if ( pBinding->pImmutableSamplers &&
+		(pBinding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER ||
+		 pBinding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ) {
 
+		_immutableSamplers.reserve(pBinding->descriptorCount);
+		for (uint32_t i = 0; i < pBinding->descriptorCount; i++) {
+			_immutableSamplers.push_back((MVKSampler*)pBinding->pImmutableSamplers[i]);
+			_immutableSamplers.back()->retain();
+		}
+	}
 }
 
 MVKDescriptorSetLayoutBinding::MVKDescriptorSetLayoutBinding(const MVKDescriptorSetLayoutBinding& binding) :
@@ -434,13 +622,31 @@
 
 // Sets the appropriate Metal resource indexes within this binding from the
 // specified descriptor set binding counts, and updates those counts accordingly.
-void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(MVKShaderStageResourceBinding* pBindingIndexes,
-																  MVKShaderStageResourceBinding* pDescSetCounts,
-																  const VkDescriptorSetLayoutBinding* pBinding) {
+void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(const VkDescriptorSetLayoutBinding* pBinding, uint32_t stage) {
+
+	// Sets an index offset and updates both that index and the general resource index.
+	// Can be used multiple times for combined multi-resource descriptor types.
+	// When using Metal argument buffers, resource indexes are accumulated cumulatively
+	// across all resource types, and the individual per-type resource counts
+	// consumed by the descriptor set layout are not increased.
+#define setResourceIndexOffset(rezIdx)														\
+	do {																					\
+		bool isUsingMtlArgBuff = isUsingMetalArgumentBuffer();								\
+		if (_applyToStage[stage] || isUsingMtlArgBuff) {									\
+			bindIdxs.rezIdx = isUsingMtlArgBuff ? dslCnts.resourceIndex : dslCnts.rezIdx;	\
+			dslCnts.rezIdx += isUsingMtlArgBuff ? 0 : descCnt;								\
+			bindIdxs.resourceIndex = dslCnts.resourceIndex;									\
+			dslCnts.resourceIndex += descCnt;												\
+		}																					\
+	} while(false)
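+
+	// Illustrative example (hypothetical counts): for a combined image/sampler binding with
+	// descriptorCount = 2, bound discretely, the texture offset comes from dslCnts.textureIndex
+	// and the sampler offset from dslCnts.samplerIndex, and each per-type count advances by 2.
+	// With Metal argument buffers, both offsets instead come from the single running
+	// dslCnts.resourceIndex (e.g. texture at 0, sampler at 2, resourceIndex ending at 4),
+	// and the per-type counts are left untouched.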
+
+	MVKShaderStageResourceBinding& bindIdxs = _mtlResourceIndexOffsets.stages[stage];
+	MVKShaderStageResourceBinding& dslCnts = _layout->_mtlResourceCounts.stages[stage];
+
+	uint32_t descCnt = pBinding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT ? 1 : pBinding->descriptorCount;
     switch (pBinding->descriptorType) {
         case VK_DESCRIPTOR_TYPE_SAMPLER:
-            pBindingIndexes->samplerIndex = pDescSetCounts->samplerIndex;
-            pDescSetCounts->samplerIndex += pBinding->descriptorCount;
+			setResourceIndexOffset(samplerIndex);
 
 			if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfSamplers) {
 				_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of samplers.", _device->getName()));
@@ -448,10 +654,8 @@
             break;
 
         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-            pBindingIndexes->textureIndex = pDescSetCounts->textureIndex;
-            pDescSetCounts->textureIndex += pBinding->descriptorCount;
-            pBindingIndexes->samplerIndex = pDescSetCounts->samplerIndex;
-            pDescSetCounts->samplerIndex += pBinding->descriptorCount;
+			setResourceIndexOffset(textureIndex);
+			setResourceIndexOffset(samplerIndex);
 
 			if (pBinding->descriptorCount > 1) {
 				if ( !_device->_pMetalFeatures->arrayOfTextures ) {
@@ -462,44 +666,49 @@
 				}
 			}
 
-            if ( pBinding->pImmutableSamplers ) {
+            if (pBinding->pImmutableSamplers && _applyToStage[stage]) {
                 for (uint32_t i = 0; i < pBinding->descriptorCount; i++) {
                     uint8_t planeCount = ((MVKSampler*)pBinding->pImmutableSamplers[i])->getPlaneCount();
                     if (planeCount > 1) {
-                        pDescSetCounts->textureIndex += planeCount - 1;
+                        dslCnts.textureIndex += planeCount - 1;
                     }
                 }
             }
             break;
 
-        case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-        case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-            pBindingIndexes->bufferIndex = pDescSetCounts->bufferIndex;
-            pDescSetCounts->bufferIndex += pBinding->descriptorCount;
-            // fallthrough
         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
         case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-            pBindingIndexes->textureIndex = pDescSetCounts->textureIndex;
-            pDescSetCounts->textureIndex += pBinding->descriptorCount;
+			setResourceIndexOffset(textureIndex);
 
 			if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) {
 				_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
 			}
             break;
 
+		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+			setResourceIndexOffset(textureIndex);
+			setResourceIndexOffset(bufferIndex);
+
+			if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) {
+				_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
+			}
+			break;
+
         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-        case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
-        case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-            pBindingIndexes->bufferIndex = pDescSetCounts->bufferIndex;
-            pDescSetCounts->bufferIndex += pBinding->descriptorCount;
+		case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+			setResourceIndexOffset(bufferIndex);
             break;
 
-        case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
-            pBindingIndexes->bufferIndex = pDescSetCounts->bufferIndex;
-            pDescSetCounts->bufferIndex += 1;
-            break;
+		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+			setResourceIndexOffset(bufferIndex);
+			bindIdxs.dynamicOffsetBufferIndex = dslCnts.dynamicOffsetBufferIndex;
+			dslCnts.dynamicOffsetBufferIndex += descCnt;
+
+			break;
 
         default:
             break;
@@ -508,6 +717,31 @@
 
 
 #pragma mark -
+#pragma mark MVKDescriptor
+
+MTLResourceUsage MVKDescriptor::getMTLResourceUsage() {
+	MTLResourceUsage mtlUsage = MTLResourceUsageRead;
+	switch (getDescriptorType()) {
+		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+			mtlUsage |= MTLResourceUsageSample;
+			break;
+
+		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+			mtlUsage |= MTLResourceUsageWrite;
+			break;
+
+		default:
+			break;
+	}
+	return mtlUsage;
+}
+
+
+#pragma mark -
 #pragma mark MVKBufferDescriptor
 
 // A null cmdEncoder can be passed to perform a validation pass
@@ -519,14 +753,8 @@
 							   MVKArrayRef<uint32_t> dynamicOffsets,
 							   uint32_t& dynamicOffsetIndex) {
 	MVKMTLBufferBinding bb;
-	NSUInteger bufferDynamicOffset = 0;
-	VkDescriptorType descType = getDescriptorType();
-	if (descType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
-		descType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
-		if (dynamicOffsets.size > dynamicOffsetIndex) {
-			bufferDynamicOffset = dynamicOffsets[dynamicOffsetIndex++];
-		}
-	}
+	NSUInteger bufferDynamicOffset = (usesDynamicBufferOffsets() && dynamicOffsets.size > dynamicOffsetIndex
+									  ? dynamicOffsets[dynamicOffsetIndex++] : 0);
 	if (_mvkBuffer) {
 		bb.mtlBuffer = _mvkBuffer->getMTLBuffer();
 		bb.offset = _mvkBuffer->getMTLBufferOffset() + _buffOffset + bufferDynamicOffset;
@@ -547,6 +775,28 @@
 	}
 }
 
+void MVKBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+													  id<MTLArgumentEncoder> mtlArgEncoder,
+													  uint32_t descSetIndex,
+													  MVKDescriptorSetLayoutBinding* mvkDSLBind,
+													  uint32_t elementIndex,
+													  MVKShaderStage stage,
+													  bool encodeToArgBuffer,
+													  bool encodeUsage) {
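+	// Two independent steps: encodeToArgBuffer writes the resource handle into the Metal
+	// argument buffer via the argument encoder, while encodeUsage records a useResource-style
+	// residency declaration so Metal keeps the indirectly referenced resource resident.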
+	if (encodeToArgBuffer) {
+		uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + elementIndex;
+		[mtlArgEncoder setBuffer: _mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil
+						  offset: _mvkBuffer ? _mvkBuffer->getMTLBufferOffset() + _buffOffset : 0
+						 atIndex: argIdx];
+	}
+	if (encodeUsage) {
+		rezEncState->encodeArgumentBufferResourceUsage(stage,
+													   _mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil,
+													   getMTLResourceUsage(),
+													   mvkDSLBind->getMTLRenderStages());
+	}
+}
+
 void MVKBufferDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 								MVKDescriptorSet* mvkDescSet,
 								uint32_t srcIndex,
@@ -597,9 +847,12 @@
 										   MVKArrayRef<uint32_t> dynamicOffsets,
 										   uint32_t& dynamicOffsetIndex) {
 	MVKMTLBufferBinding bb;
-	bb.mtlBytes = _buffer;
-	bb.size = mvkDSLBind->_info.descriptorCount;
-	bb.isInline = true;
+	if (_mvkMTLBufferAllocation) {
+		bb.mtlBuffer = _mvkMTLBufferAllocation->_mtlBuffer;
+		bb.offset = _mvkMTLBufferAllocation->_offset;
+		bb.size = mvkDSLBind->_info.descriptorCount;
+	}
+
 	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
 		if (stages[i]) {
 			bb.index = mtlIndexes.stages[i].bufferIndex;
@@ -612,6 +865,28 @@
 	}
 }
 
+void MVKInlineUniformBlockDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+																  id<MTLArgumentEncoder> mtlArgEncoder,
+																  uint32_t descSetIndex,
+																  MVKDescriptorSetLayoutBinding* mvkDSLBind,
+																  uint32_t elementIndex,
+																  MVKShaderStage stage,
+																  bool encodeToArgBuffer,
+																  bool encodeUsage) {
+	if (encodeToArgBuffer) {
+		uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex;
+		[mtlArgEncoder setBuffer: _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil
+						  offset: _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_offset : 0
+						 atIndex: argIdx];
+	}
+	if (encodeUsage) {
+		rezEncState->encodeArgumentBufferResourceUsage(stage,
+													   _mvkMTLBufferAllocation ? _mvkMTLBufferAllocation->_mtlBuffer : nil,
+													   getMTLResourceUsage(),
+													   mvkDSLBind->getMTLRenderStages());
+	}
+}
+
 void MVKInlineUniformBlockDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 											MVKDescriptorSet* mvkDescSet,
 											uint32_t dstOffset,
@@ -619,12 +894,13 @@
 											const void* pData) {
 	// Ensure there is a destination to write to
 	uint32_t buffSize = mvkDSLBind->_info.descriptorCount;
-	if ( !_buffer ) { _buffer = (uint8_t*)malloc(buffSize); }
+	if ( !_mvkMTLBufferAllocation ) { _mvkMTLBufferAllocation = mvkDescSet->acquireMTLBufferRegion(buffSize); }
 
+	uint8_t* data = getData();
 	const auto& pInlineUniformBlock = *(VkWriteDescriptorSetInlineUniformBlockEXT*)pData;
-	if (_buffer && pInlineUniformBlock.pData && dstOffset < buffSize) {
+	if (data && pInlineUniformBlock.pData && dstOffset < buffSize) {
 		uint32_t dataLen = std::min(pInlineUniformBlock.dataSize, buffSize - dstOffset);
-		memcpy(_buffer + dstOffset, pInlineUniformBlock.pData, dataLen);
+		memcpy(data + dstOffset, pInlineUniformBlock.pData, dataLen);
 	}
 }
 
@@ -635,16 +911,17 @@
 										   VkDescriptorBufferInfo* pBufferInfo,
 										   VkBufferView* pTexelBufferView,
 										   VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock) {
+	uint8_t* data = getData();
 	uint32_t buffSize = mvkDSLBind->_info.descriptorCount;
-	if (_buffer && pInlineUniformBlock->pData && srcOffset < buffSize) {
+	if (data && pInlineUniformBlock->pData && srcOffset < buffSize) {
 		uint32_t dataLen = std::min(pInlineUniformBlock->dataSize, buffSize - srcOffset);
-		memcpy((void*)pInlineUniformBlock->pData, _buffer + srcOffset, dataLen);
+		memcpy((void*)pInlineUniformBlock->pData, data + srcOffset, dataLen);
 	}
 }
 
 void MVKInlineUniformBlockDescriptor::reset() {
-    free(_buffer);
-	_buffer = nullptr;
+	if (_mvkMTLBufferAllocation) { _mvkMTLBufferAllocation->returnToPool(); }
+	_mvkMTLBufferAllocation = nullptr;
 	MVKDescriptor::reset();
 }
 
@@ -701,6 +978,44 @@
     }
 }
 
+void MVKImageDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+													 id<MTLArgumentEncoder> mtlArgEncoder,
+													 uint32_t descSetIndex,
+													 MVKDescriptorSetLayoutBinding* mvkDSLBind,
+													 uint32_t elementIndex,
+													 MVKShaderStage stage,
+													 bool encodeToArgBuffer,
+													 bool encodeUsage) {
+	VkDescriptorType descType = getDescriptorType();
+	uint8_t planeCount = (_mvkImageView) ? _mvkImageView->getPlaneCount() : 1;
+
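+	// Each plane of a multi-planar image occupies its own texture slot in the argument
+	// buffer. Illustrative example: with a 2-plane format, the element at elementIndex 1
+	// writes its planes at planeDescIdx 2 and 3, relative to the binding's texture offset.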
+	for (uint8_t planeIndex = 0; planeIndex < planeCount; planeIndex++) {
+		uint32_t planeDescIdx = (elementIndex * planeCount) + planeIndex;
+
+		id<MTLTexture> mtlTexture = _mvkImageView ? _mvkImageView->getMTLTexture(planeIndex) : nil;
+		if (encodeToArgBuffer) {
+			uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().textureIndex + planeDescIdx;
+			[mtlArgEncoder setTexture: mtlTexture atIndex: argIdx];
+		}
+		if (encodeUsage) {
+			rezEncState->encodeArgumentBufferResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages());
+		}
+		if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
+			id<MTLTexture> mtlTex = mtlTexture.parentTexture ? mtlTexture.parentTexture : mtlTexture;
+			id<MTLBuffer> mtlBuff = mtlTex.buffer;
+			if (mtlBuff) {
+				if (encodeToArgBuffer) {
+					uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + planeDescIdx;
+					[mtlArgEncoder setBuffer: mtlBuff offset: mtlTex.bufferOffset atIndex: argIdx];
+				}
+				if (encodeUsage) {
+					rezEncState->encodeArgumentBufferResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages());
+				}
+			}
+		}
+	}
+}
+
 void MVKImageDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 							   MVKDescriptorSet* mvkDescSet,
 							   uint32_t srcIndex,
@@ -767,6 +1082,26 @@
 	}
 }
 
+// Metal validation requires each sampler in an array of samplers to be populated,
+// even if not used, so populate a default if one hasn't been set.
+void MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+															id<MTLArgumentEncoder> mtlArgEncoder,
+															uint32_t descSetIndex,
+															MVKDescriptorSetLayoutBinding* mvkDSLBind,
+															uint32_t elementIndex,
+															MVKShaderStage stage,
+															bool encodeToArgBuffer) {
+	if (encodeToArgBuffer) {
+		MVKSampler* imutSamp = mvkDSLBind->getImmutableSampler(elementIndex);
+		MVKSampler* mvkSamp = imutSamp ? imutSamp : _mvkSampler;
+		id<MTLSamplerState> mtlSamp = (mvkSamp
+									   ? mvkSamp->getMTLSamplerState()
+									   : mvkDSLBind->getDevice()->getDefaultMTLSamplerState());
+		uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().samplerIndex + elementIndex;
+		[mtlArgEncoder setSamplerState: mtlSamp atIndex: argIdx];
+	}
+}
+
 void MVKSamplerDescriptorMixin::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 									  MVKDescriptorSet* mvkDescSet,
 									  uint32_t srcIndex,
@@ -815,6 +1150,17 @@
 	MVKSamplerDescriptorMixin::bind(cmdEncoder, mvkDSLBind, elementIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
 }
 
+void MVKSamplerDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+													   id<MTLArgumentEncoder> mtlArgEncoder,
+													   uint32_t descSetIndex,
+													   MVKDescriptorSetLayoutBinding* mvkDSLBind,
+													   uint32_t elementIndex,
+													   MVKShaderStage stage,
+													   bool encodeToArgBuffer,
+													   bool encodeUsage) {
+	MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(rezEncState, mtlArgEncoder, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer);
+}
+
 void MVKSamplerDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 								 MVKDescriptorSet* mvkDescSet,
 								 uint32_t srcIndex,
@@ -854,6 +1200,18 @@
 	MVKSamplerDescriptorMixin::bind(cmdEncoder, mvkDSLBind, elementIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
 }
 
+void MVKCombinedImageSamplerDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+																	id<MTLArgumentEncoder> mtlArgEncoder,
+																	uint32_t descSetIndex,
+																	MVKDescriptorSetLayoutBinding* mvkDSLBind,
+																	uint32_t elementIndex,
+																	MVKShaderStage stage,
+																	bool encodeToArgBuffer,
+																	bool encodeUsage) {
+	MVKImageDescriptor::encodeToMetalArgumentBuffer(rezEncState, mtlArgEncoder, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer, encodeUsage);
+	MVKSamplerDescriptorMixin::encodeToMetalArgumentBuffer(rezEncState, mtlArgEncoder, descSetIndex, mvkDSLBind, elementIndex, stage, encodeToArgBuffer);
+}
+
 void MVKCombinedImageSamplerDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 											  MVKDescriptorSet* mvkDescSet,
 											  uint32_t srcIndex,
@@ -922,6 +1280,37 @@
 		}
 	}
 }
+
+void MVKTexelBufferDescriptor::encodeToMetalArgumentBuffer(MVKResourcesCommandEncoderState* rezEncState,
+														   id<MTLArgumentEncoder> mtlArgEncoder,
+														   uint32_t descSetIndex,
+														   MVKDescriptorSetLayoutBinding* mvkDSLBind,
+														   uint32_t elementIndex,
+														   MVKShaderStage stage,
+														   bool encodeToArgBuffer,
+														   bool encodeUsage) {
+	VkDescriptorType descType = getDescriptorType();
+	id<MTLTexture> mtlTexture = _mvkBufferView ? _mvkBufferView->getMTLTexture() : nil;
+	if (encodeToArgBuffer) {
+		uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().textureIndex + elementIndex;
+		[mtlArgEncoder setTexture: mtlTexture atIndex: argIdx];
+	}
+	if (encodeUsage) {
+		rezEncState->encodeArgumentBufferResourceUsage(stage, mtlTexture, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages());
+	}
+
+	if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
+		id<MTLBuffer> mtlBuff = mtlTexture.buffer;
+		if (mtlBuff) {
+			if (encodeToArgBuffer) {
+				uint32_t argIdx = mvkDSLBind->getMetalResourceIndexOffsets().bufferIndex + elementIndex;
+				[mtlArgEncoder setBuffer: mtlBuff offset: mtlTexture.bufferOffset atIndex: argIdx];
+			}
+			if (encodeUsage) {
+				rezEncState->encodeArgumentBufferResourceUsage(stage, mtlBuff, getMTLResourceUsage(), mvkDSLBind->getMTLRenderStages());
+			}
+		}
+	}
+}
 
 void MVKTexelBufferDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
 									 MVKDescriptorSet* mvkDescSet,
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
index 4d69c35..c8ec767 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
@@ -28,11 +28,28 @@
 class MVKDescriptorPool;
 class MVKPipelineLayout;
 class MVKCommandEncoder;
+class MVKResourcesCommandEncoderState;
 
 
 #pragma mark -
 #pragma mark MVKDescriptorSetLayout
 
+/** Holds and manages the lifecycle of an MTLArgumentEncoder. The encoder can only be set once. */
+struct MVKMTLArgumentEncoder {
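+	// Illustrative usage sketch: init() is called once by the owning descriptor set layout;
+	// callers then target a region of the pool's shared MTLBuffer with the standard
+	// -[MTLArgumentEncoder setArgumentBuffer:offset:] method before encoding resources.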
+	NSUInteger mtlArgumentEncoderSize = 0;
+
+	id<MTLArgumentEncoder> getMTLArgumentEncoder() { return _mtlArgumentEncoder; }
+	void init(id<MTLArgumentEncoder> mtlArgEnc) {
+		if (_mtlArgumentEncoder) { return; }
+		_mtlArgumentEncoder = mtlArgEnc;		// takes ownership
+		mtlArgumentEncoderSize = mtlArgEnc.encodedLength;
+	}
+	~MVKMTLArgumentEncoder() { [_mtlArgumentEncoder release]; }
+
+private:
+	id<MTLArgumentEncoder> _mtlArgumentEncoder = nil;
+};
+
 /** Represents a Vulkan descriptor set layout. */
 class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject {
 
@@ -46,6 +63,8 @@
 
 	/** Encodes this descriptor set layout and the specified descriptor set on the specified command encoder. */
 	void bindDescriptorSet(MVKCommandEncoder* cmdEncoder,
+						   VkPipelineBindPoint pipelineBindPoint,
+						   uint32_t descSetIndex,
 						   MVKDescriptorSet* descSet,
 						   MVKShaderResourceBinding& dslMTLRezIdxOffsets,
 						   MVKArrayRef<uint32_t> dynamicOffsets,
@@ -67,11 +86,32 @@
 	/** Populates the specified shader converter context, at the specified DSL index. */
 	void populateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
                                         MVKShaderResourceBinding& dslMTLRezIdxOffsets,
-                                        uint32_t dslIndex);
+                                        uint32_t descSetIndex);
+
+	/**
+	 * Populates the bindings in this descriptor set layout used by the shader.
+	 * Returns false if the shader does not use the descriptor set at all.
+	 */
+	bool populateBindingUse(MVKBitArray& bindingUse,
+							mvk::SPIRVToMSLConversionConfiguration& context,
+							MVKShaderStage stage,
+							uint32_t descSetIndex);
+
+	/** Returns the number of bindings. */
+	uint32_t getBindingCount() { return (uint32_t)_bindings.size(); }
+
+	/** Returns the binding at the specified index in this descriptor set layout. */
+	MVKDescriptorSetLayoutBinding* getBindingAt(uint32_t index) { return &_bindings[index]; }
 
 	/** Returns true if this layout is for push descriptors only. */
 	bool isPushDescriptorLayout() const { return _isPushDescriptorLayout; }
 
+	/** Returns true if this layout is using a Metal argument buffer. */
+	bool isUsingMetalArgumentBuffer() { return isUsingMetalArgumentBuffers() && !isPushDescriptorLayout(); }
+
+	/** Returns the MTLArgumentEncoder for the descriptor set. */
+	MVKMTLArgumentEncoder& getMTLArgumentEncoder() { return _mtlArgumentEncoder; }
+
 	MVKDescriptorSetLayout(MVKDevice* device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo);
 
 protected:
@@ -79,16 +119,17 @@
 	friend class MVKDescriptorSetLayoutBinding;
 	friend class MVKPipelineLayout;
 	friend class MVKDescriptorSet;
-	friend class MVKDescriptorPool;
 
 	void propagateDebugName() override {}
-	inline uint32_t getDescriptorCount() { return _descriptorCount; }
-	inline uint32_t getDescriptorIndex(uint32_t binding, uint32_t elementIndex = 0) { return getBinding(binding)->getDescriptorIndex(elementIndex); }
-	inline MVKDescriptorSetLayoutBinding* getBinding(uint32_t binding) { return &_bindings[_bindingToIndex[binding]]; }
+	uint32_t getDescriptorCount() { return _descriptorCount; }
+	uint32_t getDescriptorIndex(uint32_t binding, uint32_t elementIndex = 0) { return getBinding(binding)->getDescriptorIndex(elementIndex); }
+	MVKDescriptorSetLayoutBinding* getBinding(uint32_t binding) { return &_bindings[_bindingToIndex[binding]]; }
 	const VkDescriptorBindingFlags* getBindingFlags(const VkDescriptorSetLayoutCreateInfo* pCreateInfo);
+	void initMTLArgumentEncoder();
 
 	MVKSmallVector<MVKDescriptorSetLayoutBinding> _bindings;
 	std::unordered_map<uint32_t, uint32_t> _bindingToIndex;
+	MVKMTLArgumentEncoder _mtlArgumentEncoder;
 	MVKShaderResourceBinding _mtlResourceCounts;
 	uint32_t _descriptorCount;
 	bool _isPushDescriptorLayout;
@@ -109,6 +150,9 @@
 	/** Returns the debug report object type of this object. */
 	VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT; }
 
+	/** Returns the layout that defines this descriptor set. */
+	MVKDescriptorSetLayout* getLayout() { return _layout; }
+
 	/** Returns the descriptor type for the specified binding number. */
 	VkDescriptorType getDescriptorType(uint32_t binding);
 
@@ -126,6 +170,29 @@
 			  VkBufferView* pTexelBufferView,
 			  VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock);
 
+	/** Returns an MTLBuffer region allocation. */
+	const MVKMTLBufferAllocation* acquireMTLBufferRegion(NSUInteger length);
+
+	/**
+	 * Returns the Metal argument buffer to which resources are written,
+	 * or nil if Metal argument buffers are not being used.
+	 */
+	id<MTLBuffer> getMetalArgumentBuffer();
+
+	/** Returns the offset into the Metal argument buffer to which resources are written. */
+	NSUInteger getMetalArgumentBufferOffset() { return _metalArgumentBufferOffset; }
+
+	/** Returns an array indicating the descriptors that have changed since the Metal argument buffer was last updated. */
+	MVKBitArray& getMetalArgumentBufferDirtyDescriptors() { return _metalArgumentBufferDirtyDescriptors; }
+
+	/** Returns the descriptor at an index. */
+	MVKDescriptor* getDescriptorAt(uint32_t descIndex) { return _descriptors[descIndex]; }
+
+	/** Returns the number of descriptors in this descriptor set. */
+	uint32_t getDescriptorCount() { return (uint32_t)_descriptors.size(); }
+
+	/** Returns the number of descriptors in this descriptor set that use dynamic offsets. */
+	uint32_t getDynamicOffsetDescriptorCount() { return _dynamicOffsetDescriptorCount; }
+
 	MVKDescriptorSet(MVKDescriptorPool* pool);
 
 protected:
@@ -134,12 +201,17 @@
 
 	void propagateDebugName() override {}
 	MVKDescriptor* getDescriptor(uint32_t binding, uint32_t elementIndex = 0);
-	VkResult allocate(MVKDescriptorSetLayout* layout, uint32_t variableDescriptorCount);
+	VkResult allocate(MVKDescriptorSetLayout* layout,
+					  uint32_t variableDescriptorCount,
+					  NSUInteger mtlArgBufferOffset);
 	void free(bool isPoolReset);
 
 	MVKDescriptorPool* _pool;
 	MVKDescriptorSetLayout* _layout;
 	MVKSmallVector<MVKDescriptor*> _descriptors;
+	MVKBitArray _metalArgumentBufferDirtyDescriptors;
+	NSUInteger _metalArgumentBufferOffset;
+	uint32_t _dynamicOffsetDescriptorCount;
 	uint32_t _variableDescriptorCount;
 };
 
@@ -207,9 +279,15 @@
 	void freeDescriptorSet(MVKDescriptorSet* mvkDS, bool isPoolReset);
 	VkResult allocateDescriptor(VkDescriptorType descriptorType, MVKDescriptor** pMVKDesc);
 	void freeDescriptor(MVKDescriptor* mvkDesc);
+	void initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo* pCreateInfo);
+	NSUInteger getMetalArgumentBufferResourceStorageSize(NSUInteger bufferCount, NSUInteger textureCount, NSUInteger samplerCount);
+	MTLArgumentDescriptor* getMTLArgumentDescriptor(MTLDataType resourceType, NSUInteger argIndex, NSUInteger count);
 
 	MVKSmallVector<MVKDescriptorSet> _descriptorSets;
 	MVKBitArray _descriptorSetAvailablility;
+	id<MTLBuffer> _metalArgumentBuffer;
+	NSUInteger _nextMetalArgumentBufferOffset;
+	MVKMTLBufferAllocator _inlineBlockMTLBufferAllocator;
 
 	MVKDescriptorTypePool<MVKUniformBufferDescriptor> _uniformBufferDescriptors;
 	MVKDescriptorTypePool<MVKStorageBufferDescriptor> _storageBufferDescriptors;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
index 7e233f1..36c1861 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
@@ -17,6 +17,9 @@
  */
 
 #include "MVKDescriptorSet.h"
+#include "MVKCommandBuffer.h"
+#include "MVKCommandEncoderState.h"
+#include "MVKPipeline.h"
 #include "MVKInstance.h"
 #include "MVKOSExtensions.h"
 
@@ -26,12 +29,19 @@
 
 // A null cmdEncoder can be passed to perform a validation pass
 void MVKDescriptorSetLayout::bindDescriptorSet(MVKCommandEncoder* cmdEncoder,
+											   VkPipelineBindPoint pipelineBindPoint,
+											   uint32_t descSetIndex,
 											   MVKDescriptorSet* descSet,
 											   MVKShaderResourceBinding& dslMTLRezIdxOffsets,
 											   MVKArrayRef<uint32_t> dynamicOffsets,
 											   uint32_t& dynamicOffsetIndex) {
 	if (!cmdEncoder) { clearConfigurationResult(); }
-	if ( !_isPushDescriptorLayout ) {
+	if (_isPushDescriptorLayout ) { return; }
+
+	if (cmdEncoder) { cmdEncoder->bindDescriptorSet(pipelineBindPoint, descSetIndex,
+													descSet, dslMTLRezIdxOffsets,
+													dynamicOffsets, dynamicOffsetIndex); }
+	if ( !isUsingMetalArgumentBuffers() ) {
 		for (auto& dslBind : _bindings) {
 			dslBind.bind(cmdEncoder, descSet, dslMTLRezIdxOffsets, dynamicOffsets, dynamicOffsetIndex);
 		}
@@ -161,11 +171,41 @@
 
 void MVKDescriptorSetLayout::populateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
                                                             MVKShaderResourceBinding& dslMTLRezIdxOffsets,
-															uint32_t dslIndex) {
+															uint32_t descSetIndex) {
 	uint32_t bindCnt = (uint32_t)_bindings.size();
 	for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
-		_bindings[bindIdx].populateShaderConverterContext(context, dslMTLRezIdxOffsets, dslIndex);
+		_bindings[bindIdx].populateShaderConverterContext(context, dslMTLRezIdxOffsets, descSetIndex);
 	}
+
+	// If Metal argument buffers are in use overall, but this descriptor set layout
+	// is not using them, mark this descriptor set as discrete.
+	if (isUsingMetalArgumentBuffers() && !isUsingMetalArgumentBuffer()) {
+		context.discreteDescriptorSets.push_back(descSetIndex);
+	}
+}
+
+bool MVKDescriptorSetLayout::populateBindingUse(MVKBitArray& bindingUse,
+												SPIRVToMSLConversionConfiguration& context,
+												MVKShaderStage stage,
+												uint32_t descSetIndex) {
+	static const spv::ExecutionModel spvExecModels[] = {
+		spv::ExecutionModelVertex,
+		spv::ExecutionModelTessellationControl,
+		spv::ExecutionModelTessellationEvaluation,
+		spv::ExecutionModelFragment,
+		spv::ExecutionModelGLCompute
+	};
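+	// Note: this array is assumed to parallel the MVKShaderStage enum ordering
+	// (vertex, tess control, tess eval, fragment, compute), since it is indexed by stage below.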
+
+	bool descSetIsUsed = false;
+	uint32_t bindCnt = (uint32_t)_bindings.size();
+	bindingUse.resize(bindCnt);
+	for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
+		auto& dslBind = _bindings[bindIdx];
+		if (context.isResourceUsed(spvExecModels[stage], descSetIndex, dslBind.getBinding())) {
+			bindingUse.setBit(bindIdx);
+			descSetIsUsed = true;
+		}
+	}
+	return descSetIsUsed;
 }
 
 MVKDescriptorSetLayout::MVKDescriptorSetLayout(MVKDevice* device,
@@ -190,15 +230,18 @@
 		return bindInfo1.pBinding->binding < bindInfo2.pBinding->binding;
 	});
 
-	_isPushDescriptorLayout = (pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) != 0;
 	_descriptorCount = 0;
-    _bindings.reserve(bindCnt);
+	_isPushDescriptorLayout = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
+
+	_bindings.reserve(bindCnt);
     for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
 		BindInfo& bindInfo = sortedBindings[bindIdx];
         _bindings.emplace_back(_device, this, bindInfo.pBinding, bindInfo.bindingFlags, _descriptorCount);
 		_bindingToIndex[bindInfo.pBinding->binding] = bindIdx;
-		_descriptorCount += _bindings.back().getDescriptorCount(nullptr);
+		_descriptorCount += _bindings.back().getDescriptorCount();
 	}
+
+	initMTLArgumentEncoder();
 }
 
 // Find and return an array of binding flags from the pNext chain of pCreateInfo,
@@ -217,6 +260,16 @@
 	return nullptr;
 }
 
+void MVKDescriptorSetLayout::initMTLArgumentEncoder() {
+	if (isUsingDescriptorSetMetalArgumentBuffers() && isUsingMetalArgumentBuffer()) {
+		@autoreleasepool {
+			NSMutableArray<MTLArgumentDescriptor*>* args = [NSMutableArray arrayWithCapacity: _bindings.size()];
+			for (auto& dslBind : _bindings) { dslBind.addMTLArgumentDescriptors(args); }
+			_mtlArgumentEncoder.init(args.count ? [getMTLDevice() newArgumentEncoderWithArguments: args] : nil);
+		}
+	}
+}
+
 
 #pragma mark -
 #pragma mark MVKDescriptorSet
@@ -229,6 +282,8 @@
 	return _descriptors[_layout->getDescriptorIndex(binding, elementIndex)];
 }
 
+id<MTLBuffer> MVKDescriptorSet::getMetalArgumentBuffer() { return _pool->_metalArgumentBuffer; }
+
 template<typename DescriptorAction>
 void MVKDescriptorSet::write(const DescriptorAction* pDescriptorAction,
 							 size_t stride,
@@ -238,6 +293,7 @@
 		MVKDescriptor* mvkDesc = _descriptors[descIdx];           \
 		if (mvkDesc->getDescriptorType() == descType) {           \
 			mvkDesc->write(mvkDSLBind, this, IDX, stride, pData); \
+			_metalArgumentBufferDirtyDescriptors.setBit(descIdx); \
 		}                                                         \
 	} while(false)
 
@@ -283,21 +339,35 @@
     }
 }
 
-VkResult MVKDescriptorSet::allocate(MVKDescriptorSetLayout* layout, uint32_t variableDescriptorCount) {
+const MVKMTLBufferAllocation* MVKDescriptorSet::acquireMTLBufferRegion(NSUInteger length) {
+	return _pool->_inlineBlockMTLBufferAllocator.acquireMTLBufferRegion(length);
+}
+
+VkResult MVKDescriptorSet::allocate(MVKDescriptorSetLayout* layout,
+									uint32_t variableDescriptorCount,
+									NSUInteger mtlArgBufferOffset) {
 	_layout = layout;
 	_variableDescriptorCount = variableDescriptorCount;
 
+	// If the Metal argument buffer offset has not been set yet, set it now.
+	if ( !_metalArgumentBufferOffset ) { _metalArgumentBufferOffset = mtlArgBufferOffset; }
+
 	uint32_t descCnt = layout->getDescriptorCount();
 	_descriptors.reserve(descCnt);
+	_metalArgumentBufferDirtyDescriptors.resize(descCnt);
 
 	uint32_t bindCnt = (uint32_t)layout->_bindings.size();
 	for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
 		MVKDescriptorSetLayoutBinding* mvkDSLBind = &layout->_bindings[bindIdx];
 		uint32_t elemCnt = mvkDSLBind->getDescriptorCount(this);
 		for (uint32_t elemIdx = 0; elemIdx < elemCnt; elemIdx++) {
+			VkDescriptorType descType = mvkDSLBind->getDescriptorType();
+			uint32_t descIdx = (uint32_t)_descriptors.size();
 			MVKDescriptor* mvkDesc = nullptr;
-			setConfigurationResult(_pool->allocateDescriptor(mvkDSLBind->getDescriptorType(), &mvkDesc));
+			setConfigurationResult(_pool->allocateDescriptor(descType, &mvkDesc));
 			if ( !wasConfigurationSuccessful() ) { return getConfigurationResult(); }
+			if (mvkDesc->usesDynamicBufferOffsets()) { _dynamicOffsetDescriptorCount++; }
+			if (mvkDSLBind->usesImmutableSamplers()) { _metalArgumentBufferDirtyDescriptors.setBit(descIdx); }
 			_descriptors.push_back(mvkDesc);
 		}
 	}
@@ -306,14 +376,19 @@
 
 void MVKDescriptorSet::free(bool isPoolReset) {
 	_layout = nullptr;
+	_dynamicOffsetDescriptorCount = 0;
 	_variableDescriptorCount = 0;
 
+	// Only reset the Metal arg buffer offset if the entire pool is being reset
+	if (isPoolReset) { _metalArgumentBufferOffset = 0; }
+
 	// Pooled descriptors don't need to be individually freed under pool resets.
 	if ( !(_pool->_hasPooledDescriptors && isPoolReset) ) {
 		for (auto mvkDesc : _descriptors) { _pool->freeDescriptor(mvkDesc); }
 	}
 	_descriptors.clear();
 	_descriptors.shrink_to_fit();
+	_metalArgumentBufferDirtyDescriptors.resize(0);
 
 	clearConfigurationResult();
 }
@@ -330,7 +405,7 @@
 // If not preallocated, create one on the fly.
 template<class DescriptorClass>
 VkResult MVKDescriptorTypePool<DescriptorClass>::allocateDescriptor(MVKDescriptor** pMVKDesc,
-																			 MVKDescriptorPool* pool) {
+																	MVKDescriptorPool* pool) {
 	DescriptorClass* mvkDesc;
 	if (pool->_hasPooledDescriptors) {
 		size_t availDescIdx = _availability.getIndexOfFirstSetBit(true);
@@ -350,7 +425,7 @@
 // If not preallocated, simply destroy returning descriptor.
 template<typename DescriptorClass>
 void MVKDescriptorTypePool<DescriptorClass>::freeDescriptor(MVKDescriptor* mvkDesc,
-																	 MVKDescriptorPool* pool) {
+															MVKDescriptorPool* pool) {
 	if (pool->_hasPooledDescriptors) {
 		size_t descIdx = (DescriptorClass*)mvkDesc - _descriptors.data();
 		_availability.setBit(descIdx);
@@ -409,17 +484,50 @@
 VkResult MVKDescriptorPool::allocateDescriptorSet(MVKDescriptorSetLayout* mvkDSL,
 												  uint32_t variableDescriptorCount,
 												  VkDescriptorSet* pVKDS) {
+	VkResult rslt = VK_ERROR_OUT_OF_POOL_MEMORY;
+	NSUInteger mtlArgBuffAllocSize = mvkDSL->getMTLArgumentEncoder().mtlArgumentEncoderSize;
+	NSUInteger mtlArgBuffAlignedSize = mvkAlignByteCount(mtlArgBuffAllocSize,
+														 getDevice()->_pMetalFeatures->mtlBufferAlignment);
 
-	size_t dsIdx = _descriptorSetAvailablility.getIndexOfFirstSetBit(true);
-	if (dsIdx >= _descriptorSets.size()) { return VK_ERROR_OUT_OF_POOL_MEMORY; }
+	size_t dsCnt = _descriptorSetAvailablility.size();
+	_descriptorSetAvailablility.enumerateEnabledBits(true, [&](size_t dsIdx) {
+		bool isSpaceAvail = true;		// If not using Metal arg buffers, space will always be available.
+		MVKDescriptorSet* mvkDS = &_descriptorSets[dsIdx];
+		NSUInteger mtlArgBuffOffset = mvkDS->_metalArgumentBufferOffset;
 
-	MVKDescriptorSet* mvkDS = &_descriptorSets[dsIdx];
-	VkResult rslt = mvkDS->allocate(mvkDSL, variableDescriptorCount);
-	if (rslt) {
-		freeDescriptorSet(mvkDS, false);
-	} else {
-		*pVKDS = (VkDescriptorSet)mvkDS;
-	}
+		// If the desc set is using a Metal argument buffer, we also need to check whether the
+		// desc set will fit in the slot that may already have been assigned to it in the Metal
+		// argument buffer by a previous allocation that was since returned. If this pool was
+		// recently reset, the desc sets will not have Metal argument buffer offsets assigned yet.
+		if (isUsingDescriptorSetMetalArgumentBuffers() && mvkDSL->isUsingMetalArgumentBuffer()) {
+
+			// If the offset has not been set (and it's not the first desc set except
+			// If the offset has not been assigned yet, assign the next available offset and
+			// advance it. A zero offset is ambiguous for the first desc set, so it is only
+			// treated as unassigned when the pool itself has no allocations yet.
+				mtlArgBuffOffset = _nextMetalArgumentBufferOffset;
+				_nextMetalArgumentBufferOffset += mtlArgBuffAlignedSize;
+			}
+
+			// Get the offset of the next desc set, if one exists and
+			// its offset has been set, or the end of the arg buffer.
+			size_t nextDSIdx = dsIdx + 1;
+			NSUInteger nextOffset = (nextDSIdx < dsCnt ? _descriptorSets[nextDSIdx]._metalArgumentBufferOffset : 0);
+			if ( !nextOffset ) { nextOffset = _metalArgumentBuffer.length; }
+
+			isSpaceAvail = (mtlArgBuffOffset + mtlArgBuffAllocSize) <= nextOffset;
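+
+			// Illustrative example (hypothetical sizes): if this desc set was previously
+			// assigned offset 1024, and the next desc set's slot begins at offset 2048, a
+			// layout whose encoder needs 1536 bytes will not fit (1024 + 1536 > 2048), and
+			// the search continues with the next available desc set.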
+		}
+
+		if (isSpaceAvail) {
+			rslt = mvkDS->allocate(mvkDSL, variableDescriptorCount, mtlArgBuffOffset);
+			if (rslt) {
+				freeDescriptorSet(mvkDS, false);
+			} else {
+				*pVKDS = (VkDescriptorSet)mvkDS;
+			}
+			return false;
+		}
+		return true;
+	});
 	return rslt;
 }
 
@@ -465,6 +573,8 @@
 	_uniformTexelBufferDescriptors.reset();
 	_storageTexelBufferDescriptors.reset();
 
+	_nextMetalArgumentBufferOffset = 0;
+
 	return VK_SUCCESS;
 }
 
@@ -573,6 +683,29 @@
 	return descCnt;
 }
 
+// Returns the size of the preallocated pool for descriptors of the
+// VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT type, or zero if we
+// are not preallocating descriptors in the pool.
+// For consistency with getPoolSize() behavior, we support more than one pNext entry
+// for inline blocks. Accumulate the descriptor count for inline blocks accordingly.
+static size_t getInlineBlockPoolSize(const VkDescriptorPoolCreateInfo* pCreateInfo, bool poolDescriptors) {
+	uint32_t descCnt = 0;
+	if (poolDescriptors) {
+		for (const auto* next = (VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) {
+			switch (next->sType) {
+				case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT: {
+					auto* pDescPoolInlineBlockCreateInfo = (VkDescriptorPoolInlineUniformBlockCreateInfoEXT*)next;
+					descCnt += pDescPoolInlineBlockCreateInfo->maxInlineUniformBlockBindings;
+					break;
+				}
+				default:
+					break;
+			}
+		}
+	}
+	return descCnt;
+}
+
 // Although poolDescriptors is derived from MVKConfiguration, it is passed in here to ensure all components of this instance see a SVOT for this value.
 // Alternate might have been to force _hasPooledDescriptors to be set first by changing member declaration order in class declaration.
 MVKDescriptorPool::MVKDescriptorPool(MVKDevice* device, const VkDescriptorPoolCreateInfo* pCreateInfo, bool poolDescriptors) :
@@ -583,7 +716,7 @@
 	_storageBufferDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, poolDescriptors)),
 	_uniformBufferDynamicDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, poolDescriptors)),
 	_storageBufferDynamicDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, poolDescriptors)),
-	_inlineUniformBlockDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT, poolDescriptors)),
+	_inlineUniformBlockDescriptors(getInlineBlockPoolSize(pCreateInfo, poolDescriptors)),
 	_sampledImageDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, poolDescriptors)),
 	_storageImageDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, poolDescriptors)),
 	_inputAttachmentDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, poolDescriptors)),
@@ -591,10 +724,140 @@
 	_combinedImageSamplerDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, poolDescriptors)),
 	_uniformTexelBufferDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, poolDescriptors)),
 	_storageTexelBufferDescriptors(getPoolSize(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, poolDescriptors)),
-	_hasPooledDescriptors(poolDescriptors) {}
+	_inlineBlockMTLBufferAllocator(device, device->_pMetalFeatures->dynamicMTLBufferSize, true),
+	_hasPooledDescriptors(poolDescriptors) {
+		initMetalArgumentBuffer(pCreateInfo);
+	}
+
+void MVKDescriptorPool::initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo* pCreateInfo) {
+	_metalArgumentBuffer = nil;
+	_nextMetalArgumentBufferOffset = 0;
+
+	if ( !isUsingDescriptorSetMetalArgumentBuffers() ) { return; }
+
+	@autoreleasepool {
+		NSUInteger mtlBuffCnt = 0;
+		NSUInteger mtlTexCnt = 0;
+		NSUInteger mtlSampCnt = 0;
+
+		uint32_t poolCnt = pCreateInfo->poolSizeCount;
+		for (uint32_t poolIdx = 0; poolIdx < poolCnt; poolIdx++) {
+			auto& poolSize = pCreateInfo->pPoolSizes[poolIdx];
+			switch (poolSize.type) {
+				// VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT counts handled separately below
+				case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+				case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+				case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+				case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+					mtlBuffCnt += poolSize.descriptorCount;
+					break;
+
+				case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+				case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+				case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+					mtlTexCnt += poolSize.descriptorCount;
+					break;
+
+				case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+				case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+					mtlTexCnt += poolSize.descriptorCount;
+					mtlBuffCnt += poolSize.descriptorCount;
+					break;
+
+				case VK_DESCRIPTOR_TYPE_SAMPLER:
+					mtlSampCnt += poolSize.descriptorCount;
+					break;
+
+				case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+					mtlTexCnt += poolSize.descriptorCount;
+					mtlSampCnt += poolSize.descriptorCount;
+					break;
+
+				default:
+					break;
+			}
+		}
+
+		// VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT counts pulled separately
+		for (const auto* next = (VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) {
+			switch (next->sType) {
+				case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT: {
+					auto* pDescPoolInlineBlockCreateInfo = (VkDescriptorPoolInlineUniformBlockCreateInfoEXT*)next;
+					mtlBuffCnt += pDescPoolInlineBlockCreateInfo->maxInlineUniformBlockBindings;
+					break;
+				}
+				default:
+					break;
+			}
+		}
+
+		// Each descriptor set uses a separate Metal argument buffer, but all of these descriptor set
+		// Metal argument buffers share a single MTLBuffer. This single MTLBuffer needs to be large enough
+		// to hold all of the Metal resources for the descriptors. In addition, depending on the platform,
+		// a Metal argument buffer may carry a fixed storage overhead, beyond the storage required
+		// to hold the resources. This overhead per descriptor set is conservatively estimated by measuring
+		// the size of a Metal argument buffer containing one of each type of resource (S1), and the size
+		// of a Metal argument buffer containing two of each type of resource (S2), and then calculating
+		// the fixed overhead per argument buffer as (2 * S1 - S2). To this is added the overhead due to
+		// the alignment of each descriptor set Metal argument buffer offset.
+		NSUInteger overheadPerDescSet = (2 * getMetalArgumentBufferResourceStorageSize(1, 1, 1) -
+										 getMetalArgumentBufferResourceStorageSize(2, 2, 2) +
+										 _device->_pMetalFeatures->mtlBufferAlignment);
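+
+		// The overhead formula assumes the encoded size is linear in the resource counts:
+		// S(n) = overhead + n * perResource, so S1 = S(1) and S2 = S(2) give
+		// overhead = 2 * S1 - S2. Illustrative (hypothetical) values: S1 = 120 bytes and
+		// S2 = 200 bytes yield a fixed overhead of 2 * 120 - 200 = 40 bytes per desc set.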
+
+		// Measure the size of an argument buffer that would hold all of the resources
+		// managed in this pool, then add any overhead for all the descriptor sets.
+		NSUInteger metalArgBuffSize = getMetalArgumentBufferResourceStorageSize(mtlBuffCnt, mtlTexCnt, mtlSampCnt);
+		metalArgBuffSize += (overheadPerDescSet * (pCreateInfo->maxSets - 1));	// metalArgBuffSize already includes overhead for one descriptor set
+		if (metalArgBuffSize) {
+			NSUInteger maxMTLBuffSize = _device->_pMetalFeatures->maxMTLBufferSize;
+			if (metalArgBuffSize > maxMTLBuffSize) {
+				setConfigurationResult(reportError(VK_ERROR_FRAGMENTATION_EXT, "vkCreateDescriptorPool(): The requested descriptor storage of %d MB is larger than the maximum descriptor storage of %d MB per VkDescriptorPool.", (uint32_t)(metalArgBuffSize / MEBI), (uint32_t)(maxMTLBuffSize / MEBI)));
+				metalArgBuffSize = maxMTLBuffSize;
+			}
+			_metalArgumentBuffer = [getMTLDevice() newBufferWithLength: metalArgBuffSize options: MTLResourceStorageModeShared];	// retained
+			_metalArgumentBuffer.label = @"Argument buffer";
+		}
+	}
+}
+
+// Returns the size of a Metal argument buffer containing the specified numbers of each resource type.
+// Make sure any call to this function is wrapped in @autoreleasepool.
+NSUInteger MVKDescriptorPool::getMetalArgumentBufferResourceStorageSize(NSUInteger bufferCount,
+																		NSUInteger textureCount,
+																		NSUInteger samplerCount) {
+	NSMutableArray<MTLArgumentDescriptor*>* args = [NSMutableArray arrayWithCapacity: 3];
+
+	NSUInteger argIdx = 0;
+	[args addObject: getMTLArgumentDescriptor(MTLDataTypePointer, argIdx, bufferCount)];
+	argIdx += bufferCount;
+	[args addObject: getMTLArgumentDescriptor(MTLDataTypeTexture, argIdx, textureCount)];
+	argIdx += textureCount;
+	[args addObject: getMTLArgumentDescriptor(MTLDataTypeSampler, argIdx, samplerCount)];
+	argIdx += samplerCount;
+
+	id<MTLArgumentEncoder> argEnc = [getMTLDevice() newArgumentEncoderWithArguments: args];
+	NSUInteger metalArgBuffSize = argEnc.encodedLength;
+	[argEnc release];
+
+	return metalArgBuffSize;
+}
+
+// Returns an MTLArgumentDescriptor of a particular type.
+// To be conservative, use some worst-case values, in case content makes a difference in argument size.
+MTLArgumentDescriptor* MVKDescriptorPool::getMTLArgumentDescriptor(MTLDataType resourceType, NSUInteger argIndex, NSUInteger count) {
+	auto* argDesc = [MTLArgumentDescriptor argumentDescriptor];
+	argDesc.dataType = resourceType;
+	argDesc.access = MTLArgumentAccessReadWrite;
+	argDesc.index = argIndex;
+	argDesc.arrayLength = count;
+	argDesc.textureType = MTLTextureTypeCubeArray;
+	return argDesc;
+}
 
 MVKDescriptorPool::~MVKDescriptorPool() {
 	reset(0);
+	[_metalArgumentBuffer release];
+	_metalArgumentBuffer = nil;
 }
 
 
@@ -701,26 +964,3 @@
 		dstSet->write(pEntry, pEntry->stride, pCurData);
 	}
 }
-
-void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
-									   MVKShaderStageResourceBinding& ssRB,
-									   spv::ExecutionModel stage,
-									   uint32_t descriptorSetIndex,
-									   uint32_t bindingIndex,
-									   uint32_t count,
-									   MVKSampler* immutableSampler) {
-	mvk::MSLResourceBinding rb;
-
-	auto& rbb = rb.resourceBinding;
-	rbb.stage = stage;
-	rbb.desc_set = descriptorSetIndex;
-	rbb.binding = bindingIndex;
-	rbb.count = count;
-	rbb.msl_buffer = ssRB.bufferIndex;
-	rbb.msl_texture = ssRB.textureIndex;
-	rbb.msl_sampler = ssRB.samplerIndex;
-
-	if (immutableSampler) { immutableSampler->getConstexprSampler(rb); }
-
-	context.resourceBindings.push_back(rb);
-}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 60cc8e7..c389deb 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -18,6 +18,7 @@
 
 #pragma once
 
+#include "MVKEnvironment.h"
 #include "MVKFoundation.h"
 #include "MVKVulkanAPIObject.h"
 #include "MVKMTLResourceBindings.h"
@@ -325,6 +326,9 @@
 	/** Returns whether the MSL version is supported on this device. */
 	inline bool mslVersionIsAtLeast(MTLLanguageVersion minVer) { return _metalFeatures.mslVersionEnum >= minVer; }
 
+	/** Returns whether this device is using Metal argument buffers. */
+	inline bool isUsingMetalArgumentBuffers() const { return _metalFeatures.argumentBuffers && mvkConfig()->useMetalArgumentBuffers; }
+
 
 #pragma mark Construction
 
@@ -364,6 +368,7 @@
 	uint64_t getVRAMSize();
 	uint64_t getRecommendedMaxWorkingSetSize();
 	uint64_t getCurrentAllocatedSize();
+	uint32_t getMaxSamplerCount();
 	void initExternalMemoryProperties();
 	void initExtensions();
 	MVKArrayRef<MVKQueueFamily*> getQueueFamilies();
@@ -848,6 +853,15 @@
 	/** Returns info about the pixel format supported by the physical device. */
 	inline MVKPixelFormats* getPixelFormats() { return _device->getPixelFormats(); }
 
+	/** Returns whether this device is using Metal argument buffers. */
+	inline bool isUsingMetalArgumentBuffers() { return getPhysicalDevice()->isUsingMetalArgumentBuffers(); }
+
+	/** Returns whether this device is using one Metal argument buffer for each descriptor set, usable across multiple pipelines and pipeline stages. */
+	inline bool isUsingDescriptorSetMetalArgumentBuffers() { return isUsingMetalArgumentBuffers() && _device->_pMetalFeatures->descriptorSetArgumentBuffers; }
+
+	/** Returns whether this device is using one Metal argument buffer for each descriptor set-pipeline-stage combination. */
+	inline bool isUsingPipelineStageMetalArgumentBuffers() { return isUsingMetalArgumentBuffers() && !_device->_pMetalFeatures->descriptorSetArgumentBuffers; }
+
 	/** Constructs an instance for the specified device. */
     MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); }
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index d304fa0..d4e1a4b 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -375,6 +375,9 @@
                 break;
             }
 			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
+				bool isTier2 = isUsingMetalArgumentBuffers() && (_mtlDevice.argumentBuffersSupport >= MTLArgumentBuffersTier2);
+				uint32_t maxSampCnt = getMaxSamplerCount();
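+				// Tier 2 Metal argument buffers advertise very large resource limits (on the
+				// order of 500,000 buffers and textures), hence the fixed 500000 values below.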
+
 				auto* pDescIdxProps = (VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)next;
 				pDescIdxProps->maxUpdateAfterBindDescriptorsInAllPools				= kMVKUndefinedLargeUInt32;
 				pDescIdxProps->shaderUniformBufferArrayNonUniformIndexingNative		= false;
@@ -384,20 +387,20 @@
 				pDescIdxProps->shaderInputAttachmentArrayNonUniformIndexingNative	= _metalFeatures.arrayOfTextures;
 				pDescIdxProps->robustBufferAccessUpdateAfterBind					= _features.robustBufferAccess;
 				pDescIdxProps->quadDivergentImplicitLod								= false;
-				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSamplers			= _properties.limits.maxPerStageDescriptorSamplers;
-				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindUniformBuffers	= _properties.limits.maxPerStageDescriptorUniformBuffers;
-				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageBuffers	= _properties.limits.maxPerStageDescriptorStorageBuffers;
-				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSampledImages	= _properties.limits.maxPerStageDescriptorSampledImages;
-				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageImages	= _properties.limits.maxPerStageDescriptorStorageImages;
+				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSamplers			= isTier2 ? maxSampCnt : _properties.limits.maxPerStageDescriptorSamplers;
+				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindUniformBuffers	= isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorUniformBuffers;
+				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageBuffers	= isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageBuffers;
+				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSampledImages	= isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorSampledImages;
+				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageImages	= isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageImages;
 				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindInputAttachments	= _properties.limits.maxPerStageDescriptorInputAttachments;
-				pDescIdxProps->maxPerStageUpdateAfterBindResources					= _properties.limits.maxPerStageResources;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindSamplers				= _properties.limits.maxDescriptorSetSamplers;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffers		= _properties.limits.maxDescriptorSetUniformBuffers;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic	= _properties.limits.maxDescriptorSetUniformBuffersDynamic;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffers		= _properties.limits.maxDescriptorSetStorageBuffers;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic	= _properties.limits.maxDescriptorSetStorageBuffersDynamic;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindSampledImages			= _properties.limits.maxDescriptorSetSampledImages;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageImages			= _properties.limits.maxDescriptorSetStorageImages;
+				pDescIdxProps->maxPerStageUpdateAfterBindResources					= isTier2 ? 500000 : _properties.limits.maxPerStageResources;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindSamplers				= isTier2 ? maxSampCnt : _properties.limits.maxDescriptorSetSamplers;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffers		= isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffers;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic	= isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffersDynamic;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffers		= isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffers;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic	= isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffersDynamic;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindSampledImages			= isTier2 ? 500000 : _properties.limits.maxDescriptorSetSampledImages;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageImages			= isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageImages;
 				pDescIdxProps->maxDescriptorSetUpdateAfterBindInputAttachments		= _properties.limits.maxDescriptorSetInputAttachments;
 				break;
 			}
@@ -1205,6 +1208,7 @@
 	if (supportsMTLFeatureSet(tvOS_GPUFamily1_v3)) {
 		_metalFeatures.mslVersionEnum = MTLLanguageVersion2_0;
         _metalFeatures.renderWithoutAttachments = true;
+		_metalFeatures.argumentBuffers = true;
 	}
 
 	if (supportsMTLFeatureSet(tvOS_GPUFamily1_v4)) {
@@ -1279,6 +1283,7 @@
     if (supportsMTLFeatureSet(iOS_GPUFamily1_v4)) {
 		_metalFeatures.mslVersionEnum = MTLLanguageVersion2_0;
         _metalFeatures.renderWithoutAttachments = true;
+		_metalFeatures.argumentBuffers = true;
     }
 
 	if (supportsMTLFeatureSet(iOS_GPUFamily1_v5)) {
@@ -1387,6 +1392,7 @@
 		_metalFeatures.presentModeImmediate = true;
 		_metalFeatures.fences = true;
 		_metalFeatures.nonUniformThreadgroups = true;
+		_metalFeatures.argumentBuffers = true;
     }
 
     if (supportsMTLFeatureSet(macOS_GPUFamily1_v4)) {
@@ -1548,6 +1554,19 @@
 #if MVK_OS_SIMULATOR && !MVK_APPLE_SILICON
 	_metalFeatures.mtlBufferAlignment = 256;
 #endif
+
+	// Currently, Metal argument buffer support is in beta stage, and is only supported
+	// on macOS 10.16 (Big Sur) or later, or on older versions of macOS using an Intel GPU.
+	// Metal argument buffer support is not available on iOS. Development to support iOS
+	// and a wider combination of GPUs on older macOS versions is under way.
+#if MVK_MACOS
+	_metalFeatures.descriptorSetArgumentBuffers = (_metalFeatures.argumentBuffers &&
+												   (mvkOSVersionIsAtLeast(10.16) ||
+													_properties.vendorID == kIntelVendorId));
+#endif
+	// Currently, if we don't support descriptor set argument buffers, we can't support argument buffers.
+	_metalFeatures.argumentBuffers = _metalFeatures.descriptorSetArgumentBuffers;
+
 }
 
 // Initializes the physical device features of this instance.
@@ -2055,7 +2074,7 @@
     // Features with no specific limits - default to unlimited int values
 
     _properties.limits.maxMemoryAllocationCount = kMVKUndefinedLargeUInt32;
-    _properties.limits.maxSamplerAllocationCount = kMVKUndefinedLargeUInt32;
+	_properties.limits.maxSamplerAllocationCount = getMaxSamplerCount();
     _properties.limits.maxBoundDescriptorSets = kMVKMaxDescriptorSetCount;
 
     _properties.limits.maxComputeWorkGroupCount[0] = kMVKUndefinedLargeUInt32;
@@ -2377,6 +2396,7 @@
 	// Next 4 bytes contain flags based on enabled Metal features that
 	// might affect the contents of the pipeline cache (mostly MSL content).
 	uint32_t mtlFeatures = 0;
+	mtlFeatures |= isUsingMetalArgumentBuffers() << 0;
 	*(uint32_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatures);
 	uuidComponentOffset += sizeof(mtlFeatures);
 }
@@ -2625,6 +2645,11 @@
 #endif
 }
 
+uint32_t MVKPhysicalDevice::getMaxSamplerCount() {
+	return ([_mtlDevice respondsToSelector: @selector(maxArgumentBufferSamplerCount)]
+			? (uint32_t)_mtlDevice.maxArgumentBufferSamplerCount : 1024);
+}
+
 void MVKPhysicalDevice::initExternalMemoryProperties() {
 
 	// Buffers
@@ -3650,6 +3675,7 @@
 		if ( !_defaultMTLSamplerState ) {
 			@autoreleasepool {
 				MTLSamplerDescriptor* mtlSampDesc = [[MTLSamplerDescriptor new] autorelease];
+				mtlSampDesc.supportArgumentBuffers = _physicalDevice->isUsingMetalArgumentBuffers();
 				_defaultMTLSamplerState = [getMTLDevice() newSamplerStateWithDescriptor: mtlSampDesc];	// retained
 			}
 		}
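As background for the supportArgumentBuffers settings above: Metal requires that any sampler written into an MTLArgumentEncoder be created with that flag enabled. A sketch of the underlying Metal requirement (mtlDevice and argEncoder are assumed to exist; this is standard Metal API, not MoltenVK code):

    MTLSamplerDescriptor* sampDesc = [[MTLSamplerDescriptor new] autorelease];
    sampDesc.supportArgumentBuffers = YES;	// required before use via an argument encoder
    id<MTLSamplerState> sampState = [mtlDevice newSamplerStateWithDescriptor: sampDesc];	// retained
    [argEncoder setSamplerState: sampState atIndex: 0];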
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index c2785f4..5029698 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -1929,6 +1929,7 @@
 								 ? mvkClamp(pCreateInfo->maxAnisotropy, 1.0f, _device->_pProperties->limits.maxSamplerAnisotropy)
 								 : 1);
 	mtlSampDesc.normalizedCoordinates = !pCreateInfo->unnormalizedCoordinates;
+	mtlSampDesc.supportArgumentBuffers = isUsingMetalArgumentBuffers();
 
 	// If compareEnable is true, but dynamic samplers with depth compare are not available
 	// on this device, this sampler must only be used as an immutable sampler, and will
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
index cdef884..913ae01 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
@@ -23,6 +23,7 @@
 #include "MVKShaderModule.h"
 #include "MVKSync.h"
 #include "MVKSmallVector.h"
+#include "MVKBitArray.h"
 #include <MoltenVKShaderConverter/SPIRVReflection.h>
 #include <MoltenVKShaderConverter/SPIRVToMSLConverter.h>
 #include <unordered_map>
@@ -55,6 +56,7 @@
 
 	/** Binds descriptor sets to a command encoder. */
     void bindDescriptorSets(MVKCommandEncoder* cmdEncoder,
+							VkPipelineBindPoint pipelineBindPoint,
                             MVKArrayRef<MVKDescriptorSet*> descriptorSets,
                             uint32_t firstSet,
                             MVKArrayRef<uint32_t> dynamicOffsets);
@@ -79,6 +81,9 @@
 	/** Returns the current buffer size buffer bindings. */
 	const MVKShaderImplicitRezBinding& getBufferSizeBufferIndex() { return _bufferSizeBufferIndex; }
 
+	/** Returns the current dynamic offset buffer bindings. */
+	const MVKShaderImplicitRezBinding& getDynamicOffsetBufferIndex() { return _dynamicOffsetBufferIndex; }
+
 	/** Returns the current view range buffer binding for multiview draws. */
 	const MVKShaderImplicitRezBinding& getViewRangeBufferIndex() { return _viewRangeBufferIndex; }
 
@@ -100,6 +105,15 @@
 	/** Returns the number of buffers in this layout. This is used to calculate the size of the buffer size buffer. */
 	uint32_t getBufferCount() { return _pushConstantsMTLResourceIndexes.getMaxBufferIndex(); }
 
+	/** Returns the number of descriptor sets in this pipeline layout. */
+	uint32_t getDescriptorSetCount() { return (uint32_t)_descriptorSetLayouts.size(); }
+
+	/** Returns the number of descriptors in the descriptor set layout. */
+	uint32_t getDescriptorCount(uint32_t descSetIndex) { return getDescriptorSetLayout(descSetIndex)->getDescriptorCount(); }
+
+	/** Returns the descriptor set layout. */
+	MVKDescriptorSetLayout* getDescriptorSetLayout(uint32_t descSetIndex) { return _descriptorSetLayouts[descSetIndex]; }
+
 	/** Returns the push constant binding info. */
 	const MVKShaderResourceBinding& getPushConstantBindings() { return _pushConstantsMTLResourceIndexes; }
 
@@ -109,6 +123,8 @@
 	~MVKPipelineLayout() override;
 
 protected:
+	friend class MVKPipeline;
+
 	void propagateDebugName() override {}
 
 	MVKSmallVector<MVKDescriptorSetLayout*, 1> _descriptorSetLayouts;
@@ -117,6 +133,7 @@
 	MVKShaderResourceBinding _pushConstantsMTLResourceIndexes;
 	MVKShaderImplicitRezBinding _swizzleBufferIndex;
 	MVKShaderImplicitRezBinding _bufferSizeBufferIndex;
+	MVKShaderImplicitRezBinding _dynamicOffsetBufferIndex;
 	MVKShaderImplicitRezBinding _viewRangeBufferIndex;
 	MVKShaderImplicitRezBinding _indirectParamsIndex;
 	MVKShaderImplicitRezBinding _outputBufferIndex;
@@ -153,12 +170,6 @@
 	/** Binds the push constants to a command encoder. */
 	void bindPushConstants(MVKCommandEncoder* cmdEncoder);
 
-	/** Returns the current swizzle buffer bindings. */
-	const MVKShaderImplicitRezBinding& getSwizzleBufferIndex() { return _swizzleBufferIndex; }
-
-	/** Returns the current buffer size buffer bindings. */
-	const MVKShaderImplicitRezBinding& getBufferSizeBufferIndex() { return _bufferSizeBufferIndex; }
-
 	/** Returns the current indirect parameter buffer bindings. */
 	const MVKShaderImplicitRezBinding& getIndirectParamsIndex() { return _indirectParamsIndex; }
 
@@ -168,17 +179,35 @@
 	/** Returns whether all internal Metal pipeline states are valid. */
 	bool hasValidMTLPipelineStates() { return _hasValidMTLPipelineStates; }
 
+	/** Returns the MTLArgumentEncoder for the descriptor set. */
+	virtual MVKMTLArgumentEncoder& getMTLArgumentEncoder(uint32_t descSetIndex, MVKShaderStage stage) = 0;
+
+	/** Returns the array of descriptor binding use for the descriptor set. */
+	virtual MVKBitArray& getDescriptorBindingUse(uint32_t descSetIndex, MVKShaderStage stage) = 0;
+
+	/** Returns the number of descriptor sets in this pipeline layout. */
+	uint32_t getDescriptorSetCount() { return _descriptorSetCount; }
+
+	/** A mutex lock to protect access to the Metal argument encoders. */
+	std::mutex _mtlArgumentEncodingLock;
+
 	/** Constructs an instance for the device, layout, and parent (which may be NULL). */
 	MVKPipeline(MVKDevice* device, MVKPipelineCache* pipelineCache, MVKPipelineLayout* layout, MVKPipeline* parent);
 
 protected:
 	void propagateDebugName() override {}
+	template<typename CreateInfo> void addMTLArgumentEncoders(MVKMTLFunction& mvkMTLFunc,
+															  const CreateInfo* pCreateInfo,
+															  SPIRVToMSLConversionConfiguration& context,
+															  MVKShaderStage stage);
 
 	MVKPipelineCache* _pipelineCache;
 	MVKShaderImplicitRezBinding _swizzleBufferIndex;
 	MVKShaderImplicitRezBinding _bufferSizeBufferIndex;
+	MVKShaderImplicitRezBinding _dynamicOffsetBufferIndex;
 	MVKShaderImplicitRezBinding _indirectParamsIndex;
 	MVKShaderResourceBinding _pushConstantsMTLResourceIndexes;
+	uint32_t _descriptorSetCount;
 	bool _fullImageViewSwizzle;
 	bool _hasValidMTLPipelineStates = true;
 
@@ -200,6 +229,14 @@
 
 typedef MVKSmallVector<MVKGraphicsStage, 4> MVKPiplineStages;
 
+struct MVKStagedMTLArgumentEncoders {
+	MVKMTLArgumentEncoder stages[4] = {};
+};
+
+struct MVKStagedDescriptorBindingUse {
+	MVKBitArray stages[4] = {};
+};
+
 /** The number of dynamic states possible in Vulkan. */
 static const uint32_t kMVKVkDynamicStateCount = 32;
 
@@ -265,6 +302,12 @@
 	/** Returns the collection of instance-rate vertex bindings whose divisor is zero, along with their strides. */
 	MVKArrayRef<MVKZeroDivisorVertexBinding> getZeroDivisorVertexBindings() { return _zeroDivisorVertexBindings.contents(); }
 
+	/** Returns the MTLArgumentEncoder for the descriptor set. */
+	MVKMTLArgumentEncoder& getMTLArgumentEncoder(uint32_t descSetIndex, MVKShaderStage stage) override { return _mtlArgumentEncoders[descSetIndex].stages[stage]; }
+
+	/** Returns the array of descriptor binding use for the descriptor set. */
+	MVKBitArray& getDescriptorBindingUse(uint32_t descSetIndex, MVKShaderStage stage) override { return _descriptorBindingUse[descSetIndex].stages[stage]; }
+
 	/** Constructs an instance for the device and parent (which may be NULL). */
 	MVKGraphicsPipeline(MVKDevice* device,
 						MVKPipelineCache* pipelineCache,
@@ -314,6 +357,8 @@
 	MVKSmallVector<VkRect2D, kMVKCachedViewportScissorCount> _scissors;
 	MVKSmallVector<MVKTranslatedVertexBinding> _translatedVertexBindings;
 	MVKSmallVector<MVKZeroDivisorVertexBinding> _zeroDivisorVertexBindings;
+	MVKSmallVector<MVKStagedMTLArgumentEncoders> _mtlArgumentEncoders;
+	MVKSmallVector<MVKStagedDescriptorBindingUse> _descriptorBindingUse;
 
 	MTLComputePipelineDescriptor* _mtlTessVertexStageDesc = nil;
 	id<MTLFunction> _mtlTessVertexFunctions[3] = {nil, nil, nil};
@@ -340,17 +385,21 @@
 	bool _dynamicStateEnabled[kMVKVkDynamicStateCount];
 	bool _needsVertexSwizzleBuffer = false;
 	bool _needsVertexBufferSizeBuffer = false;
+	bool _needsVertexDynamicOffsetBuffer = false;
 	bool _needsVertexViewRangeBuffer = false;
 	bool _needsVertexOutputBuffer = false;
 	bool _needsTessCtlSwizzleBuffer = false;
 	bool _needsTessCtlBufferSizeBuffer = false;
+	bool _needsTessCtlDynamicOffsetBuffer = false;
 	bool _needsTessCtlOutputBuffer = false;
 	bool _needsTessCtlPatchOutputBuffer = false;
 	bool _needsTessCtlInputBuffer = false;
 	bool _needsTessEvalSwizzleBuffer = false;
 	bool _needsTessEvalBufferSizeBuffer = false;
+	bool _needsTessEvalDynamicOffsetBuffer = false;
 	bool _needsFragmentSwizzleBuffer = false;
 	bool _needsFragmentBufferSizeBuffer = false;
+	bool _needsFragmentDynamicOffsetBuffer = false;
 	bool _needsFragmentViewRangeBuffer = false;
 };
 
@@ -369,6 +418,12 @@
 	/** Returns if this pipeline allows non-zero dispatch bases in vkCmdDispatchBase(). */
 	bool allowsDispatchBase() { return _allowsDispatchBase; }
 
+	/** Returns the MTLArgumentEncoder for the descriptor set. */
+	MVKMTLArgumentEncoder& getMTLArgumentEncoder(uint32_t descSetIndex, MVKShaderStage stage) override { return _mtlArgumentEncoders[descSetIndex]; }
+
+	/** Returns the array of descriptor binding use for the descriptor set. */
+	MVKBitArray& getDescriptorBindingUse(uint32_t descSetIndex, MVKShaderStage stage) override { return _descriptorBindingUse[descSetIndex]; }
+
 	/** Constructs an instance for the device and parent (which may be NULL). */
 	MVKComputePipeline(MVKDevice* device,
 					   MVKPipelineCache* pipelineCache,
@@ -381,9 +436,12 @@
     MVKMTLFunction getMTLFunction(const VkComputePipelineCreateInfo* pCreateInfo);
 
     id<MTLComputePipelineState> _mtlPipelineState;
+	MVKSmallVector<MVKMTLArgumentEncoder> _mtlArgumentEncoders;
+	MVKSmallVector<MVKBitArray> _descriptorBindingUse;
     MTLSize _mtlThreadgroupSize;
     bool _needsSwizzleBuffer = false;
     bool _needsBufferSizeBuffer = false;
+	bool _needsDynamicOffsetBuffer = false;
     bool _needsDispatchBaseBuffer = false;
     bool _allowsDispatchBase = false;
 };
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index 792d768..e9b8839 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -17,7 +17,6 @@
  */
 
 #include "MVKPipeline.h"
-#include <MoltenVKShaderConverter/SPIRVToMSLConverter.h>
 #include "MVKRenderPass.h"
 #include "MVKCommandBuffer.h"
 #include "MVKFoundation.h"
@@ -38,6 +37,7 @@
 
 // A null cmdEncoder can be passed to perform a validation pass
 void MVKPipelineLayout::bindDescriptorSets(MVKCommandEncoder* cmdEncoder,
+										   VkPipelineBindPoint pipelineBindPoint,
                                            MVKArrayRef<MVKDescriptorSet*> descriptorSets,
                                            uint32_t firstSet,
                                            MVKArrayRef<uint32_t> dynamicOffsets) {
@@ -48,7 +48,9 @@
 		MVKDescriptorSet* descSet = descriptorSets[dsIdx];
 		uint32_t dslIdx = firstSet + dsIdx;
 		MVKDescriptorSetLayout* dsl = _descriptorSetLayouts[dslIdx];
-		dsl->bindDescriptorSet(cmdEncoder, descSet, _dslMTLResourceIndexOffsets[dslIdx],
+		dsl->bindDescriptorSet(cmdEncoder, pipelineBindPoint,
+							   dslIdx, descSet,
+							   _dslMTLResourceIndexOffsets[dslIdx],
 							   dynamicOffsets, dynamicOffsetIndex);
 		if (!cmdEncoder) { setConfigurationResult(dsl->getConfigurationResult()); }
 	}
@@ -77,30 +79,27 @@
 
 void MVKPipelineLayout::populateShaderConverterContext(SPIRVToMSLConversionConfiguration& context) {
 	context.resourceBindings.clear();
+	context.discreteDescriptorSets.clear();
+	context.dynamicBufferDescriptors.clear();
 
     // Add resource bindings defined in the descriptor set layouts
-	uint32_t dslCnt = (uint32_t)_descriptorSetLayouts.size();
+	uint32_t dslCnt = getDescriptorSetCount();
 	for (uint32_t dslIdx = 0; dslIdx < dslCnt; dslIdx++) {
 		_descriptorSetLayouts[dslIdx]->populateShaderConverterContext(context,
 																	  _dslMTLResourceIndexOffsets[dslIdx],
 																	  dslIdx);
 	}
 
-	// Add any resource bindings used by push-constants
-	static const spv::ExecutionModel models[] = {
-		spv::ExecutionModelVertex,
-		spv::ExecutionModelTessellationControl,
-		spv::ExecutionModelTessellationEvaluation,
-		spv::ExecutionModelFragment,
-		spv::ExecutionModelGLCompute
-	};
-	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+	// Add any resource bindings used by push-constants.
+	// Use the VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT descriptor type, which is compatible with push constants in Metal.
+	for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageMax; stage++) {
 		mvkPopulateShaderConverterContext(context,
-										  _pushConstantsMTLResourceIndexes.stages[i],
-										  models[i],
+										  _pushConstantsMTLResourceIndexes.stages[stage],
+										  MVKShaderStage(stage),
 										  kPushConstDescSet,
 										  kPushConstBinding,
 										  1,
+										  VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT,
 										  nullptr);
 	}
 }
@@ -112,19 +111,32 @@
     // corresponding DSL, and associating the current accumulated resource index offsets
     // with each DSL as it is added. The final accumulation of resource index offsets
     // becomes the resource index offsets that will be used for push constants.
+	// If we are using Metal argument buffers, reserve space for the Metal argument
+	// buffers themselves, and clear indexes of offsets used in Metal argument buffers,
+	// but still accumulate dynamic offset buffer indexes across descriptor sets.
 
     // According to the Vulkan spec, VkDescriptorSetLayout is intended to be consumed when passed
 	// to any Vulkan function, and may be safely destroyed by app immediately after. In order for
 	// this pipeline layout to retain the VkDescriptorSetLayout, the MVKDescriptorSetLayout
 	// instance is retained, so that it will live on here after it has been destroyed by the API.
 
-	_descriptorSetLayouts.reserve(pCreateInfo->setLayoutCount);
-	for (uint32_t i = 0; i < pCreateInfo->setLayoutCount; i++) {
+	uint32_t dslCnt = pCreateInfo->setLayoutCount;
+	_pushConstantsMTLResourceIndexes.addArgumentBuffers(dslCnt);
+
+	_descriptorSetLayouts.reserve(dslCnt);
+	for (uint32_t i = 0; i < dslCnt; i++) {
 		MVKDescriptorSetLayout* pDescSetLayout = (MVKDescriptorSetLayout*)pCreateInfo->pSetLayouts[i];
 		pDescSetLayout->retain();
 		_descriptorSetLayouts.push_back(pDescSetLayout);
-		_dslMTLResourceIndexOffsets.push_back(_pushConstantsMTLResourceIndexes);
-		_pushConstantsMTLResourceIndexes += pDescSetLayout->_mtlResourceCounts;
+
+		MVKShaderResourceBinding adjstdDSLRezOfsts = _pushConstantsMTLResourceIndexes;
+		MVKShaderResourceBinding adjstdDSLRezCnts = pDescSetLayout->_mtlResourceCounts;
+		if (pDescSetLayout->isUsingMetalArgumentBuffer()) {
+			adjstdDSLRezOfsts.clearArgumentBufferResources();
+			adjstdDSLRezCnts.clearArgumentBufferResources();
+		}
+		_dslMTLResourceIndexOffsets.push_back(adjstdDSLRezOfsts);
+		_pushConstantsMTLResourceIndexes += adjstdDSLRezCnts;
 	}
 
 	// Add push constants
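To illustrate the accumulation described above, a simplified sketch with hypothetical counts (not MoltenVK API): under discrete binding, each set's resources start at the offsets accumulated so far, and push constants land after the last set.

    uint32_t setBufferCounts[] = { 3, 2 };	// buffers used by descriptor sets 0 and 1
    uint32_t bufferOffset = 0;			// running Metal buffer index
    for (uint32_t count : setBufferCounts) {
        // set N's buffers start at 'bufferOffset'
        bufferOffset += count;
    }
    // bufferOffset == 5: the first Metal buffer index available to push constants

With Metal argument buffers, each set instead occupies a single reserved buffer slot, so only the dynamic offset indexes keep accumulating across sets, as the code above does.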
@@ -135,13 +147,13 @@
 
 	// Set implicit buffer indices
 	// FIXME: Many of these are optional. We shouldn't set the ones that aren't
-	// present--or at least, we should move the ones that are down to avoid
-	// running over the limit of available buffers. But we can't know that
-	// until we compile the shaders.
+	// present--or at least, we should move the ones that are down to avoid running over
+	// the limit of available buffers. But we can't know that until we compile the shaders.
 	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
-		_swizzleBufferIndex.stages[i] = _pushConstantsMTLResourceIndexes.stages[i].bufferIndex + 1;
-		_bufferSizeBufferIndex.stages[i] = _swizzleBufferIndex.stages[i] + 1;
-		_indirectParamsIndex.stages[i] = _bufferSizeBufferIndex.stages[i] + 1;
+		_dynamicOffsetBufferIndex.stages[i] = _pushConstantsMTLResourceIndexes.stages[i].bufferIndex + 1;
+		_bufferSizeBufferIndex.stages[i] = _dynamicOffsetBufferIndex.stages[i] + 1;
+		_swizzleBufferIndex.stages[i] = _bufferSizeBufferIndex.stages[i] + 1;
+		_indirectParamsIndex.stages[i] = _swizzleBufferIndex.stages[i] + 1;
 		_outputBufferIndex.stages[i] = _indirectParamsIndex.stages[i] + 1;
 		if (i == kMVKShaderStageTessCtl) {
 			_tessCtlPatchOutputBufferIndex = _outputBufferIndex.stages[i] + 1;
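The reordered assignments above give each stage a contiguous run of implicit buffer indexes immediately after its push constants. A worked example with a hypothetical starting index:

    uint32_t pushConstIdx = 4;			// assumed push-constant buffer index for this stage
    uint32_t dynOfstIdx   = pushConstIdx + 1;	// 5: dynamic offset buffer
    uint32_t bufSizeIdx   = dynOfstIdx + 1;	// 6: buffer size buffer
    uint32_t swizzleIdx   = bufSizeIdx + 1;	// 7: swizzle buffer
    uint32_t indParamsIdx = swizzleIdx + 1;	// 8: indirect parameters buffer
    uint32_t outputIdx    = indParamsIdx + 1;	// 9: captured output buffer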
@@ -170,11 +182,32 @@
 	}
 }
 
+// For each descriptor set, populate the descriptor bindings used by the shader for this stage,
+// and if Metal argument encoders must be dedicated to a pipeline stage, create the encoder here.
+template<typename CreateInfo>
+void MVKPipeline::addMTLArgumentEncoders(MVKMTLFunction& mvkMTLFunc,
+										 const CreateInfo* pCreateInfo,
+										 SPIRVToMSLConversionConfiguration& context,
+										 MVKShaderStage stage) {
+	if ( !isUsingMetalArgumentBuffers() ) { return; }
+
+	bool needMTLArgEnc = isUsingPipelineStageMetalArgumentBuffers();
+	auto mtlFunc = mvkMTLFunc.getMTLFunction();
+	for (uint32_t dsIdx = 0; dsIdx < _descriptorSetCount; dsIdx++) {
+		auto* dsLayout = ((MVKPipelineLayout*)pCreateInfo->layout)->getDescriptorSetLayout(dsIdx);
+		bool descSetIsUsed = dsLayout->populateBindingUse(getDescriptorBindingUse(dsIdx, stage), context, stage, dsIdx);
+		if (descSetIsUsed && needMTLArgEnc) {
+			getMTLArgumentEncoder(dsIdx, stage).init([mtlFunc newArgumentEncoderWithBufferIndex: dsIdx]);
+		}
+	}
+}
+
 MVKPipeline::MVKPipeline(MVKDevice* device, MVKPipelineCache* pipelineCache, MVKPipelineLayout* layout, MVKPipeline* parent) :
 	MVKVulkanAPIDeviceObject(device),
 	_pipelineCache(pipelineCache),
 	_pushConstantsMTLResourceIndexes(layout->getPushConstantBindings()),
-	_fullImageViewSwizzle(mvkConfig()->fullImageViewSwizzle) {}
+	_fullImageViewSwizzle(mvkConfig()->fullImageViewSwizzle),
+	_descriptorSetCount(layout->getDescriptorSetCount()) {}
 
 
 #pragma mark -
@@ -265,6 +298,7 @@
     }
     cmdEncoder->_graphicsResourcesState.bindSwizzleBuffer(_swizzleBufferIndex, _needsVertexSwizzleBuffer, _needsTessCtlSwizzleBuffer, _needsTessEvalSwizzleBuffer, _needsFragmentSwizzleBuffer);
     cmdEncoder->_graphicsResourcesState.bindBufferSizeBuffer(_bufferSizeBufferIndex, _needsVertexBufferSizeBuffer, _needsTessCtlBufferSizeBuffer, _needsTessEvalBufferSizeBuffer, _needsFragmentBufferSizeBuffer);
+	cmdEncoder->_graphicsResourcesState.bindDynamicOffsetBuffer(_dynamicOffsetBufferIndex, _needsVertexDynamicOffsetBuffer, _needsTessCtlDynamicOffsetBuffer, _needsTessEvalDynamicOffsetBuffer, _needsFragmentDynamicOffsetBuffer);
     cmdEncoder->_graphicsResourcesState.bindViewRangeBuffer(_viewRangeBufferIndex, _needsVertexViewRangeBuffer, _needsFragmentViewRangeBuffer);
 }
 
@@ -473,6 +507,10 @@
 	_mtlPipelineState = nil;
 	_mtlTessVertexStageDesc = nil;
 	for (uint32_t i = 0; i < 3; i++) { _mtlTessVertexFunctions[i] = nil; }
+
+	if (isUsingMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); }
+	if (isUsingPipelineStageMetalArgumentBuffers()) { _mtlArgumentEncoders.resize(_descriptorSetCount); }
+
 	if (!isTessellationPipeline()) {
 		MTLRenderPipelineDescriptor* plDesc = newMTLRenderPipelineDescriptor(pCreateInfo, reflectData);	// temp retain
 		if (plDesc) {
@@ -879,6 +917,7 @@
 	shaderContext.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageVertex];
 	shaderContext.options.mslOptions.shader_output_buffer_index = _outputBufferIndex.stages[kMVKShaderStageVertex];
 	shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageVertex];
+	shaderContext.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageVertex];
 	shaderContext.options.mslOptions.view_mask_buffer_index = _viewRangeBufferIndex.stages[kMVKShaderStageVertex];
 	shaderContext.options.mslOptions.capture_output_to_buffer = false;
 	shaderContext.options.mslOptions.disable_rasterization = isRasterizationDisabled(pCreateInfo);
@@ -896,9 +935,12 @@
 	plDesc.rasterizationEnabled = !funcRslts.isRasterizationDisabled;
 	_needsVertexSwizzleBuffer = funcRslts.needsSwizzleBuffer;
 	_needsVertexBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+	_needsVertexDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer;
 	_needsVertexViewRangeBuffer = funcRslts.needsViewRangeBuffer;
 	_needsVertexOutputBuffer = funcRslts.needsOutputBuffer;
 
+	addMTLArgumentEncoders(func, pCreateInfo, shaderContext, kMVKShaderStageVertex);
+
 	if (funcRslts.isRasterizationDisabled) {
 		_pFragmentSS = nullptr;
 	}
@@ -911,6 +953,10 @@
 	if (!verifyImplicitBuffer(_needsVertexBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageVertex, "buffer size", vbCnt)) {
 		return false;
 	}
+	// Ditto dynamic offset buffer.
+	if (!verifyImplicitBuffer(_needsVertexDynamicOffsetBuffer, _dynamicOffsetBufferIndex, kMVKShaderStageVertex, "dynamic offset", vbCnt)) {
+		return false;
+	}
 	// Ditto captured output buffer.
 	if (!verifyImplicitBuffer(_needsVertexOutputBuffer, _outputBufferIndex, kMVKShaderStageVertex, "output", vbCnt)) {
 		return false;
@@ -935,6 +981,7 @@
 	shaderContext.options.mslOptions.shader_index_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageVertex];
 	shaderContext.options.mslOptions.shader_output_buffer_index = _outputBufferIndex.stages[kMVKShaderStageVertex];
 	shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageVertex];
+	shaderContext.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageVertex];
 	shaderContext.options.mslOptions.capture_output_to_buffer = true;
 	shaderContext.options.mslOptions.vertex_for_tessellation = true;
 	shaderContext.options.mslOptions.disable_rasterization = true;
@@ -946,9 +993,10 @@
 		CompilerMSL::Options::IndexType::UInt32,
 	};
 	// We need to compile this function three times, with no indexing, 16-bit indices, and 32-bit indices.
+	MVKMTLFunction func;
 	for (uint32_t i = 0; i < sizeof(indexTypes)/sizeof(indexTypes[0]); i++) {
 		shaderContext.options.mslOptions.vertex_index_type = indexTypes[i];
-		MVKMTLFunction func = ((MVKShaderModule*)_pVertexSS->module)->getMTLFunction(&shaderContext, _pVertexSS->pSpecializationInfo, _pipelineCache);
+		func = ((MVKShaderModule*)_pVertexSS->module)->getMTLFunction(&shaderContext, _pVertexSS->pSpecializationInfo, _pipelineCache);
 		id<MTLFunction> mtlFunc = func.getMTLFunction();
 		if ( !mtlFunc ) {
 			setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Vertex shader function could not be compiled into pipeline. See previous logged error."));
@@ -959,9 +1007,12 @@
 		auto& funcRslts = func.shaderConversionResults;
 		_needsVertexSwizzleBuffer = funcRslts.needsSwizzleBuffer;
 		_needsVertexBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+		_needsVertexDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer;
 		_needsVertexOutputBuffer = funcRslts.needsOutputBuffer;
 	}
 
+	addMTLArgumentEncoders(func, pCreateInfo, shaderContext, kMVKShaderStageVertex);
+
 	// If we need the swizzle buffer and there's no place to put it, we're in serious trouble.
 	if (!verifyImplicitBuffer(_needsVertexSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageVertex, "swizzle", vbCnt)) {
 		return false;
@@ -970,6 +1021,10 @@
 	if (!verifyImplicitBuffer(_needsVertexBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageVertex, "buffer size", vbCnt)) {
 		return false;
 	}
+	// Ditto dynamic offset buffer.
+	if (!verifyImplicitBuffer(_needsVertexDynamicOffsetBuffer, _dynamicOffsetBufferIndex, kMVKShaderStageVertex, "dynamic offset", vbCnt)) {
+		return false;
+	}
 	// Ditto captured output buffer.
 	if (!verifyImplicitBuffer(_needsVertexOutputBuffer, _outputBufferIndex, kMVKShaderStageVertex, "output", vbCnt)) {
 		return false;
@@ -993,6 +1048,7 @@
 	shaderContext.options.mslOptions.shader_patch_output_buffer_index = _tessCtlPatchOutputBufferIndex;
 	shaderContext.options.mslOptions.shader_tess_factor_buffer_index = _tessCtlLevelBufferIndex;
 	shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageTessCtl];
+	shaderContext.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageTessCtl];
 	shaderContext.options.mslOptions.capture_output_to_buffer = true;
 	shaderContext.options.mslOptions.multi_patch_workgroup = true;
 	shaderContext.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(_pTessCtlSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize;
@@ -1009,16 +1065,22 @@
 	auto& funcRslts = func.shaderConversionResults;
 	_needsTessCtlSwizzleBuffer = funcRslts.needsSwizzleBuffer;
 	_needsTessCtlBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+	_needsTessCtlDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer;
 	_needsTessCtlOutputBuffer = funcRslts.needsOutputBuffer;
 	_needsTessCtlPatchOutputBuffer = funcRslts.needsPatchOutputBuffer;
 	_needsTessCtlInputBuffer = funcRslts.needsInputThreadgroupMem;
 
+	addMTLArgumentEncoders(func, pCreateInfo, shaderContext, kMVKShaderStageTessCtl);
+
 	if (!verifyImplicitBuffer(_needsTessCtlSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageTessCtl, "swizzle", kMVKTessCtlNumReservedBuffers)) {
 		return false;
 	}
 	if (!verifyImplicitBuffer(_needsTessCtlBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageTessCtl, "buffer size", kMVKTessCtlNumReservedBuffers)) {
 		return false;
 	}
+	if (!verifyImplicitBuffer(_needsTessCtlDynamicOffsetBuffer, _dynamicOffsetBufferIndex, kMVKShaderStageTessCtl, "dynamic offset", kMVKTessCtlNumReservedBuffers)) {
+		return false;
+	}
 	if (!verifyImplicitBuffer(true, _indirectParamsIndex, kMVKShaderStageTessCtl, "indirect parameters", kMVKTessCtlNumReservedBuffers)) {
 		return false;
 	}
@@ -1044,6 +1106,7 @@
 	shaderContext.options.entryPointName = _pTessEvalSS->pName;
 	shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageTessEval];
 	shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageTessEval];
+	shaderContext.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageTessEval];
 	shaderContext.options.mslOptions.capture_output_to_buffer = false;
 	shaderContext.options.mslOptions.disable_rasterization = isRasterizationDisabled(pCreateInfo);
 	addPrevStageOutputToShaderConverterContext(shaderContext, tcOutputs);
@@ -1061,6 +1124,9 @@
 	plDesc.rasterizationEnabled = !funcRslts.isRasterizationDisabled;
 	_needsTessEvalSwizzleBuffer = funcRslts.needsSwizzleBuffer;
 	_needsTessEvalBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+	_needsTessEvalDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer;
+
+	addMTLArgumentEncoders(func, pCreateInfo, shaderContext, kMVKShaderStageTessEval);
 
 	if (funcRslts.isRasterizationDisabled) {
 		_pFragmentSS = nullptr;
@@ -1072,6 +1138,9 @@
 	if (!verifyImplicitBuffer(_needsTessEvalBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageTessEval, "buffer size", kMVKTessEvalNumReservedBuffers)) {
 		return false;
 	}
+	if (!verifyImplicitBuffer(_needsTessEvalDynamicOffsetBuffer, _dynamicOffsetBufferIndex, kMVKShaderStageTessEval, "dynamic offset", kMVKTessEvalNumReservedBuffers)) {
+		return false;
+	}
 	return true;
 }
 
@@ -1083,6 +1152,7 @@
 		shaderContext.options.entryPointStage = spv::ExecutionModelFragment;
 		shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageFragment];
 		shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageFragment];
+		shaderContext.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageFragment];
 		shaderContext.options.mslOptions.view_mask_buffer_index = _viewRangeBufferIndex.stages[kMVKShaderStageFragment];
 		shaderContext.options.entryPointName = _pFragmentSS->pName;
 		shaderContext.options.mslOptions.capture_output_to_buffer = false;
@@ -1109,13 +1179,20 @@
 		auto& funcRslts = func.shaderConversionResults;
 		_needsFragmentSwizzleBuffer = funcRslts.needsSwizzleBuffer;
 		_needsFragmentBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+		_needsFragmentDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer;
 		_needsFragmentViewRangeBuffer = funcRslts.needsViewRangeBuffer;
+
+		addMTLArgumentEncoders(func, pCreateInfo, shaderContext, kMVKShaderStageFragment);
+
 		if (!verifyImplicitBuffer(_needsFragmentSwizzleBuffer, _swizzleBufferIndex, kMVKShaderStageFragment, "swizzle", 0)) {
 			return false;
 		}
 		if (!verifyImplicitBuffer(_needsFragmentBufferSizeBuffer, _bufferSizeBufferIndex, kMVKShaderStageFragment, "buffer size", 0)) {
 			return false;
 		}
+		if (!verifyImplicitBuffer(_needsFragmentDynamicOffsetBuffer, _dynamicOffsetBufferIndex, kMVKShaderStageFragment, "dynamic offset", 0)) {
+			return false;
+		}
 		if (!verifyImplicitBuffer(_needsFragmentViewRangeBuffer, _viewRangeBufferIndex, kMVKShaderStageFragment, "view range", 0)) {
 			return false;
 		}
@@ -1448,10 +1525,16 @@
     shaderContext.options.mslOptions.r32ui_linear_texture_alignment = (uint32_t)_device->getVkFormatTexelBufferAlignment(VK_FORMAT_R32_UINT, this);
 	shaderContext.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers;
 
+	bool useMetalArgBuff = isUsingMetalArgumentBuffers();
+	shaderContext.options.mslOptions.argument_buffers = useMetalArgBuff;
+	shaderContext.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff;
+	shaderContext.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff;
+
     MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout;
     layout->populateShaderConverterContext(shaderContext);
     _swizzleBufferIndex = layout->getSwizzleBufferIndex();
     _bufferSizeBufferIndex = layout->getBufferSizeBufferIndex();
+	_dynamicOffsetBufferIndex = layout->getDynamicOffsetBufferIndex();
     _indirectParamsIndex = layout->getIndirectParamsIndex();
     _outputBufferIndex = layout->getOutputBufferIndex();
     _tessCtlPatchOutputBufferIndex = layout->getTessCtlPatchOutputBufferIndex();
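With argument_buffers enabled, SPIRV-Cross wraps each descriptor set in a struct bound as a single [[buffer(n)]] argument. A rough, illustrative sketch of the shape of the generated MSL (member names and types are hypothetical):

    #include <metal_stdlib>
    using namespace metal;

    struct spvDescriptorSetBuffer0 {
        constant float4* ubo [[id(0)]];
        texture2d<float> tex [[id(1)]];
        sampler smp [[id(2)]];
    };

    fragment float4 fragMain(constant spvDescriptorSetBuffer0& set0 [[buffer(0)]]) {
        return set0.tex.sample(set0.smp, float2(0.5)) * (*set0.ubo);
    }

This is why force_active_argument_buffer_resources and pad_argument_buffer_resources are enabled together: the struct layout must stay stable even when the shader does not use every descriptor in the set.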
@@ -1627,6 +1710,7 @@
     cmdEncoder->_mtlThreadgroupSize = _mtlThreadgroupSize;
 	cmdEncoder->_computeResourcesState.bindSwizzleBuffer(_swizzleBufferIndex, _needsSwizzleBuffer);
 	cmdEncoder->_computeResourcesState.bindBufferSizeBuffer(_bufferSizeBufferIndex, _needsBufferSizeBuffer);
+	cmdEncoder->_computeResourcesState.bindDynamicOffsetBuffer(_dynamicOffsetBufferIndex, _needsDynamicOffsetBuffer);
 }
 
 MVKComputePipeline::MVKComputePipeline(MVKDevice* device,
@@ -1637,6 +1721,9 @@
 
 	_allowsDispatchBase = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_DISPATCH_BASE_BIT);
 
+	if (isUsingMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); }
+	if (isUsingPipelineStageMetalArgumentBuffers()) { _mtlArgumentEncoders.resize(_descriptorSetCount); }
+
 	MVKMTLFunction func = getMTLFunction(pCreateInfo);
 	_mtlThreadgroupSize = func.threadGroupSize;
 	_mtlPipelineState = nil;
@@ -1669,6 +1756,9 @@
 	if (_needsBufferSizeBuffer && _bufferSizeBufferIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
 		setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires buffer size buffer, but there is no free slot to pass it."));
 	}
+	if (_needsDynamicOffsetBuffer && _dynamicOffsetBufferIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
+		setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires dynamic offset buffer, but there is no free slot to pass it."));
+	}
 	if (_needsDispatchBaseBuffer && _indirectParamsIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
 		setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires dispatch base buffer, but there is no free slot to pass it."));
 	}
@@ -1691,6 +1781,12 @@
 	shaderContext.options.mslOptions.dispatch_base = _allowsDispatchBase;
 	shaderContext.options.mslOptions.texture_1D_as_2D = mvkConfig()->texture1DAs2D;
     shaderContext.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize;
+
+	bool useMetalArgBuff = isUsingMetalArgumentBuffers();
+	shaderContext.options.mslOptions.argument_buffers = useMetalArgBuff;
+	shaderContext.options.mslOptions.force_active_argument_buffer_resources = useMetalArgBuff;
+	shaderContext.options.mslOptions.pad_argument_buffer_resources = useMetalArgBuff;
+
 #if MVK_MACOS
     shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
 #endif
@@ -1703,8 +1799,10 @@
     layout->populateShaderConverterContext(shaderContext);
     _swizzleBufferIndex = layout->getSwizzleBufferIndex();
     _bufferSizeBufferIndex = layout->getBufferSizeBufferIndex();
+	_dynamicOffsetBufferIndex = layout->getDynamicOffsetBufferIndex();
     shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageCompute];
     shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageCompute];
+	shaderContext.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageCompute];
     shaderContext.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageCompute];
 
     MVKMTLFunction func = ((MVKShaderModule*)pSS->module)->getMTLFunction(&shaderContext, pSS->pSpecializationInfo, _pipelineCache);
@@ -1712,8 +1810,11 @@
 	auto& funcRslts = func.shaderConversionResults;
 	_needsSwizzleBuffer = funcRslts.needsSwizzleBuffer;
     _needsBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+	_needsDynamicOffsetBuffer = funcRslts.needsDynamicOffsetBuffer;
     _needsDispatchBaseBuffer = funcRslts.needsDispatchBaseBuffer;
 
+	addMTLArgumentEncoders(func, pCreateInfo, shaderContext, kMVKShaderStageCompute);
+
 	return func;
 }
 
@@ -1998,6 +2099,7 @@
 				opt.enable_decoration_binding,
 				opt.texture_buffer_native,
 				opt.force_active_argument_buffer_resources,
+				opt.pad_argument_buffer_resources,
 				opt.force_native_arrays,
 				opt.enable_clip_distance_user_varying,
 				opt.multi_patch_workgroup,
@@ -2020,6 +2122,7 @@
 	template<class Archive>
 	void serialize(Archive & archive, MSLResourceBinding& rb) {
 		archive(rb.stage,
+				rb.basetype,
 				rb.desc_set,
 				rb.binding,
 				rb.count,
@@ -2091,7 +2194,7 @@
 	void serialize(Archive & archive, MSLShaderInput& si) {
 		archive(si.shaderInput,
 				si.binding,
-				si.isUsedByShader);
+				si.outIsUsedByShader);
 	}
 
 	template<class Archive>
@@ -2099,14 +2202,23 @@
 		archive(rb.resourceBinding,
 				rb.constExprSampler,
 				rb.requiresConstExprSampler,
-				rb.isUsedByShader);
+				rb.outIsUsedByShader);
+	}
+
+	template<class Archive>
+	void serialize(Archive & archive, DescriptorBinding& db) {
+		archive(db.stage,
+				db.descriptorSet,
+				db.binding,
+				db.index);
 	}
 
 	template<class Archive>
 	void serialize(Archive & archive, SPIRVToMSLConversionConfiguration& ctx) {
 		archive(ctx.options,
 				ctx.shaderInputs,
-				ctx.resourceBindings);
+				ctx.resourceBindings,
+				ctx.discreteDescriptorSets);
 	}
 
 	template<class Archive>
@@ -2118,6 +2230,7 @@
 				scr.needsOutputBuffer,
 				scr.needsPatchOutputBuffer,
 				scr.needsBufferSizeBuffer,
+				scr.needsDynamicOffsetBuffer,
 				scr.needsInputThreadgroupMem,
 				scr.needsDispatchBaseBuffer,
 				scr.needsViewRangeBuffer);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h
index 7b45d55..1fa007e 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h
@@ -46,10 +46,12 @@
 
 	MVKMTLFunction(id<MTLFunction> mtlFunc, const SPIRVToMSLConversionResults scRslts, MTLSize tgSize);
 	MVKMTLFunction(const MVKMTLFunction& other);
+	MVKMTLFunction& operator=(const MVKMTLFunction& other);
+	MVKMTLFunction() {}
 	~MVKMTLFunction();
 
 private:
-	id<MTLFunction> _mtlFunction;
+	id<MTLFunction> _mtlFunction = nil;
 
 } MVKMTLFunction;
 
@@ -92,9 +94,10 @@
 					 const void* mslCompiledCodeData,
 					 size_t mslCompiledCodeLength);
 
-	/** Copy constructor. */
 	MVKShaderLibrary(const MVKShaderLibrary& other);
 
+	MVKShaderLibrary& operator=(const MVKShaderLibrary& other);
+
 	~MVKShaderLibrary() override;
 
 protected:
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
index dead568..8fa6594 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
@@ -36,6 +36,14 @@
 	threadGroupSize = other.threadGroupSize;
 }
 
+MVKMTLFunction& MVKMTLFunction::operator=(const MVKMTLFunction& other) {
+	[_mtlFunction release];
+	_mtlFunction = [other._mtlFunction retain];		// retained
+	shaderConversionResults = other.shaderConversionResults;
+	threadGroupSize = other.threadGroupSize;
+	return *this;
+}
+
 MVKMTLFunction::~MVKMTLFunction() {
 	[_mtlFunction release];
 }
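Note that releasing before retaining is safe here only because self-assignment is not expected; the conventional Objective-C idiom that also tolerates self-assignment retains first. Shown as a general sketch, not as part of this change:

    id<MTLFunction> newFunc = [other._mtlFunction retain];	// retain the source first
    [_mtlFunction release];					// then release the old value
    _mtlFunction = newFunc;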
@@ -163,12 +171,22 @@
     mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslLoad, startTime);
 }
 
-MVKShaderLibrary::MVKShaderLibrary(const MVKShaderLibrary& other) : _owner(other._owner) {
+MVKShaderLibrary::MVKShaderLibrary(const MVKShaderLibrary& other) {
+	_owner = other._owner;
 	_mtlLibrary = [other._mtlLibrary retain];
 	_shaderConversionResults = other._shaderConversionResults;
 	_msl = other._msl;
 }
 
+MVKShaderLibrary& MVKShaderLibrary::operator=(const MVKShaderLibrary& other) {
+	[_mtlLibrary release];
+	_owner = other._owner;
+	_mtlLibrary = [other._mtlLibrary retain];
+	_shaderConversionResults = other._shaderConversionResults;
+	_msl = other._msl;
+	return *this;
+}
+
 // If err object is nil, the compilation succeeded without any warnings.
 // If err object exists, and the MTLLibrary was created, the compilation succeeded, but with warnings.
 // If err object exists, and the MTLLibrary was not created, the compilation failed.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.h b/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.h
index 98d5d68..cbb0219 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.h
@@ -96,7 +96,10 @@
 	MVKVulkanAPIObject() : _refCount(1) {}
 
 	/** Default copy constructor disallowed due to mutex. Copy starts with fresh reference counts. */
-	MVKVulkanAPIObject(const MVKVulkanAPIObject& other) : _refCount(1) {}
+	MVKVulkanAPIObject(const MVKVulkanAPIObject& other);
+
+	/** Default copy assignment disallowed due to mutex. Copy starts with fresh reference counts. */
+	MVKVulkanAPIObject& operator=(const MVKVulkanAPIObject& other);
 
 	~MVKVulkanAPIObject() override;
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.mm b/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.mm
index 07971cd..5df32e1 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKVulkanAPIObject.mm
@@ -61,6 +61,18 @@
 	}
 }
 
+MVKVulkanAPIObject::MVKVulkanAPIObject(const MVKVulkanAPIObject& other) {
+	_refCount = 1;
+	_debugName = [other._debugName retain];
+}
+
+MVKVulkanAPIObject& MVKVulkanAPIObject::operator=(const MVKVulkanAPIObject& other) {
+	[_debugName release];
+	_refCount = 1;
+	_debugName = [other._debugName retain];
+	return *this;
+}
+
 MVKVulkanAPIObject::~MVKVulkanAPIObject() {
 	[_debugName release];
 }
diff --git a/MoltenVK/MoltenVK/Utility/MVKBitArray.h b/MoltenVK/MoltenVK/Utility/MVKBitArray.h
index 4016547..719464e 100755
--- a/MoltenVK/MoltenVK/Utility/MVKBitArray.h
+++ b/MoltenVK/MoltenVK/Utility/MVKBitArray.h
@@ -28,37 +28,43 @@
 class MVKBitArray {
 
 	static constexpr size_t SectionMaskSize = 6;	// 64 bits
-	static constexpr size_t SectionBitCount = 1U << SectionMaskSize;
+	static constexpr size_t SectionBitCount = (size_t)1U << SectionMaskSize;
 	static constexpr size_t SectionByteCount = SectionBitCount / 8;
 	static constexpr uint64_t SectionMask = SectionBitCount - 1;
 
 public:
 
-	/** Returns the value of the bit. */
-	inline bool getBit(size_t bitIndex) {
-		return mvkIsAnyFlagEnabled(_pSections[getIndexOfSection(bitIndex)], getSectionSetMask(bitIndex));
+	/**
+	 * Returns the value of the bit, and optionally clears that bit if it was set.
+	 * Returns false if bitIndex is beyond the size of this array.
+	 */
+	bool getBit(size_t bitIndex, bool shouldClear = false) {
+		if (bitIndex >= _bitCount) { return false; }
+		bool val = mvkIsAnyFlagEnabled(getSection(getIndexOfSection(bitIndex)), getSectionSetMask(bitIndex));
+		if (shouldClear && val) { clearBit(bitIndex); }
+		return val;
 	}
 
 	/** Sets the value of the bit to the val (or to 1 by default). */
-	inline void setBit(size_t bitIndex, bool val = true) {
+	void setBit(size_t bitIndex, bool val = true) {
 		size_t secIdx = getIndexOfSection(bitIndex);
 		if (val) {
-			mvkEnableFlags(_pSections[secIdx], getSectionSetMask(bitIndex));
+			mvkEnableFlags(getSection(secIdx), getSectionSetMask(bitIndex));
 			if (secIdx < _minUnclearedSectionIndex) { _minUnclearedSectionIndex = secIdx; }
 		} else {
-			mvkDisableFlags(_pSections[secIdx], getSectionSetMask(bitIndex));
-			if (secIdx == _minUnclearedSectionIndex && !_pSections[secIdx]) { _minUnclearedSectionIndex++; }
+			mvkDisableFlags(getSection(secIdx), getSectionSetMask(bitIndex));
+			if (secIdx == _minUnclearedSectionIndex && !getSection(secIdx)) { _minUnclearedSectionIndex++; }
 		}
 	}
 
 	/** Sets the value of the bit to 0. */
-	inline void clearBit(size_t bitIndex) { setBit(bitIndex, false); }
+	void clearBit(size_t bitIndex) { setBit(bitIndex, false); }
 
 	/** Sets all bits in the array to 1. */
-	inline void setAllBits() { setAllSections(~0); }
+	void setAllBits() { setAllSections(~0); }
 
 	/** Clears all bits in the array to 0. */
-	inline void clearAllBits() { setAllSections(0); }
+	void clearAllBits() { setAllSections(0); }
 
 	/**
 	 * Returns the index of the first bit that is set, at or after the specified index,
@@ -69,10 +75,10 @@
 		size_t bitIdx = startSecIdx << SectionMaskSize;
 		size_t secCnt = getSectionCount();
 		for (size_t secIdx = startSecIdx; secIdx < secCnt; secIdx++) {
-			size_t lclBitIdx = getIndexOfFirstSetBitInSection(_pSections[secIdx], getBitIndexInSection(startIndex));
+			size_t lclBitIdx = getIndexOfFirstSetBitInSection(getSection(secIdx), getBitIndexInSection(startIndex));
 			bitIdx += lclBitIdx;
 			if (lclBitIdx < SectionBitCount) {
-				if (startSecIdx == _minUnclearedSectionIndex && !_pSections[startSecIdx]) { _minUnclearedSectionIndex = secIdx; }
+				if (startSecIdx == _minUnclearedSectionIndex && !getSection(startSecIdx)) { _minUnclearedSectionIndex = secIdx; }
 				if (shouldClear) { clearBit(bitIdx); }
 				return bitIdx;
 			}
@@ -84,7 +90,7 @@
 	 * Returns the index of the first bit that is set, at or after the specified index.
 	 * If no bits are set, returns the size() of this bit array.
 	 */
-	inline size_t getIndexOfFirstSetBit(size_t startIndex) {
+	size_t getIndexOfFirstSetBit(size_t startIndex) {
 		return getIndexOfFirstSetBit(startIndex, false);
 	}
 
@@ -92,7 +98,7 @@
 	 * Returns the index of the first bit that is set and optionally clears that bit.
 	 * If no bits are set, returns the size() of this bit array.
 	 */
-	inline size_t getIndexOfFirstSetBit(bool shouldClear) {
+	size_t getIndexOfFirstSetBit(bool shouldClear) {
 		return getIndexOfFirstSetBit(0, shouldClear);
 	}
 
@@ -100,57 +106,139 @@
 	 * Returns the index of the first bit that is set.
 	 * If no bits are set, returns the size() of this bit array.
 	 */
-	inline size_t getIndexOfFirstSetBit() {
+	size_t getIndexOfFirstSetBit() {
 		return getIndexOfFirstSetBit(0, false);
 	}
 
+	/**
+	 * Enumerates the bits, executing a custom function on each bit that is enabled.
+	 *
+	 * The function to execute is passed a bitIndex parameter which indicates
+	 * the index of the bit for which the function is executing.
+	 *
+	 * The custom function should return true to continue processing further bits, or false
+	 * to stop processing further bits. This function returns false if any of the invocations
+	 * of the custom function halted further invocations, and returns true otherwise.
+	 *
+	 * If shouldClear is true, each enabled bit is cleared before the custom function executes.
+	 */
+	bool enumerateEnabledBits(bool shouldClear, std::function<bool(size_t bitIndex)> func) {
+		for (size_t bitIdx = getIndexOfFirstSetBit(shouldClear);
+			 bitIdx < _bitCount;
+			 bitIdx = getIndexOfFirstSetBit(++bitIdx, shouldClear)) {
+
+			if ( !func(bitIdx) ) { return false; }
+		}
+		return true;
+	}
+
 	/** Returns the number of bits in this array. */
-	inline size_t size() { return _bitCount; }
+	size_t size() const { return _bitCount; }
 
 	/** Returns whether this array is empty. */
-	inline bool empty() { return !_bitCount; }
+	bool empty() const { return !_bitCount; }
 
-	/** Resize this array to the specified number of bits, and sets the initial value of all the bits. */
-	inline void resize(size_t size = 0, bool val = false) {
-		free(_pSections);
+	/**
+	 * Resize this array to the specified number of bits.
+	 *
+	 * The value of existing bits that fit within the new size are retained, and any
+	 * new bits that are added to accommodate the new size are set to the given value.
+	 *
+	 * If the new size is larger than the existing size, new memory may be allocated.
+	 * If the new size is less than the existing size, consumed memory is retained
+	 * unless the size is set to zero.
+	 */
+	void resize(size_t size, bool val = false) {
+		size_t oldBitCnt = _bitCount;
+		size_t oldSecCnt = getSectionCount();
+		size_t oldEndBitCnt = oldSecCnt << SectionMaskSize;
+
+		// Some magic here. If we need only one section, _data is used as that section,
+		// and it will be stomped on if we reallocate, so we cache it here.
+		uint64_t* oldData = _data;
+		uint64_t* pOldData = oldSecCnt > 1 ? oldData : (uint64_t*)&oldData;
 
 		_bitCount = size;
-		_pSections = _bitCount ? (uint64_t*)malloc(getSectionCount() * SectionByteCount) : nullptr;
-		if (val) {
-			setAllBits();
-		} else {
-			clearAllBits();
+
+		size_t newSecCnt = getSectionCount();
+		if (newSecCnt == 0) {
+			// Clear out the existing data
+			if (oldSecCnt > 1) { free(pOldData); }
+			_data = 0;
+			_minUnclearedSectionIndex = 0;
+		} else if (newSecCnt == oldSecCnt) {
+			// Keep the existing data, but fill any bits in the last section
+			// that were beyond the old bit count with the new initial value.
+			for (size_t bitIdx = oldBitCnt; bitIdx < oldEndBitCnt; bitIdx++) { setBit(bitIdx, val); }
+		} else if (newSecCnt > oldSecCnt) {
+			size_t oldByteCnt = oldSecCnt * SectionByteCount;
+			size_t newByteCnt = newSecCnt * SectionByteCount;
+
+			// If needed, allocate new memory.
+			if (newSecCnt > 1) { _data = (uint64_t*)malloc(newByteCnt); }
+
+			// Fill the new memory with the new initial value, copy the old contents to
+			// the new memory, fill any bits in the old last section that were beyond
+			// the old bit count with the new initial value, and remove the old memory.
+			uint64_t* pNewData = getData();
+			memset(pNewData, val ? ~0 : 0, newByteCnt);
+			memcpy(pNewData, pOldData, oldByteCnt);
+			for (size_t bitIdx = oldBitCnt; bitIdx < oldEndBitCnt; bitIdx++) { setBit(bitIdx, val); }
+			if (oldSecCnt > 1) { free(pOldData); }
+
+			// If the entire old array and the new array are cleared, move the uncleared indicator to the new end.
+			if (_minUnclearedSectionIndex == oldSecCnt && !val) { _minUnclearedSectionIndex = newSecCnt; }
 		}
 	}
 
 	/** Constructs an instance for the specified number of bits, and sets the initial value of all the bits. */
-	MVKBitArray(size_t size = 0, bool val = false) {
-		_pSections = nullptr;
-		resize(size, val);
+	MVKBitArray(size_t size = 0, bool val = false) { resize(size, val); }
+
+	MVKBitArray(const MVKBitArray& other) {
+		resize(other._bitCount);
+		memcpy(getData(), other.getData(), getSectionCount() * SectionByteCount);
 	}
 
-	~MVKBitArray() { free(_pSections); }
+	MVKBitArray& operator=(const MVKBitArray& other) {
+		resize(0);
+		resize(other._bitCount);
+		memcpy(getData(), other.getData(), getSectionCount() * SectionByteCount);
+		return *this;
+	}
+
+	~MVKBitArray() { resize(0); }
 
 protected:
 
+	// Returns a pointer to the data.
+	// Some magic here. If we need only one section, _data is used as that section.
+	uint64_t* getData() const {
+		return getSectionCount() > 1 ? _data : (uint64_t*)&_data;
+	}
+
+	// Returns a reference to the section.
+	uint64_t& getSection(size_t secIdx) {
+		return getData()[secIdx];
+	}
+
 	// Returns the number of sections.
-	inline size_t getSectionCount() {
+	size_t getSectionCount() const {
 		return _bitCount ? getIndexOfSection(_bitCount - 1) + 1 : 0;
 	}
 
 	// Returns the index of the section that contains the specified bit.
-	static inline size_t getIndexOfSection(size_t bitIndex) {
+	static size_t getIndexOfSection(size_t bitIndex) {
 		return bitIndex >> SectionMaskSize;
 	}
 
 	// Converts the bit index to a local bit index within a section, and returns that local bit index.
-	static inline size_t getBitIndexInSection(size_t bitIndex) {
+	static size_t getBitIndexInSection(size_t bitIndex) {
 		return bitIndex & SectionMask;
 	}
 
 	// Returns a section mask containing a single 1 value in the bit in the section that
 	// corresponds to the specified global bit index, and 0 values in all other bits.
-	static inline uint64_t getSectionSetMask(size_t bitIndex) {
+	static uint64_t getSectionSetMask(size_t bitIndex) {
 		return (uint64_t)1U << ((SectionBitCount - 1) - getBitIndexInSection(bitIndex));
 	}
 
@@ -168,12 +256,12 @@
 	void setAllSections(uint64_t sectionValue) {
 		size_t secCnt = getSectionCount();
 		for (size_t secIdx = 0; secIdx < secCnt; secIdx++) {
-			_pSections[secIdx] = sectionValue;
+			getSection(secIdx) = sectionValue;
 		}
 		_minUnclearedSectionIndex = sectionValue ? 0 : secCnt;
	}
 
-	uint64_t* _pSections;
-	size_t _bitCount;
-	size_t _minUnclearedSectionIndex;	// Tracks where to start looking for bits that are set
+	uint64_t* _data = 0;
+	size_t _bitCount = 0;
+	size_t _minUnclearedSectionIndex = 0;	// Tracks where to start looking for bits that are set
 };
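A minimal usage sketch of the revised MVKBitArray API (variable names are illustrative):

    MVKBitArray descUse(128);		// 128 bits, initially clear
    descUse.setBit(3);
    descUse.setBit(77);
    descUse.resize(200);		// grows; bits 3 and 77 are retained
    descUse.enumerateEnabledBits(true, [](size_t bitIndex) {
        // called for bit 3, then bit 77; each is cleared before this runs
        return true;			// continue enumerating
    });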

diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
index cd1f956..c3b892a 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
@@ -61,6 +61,7 @@
 	MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.apiVersionToAdvertise,                  MVK_CONFIG_API_VERSION_TO_ADVERTISE);
 	MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.advertiseExtensions,                    MVK_CONFIG_ADVERTISE_EXTENSIONS);
 	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.resumeLostDevice,                       MVK_CONFIG_RESUME_LOST_DEVICE);
+	MVK_SET_FROM_ENV_OR_BUILD_BOOL  (evCfg.useMetalArgumentBuffers,                MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS);
 
 	mvkSetConfig(&evCfg);
 }
diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
index 036dbbd..5f6c821 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
@@ -277,3 +277,8 @@
 #ifndef MVK_CONFIG_RESUME_LOST_DEVICE
 #   define MVK_CONFIG_RESUME_LOST_DEVICE    0
 #endif
+
+/** Support Metal argument buffers. Disabled by default. */
+#ifndef MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS
+#   define MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS    0
+#endif
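The new setting follows the standard MoltenVK configuration mechanics: it can be set with the build-time macro above, with the MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS environment variable, or programmatically before creating the VkInstance. A sketch of the programmatic route, using the vkGetMoltenVKConfigurationMVK()/vkSetMoltenVKConfigurationMVK() functions from vk_mvk_moltenvk.h (the instance argument is shown as VK_NULL_HANDLE for illustration; error handling omitted):

    MVKConfiguration mvkCfg;
    size_t cfgSize = sizeof(mvkCfg);
    vkGetMoltenVKConfigurationMVK(VK_NULL_HANDLE, &mvkCfg, &cfgSize);
    mvkCfg.useMetalArgumentBuffers = VK_TRUE;
    vkSetMoltenVKConfigurationMVK(VK_NULL_HANDLE, &mvkCfg, &cfgSize);
    // ...then create the VkInstance.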
diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVReflection.h b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVReflection.h
index 5d851da..a9305ee 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVReflection.h
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVReflection.h
@@ -26,6 +26,7 @@
 #include <string>
 #include <vector>
 
+
 namespace mvk {
 
 #pragma mark -
diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
index 996374a..7142113 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
@@ -32,6 +32,13 @@
 #pragma mark -
 #pragma mark SPIRVToMSLConversionConfiguration
 
+// Returns whether the container contains an item equal to the value.
+template<class C, class T>
+bool contains(const C& container, const T& val) {
+	for (const T& cVal : container) { if (cVal == val) { return true; } }
+	return false;
+}
+
 // Returns whether the vector contains the value (using a matches(T&) comparison member function).
 template<class T>
 bool containsMatching(const vector<T>& vec, const T& val) {
@@ -100,13 +107,13 @@
 
 MVK_PUBLIC_SYMBOL bool mvk::MSLResourceBinding::matches(const MSLResourceBinding& other) const {
 	if (resourceBinding.stage != other.resourceBinding.stage) { return false; }
+	if (resourceBinding.basetype != other.resourceBinding.basetype) { return false; }
 	if (resourceBinding.desc_set != other.resourceBinding.desc_set) { return false; }
 	if (resourceBinding.binding != other.resourceBinding.binding) { return false; }
 	if (resourceBinding.count != other.resourceBinding.count) { return false; }
 	if (resourceBinding.msl_buffer != other.resourceBinding.msl_buffer) { return false; }
 	if (resourceBinding.msl_texture != other.resourceBinding.msl_texture) { return false; }
 	if (resourceBinding.msl_sampler != other.resourceBinding.msl_sampler) { return false; }
-
 	if (requiresConstExprSampler != other.requiresConstExprSampler) { return false; }
 
 	// If requiresConstExprSampler is false, constExprSampler can be ignored
@@ -144,6 +151,14 @@
 	return true;
 }
 
+MVK_PUBLIC_SYMBOL bool mvk::DescriptorBinding::matches(const mvk::DescriptorBinding& other) const {
+	if (stage != other.stage) { return false; }
+	if (descriptorSet != other.descriptorSet) { return false; }
+	if (binding != other.binding) { return false; }
+	if (index != other.index) { return false; }
+	return true;
+}
+
 MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::stageSupportsVertexAttributes() const {
 	return (options.entryPointStage == ExecutionModelVertex ||
 			options.entryPointStage == ExecutionModelTessellationControl ||
@@ -153,7 +168,7 @@
 // Check them all in case inactive VA's duplicate locations used by active VA's.
 MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::isShaderInputLocationUsed(uint32_t location) const {
     for (auto& si : shaderInputs) {
-        if ((si.shaderInput.location == location) && si.isUsedByShader) { return true; }
+        if ((si.shaderInput.location == location) && si.outIsUsedByShader) { return true; }
     }
     return false;
 }
@@ -161,14 +176,24 @@
 MVK_PUBLIC_SYMBOL uint32_t SPIRVToMSLConversionConfiguration::countShaderInputsAt(uint32_t binding) const {
 	uint32_t siCnt = 0;
 	for (auto& si : shaderInputs) {
-		if ((si.binding == binding) && si.isUsedByShader) { siCnt++; }
+		if ((si.binding == binding) && si.outIsUsedByShader) { siCnt++; }
 	}
 	return siCnt;
 }
 
+MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::isResourceUsed(ExecutionModel stage, uint32_t descSet, uint32_t binding) const {
+	for (auto& rb : resourceBindings) {
+		auto& rbb = rb.resourceBinding;
+		if (rbb.stage == stage && rbb.desc_set == descSet && rbb.binding == binding) {
+			return rb.outIsUsedByShader;
+		}
+	}
+	return false;
+}
+
 MVK_PUBLIC_SYMBOL void SPIRVToMSLConversionConfiguration::markAllInputsAndResourcesUsed() {
-	for (auto& si : shaderInputs) { si.isUsedByShader = true; }
-	for (auto& rb : resourceBindings) { rb.isUsedByShader = true; }
+	for (auto& si : shaderInputs) { si.outIsUsedByShader = true; }
+	for (auto& rb : resourceBindings) { rb.outIsUsedByShader = true; }
 }
 
 MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::matches(const SPIRVToMSLConversionConfiguration& other) const {
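
For illustration, a standalone stand-in for the isResourceUsed() query added above: the first entry matching (stage, set, binding) wins, and a binding that was never declared reports as unused:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    enum class Stage { Vertex, Fragment };

    struct RB { Stage stage; uint32_t descSet; uint32_t binding; bool outIsUsedByShader; };

    bool isResourceUsed(const std::vector<RB>& bindings, Stage stage, uint32_t descSet, uint32_t binding) {
        for (auto& rb : bindings) {
            if (rb.stage == stage && rb.descSet == descSet && rb.binding == binding) {
                return rb.outIsUsedByShader;
            }
        }
        return false;    // not declared => not used
    }

    int main() {
        std::vector<RB> rbs = { { Stage::Fragment, 0, 3, true } };
        assert( isResourceUsed(rbs, Stage::Fragment, 0, 3));
        assert( !isResourceUsed(rbs, Stage::Vertex, 0, 3));
        return 0;
    }
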
@@ -176,13 +201,21 @@
     if ( !options.matches(other.options) ) { return false; }
 
 	for (const auto& si : shaderInputs) {
-		if (si.isUsedByShader && !containsMatching(other.shaderInputs, si)) { return false; }
+		if (si.outIsUsedByShader && !containsMatching(other.shaderInputs, si)) { return false; }
 	}
 
     for (const auto& rb : resourceBindings) {
-        if (rb.isUsedByShader && !containsMatching(other.resourceBindings, rb)) { return false; }
+        if (rb.outIsUsedByShader && !containsMatching(other.resourceBindings, rb)) { return false; }
     }
 
+	for (uint32_t dsIdx : discreteDescriptorSets) {
+		if ( !contains(other.discreteDescriptorSets, dsIdx)) { return false; }
+	}
+
+	for (const auto& db : dynamicBufferDescriptors) {
+		if ( !containsMatching(other.dynamicBufferDescriptors, db)) { return false; }
+	}
+
     return true;
 }
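
For illustration, this matches() is one-directional: only the entries this configuration actually uses must be present in the other, so unused entries here, and extra entries there, never block a pipeline-cache hit. A minimal sketch:

    #include <cassert>
    #include <vector>

    struct Item {
        int id;
        bool outIsUsedByShader;
        bool matches(const Item& other) const { return id == other.id; }
    };

    // Every *used* entry in 'mine' must have a match in 'other'; nothing else matters.
    bool configMatches(const std::vector<Item>& mine, const std::vector<Item>& other) {
        for (auto& it : mine) {
            if ( !it.outIsUsedByShader) { continue; }
            bool found = false;
            for (auto& ot : other) { if (it.matches(ot)) { found = true; break; } }
            if ( !found) { return false; }
        }
        return true;
    }

    int main() {
        std::vector<Item> mine  = { { 1, true }, { 2, false } };
        std::vector<Item> other = { { 1, true } };
        assert( configMatches(mine, other));    // unused id 2 doesn't block the match
        other[0].id = 3;
        assert( !configMatches(mine, other));   // used id 1 is now missing
        return 0;
    }
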
 
@@ -190,16 +223,18 @@
 MVK_PUBLIC_SYMBOL void SPIRVToMSLConversionConfiguration::alignWith(const SPIRVToMSLConversionConfiguration& srcContext) {
 
 	for (auto& si : shaderInputs) {
-		si.isUsedByShader = false;
+		si.outIsUsedByShader = false;
 		for (auto& srcSI : srcContext.shaderInputs) {
-			if (si.matches(srcSI)) { si.isUsedByShader = srcSI.isUsedByShader; }
+			if (si.matches(srcSI)) { si.outIsUsedByShader = srcSI.outIsUsedByShader; }
 		}
 	}
 
     for (auto& rb : resourceBindings) {
-        rb.isUsedByShader = false;
+        rb.outIsUsedByShader = false;
         for (auto& srcRB : srcContext.resourceBindings) {
-            if (rb.matches(srcRB)) { rb.isUsedByShader = srcRB.isUsedByShader; }
+			if (rb.matches(srcRB)) {
+				rb.outIsUsedByShader = srcRB.outIsUsedByShader;
+			}
         }
     }
 }
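
For illustration, a standalone sketch of what alignWith() does: reset each usage flag, then copy it from any identity-matching entry in the source configuration:

    #include <cassert>
    #include <vector>

    struct Item {
        int id;
        bool outIsUsedByShader;
        bool matches(const Item& other) const { return id == other.id; }
    };

    void alignWith(std::vector<Item>& mine, const std::vector<Item>& src) {
        for (auto& it : mine) {
            it.outIsUsedByShader = false;    // default to unused
            for (auto& s : src) {
                if (it.matches(s)) { it.outIsUsedByShader = s.outIsUsedByShader; }
            }
        }
    }

    int main() {
        std::vector<Item> mine = { { 1, true }, { 2, true } };
        std::vector<Item> src  = { { 1, true } };
        alignWith(mine, src);
        assert( mine[0].outIsUsedByShader);     // copied from src
        assert( !mine[1].outIsUsedByShader);    // no source entry => reset
        return 0;
    }
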
@@ -278,6 +313,21 @@
 			}
 		}
 
+		// Add any descriptor sets that are not using Metal argument buffers.
+		// This only has an effect if SPIRVToMSLConversionConfiguration::options::mslOptions::argument_buffers is enabled.
+		for (uint32_t dsIdx : context.discreteDescriptorSets) {
+			pMSLCompiler->add_discrete_descriptor_set(dsIdx);
+		}
+
+		// Add any dynamic buffer bindings.
+	// This only applies if SPIRVToMSLConversionConfiguration::options::mslOptions::argument_buffers is enabled.
+		if (context.options.mslOptions.argument_buffers) {
+			for (auto& db : context.dynamicBufferDescriptors) {
+				if (db.stage == context.options.entryPointStage) {
+					pMSLCompiler->add_dynamic_buffer(db.descriptorSet, db.binding, db.index);
+				}
+			}
+		}
 		_msl = pMSLCompiler->compile();
 
         if (shouldLogMSL) { logSource(_msl, "MSL", "Converted"); }
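
For illustration, a hedged standalone sketch of the SPIRV-Cross calls this hunk drives (the methods all appear in the diff; spirvWords is assumed to hold a valid SPIR-V binary, and the set/binding numbers are placeholders):

    #include <spirv_msl.hpp>
    #include <string>
    #include <utility>
    #include <vector>

    std::string convertWithArgumentBuffers(std::vector<uint32_t> spirvWords) {
        spirv_cross::CompilerMSL compiler(std::move(spirvWords));

        auto mslOpts = compiler.get_msl_options();
        mslOpts.argument_buffers = true;            // pack descriptor sets into Metal argument buffers
        compiler.set_msl_options(mslOpts);

        compiler.add_discrete_descriptor_set(1);    // but keep descriptor set 1 discretely bound
        compiler.add_dynamic_buffer(0, 2, 0);       // set 0, binding 2 reads its offset at index 0

        return compiler.compile();
    }
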
@@ -307,13 +357,26 @@
 	_shaderConversionResults.needsDispatchBaseBuffer = pMSLCompiler && pMSLCompiler->needs_dispatch_base_buffer();
 	_shaderConversionResults.needsViewRangeBuffer = pMSLCompiler && pMSLCompiler->needs_view_mask_buffer();
 
+	// When using Metal argument buffers, if the shader is provided with dynamic buffer offsets,
+	// then it needs a buffer to hold these dynamic offsets.
+	_shaderConversionResults.needsDynamicOffsetBuffer = false;
+	if (context.options.mslOptions.argument_buffers) {
+		for (auto& db : context.dynamicBufferDescriptors) {
+			if (db.stage == context.options.entryPointStage) {
+				_shaderConversionResults.needsDynamicOffsetBuffer = true;
+			}
+		}
+	}
+
 	for (auto& ctxSI : context.shaderInputs) {
-		ctxSI.isUsedByShader = pMSLCompiler->is_msl_shader_input_used(ctxSI.shaderInput.location);
+		ctxSI.outIsUsedByShader = pMSLCompiler->is_msl_shader_input_used(ctxSI.shaderInput.location);
 	}
 	for (auto& ctxRB : context.resourceBindings) {
-		ctxRB.isUsedByShader = pMSLCompiler->is_msl_resource_binding_used(ctxRB.resourceBinding.stage,
-																		  ctxRB.resourceBinding.desc_set,
-																		  ctxRB.resourceBinding.binding);
+		if (ctxRB.resourceBinding.stage == context.options.entryPointStage) {
+			ctxRB.outIsUsedByShader = pMSLCompiler->is_msl_resource_binding_used(ctxRB.resourceBinding.stage,
+																				 ctxRB.resourceBinding.desc_set,
+																				 ctxRB.resourceBinding.binding);
+		}
 	}
 
 	delete pMSLCompiler;
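
For illustration, a short sketch of the post-compile reflection queries used above; both are SPIRV-Cross CompilerMSL methods (visible in the diff) and are only meaningful after compile() has run:

    #include <spirv_msl.hpp>

    // Reads back whether the generated MSL actually references a given
    // vertex-input location and a given (stage, set, binding) resource.
    void queryUsage(spirv_cross::CompilerMSL& compiler) {
        bool inputUsed   = compiler.is_msl_shader_input_used(/*location*/ 0);
        bool bindingUsed = compiler.is_msl_resource_binding_used(spv::ExecutionModelFragment,
                                                                 /*desc_set*/ 0, /*binding*/ 3);
        (void)inputUsed; (void)bindingUsed;    // the diff stores these in the out-prefixed flags
    }
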
diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
index 0a360de..062510c 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
@@ -19,11 +19,12 @@
 #ifndef __SPIRVToMSLConverter_h_
 #define __SPIRVToMSLConverter_h_ 1
 
+#include "SPIRVReflection.h"
 #include <spirv.hpp>
 #include <spirv_msl.hpp>
 #include <string>
 #include <vector>
-#include <unordered_map>
+
 
 namespace mvk {
 
@@ -63,7 +64,7 @@
 	/**
 	 * Defines MSL characteristics of a vertex attribute at a particular location.
 	 *
-	 * The isUsedByShader flag is set to true during conversion of SPIR-V to MSL if the shader
+	 * The outIsUsedByShader flag is set to true during conversion of SPIR-V to MSL if the shader
 	 * makes use of this vertex attribute. This allows a pipeline to be optimized, and for two
 	 * shader conversion configurations to be compared only against the attributes that are
 	 * actually used by the shader.
@@ -73,13 +74,12 @@
 	 */
 	typedef struct MSLShaderInput {
 		SPIRV_CROSS_NAMESPACE::MSLShaderInput shaderInput;
-
 		uint32_t binding = 0;
-		bool isUsedByShader = false;
+		bool outIsUsedByShader = false;
 
 		/**
 		 * Returns whether the specified vertex attribute matches this one.
-		 * It does if all corresponding elements except isUsedByShader are equal.
+		 * It does if all corresponding elements except outIsUsedByShader are equal.
 		 */
 		bool matches(const MSLShaderInput& other) const;
 
@@ -89,17 +89,17 @@
 	 * Matches the binding index of an MSL resource for a binding within a descriptor set.
 	 * Taken together, the stage, desc_set and binding combine to form a reference to a resource
 	 * descriptor used in a particular shading stage. Generally, only one of the buffer, texture,
-	 * or sampler elements will be populated. The isUsedByShader flag is set to true during
+	 * or sampler elements will be populated. The outIsUsedByShader flag is set to true during
 	 * compilation of SPIR-V to MSL if the shader makes use of this resource binding.
 	 *
 	 * If requiresConstExprSampler is true, the resource is a sampler whose content must be
 	 * hardcoded into the MSL as a constexpr type, instead of passed in as a runtime-bound variable.
 	 * The content of that constexpr sampler is defined in the constExprSampler parameter.
 	 *
-	 * The isUsedByShader flag is set to true during conversion of SPIR-V to MSL if the shader
-	 * makes use of this resource binding. This allows a pipeline to be optimized, and for two
-	 * shader conversion configurations to be compared only against the resource bindings that
-	 * are actually used by the shader.
+	 * The outIsUsedByShader value is set by the shader converter based on the content of the SPIR-V
+	 * (and resulting MSL), and is set to true if the shader makes use of this resource binding.
+	 * This allows a pipeline to be optimized, and for two shader conversion configurations to
+	 * be compared only against the resource bindings that are actually used by the shader.
 	 *
 	 * THIS STRUCT IS STREAMED OUT AS PART OF THE PIPELINE CACHE.
 	 * CHANGES TO THIS STRUCT SHOULD BE CAPTURED IN THE STREAMING LOGIC OF THE PIPELINE CACHE.
@@ -108,18 +108,34 @@
 		SPIRV_CROSS_NAMESPACE::MSLResourceBinding resourceBinding;
 		SPIRV_CROSS_NAMESPACE::MSLConstexprSampler constExprSampler;
 		bool requiresConstExprSampler = false;
-
-		bool isUsedByShader = false;
+		bool outIsUsedByShader = false;
 
 		/**
 		 * Returns whether the specified resource binding matches this one.
-		 * It does if all corresponding elements except isUsedByShader are equal.
+		 * It does if all corresponding elements except outIsUsedByShader are equal.
 		 */
 		bool matches(const MSLResourceBinding& other) const;
 
 	} MSLResourceBinding;
 
 	/**
+	 * Identifies a descriptor binding, and the index into a buffer that
+	 * can be used to provide dynamic content, such as dynamic buffer offsets.
+	 *
+	 * THIS STRUCT IS STREAMED OUT AS PART OF THE PIPELINE CACHE.
+	 * CHANGES TO THIS STRUCT SHOULD BE CAPTURED IN THE STREAMING LOGIC OF THE PIPELINE CACHE.
+	 */
+	typedef struct DescriptorBinding {
+		spv::ExecutionModel stage = spv::ExecutionModelMax;
+		uint32_t descriptorSet = 0;
+		uint32_t binding = 0;
+		uint32_t index = 0;
+
+		bool matches(const DescriptorBinding& other) const;
+
+	} DescriptorBinding;
+
+	/**
 	 * Configuration passed to the SPIRVToMSLConverter.
 	 *
 	 * THIS STRUCT IS STREAMED OUT AS PART OF THE PIPELINE CACHE.
@@ -129,6 +145,8 @@
 		SPIRVToMSLConversionOptions options;
 		std::vector<MSLShaderInput> shaderInputs;
 		std::vector<MSLResourceBinding> resourceBindings;
+		std::vector<uint32_t> discreteDescriptorSets;
+		std::vector<DescriptorBinding> dynamicBufferDescriptors;
 
 		/** Returns whether the pipeline stage being converted supports vertex attributes. */
 		bool stageSupportsVertexAttributes() const;
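
For illustration, a hedged sketch of populating the two new vectors when building a conversion configuration (the types and fields come from this header; the set/binding numbers and the function itself are placeholders):

    #include "SPIRVToMSLConverter.h"

    void configureArgumentBuffers(mvk::SPIRVToMSLConversionConfiguration& config) {
        // Keep descriptor set 1 out of the Metal argument buffers.
        config.discreteDescriptorSets.push_back(1);

        // Declare a dynamic-offset buffer: set 0, binding 2, reading its
        // offset from index 0 of the per-stage dynamic-offset buffer.
        mvk::DescriptorBinding db;
        db.stage = spv::ExecutionModelFragment;
        db.descriptorSet = 0;
        db.binding = 2;
        db.index = 0;
        config.dynamicBufferDescriptors.push_back(db);
    }
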
@@ -142,6 +160,9 @@
         /** Returns whether the vertex buffer at the specified Vulkan binding is used by the shader. */
 		bool isVertexBufferUsed(uint32_t binding) const { return countShaderInputsAt(binding) > 0; }
 
+		/** Returns whether the resource at the specified descriptor set binding is used by the shader. */
+		bool isResourceUsed(spv::ExecutionModel stage, uint32_t descSet, uint32_t binding) const;
+
 		/** Marks all input variables and resources as being used by the shader. */
 		void markAllInputsAndResourcesUsed();
 
@@ -209,6 +230,7 @@
 		bool needsOutputBuffer = false;
 		bool needsPatchOutputBuffer = false;
 		bool needsBufferSizeBuffer = false;
+		bool needsDynamicOffsetBuffer = false;
 		bool needsInputThreadgroupMem = false;
 		bool needsDispatchBaseBuffer = false;
 		bool needsViewRangeBuffer = false;
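
For illustration, a hedged stand-in sketch of a consumer reacting to the new needsDynamicOffsetBuffer flag (the struct here models only the field of interest, and the required response is an assumption about what the runtime must do):

    #include <cstdio>

    struct ConversionResults { bool needsDynamicOffsetBuffer = false; };    // stand-in

    int main() {
        ConversionResults results;
        results.needsDynamicOffsetBuffer = true;    // set when the stage has dynamic buffer descriptors
        if (results.needsDynamicOffsetBuffer) {
            // Assumption: the runtime binds an auxiliary buffer of dynamic
            // offsets for this stage alongside the Metal argument buffers.
            std::printf("bind a dynamic-offset buffer for this shader stage\n");
        }
        return 0;
    }
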