Merge pull request #878 from billhollings/master

Reduce memory requirements for commands.
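
The change replaces fixed worst-case inline vector capacities in the command
classes with a template parameter N, and declares a small set of concrete
specializations per command (e.g. MVKCmdBindVertexBuffers1/2/Multi), so the
very common low-count commands no longer carry the maximal inline allocation.
The sketch below shows the shape of the pattern in isolation; it is a minimal
stand-in, not MoltenVK source: InlineVector, CmdSetViewport, and the
capacities 1 and 16 are assumed names, and the real MVKVectorInline spills to
the heap rather than failing when N is exceeded.

    // Minimal sketch of the N-templated inline-capacity pattern.
    #include <cstddef>
    #include <cstdio>

    // Crude stand-in for MVKVectorInline<T, N>: N slots stored directly
    // inside the owning command object, so sizeof(command) scales with N.
    template <typename T, size_t N>
    class InlineVector {
    public:
        void clear() { _count = 0; }              // Clear for reuse
        bool push_back(const T& v) {
            if (_count >= N) { return false; }    // Real class grows instead
            _items[_count++] = v;
            return true;
        }
        size_t size() const { return _count; }
    private:
        T _items[N];
        size_t _count = 0;
    };

    // A command templated on its inline capacity, as in this diff.
    template <size_t N>
    class CmdSetViewport {
    public:
        void setContent(const float* pViewports, size_t count) {
            _viewports.clear();
            for (size_t i = 0; i < count; i++) { _viewports.push_back(pViewports[i]); }
        }
    private:
        InlineVector<float, N> _viewports;
    };

    // Concrete specializations: a tiny one for the dominant single-element
    // case and a larger one for the rare many-element case.
    typedef CmdSetViewport<1>  CmdSetViewport1;
    typedef CmdSetViewport<16> CmdSetViewportMulti;

    int main() {
        std::printf("single: %zu bytes, multi: %zu bytes\n",
                    sizeof(CmdSetViewport1), sizeof(CmdSetViewportMulti));
        return 0;
    }

Which specialization is pooled for a given vkCmd* call is presumably chosen
at record time from the element count; that selection code is outside this
diff.
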
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.h b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.h
index 336355d..4fef8b0 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.h
@@ -19,7 +19,6 @@
 #pragma once
 
 #include "MVKCommand.h"
-#include "MVKMTLResourceBindings.h"
 
 #import <Metal/Metal.h>
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h
index 8d453fe..4425309 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h
@@ -28,7 +28,11 @@
 #pragma mark -
 #pragma mark MVKCmdBindVertexBuffers
 
-/** Vulkan command to bind buffers containing vertex content. */
+/**
+ * Vulkan command to bind buffers containing vertex content.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
 class MVKCmdBindVertexBuffers : public MVKCommand {
 
 public:
@@ -43,9 +47,14 @@
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
-    MVKVectorInline<MVKMTLBufferBinding, 8> _bindings;
+    MVKVectorInline<MVKMTLBufferBinding, N> _bindings;
 };
 
+// Concrete template class implementations.
+typedef MVKCmdBindVertexBuffers<1> MVKCmdBindVertexBuffers1;
+typedef MVKCmdBindVertexBuffers<2> MVKCmdBindVertexBuffers2;
+typedef MVKCmdBindVertexBuffers<8> MVKCmdBindVertexBuffersMulti;
+
 
 #pragma mark -
 #pragma mark MVKCmdBindIndexBuffer
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm
index e8fbe51..26f833e 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm
@@ -28,11 +28,12 @@
 #pragma mark -
 #pragma mark MVKCmdBindVertexBuffers
 
-VkResult MVKCmdBindVertexBuffers::setContent(MVKCommandBuffer* cmdBuff,
-											 uint32_t startBinding,
-											 uint32_t bindingCount,
-											 const VkBuffer* pBuffers,
-											 const VkDeviceSize* pOffsets) {
+template <size_t N>
+VkResult MVKCmdBindVertexBuffers<N>::setContent(MVKCommandBuffer* cmdBuff,
+												uint32_t startBinding,
+												uint32_t bindingCount,
+												const VkBuffer* pBuffers,
+												const VkDeviceSize* pOffsets) {
 
 	MVKDevice* mvkDvc = cmdBuff->getDevice();
 	_bindings.clear();	// Clear for reuse
@@ -49,10 +50,15 @@
 	return VK_SUCCESS;
 }
 
-void MVKCmdBindVertexBuffers::encode(MVKCommandEncoder* cmdEncoder) {
+template <size_t N>
+void MVKCmdBindVertexBuffers<N>::encode(MVKCommandEncoder* cmdEncoder) {
     for (auto& b : _bindings) { cmdEncoder->_graphicsResourcesState.bindBuffer(kMVKShaderStageVertex, b); }
 }
 
+template class MVKCmdBindVertexBuffers<1>;
+template class MVKCmdBindVertexBuffers<2>;
+template class MVKCmdBindVertexBuffers<8>;
+
 
 #pragma mark -
 #pragma mark MVKCmdBindIndexBuffer
@@ -300,7 +306,7 @@
     pipeline->getStages(stages);
 
     MVKIndexMTLBufferBinding& ibb = cmdEncoder->_graphicsResourcesState._mtlIndexBufferBinding;
-    size_t idxSize = mvkMTLIndexTypeSizeInBytes(ibb.mtlIndexType);
+    size_t idxSize = mvkMTLIndexTypeSizeInBytes((MTLIndexType)ibb.mtlIndexType);
     VkDeviceSize idxBuffOffset = ibb.offset + (_firstIndex * idxSize);
 
     const MVKMTLBufferAllocation* vtxOutBuff = nullptr;
@@ -327,7 +333,7 @@
             // Yeah, this sucks. But there aren't many good ways for dealing with this issue.
             mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationControl);
             tcIndexBuff = cmdEncoder->getTempMTLBuffer(_instanceCount * patchCount * outControlPointCount * idxSize);
-            id<MTLComputePipelineState> mtlCopyIndexState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndexedCopyIndexBufferMTLComputePipelineState(ibb.mtlIndexType);
+            id<MTLComputePipelineState> mtlCopyIndexState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndexedCopyIndexBufferMTLComputePipelineState((MTLIndexType)ibb.mtlIndexType);
             [mtlTessCtlEncoder setComputePipelineState: mtlCopyIndexState];
             [mtlTessCtlEncoder setBuffer: ibb.mtlBuffer
                                   offset: ibb.offset
@@ -376,7 +382,7 @@
                 if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) {
                     [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: MTLPrimitiveTypePoint
                                                               indexCount: _indexCount
-                                                               indexType: ibb.mtlIndexType
+                                                               indexType: (MTLIndexType)ibb.mtlIndexType
                                                              indexBuffer: ibb.mtlBuffer
                                                        indexBufferOffset: idxBuffOffset
                                                            instanceCount: _instanceCount
@@ -385,7 +391,7 @@
                 } else {
                     [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: MTLPrimitiveTypePoint
                                                               indexCount: _indexCount
-                                                               indexType: ibb.mtlIndexType
+                                                               indexType: (MTLIndexType)ibb.mtlIndexType
                                                              indexBuffer: ibb.mtlBuffer
                                                        indexBufferOffset: idxBuffOffset
                                                            instanceCount: _instanceCount];
@@ -476,7 +482,7 @@
                     if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) {
                         [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType
                                                                   indexCount: _indexCount
-                                                                   indexType: ibb.mtlIndexType
+                                                                   indexType: (MTLIndexType)ibb.mtlIndexType
                                                                  indexBuffer: ibb.mtlBuffer
                                                            indexBufferOffset: idxBuffOffset
                                                                instanceCount: _instanceCount
@@ -485,7 +491,7 @@
                     } else {
                         [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType
                                                                   indexCount: _indexCount
-                                                                   indexType: ibb.mtlIndexType
+                                                                   indexType: (MTLIndexType)ibb.mtlIndexType
                                                                  indexBuffer: ibb.mtlBuffer
                                                            indexBufferOffset: idxBuffOffset
                                                                instanceCount: _instanceCount];
@@ -769,7 +775,7 @@
 void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
 
     MVKIndexMTLBufferBinding& ibb = cmdEncoder->_graphicsResourcesState._mtlIndexBufferBinding;
-    size_t idxSize = mvkMTLIndexTypeSizeInBytes(ibb.mtlIndexType);
+    size_t idxSize = mvkMTLIndexTypeSizeInBytes((MTLIndexType)ibb.mtlIndexType);
     auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline();
     // The indirect calls for dispatchThreadgroups:... and drawPatches:... have different formats.
     // We have to convert from the drawIndexedPrimitives:... format to them.
@@ -857,7 +863,7 @@
                 // or not there are gaps in it, because there's no way to tell Metal to
                 // offset an index buffer from a value in an indirect buffer. This also
                 // means that, to make a copy, we have to use a compute shader.
-                id<MTLComputePipelineState> mtlCopyIndexState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndexedCopyIndexBufferMTLComputePipelineState(ibb.mtlIndexType);
+                id<MTLComputePipelineState> mtlCopyIndexState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndexedCopyIndexBufferMTLComputePipelineState((MTLIndexType)ibb.mtlIndexType);
                 [mtlTessCtlEncoder setComputePipelineState: mtlCopyIndexState];
                 [mtlTessCtlEncoder setBuffer: ibb.mtlBuffer
                                       offset: ibb.offset
@@ -896,7 +902,7 @@
                                                               atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageVertex]];
                     }
                     [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: MTLPrimitiveTypePoint
-                                                               indexType: ibb.mtlIndexType
+                                                               indexType: (MTLIndexType)ibb.mtlIndexType
                                                              indexBuffer: ibb.mtlBuffer
                                                        indexBufferOffset: ibb.offset
                                                           indirectBuffer: _mtlIndirectBuffer
@@ -985,7 +991,7 @@
                         cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
                     } else {
                         [cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType
-                                                                   indexType: ibb.mtlIndexType
+                                                                   indexType: (MTLIndexType)ibb.mtlIndexType
                                                                  indexBuffer: ibb.mtlBuffer
                                                            indexBufferOffset: ibb.offset
                                                               indirectBuffer: _mtlIndirectBuffer
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
index 22471b7..5b862ba 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "MVKCommand.h"
+#include "MVKMTLResourceBindings.h"
 #include "MVKSync.h"
 #include "MVKVector.h"
 
@@ -32,7 +33,11 @@
 #pragma mark -
 #pragma mark MVKCmdPipelineBarrier
 
-/** Represents an abstract Vulkan command to add a pipeline barrier. */
+/**
+ * Vulkan command to add a pipeline barrier.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
 class MVKCmdPipelineBarrier : public MVKCommand {
 
 public:
@@ -51,45 +56,113 @@
 
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
+	bool coversTextures();
 
+	MVKVectorInline<MVKPipelineBarrier, N> _barriers;
 	VkPipelineStageFlags _srcStageMask;
 	VkPipelineStageFlags _dstStageMask;
 	VkDependencyFlags _dependencyFlags;
-	MVKVectorInline<VkMemoryBarrier, 4> _memoryBarriers;
-	MVKVectorInline<VkBufferMemoryBarrier, 4> _bufferMemoryBarriers;
-	MVKVectorInline<VkImageMemoryBarrier, 4> _imageMemoryBarriers;
 };
 
+// Concrete template class implementations.
+typedef MVKCmdPipelineBarrier<1> MVKCmdPipelineBarrier1;
+typedef MVKCmdPipelineBarrier<4> MVKCmdPipelineBarrier4;
+typedef MVKCmdPipelineBarrier<32> MVKCmdPipelineBarrierMulti;
+
 
 #pragma mark -
 #pragma mark MVKCmdBindPipeline
 
-/** Vulkan command to bind the pipeline state. */
+/** Abstract Vulkan command to bind a pipeline. */
 class MVKCmdBindPipeline : public MVKCommand {
 
 public:
-	VkResult setContent(MVKCommandBuffer* cmdBuff,
-						VkPipelineBindPoint pipelineBindPoint,
-						VkPipeline pipeline);
+	VkResult setContent(MVKCommandBuffer* cmdBuff, VkPipeline pipeline);
 
-	void encode(MVKCommandEncoder* cmdEncoder) override;
-
-	bool isTessellationPipeline();
+	virtual bool isTessellationPipeline() { return false; };
 
 protected:
-	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
-
-	VkPipelineBindPoint _bindPoint;
 	MVKPipeline* _pipeline;
 
 };
 
 
 #pragma mark -
-#pragma mark MVKCmdBindDescriptorSets
+#pragma mark MVKCmdBindGraphicsPipeline
 
-/** Vulkan command to bind descriptor sets. */
-class MVKCmdBindDescriptorSets : public MVKCommand {
+/** Vulkan command to bind a graphics pipeline. */
+class MVKCmdBindGraphicsPipeline : public MVKCmdBindPipeline {
+
+public:
+	void encode(MVKCommandEncoder* cmdEncoder) override;
+
+	bool isTessellationPipeline() override;
+
+protected:
+	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
+
+};
+
+
+#pragma mark -
+#pragma mark MVKCmdBindComputePipeline
+
+/** Vulkan command to bind a compute pipeline. */
+class MVKCmdBindComputePipeline : public MVKCmdBindPipeline {
+
+public:
+	void encode(MVKCommandEncoder* cmdEncoder) override;
+
+protected:
+	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
+
+};
+
+
+#pragma mark -
+#pragma mark MVKCmdBindDescriptorSetsStatic
+
+/**
+ * Vulkan command to bind descriptor sets without dynamic offsets.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
+class MVKCmdBindDescriptorSetsStatic : public MVKCommand {
+
+public:
+	VkResult setContent(MVKCommandBuffer* cmdBuff,
+						VkPipelineBindPoint pipelineBindPoint,
+						VkPipelineLayout layout,
+						uint32_t firstSet,
+						uint32_t setCount,
+						const VkDescriptorSet* pDescriptorSets);
+
+	void encode(MVKCommandEncoder* cmdEncoder) override;
+
+protected:
+	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
+
+	MVKVectorInline<MVKDescriptorSet*, N> _descriptorSets;
+	MVKPipelineLayout* _pipelineLayout;
+	VkPipelineBindPoint _pipelineBindPoint;
+	uint32_t _firstSet;
+};
+
+// Concrete template class implementations.
+typedef MVKCmdBindDescriptorSetsStatic<1> MVKCmdBindDescriptorSetsStatic1;
+typedef MVKCmdBindDescriptorSetsStatic<4> MVKCmdBindDescriptorSetsStatic4;
+typedef MVKCmdBindDescriptorSetsStatic<8> MVKCmdBindDescriptorSetsStaticMulti;
+
+
+#pragma mark -
+#pragma mark MVKCmdBindDescriptorSetsDynamic
+
+/**
+ * Vulkan command to bind descriptor sets with dynamic offsets.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
+class MVKCmdBindDescriptorSetsDynamic : public MVKCmdBindDescriptorSetsStatic<N> {
 
 public:
 	VkResult setContent(MVKCommandBuffer* cmdBuff,
@@ -106,18 +179,22 @@
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
-	VkPipelineBindPoint _pipelineBindPoint;
-	MVKPipelineLayout* _pipelineLayout;
-	MVKVectorInline<MVKDescriptorSet*, 8> _descriptorSets;
-	MVKVectorInline<uint32_t, 8>          _dynamicOffsets;
-	uint32_t _firstSet;
+	MVKVectorInline<uint32_t, N> _dynamicOffsets;
 };
 
+// Concrete template class implementations.
+typedef MVKCmdBindDescriptorSetsDynamic<4> MVKCmdBindDescriptorSetsDynamic4;
+typedef MVKCmdBindDescriptorSetsDynamic<8> MVKCmdBindDescriptorSetsDynamicMulti;
+
 
 #pragma mark -
 #pragma mark MVKCmdPushConstants
 
-/** Vulkan command to bind push constants. */
+/**
+ * Vulkan command to bind push constants.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
 class MVKCmdPushConstants : public MVKCommand {
 
 public:
@@ -133,12 +210,17 @@
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
+	MVKVectorInline<char, N> _pushConstants;
 	MVKPipelineLayout* _pipelineLayout;
 	VkShaderStageFlags _stageFlags;
 	uint32_t _offset;
-	MVKVectorInline<char, 128> _pushConstants;
 };
 
+// Concrete template class implementations.
+typedef MVKCmdPushConstants<64> MVKCmdPushConstants64;
+typedef MVKCmdPushConstants<128> MVKCmdPushConstants128;
+typedef MVKCmdPushConstants<512> MVKCmdPushConstantsMulti;
+
 
 #pragma mark -
 #pragma mark MVKCmdPushDescriptorSet
@@ -162,9 +244,9 @@
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 	void clearDescriptorWrites();
 
-	VkPipelineBindPoint _pipelineBindPoint;
+	MVKVectorInline<VkWriteDescriptorSet, 1> _descriptorWrites;
 	MVKPipelineLayout* _pipelineLayout;
-	MVKVectorInline<VkWriteDescriptorSet, 8> _descriptorWrites;
+	VkPipelineBindPoint _pipelineBindPoint;
 	uint32_t _set;
 };
 
@@ -191,30 +273,54 @@
 
 	MVKDescriptorUpdateTemplate* _descUpdateTemplate;
 	MVKPipelineLayout* _pipelineLayout;
-	uint32_t _set;
 	void* _pData = nullptr;
+	uint32_t _set;
 };
 
 
 #pragma mark -
 #pragma mark MVKCmdSetResetEvent
 
-/** Vulkan command to set or reset an event. */
+/** Abstract Vulkan command to set or reset an event. */
 class MVKCmdSetResetEvent : public MVKCommand {
 
 public:
 	VkResult setContent(MVKCommandBuffer* cmdBuff,
 						VkEvent event,
-						VkPipelineStageFlags stageMask,
-						bool status);
+						VkPipelineStageFlags stageMask);
 
+protected:
+	MVKEvent* _mvkEvent;
+
+};
+
+
+#pragma mark -
+#pragma mark MVKCmdSetEvent
+
+/** Vulkan command to set an event. */
+class MVKCmdSetEvent : public MVKCmdSetResetEvent {
+
+public:
 	void encode(MVKCommandEncoder* cmdEncoder) override;
 
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
-	MVKEvent* _mvkEvent;
-	bool _status;
+};
+
+
+#pragma mark -
+#pragma mark MVKCmdResetEvent
+
+/** Vulkan command to reset an event. */
+class MVKCmdResetEvent : public MVKCmdSetResetEvent {
+
+public:
+	void encode(MVKCommandEncoder* cmdEncoder) override;
+
+protected:
+	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
 };
 
@@ -223,6 +329,10 @@
 #pragma mark MVKCmdWaitEvents
 
-/** Vulkan command to wait for an event to be signaled. */
+/**
+ * Vulkan command to wait for an event to be signaled.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
 class MVKCmdWaitEvents : public MVKCommand {
 
 public:
@@ -243,6 +354,10 @@
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
-	MVKVectorInline<MVKEvent*, 4> _mvkEvents;
+	MVKVectorInline<MVKEvent*, N> _mvkEvents;
 
 };
+
+// Concrete template class implementations.
+typedef MVKCmdWaitEvents<1> MVKCmdWaitEvents1;
+typedef MVKCmdWaitEvents<8> MVKCmdWaitEventsMulti;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
index af83890..0d6dad8 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
@@ -30,108 +30,129 @@
 #pragma mark -
 #pragma mark MVKCmdPipelineBarrier
 
-VkResult MVKCmdPipelineBarrier::setContent(MVKCommandBuffer* cmdBuff,
-										   VkPipelineStageFlags srcStageMask,
-										   VkPipelineStageFlags dstStageMask,
-										   VkDependencyFlags dependencyFlags,
-										   uint32_t memoryBarrierCount,
-										   const VkMemoryBarrier* pMemoryBarriers,
-										   uint32_t bufferMemoryBarrierCount,
-										   const VkBufferMemoryBarrier* pBufferMemoryBarriers,
-										   uint32_t imageMemoryBarrierCount,
-										   const VkImageMemoryBarrier* pImageMemoryBarriers) {
+template <size_t N>
+VkResult MVKCmdPipelineBarrier<N>::setContent(MVKCommandBuffer* cmdBuff,
+											  VkPipelineStageFlags srcStageMask,
+											  VkPipelineStageFlags dstStageMask,
+											  VkDependencyFlags dependencyFlags,
+											  uint32_t memoryBarrierCount,
+											  const VkMemoryBarrier* pMemoryBarriers,
+											  uint32_t bufferMemoryBarrierCount,
+											  const VkBufferMemoryBarrier* pBufferMemoryBarriers,
+											  uint32_t imageMemoryBarrierCount,
+											  const VkImageMemoryBarrier* pImageMemoryBarriers) {
 	_srcStageMask = srcStageMask;
 	_dstStageMask = dstStageMask;
 	_dependencyFlags = dependencyFlags;
 
-	_memoryBarriers.clear();	// Clear for reuse
-	_memoryBarriers.reserve(memoryBarrierCount);
+	_barriers.clear();	// Clear for reuse
+	_barriers.reserve(memoryBarrierCount + bufferMemoryBarrierCount + imageMemoryBarrierCount);
+
 	for (uint32_t i = 0; i < memoryBarrierCount; i++) {
-		_memoryBarriers.push_back(pMemoryBarriers[i]);
+		_barriers.emplace_back(pMemoryBarriers[i]);
 	}
-
-	_bufferMemoryBarriers.clear();	// Clear for reuse
-	_bufferMemoryBarriers.reserve(bufferMemoryBarrierCount);
 	for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
-		_bufferMemoryBarriers.push_back(pBufferMemoryBarriers[i]);
+		_barriers.emplace_back(pBufferMemoryBarriers[i]);
 	}
-
-	_imageMemoryBarriers.clear();	// Clear for reuse
-	_imageMemoryBarriers.reserve(imageMemoryBarrierCount);
 	for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
-		_imageMemoryBarriers.push_back(pImageMemoryBarriers[i]);
+		_barriers.emplace_back(pImageMemoryBarriers[i]);
 	}
 
 	return VK_SUCCESS;
 }
 
-void MVKCmdPipelineBarrier::encode(MVKCommandEncoder* cmdEncoder) {
+template <size_t N>
+void MVKCmdPipelineBarrier<N>::encode(MVKCommandEncoder* cmdEncoder) {
 
 #if MVK_MACOS
-    // Calls below invoke MTLBlitCommandEncoder so must apply this first.
+	// Calls below invoke MTLBlitCommandEncoder so must apply this first.
 	// Check if pipeline barriers are available and we are in a renderpass.
 	if (cmdEncoder->getDevice()->_pMetalFeatures->memoryBarriers && cmdEncoder->_mtlRenderEncoder) {
 		MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(_srcStageMask, false);
 		MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(_dstStageMask, true);
-		for (auto& mb : _memoryBarriers) {
-			MTLBarrierScope scope = mvkMTLBarrierScopeFromVkAccessFlags(mb.dstAccessMask);
-			scope |= mvkMTLBarrierScopeFromVkAccessFlags(mb.srcAccessMask);
-			[cmdEncoder->_mtlRenderEncoder memoryBarrierWithScope: scope
-													  afterStages: srcStages
-													 beforeStages: dstStages];
+
+		id<MTLResource> resources[_barriers.size()];
+		uint32_t rezCnt = 0;
+
+		for (auto& b : _barriers) {
+			switch (b.type) {
+				case MVKPipelineBarrier::Memory: {
+					MTLBarrierScope scope = (mvkMTLBarrierScopeFromVkAccessFlags(b.srcAccessMask) |
+											 mvkMTLBarrierScopeFromVkAccessFlags(b.dstAccessMask));
+					[cmdEncoder->_mtlRenderEncoder memoryBarrierWithScope: scope
+															  afterStages: srcStages
+															 beforeStages: dstStages];
+					break;
+				}
+
+				case MVKPipelineBarrier::Buffer:
+					resources[rezCnt++] = b.mvkBuffer->getMTLBuffer();
+					break;
+
+				case MVKPipelineBarrier::Image:
+					resources[rezCnt++] = b.mvkImage->getMTLTexture();
+					break;
+
+				default:
+					break;
+			}
 		}
-		MVKVectorInline<id<MTLResource>, 16> resources;
-		resources.reserve(_bufferMemoryBarriers.size() + _imageMemoryBarriers.size());
-		for (auto& mb : _bufferMemoryBarriers) {
-			auto* mvkBuff = (MVKBuffer*)mb.buffer;
-			resources.push_back(mvkBuff->getMTLBuffer());
-		}
-		for (auto& mb : _imageMemoryBarriers) {
-			auto* mvkImg = (MVKImage*)mb.image;
-			resources.push_back(mvkImg->getMTLTexture());
-		}
-		if ( !resources.empty() ) {
-			[cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: resources.data()
-																count: resources.size()
+
+		if (rezCnt) {
+			[cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: resources
+																count: rezCnt
 														  afterStages: srcStages
 														 beforeStages: dstStages];
 		}
 	} else {
-		if ( !(_memoryBarriers.empty() && _imageMemoryBarriers.empty()) ) {
-			[cmdEncoder->_mtlRenderEncoder textureBarrier];
-		}
+		if (coversTextures()) { [cmdEncoder->_mtlRenderEncoder textureBarrier]; }
 	}
 #endif
 
 	MVKDevice* mvkDvc = cmdEncoder->getDevice();
-    MVKCommandUse cmdUse = kMVKCommandUsePipelineBarrier;
+	MVKCommandUse cmdUse = kMVKCommandUsePipelineBarrier;
 
-	// Apply global memory barriers
-    for (auto& mb : _memoryBarriers) {
-        mvkDvc->applyMemoryBarrier(_srcStageMask, _dstStageMask, &mb, cmdEncoder, cmdUse);
-    }
+	for (auto& b : _barriers) {
+		switch (b.type) {
+			case MVKPipelineBarrier::Memory:
+				mvkDvc->applyMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse);
+				break;
 
-    // Apply specific buffer barriers
-    for (auto& mb : _bufferMemoryBarriers) {
-        MVKBuffer* mvkBuff = (MVKBuffer*)mb.buffer;
-        mvkBuff->applyBufferMemoryBarrier(_srcStageMask, _dstStageMask, &mb, cmdEncoder, cmdUse);
-    }
+			case MVKPipelineBarrier::Buffer:
+				b.mvkBuffer->applyBufferMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse);
+				break;
 
-    // Apply specific image barriers
-    for (auto& mb : _imageMemoryBarriers) {
-        MVKImage* mvkImg = (MVKImage*)mb.image;
-        mvkImg->applyImageMemoryBarrier(_srcStageMask, _dstStageMask, &mb, cmdEncoder, cmdUse);
-    }
+			case MVKPipelineBarrier::Image:
+				b.mvkImage->applyImageMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse);
+				break;
+
+			default:
+				break;
+		}
+	}
 }
 
+template <size_t N>
+bool MVKCmdPipelineBarrier<N>::coversTextures() {
+	for (auto& b : _barriers) {
+		switch (b.type) {
+			case MVKPipelineBarrier::Memory:	return true;
+			case MVKPipelineBarrier::Image: 	return true;
+			default: 							break;
+		}
+	}
+	return false;
+}
+
+template class MVKCmdPipelineBarrier<1>;
+template class MVKCmdPipelineBarrier<4>;
+template class MVKCmdPipelineBarrier<32>;
+
 
 #pragma mark -
 #pragma mark MVKCmdBindPipeline
 
-VkResult MVKCmdBindPipeline::setContent(MVKCommandBuffer* cmdBuff,
-										VkPipelineBindPoint pipelineBindPoint,
-										VkPipeline pipeline) {
-	_bindPoint = pipelineBindPoint;
+VkResult MVKCmdBindPipeline::setContent(MVKCommandBuffer* cmdBuff, VkPipeline pipeline) {
 	_pipeline = (MVKPipeline*)pipeline;
 
 	cmdBuff->recordBindPipeline(this);
@@ -139,29 +160,37 @@
 	return VK_SUCCESS;
 }
 
-void MVKCmdBindPipeline::encode(MVKCommandEncoder* cmdEncoder) {
-    cmdEncoder->bindPipeline(_bindPoint, _pipeline);
+
+#pragma mark -
+#pragma mark MVKCmdBindGraphicsPipeline
+
+void MVKCmdBindGraphicsPipeline::encode(MVKCommandEncoder* cmdEncoder) {
+	cmdEncoder->bindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, _pipeline);
 }
 
-bool MVKCmdBindPipeline::isTessellationPipeline() {
-	if (_bindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
-		return ((MVKGraphicsPipeline*)_pipeline)->isTessellationPipeline();
-	else
-		return false;
+bool MVKCmdBindGraphicsPipeline::isTessellationPipeline() {
+	return ((MVKGraphicsPipeline*)_pipeline)->isTessellationPipeline();
 }
 
 
 #pragma mark -
-#pragma mark MVKCmdBindDescriptorSets
+#pragma mark MVKCmdBindComputePipeline
 
-VkResult MVKCmdBindDescriptorSets::setContent(MVKCommandBuffer* cmdBuff,
-											  VkPipelineBindPoint pipelineBindPoint,
-											  VkPipelineLayout layout,
-											  uint32_t firstSet,
-											  uint32_t setCount,
-											  const VkDescriptorSet* pDescriptorSets,
-											  uint32_t dynamicOffsetCount,
-											  const uint32_t* pDynamicOffsets) {
+void MVKCmdBindComputePipeline::encode(MVKCommandEncoder* cmdEncoder) {
+	cmdEncoder->bindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, _pipeline);
+}
+
+
+#pragma mark -
+#pragma mark MVKCmdBindDescriptorSetsStatic
+
+template <size_t N>
+VkResult MVKCmdBindDescriptorSetsStatic<N>::setContent(MVKCommandBuffer* cmdBuff,
+													   VkPipelineBindPoint pipelineBindPoint,
+													   VkPipelineLayout layout,
+													   uint32_t firstSet,
+													   uint32_t setCount,
+													   const VkDescriptorSet* pDescriptorSets) {
 	_pipelineBindPoint = pipelineBindPoint;
 	_pipelineLayout = (MVKPipelineLayout*)layout;
 	_firstSet = firstSet;
@@ -173,6 +202,35 @@
 		_descriptorSets.push_back((MVKDescriptorSet*)pDescriptorSets[dsIdx]);
 	}
 
+	return VK_SUCCESS;
+}
+
+template <size_t N>
+void MVKCmdBindDescriptorSetsStatic<N>::encode(MVKCommandEncoder* cmdEncoder) {
+	_pipelineLayout->bindDescriptorSets(cmdEncoder, _descriptorSets, _firstSet, nullptr);
+}
+
+template class MVKCmdBindDescriptorSetsStatic<1>;
+template class MVKCmdBindDescriptorSetsStatic<4>;
+template class MVKCmdBindDescriptorSetsStatic<8>;
+
+
+#pragma mark -
+#pragma mark MVKCmdBindDescriptorSetsDynamic
+
+template <size_t N>
+VkResult MVKCmdBindDescriptorSetsDynamic<N>::setContent(MVKCommandBuffer* cmdBuff,
+														VkPipelineBindPoint pipelineBindPoint,
+														VkPipelineLayout layout,
+														uint32_t firstSet,
+														uint32_t setCount,
+														const VkDescriptorSet* pDescriptorSets,
+														uint32_t dynamicOffsetCount,
+														const uint32_t* pDynamicOffsets) {
+
+	MVKCmdBindDescriptorSetsStatic<N>::setContent(cmdBuff, pipelineBindPoint, layout,
+												  firstSet, setCount, pDescriptorSets);
+
 	// Add the dynamic offsets
 	_dynamicOffsets.clear();	// Clear for reuse
 	_dynamicOffsets.reserve(dynamicOffsetCount);
@@ -183,20 +241,25 @@
 	return VK_SUCCESS;
 }
 
-void MVKCmdBindDescriptorSets::encode(MVKCommandEncoder* cmdEncoder) {
-	_pipelineLayout->bindDescriptorSets(cmdEncoder, _descriptorSets, _firstSet, _dynamicOffsets);
+template <size_t N>
+void MVKCmdBindDescriptorSetsDynamic<N>::encode(MVKCommandEncoder* cmdEncoder) {
+	MVKCmdBindDescriptorSetsStatic<N>::_pipelineLayout->bindDescriptorSets(cmdEncoder, MVKCmdBindDescriptorSetsStatic<N>::_descriptorSets, MVKCmdBindDescriptorSetsStatic<N>::_firstSet, &_dynamicOffsets);
 }
 
+template class MVKCmdBindDescriptorSetsDynamic<4>;
+template class MVKCmdBindDescriptorSetsDynamic<8>;
+
 
 #pragma mark -
 #pragma mark MVKCmdPushConstants
 
-VkResult MVKCmdPushConstants::setContent(MVKCommandBuffer* cmdBuff,
-										 VkPipelineLayout layout,
-										 VkShaderStageFlags stageFlags,
-										 uint32_t offset,
-										 uint32_t size,
-										 const void* pValues) {
+template <size_t N>
+VkResult MVKCmdPushConstants<N>::setContent(MVKCommandBuffer* cmdBuff,
+											VkPipelineLayout layout,
+											VkShaderStageFlags stageFlags,
+											uint32_t offset,
+											uint32_t size,
+											const void* pValues) {
 	_pipelineLayout = (MVKPipelineLayout*)layout;
 	_stageFlags = stageFlags;
 	_offset = offset;
@@ -207,7 +270,8 @@
 	return VK_SUCCESS;
 }
 
-void MVKCmdPushConstants::encode(MVKCommandEncoder* cmdEncoder) {
+template <size_t N>
+void MVKCmdPushConstants<N>::encode(MVKCommandEncoder* cmdEncoder) {
     VkShaderStageFlagBits stages[] = {
         VK_SHADER_STAGE_VERTEX_BIT,
         VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
@@ -222,6 +286,10 @@
     }
 }
 
+template class MVKCmdPushConstants<64>;
+template class MVKCmdPushConstants<128>;
+template class MVKCmdPushConstants<512>;
+
 
 #pragma mark -
 #pragma mark MVKCmdPushDescriptorSet
@@ -382,33 +450,44 @@
 
 VkResult MVKCmdSetResetEvent::setContent(MVKCommandBuffer* cmdBuff,
 										 VkEvent event,
-										 VkPipelineStageFlags stageMask,
-										 bool status) {
+										 VkPipelineStageFlags stageMask) {
 	_mvkEvent = (MVKEvent*)event;
-	_status = status;
 
 	return VK_SUCCESS;
 }
 
-void MVKCmdSetResetEvent::encode(MVKCommandEncoder* cmdEncoder) {
-	cmdEncoder->signalEvent(_mvkEvent, _status);
+
+#pragma mark -
+#pragma mark MVKCmdSetEvent
+
+void MVKCmdSetEvent::encode(MVKCommandEncoder* cmdEncoder) {
+	cmdEncoder->signalEvent(_mvkEvent, true);
+}
+
+
+#pragma mark -
+#pragma mark MVKCmdResetEvent
+
+void MVKCmdResetEvent::encode(MVKCommandEncoder* cmdEncoder) {
+	cmdEncoder->signalEvent(_mvkEvent, false);
 }
 
 
 #pragma mark -
 #pragma mark MVKCmdWaitEvents
 
-VkResult MVKCmdWaitEvents::setContent(MVKCommandBuffer* cmdBuff,
-									  uint32_t eventCount,
-									  const VkEvent* pEvents,
-									  VkPipelineStageFlags srcStageMask,
-									  VkPipelineStageFlags dstStageMask,
-									  uint32_t memoryBarrierCount,
-									  const VkMemoryBarrier* pMemoryBarriers,
-									  uint32_t bufferMemoryBarrierCount,
-									  const VkBufferMemoryBarrier* pBufferMemoryBarriers,
-									  uint32_t imageMemoryBarrierCount,
-									  const VkImageMemoryBarrier* pImageMemoryBarriers) {
+template <size_t N>
+VkResult MVKCmdWaitEvents<N>::setContent(MVKCommandBuffer* cmdBuff,
+										 uint32_t eventCount,
+										 const VkEvent* pEvents,
+										 VkPipelineStageFlags srcStageMask,
+										 VkPipelineStageFlags dstStageMask,
+										 uint32_t memoryBarrierCount,
+										 const VkMemoryBarrier* pMemoryBarriers,
+										 uint32_t bufferMemoryBarrierCount,
+										 const VkBufferMemoryBarrier* pBufferMemoryBarriers,
+										 uint32_t imageMemoryBarrierCount,
+										 const VkImageMemoryBarrier* pImageMemoryBarriers) {
 	_mvkEvents.clear();	// Clear for reuse
 	_mvkEvents.reserve(eventCount);
 	for (uint32_t i = 0; i < eventCount; i++) {
@@ -418,9 +497,13 @@
 	return VK_SUCCESS;
 }
 
-void MVKCmdWaitEvents::encode(MVKCommandEncoder* cmdEncoder) {
+template <size_t N>
+void MVKCmdWaitEvents<N>::encode(MVKCommandEncoder* cmdEncoder) {
 	for (MVKEvent* mvkEvt : _mvkEvents) {
 		mvkEvt->encodeWait(cmdEncoder->_mtlCmdBuffer);
 	}
 }
 
+template class MVKCmdWaitEvents<1>;
+template class MVKCmdWaitEvents<8>;
+
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h
index ac0d848..e76d01e 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h
@@ -140,9 +140,9 @@
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
-    uint32_t _queryCount;
     MVKBuffer* _destBuffer;
     VkDeviceSize _destOffset;
     VkDeviceSize _destStride;
     VkQueryResultFlags _flags;
+	uint32_t _queryCount;
 };
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h
index a41b4b9..4a4eff7 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.h
@@ -31,7 +31,11 @@
 #pragma mark -
 #pragma mark MVKCmdBeginRenderPass
 
-/** Vulkan command to begin a render pass. */
+/**
+ * Vulkan command to begin a render pass.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
 class MVKCmdBeginRenderPass : public MVKCommand, public MVKLoadStoreOverrideMixin {
 
 public:
@@ -44,13 +48,18 @@
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
-	VkRenderPassBeginInfo _info;
-	VkSubpassContents _contents;
+	MVKVectorInline<VkClearValue, N> _clearValues;
 	MVKRenderPass* _renderPass;
 	MVKFramebuffer* _framebuffer;
-	MVKVectorInline<VkClearValue, 8> _clearValues;
+	VkRect2D _renderArea;
+	VkSubpassContents _contents;
 };
 
+// Concrete template class implementations.
+typedef MVKCmdBeginRenderPass<1> MVKCmdBeginRenderPass1;
+typedef MVKCmdBeginRenderPass<2> MVKCmdBeginRenderPass2;
+typedef MVKCmdBeginRenderPass<9> MVKCmdBeginRenderPassMulti;
+
 
 #pragma mark -
 #pragma mark MVKCmdNextSubpass
@@ -91,7 +100,11 @@
 #pragma mark -
 #pragma mark MVKCmdExecuteCommands
 
-/** Vulkan command to execute secondary command buffers. */
+/**
+ * Vulkan command to execute secondary command buffers.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
 class MVKCmdExecuteCommands : public MVKCommand {
 
 public:
@@ -104,18 +117,20 @@
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
-	MVKVectorInline<MVKCommandBuffer*, 64> _secondaryCommandBuffers;
+	MVKVectorInline<MVKCommandBuffer*, N> _secondaryCommandBuffers;
 };
 
+// Concrete template class implementations.
+typedef MVKCmdExecuteCommands<1> MVKCmdExecuteCommands1;
+typedef MVKCmdExecuteCommands<16> MVKCmdExecuteCommandsMulti;
+
 
 #pragma mark -
 #pragma mark MVKCmdSetViewport
 
 /**
  * Vulkan command to set the viewports.
- * This is a template class to support different vector pre-allocations, so we can balance
- * in-line memory allocation betweeen the very common case of a single viewport, and the
- * maximal number, by choosing which concrete implementation to use based on viewport count.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
  */
 template <size_t N>
 class MVKCmdSetViewport : public MVKCommand {
@@ -131,11 +146,11 @@
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
-	uint32_t _firstViewport;
 	MVKVectorInline<VkViewport, N> _viewports;
+	uint32_t _firstViewport;
 };
 
-// Concrete template class implemenations.
+// Concrete template class implementations.
 typedef MVKCmdSetViewport<1> MVKCmdSetViewport1;
 typedef MVKCmdSetViewport<kMVKCachedViewportScissorCount> MVKCmdSetViewportMulti;
 
@@ -145,9 +160,7 @@
 
 /**
  * Vulkan command to set the scissor rectangles.
- * This is a template class to support different vector pre-allocations, so we can balance
- * in-line memory allocation betweeen the very common case of a single scissor, and the
- * maximal number, by choosing which concrete implementation to use based on scissor count.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
  */
 template <size_t N>
 class MVKCmdSetScissor : public MVKCommand {
@@ -163,11 +176,11 @@
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
-	uint32_t _firstScissor;
 	MVKVectorInline<VkRect2D, N> _scissors;
+	uint32_t _firstScissor;
 };
 
-// Concrete template class implemenations.
+// Concrete template class implementations.
 typedef MVKCmdSetScissor<1> MVKCmdSetScissor1;
 typedef MVKCmdSetScissor<kMVKCachedViewportScissorCount> MVKCmdSetScissorMulti;
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm
index 393341f..c31292c 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdRenderPass.mm
@@ -28,21 +28,23 @@
 #pragma mark -
 #pragma mark MVKCmdBeginRenderPass
 
-VkResult MVKCmdBeginRenderPass::setContent(MVKCommandBuffer* cmdBuff,
-										   const VkRenderPassBeginInfo* pRenderPassBegin,
-										   VkSubpassContents contents) {
-	_info = *pRenderPassBegin;
+template <size_t N>
+VkResult MVKCmdBeginRenderPass<N>::setContent(MVKCommandBuffer* cmdBuff,
+											  const VkRenderPassBeginInfo* pRenderPassBegin,
+											  VkSubpassContents contents) {
 	_contents = contents;
-	_renderPass = (MVKRenderPass*)_info.renderPass;
-	_framebuffer = (MVKFramebuffer*)_info.framebuffer;
+	_renderPass = (MVKRenderPass*)pRenderPassBegin->renderPass;
+	_framebuffer = (MVKFramebuffer*)pRenderPassBegin->framebuffer;
+	_renderArea = pRenderPassBegin->renderArea;
     _loadOverride = false;
     _storeOverride = false;
 
 	// Add clear values
+	uint32_t cvCnt = pRenderPassBegin->clearValueCount;
 	_clearValues.clear();	// Clear for reuse
-	_clearValues.reserve(_info.clearValueCount);
-	for (uint32_t i = 0; i < _info.clearValueCount; i++) {
-		_clearValues.push_back(_info.pClearValues[i]);
+	_clearValues.reserve(cvCnt);
+	for (uint32_t i = 0; i < cvCnt; i++) {
+		_clearValues.push_back(pRenderPassBegin->pClearValues[i]);
 	}
 
 	cmdBuff->recordBeginRenderPass(this);
@@ -50,11 +52,16 @@
 	return VK_SUCCESS;
 }
 
-void MVKCmdBeginRenderPass::encode(MVKCommandEncoder* cmdEncoder) {
+template <size_t N>
+void MVKCmdBeginRenderPass<N>::encode(MVKCommandEncoder* cmdEncoder) {
 //	MVKLogDebug("Encoding vkCmdBeginRenderPass(). Elapsed time: %.6f ms.", mvkGetElapsedMilliseconds());
-	cmdEncoder->beginRenderpass(_contents, _renderPass, _framebuffer, _info.renderArea, &_clearValues, _loadOverride, _storeOverride);
+	cmdEncoder->beginRenderpass(_contents, _renderPass, _framebuffer, _renderArea, &_clearValues, _loadOverride, _storeOverride);
 }
 
+template class MVKCmdBeginRenderPass<1>;
+template class MVKCmdBeginRenderPass<2>;
+template class MVKCmdBeginRenderPass<9>;
+
 
 #pragma mark -
 #pragma mark MVKCmdNextSubpass
@@ -75,7 +82,7 @@
 #pragma mark MVKCmdEndRenderPass
 
 VkResult MVKCmdEndRenderPass::setContent(MVKCommandBuffer* cmdBuff) {
-	cmdBuff->recordEndRenderPass(this);
+	cmdBuff->recordEndRenderPass();
 	return VK_SUCCESS;
 }
 
@@ -88,9 +95,10 @@
 #pragma mark -
 #pragma mark MVKCmdExecuteCommands
 
-VkResult MVKCmdExecuteCommands::setContent(MVKCommandBuffer* cmdBuff,
-										   uint32_t commandBuffersCount,
-										   const VkCommandBuffer* pCommandBuffers) {
+template <size_t N>
+VkResult MVKCmdExecuteCommands<N>::setContent(MVKCommandBuffer* cmdBuff,
+											  uint32_t commandBuffersCount,
+											  const VkCommandBuffer* pCommandBuffers) {
 	// Add clear values
 	_secondaryCommandBuffers.clear();	// Clear for reuse
 	_secondaryCommandBuffers.reserve(commandBuffersCount);
@@ -101,10 +109,14 @@
 	return VK_SUCCESS;
 }
 
-void MVKCmdExecuteCommands::encode(MVKCommandEncoder* cmdEncoder) {
+template <size_t N>
+void MVKCmdExecuteCommands<N>::encode(MVKCommandEncoder* cmdEncoder) {
     for (auto& cb : _secondaryCommandBuffers) { cmdEncoder->encodeSecondary(cb); }
 }
 
+template class MVKCmdExecuteCommands<1>;
+template class MVKCmdExecuteCommands<16>;
+
 
 #pragma mark -
 #pragma mark MVKCmdSetViewport
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.h b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.h
index 66e316a..0e341c4 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.h
@@ -33,7 +33,11 @@
 #pragma mark -
 #pragma mark MVKCmdCopyImage
 
-/** Vulkan command to copy image regions. */
+/**
+ * Vulkan command to copy image regions.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
 class MVKCmdCopyImage : public MVKCommand {
 
 public:
@@ -43,40 +47,26 @@
 						VkImage dstImage,
 						VkImageLayout dstImageLayout,
 						uint32_t regionCount,
-						const VkImageCopy* pRegions,
-						MVKCommandUse commandUse = kMVKCommandUseCopyImage);
+						const VkImageCopy* pRegions);
 
-	void encode(MVKCommandEncoder* cmdEncoder) override;
+	void encode(MVKCommandEncoder* cmdEncoder) override { encode(cmdEncoder, kMVKCommandUseCopyImage); }
+
+	void encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse commandUse);
 
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
-	VkResult setContent(MVKCommandBuffer* cmdBuff,
-						VkImage srcImage,
-						VkImageLayout srcImageLayout,
-						VkImage dstImage,
-						VkImageLayout dstImageLayout,
-						bool formatsMustMatch,
-						MVKCommandUse commandUse);
-	void addImageCopyRegion(const VkImageCopy& region, MVKPixelFormats* pixFmts);
-	void addTempBufferImageCopyRegion(const VkImageCopy& region, MVKPixelFormats* pixFmts);
 
+	MVKVectorInline<VkImageCopy, N> _vkImageCopies;
 	MVKImage* _srcImage;
-	VkImageLayout _srcLayout;
 	MVKImage* _dstImage;
+	VkImageLayout _srcLayout;
 	VkImageLayout _dstLayout;
-	uint32_t _srcSampleCount;
-	uint32_t _dstSampleCount;
-	bool _isSrcCompressed;
-	bool _isDstCompressed;
-	bool _canCopyFormats;
-	bool _useTempBuffer;
-	MVKVectorInline<VkImageCopy, 4> _imageCopyRegions;
-	MVKVectorInline<VkBufferImageCopy, 4> _srcTmpBuffImgCopies;
-	MVKVectorInline<VkBufferImageCopy, 4> _dstTmpBuffImgCopies;
-	size_t _tmpBuffSize;
-    MVKCommandUse _commandUse;
 };
 
+// Concrete template class implementations.
+typedef MVKCmdCopyImage<1> MVKCmdCopyImage1;
+typedef MVKCmdCopyImage<4> MVKCmdCopyImageMulti;
+
 
 #pragma mark -
 #pragma mark MVKCmdBlitImage
@@ -90,8 +80,12 @@
 	MVKVertexPosTex vertices[kMVKBlitVertexCount];
 } MVKImageBlitRender;
 
-/** Vulkan command to BLIT image regions. */
-class MVKCmdBlitImage : public MVKCmdCopyImage {
+/**
+ * Vulkan command to BLIT image regions.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
+class MVKCmdBlitImage : public MVKCommand {
 
 public:
 	VkResult setContent(MVKCommandBuffer* cmdBuff,
@@ -101,28 +95,30 @@
 						VkImageLayout dstImageLayout,
 						uint32_t regionCount,
 						const VkImageBlit* pRegions,
-						VkFilter filter,
-						MVKCommandUse commandUse = kMVKCommandUseBlitImage);
+						VkFilter filter);
 
-	void encode(MVKCommandEncoder* cmdEncoder) override;
+	void encode(MVKCommandEncoder* cmdEncoder) override { encode(cmdEncoder, kMVKCommandUseBlitImage); }
 
-	MVKCmdBlitImage();
-
-	~MVKCmdBlitImage() override;
+	void encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse commandUse);
 
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
+	bool canCopyFormats();
 	bool canCopy(const VkImageBlit& region);
-	void addImageBlitRegion(const VkImageBlit& region, MVKPixelFormats* pixFmts);
-	void addImageCopyRegionFromBlitRegion(const VkImageBlit& region, MVKPixelFormats* pixFmts);
 	void populateVertices(MVKVertexPosTex* vertices, const VkImageBlit& region);
-    void initMTLRenderPassDescriptor();
 
-	MTLRenderPassDescriptor* _mtlRenderPassDescriptor;
-	MVKRPSKeyBlitImg _blitKey;
-	MVKVectorInline<MVKImageBlitRender, 4> _mvkImageBlitRenders;
+	MVKVectorInline<VkImageBlit, N> _vkImageBlits;
+	MVKImage* _srcImage;
+	MVKImage* _dstImage;
+	VkImageLayout _srcLayout;
+	VkImageLayout _dstLayout;
+	VkFilter _filter;
 };
 
+// Concrete template class implementations.
+typedef MVKCmdBlitImage<1> MVKCmdBlitImage1;
+typedef MVKCmdBlitImage<4> MVKCmdBlitImageMulti;
+
 
 #pragma mark -
 #pragma mark MVKCmdResolveImage
@@ -133,7 +129,11 @@
     uint32_t	slice;
 } MVKMetalResolveSlice;
 
-/** Vulkan command to resolve image regions. */
+/**
+ * Vulkan command to resolve image regions.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
 class MVKCmdResolveImage : public MVKCommand {
 
 public:
@@ -147,33 +147,29 @@
 
     void encode(MVKCommandEncoder* cmdEncoder) override;
 
-    MVKCmdResolveImage();
-
-    ~MVKCmdResolveImage() override;
-
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
-	void addExpansionRegion(const VkImageResolve& resolveRegion);
-    void addCopyRegion(const VkImageResolve& resolveRegion);
-    void addResolveSlices(const VkImageResolve& resolveRegion);
-    void initMTLRenderPassDescriptor();
 
+	MVKVectorInline<VkImageResolve, N> _vkImageResolves;
     MVKImage* _srcImage;
+	MVKImage* _dstImage;
     VkImageLayout _srcLayout;
-    MVKImage* _dstImage;
     VkImageLayout _dstLayout;
-    MVKImageDescriptorData _transferImageData;
-    MTLRenderPassDescriptor* _mtlRenderPassDescriptor;
-	MVKVectorInline<VkImageBlit, 4> _expansionRegions;
-	MVKVectorInline<VkImageCopy, 4> _copyRegions;
-	MVKVectorInline<MVKMetalResolveSlice, 4> _mtlResolveSlices;
 };
 
+// Concrete template class implementations.
+typedef MVKCmdResolveImage<1> MVKCmdResolveImage1;
+typedef MVKCmdResolveImage<4> MVKCmdResolveImageMulti;
+
 
 #pragma mark -
 #pragma mark MVKCmdCopyBuffer
 
-/** Vulkan command to copy buffer regions. */
+/**
+ * Vulkan command to copy buffer regions.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
 class MVKCmdCopyBuffer : public MVKCommand {
 
 public:
@@ -188,16 +184,24 @@
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
+	MVKVectorInline<VkBufferCopy, N> _bufferCopyRegions;
 	MVKBuffer* _srcBuffer;
 	MVKBuffer* _dstBuffer;
-	MVKVectorInline<VkBufferCopy, 4> _mtlBuffCopyRegions;
 };
 
+// Concrete template class implementations.
+typedef MVKCmdCopyBuffer<1> MVKCmdCopyBuffer1;
+typedef MVKCmdCopyBuffer<4> MVKCmdCopyBufferMulti;
+
 
 #pragma mark -
 #pragma mark MVKCmdBufferImageCopy
 
-/** Command to copy either from a buffer to an image, or from an image to a buffer. */
+/**
+ * Vulkan command to copy either from a buffer to an image, or from an image to a buffer.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
 class MVKCmdBufferImageCopy : public MVKCommand {
 
 public:
@@ -215,18 +219,28 @@
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 	bool isArrayTexture();
 
+	MVKVectorInline<VkBufferImageCopy, N> _bufferImageCopyRegions;
     MVKBuffer* _buffer;
     MVKImage* _image;
     VkImageLayout _imageLayout;
-	MVKVectorInline<VkBufferImageCopy, 4> _bufferImageCopyRegions;
     bool _toImage = false;
 };
 
+// Concrete template class implementations.
+typedef MVKCmdBufferImageCopy<1> MVKCmdBufferImageCopy1;
+typedef MVKCmdBufferImageCopy<4> MVKCmdBufferImageCopy4;	// To support MVKCmdCopyImage
+typedef MVKCmdBufferImageCopy<8> MVKCmdBufferImageCopy8;
+typedef MVKCmdBufferImageCopy<16> MVKCmdBufferImageCopyMulti;
+
 
 #pragma mark -
 #pragma mark MVKCmdClearAttachments
 
-/** Vulkan command to clear attachment regions. */
+/**
+ * Abstract Vulkan command to clear attachment regions.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
 class MVKCmdClearAttachments : public MVKCommand {
 
 public:
@@ -239,25 +253,73 @@
     void encode(MVKCommandEncoder* cmdEncoder) override;
 
 protected:
-	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
-    void populateVertices(float attWidth, float attHeight);
-    void populateVertices(VkClearRect& clearRect, float attWidth, float attHeight);
+    void populateVertices(simd::float4* vertices, float attWidth, float attHeight);
+	uint32_t populateVertices(simd::float4* vertices, uint32_t startVertex,
+							  VkClearRect& clearRect, float attWidth, float attHeight);
+	virtual VkClearValue& getClearValue(uint32_t attIdx) = 0;
+	virtual void setClearValue(uint32_t attIdx, const VkClearValue& clearValue) = 0;
 
-	MVKVectorInline<VkClearRect, 4> _clearRects;
-	MVKVectorInline<simd::float4, (4 * 6)> _vertices;
-    simd::float4 _clearColors[kMVKClearAttachmentCount];
-    VkClearValue _vkClearValues[kMVKClearAttachmentCount];
+	MVKVectorInline<VkClearRect, N> _clearRects;
     MVKRPSKeyClearAtt _rpsKey;
+	bool _isClearingDepth;
+	bool _isClearingStencil;
+	float _mtlDepthVal;
     uint32_t _mtlStencilValue;
-    bool _isClearingDepth;
-    bool _isClearingStencil;
 };
 
 
 #pragma mark -
+#pragma mark MVKCmdClearSingleAttachment
+
+/**
+ * Vulkan command to clear regions in a single attachment.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
+class MVKCmdClearSingleAttachment : public MVKCmdClearAttachments<N> {
+
+protected:
+	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
+	VkClearValue& getClearValue(uint32_t attIdx) override { return _vkClearValue; }
+	void setClearValue(uint32_t attIdx, const VkClearValue& clearValue) override { _vkClearValue = clearValue; }
+
+	VkClearValue _vkClearValue;
+};
+
+typedef MVKCmdClearSingleAttachment<1> MVKCmdClearSingleAttachment1;
+typedef MVKCmdClearSingleAttachment<4> MVKCmdClearSingleAttachmentMulti;
+
+
+#pragma mark -
+#pragma mark MVKCmdClearMultiAttachments
+
+/**
+ * Vulkan command to clear regions in multiple attachments.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
+class MVKCmdClearMultiAttachments : public MVKCmdClearAttachments<N> {
+
+protected:
+	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
+	VkClearValue& getClearValue(uint32_t attIdx) override { return _vkClearValues[attIdx]; }
+	void setClearValue(uint32_t attIdx, const VkClearValue& clearValue) override { _vkClearValues[attIdx] = clearValue; }
+
+	VkClearValue _vkClearValues[kMVKCachedColorAttachmentCount];
+};
+
+typedef MVKCmdClearMultiAttachments<1> MVKCmdClearMultiAttachments1;
+typedef MVKCmdClearMultiAttachments<4> MVKCmdClearMultiAttachmentsMulti;
+
+
+#pragma mark -
 #pragma mark MVKCmdClearImage
 
-/** Vulkan command to clear an image. */
+/**
+ * Abstract Vulkan command to clear an image.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
 class MVKCmdClearImage : public MVKCommand {
 
 public:
@@ -266,24 +328,58 @@
 						VkImageLayout imageLayout,
 						const VkClearValue& clearValue,
 						uint32_t rangeCount,
-						const VkImageSubresourceRange* pRanges,
-						bool isDepthStencilClear);
+						const VkImageSubresourceRange* pRanges);
 
     void encode(MVKCommandEncoder* cmdEncoder) override;
 
 protected:
-	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
     uint32_t populateMetalCopyRegions(const VkImageBlit* pRegion, uint32_t cpyRgnIdx);
     uint32_t populateMetalBlitRenders(const VkImageBlit* pRegion, uint32_t rendRgnIdx);
     void populateVertices(MVKVertexPosTex* vertices, const VkImageBlit* pRegion);
-    
-    MVKImage* _image;
-    VkImageLayout _imgLayout;
-	MVKVectorInline<VkImageSubresourceRange, 4> _subresourceRanges;
+	virtual bool isDepthStencilClear() = 0;
+
+	MVKVectorInline<VkImageSubresourceRange, N> _subresourceRanges;
+	MVKImage* _image;
 	VkClearValue _clearValue;
-    bool _isDepthStencilClear;
 };
 
+#pragma mark -
+#pragma mark MVKCmdClearColorImage
+
+/**
+ * Vulkan command to clear a color image.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
+class MVKCmdClearColorImage : public MVKCmdClearImage<N> {
+
+protected:
+	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
+	bool isDepthStencilClear() override { return false; }
+};
+
+typedef MVKCmdClearColorImage<1> MVKCmdClearColorImage1;
+typedef MVKCmdClearColorImage<4> MVKCmdClearColorImageMulti;
+
+
+#pragma mark -
+#pragma mark MVKCmdClearDepthStencilImage
+
+/**
+ * Vulkan command to clear a depth-stencil image.
+ * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
+ */
+template <size_t N>
+class MVKCmdClearDepthStencilImage : public MVKCmdClearImage<N> {
+
+protected:
+	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
+	bool isDepthStencilClear() override { return true; }
+};
+
+typedef MVKCmdClearDepthStencilImage<1> MVKCmdClearDepthStencilImage1;
+typedef MVKCmdClearDepthStencilImage<4> MVKCmdClearDepthStencilImageMulti;
+
 
 #pragma mark -
 #pragma mark MVKCmdFillBuffer
@@ -328,8 +424,8 @@
 protected:
 	MVKCommandTypePool<MVKCommand>* getTypePool(MVKCommandPool* cmdPool) override;
 
+	MVKVectorDefault<uint8_t> _srcDataCache;
 	MVKBuffer* _dstBuffer;
     VkDeviceSize _dstOffset;
     VkDeviceSize _dataSize;
-    MVKVectorDefault<uint8_t> _srcDataCache;
 };
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
index cb9f410..341c5ce 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
@@ -47,25 +47,30 @@
 #pragma mark -
 #pragma mark MVKCmdCopyImage
 
-VkResult MVKCmdCopyImage::setContent(MVKCommandBuffer* cmdBuff,
-									 VkImage srcImage,
-									 VkImageLayout srcImageLayout,
-									 VkImage dstImage,
-									 VkImageLayout dstImageLayout,
-									 uint32_t regionCount,
-									 const VkImageCopy* pRegions,
-									 MVKCommandUse commandUse) {
+template <size_t N>
+VkResult MVKCmdCopyImage<N>::setContent(MVKCommandBuffer* cmdBuff,
+										VkImage srcImage,
+										VkImageLayout srcImageLayout,
+										VkImage dstImage,
+										VkImageLayout dstImageLayout,
+										uint32_t regionCount,
+										const VkImageCopy* pRegions) {
+	_srcImage = (MVKImage*)srcImage;
+	_srcLayout = srcImageLayout;
 
-	setContent(cmdBuff, srcImage, srcImageLayout, dstImage, dstImageLayout, false, commandUse);
+	_dstImage = (MVKImage*)dstImage;
+	_dstLayout = dstImageLayout;
 
-	MVKPixelFormats* pixFmts = cmdBuff->getPixelFormats();
+	_vkImageCopies.clear();		// Clear for reuse
 	for (uint32_t i = 0; i < regionCount; i++) {
-		addImageCopyRegion(pRegions[i], pixFmts);
+		_vkImageCopies.push_back(pRegions[i]);
 	}
 
 	// Validate
-	if ( !_canCopyFormats ) {
-		return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdCopyImage(): Cannot copy between incompatible formats, such as formats of different pixel sizes.");
+	MVKPixelFormats* pixFmts = cmdBuff->getPixelFormats();
+	if ((_dstImage->getSampleCount() != _srcImage->getSampleCount()) ||
+		(pixFmts->getBytesPerBlock(_dstImage->getMTLPixelFormat()) != pixFmts->getBytesPerBlock(_srcImage->getMTLPixelFormat()))) {
+		return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdCopyImage(): Cannot copy between incompatible formats, such as formats of different pixel sizes, or between images with different sample counts.");
 	}
 	if ((_srcImage->getMTLTextureType() == MTLTextureType3D) != (_dstImage->getMTLTextureType() == MTLTextureType3D)) {
 		return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdCopyImage(): Metal does not support copying to or from slices of a 3D texture.");
@@ -74,243 +79,176 @@
 	return VK_SUCCESS;
 }
 
-// Sets common content for use by this class and subclasses
-VkResult MVKCmdCopyImage::setContent(MVKCommandBuffer* cmdBuff,
-									 VkImage srcImage,
-									 VkImageLayout srcImageLayout,
-									 VkImage dstImage,
-									 VkImageLayout dstImageLayout,
-									 bool formatsMustMatch,
-									 MVKCommandUse commandUse) {
-	MVKPixelFormats* pixFmts = cmdBuff->getPixelFormats();
+template <size_t N>
+void MVKCmdCopyImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse commandUse) {
 
-	_srcImage = (MVKImage*)srcImage;
-	_srcLayout = srcImageLayout;
-	_srcSampleCount = mvkSampleCountFromVkSampleCountFlagBits(_srcImage->getSampleCount());
-	_isSrcCompressed = _srcImage->getIsCompressed();
 	MTLPixelFormat srcMTLPixFmt = _srcImage->getMTLPixelFormat();
-	uint32_t srcBytesPerBlock = pixFmts->getBytesPerBlock(srcMTLPixFmt);
+	bool isSrcCompressed = _srcImage->getIsCompressed();
 
-	_dstImage = (MVKImage*)dstImage;
-	_dstLayout = dstImageLayout;
-	_dstSampleCount = mvkSampleCountFromVkSampleCountFlagBits(_dstImage->getSampleCount());
-	_isDstCompressed = _dstImage->getIsCompressed();
 	MTLPixelFormat dstMTLPixFmt = _dstImage->getMTLPixelFormat();
-	uint32_t dstBytesPerBlock = pixFmts->getBytesPerBlock(dstMTLPixFmt);
+	bool isDstCompressed = _dstImage->getIsCompressed();
 
-	_canCopyFormats = (_dstSampleCount == _srcSampleCount) && (formatsMustMatch
-																? (dstMTLPixFmt == srcMTLPixFmt)
-																: (dstBytesPerBlock == srcBytesPerBlock));
+	// If source and destination have different formats and at least one is compressed, use a temporary intermediary buffer
+	bool useTempBuffer = (srcMTLPixFmt != dstMTLPixFmt) && (isSrcCompressed || isDstCompressed);
+	if (useTempBuffer) {
+		MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats();
+		uint32_t copyCnt = (uint32_t)_vkImageCopies.size();
+		VkBufferImageCopy vkSrcCopies[copyCnt];
+		VkBufferImageCopy vkDstCopies[copyCnt];
+		size_t tmpBuffSize = 0;
+		for (uint32_t copyIdx = 0; copyIdx < copyCnt; copyIdx++) {
+			auto& vkIC = _vkImageCopies[copyIdx];
 
-	_useTempBuffer = (srcMTLPixFmt != dstMTLPixFmt) && (_isSrcCompressed || _isDstCompressed);	// Different formats and at least one is compressed
+			// Add copy from source image to temp buffer.
+			auto& srcCpy = vkSrcCopies[copyIdx];
+			srcCpy.bufferOffset = tmpBuffSize;
+			srcCpy.bufferRowLength = 0;
+			srcCpy.bufferImageHeight = 0;
+			srcCpy.imageSubresource = vkIC.srcSubresource;
+			srcCpy.imageOffset = vkIC.srcOffset;
+			srcCpy.imageExtent = vkIC.extent;
 
-	_commandUse = commandUse;
-	_tmpBuffSize = 0;
+			// Add copy from temp buffer to destination image.
+			// Extent is provided in source texels. If the source is compressed but the
+			// destination is not, each destination pixel will consume an entire source block,
+			// so we must downscale the destination extent by the size of the source block.
+			VkExtent3D dstExtent = vkIC.extent;
+			if (isSrcCompressed && !isDstCompressed) {
+				VkExtent2D srcBlockExtent = pixFmts->getBlockTexelSize(srcMTLPixFmt);
+				dstExtent.width /= srcBlockExtent.width;
+				dstExtent.height /= srcBlockExtent.height;
+			}
+			auto& dstCpy = vkDstCopies[copyIdx];
+			dstCpy.bufferOffset = tmpBuffSize;
+			dstCpy.bufferRowLength = 0;
+			dstCpy.bufferImageHeight = 0;
+			dstCpy.imageSubresource = vkIC.dstSubresource;
+			dstCpy.imageOffset = vkIC.dstOffset;
+			dstCpy.imageExtent = dstExtent;
 
-	_imageCopyRegions.clear();		// Clear for reuse
-	_srcTmpBuffImgCopies.clear();	// Clear for reuse
-	_dstTmpBuffImgCopies.clear();	// Clear for reuse
-
-	return VK_SUCCESS;
-}
-
-void MVKCmdCopyImage::addImageCopyRegion(const VkImageCopy& region, MVKPixelFormats* pixFmts) {
-	if (_useTempBuffer) {
-		addTempBufferImageCopyRegion(region, pixFmts);	// Convert to image->buffer->image copies
-	} else {
-		_imageCopyRegions.push_back(region);
-	}
-}
-
-// Add an image->buffer copy and buffer->image copy to replace the image->image copy
-void MVKCmdCopyImage::addTempBufferImageCopyRegion(const VkImageCopy& region, MVKPixelFormats* pixFmts) {
-
-	// Add copy from source image to temp buffer.
-	VkBufferImageCopy buffImgCpy;
-	buffImgCpy.bufferOffset = _tmpBuffSize;
-	buffImgCpy.bufferRowLength = 0;
-	buffImgCpy.bufferImageHeight = 0;
-	buffImgCpy.imageSubresource = region.srcSubresource;
-	buffImgCpy.imageOffset = region.srcOffset;
-	buffImgCpy.imageExtent = region.extent;
-	_srcTmpBuffImgCopies.push_back(buffImgCpy);
-
-	// Add copy from temp buffer to destination image.
-	// Extent is provided in source texels. If the source is compressed but the
-	// destination is not, each destination pixel will consume an entire source block,
-	// so we must downscale the destination extent by the size of the source block.
-	MTLPixelFormat srcMTLPixFmt = _srcImage->getMTLPixelFormat();
-	VkExtent3D dstExtent = region.extent;
-	if (_isSrcCompressed && !_isDstCompressed) {
-		VkExtent2D srcBlockExtent = pixFmts->getBlockTexelSize(srcMTLPixFmt);
-		dstExtent.width /= srcBlockExtent.width;
-		dstExtent.height /= srcBlockExtent.height;
-	}
-	buffImgCpy.bufferOffset = _tmpBuffSize;
-	buffImgCpy.bufferRowLength = 0;
-	buffImgCpy.bufferImageHeight = 0;
-	buffImgCpy.imageSubresource = region.dstSubresource;
-	buffImgCpy.imageOffset = region.dstOffset;
-	buffImgCpy.imageExtent = dstExtent;
-	_dstTmpBuffImgCopies.push_back(buffImgCpy);
-
-	NSUInteger bytesPerRow = pixFmts->getBytesPerRow(srcMTLPixFmt, region.extent.width);
-	NSUInteger bytesPerRegion = pixFmts->getBytesPerLayer(srcMTLPixFmt, bytesPerRow, region.extent.height);
-	_tmpBuffSize += bytesPerRegion;
-}
-
-void MVKCmdCopyImage::encode(MVKCommandEncoder* cmdEncoder) {
-	// Unless we need to use an intermediary buffer copy, map the source pixel format to the
-	// dest pixel format through a texture view on the source texture. If the source and dest
-	// pixel formats are the same, this will simply degenerate to the source texture itself.
-	MTLPixelFormat mapSrcMTLPixFmt = (_useTempBuffer ? _srcImage : _dstImage)->getMTLPixelFormat();
-	id<MTLTexture> srcMTLTex = _srcImage->getMTLTexture(mapSrcMTLPixFmt);
-	id<MTLTexture> dstMTLTex = _dstImage->getMTLTexture();
-	if ( !srcMTLTex || !dstMTLTex ) { return; }
-
-	id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(_commandUse);
-
-	// If copies can be performed using direct texture-texture copying, do so
-	for (auto& cpyRgn : _imageCopyRegions) {
-		uint32_t  srcLevel = cpyRgn.srcSubresource.mipLevel;
-		MTLOrigin srcOrigin = mvkMTLOriginFromVkOffset3D(cpyRgn.srcOffset);
-		MTLSize   srcSize = mvkClampMTLSize(mvkMTLSizeFromVkExtent3D(cpyRgn.extent),
-											srcOrigin,
-											mvkMTLSizeFromVkExtent3D(_srcImage->getExtent3D(srcLevel)));
-		uint32_t  dstLevel = cpyRgn.dstSubresource.mipLevel;
-		MTLOrigin dstOrigin = mvkMTLOriginFromVkOffset3D(cpyRgn.dstOffset);
-		uint32_t  srcBaseLayer = cpyRgn.srcSubresource.baseArrayLayer;
-		uint32_t  dstBaseLayer = cpyRgn.dstSubresource.baseArrayLayer;
-		uint32_t  layCnt = cpyRgn.srcSubresource.layerCount;
-
-		for (uint32_t layIdx = 0; layIdx < layCnt; layIdx++) {
-			[mtlBlitEnc copyFromTexture: srcMTLTex
-							sourceSlice: srcBaseLayer + layIdx
-							sourceLevel: srcLevel
-						   sourceOrigin: srcOrigin
-							 sourceSize: srcSize
-							  toTexture: dstMTLTex
-					   destinationSlice: dstBaseLayer + layIdx
-					   destinationLevel: dstLevel
-					  destinationOrigin: dstOrigin];
+			size_t bytesPerRow = pixFmts->getBytesPerRow(srcMTLPixFmt, vkIC.extent.width);
+			size_t bytesPerRegion = pixFmts->getBytesPerLayer(srcMTLPixFmt, bytesPerRow, vkIC.extent.height);
+			tmpBuffSize += bytesPerRegion;
 		}
-	}
 
-	// If copies could not be performed directly between images,
-	// use a temporary buffer acting as a waystation between the images.
-	if ( !_srcTmpBuffImgCopies.empty() ) {
 		MVKBufferDescriptorData tempBuffData;
-		tempBuffData.size = _tmpBuffSize;
+		tempBuffData.size = tmpBuffSize;
 		tempBuffData.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-		MVKBuffer* tempBuff = cmdEncoder->getCommandEncodingPool()->getTransferMVKBuffer(tempBuffData);
+		VkBuffer tempBuff = (VkBuffer)cmdEncoder->getCommandEncodingPool()->getTransferMVKBuffer(tempBuffData);
 
-		MVKCmdBufferImageCopy cpyCmd;
+		MVKCmdBufferImageCopy<N> cpyCmd;
 
 		// Copy from source image to buffer
 		// Create and execute a temporary buffer image command.
 		// To be threadsafe...do NOT acquire and return the command from the pool.
-		cpyCmd.setContent(cmdEncoder->_cmdBuffer,
-						  (VkBuffer) tempBuff,
-						  (VkImage) _srcImage,
-						  _srcLayout,
-						  (uint32_t)_srcTmpBuffImgCopies.size(),
-						  _srcTmpBuffImgCopies.data(),
-						  false);
+		cpyCmd.setContent(cmdEncoder->_cmdBuffer, tempBuff, (VkImage)_srcImage, _srcLayout, copyCnt, vkSrcCopies, false);
 		cpyCmd.encode(cmdEncoder);
 
 		// Copy from buffer to destination image
 		// Create and execute a temporary buffer image command.
 		// To be threadsafe...do NOT acquire and return the command from the pool.
-		cpyCmd.setContent(cmdEncoder->_cmdBuffer,
-						  (VkBuffer) tempBuff,
-						  (VkImage) _dstImage,
-						  _dstLayout,
-						  (uint32_t)_dstTmpBuffImgCopies.size(),
-						  _dstTmpBuffImgCopies.data(),
-						  true);
+		cpyCmd.setContent(cmdEncoder->_cmdBuffer, tempBuff, (VkImage)_dstImage, _dstLayout, copyCnt, vkDstCopies, true);
 		cpyCmd.encode(cmdEncoder);
+
+	} else {
+		// Map the source pixel format to the dest pixel format through a texture view on the source texture.
+		// If the source and dest pixel formats are the same, this will simply degenerate to the source texture itself.
+		id<MTLTexture> srcMTLTex = _srcImage->getMTLTexture(_dstImage->getMTLPixelFormat());
+		id<MTLTexture> dstMTLTex = _dstImage->getMTLTexture();
+		if ( !srcMTLTex || !dstMTLTex ) { return; }
+
+		id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(commandUse);
+
+		// If copies can be performed using direct texture-texture copying, do so
+		for (auto& cpyRgn : _vkImageCopies) {
+			uint32_t srcLevel = cpyRgn.srcSubresource.mipLevel;
+			MTLOrigin srcOrigin = mvkMTLOriginFromVkOffset3D(cpyRgn.srcOffset);
+			MTLSize srcSize = mvkClampMTLSize(mvkMTLSizeFromVkExtent3D(cpyRgn.extent),
+											  srcOrigin,
+											  mvkMTLSizeFromVkExtent3D(_srcImage->getExtent3D(srcLevel)));
+			uint32_t dstLevel = cpyRgn.dstSubresource.mipLevel;
+			MTLOrigin dstOrigin = mvkMTLOriginFromVkOffset3D(cpyRgn.dstOffset);
+			uint32_t srcBaseLayer = cpyRgn.srcSubresource.baseArrayLayer;
+			uint32_t dstBaseLayer = cpyRgn.dstSubresource.baseArrayLayer;
+			uint32_t layCnt = cpyRgn.srcSubresource.layerCount;
+
+			for (uint32_t layIdx = 0; layIdx < layCnt; layIdx++) {
+				[mtlBlitEnc copyFromTexture: srcMTLTex
+								sourceSlice: srcBaseLayer + layIdx
+								sourceLevel: srcLevel
+							   sourceOrigin: srcOrigin
+								 sourceSize: srcSize
+								  toTexture: dstMTLTex
+						   destinationSlice: dstBaseLayer + layIdx
+						   destinationLevel: dstLevel
+						  destinationOrigin: dstOrigin];
+			}
+		}
 	}
 }
 
+template class MVKCmdCopyImage<1>;
+template class MVKCmdCopyImage<4>;
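
When the temporary intermediary buffer is used, the loop above sizes it by summing, per region, the bytes-per-layer derived from the source format's bytes-per-row; each region's bufferOffset is the running total. A worked sketch of that accumulation, assuming a BC1-style compressed source (4x4 texel blocks, 8 bytes per block; the block geometry is illustrative, not taken from this diff):

    #include <cstddef>
    #include <cstdio>

    int main() {
        // Assumed BC1-like block geometry: 4x4 texels per block, 8 bytes per block.
        const unsigned blockW = 4, blockH = 4, bytesPerBlock = 8;
        struct { unsigned w, h; } regions[] = { {256, 256}, {64, 64} };

        std::size_t tmpBuffSize = 0;
        for (auto& r : regions) {
            std::size_t bufferOffset = tmpBuffSize;   // region starts at the running total
            std::size_t bytesPerRow  = ((r.w + blockW - 1) / blockW) * bytesPerBlock;
            std::size_t blockRows    = (r.h + blockH - 1) / blockH;
            tmpBuffSize += bytesPerRow * blockRows;   // bytes for one layer of this region
            std::printf("region %ux%u at offset %zu\n", r.w, r.h, bufferOffset);
        }
        std::printf("temp buffer size: %zu bytes\n", tmpBuffSize);  // 32768 + 2048 = 34816
    }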
+
 
 #pragma mark -
 #pragma mark MVKCmdBlitImage
 
-VkResult MVKCmdBlitImage::setContent(MVKCommandBuffer* cmdBuff,
-									 VkImage srcImage,
-									 VkImageLayout srcImageLayout,
-									 VkImage dstImage,
-									 VkImageLayout dstImageLayout,
-									 uint32_t regionCount,
-									 const VkImageBlit* pRegions,
-									 VkFilter filter,
-									 MVKCommandUse commandUse) {
-
-	VkResult rslt = MVKCmdCopyImage::setContent(cmdBuff, srcImage, srcImageLayout, dstImage, dstImageLayout, true, commandUse);
-
-	_blitKey.srcMTLPixelFormat = _srcImage->getMTLPixelFormat();
-	_blitKey.srcMTLTextureType = _srcImage->getMTLTextureType();
-	_blitKey.dstMTLPixelFormat = _dstImage->getMTLPixelFormat();
-	_blitKey.srcFilter = mvkMTLSamplerMinMagFilterFromVkFilter(filter);
-	_blitKey.dstSampleCount = _dstSampleCount;
+template <size_t N>
+VkResult MVKCmdBlitImage<N>::setContent(MVKCommandBuffer* cmdBuff,
+										VkImage srcImage,
+										VkImageLayout srcImageLayout,
+										VkImage dstImage,
+										VkImageLayout dstImageLayout,
+										uint32_t regionCount,
+										const VkImageBlit* pRegions,
+										VkFilter filter) {
 
 	MVKPixelFormats* pixFmts = cmdBuff->getPixelFormats();
 
-	_mvkImageBlitRenders.clear();		// Clear for reuse
+	_srcImage = (MVKImage*)srcImage;
+	_srcLayout = srcImageLayout;
+
+	_dstImage = (MVKImage*)dstImage;
+	_dstLayout = dstImageLayout;
+
+	_filter = filter;
+
+	_vkImageBlits.clear();		// Clear for reuse
 	for (uint32_t i = 0; i < regionCount; i++) {
-		addImageBlitRegion(pRegions[i], pixFmts);
+		_vkImageBlits.push_back(pRegions[i]);
 	}
 
-	// Validate
+	// Validate - depth/stencil formats cannot be scaled or inverted
 	MTLPixelFormat srcMTLPixFmt = _srcImage->getMTLPixelFormat();
-	if ( !_mvkImageBlitRenders.empty() &&
-		(pixFmts->isDepthFormat(srcMTLPixFmt) || pixFmts->isStencilFormat(srcMTLPixFmt)) ) {
-
-		_mvkImageBlitRenders.clear();
-		return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdBlitImage(): Scaling or inverting depth/stencil images is not supported.");
+	if (pixFmts->isDepthFormat(srcMTLPixFmt) || pixFmts->isStencilFormat(srcMTLPixFmt)) {
+		bool canCopyFmts = canCopyFormats();
+		for (auto& vkIB : _vkImageBlits) {
+			if ( !(canCopyFmts && canCopy(vkIB)) ) {
+				return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdBlitImage(): Scaling or inverting depth/stencil images is not supported.");
+			}
+		}
 	}
-
-	return rslt;
+	return VK_SUCCESS;
 }
 
-void MVKCmdBlitImage::addImageBlitRegion(const VkImageBlit& region,
-										 MVKPixelFormats* pixFmts) {
-	if (_canCopyFormats && canCopy(region)) {
-		addImageCopyRegionFromBlitRegion(region, pixFmts);	// Convert to image copy
-	} else {
-		MVKImageBlitRender blitRender;
-		blitRender.region = region;
-		populateVertices(blitRender.vertices, region);
-		_mvkImageBlitRenders.push_back(blitRender);
-	}
+template <size_t N>
+bool MVKCmdBlitImage<N>::canCopyFormats() {
+	return ((_srcImage->getMTLPixelFormat() == _dstImage->getMTLPixelFormat()) &&
+			(_dstImage->getSampleCount() == _srcImage->getSampleCount()));
 }
 
 // The source and destination sizes must be equal and not be negative in any direction
-bool MVKCmdBlitImage::canCopy(const VkImageBlit& region) {
+template <size_t N>
+bool MVKCmdBlitImage<N>::canCopy(const VkImageBlit& region) {
 	VkOffset3D srcSize = mvkVkOffset3DDifference(region.srcOffsets[1], region.srcOffsets[0]);
 	VkOffset3D dstSize = mvkVkOffset3DDifference(region.dstOffsets[1], region.dstOffsets[0]);
 	return (mvkVkOffset3DsAreEqual(srcSize, dstSize) &&
 			(srcSize.x >= 0) && (srcSize.y >= 0) && (srcSize.z >= 0));
 }
 
-void MVKCmdBlitImage::addImageCopyRegionFromBlitRegion(const VkImageBlit& region,
-													   MVKPixelFormats* pixFmts) {
-	const VkOffset3D& so0 = region.srcOffsets[0];
-	const VkOffset3D& so1 = region.srcOffsets[1];
-
-	VkImageCopy cpyRgn;
-	cpyRgn.srcSubresource = region.srcSubresource;
-	cpyRgn.srcOffset = region.srcOffsets[0];
-	cpyRgn.dstSubresource = region.dstSubresource;
-	cpyRgn.dstOffset = region.dstOffsets[0];
-	cpyRgn.extent.width = so1.x - so0.x;
-	cpyRgn.extent.height = so1.y - so0.y;
-	cpyRgn.extent.depth = so1.z - so0.z;
-
-	MVKCmdCopyImage::addImageCopyRegion(cpyRgn, pixFmts);
-}
-
-void MVKCmdBlitImage::populateVertices(MVKVertexPosTex* vertices, const VkImageBlit& region) {
+template <size_t N>
+void MVKCmdBlitImage<N>::populateVertices(MVKVertexPosTex* vertices, const VkImageBlit& region) {
     const VkOffset3D& so0 = region.srcOffsets[0];
     const VkOffset3D& so1 = region.srcOffsets[1];
     const VkOffset3D& do0 = region.dstOffsets[0];
@@ -368,50 +306,96 @@
     pVtx->texCoord.y = (1.0 - srcTR.y);
 }
 
-void MVKCmdBlitImage::encode(MVKCommandEncoder* cmdEncoder) {
+template <size_t N>
+void MVKCmdBlitImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse commandUse) {
+
+	size_t vkIBCnt = _vkImageBlits.size();
+	VkImageCopy vkImageCopies[vkIBCnt];
+	MVKImageBlitRender mvkBlitRenders[vkIBCnt];
+	uint32_t copyCnt = 0;
+	uint32_t blitCnt = 0;
+
+	// Separate BLITs into those that are really just simple texture region copies,
+	// and those that require rendering.
+	bool canCopyFmts = canCopyFormats();
+	for (auto& vkIB : _vkImageBlits) {
+		if (canCopyFmts && canCopy(vkIB)) {
+
+			const VkOffset3D& so0 = vkIB.srcOffsets[0];
+			const VkOffset3D& so1 = vkIB.srcOffsets[1];
+
+			auto& vkIC = vkImageCopies[copyCnt++];
+			vkIC.srcSubresource = vkIB.srcSubresource;
+			vkIC.srcOffset = vkIB.srcOffsets[0];
+			vkIC.dstSubresource = vkIB.dstSubresource;
+			vkIC.dstOffset = vkIB.dstOffsets[0];
+			vkIC.extent.width = so1.x - so0.x;
+			vkIC.extent.height = so1.y - so0.y;
+			vkIC.extent.depth = so1.z - so0.z;
+
+		} else {
+			auto& mvkIBR = mvkBlitRenders[blitCnt++];
+			mvkIBR.region = vkIB;
+			populateVertices(mvkIBR.vertices, vkIB);
+		}
+	}
 
 	// Perform those BLITs that can be covered by simple texture copying.
-	if ( !_imageCopyRegions.empty() ) {
-		MVKCmdCopyImage::encode(cmdEncoder);
+	if (copyCnt) {
+		MVKCmdCopyImage<N> copyCmd;
+		copyCmd.setContent(cmdEncoder->_cmdBuffer,
+						   (VkImage)_srcImage, _srcLayout,
+						   (VkImage)_dstImage, _dstLayout,
+						   copyCnt, vkImageCopies);
+		copyCmd.encode(cmdEncoder, kMVKCommandUseBlitImage);
 	}
 
 	// Perform those BLITs that require rendering to destination texture.
-	if ( !_mvkImageBlitRenders.empty() ) {
+	id<MTLTexture> srcMTLTex = _srcImage->getMTLTexture();
+	id<MTLTexture> dstMTLTex = _dstImage->getMTLTexture();
+	if (blitCnt && srcMTLTex && dstMTLTex) {
 
 		cmdEncoder->endCurrentMetalEncoding();
 
-		id<MTLTexture> srcMTLTex = _srcImage->getMTLTexture();
-		id<MTLTexture> dstMTLTex = _dstImage->getMTLTexture();
-		if ( !srcMTLTex || !dstMTLTex ) { return; }
-
-		MTLRenderPassColorAttachmentDescriptor* mtlColorAttDesc = _mtlRenderPassDescriptor.colorAttachments[0];
+		MTLRenderPassDescriptor* mtlRPD = [MTLRenderPassDescriptor renderPassDescriptor];
+		MTLRenderPassColorAttachmentDescriptor* mtlColorAttDesc = mtlRPD.colorAttachments[0];
+		mtlColorAttDesc.loadAction = MTLLoadActionLoad;
+		mtlColorAttDesc.storeAction = MTLStoreActionStore;
 		mtlColorAttDesc.texture = dstMTLTex;
 
+		MVKRPSKeyBlitImg blitKey;
+		blitKey.srcMTLPixelFormat = _srcImage->getMTLPixelFormat();
+		blitKey.srcMTLTextureType = _srcImage->getMTLTextureType();
+		blitKey.dstMTLPixelFormat = _dstImage->getMTLPixelFormat();
+		blitKey.srcFilter = mvkMTLSamplerMinMagFilterFromVkFilter(_filter);
+		blitKey.dstSampleCount = mvkSampleCountFromVkSampleCountFlagBits(_dstImage->getSampleCount());
+		id<MTLRenderPipelineState> mtlRPS = cmdEncoder->getCommandEncodingPool()->getCmdBlitImageMTLRenderPipelineState(blitKey);
+
 		uint32_t vtxBuffIdx = cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKVertexContentBufferIndex);
-		id<MTLRenderPipelineState> mtlRPS = cmdEncoder->getCommandEncodingPool()->getCmdBlitImageMTLRenderPipelineState(_blitKey);
 
-		for (auto& bltRend : _mvkImageBlitRenders) {
+		for (uint32_t blitIdx = 0; blitIdx < blitCnt; blitIdx++) {
+			auto& mvkIBR = mvkBlitRenders[blitIdx];
 
-			mtlColorAttDesc.level = bltRend.region.dstSubresource.mipLevel;
+			mtlColorAttDesc.level = mvkIBR.region.dstSubresource.mipLevel;
 
-			uint32_t layCnt = bltRend.region.srcSubresource.layerCount;
+			uint32_t layCnt = mvkIBR.region.srcSubresource.layerCount;
 			for (uint32_t layIdx = 0; layIdx < layCnt; layIdx++) {
 				// Update the render pass descriptor for the texture level and slice, and create a render encoder.
-				mtlColorAttDesc.slice = bltRend.region.dstSubresource.baseArrayLayer + layIdx;
-				id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: _mtlRenderPassDescriptor];
-				setLabelIfNotNil(mtlRendEnc, mvkMTLRenderCommandEncoderLabel(_commandUse));
+				mtlColorAttDesc.slice = mvkIBR.region.dstSubresource.baseArrayLayer + layIdx;
+				id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPD];
+				setLabelIfNotNil(mtlRendEnc, mvkMTLRenderCommandEncoderLabel(commandUse));
 
 				[mtlRendEnc pushDebugGroup: @"vkCmdBlitImage"];
 				[mtlRendEnc setRenderPipelineState: mtlRPS];
-				cmdEncoder->setVertexBytes(mtlRendEnc, bltRend.vertices, sizeof(bltRend.vertices), vtxBuffIdx);
+				cmdEncoder->setVertexBytes(mtlRendEnc, mvkIBR.vertices, sizeof(mvkIBR.vertices), vtxBuffIdx);
 				[mtlRendEnc setFragmentTexture: srcMTLTex atIndex: 0];
 
 				struct {
 					uint slice;
 					float lod;
 				} texSubRez;
-				texSubRez.slice = bltRend.region.srcSubresource.baseArrayLayer + layIdx;
-				texSubRez.lod = bltRend.region.srcSubresource.mipLevel;
+				texSubRez.slice = mvkIBR.region.srcSubresource.baseArrayLayer + layIdx;
+				texSubRez.lod = mvkIBR.region.srcSubresource.mipLevel;
 				cmdEncoder->setFragmentBytes(mtlRendEnc, &texSubRez, sizeof(texSubRez), 0);
 
 				[mtlRendEnc drawPrimitives: MTLPrimitiveTypeTriangleStrip vertexStart: 0 vertexCount: kMVKBlitVertexCount];
@@ -422,216 +406,162 @@
 	}
 }
 
-
-#pragma mark Construction
-
-MVKCmdBlitImage::MVKCmdBlitImage() {
-    initMTLRenderPassDescriptor();
-}
-
-// Create and configure the render pass descriptor
-void MVKCmdBlitImage::initMTLRenderPassDescriptor() {
-    _mtlRenderPassDescriptor = [[MTLRenderPassDescriptor renderPassDescriptor] retain];		// retained
-    MTLRenderPassColorAttachmentDescriptor* mtlColorAttDesc = _mtlRenderPassDescriptor.colorAttachments[0];
-    mtlColorAttDesc.loadAction = MTLLoadActionLoad;
-    mtlColorAttDesc.storeAction = MTLStoreActionStore;
-}
-
-MVKCmdBlitImage::~MVKCmdBlitImage() {
-	[_mtlRenderPassDescriptor release];
-}
+template class MVKCmdBlitImage<1>;
+template class MVKCmdBlitImage<4>;
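
The encode() above first partitions the blit regions: when the formats and sample counts match (canCopyFormats()) and a region's source and destination extents are equal and non-negative in every axis (canCopy()), it is re-expressed as a plain VkImageCopy; scaled or inverted regions take the render path. A small sketch of the canCopy() test, using simplified offset structs:

    #include <cstdint>
    #include <cstdio>

    struct Offset3D { int32_t x, y, z; };

    // A blit degenerates to a copy only when the source and destination sizes
    // match and neither is inverted (negative in some direction).
    static bool canCopy(const Offset3D src[2], const Offset3D dst[2]) {
        Offset3D s{src[1].x - src[0].x, src[1].y - src[0].y, src[1].z - src[0].z};
        Offset3D d{dst[1].x - dst[0].x, dst[1].y - dst[0].y, dst[1].z - dst[0].z};
        return s.x == d.x && s.y == d.y && s.z == d.z &&
               s.x >= 0 && s.y >= 0 && s.z >= 0;
    }

    int main() {
        Offset3D same[2] = { {0, 0, 0}, {128, 128, 1} };
        Offset3D flip[2] = { {0, 128, 0}, {128, 0, 1} };   // vertically inverted
        std::printf("same-size blit -> copy path:   %d\n", canCopy(same, same));   // 1
        std::printf("inverted blit  -> render path: %d\n", !canCopy(same, flip));  // 1
    }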
 
 
 #pragma mark -
 #pragma mark MVKCmdResolveImage
 
-VkResult MVKCmdResolveImage::setContent(MVKCommandBuffer* cmdBuff,
-										VkImage srcImage,
-										VkImageLayout srcImageLayout,
-										VkImage dstImage,
-										VkImageLayout dstImageLayout,
-										uint32_t regionCount,
-										const VkImageResolve* pRegions) {
+template <size_t N>
+VkResult MVKCmdResolveImage<N>::setContent(MVKCommandBuffer* cmdBuff,
+										   VkImage srcImage,
+										   VkImageLayout srcImageLayout,
+										   VkImage dstImage,
+										   VkImageLayout dstImageLayout,
+										   uint32_t regionCount,
+										   const VkImageResolve* pRegions) {
     _srcImage = (MVKImage*)srcImage;
     _srcLayout = srcImageLayout;
     _dstImage = (MVKImage*)dstImage;
     _dstLayout = dstImageLayout;
 
-    // Deterine the total number of texture layers being affected
-    uint32_t layerCnt = 0;
+	_vkImageResolves.clear();	// Clear for reuse
+	_vkImageResolves.reserve(regionCount);
     for (uint32_t i = 0; i < regionCount; i++) {
-        layerCnt += pRegions[i].dstSubresource.layerCount;
+		_vkImageResolves.push_back(pRegions[i]);
     }
 
-    // Resize the region arrays accordingly
-    _expansionRegions.clear();              // Clear for reuse
-    _expansionRegions.reserve(regionCount);
-    _copyRegions.clear();                   // Clear for reuse
-    _copyRegions.reserve(regionCount);
-    _mtlResolveSlices.clear();              // Clear for reuse
-    _mtlResolveSlices.reserve(layerCnt);
-
-    // Add image regions
-    for (uint32_t i = 0; i < regionCount; i++) {
-        const VkImageResolve& rslvRgn = pRegions[i];
-        addExpansionRegion(rslvRgn);
-        addCopyRegion(rslvRgn);
-        addResolveSlices(rslvRgn);
-    }
-
-    _dstImage->getTransferDescriptorData(_transferImageData);
-	_transferImageData.samples = _srcImage->getSampleCount();
-
 	// Validate
-	if ( !mvkAreAllFlagsEnabled(cmdBuff->getPixelFormats()->getCapabilities(_dstImage->getMTLPixelFormat()), kMVKMTLFmtCapsResolve) ) {
-		return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdResolveImage(): %s cannot be used as a resolve destination on this device.", cmdBuff->getPixelFormats()->getName(_dstImage->getVkFormat()));
+	MVKPixelFormats* pixFmts = cmdBuff->getPixelFormats();
+	if ( !mvkAreAllFlagsEnabled(pixFmts->getCapabilities(_dstImage->getMTLPixelFormat()), kMVKMTLFmtCapsResolve) ) {
+		return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdResolveImage(): %s cannot be used as a resolve destination on this device.", pixFmts->getName(_dstImage->getVkFormat()));
 	}
 
 	return VK_SUCCESS;
 }
 
-/**
- * Adds a VkImageBlit region, constructed from the resolve region, to the internal collection
- * of expansion regions, unless the entire content of the destination texture of this command 
- * is to be resolved, an expansion region will not be added.
- *
- * The purpose of an expansion regions is to render the existing content of the destination
- * image of this command to the temporary transfer multisample image, so that regions of that 
- * temporary transfer image can then be overwritten with content from the source image of this
- * command, prior to resolving it back to the destination image of this command.
- *
- * As such, the source of this expansion stage is the destination image of this command,
- * and the destination of this expansion stage is a temp image that has the same shape
- * as the source image of this command.
- */
-void MVKCmdResolveImage::addExpansionRegion(const VkImageResolve& resolveRegion) {
-    uint32_t mipLvl = resolveRegion.dstSubresource.mipLevel;
-    VkExtent3D srcImgExt = _srcImage->getExtent3D(mipLvl);
-    VkExtent3D dstImgExt = _dstImage->getExtent3D(mipLvl);
+template <size_t N>
+void MVKCmdResolveImage<N>::encode(MVKCommandEncoder* cmdEncoder) {
 
-    // No need to add an expansion region if the entire content of
-    // the source image is being resolved to the destination image.
-    if (mvkVkExtent3DsAreEqual(srcImgExt, resolveRegion.extent)) { return; }
+	size_t vkIRCnt = _vkImageResolves.size();
+	VkImageBlit expansionRegions[vkIRCnt];
+	VkImageCopy copyRegions[vkIRCnt];
 
-    // The source of this temporary content move is the full extent of the DESTINATION
-    // image of the resolve command, and the destination of this temporary content move
-    // is the full extent of the SOURCE image of the resolve command.
-    VkImageBlit expRgn = {
-        .srcSubresource = resolveRegion.dstSubresource,
-        .srcOffsets[0] = { 0, 0, 0 },
-        .srcOffsets[1] = { int32_t(dstImgExt.width), int32_t(dstImgExt.height), int32_t(dstImgExt.depth) },
-        .dstSubresource = resolveRegion.dstSubresource,
-        .dstOffsets[0] = { 0, 0, 0 },
-        .dstOffsets[1] = { int32_t(srcImgExt.width), int32_t(srcImgExt.height), int32_t(srcImgExt.depth) },
-    };
-    _expansionRegions.push_back(expRgn);
-}
+	uint32_t layerCnt = 0;
+	for (VkImageResolve& vkIR : _vkImageResolves) { layerCnt += vkIR.dstSubresource.layerCount; }
+	MVKMetalResolveSlice mtlResolveSlices[layerCnt];
 
-/**
- * Adds a VkImageCopy region, constructed from the resolve region,
- * to the internal collection of copy regions.
- *
- * The purpose of a copy region is to copy regions from the source image of this command to
- * the temporary image, prior to the temporary image being resolved back to the destination
- * image of this command.
- *
- * As such, the source of this copy stage is the source image of this command, and the
- * destination of this copy stage is the temporary transfer image that has the same shape 
- * as the source image of this command.
- */
-void MVKCmdResolveImage::addCopyRegion(const VkImageResolve& resolveRegion) {
-    VkImageCopy cpyRgn = {
-        .srcSubresource = resolveRegion.srcSubresource,
-        .srcOffset = resolveRegion.srcOffset,
-        .dstSubresource = resolveRegion.srcSubresource,
-        .dstOffset = resolveRegion.srcOffset,
-        .extent = resolveRegion.extent,
-    };
-    _copyRegions.push_back(cpyRgn);
-}
+	uint32_t expCnt = 0;
+	uint32_t copyCnt = 0;
+	uint32_t sliceCnt = 0;
 
-/** Adds a resolve slice struct for each destination layer in the resolve region. */
-void MVKCmdResolveImage::addResolveSlices(const VkImageResolve& resolveRegion) {
-    MVKMetalResolveSlice rslvSlice;
-    rslvSlice.level = resolveRegion.dstSubresource.mipLevel;
+	for (VkImageResolve& vkIR : _vkImageResolves) {
 
-    uint32_t baseLayer = resolveRegion.dstSubresource.baseArrayLayer;
-    uint32_t layCnt = resolveRegion.dstSubresource.layerCount;
-    for (uint32_t layIdx = 0; layIdx < layCnt; layIdx++) {
-        rslvSlice.slice = baseLayer + layIdx;
-        _mtlResolveSlices.push_back(rslvSlice);
-    }
-}
+		uint32_t mipLvl = vkIR.dstSubresource.mipLevel;
+		VkExtent3D srcImgExt = _srcImage->getExtent3D(mipLvl);
+		VkExtent3D dstImgExt = _dstImage->getExtent3D(mipLvl);
 
-void MVKCmdResolveImage::encode(MVKCommandEncoder* cmdEncoder) {
-    MVKImage* xfrImage = cmdEncoder->getCommandEncodingPool()->getTransferMVKImage(_transferImageData);
+		// If the region does not cover the entire content of the source level, expand the
+		// destination content in the region to the temporary image. The purpose of this
+		// expansion is to render the existing content of the destination image to the
+		// temporary transfer multisample image, so that regions of that temporary transfer
+		// image can then be overwritten with content from the source image, prior to
+		// resolving it back to the destination image. The source of this temporary content
+		// move is the full extent of the DESTINATION image of the resolve command, and the
+		// destination of this temporary content move is the full extent of the SOURCE image.
+		if ( !mvkVkExtent3DsAreEqual(srcImgExt, vkIR.extent) ) {
+			VkImageBlit& expRgn = expansionRegions[expCnt++];
+			expRgn.srcSubresource = vkIR.dstSubresource;
+			expRgn.srcOffsets[0] = { 0, 0, 0 };
+			expRgn.srcOffsets[1] = { int32_t(dstImgExt.width), int32_t(dstImgExt.height), int32_t(dstImgExt.depth) };
+			expRgn.dstSubresource = vkIR.dstSubresource;
+			expRgn.dstOffsets[0] = { 0, 0, 0 };
+			expRgn.dstOffsets[1] = { int32_t(srcImgExt.width), int32_t(srcImgExt.height), int32_t(srcImgExt.depth) };
+		}
 
-    id<MTLTexture> xfrMTLTex = xfrImage->getMTLTexture();
-    id<MTLTexture> dstMTLTex = _dstImage->getMTLTexture();
-    if ( !xfrMTLTex || !dstMTLTex ) { return; }
+		// Copy the region from the source image to the temporary multisample image,
+		// prior to the temporary image being resolved back to the destination image.
+		// The source of this copy stage is the source image, and the destination of
+		// this copy stage is the temporary transfer image.
+		VkImageCopy& cpyRgn = copyRegions[copyCnt++];
+		cpyRgn.srcSubresource = vkIR.srcSubresource;
+		cpyRgn.srcOffset = vkIR.srcOffset;
+		cpyRgn.dstSubresource = vkIR.srcSubresource;
+		cpyRgn.dstOffset = vkIR.srcOffset;
+		cpyRgn.extent = vkIR.extent;
 
-    // Expand the current content of the destination image to the temporary transfer image.
-    // Create and execute a temporary BLIT image command.
-    // To be threadsafe...do NOT acquire and return the command from the pool.
-    uint32_t expRgnCnt = uint32_t(_expansionRegions.size());
-    if (expRgnCnt > 0) {
-        MVKCmdBlitImage expandCmd;
-        expandCmd.setContent(cmdEncoder->_cmdBuffer,
-							 (VkImage)_dstImage, _dstLayout, (VkImage)xfrImage, _dstLayout,
-                             expRgnCnt, _expansionRegions.data(),
-                             VK_FILTER_LINEAR, kMVKCommandUseResolveExpandImage);
-        expandCmd.encode(cmdEncoder);
-    }
+		// Add a resolve slice struct for each destination layer in the resolve region.
+		uint32_t baseLayer = vkIR.dstSubresource.baseArrayLayer;
+		uint32_t layCnt = vkIR.dstSubresource.layerCount;
+		for (uint32_t layIdx = 0; layIdx < layCnt; layIdx++) {
+			MVKMetalResolveSlice& rslvSlice = mtlResolveSlices[sliceCnt++];
+			rslvSlice.level = vkIR.dstSubresource.mipLevel;
+			rslvSlice.slice = baseLayer + layIdx;
+		}
+	}
 
-    // Copy the resolve regions of the source image to the temporary transfer image.
-    // Create and execute a temporary copy image command.
-    // To be threadsafe...do NOT acquire and return the command from the pool.
-    uint32_t cpyRgnCnt = uint32_t(_copyRegions.size());
-    if (cpyRgnCnt > 0) {
-        MVKCmdCopyImage copyCmd;
-        copyCmd.setContent(cmdEncoder->_cmdBuffer,
-						   (VkImage)_srcImage, _srcLayout, (VkImage)xfrImage, _dstLayout,
-                           cpyRgnCnt, _copyRegions.data(), kMVKCommandUseResolveCopyImage);
-        copyCmd.encode(cmdEncoder);
-    }
+	id<MTLTexture> srcMTLTex;
+	if (expCnt == 0) {
+		// Expansion and copying are not required. Each mip level of the source image
+		// is being resolved entirely. Resolve directly from the source image.
+		srcMTLTex = _srcImage->getMTLTexture();
 
-    cmdEncoder->endCurrentMetalEncoding();
+	} else {
+		// Expansion and copying are required. Acquire a temporary transfer image, expand
+		// the destination image into it, copy from the source image to the temporary image,
+		// and then resolve from the temporary image to the destination image.
+		MVKImageDescriptorData xferImageData;
+		_dstImage->getTransferDescriptorData(xferImageData);
+		xferImageData.samples = _srcImage->getSampleCount();
+		MVKImage* xfrImage = cmdEncoder->getCommandEncodingPool()->getTransferMVKImage(xferImageData);
 
-    MTLRenderPassColorAttachmentDescriptor* mtlColorAttDesc = _mtlRenderPassDescriptor.colorAttachments[0];
-    mtlColorAttDesc.texture = xfrMTLTex;
-    mtlColorAttDesc.resolveTexture = dstMTLTex;
+		// Expand the current content of the destination image to the temporary transfer image.
+		MVKCmdBlitImage<N> expCmd;
+		expCmd.setContent(cmdEncoder->_cmdBuffer,
+						  (VkImage)_dstImage, _dstLayout, (VkImage)xfrImage, _dstLayout,
+						  expCnt, expansionRegions, VK_FILTER_LINEAR);
+		expCmd.encode(cmdEncoder, kMVKCommandUseResolveExpandImage);
 
-    for (auto& rslvSlice : _mtlResolveSlices) {
+		// Copy the resolve regions of the source image to the temporary transfer image.
+		MVKCmdCopyImage<N> copyCmd;
+		copyCmd.setContent(cmdEncoder->_cmdBuffer,
+						   (VkImage)_srcImage, _srcLayout,
+						   (VkImage)xfrImage, _dstLayout,
+						   copyCnt, copyRegions);
+		copyCmd.encode(cmdEncoder, kMVKCommandUseResolveCopyImage);
 
-        // Update the render pass descriptor for the texture level and slice, and create a render encoder.
-        mtlColorAttDesc.level = rslvSlice.level;
-        mtlColorAttDesc.slice = rslvSlice.slice;
-        mtlColorAttDesc.resolveLevel = rslvSlice.level;
-        mtlColorAttDesc.resolveSlice = rslvSlice.slice;
-        id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: _mtlRenderPassDescriptor];
+		srcMTLTex = xfrImage->getMTLTexture();
+	}
+
+	cmdEncoder->endCurrentMetalEncoding();
+
+	MTLRenderPassDescriptor* mtlRPD = [MTLRenderPassDescriptor renderPassDescriptor];
+	MTLRenderPassColorAttachmentDescriptor* mtlColorAttDesc = mtlRPD.colorAttachments[0];
+	mtlColorAttDesc.loadAction = MTLLoadActionLoad;
+	mtlColorAttDesc.storeAction = MTLStoreActionMultisampleResolve;
+	mtlColorAttDesc.texture = srcMTLTex;
+	mtlColorAttDesc.resolveTexture = _dstImage->getMTLTexture();
+
+	// For each resolve slice, update the render pass descriptor for
+	// the texture level and slice and create a render encoder.
+	for (uint32_t sIdx = 0; sIdx < sliceCnt; sIdx++) {
+		MVKMetalResolveSlice& rslvSlice = mtlResolveSlices[sIdx];
+		mtlColorAttDesc.level = rslvSlice.level;
+		mtlColorAttDesc.slice = rslvSlice.slice;
+		mtlColorAttDesc.resolveLevel = rslvSlice.level;
+		mtlColorAttDesc.resolveSlice = rslvSlice.slice;
+		id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPD];
 		setLabelIfNotNil(mtlRendEnc, mvkMTLRenderCommandEncoderLabel(kMVKCommandUseResolveImage));
 
-        [mtlRendEnc pushDebugGroup: @"vkCmdResolveImage"];
-        [mtlRendEnc popDebugGroup];
-        [mtlRendEnc endEncoding];
-    }
+		[mtlRendEnc pushDebugGroup: @"vkCmdResolveImage"];
+		[mtlRendEnc popDebugGroup];
+		[mtlRendEnc endEncoding];
+	}
 }
 
-MVKCmdResolveImage::MVKCmdResolveImage() {
-    initMTLRenderPassDescriptor();
-}
-
-// Create and configure the render pass descriptor
-void MVKCmdResolveImage::initMTLRenderPassDescriptor() {
-    _mtlRenderPassDescriptor = [[MTLRenderPassDescriptor renderPassDescriptor] retain];		// retained
-    MTLRenderPassColorAttachmentDescriptor* mtlColorAttDesc = _mtlRenderPassDescriptor.colorAttachments[0];
-    mtlColorAttDesc.loadAction = MTLLoadActionLoad;
-    mtlColorAttDesc.storeAction = MTLStoreActionMultisampleResolve;
-}
-
-MVKCmdResolveImage::~MVKCmdResolveImage() {
-    [_mtlRenderPassDescriptor release];
-}
+template class MVKCmdResolveImage<1>;
+template class MVKCmdResolveImage<4>;
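
The resolve path above chooses between two strategies: if every region covers the full extent of its source mip level, it resolves directly from the source texture; otherwise it expands the destination into a temporary multisample image first. A sketch of that decision, with simplified extent types:

    #include <cstdint>
    #include <cstdio>

    struct Extent3D { uint32_t w, h, d; };

    // A temporary multisample image (and the expand/copy stages) is needed only
    // when some region resolves less than the full extent of its source mip level.
    static bool needsExpansion(const Extent3D& srcLevelExtent, const Extent3D& regionExtent) {
        return !(srcLevelExtent.w == regionExtent.w &&
                 srcLevelExtent.h == regionExtent.h &&
                 srcLevelExtent.d == regionExtent.d);
    }

    int main() {
        Extent3D level{512, 512, 1};
        std::printf("full-level resolve -> direct: %d\n", !needsExpansion(level, {512, 512, 1}));  // 1
        std::printf("partial resolve    -> temp:   %d\n",  needsExpansion(level, {256, 256, 1}));  // 1
    }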
 
 
 #pragma mark -
@@ -644,25 +574,27 @@
 	uint32_t size;
 } MVKCmdCopyBufferInfo;
 
-VkResult MVKCmdCopyBuffer::setContent(MVKCommandBuffer* cmdBuff,
-									  VkBuffer srcBuffer,
-									  VkBuffer destBuffer,
-									  uint32_t regionCount,
-									  const VkBufferCopy* pRegions) {
+template <size_t N>
+VkResult MVKCmdCopyBuffer<N>::setContent(MVKCommandBuffer* cmdBuff,
+										 VkBuffer srcBuffer,
+										 VkBuffer destBuffer,
+										 uint32_t regionCount,
+										 const VkBufferCopy* pRegions) {
 	_srcBuffer = (MVKBuffer*)srcBuffer;
 	_dstBuffer = (MVKBuffer*)destBuffer;
 
 	// Add buffer regions
-	_mtlBuffCopyRegions.clear();	// Clear for reuse
-	_mtlBuffCopyRegions.reserve(regionCount);
+	_bufferCopyRegions.clear();	// Clear for reuse
+	_bufferCopyRegions.reserve(regionCount);
 	for (uint32_t i = 0; i < regionCount; i++) {
-		_mtlBuffCopyRegions.push_back(pRegions[i]);
+		_bufferCopyRegions.push_back(pRegions[i]);
 	}
 
 	return VK_SUCCESS;
 }
 
-void MVKCmdCopyBuffer::encode(MVKCommandEncoder* cmdEncoder) {
+template <size_t N>
+void MVKCmdCopyBuffer<N>::encode(MVKCommandEncoder* cmdEncoder) {
 	id<MTLBuffer> srcMTLBuff = _srcBuffer->getMTLBuffer();
 	NSUInteger srcMTLBuffOffset = _srcBuffer->getMTLBufferOffset();
 
@@ -671,7 +603,7 @@
 
 	VkDeviceSize buffAlign = cmdEncoder->getDevice()->_pMetalFeatures->mtlCopyBufferAlignment;
 
-	for (auto& cpyRgn : _mtlBuffCopyRegions) {
+	for (auto& cpyRgn : _bufferCopyRegions) {
 		const bool useComputeCopy = buffAlign > 1 && (cpyRgn.srcOffset % buffAlign != 0 ||
 													  cpyRgn.dstOffset % buffAlign != 0 ||
 													  cpyRgn.size      % buffAlign != 0);
@@ -703,6 +635,9 @@
 	}
 }
 
+template class MVKCmdCopyBuffer<1>;
+template class MVKCmdCopyBuffer<4>;
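
The per-region loop above (see the useComputeCopy test in the context lines) routes each copy through either the Metal blit encoder or a compute shader: blit-encoder buffer copies require srcOffset, dstOffset, and size all aligned to the device's mtlCopyBufferAlignment. A sketch of that test:

    #include <cstdint>
    #include <cstdio>

    // Misaligned regions cannot use the blit encoder and fall back to a compute copy.
    static bool useComputeCopy(uint64_t srcOffset, uint64_t dstOffset,
                               uint64_t size, uint64_t buffAlign) {
        return buffAlign > 1 && (srcOffset % buffAlign != 0 ||
                                 dstOffset % buffAlign != 0 ||
                                 size      % buffAlign != 0);
    }

    int main() {
        const uint64_t align = 4;   // illustrative alignment; the real value is device-specific
        std::printf("aligned copy    -> blit:    %d\n", !useComputeCopy(0, 256, 1024, align));  // 1
        std::printf("misaligned copy -> compute: %d\n",  useComputeCopy(2, 256, 1024, align));  // 1
    }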
+
 
 #pragma mark -
 #pragma mark MVKCmdBufferImageCopy
@@ -722,13 +657,14 @@
     VkExtent3D extent;
 } MVKCmdCopyBufferToImageInfo;
 
-VkResult MVKCmdBufferImageCopy::setContent(MVKCommandBuffer* cmdBuff,
-										   VkBuffer buffer,
-										   VkImage image,
-										   VkImageLayout imageLayout,
-										   uint32_t regionCount,
-										   const VkBufferImageCopy* pRegions,
-										   bool toImage) {
+template <size_t N>
+VkResult MVKCmdBufferImageCopy<N>::setContent(MVKCommandBuffer* cmdBuff,
+											  VkBuffer buffer,
+											  VkImage image,
+											  VkImageLayout imageLayout,
+											  uint32_t regionCount,
+											  const VkBufferImageCopy* pRegions,
+											  bool toImage) {
     _buffer = (MVKBuffer*)buffer;
     _image = (MVKImage*)image;
     _imageLayout = imageLayout;
@@ -750,7 +686,8 @@
 	return VK_SUCCESS;
 }
 
-void MVKCmdBufferImageCopy::encode(MVKCommandEncoder* cmdEncoder) {
+template <size_t N>
+void MVKCmdBufferImageCopy<N>::encode(MVKCommandEncoder* cmdEncoder) {
     id<MTLBuffer> mtlBuffer = _buffer->getMTLBuffer();
     id<MTLTexture> mtlTexture = _image->getMTLTexture();
     if ( !mtlBuffer || !mtlTexture ) { return; }
@@ -913,7 +850,8 @@
     }
 }
 
-bool MVKCmdBufferImageCopy::isArrayTexture() {
+template <size_t N>
+bool MVKCmdBufferImageCopy<N>::isArrayTexture() {
 	MTLTextureType mtlTexType = _image->getMTLTextureType();
 	return (mtlTexType == MTLTextureType3D ||
 			mtlTexType == MTLTextureType2DArray ||
@@ -923,20 +861,26 @@
 			mtlTexType == MTLTextureType1DArray);
 }
 
+template class MVKCmdBufferImageCopy<1>;
+template class MVKCmdBufferImageCopy<4>;	// To support MVKCmdCopyImage
+template class MVKCmdBufferImageCopy<8>;
+template class MVKCmdBufferImageCopy<16>;
+
 
 #pragma mark -
 #pragma mark MVKCmdClearAttachments
 
-VkResult MVKCmdClearAttachments::setContent(MVKCommandBuffer* cmdBuff,
-											uint32_t attachmentCount,
-											const VkClearAttachment* pAttachments,
-											uint32_t rectCount,
-											const VkClearRect* pRects) {
+template <size_t N>
+VkResult MVKCmdClearAttachments<N>::setContent(MVKCommandBuffer* cmdBuff,
+											   uint32_t attachmentCount,
+											   const VkClearAttachment* pAttachments,
+											   uint32_t rectCount,
+											   const VkClearRect* pRects) {
 	_rpsKey.reset();
+	_mtlDepthVal = 0.0;
     _mtlStencilValue = 0;
     _isClearingDepth = false;
     _isClearingStencil = false;
-    float mtlDepthVal = 0.0;
 	MVKPixelFormats* pixFmts = cmdBuff->getPixelFormats();
 
     // For each attachment to be cleared, mark it so in the render pipeline state
@@ -949,14 +893,14 @@
             uint32_t caIdx = clrAtt.colorAttachment;        // Might be VK_ATTACHMENT_UNUSED
             if (caIdx != VK_ATTACHMENT_UNUSED) {
                 _rpsKey.enableAttachment(caIdx);
-                _vkClearValues[caIdx] = clrAtt.clearValue;
+                setClearValue(caIdx, clrAtt.clearValue);
             }
         }
 
         if (mvkIsAnyFlagEnabled(clrAtt.aspectMask, VK_IMAGE_ASPECT_DEPTH_BIT)) {
             _isClearingDepth = true;
             _rpsKey.enableAttachment(kMVKClearAttachmentDepthStencilIndex);
-            mtlDepthVal = pixFmts->getMTLClearDepthValue(clrAtt.clearValue);
+            _mtlDepthVal = pixFmts->getMTLClearDepthValue(clrAtt.clearValue);
         }
 
         if (mvkIsAnyFlagEnabled(clrAtt.aspectMask, VK_IMAGE_ASPECT_STENCIL_BIT)) {
@@ -966,30 +910,33 @@
         }
     }
 
-    // The depth value (including vertex position Z value) is held in the last index.
-    _clearColors[kMVKClearAttachmentDepthStencilIndex] = { mtlDepthVal, mtlDepthVal, mtlDepthVal, mtlDepthVal };
-
     _clearRects.clear();		// Clear for reuse
     _clearRects.reserve(rectCount);
     for (uint32_t i = 0; i < rectCount; i++) {
         _clearRects.push_back(pRects[i]);
     }
 
-	_vertices.clear();			// Clear for reuse
-    _vertices.reserve(rectCount * 6);
-
 	return VK_SUCCESS;
 }
 
 // Populates the vertices for all clear rectangles within an attachment of the specified size.
-void MVKCmdClearAttachments::populateVertices(float attWidth, float attHeight) {
-    for (auto& rect : _clearRects) { populateVertices(rect, attWidth, attHeight); }
+template <size_t N>
+void MVKCmdClearAttachments<N>::populateVertices(simd::float4* vertices, float attWidth, float attHeight) {
+	uint32_t vtxIdx = 0;
+    for (auto& rect : _clearRects) {
+		vtxIdx = populateVertices(vertices, vtxIdx, rect, attWidth, attHeight);
+	}
 }
 
-// Populates the vertices from the specified rectangle within an attachment of the specified size.
-void MVKCmdClearAttachments::populateVertices(VkClearRect& clearRect, float attWidth, float attHeight) {
-
-    // Determine the positions of the four edges of the
+// Populates the vertices, starting at the specified start vertex, from the specified
+// rectangle within an attachment of the specified size. Returns the index of the next
+// vertex to be populated.
+template <size_t N>
+uint32_t MVKCmdClearAttachments<N>::populateVertices(simd::float4* vertices,
+													 uint32_t startVertex,
+													 VkClearRect& clearRect,
+													 float attWidth,
+													 float attHeight) {
+	// Determine the positions of the four edges of the
     // clear rectangle as a fraction of the attachment size.
     float leftPos = (float)(clearRect.rect.offset.x) / attWidth;
     float rightPos = (float)(clearRect.rect.extent.width) / attWidth + leftPos;
@@ -1005,6 +952,7 @@
 
     simd::float4 vtx;
 
+	uint32_t vtxIdx = startVertex;
 	uint32_t startLayer = clearRect.baseArrayLayer;
 	uint32_t endLayer = startLayer + clearRect.layerCount;
 	for (uint32_t layer = startLayer; layer < endLayer; layer++) {
@@ -1015,40 +963,47 @@
 		// Top left vertex	- First triangle
 		vtx.y = topPos;
 		vtx.x = leftPos;
-		_vertices.push_back(vtx);
+		vertices[vtxIdx++] = vtx;
 
 		// Bottom left vertex
 		vtx.y = bottomPos;
 		vtx.x = leftPos;
-		_vertices.push_back(vtx);
+		vertices[vtxIdx++] = vtx;
 
 		// Bottom right vertex
 		vtx.y = bottomPos;
 		vtx.x = rightPos;
-		_vertices.push_back(vtx);
+		vertices[vtxIdx++] = vtx;
 
 		// Bottom right vertex	- Second triangle
-		_vertices.push_back(vtx);
+		vertices[vtxIdx++] = vtx;
 
 		// Top right vertex
 		vtx.y = topPos;
 		vtx.x = rightPos;
-		_vertices.push_back(vtx);
+		vertices[vtxIdx++] = vtx;
 
 		// Top left vertex
 		vtx.y = topPos;
 		vtx.x = leftPos;
-		_vertices.push_back(vtx);
+		vertices[vtxIdx++] = vtx;
 	}
+
+	return vtxIdx;
 }
 
-void MVKCmdClearAttachments::encode(MVKCommandEncoder* cmdEncoder) {
+template <size_t N>
+void MVKCmdClearAttachments<N>::encode(MVKCommandEncoder* cmdEncoder) {
+
+	uint32_t vtxCnt = (uint32_t)_clearRects.size() * 6;
+	simd::float4 vertices[vtxCnt];
+	simd::float4 clearColors[kMVKClearAttachmentCount];
+
+	VkExtent2D fbExtent = cmdEncoder->_framebuffer->getExtent2D();
+	populateVertices(vertices, fbExtent.width, fbExtent.height);
 
 	MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats();
     MVKRenderSubpass* subpass = cmdEncoder->getSubpass();
-    VkExtent2D fbExtent = cmdEncoder->_framebuffer->getExtent2D();
-    populateVertices(fbExtent.width, fbExtent.height);
-    uint32_t vtxCnt = (uint32_t)_vertices.size();
     uint32_t vtxBuffIdx = cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKVertexContentBufferIndex);
 
     // Populate the render pipeline state attachment key with info from the subpass and framebuffer.
@@ -1059,10 +1014,13 @@
     for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) {
         VkFormat vkAttFmt = subpass->getColorAttachmentFormat(caIdx);
 		_rpsKey.attachmentMTLPixelFormats[caIdx] = pixFmts->getMTLPixelFormat(vkAttFmt);
-		MTLClearColor mtlCC = pixFmts->getMTLClearColor(_vkClearValues[caIdx], vkAttFmt);
-		_clearColors[caIdx] = { (float)mtlCC.red, (float)mtlCC.green, (float)mtlCC.blue, (float)mtlCC.alpha};
+		MTLClearColor mtlCC = pixFmts->getMTLClearColor(getClearValue(caIdx), vkAttFmt);
+		clearColors[caIdx] = { (float)mtlCC.red, (float)mtlCC.green, (float)mtlCC.blue, (float)mtlCC.alpha};
     }
 
+    // The depth value (including vertex position Z value) is held in the last index.
+    clearColors[kMVKClearAttachmentDepthStencilIndex] = { _mtlDepthVal, _mtlDepthVal, _mtlDepthVal, _mtlDepthVal };
+
     VkFormat vkAttFmt = subpass->getDepthStencilFormat();
 	MTLPixelFormat mtlAttFmt = pixFmts->getMTLPixelFormat(vkAttFmt);
     _rpsKey.attachmentMTLPixelFormats[kMVKClearAttachmentDepthStencilIndex] = mtlAttFmt;
@@ -1078,9 +1036,9 @@
     [mtlRendEnc setDepthStencilState: cmdEncPool->getMTLDepthStencilState(isClearingDepth, isClearingStencil)];
     [mtlRendEnc setStencilReferenceValue: _mtlStencilValue];
 
-    cmdEncoder->setVertexBytes(mtlRendEnc, _clearColors, sizeof(_clearColors), 0);
-    cmdEncoder->setFragmentBytes(mtlRendEnc, _clearColors, sizeof(_clearColors), 0);
-    cmdEncoder->setVertexBytes(mtlRendEnc, _vertices.data(), vtxCnt * sizeof(_vertices[0]), vtxBuffIdx);
+    cmdEncoder->setVertexBytes(mtlRendEnc, clearColors, sizeof(clearColors), 0);
+    cmdEncoder->setFragmentBytes(mtlRendEnc, clearColors, sizeof(clearColors), 0);
+    cmdEncoder->setVertexBytes(mtlRendEnc, vertices, vtxCnt * sizeof(vertices[0]), vtxBuffIdx);
     [mtlRendEnc drawPrimitives: MTLPrimitiveTypeTriangle vertexStart: 0 vertexCount: vtxCnt];
     [mtlRendEnc popDebugGroup];
 
@@ -1091,21 +1049,28 @@
 	cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
 }
 
+template class MVKCmdClearAttachments<1>;
+template class MVKCmdClearAttachments<4>;
+
+template class MVKCmdClearSingleAttachment<1>;
+template class MVKCmdClearSingleAttachment<4>;
+
+template class MVKCmdClearMultiAttachments<1>;
+template class MVKCmdClearMultiAttachments<4>;
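
The populateVertices() methods above convert each clear rect into two triangles (6 vertices) per array layer, with edges computed as fractions of the attachment size. A simplified sketch of the geometry emission (the remap to clip space is assumed; this hunk shows the fractional edge computation and the triangle order):

    #include <cstdio>

    struct Vtx { float x, y; };

    // Emit two triangles for one rect, returning the next vertex index to
    // populate, as the real populateVertices() does.
    static int emitRect(Vtx* v, int idx, float ox, float oy, float w, float h,
                        float attW, float attH) {
        // Edges as a fraction of the attachment, remapped to [-1, 1] clip space.
        float l = (ox / attW) * 2.0f - 1.0f, r = ((ox + w) / attW) * 2.0f - 1.0f;
        float t = (oy / attH) * 2.0f - 1.0f, b = ((oy + h) / attH) * 2.0f - 1.0f;
        v[idx++] = {l, t}; v[idx++] = {l, b}; v[idx++] = {r, b};  // first triangle
        v[idx++] = {r, b}; v[idx++] = {r, t}; v[idx++] = {l, t};  // second triangle
        return idx;
    }

    int main() {
        Vtx verts[6];
        int n = emitRect(verts, 0, 64, 64, 128, 128, 256, 256);
        std::printf("%d vertices; first = (%.2f, %.2f)\n", n, verts[0].x, verts[0].y);
    }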
+
 
 #pragma mark -
 #pragma mark MVKCmdClearImage
 
-VkResult MVKCmdClearImage::setContent(MVKCommandBuffer* cmdBuff,
-									  VkImage image,
-									  VkImageLayout imageLayout,
-									  const VkClearValue& clearValue,
-									  uint32_t rangeCount,
-									  const VkImageSubresourceRange* pRanges,
-									  bool isDepthStencilClear) {
+template <size_t N>
+VkResult MVKCmdClearImage<N>::setContent(MVKCommandBuffer* cmdBuff,
+										 VkImage image,
+										 VkImageLayout imageLayout,
+										 const VkClearValue& clearValue,
+										 uint32_t rangeCount,
+										 const VkImageSubresourceRange* pRanges) {
     _image = (MVKImage*)image;
-    _imgLayout = imageLayout;
 	_clearValue = clearValue;
-    _isDepthStencilClear = isDepthStencilClear;
 
     // Add subresource ranges
     _subresourceRanges.clear();		// Clear for reuse
@@ -1115,23 +1080,26 @@
     }
 
 	// Validate
+	bool isDS = isDepthStencilClear();
 	if (_image->getImageType() == VK_IMAGE_TYPE_1D) {
-		return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdClearImage(): Native 1D images cannot be cleared on this device. Consider enabling MVK_CONFIG_TEXTURE_1D_AS_2D.");
+		return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdClear%sImage(): Native 1D images cannot be cleared on this device. Consider enabling MVK_CONFIG_TEXTURE_1D_AS_2D.", (isDS ? "DepthStencil" : "Color"));
 	}
 	MVKMTLFmtCaps mtlFmtCaps = cmdBuff->getPixelFormats()->getCapabilities(_image->getMTLPixelFormat());
-	if ((_isDepthStencilClear && !mvkAreAllFlagsEnabled(mtlFmtCaps, kMVKMTLFmtCapsDSAtt)) ||
-		( !_isDepthStencilClear && !mvkAreAllFlagsEnabled(mtlFmtCaps, kMVKMTLFmtCapsColorAtt))) {
-		return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdClearImage(): Format %s cannot be cleared on this device.", cmdBuff->getPixelFormats()->getName(_image->getVkFormat()));
+	if ((isDS && !mvkAreAllFlagsEnabled(mtlFmtCaps, kMVKMTLFmtCapsDSAtt)) ||
+		( !isDS && !mvkAreAllFlagsEnabled(mtlFmtCaps, kMVKMTLFmtCapsColorAtt))) {
+		return reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdClear%sImage(): Format %s cannot be cleared on this device.", (isDS ? "DepthStencil" : "Color"), cmdBuff->getPixelFormats()->getName(_image->getVkFormat()));
 	}
 
 	return VK_SUCCESS;
 }
 
-void MVKCmdClearImage::encode(MVKCommandEncoder* cmdEncoder) {
+template <size_t N>
+void MVKCmdClearImage<N>::encode(MVKCommandEncoder* cmdEncoder) {
 	id<MTLTexture> imgMTLTex = _image->getMTLTexture();
     if ( !imgMTLTex ) { return; }
 
-	NSString* mtlRendEncName = (_isDepthStencilClear
+	bool isDS = isDepthStencilClear();
+	NSString* mtlRendEncName = (isDS
 								? mvkMTLRenderCommandEncoderLabel(kMVKCommandUseClearDepthStencilImage)
 								: mvkMTLRenderCommandEncoderLabel(kMVKCommandUseClearColorImage));
 
@@ -1145,9 +1113,9 @@
 		MTLRenderPassDepthAttachmentDescriptor* mtlRPDADesc = nil;
 		MTLRenderPassStencilAttachmentDescriptor* mtlRPSADesc = nil;
 
-		bool isClearingColor = !_isDepthStencilClear && mvkIsAnyFlagEnabled(srRange.aspectMask, VK_IMAGE_ASPECT_COLOR_BIT);
-        bool isClearingDepth = _isDepthStencilClear && mvkIsAnyFlagEnabled(srRange.aspectMask, VK_IMAGE_ASPECT_DEPTH_BIT);
-        bool isClearingStencil = _isDepthStencilClear && mvkIsAnyFlagEnabled(srRange.aspectMask, VK_IMAGE_ASPECT_STENCIL_BIT);
+		bool isClearingColor = !isDS && mvkIsAnyFlagEnabled(srRange.aspectMask, VK_IMAGE_ASPECT_COLOR_BIT);
+		bool isClearingDepth = isDS && mvkIsAnyFlagEnabled(srRange.aspectMask, VK_IMAGE_ASPECT_DEPTH_BIT);
+		bool isClearingStencil = isDS && mvkIsAnyFlagEnabled(srRange.aspectMask, VK_IMAGE_ASPECT_STENCIL_BIT);
 
 		if (isClearingColor) {
 			mtlRPCADesc = mtlRPDesc.colorAttachments[0];
@@ -1206,6 +1174,15 @@
     }
 }
 
+template class MVKCmdClearImage<1>;
+template class MVKCmdClearImage<4>;
+
+template class MVKCmdClearColorImage<1>;
+template class MVKCmdClearColorImage<4>;
+
+template class MVKCmdClearDepthStencilImage<1>;
+template class MVKCmdClearDepthStencilImage<4>;
+
 
 #pragma mark -
 #pragma mark MVKCmdFillBuffer
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index 8b12e05..a42d588 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -39,8 +39,6 @@
 class MVKPipeline;
 class MVKGraphicsPipeline;
 class MVKComputePipeline;
-class MVKCmdBeginRenderPass;
-class MVKCmdEndRenderPass;
 class MVKLoadStoreOverrideMixin;
 
 typedef uint64_t MVKMTLCommandBufferID;
@@ -99,11 +97,12 @@
 
 
 #pragma mark Tessellation constituent command management
+
     /** Preps metadata for recording render pass */
-	void recordBeginRenderPass(MVKCmdBeginRenderPass* mvkBeginRenderPass);
+	void recordBeginRenderPass(MVKLoadStoreOverrideMixin* mvkBeginRenderPass);
 	
 	/** Finishes metadata for recording render pass */
-	void recordEndRenderPass(MVKCmdEndRenderPass* mvkEndRenderPass);
+	void recordEndRenderPass();
 	
 	/** Update the last recorded pipeline if it will end and start a new Metal render pass (ie, in tessellation) */
 	void recordBindPipeline(MVKCmdBindPipeline* mvkBindPipeline);
@@ -112,7 +111,7 @@
 	void recordDraw(MVKLoadStoreOverrideMixin* mvkDraw);
 	
 	/** The most recent recorded begin renderpass */
-	MVKCmdBeginRenderPass* _lastBeginRenderPass;
+	MVKLoadStoreOverrideMixin* _lastBeginRenderPass;
 	
 	/** The most recent recorded multi-pass (ie, tessellation) pipeline */
 	MVKCmdBindPipeline* _lastTessellationPipeline;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index d871019..775c24f 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -197,13 +197,13 @@
 #pragma mark -
 #pragma mark Tessellation constituent command management
 
-void MVKCommandBuffer::recordBeginRenderPass(MVKCmdBeginRenderPass* mvkBeginRenderPass) {
+void MVKCommandBuffer::recordBeginRenderPass(MVKLoadStoreOverrideMixin* mvkBeginRenderPass) {
 	_lastBeginRenderPass = mvkBeginRenderPass;
 	_lastTessellationPipeline = nullptr;
 	_lastTessellationDraw = nullptr;
 }
 
-void MVKCommandBuffer::recordEndRenderPass(MVKCmdEndRenderPass* /*mvkEndRenderPass*/) {
+void MVKCommandBuffer::recordEndRenderPass() {
 	// Unset the store override for the last draw call
 	if (_lastTessellationDraw != nullptr)
 	{
@@ -421,7 +421,7 @@
 
     // Create and execute a temporary clear attachments command.
     // To be threadsafe...do NOT acquire and return the command from the pool.
-    MVKCmdClearAttachments cmd;
+    MVKCmdClearMultiAttachments<1> cmd;
     cmd.setContent(_cmdBuffer, clearAttCnt, clearAtts.data(), 1, &clearRect);
     cmd.encode(this);
 }
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def
index c47d949..87a9518 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def
@@ -41,17 +41,33 @@
 #	define MVK_CMD_TYPE_POOL_LAST(cmdType) MVK_CMD_TYPE_POOL(cmdType)
 #endif
 
-MVK_CMD_TYPE_POOL(PipelineBarrier)
-MVK_CMD_TYPE_POOL(BindPipeline)
-MVK_CMD_TYPE_POOL(BeginRenderPass)
+#define MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(cmdType, threshold)	\
+	MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(cmdType ##threshold)		\
+	MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(cmdType ##Multi)
+
+#define MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(cmdType, threshold1, threshold2)	\
+	MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(cmdType ##threshold1)						\
+	MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(cmdType ##threshold2)						\
+	MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(cmdType ##Multi)
+
+#define MVK_CMD_TYPE_POOLS_FROM_3_THRESHOLDS(cmdType, threshold1, threshold2, threshold3)	\
+	MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(cmdType ##threshold1)									\
+	MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(cmdType ##threshold2)									\
+	MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(cmdType ##threshold3)									\
+	MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(cmdType ##Multi)
+
+
+MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(PipelineBarrier, 1, 4)
+MVK_CMD_TYPE_POOL(BindGraphicsPipeline)
+MVK_CMD_TYPE_POOL(BindComputePipeline)
+MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BeginRenderPass, 1, 2)
 MVK_CMD_TYPE_POOL(NextSubpass)
 MVK_CMD_TYPE_POOL(EndRenderPass)
-MVK_CMD_TYPE_POOL(ExecuteCommands)
-MVK_CMD_TYPE_POOL(BindDescriptorSets)
-MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(SetViewport1)
-MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(SetViewportMulti)
-MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(SetScissor1)
-MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(SetScissorMulti)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(ExecuteCommands, 1)
+MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindDescriptorSetsStatic, 1, 4)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(BindDescriptorSetsDynamic, 4)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(SetViewport, 1)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(SetScissor, 1)
 MVK_CMD_TYPE_POOL(SetLineWidth)
 MVK_CMD_TYPE_POOL(SetDepthBias)
 MVK_CMD_TYPE_POOL(SetBlendConstants)
@@ -59,27 +75,29 @@
 MVK_CMD_TYPE_POOL(SetStencilCompareMask)
 MVK_CMD_TYPE_POOL(SetStencilWriteMask)
 MVK_CMD_TYPE_POOL(SetStencilReference)
-MVK_CMD_TYPE_POOL(BindVertexBuffers)
+MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindVertexBuffers, 1, 2)
 MVK_CMD_TYPE_POOL(BindIndexBuffer)
 MVK_CMD_TYPE_POOL(Draw)
 MVK_CMD_TYPE_POOL(DrawIndexed)
 MVK_CMD_TYPE_POOL(DrawIndirect)
 MVK_CMD_TYPE_POOL(DrawIndexedIndirect)
-MVK_CMD_TYPE_POOL(CopyImage)
-MVK_CMD_TYPE_POOL(BlitImage)
-MVK_CMD_TYPE_POOL(ResolveImage)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(CopyImage, 1)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(BlitImage, 1)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(ResolveImage, 1)
 MVK_CMD_TYPE_POOL(FillBuffer)
 MVK_CMD_TYPE_POOL(UpdateBuffer)
-MVK_CMD_TYPE_POOL(CopyBuffer)
-MVK_CMD_TYPE_POOL(BufferImageCopy)
-MVK_CMD_TYPE_POOL(ClearAttachments)
-MVK_CMD_TYPE_POOL(ClearImage)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(CopyBuffer, 1)
+MVK_CMD_TYPE_POOLS_FROM_3_THRESHOLDS(BufferImageCopy, 1, 4, 8)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(ClearSingleAttachment, 1)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(ClearMultiAttachments, 1)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(ClearColorImage, 1)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(ClearDepthStencilImage, 1)
 MVK_CMD_TYPE_POOL(BeginQuery)
 MVK_CMD_TYPE_POOL(EndQuery)
 MVK_CMD_TYPE_POOL(WriteTimestamp)
 MVK_CMD_TYPE_POOL(ResetQueryPool)
 MVK_CMD_TYPE_POOL(CopyQueryPoolResults)
-MVK_CMD_TYPE_POOL(PushConstants)
+MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(PushConstants, 64, 128)
 MVK_CMD_TYPE_POOL(Dispatch)
 MVK_CMD_TYPE_POOL(DispatchIndirect)
 MVK_CMD_TYPE_POOL(PushDescriptorSet)
@@ -87,8 +105,9 @@
 MVK_CMD_TYPE_POOL(DebugMarkerBegin)
 MVK_CMD_TYPE_POOL(DebugMarkerEnd)
 MVK_CMD_TYPE_POOL(DebugMarkerInsert)
-MVK_CMD_TYPE_POOL(SetResetEvent)
-MVK_CMD_TYPE_POOL_LAST(WaitEvents)
+MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(WaitEvents, 1)
+MVK_CMD_TYPE_POOL(SetEvent)
+MVK_CMD_TYPE_POOL_LAST(ResetEvent)
 
 #undef MVK_CMD_TYPE_POOL
 #undef MVK_CMD_TYPE_POOL_LAST
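
The MVK_CMD_TYPE_POOLS_FROM_*_THRESHOLDS helpers above expand one logical command into a pool per inline-capacity variant via token pasting. A stand-alone sketch of the expansion, stubbing out the hooks that includers of this .def file normally redefine:

    #include <cstdio>

    #define MVK_TMPLT_DECL
    #define MVK_CMD_TYPE_POOL(cmdType)  printf("MVKCmd" #cmdType "\n");

    #define MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(cmdType, t1, t2)  \
        MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(cmdType ##t1)             \
        MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(cmdType ##t2)             \
        MVK_TMPLT_DECL MVK_CMD_TYPE_POOL(cmdType ##Multi)

    int main() {
        // Prints MVKCmdBindVertexBuffers1, MVKCmdBindVertexBuffers2, and
        // MVKCmdBindVertexBuffersMulti: one pool per capacity variant.
        MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindVertexBuffers, 1, 2)
        return 0;
    }
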
diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h
index f7188d6..a530288 100644
--- a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h
+++ b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h
@@ -18,29 +18,37 @@
 
 #pragma once
 
+#include "mvk_vulkan.h"
+
 #import <Metal/Metal.h>
 
+
+class MVKResource;
+class MVKBuffer;
+class MVKImage;
+
+
 /** Describes a MTLTexture resource binding. */
 typedef struct {
     union { id<MTLTexture> mtlTexture = nil; id<MTLTexture> mtlResource; }; // aliases
-    uint32_t index = 0;
     uint32_t swizzle = 0;
+	uint16_t index = 0;
     bool isDirty = true;
 } MVKMTLTextureBinding;
 
 /** Describes a MTLSamplerState resource binding. */
 typedef struct {
     union { id<MTLSamplerState> mtlSamplerState = nil; id<MTLSamplerState> mtlResource; }; // aliases
-    uint32_t index = 0;
+    uint16_t index = 0;
     bool isDirty = true;
 } MVKMTLSamplerStateBinding;
 
 /** Describes a MTLBuffer resource binding. */
 typedef struct {
     union { id<MTLBuffer> mtlBuffer = nil; id<MTLBuffer> mtlResource; const void* mtlBytes; }; // aliases
-    NSUInteger offset = 0;
-    uint32_t index = 0;
+    VkDeviceSize offset = 0;
     uint32_t size = 0;
+	uint16_t index = 0;
     bool isDirty = true;
     bool isInline = false;
 } MVKMTLBufferBinding;
@@ -48,7 +56,78 @@
 /** Describes a MTLBuffer resource binding as used for an index buffer. */
 typedef struct {
     union { id<MTLBuffer> mtlBuffer = nil; id<MTLBuffer> mtlResource; }; // aliases
-    NSUInteger offset = 0;
-    MTLIndexType mtlIndexType;
+    VkDeviceSize offset = 0;
+    uint8_t mtlIndexType = 0;		// MTLIndexType
     bool isDirty = true;
 } MVKIndexMTLBufferBinding;
+
+/** Concise and consistent structure for holding pipeline barrier info. */
+typedef struct MVKPipelineBarrier {
+
+	typedef enum : uint8_t {
+		None,
+		Memory,
+		Buffer,
+		Image,
+	} MVKPipelineBarrierType;
+
+	union { MVKBuffer* mvkBuffer = nullptr; MVKImage* mvkImage; MVKResource* mvkResource; };
+	union {
+		struct {
+			VkDeviceSize offset = 0;
+			VkDeviceSize size = 0;
+		};
+		struct {
+			VkImageLayout newLayout;
+			VkImageAspectFlags aspectMask;
+			uint16_t baseArrayLayer;
+			uint16_t layerCount;
+			uint8_t baseMipLevel;
+			uint8_t levelCount;
+		};
+	};
+	VkAccessFlags srcAccessMask = 0;
+	VkAccessFlags dstAccessMask = 0;
+	uint8_t srcQueueFamilyIndex = 0;
+	uint8_t dstQueueFamilyIndex = 0;
+
+	MVKPipelineBarrierType type = None;
+
+	bool isMemoryBarrier() { return type == Memory; }
+	bool isBufferBarrier() { return type == Buffer; }
+	bool isImageBarrier() { return type == Image; }
+
+	MVKPipelineBarrier(const VkMemoryBarrier& vkBarrier) :
+		type(Memory),
+		srcAccessMask(vkBarrier.srcAccessMask),
+		dstAccessMask(vkBarrier.dstAccessMask)
+		{}
+
+	MVKPipelineBarrier(const VkBufferMemoryBarrier& vkBarrier) :
+		type(Buffer),
+		srcAccessMask(vkBarrier.srcAccessMask),
+		dstAccessMask(vkBarrier.dstAccessMask),
+		srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex),
+		dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex),
+		mvkBuffer((MVKBuffer*)vkBarrier.buffer),
+		offset(vkBarrier.offset),
+		size(vkBarrier.size)
+		{}
+
+	MVKPipelineBarrier(const VkImageMemoryBarrier& vkBarrier) :
+		type(Image),
+		srcAccessMask(vkBarrier.srcAccessMask),
+		dstAccessMask(vkBarrier.dstAccessMask),
+		newLayout(vkBarrier.newLayout),
+		srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex),
+		dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex),
+		mvkImage((MVKImage*)vkBarrier.image),
+		aspectMask(vkBarrier.subresourceRange.aspectMask),
+		baseMipLevel(vkBarrier.subresourceRange.baseMipLevel),
+		levelCount(vkBarrier.subresourceRange.levelCount),
+		baseArrayLayer(vkBarrier.subresourceRange.baseArrayLayer),
+		layerCount(vkBarrier.subresourceRange.layerCount)
+		{}
+
+} MVKPipelineBarrier;
+
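MVKPipelineBarrier collapses the three Vulkan barrier structs into one small tagged union, narrowing the queue family indices to uint8_t and the subresource range to 16/8-bit fields, in keeping with this PR's aim of shrinking per-command storage (the narrowing assumes small queue-family counts and mip/layer ranges, which holds for Metal devices). A usage sketch, assuming a valid VkBuffer handle named vkBuffer:

    // Hypothetical usage: converting a Vulkan barrier into the compact form.
    VkBufferMemoryBarrier vkBarrier = {};
    vkBarrier.sType         = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    vkBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    vkBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
    vkBarrier.buffer        = vkBuffer;       // assumed valid VkBuffer handle
    vkBarrier.offset        = 0;
    vkBarrier.size          = VK_WHOLE_SIZE;

    MVKPipelineBarrier barrier(vkBarrier);    // type == Buffer; buffer/offset/size populated
    if (barrier.isBufferBarrier()) { /* route to MVKBuffer::applyBufferMemoryBarrier() */ }
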
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
index a83cee8..3552657 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
@@ -52,18 +52,18 @@
 	VkResult bindDeviceMemory2(const VkBindBufferMemoryInfo* pBindInfo);
 
 	/** Applies the specified global memory barrier. */
-    void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
-                            VkPipelineStageFlags dstStageMask,
-                            VkMemoryBarrier* pMemoryBarrier,
-                            MVKCommandEncoder* cmdEncoder,
-                            MVKCommandUse cmdUse) override;
+	void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
+							VkPipelineStageFlags dstStageMask,
+							MVKPipelineBarrier& barrier,
+							MVKCommandEncoder* cmdEncoder,
+							MVKCommandUse cmdUse) override;
 
 	/** Applies the specified buffer memory barrier. */
-    void applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask,
-                                  VkPipelineStageFlags dstStageMask,
-                                  VkBufferMemoryBarrier* pBufferMemoryBarrier,
-                                  MVKCommandEncoder* cmdEncoder,
-                                  MVKCommandUse cmdUse);
+	void applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask,
+								  VkPipelineStageFlags dstStageMask,
+								  MVKPipelineBarrier& barrier,
+								  MVKCommandEncoder* cmdEncoder,
+								  MVKCommandUse cmdUse);
 
     /** Returns the intended usage of this buffer. */
     VkBufferUsageFlags getUsage() const { return _usage; }
@@ -91,7 +91,7 @@
 	void propogateDebugName() override;
 	bool needsHostReadSync(VkPipelineStageFlags srcStageMask,
 						   VkPipelineStageFlags dstStageMask,
-						   VkBufferMemoryBarrier* pBufferMemoryBarrier);
+						   MVKPipelineBarrier& barrier);
 	bool shouldFlushHostMemory();
 	VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size);
 	VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
index 8c55b3b..09244fa 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
@@ -99,11 +99,11 @@
 
 void MVKBuffer::applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
 								   VkPipelineStageFlags dstStageMask,
-								   VkMemoryBarrier* pMemoryBarrier,
+								   MVKPipelineBarrier& barrier,
                                    MVKCommandEncoder* cmdEncoder,
                                    MVKCommandUse cmdUse) {
 #if MVK_MACOS
-	if ( needsHostReadSync(srcStageMask, dstStageMask, pMemoryBarrier) ) {
+	if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) {
 		[cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: getMTLBuffer()];
 	}
 #endif
@@ -111,16 +111,31 @@
 
 void MVKBuffer::applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask,
 										 VkPipelineStageFlags dstStageMask,
-										 VkBufferMemoryBarrier* pBufferMemoryBarrier,
+										 MVKPipelineBarrier& barrier,
                                          MVKCommandEncoder* cmdEncoder,
                                          MVKCommandUse cmdUse) {
 #if MVK_MACOS
-	if ( needsHostReadSync(srcStageMask, dstStageMask, pBufferMemoryBarrier) ) {
+	if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) {
 		[cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: getMTLBuffer()];
 	}
 #endif
 }
 
+// Returns whether the specified buffer memory barrier requires a sync between this
+// buffer and host memory for the purpose of the host reading buffer memory.
+bool MVKBuffer::needsHostReadSync(VkPipelineStageFlags srcStageMask,
+								  VkPipelineStageFlags dstStageMask,
+								  MVKPipelineBarrier& barrier) {
+#if MVK_MACOS
+	return (mvkIsAnyFlagEnabled(dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) &&
+			mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT)) &&
+			isMemoryHostAccessible() && (!isMemoryHostCoherent() || _isHostCoherentTexelBuffer));
+#endif
+#if MVK_IOS
+	return false;
+#endif
+}
+
 #if MVK_MACOS
 bool MVKBuffer::shouldFlushHostMemory() { return _isHostCoherentTexelBuffer; }
 #endif
@@ -146,21 +161,6 @@
 	return VK_SUCCESS;
 }
 
-// Returns whether the specified buffer memory barrier requires a sync between this
-// buffer and host memory for the purpose of the host reading texture memory.
-bool MVKBuffer::needsHostReadSync(VkPipelineStageFlags srcStageMask,
-								  VkPipelineStageFlags dstStageMask,
-								  VkBufferMemoryBarrier* pBufferMemoryBarrier) {
-#if MVK_IOS
-	return false;
-#endif
-#if MVK_MACOS
-	return (mvkIsAnyFlagEnabled(dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) &&
-			mvkIsAnyFlagEnabled(pBufferMemoryBarrier->dstAccessMask, (VK_ACCESS_HOST_READ_BIT)) &&
-			isMemoryHostAccessible() && (!isMemoryHostCoherent() || _isHostCoherentTexelBuffer));
-#endif
-}
-
 
 #pragma mark Metal
 
@@ -176,8 +176,7 @@
 #if MVK_MACOS
 		} else if (_isHostCoherentTexelBuffer) {
 			// According to the Vulkan spec, buffers, like linear images, can always use host-coherent memory.
-                        // But texel buffers on Mac cannot use shared memory. So we need to use host-cached
-                        // memory here.
+			// But texel buffers on Mac cannot use shared memory. So we need to use host-cached memory here.
 			_mtlBuffer = [_device->getMTLDevice() newBufferWithLength: getByteCount()
 															  options: MTLResourceStorageModeManaged];	// retained
 			propogateDebugName();
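
For context on the needsHostReadSync() logic above: on macOS, managed-storage Metal resources keep separate CPU and GPU copies, so GPU writes only become host-visible after a blit synchronization. A condensed Objective-C++ sketch of the pattern being gated; blitEncoder and mtlBuffer are placeholders for the encoder and buffer fetched in applyBufferMemoryBarrier():

    if (mvkIsAnyFlagEnabled(dstStageMask, VK_PIPELINE_STAGE_HOST_BIT) &&
        mvkIsAnyFlagEnabled(barrier.dstAccessMask, VK_ACCESS_HOST_READ_BIT)) {
        // Managed storage keeps distinct CPU/GPU copies; copy GPU writes back
        // so the host sees them.
        [blitEncoder synchronizeResource: mtlBuffer];
    }
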
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
index d4ef426..c19f2eb 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
@@ -31,9 +31,9 @@
 
 /** Indicates the Metal resource indexes used by a single shader stage in a descriptor. */
 typedef struct MVKShaderStageResourceBinding {
-	uint32_t bufferIndex = 0;
-	uint32_t textureIndex = 0;
-	uint32_t samplerIndex = 0;
+	uint16_t bufferIndex = 0;
+	uint16_t textureIndex = 0;
+	uint16_t samplerIndex = 0;
 
 	MVKShaderStageResourceBinding operator+ (const MVKShaderStageResourceBinding& rhs);
 	MVKShaderStageResourceBinding& operator+= (const MVKShaderStageResourceBinding& rhs);
@@ -47,9 +47,9 @@
 typedef struct MVKShaderResourceBinding {
 	MVKShaderStageResourceBinding stages[kMVKShaderStageMax];
 
-	uint32_t getMaxBufferIndex();
-	uint32_t getMaxTextureIndex();
-	uint32_t getMaxSamplerIndex();
+	uint16_t getMaxBufferIndex();
+	uint16_t getMaxTextureIndex();
+	uint16_t getMaxSamplerIndex();
 
 	MVKShaderResourceBinding operator+ (const MVKShaderResourceBinding& rhs);
 	MVKShaderResourceBinding& operator+= (const MVKShaderResourceBinding& rhs);
@@ -89,7 +89,7 @@
 				  MVKDescriptorSet* descSet,
 				  uint32_t descStartIndex,
 				  MVKShaderResourceBinding& dslMTLRezIdxOffsets,
-				  MVKVector<uint32_t>& dynamicOffsets,
+				  MVKVector<uint32_t>* pDynamicOffsets,
 				  uint32_t* pDynamicOffsetIndex);
 
     /** Encodes this binding layout and the specified descriptor on the specified command encoder immediately. */
@@ -148,7 +148,7 @@
 					  uint32_t descriptorIndex,
 					  bool stages[],
 					  MVKShaderResourceBinding& mtlIndexes,
-					  MVKVector<uint32_t>& dynamicOffsets,
+					  MVKVector<uint32_t>* pDynamicOffsets,
 					  uint32_t* pDynamicOffsetIndex) = 0;
 
 	/**
@@ -202,7 +202,7 @@
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
-			  MVKVector<uint32_t>& dynamicOffsets,
+			  MVKVector<uint32_t>* pDynamicOffsets,
 			  uint32_t* pDynamicOffsetIndex) override;
 
 	void write(MVKDescriptorSet* mvkDescSet,
@@ -280,7 +280,7 @@
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
-			  MVKVector<uint32_t>& dynamicOffsets,
+			  MVKVector<uint32_t>* pDynamicOffsets,
 			  uint32_t* pDynamicOffsetIndex) override;
 
 	void write(MVKDescriptorSet* mvkDescSet,
@@ -319,7 +319,7 @@
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
-			  MVKVector<uint32_t>& dynamicOffsets,
+			  MVKVector<uint32_t>* pDynamicOffsets,
 			  uint32_t* pDynamicOffsetIndex) override;
 
 	void write(MVKDescriptorSet* mvkDescSet,
@@ -391,7 +391,7 @@
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
-			  MVKVector<uint32_t>& dynamicOffsets,
+			  MVKVector<uint32_t>* pDynamicOffsets,
 			  uint32_t* pDynamicOffsetIndex);
 
 	void write(MVKDescriptorSet* mvkDescSet,
@@ -433,7 +433,7 @@
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
-			  MVKVector<uint32_t>& dynamicOffsets,
+			  MVKVector<uint32_t>* pDynamicOffsets,
 			  uint32_t* pDynamicOffsetIndex) override;
 
 	void write(MVKDescriptorSet* mvkDescSet,
@@ -473,7 +473,7 @@
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
-			  MVKVector<uint32_t>& dynamicOffsets,
+			  MVKVector<uint32_t>* pDynamicOffsets,
 			  uint32_t* pDynamicOffsetIndex) override;
 
 	void write(MVKDescriptorSet* mvkDescSet,
@@ -511,7 +511,7 @@
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
-			  MVKVector<uint32_t>& dynamicOffsets,
+			  MVKVector<uint32_t>* pDynamicOffsets,
 			  uint32_t* pDynamicOffsetIndex) override;
 
 	void write(MVKDescriptorSet* mvkDescSet,
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
index 672b2de..3179cc2 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
@@ -41,15 +41,15 @@
 
 #pragma mark MVKShaderResourceBinding
 
-uint32_t MVKShaderResourceBinding::getMaxBufferIndex() {
+uint16_t MVKShaderResourceBinding::getMaxBufferIndex() {
 	return std::max({stages[kMVKShaderStageVertex].bufferIndex, stages[kMVKShaderStageTessCtl].bufferIndex, stages[kMVKShaderStageTessEval].bufferIndex, stages[kMVKShaderStageFragment].bufferIndex, stages[kMVKShaderStageCompute].bufferIndex});
 }
 
-uint32_t MVKShaderResourceBinding::getMaxTextureIndex() {
+uint16_t MVKShaderResourceBinding::getMaxTextureIndex() {
 	return std::max({stages[kMVKShaderStageVertex].textureIndex, stages[kMVKShaderStageTessCtl].textureIndex, stages[kMVKShaderStageTessEval].textureIndex, stages[kMVKShaderStageFragment].textureIndex, stages[kMVKShaderStageCompute].textureIndex});
 }
 
-uint32_t MVKShaderResourceBinding::getMaxSamplerIndex() {
+uint16_t MVKShaderResourceBinding::getMaxSamplerIndex() {
 	return std::max({stages[kMVKShaderStageVertex].samplerIndex, stages[kMVKShaderStageTessCtl].samplerIndex, stages[kMVKShaderStageTessEval].samplerIndex, stages[kMVKShaderStageFragment].samplerIndex, stages[kMVKShaderStageCompute].samplerIndex});
 }
 
@@ -83,7 +83,7 @@
 											 MVKDescriptorSet* descSet,
 											 uint32_t descStartIndex,
 											 MVKShaderResourceBinding& dslMTLRezIdxOffsets,
-											 MVKVector<uint32_t>& dynamicOffsets,
+											 MVKVector<uint32_t>* pDynamicOffsets,
 											 uint32_t* pDynamicOffsetIndex) {
 
 	// Establish the resource indices to use, by combining the offsets of the DSL and this DSL binding.
@@ -93,7 +93,7 @@
     for (uint32_t descIdx = 0; descIdx < descCnt; descIdx++) {
 		MVKDescriptor* mvkDesc = descSet->getDescriptor(descStartIndex + descIdx);
 		mvkDesc->bind(cmdEncoder, _info.descriptorType, descIdx, _applyToStage,
-					  mtlIdxs, dynamicOffsets, pDynamicOffsetIndex);
+					  mtlIdxs, pDynamicOffsets, pDynamicOffsetIndex);
     }
 	return descCnt;
 }
@@ -476,7 +476,7 @@
 							   uint32_t descriptorIndex,
 							   bool stages[],
 							   MVKShaderResourceBinding& mtlIndexes,
-							   MVKVector<uint32_t>& dynamicOffsets,
+							   MVKVector<uint32_t>* pDynamicOffsets,
 							   uint32_t* pDynamicOffsetIndex) {
 	MVKMTLBufferBinding bb;
 	NSUInteger bufferDynamicOffset = 0;
@@ -485,8 +485,10 @@
 		// After determining dynamic part of offset (zero otherwise), fall through to non-dynamic handling
 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
 		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-			bufferDynamicOffset = dynamicOffsets[*pDynamicOffsetIndex];
-			(*pDynamicOffsetIndex)++;           // Move on to next dynamic offset (and feedback to caller)
+			if (pDynamicOffsets) {
+				bufferDynamicOffset = (*pDynamicOffsets)[*pDynamicOffsetIndex];
+				(*pDynamicOffsetIndex)++;           // Move on to next dynamic offset (and feedback to caller)
+			}
 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
 		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
 			if (_mvkBuffer) {
@@ -581,7 +583,7 @@
 									  uint32_t descriptorIndex,
 									  bool stages[],
 									  MVKShaderResourceBinding& mtlIndexes,
-									  MVKVector<uint32_t>& dynamicOffsets,
+									  MVKVector<uint32_t>* pDynamicOffsets,
 									  uint32_t* pDynamicOffsetIndex) {
 	MVKMTLBufferBinding bb;
 
@@ -678,7 +680,7 @@
 							  uint32_t descriptorIndex,
 							  bool stages[],
 							  MVKShaderResourceBinding& mtlIndexes,
-							  MVKVector<uint32_t>& dynamicOffsets,
+							  MVKVector<uint32_t>* pDynamicOffsets,
 							  uint32_t* pDynamicOffsetIndex) {
 	MVKMTLTextureBinding tb;
 	MVKMTLBufferBinding bb;
@@ -796,7 +798,7 @@
 									 uint32_t descriptorIndex,
 									 bool stages[],
 									 MVKShaderResourceBinding& mtlIndexes,
-									 MVKVector<uint32_t>& dynamicOffsets,
+									 MVKVector<uint32_t>* pDynamicOffsets,
 									 uint32_t* pDynamicOffsetIndex) {
 	MVKMTLSamplerStateBinding sb;
 	switch (descriptorType) {
@@ -914,12 +916,12 @@
 								uint32_t descriptorIndex,
 								bool stages[],
 								MVKShaderResourceBinding& mtlIndexes,
-								MVKVector<uint32_t>& dynamicOffsets,
+								MVKVector<uint32_t>* pDynamicOffsets,
 								uint32_t* pDynamicOffsetIndex) {
 	switch (descriptorType) {
 		case VK_DESCRIPTOR_TYPE_SAMPLER: {
 			MVKSamplerDescriptorMixin::bind(cmdEncoder, descriptorType, descriptorIndex, stages,
-											mtlIndexes, dynamicOffsets, pDynamicOffsetIndex);
+											mtlIndexes, pDynamicOffsets, pDynamicOffsetIndex);
 			break;
 		}
 
@@ -983,14 +985,14 @@
 											 uint32_t descriptorIndex,
 											 bool stages[],
 											 MVKShaderResourceBinding& mtlIndexes,
-											 MVKVector<uint32_t>& dynamicOffsets,
+											 MVKVector<uint32_t>* pDynamicOffsets,
 											 uint32_t* pDynamicOffsetIndex) {
 	switch (descriptorType) {
 		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
 			MVKImageDescriptor::bind(cmdEncoder, descriptorType, descriptorIndex, stages,
-									 mtlIndexes, dynamicOffsets, pDynamicOffsetIndex);
+									 mtlIndexes, pDynamicOffsets, pDynamicOffsetIndex);
 			MVKSamplerDescriptorMixin::bind(cmdEncoder, descriptorType, descriptorIndex, stages,
-											mtlIndexes, dynamicOffsets, pDynamicOffsetIndex);
+											mtlIndexes, pDynamicOffsets, pDynamicOffsetIndex);
 			break;
 		}
 
@@ -1057,7 +1059,7 @@
 									uint32_t descriptorIndex,
 									bool stages[],
 									MVKShaderResourceBinding& mtlIndexes,
-									MVKVector<uint32_t>& dynamicOffsets,
+									MVKVector<uint32_t>* pDynamicOffsets,
 									uint32_t* pDynamicOffsetIndex) {
 	MVKMTLTextureBinding tb;
 	MVKMTLBufferBinding bb;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
index 51756a3..a5cbe57 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
@@ -46,7 +46,7 @@
     void bindDescriptorSet(MVKCommandEncoder* cmdEncoder,
                            MVKDescriptorSet* descSet,
                            MVKShaderResourceBinding& dslMTLRezIdxOffsets,
-                           MVKVector<uint32_t>& dynamicOffsets,
+                           MVKVector<uint32_t>* pDynamicOffsets,
                            uint32_t* pDynamicOffsetIndex);
 
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
index 45d5ba5..59c2902 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
@@ -38,7 +38,7 @@
 void MVKDescriptorSetLayout::bindDescriptorSet(MVKCommandEncoder* cmdEncoder,
                                                MVKDescriptorSet* descSet,
                                                MVKShaderResourceBinding& dslMTLRezIdxOffsets,
-                                               MVKVector<uint32_t>& dynamicOffsets,
+                                               MVKVector<uint32_t>* pDynamicOffsets,
                                                uint32_t* pDynamicOffsetIndex) {
     if (_isPushDescriptorLayout) return;
 
@@ -46,7 +46,7 @@
     uint32_t bindCnt = (uint32_t)_bindings.size();
     for (uint32_t descIdx = 0, bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
 		descIdx += _bindings[bindIdx].bind(cmdEncoder, descSet, descIdx,
-										   dslMTLRezIdxOffsets, dynamicOffsets,
+										   dslMTLRezIdxOffsets, pDynamicOffsets,
 										   pDynamicOffsetIndex);
     }
 }
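
Switching the dynamicOffsets parameter from a reference to a pointer lets the new static descriptor-set command variants skip the offsets entirely; the guard added in MVKDescriptor.mm above only dereferences the vector when it is non-null. Hypothetical call sites for MVKPipelineLayout::bindDescriptorSets():

    MVKVectorInline<uint32_t, 4> dynOffsets;
    dynOffsets.push_back(256);                                         // one dynamic UBO offset
    layout->bindDescriptorSets(cmdEncoder, descSets, 0, &dynOffsets);  // dynamic variant
    layout->bindDescriptorSets(cmdEncoder, descSets, 0, nullptr);      // static variant: no vector built
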
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index f5e1f4d..d23ad13 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -20,6 +20,7 @@
 
 #include "MVKFoundation.h"
 #include "MVKVulkanAPIObject.h"
+#include "MVKMTLResourceBindings.h"
 #include "MVKLayers.h"
 #include "MVKObjectPool.h"
 #include "MVKVector.h"
@@ -561,9 +562,9 @@
 	/** Applies the specified global memory barrier to all resource issued by this device. */
 	void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
 							VkPipelineStageFlags dstStageMask,
-							VkMemoryBarrier* pMemoryBarrier,
-                            MVKCommandEncoder* cmdEncoder,
-                            MVKCommandUse cmdUse);
+							MVKPipelineBarrier& barrier,
+							MVKCommandEncoder* cmdEncoder,
+							MVKCommandUse cmdUse);
 
     /**
	 * If performance is being tracked, returns a monotonic timestamp value for use in performance timestamping.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 00b8b42..bde8c19 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -1972,16 +1972,15 @@
 void MVKPhysicalDevice::initExternalMemoryProperties() {
 
 	// Buffers
-	_mtlBufferExternalMemoryProperties.externalMemoryFeatures = (VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT |
-																 VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
+	_mtlBufferExternalMemoryProperties.externalMemoryFeatures = (VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
 																 VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT);
 	_mtlBufferExternalMemoryProperties.exportFromImportedHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_KHR;
 	_mtlBufferExternalMemoryProperties.compatibleHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_KHR;
 
 	// Images
-	_mtlTextureExternalMemoryProperties.externalMemoryFeatures = (VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT |
-																  VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
-																  VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT);
+	_mtlTextureExternalMemoryProperties.externalMemoryFeatures = (VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
+																  VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT |
+																  VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT);
 	_mtlTextureExternalMemoryProperties.exportFromImportedHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_KHR;
 	_mtlTextureExternalMemoryProperties.compatibleHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_KHR;
 }
@@ -2203,8 +2202,10 @@
 
 void MVKDevice::destroyBuffer(MVKBuffer* mvkBuff,
 							  const VkAllocationCallbacks* pAllocator) {
-	removeResource(mvkBuff);
-	mvkBuff->destroy();
+	if (mvkBuff) {
+		removeResource(mvkBuff);
+		mvkBuff->destroy();
+	}
 }
 
 MVKBufferView* MVKDevice::createBufferView(const VkBufferViewCreateInfo* pCreateInfo,
@@ -2214,7 +2215,7 @@
 
 void MVKDevice::destroyBufferView(MVKBufferView* mvkBuffView,
                                   const VkAllocationCallbacks* pAllocator) {
-    mvkBuffView->destroy();
+	if (mvkBuffView) { mvkBuffView->destroy(); }
 }
 
 MVKImage* MVKDevice::createImage(const VkImageCreateInfo* pCreateInfo,
@@ -2238,8 +2239,10 @@
 
 void MVKDevice::destroyImage(MVKImage* mvkImg,
 							 const VkAllocationCallbacks* pAllocator) {
-	removeResource(mvkImg);
-	mvkImg->destroy();
+	if (mvkImg) {
+		removeResource(mvkImg);
+		mvkImg->destroy();
+	}
 }
 
 MVKImageView* MVKDevice::createImageView(const VkImageViewCreateInfo* pCreateInfo,
@@ -2249,7 +2252,7 @@
 
 void MVKDevice::destroyImageView(MVKImageView* mvkImgView,
 								 const VkAllocationCallbacks* pAllocator) {
-	mvkImgView->destroy();
+	if (mvkImgView) { mvkImgView->destroy(); }
 }
 
 MVKSwapchain* MVKDevice::createSwapchain(const VkSwapchainCreateInfoKHR* pCreateInfo,
@@ -2259,7 +2262,7 @@
 
 void MVKDevice::destroySwapchain(MVKSwapchain* mvkSwpChn,
 								 const VkAllocationCallbacks* pAllocator) {
-	mvkSwpChn->destroy();
+	if (mvkSwpChn) { mvkSwpChn->destroy(); }
 }
 
 MVKPresentableSwapchainImage* MVKDevice::createPresentableSwapchainImage(const VkImageCreateInfo* pCreateInfo,
@@ -2271,8 +2274,10 @@
 
 void MVKDevice::destroyPresentableSwapchainImage(MVKPresentableSwapchainImage* mvkImg,
 												 const VkAllocationCallbacks* pAllocator) {
-	removeResource(mvkImg);
-	mvkImg->destroy();
+	if (mvkImg) {
+		removeResource(mvkImg);
+		mvkImg->destroy();
+	}
 }
 
 MVKFence* MVKDevice::createFence(const VkFenceCreateInfo* pCreateInfo,
@@ -2282,7 +2287,7 @@
 
 void MVKDevice::destroyFence(MVKFence* mvkFence,
 							 const VkAllocationCallbacks* pAllocator) {
-	mvkFence->destroy();
+	if (mvkFence) { mvkFence->destroy(); }
 }
 
 MVKSemaphore* MVKDevice::createSemaphore(const VkSemaphoreCreateInfo* pCreateInfo,
@@ -2298,7 +2303,7 @@
 
 void MVKDevice::destroySemaphore(MVKSemaphore* mvkSem4,
 								 const VkAllocationCallbacks* pAllocator) {
-	mvkSem4->destroy();
+	if (mvkSem4) { mvkSem4->destroy(); }
 }
 
 MVKEvent* MVKDevice::createEvent(const VkEventCreateInfo* pCreateInfo,
@@ -2311,7 +2316,7 @@
 }
 
 void MVKDevice::destroyEvent(MVKEvent* mvkEvent, const VkAllocationCallbacks* pAllocator) {
-	mvkEvent->destroy();
+	if (mvkEvent) { mvkEvent->destroy(); }
 }
 
 MVKQueryPool* MVKDevice::createQueryPool(const VkQueryPoolCreateInfo* pCreateInfo,
@@ -2330,7 +2335,7 @@
 
 void MVKDevice::destroyQueryPool(MVKQueryPool* mvkQP,
 								 const VkAllocationCallbacks* pAllocator) {
-	mvkQP->destroy();
+	if (mvkQP) { mvkQP->destroy(); }
 }
 
 MVKShaderModule* MVKDevice::createShaderModule(const VkShaderModuleCreateInfo* pCreateInfo,
@@ -2340,7 +2345,7 @@
 
 void MVKDevice::destroyShaderModule(MVKShaderModule* mvkShdrMod,
 									const VkAllocationCallbacks* pAllocator) {
-	mvkShdrMod->destroy();
+	if (mvkShdrMod) { mvkShdrMod->destroy(); }
 }
 
 MVKPipelineCache* MVKDevice::createPipelineCache(const VkPipelineCacheCreateInfo* pCreateInfo,
@@ -2350,7 +2355,7 @@
 
 void MVKDevice::destroyPipelineCache(MVKPipelineCache* mvkPLC,
 									 const VkAllocationCallbacks* pAllocator) {
-	mvkPLC->destroy();
+	if (mvkPLC) { mvkPLC->destroy(); }
 }
 
 MVKPipelineLayout* MVKDevice::createPipelineLayout(const VkPipelineLayoutCreateInfo* pCreateInfo,
@@ -2360,7 +2365,7 @@
 
 void MVKDevice::destroyPipelineLayout(MVKPipelineLayout* mvkPLL,
 									  const VkAllocationCallbacks* pAllocator) {
-	mvkPLL->destroy();
+	if (mvkPLL) { mvkPLL->destroy(); }
 }
 
 template<typename PipelineType, typename PipelineInfoType>
@@ -2421,7 +2426,7 @@
 
 void MVKDevice::destroyPipeline(MVKPipeline* mvkPL,
                                 const VkAllocationCallbacks* pAllocator) {
-    mvkPL->destroy();
+	if (mvkPL) { mvkPL->destroy(); }
 }
 
 MVKSampler* MVKDevice::createSampler(const VkSamplerCreateInfo* pCreateInfo,
@@ -2431,7 +2436,7 @@
 
 void MVKDevice::destroySampler(MVKSampler* mvkSamp,
 							   const VkAllocationCallbacks* pAllocator) {
-	mvkSamp->destroy();
+	if (mvkSamp) { mvkSamp->destroy(); }
 }
 
 MVKDescriptorSetLayout* MVKDevice::createDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
@@ -2441,7 +2446,7 @@
 
 void MVKDevice::destroyDescriptorSetLayout(MVKDescriptorSetLayout* mvkDSL,
 										   const VkAllocationCallbacks* pAllocator) {
-	mvkDSL->destroy();
+	if (mvkDSL) { mvkDSL->destroy(); }
 }
 
 MVKDescriptorPool* MVKDevice::createDescriptorPool(const VkDescriptorPoolCreateInfo* pCreateInfo,
@@ -2451,7 +2456,7 @@
 
 void MVKDevice::destroyDescriptorPool(MVKDescriptorPool* mvkDP,
 									  const VkAllocationCallbacks* pAllocator) {
-	mvkDP->destroy();
+	if (mvkDP) { mvkDP->destroy(); }
 }
 
 MVKDescriptorUpdateTemplate* MVKDevice::createDescriptorUpdateTemplate(
@@ -2462,7 +2467,7 @@
 
 void MVKDevice::destroyDescriptorUpdateTemplate(MVKDescriptorUpdateTemplate* mvkDUT,
 												const VkAllocationCallbacks* pAllocator) {
-	mvkDUT->destroy();
+	if (mvkDUT) { mvkDUT->destroy(); }
 }
 
 MVKFramebuffer* MVKDevice::createFramebuffer(const VkFramebufferCreateInfo* pCreateInfo,
@@ -2472,7 +2477,7 @@
 
 void MVKDevice::destroyFramebuffer(MVKFramebuffer* mvkFB,
 								   const VkAllocationCallbacks* pAllocator) {
-	mvkFB->destroy();
+	if (mvkFB) { mvkFB->destroy(); }
 }
 
 MVKRenderPass* MVKDevice::createRenderPass(const VkRenderPassCreateInfo* pCreateInfo,
@@ -2482,7 +2487,7 @@
 
 void MVKDevice::destroyRenderPass(MVKRenderPass* mvkRP,
 								  const VkAllocationCallbacks* pAllocator) {
-	mvkRP->destroy();
+	if (mvkRP) { mvkRP->destroy(); }
 }
 
 MVKCommandPool* MVKDevice::createCommandPool(const VkCommandPoolCreateInfo* pCreateInfo,
@@ -2492,7 +2497,7 @@
 
 void MVKDevice::destroyCommandPool(MVKCommandPool* mvkCmdPool,
 								   const VkAllocationCallbacks* pAllocator) {
-	mvkCmdPool->destroy();
+	if (mvkCmdPool) { mvkCmdPool->destroy(); }
 }
 
 MVKDeviceMemory* MVKDevice::allocateMemory(const VkMemoryAllocateInfo* pAllocateInfo,
@@ -2502,7 +2507,7 @@
 
 void MVKDevice::freeMemory(MVKDeviceMemory* mvkDevMem,
 						   const VkAllocationCallbacks* pAllocator) {
-	mvkDevMem->destroy();
+	if (mvkDevMem) { mvkDevMem->destroy(); }
 }
 
 
@@ -2524,14 +2529,14 @@
 
 void MVKDevice::applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
 								   VkPipelineStageFlags dstStageMask,
-								   VkMemoryBarrier* pMemoryBarrier,
-                                   MVKCommandEncoder* cmdEncoder,
-                                   MVKCommandUse cmdUse) {
+								   MVKPipelineBarrier& barrier,
+								   MVKCommandEncoder* cmdEncoder,
+								   MVKCommandUse cmdUse) {
 	if (!mvkIsAnyFlagEnabled(dstStageMask, VK_PIPELINE_STAGE_HOST_BIT) ||
-		!mvkIsAnyFlagEnabled(pMemoryBarrier->dstAccessMask, VK_ACCESS_HOST_READ_BIT) ) { return; }
+		!mvkIsAnyFlagEnabled(barrier.dstAccessMask, VK_ACCESS_HOST_READ_BIT) ) { return; }
 	lock_guard<mutex> lock(_rezLock);
-    for (auto& rez : _resources) {
-		rez->applyMemoryBarrier(srcStageMask, dstStageMask, pMemoryBarrier, cmdEncoder, cmdUse);
+	for (auto& rez : _resources) {
+		rez->applyMemoryBarrier(srcStageMask, dstStageMask, barrier, cmdEncoder, cmdUse);
 	}
 }
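
The null guards added to the destroy*/free* methods above centralize behavior the Vulkan spec requires of every destruction entry point, which is what allows the per-call checks to be deleted from vulkan.mm later in this patch:

    // Per the Vulkan spec, every vkDestroy*/vkFree* call must silently accept
    // VK_NULL_HANDLE, so both of these are legal no-ops:
    vkDestroyBuffer(device, VK_NULL_HANDLE, nullptr);
    vkFreeMemory(device, VK_NULL_HANDLE, nullptr);
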
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
index 8bbfa05..0fc5ad7 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
@@ -134,18 +134,18 @@
 	virtual VkResult bindDeviceMemory2(const VkBindImageMemoryInfo* pBindInfo);
 
 	/** Applies the specified global memory barrier. */
-    void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
-                            VkPipelineStageFlags dstStageMask,
-                            VkMemoryBarrier* pMemoryBarrier,
-                            MVKCommandEncoder* cmdEncoder,
-                            MVKCommandUse cmdUse) override;
+	void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
+							VkPipelineStageFlags dstStageMask,
+							MVKPipelineBarrier& barrier,
+							MVKCommandEncoder* cmdEncoder,
+							MVKCommandUse cmdUse) override;
 
 	/** Applies the specified image memory barrier. */
-    void applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask,
-                                 VkPipelineStageFlags dstStageMask,
-                                 VkImageMemoryBarrier* pImageMemoryBarrier,
-                                 MVKCommandEncoder* cmdEncoder,
-                                 MVKCommandUse cmdUse);
+	void applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask,
+								 VkPipelineStageFlags dstStageMask,
+								 MVKPipelineBarrier& barrier,
+								 MVKCommandEncoder* cmdEncoder,
+								 MVKCommandUse cmdUse);
 
 #pragma mark Metal
 
@@ -250,7 +250,7 @@
 	VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size);
 	bool needsHostReadSync(VkPipelineStageFlags srcStageMask,
 						   VkPipelineStageFlags dstStageMask,
-						   VkImageMemoryBarrier* pImageMemoryBarrier);
+						   MVKPipelineBarrier& barrier);
 
 	MVKVectorInline<MVKImageSubresource, 1> _subresources;
 	std::unordered_map<NSUInteger, id<MTLTexture>> _mtlTextureViews;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index a90913b..68663fa 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -83,11 +83,11 @@
 
 void MVKImage::applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
 								  VkPipelineStageFlags dstStageMask,
-								  VkMemoryBarrier* pMemoryBarrier,
-                                  MVKCommandEncoder* cmdEncoder,
-                                  MVKCommandUse cmdUse) {
+								  MVKPipelineBarrier& barrier,
+								  MVKCommandEncoder* cmdEncoder,
+								  MVKCommandUse cmdUse) {
 #if MVK_MACOS
-	if ( needsHostReadSync(srcStageMask, dstStageMask, pMemoryBarrier) ) {
+	if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) {
 		[cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: getMTLTexture()];
 	}
 #endif
@@ -95,27 +95,24 @@
 
 void MVKImage::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask,
 									   VkPipelineStageFlags dstStageMask,
-									   VkImageMemoryBarrier* pImageMemoryBarrier,
-                                       MVKCommandEncoder* cmdEncoder,
-                                       MVKCommandUse cmdUse) {
-	const VkImageSubresourceRange& srRange = pImageMemoryBarrier->subresourceRange;
+									   MVKPipelineBarrier& barrier,
+									   MVKCommandEncoder* cmdEncoder,
+									   MVKCommandUse cmdUse) {
 
 	// Extract the mipmap levels that are to be updated
-	uint32_t mipLvlStart = srRange.baseMipLevel;
-	uint32_t mipLvlCnt = srRange.levelCount;
-	uint32_t mipLvlEnd = (mipLvlCnt == VK_REMAINING_MIP_LEVELS
+	uint32_t mipLvlStart = barrier.baseMipLevel;
+	uint32_t mipLvlEnd = (barrier.levelCount == (uint8_t)VK_REMAINING_MIP_LEVELS
 						  ? getMipLevelCount()
-						  : (mipLvlStart + mipLvlCnt));
+						  : (mipLvlStart + barrier.levelCount));
 
 	// Extract the cube or array layers (slices) that are to be updated
-	uint32_t layerStart = srRange.baseArrayLayer;
-	uint32_t layerCnt = srRange.layerCount;
-	uint32_t layerEnd = (layerCnt == VK_REMAINING_ARRAY_LAYERS
+	uint32_t layerStart = barrier.baseArrayLayer;
+	uint32_t layerEnd = (barrier.layerCount == (uint16_t)VK_REMAINING_ARRAY_LAYERS
 						 ? getLayerCount()
-						 : (layerStart + layerCnt));
+						 : (layerStart + barrier.layerCount));
 
 #if MVK_MACOS
-	bool needsSync = needsHostReadSync(srcStageMask, dstStageMask, pImageMemoryBarrier);
+	bool needsSync = needsHostReadSync(srcStageMask, dstStageMask, barrier);
 	id<MTLTexture> mtlTex = needsSync ? getMTLTexture() : nil;
 	id<MTLBlitCommandEncoder> mtlBlitEncoder = needsSync ? cmdEncoder->getMTLBlitEncoder(cmdUse) : nil;
 #endif
@@ -124,7 +121,7 @@
 	for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) {
 		for (uint32_t layer = layerStart; layer < layerEnd; layer++) {
 			MVKImageSubresource* pImgRez = getSubresource(mipLvl, layer);
-			if (pImgRez) { pImgRez->layoutState = pImageMemoryBarrier->newLayout; }
+			if (pImgRez) { pImgRez->layoutState = barrier.newLayout; }
 #if MVK_MACOS
 			if (needsSync) { [mtlBlitEncoder synchronizeTexture: mtlTex slice: layer level: mipLvl]; }
 #endif
@@ -136,15 +133,15 @@
 // texture and host memory for the purpose of the host reading texture memory.
 bool MVKImage::needsHostReadSync(VkPipelineStageFlags srcStageMask,
 								 VkPipelineStageFlags dstStageMask,
-								 VkImageMemoryBarrier* pImageMemoryBarrier) {
+								 MVKPipelineBarrier& barrier) {
+#if MVK_MACOS
+	return ((barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) &&
+			mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) &&
+			isMemoryHostAccessible() && !isMemoryHostCoherent());
+#endif
 #if MVK_IOS
 	return false;
 #endif
-#if MVK_MACOS
-	return ((pImageMemoryBarrier->newLayout == VK_IMAGE_LAYOUT_GENERAL) &&
-			mvkIsAnyFlagEnabled(pImageMemoryBarrier->dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) &&
-			isMemoryHostAccessible() && !isMemoryHostCoherent());
-#endif
 }
 
 // Returns a pointer to the internal subresource for the specified MIP level layer.
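
Because levelCount and layerCount are now stored as uint8_t and uint16_t, the VK_REMAINING_MIP_LEVELS / VK_REMAINING_ARRAY_LAYERS sentinels (all-ones 32-bit values) truncate on storage, so the comparisons in applyImageMemoryBarrier() above cast the sentinel to the same narrowed width. A minimal illustration:

    #include <cstdint>
    #include <cassert>

    int main() {
        const uint32_t kRemaining = ~0u;            // stand-in for VK_REMAINING_MIP_LEVELS
        uint8_t levelCount = (uint8_t)kRemaining;   // narrowed on storage: 0xFF
        assert(levelCount == (uint8_t)kRemaining);  // sentinel compared at the same width
        return 0;
    }
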
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
index 66d7a6b..ee566c4 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
@@ -108,7 +108,7 @@
 
 void MVKInstance::destroySurface(MVKSurface* mvkSrfc,
 								const VkAllocationCallbacks* pAllocator) {
-	mvkSrfc->destroy();
+	if (mvkSrfc) { mvkSrfc->destroy(); }
 }
 
 MVKDebugReportCallback* MVKInstance::createDebugReportCallback(const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
@@ -123,6 +123,8 @@
 
 void MVKInstance::destroyDebugReportCallback(MVKDebugReportCallback* mvkDRCB,
 								const VkAllocationCallbacks* pAllocator) {
+	if ( !mvkDRCB ) { return; }
+
 	lock_guard<mutex> lock(_dcbLock);
 
 	mvkRemoveAllOccurances(_debugReportCallbacks, mvkDRCB);
@@ -166,6 +168,8 @@
 
 void MVKInstance::destroyDebugUtilsMessenger(MVKDebugUtilsMessenger* mvkDUM,
 											 const VkAllocationCallbacks* pAllocator) {
+	if ( !mvkDUM ) { return; }
+
 	lock_guard<mutex> lock(_dcbLock);
 
 	mvkRemoveAllOccurances(_debugUtilMessengers, mvkDUM);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
index 431f02f..3221299 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
@@ -56,7 +56,7 @@
     void bindDescriptorSets(MVKCommandEncoder* cmdEncoder,
                             MVKVector<MVKDescriptorSet*>& descriptorSets,
                             uint32_t firstSet,
-                            MVKVector<uint32_t>& dynamicOffsets);
+                            MVKVector<uint32_t>* pDynamicOffsets);
 
 	/** Updates a descriptor set in a command encoder. */
 	void pushDescriptorSet(MVKCommandEncoder* cmdEncoder,
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index c67ab8c..d270906 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -40,7 +40,7 @@
 void MVKPipelineLayout::bindDescriptorSets(MVKCommandEncoder* cmdEncoder,
                                            MVKVector<MVKDescriptorSet*>& descriptorSets,
                                            uint32_t firstSet,
-                                           MVKVector<uint32_t>& dynamicOffsets) {
+                                           MVKVector<uint32_t>* pDynamicOffsets) {
 	clearConfigurationResult();
 	uint32_t pDynamicOffsetIndex = 0;
 	uint32_t dsCnt = (uint32_t)descriptorSets.size();
@@ -50,7 +50,7 @@
 		MVKDescriptorSetLayout* dsl = _descriptorSetLayouts[dslIdx];
 		dsl->bindDescriptorSet(cmdEncoder, descSet,
 							   _dslMTLResourceIndexOffsets[dslIdx],
-							   dynamicOffsets, &pDynamicOffsetIndex);
+							   pDynamicOffsets, &pDynamicOffsetIndex);
 		setConfigurationResult(dsl->getConfigurationResult());
 	}
 }
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKResource.h b/MoltenVK/MoltenVK/GPUObjects/MVKResource.h
index fb6f7ee..ca3ccfa 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKResource.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKResource.h
@@ -20,6 +20,7 @@
 
 #include "MVKDevice.h"
 #include "MVKDeviceMemory.h"
+#include "MVKMTLResourceBindings.h"
 
 class MVKCommandEncoder;
 
@@ -67,9 +68,9 @@
 	/** Applies the specified global memory barrier. */
 	virtual void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
 									VkPipelineStageFlags dstStageMask,
-									VkMemoryBarrier* pMemoryBarrier,
-                                    MVKCommandEncoder* cmdEncoder,
-                                    MVKCommandUse cmdUse) = 0;
+									MVKPipelineBarrier& barrier,
+									MVKCommandEncoder* cmdEncoder,
+									MVKCommandUse cmdUse) = 0;
 
 	
 #pragma mark Construction
diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
index c2c17f3..d4ae601 100644
--- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h
+++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
@@ -70,7 +70,7 @@
 #pragma mark Vulkan support
 
 /** Tracks the Vulkan command currently being used. */
-typedef enum {
+typedef enum : uint8_t {
     kMVKCommandUseNone,                     /**< No use defined. */
     kMVKCommandUseQueueSubmit,              /**< vkQueueSubmit. */
     kMVKCommandUseQueuePresent,             /**< vkQueuePresentKHR. */
@@ -321,6 +321,9 @@
 			mvkVKComponentSwizzlesMatch(cm1.a, cm2.a, VK_COMPONENT_SWIZZLE_A));
 }
 
+/** Prints the size of the specified type. */
+#define mvkPrintSizeOf(type)    printf("Size of " #type " is %lu.\n", sizeof(type))
+
 
 #pragma mark -
 #pragma mark Template functions
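
mvkPrintSizeOf is a small debugging aid for auditing the structure-size reductions made elsewhere in this PR. A hypothetical audit; the printed values vary by platform and compiler packing:

    mvkPrintSizeOf(MVKMTLBufferBinding);
    mvkPrintSizeOf(MVKMTLTextureBinding);
    mvkPrintSizeOf(MVKPipelineBarrier);
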
diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
index 51990e8..8514843 100644
--- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm
+++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
@@ -117,14 +117,45 @@
 // Create and configure a command of particular type.
 // If the command is configured correctly, add it to the buffer,
 // otherwise indicate the configuration error to the command buffer.
-#define MVKAddCmd(cmdType, vkCmdBuff, ...)  										\
-	MVKCommandBuffer* cmdBuff = MVKCommandBuffer::getMVKCommandBuffer(vkCmdBuff);	\
+#define MVKAddCmd(cmdType, vkCmdBuff, ...)  													\
+	MVKCommandBuffer* cmdBuff = MVKCommandBuffer::getMVKCommandBuffer(vkCmdBuff);				\
 	MVKCmd ##cmdType* cmd = cmdBuff->getCommandPool()->_cmd ##cmdType ##Pool.acquireObject();	\
-	VkResult cmdRslt = cmd->setContent(cmdBuff, ##__VA_ARGS__);						\
-	if(cmdRslt == VK_SUCCESS) {														\
-		cmdBuff->addCommand(cmd);													\
-	} else {																		\
-		cmdBuff->setConfigurationResult(cmdRslt);									\
+	VkResult cmdRslt = cmd->setContent(cmdBuff, ##__VA_ARGS__);									\
+	if (cmdRslt == VK_SUCCESS) {																\
+		cmdBuff->addCommand(cmd);																\
+	} else {																					\
+		cmdBuff->setConfigurationResult(cmdRslt);												\
+	}
+
+// Add one of two commands, based on comparing a command parameter against a threshold value
+#define MVKAddCmdFromThreshold(baseCmdType, value, threshold, vkCmdBuff, ...)					\
+	if (value <= threshold) {																	\
+		MVKAddCmd(baseCmdType ##threshold, vkCmdBuff, ##__VA_ARGS__);							\
+	} else {																					\
+		MVKAddCmd(baseCmdType ##Multi, vkCmdBuff, ##__VA_ARGS__);								\
+	}
+
+// Add one of three commands, based on comparing a command parameter against two threshold values
+#define MVKAddCmdFrom2Thresholds(baseCmdType, value, threshold1, threshold2, vkCmdBuff, ...)	\
+	if (value <= threshold1) {																	\
+		MVKAddCmd(baseCmdType ##threshold1, vkCmdBuff, ##__VA_ARGS__);							\
+	} else if (value <= threshold2) {															\
+		MVKAddCmd(baseCmdType ##threshold2, vkCmdBuff, ##__VA_ARGS__);							\
+	} else {																					\
+		MVKAddCmd(baseCmdType ##Multi, vkCmdBuff, ##__VA_ARGS__);								\
+	}
+
+
+// Add one of four commands, based on comparing a command parameter against three threshold values
+#define MVKAddCmdFrom3Thresholds(baseCmdType, value, threshold1, threshold2, threshold3, vkCmdBuff, ...)	\
+	if (value <= threshold1) {																				\
+		MVKAddCmd(baseCmdType ##threshold1, vkCmdBuff, ##__VA_ARGS__);										\
+	} else if (value <= threshold2) {																		\
+		MVKAddCmd(baseCmdType ##threshold2, vkCmdBuff, ##__VA_ARGS__);										\
+	} else if (value <= threshold3) {																		\
+		MVKAddCmd(baseCmdType ##threshold3, vkCmdBuff, ##__VA_ARGS__);										\
+	} else {																								\
+		MVKAddCmd(baseCmdType ##Multi, vkCmdBuff, ##__VA_ARGS__);											\
 	}
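
These MVKAddCmdFrom*Thresholds macros pick the command variant whose inline storage just covers the parameter count, falling back to the Multi variant past the last threshold. A stand-alone sketch of the selection logic; the real macros token-paste the command type name:

    #include <cstdio>
    #include <cstdint>
    #include <initializer_list>

    // Reports which pool variant the 2-threshold macro would route to.
    static const char* variantFor(uint32_t value, uint32_t t1, uint32_t t2) {
        if (value <= t1) return "1";    // e.g. MVKCmdBindVertexBuffers1
        if (value <= t2) return "2";    // e.g. MVKCmdBindVertexBuffers2
        return "Multi";                 // e.g. MVKCmdBindVertexBuffersMulti
    }

    int main() {
        for (uint32_t bindingCount : {1u, 2u, 5u}) {
            printf("bindingCount=%u -> BindVertexBuffers%s\n",
                   bindingCount, variantFor(bindingCount, 1, 2));
        }
        return 0;
    }
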
 
 
@@ -149,8 +180,7 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !instance ) { return; }
-	MVKInstance::getMVKInstance(instance)->destroy();
+	if (instance) { MVKInstance::getMVKInstance(instance)->destroy(); }
 	MVKTraceVulkanCallEnd();
 }
 
@@ -287,8 +317,7 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !device ) { return; }
-	MVKDevice::getMVKDevice(device)->destroy();
+	if (device) { MVKDevice::getMVKDevice(device)->destroy(); }
 	MVKTraceVulkanCallEnd();
 }
 
@@ -345,10 +374,8 @@
     VkQueue*                                    pQueue) {
 
 	MVKTraceVulkanCallStart();
-	if (pQueue) {
-		MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
-		*pQueue = mvkDev->getQueue(queueFamilyIndex, queueIndex)->getVkQueue();
-	}
+	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
+	*pQueue = mvkDev->getQueue(queueFamilyIndex, queueIndex)->getVkQueue();
 	MVKTraceVulkanCallEnd();
 }
 
@@ -406,7 +433,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !mem ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->freeMemory((MVKDeviceMemory*)mem, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -472,8 +498,6 @@
     VkDeviceSize*                               pCommittedMemoryInBytes) {
 
 	MVKTraceVulkanCallStart();
-    if ( !pCommittedMemoryInBytes ) { return; }
-
     MVKDeviceMemory* mvkMem = (MVKDeviceMemory*)memory;
     *pCommittedMemoryInBytes = mvkMem->getDeviceMemoryCommitment();
 	MVKTraceVulkanCallEnd();
@@ -599,7 +623,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !fence ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyFence((MVKFence*)fence, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -662,7 +685,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !semaphore ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroySemaphore((MVKSemaphore*)semaphore, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -689,7 +711,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !event ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyEvent((MVKEvent*)event, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -749,7 +770,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !queryPool ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyQueryPool((MVKQueryPool*)queryPool, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -793,7 +813,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !buffer ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyBuffer((MVKBuffer*)buffer, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -820,7 +839,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !bufferView ) { return; }
     MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
     mvkDev->destroyBufferView((MVKBufferView*)bufferView, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -847,7 +865,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !image ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyImage((MVKImage*)image, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -886,7 +903,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !imageView ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyImageView((MVKImageView*)imageView, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -913,7 +929,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !shaderModule ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyShaderModule((MVKShaderModule*)shaderModule, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -940,7 +955,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !pipelineCache ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyPipelineCache((MVKPipelineCache*)pipelineCache, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -1008,7 +1022,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !pipeline ) { return; }
     MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
     mvkDev->destroyPipeline((MVKPipeline*)pipeline, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -1035,7 +1048,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !pipelineLayout ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyPipelineLayout((MVKPipelineLayout*)pipelineLayout, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -1062,7 +1074,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !sampler ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroySampler((MVKSampler*)sampler, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -1089,7 +1100,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !descriptorSetLayout ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyDescriptorSetLayout((MVKDescriptorSetLayout*)descriptorSetLayout, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -1116,7 +1126,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !descriptorPool ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyDescriptorPool((MVKDescriptorPool*)descriptorPool, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -1194,7 +1203,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !framebuffer ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyFramebuffer((MVKFramebuffer*)framebuffer, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -1221,7 +1229,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !renderPass ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyRenderPass((MVKRenderPass*)renderPass, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -1233,8 +1240,6 @@
     VkExtent2D*                                 pGranularity) {
 
 	MVKTraceVulkanCallStart();
-    if ( !pGranularity ) { return; }
-
     MVKRenderPass* mvkRendPass = (MVKRenderPass*)renderPass;
     *pGranularity = mvkRendPass->getRenderAreaGranularity();
 	MVKTraceVulkanCallEnd();
@@ -1261,7 +1266,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !commandPool ) { return; }
 	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
 	mvkDev->destroyCommandPool((MVKCommandPool*)commandPool, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -1341,7 +1345,18 @@
     VkPipeline                                  pipeline) {
 	
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(BindPipeline, commandBuffer, pipelineBindPoint, pipeline);
+	switch (pipelineBindPoint) {
+		case VK_PIPELINE_BIND_POINT_GRAPHICS: {
+			MVKAddCmd(BindGraphicsPipeline, commandBuffer, pipeline);
+			break;
+		}
+		case VK_PIPELINE_BIND_POINT_COMPUTE: {
+			MVKAddCmd(BindComputePipeline, commandBuffer, pipeline);
+			break;
+		}
+		default:
+			break;
+	}
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1352,11 +1367,7 @@
 	const VkViewport*                           pViewports) {
 
 	MVKTraceVulkanCallStart();
-	if (viewportCount <= 1) {
-		MVKAddCmd(SetViewport1, commandBuffer, firstViewport, viewportCount, pViewports);
-	} else {
-		MVKAddCmd(SetViewportMulti, commandBuffer, firstViewport, viewportCount, pViewports);
-	}
+	MVKAddCmdFromThreshold(SetViewport, viewportCount, 1, commandBuffer, firstViewport, viewportCount, pViewports);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1367,11 +1378,7 @@
 	const VkRect2D*                             pScissors) {
 
 	MVKTraceVulkanCallStart();
-	if (scissorCount <= 1) {
-		MVKAddCmd(SetScissor1, commandBuffer, firstScissor, scissorCount, pScissors);
-	} else {
-		MVKAddCmd(SetScissorMulti, commandBuffer, firstScissor, scissorCount, pScissors);
-	}
+	MVKAddCmdFromThreshold(SetScissor, scissorCount, 1, commandBuffer, firstScissor, scissorCount, pScissors);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1455,8 +1462,13 @@
     const uint32_t*                             pDynamicOffsets) {
 	
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout,
-			  firstSet, setCount, pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
+	if (dynamicOffsetCount) {
+		MVKAddCmdFromThreshold(BindDescriptorSetsDynamic, setCount, 4, commandBuffer, pipelineBindPoint, layout,
+				  firstSet, setCount, pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
+	} else {
+		MVKAddCmdFrom2Thresholds(BindDescriptorSetsStatic, setCount, 1, 4, commandBuffer, pipelineBindPoint, layout,
+				  firstSet, setCount, pDescriptorSets);
+	}
 	MVKTraceVulkanCallEnd();
 }
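
Two changes land here at once: the dynamic-offset and static cases become separate command types (the dynamic variant must also carry the offsets, making it the larger object), and the selection gains granularity: a single cut at 4 sets for the dynamic path, cuts at 1 and 4 for the static path. MVKAddCmdFrom2Thresholds presumably extends the single-threshold selector to three size classes, along these lines (again a sketch by analogy):

	// Assumed two-threshold selector: small, medium, and Multi size classes.
	#define MVKAddCmdFrom2Thresholds(baseCmdType, value, threshold1, threshold2, vkCmdBuff, ...) \
		if (value <= threshold1) {                                                               \
			MVKAddCmd(baseCmdType ##threshold1, vkCmdBuff, ##__VA_ARGS__);                       \
		} else if (value <= threshold2) {                                                        \
			MVKAddCmd(baseCmdType ##threshold2, vkCmdBuff, ##__VA_ARGS__);                       \
		} else {                                                                                 \
			MVKAddCmd(baseCmdType ##Multi, vkCmdBuff, ##__VA_ARGS__);                            \
		}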
 
@@ -1479,7 +1491,7 @@
     const VkDeviceSize*                         pOffsets) {
 	
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(BindVertexBuffers, commandBuffer, startBinding, bindingCount, pBuffers, pOffsets);
+	MVKAddCmdFrom2Thresholds(BindVertexBuffers, bindingCount, 1, 2, commandBuffer, startBinding, bindingCount, pBuffers, pOffsets);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1561,7 +1573,7 @@
     const VkBufferCopy*                         pRegions) {
 	
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(CopyBuffer, commandBuffer, srcBuffer, destBuffer, regionCount, pRegions);
+	MVKAddCmdFromThreshold(CopyBuffer, regionCount, 1, commandBuffer, srcBuffer, destBuffer, regionCount, pRegions);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1575,7 +1587,8 @@
     const VkImageCopy*                          pRegions) {
 
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(CopyImage, commandBuffer, srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions);
+	MVKAddCmdFromThreshold(CopyImage, regionCount, 1, commandBuffer,
+						   srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1590,7 +1603,8 @@
     VkFilter                                    filter) {
 	
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(BlitImage, commandBuffer, srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions, filter);
+	MVKAddCmdFromThreshold(BlitImage, regionCount, 1, commandBuffer,
+						   srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions, filter);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1603,7 +1617,8 @@
     const VkBufferImageCopy*                    pRegions) {
 	
 	MVKTraceVulkanCallStart();
-    MVKAddCmd(BufferImageCopy, commandBuffer, srcBuffer, dstImage, dstImageLayout, regionCount, pRegions, true);
+    MVKAddCmdFrom3Thresholds(BufferImageCopy, regionCount, 1, 4, 8, commandBuffer,
+							 srcBuffer, dstImage, dstImageLayout, regionCount, pRegions, true);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1616,7 +1631,8 @@
     const VkBufferImageCopy*                    pRegions) {
 	
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(BufferImageCopy, commandBuffer, dstBuffer, srcImage, srcImageLayout, regionCount, pRegions, false);
+	MVKAddCmdFrom3Thresholds(BufferImageCopy, regionCount, 1, 4, 8, commandBuffer,
+							 dstBuffer, srcImage, srcImageLayout, regionCount, pRegions, false);
 	MVKTraceVulkanCallEnd();
 }
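
Buffer-image copies get a third cut point (1, 4, 8, then Multi), presumably because single-region copies dominate while mipmap transfers routinely batch a handful of regions. On the class side, each size class is plausibly one instantiation of a single templated command whose inline vector capacity is the template parameter:

	// Hypothetical class shape, following the pattern this PR applies elsewhere;
	// depends on the project's MVKCommand.h and MVKVector.h headers, and the
	// Multi pre-allocation count here is a guess.
	template <size_t N>
	class MVKCmdBufferImageCopy : public MVKCommand {
		// setContent()/encode() as in the non-templated version
	protected:
		MVKVectorInline<VkBufferImageCopy, N> _regions;
	};
	typedef MVKCmdBufferImageCopy<1>  MVKCmdBufferImageCopy1;      // regionCount == 1
	typedef MVKCmdBufferImageCopy<4>  MVKCmdBufferImageCopy4;      // 2 to 4 regions
	typedef MVKCmdBufferImageCopy<8>  MVKCmdBufferImageCopy8;      // 5 to 8 regions
	typedef MVKCmdBufferImageCopy<16> MVKCmdBufferImageCopyMulti;  // larger counts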
 
@@ -1655,7 +1671,8 @@
 	MVKTraceVulkanCallStart();
 	VkClearValue clrVal;
 	clrVal.color = *pColor;
-	MVKAddCmd(ClearImage, commandBuffer, image, imageLayout, clrVal, rangeCount, pRanges, false);
+	MVKAddCmdFromThreshold(ClearColorImage, rangeCount, 1, commandBuffer,
+						   image, imageLayout, clrVal, rangeCount, pRanges);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1670,7 +1687,8 @@
 	MVKTraceVulkanCallStart();
 	VkClearValue clrVal;
 	clrVal.depthStencil = *pDepthStencil;
-    MVKAddCmd(ClearImage, commandBuffer, image, imageLayout, clrVal, rangeCount, pRanges, true);
+    MVKAddCmdFromThreshold(ClearDepthStencilImage, rangeCount, 1, commandBuffer,
+						   image, imageLayout, clrVal, rangeCount, pRanges);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1682,7 +1700,13 @@
 	const VkClearRect*                          pRects) {
 
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects);
+	if (attachmentCount > 1) {
+		MVKAddCmdFromThreshold(ClearMultiAttachments, rectCount, 1, commandBuffer,
+							   attachmentCount, pAttachments, rectCount, pRects);
+	} else {
+		MVKAddCmdFromThreshold(ClearSingleAttachment, rectCount, 1, commandBuffer,
+							   attachmentCount, pAttachments, rectCount, pRects);
+	}
 	MVKTraceVulkanCallEnd();
 }
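
Clearing attachments specializes along two axes at once: the outer branch selects a single-attachment or multi-attachment command class, and the rectCount threshold inside each branch sizes the rect storage. The typical call (clear one attachment in one rect) therefore pays for exactly one attachment and one rect of inline storage.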
 
@@ -1696,8 +1720,8 @@
     const VkImageResolve*                       pRegions) {
 	
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(ResolveImage, commandBuffer, srcImage, srcImageLayout,
-			  dstImage, dstImageLayout, regionCount, pRegions);
+	MVKAddCmdFromThreshold(ResolveImage, regionCount, 1, commandBuffer,
+						   srcImage, srcImageLayout, dstImage, dstImageLayout, regionCount, pRegions);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1705,9 +1729,9 @@
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     event,
     VkPipelineStageFlags                        stageMask) {
-	
+
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(SetResetEvent, commandBuffer, event, stageMask, true);
+	MVKAddCmd(SetEvent, commandBuffer, event, stageMask);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1715,9 +1739,9 @@
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     event,
     VkPipelineStageFlags                        stageMask) {
-	
+
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(SetResetEvent, commandBuffer, event, stageMask, false);
+	MVKAddCmd(ResetEvent, commandBuffer, event, stageMask);
 	MVKTraceVulkanCallEnd();
 }
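
As with BindPipeline, and the ClearImage split above, the boolean-parameter SetResetEvent command becomes two concrete types, SetEvent and ResetEvent, so the flag is encoded in the type rather than stored in every pooled instance.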
 
@@ -1735,11 +1759,11 @@
 	const VkImageMemoryBarrier*                 pImageMemoryBarriers) {
 
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(WaitEvents, commandBuffer, eventCount, pEvents,
-			  srcStageMask, dstStageMask,
-			  memoryBarrierCount, pMemoryBarriers,
-			  bufferMemoryBarrierCount, pBufferMemoryBarriers,
-			  imageMemoryBarrierCount, pImageMemoryBarriers);
+	MVKAddCmdFromThreshold(WaitEvents, eventCount, 1, commandBuffer,
+						   eventCount, pEvents, srcStageMask, dstStageMask,
+						   memoryBarrierCount, pMemoryBarriers,
+						   bufferMemoryBarrierCount, pBufferMemoryBarriers,
+						   imageMemoryBarrierCount, pImageMemoryBarriers);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1756,10 +1780,12 @@
 	const VkImageMemoryBarrier*                 pImageMemoryBarriers) {
 
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(PipelineBarrier, commandBuffer, srcStageMask, dstStageMask, dependencyFlags,
-			  memoryBarrierCount, pMemoryBarriers,
-			  bufferMemoryBarrierCount, pBufferMemoryBarriers,
-			  imageMemoryBarrierCount, pImageMemoryBarriers);
+	uint32_t barrierCount = memoryBarrierCount + bufferMemoryBarrierCount + imageMemoryBarrierCount;
+	MVKAddCmdFrom2Thresholds(PipelineBarrier, barrierCount, 1, 4, commandBuffer,
+							   srcStageMask, dstStageMask, dependencyFlags,
+							   memoryBarrierCount, pMemoryBarriers,
+							   bufferMemoryBarrierCount, pBufferMemoryBarriers,
+							   imageMemoryBarrierCount, pImageMemoryBarriers);
 	MVKTraceVulkanCallEnd();
 }
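
The selector value need not be a raw parameter: here the three barrier counts are summed first, which suggests the command keeps all barriers in common pre-sized storage, with the combined count picking the size class.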
 
@@ -1831,7 +1857,7 @@
     const void*                                 pValues) {
 	
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues);
+	MVKAddCmdFrom2Thresholds(PushConstants, size, 64, 128, commandBuffer, layout, stageFlags, offset, size, pValues);
 	MVKTraceVulkanCallEnd();
 }
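
For push constants the selector is a byte size rather than an element count, so the 64 and 128 thresholds presumably map to command classes with 64 and 128 bytes of inline payload. Since Vulkan guarantees a maxPushConstantsSize of at least 128 bytes, the Multi fallback is reachable only on implementations that advertise a larger limit.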
 
@@ -1841,7 +1867,7 @@
     VkSubpassContents							contents) {
 	
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(BeginRenderPass, commandBuffer,pRenderPassBegin, contents);
+	MVKAddCmdFrom2Thresholds(BeginRenderPass, pRenderPassBegin->clearValueCount, 1, 2, commandBuffer, pRenderPassBegin, contents);
 	MVKTraceVulkanCallEnd();
 }
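
Sizing BeginRenderPass by clearValueCount, with cuts at 1 and 2, matches the most common configurations: one cleared color attachment, or a color plus a depth/stencil attachment, with the Multi variant covering passes that clear more.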
 
@@ -1868,7 +1894,7 @@
     const VkCommandBuffer*						pCommandBuffers) {
 	
 	MVKTraceVulkanCallStart();
-	MVKAddCmd(ExecuteCommands, commandBuffer, cmdBuffersCount, pCommandBuffers);
+	MVKAddCmdFromThreshold(ExecuteCommands, cmdBuffersCount, 1, commandBuffer, cmdBuffersCount, pCommandBuffers);
 	MVKTraceVulkanCallEnd();
 }
 
@@ -1934,7 +1960,6 @@
     const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-    if (!descriptorUpdateTemplate) { return; }
     MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
     mvkDev->destroyDescriptorUpdateTemplate((MVKDescriptorUpdateTemplate*)descriptorUpdateTemplate, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -2217,7 +2242,6 @@
     const VkAllocationCallbacks*             pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !swapchain ) { return; }
     MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
     mvkDev->destroySwapchain((MVKSwapchain*)swapchain, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -2326,7 +2350,6 @@
     const VkAllocationCallbacks*                 pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !surface ) { return; }
     MVKInstance* mvkInst = MVKInstance::getMVKInstance(instance);
     mvkInst->destroySurface((MVKSurface*)surface, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -2443,7 +2466,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !callback ) { return; }
 	MVKInstance* mvkInst = MVKInstance::getMVKInstance(instance);
 	mvkInst->destroyDebugReportCallback((MVKDebugReportCallback*)callback, pAllocator);
 	MVKTraceVulkanCallEnd();
@@ -2611,7 +2633,6 @@
 	const VkAllocationCallbacks*                pAllocator) {
 
 	MVKTraceVulkanCallStart();
-	if ( !messenger ) { return; }
 	MVKInstance* mvkInst = MVKInstance::getMVKInstance(instance);
 	mvkInst->destroyDebugUtilsMessenger((MVKDebugUtilsMessenger*)messenger, pAllocator);
 	MVKTraceVulkanCallEnd();