Merge pull request #1185 from billhollings/argument-buffers

WIP: Initial implementation of Metal argument buffers for Vulkan descriptor sets.
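
Usage sketch (not part of this diff): the new behavior is driven by environment variables that MoltenVK reads when it initializes, so they should be set before the `VkInstance` is created. The variable names and defaults come from the `Whats_New.md` changes below; the helper name is hypothetical.

```objc
// Sketch only: toggle the new argument-buffer settings at launch, before
// MoltenVK initializes. configureMoltenVKArgumentBuffers() is a hypothetical
// helper; variable names and defaults come from Whats_New.md in this diff.
#include <cstdlib>

static void configureMoltenVKArgumentBuffers() {
	// Enabled by default; set to "0" to fall back to discrete resource binding.
	setenv("MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS", "1", 1);
	// Disabled by default; "1" embeds inline-block content directly in the argument buffer.
	setenv("MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER", "0", 1);
}
```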
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 4271434..f2b1c38 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -13,6 +13,28 @@
 
 
 
+MoltenVK 1.1.2
+--------------
+
+Released TBD
+
+- Add support for using Metal argument buffers for shader resources, by setting the 
+  `MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS` environment variable (enabled by default).
+- To improve performance during descriptor set allocations, preallocate memory for descriptor sets, descriptors, 
+  and the Metal argument buffer (one `MTLBuffer` per descriptor pool) during `vkCreateDescriptorPool()`.
+- Add support for embedding inline uniform content directly in Metal argument buffers, by setting the 
+  `MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER` environment variable (disabled by default).
+- Support fast math on the MSL compiler via the `MVKConfiguration::fastMathEnabled` configuration 
+  setting and the `MVK_CONFIG_FAST_MATH_ENABLED` environment variable (both disabled by default).
+- Add ability to automatically capture the first GPU frame by setting `MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE` to `2`.
+- Add `MVKBitArray` and remove `MVKVector`.
+- Support _GitHub Actions_ for CI builds on pull requests.
+- Remove support for _Travis-CI_.
+- `Makefile` and `fetchDependencies` support `xcpretty` (if available).
+- Update `VK_MVK_MOLTENVK_SPEC_VERSION` to `30`.
+
+
+
 MoltenVK 1.1.1
 --------------
 
@@ -21,7 +43,7 @@
 - Add support for extensions:
 	- `VK_KHR_sampler_mirror_clamp_to_edge` (iOS)
 	- `VK_KHR_timeline_semaphore`
-	- `VK_EXT_descriptor_indexing` (initial release limited to Metal Tier 1: 96/128 textures, 16 samplers)
+	- `VK_EXT_descriptor_indexing`
 	- `VK_EXT_post_depth_coverage` (macOS)
 	- `VK_EXT_private_data`
 	- `VK_EXT_subgroup_size_control`
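
A companion sketch (not part of this diff) for the first-frame capture entry above, using the settings documented in `vk_mvk_moltenvk.h` later in this diff. The output path is illustrative, and per that documentation the app also needs an `Info.plist` with the `MetalCaptureEnabled` key set to true.

```objc
// Sketch only: auto-capture the first rendered frame to a .gputrace file.
// Set before MoltenVK initializes; the path here is illustrative.
#include <cstdlib>

static void enableFirstFrameCapture() {
	setenv("MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE", "2", 1);                       // 2 = first frame
	setenv("MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE", "~/frame.gputrace", 1);  // '~' expands as in the shell
}
```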
diff --git a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
index d84c521..2cafeea 100644
--- a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
+++ b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
@@ -27,13 +27,11 @@
 		2FEA0A5224902F9F00EEF3AD /* MVKLogging.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F0429E1FB4CF82009FCCB8 /* MVKLogging.h */; };
 		2FEA0A5324902F9F00EEF3AD /* MVKQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7911C7DFB4800632CA3 /* MVKQueue.h */; };
 		2FEA0A5424902F9F00EEF3AD /* MVKFramebuffer.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7871C7DFB4800632CA3 /* MVKFramebuffer.h */; };
-		2FEA0A5524902F9F00EEF3AD /* MVKVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */; };
 		2FEA0A5624902F9F00EEF3AD /* MVKWatermarkShaderSource.h in Headers */ = {isa = PBXBuildFile; fileRef = A981494B1FB6A3F7005F00B4 /* MVKWatermarkShaderSource.h */; };
 		2FEA0A5724902F9F00EEF3AD /* MTLSamplerDescriptor+MoltenVK.h in Headers */ = {isa = PBXBuildFile; fileRef = A9E53DD32100B197002781DD /* MTLSamplerDescriptor+MoltenVK.h */; };
 		2FEA0A5824902F9F00EEF3AD /* MVKSync.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB79D1C7DFB4800632CA3 /* MVKSync.h */; };
 		2FEA0A5924902F9F00EEF3AD /* MVKDevice.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7831C7DFB4800632CA3 /* MVKDevice.h */; };
 		2FEA0A5A24902F9F00EEF3AD /* MVKSmallVector.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F3D9DB24732A4D00745190 /* MVKSmallVector.h */; };
-		2FEA0A5B24902F9F00EEF3AD /* MVKVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2521BD75570006C935 /* MVKVector.h */; };
 		2FEA0A5C24902F9F00EEF3AD /* MVKCommandPool.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB77A1C7DFB4800632CA3 /* MVKCommandPool.h */; };
 		2FEA0A5D24902F9F00EEF3AD /* MVKShaderModule.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7971C7DFB4800632CA3 /* MVKShaderModule.h */; };
 		2FEA0A5E24902F9F00EEF3AD /* MVKVulkanAPIObject.h in Headers */ = {isa = PBXBuildFile; fileRef = A99C91012295FAC500A061DA /* MVKVulkanAPIObject.h */; };
@@ -142,10 +140,6 @@
 		45557A5321C9EFF3008868BD /* MVKCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */; };
 		45557A5421C9EFF3008868BD /* MVKCodec.h in Headers */ = {isa = PBXBuildFile; fileRef = 45557A5121C9EFF3008868BD /* MVKCodec.h */; };
 		45557A5521C9EFF3008868BD /* MVKCodec.h in Headers */ = {isa = PBXBuildFile; fileRef = 45557A5121C9EFF3008868BD /* MVKCodec.h */; };
-		83A4AD2A21BD75570006C935 /* MVKVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2521BD75570006C935 /* MVKVector.h */; };
-		83A4AD2B21BD75570006C935 /* MVKVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2521BD75570006C935 /* MVKVector.h */; };
-		83A4AD2C21BD75570006C935 /* MVKVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */; };
-		83A4AD2D21BD75570006C935 /* MVKVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */; };
 		A9096E5E1F81E16300DFBEA6 /* MVKCmdDispatch.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */; };
 		A9096E5F1F81E16300DFBEA6 /* MVKCmdDispatch.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */; };
 		A909F65F213B190700FCD6BE /* MVKExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A909F65A213B190600FCD6BE /* MVKExtensions.h */; };
@@ -310,6 +304,9 @@
 		A98149641FB6A3F7005F00B4 /* MVKWatermarkTextureContent.h in Headers */ = {isa = PBXBuildFile; fileRef = A981494C1FB6A3F7005F00B4 /* MVKWatermarkTextureContent.h */; };
 		A981496B1FB6A998005F00B4 /* MVKStrings.h in Headers */ = {isa = PBXBuildFile; fileRef = A981496A1FB6A998005F00B4 /* MVKStrings.h */; };
 		A981496C1FB6A998005F00B4 /* MVKStrings.h in Headers */ = {isa = PBXBuildFile; fileRef = A981496A1FB6A998005F00B4 /* MVKStrings.h */; };
+		A98C81A1257AD5C1006A9F80 /* MVKBitArray.h in Headers */ = {isa = PBXBuildFile; fileRef = A98C819F257AD5C0006A9F80 /* MVKBitArray.h */; };
+		A98C81A2257AD5C1006A9F80 /* MVKBitArray.h in Headers */ = {isa = PBXBuildFile; fileRef = A98C819F257AD5C0006A9F80 /* MVKBitArray.h */; };
+		A98C81A3257AD5C1006A9F80 /* MVKBitArray.h in Headers */ = {isa = PBXBuildFile; fileRef = A98C819F257AD5C0006A9F80 /* MVKBitArray.h */; };
 		A99C90EE229455B300A061DA /* MVKCmdDebug.h in Headers */ = {isa = PBXBuildFile; fileRef = A99C90EC229455B200A061DA /* MVKCmdDebug.h */; };
 		A99C90EF229455B300A061DA /* MVKCmdDebug.h in Headers */ = {isa = PBXBuildFile; fileRef = A99C90EC229455B200A061DA /* MVKCmdDebug.h */; };
 		A99C90F0229455B300A061DA /* MVKCmdDebug.mm in Sources */ = {isa = PBXBuildFile; fileRef = A99C90ED229455B300A061DA /* MVKCmdDebug.mm */; };
@@ -428,8 +425,6 @@
 		45557A4D21C9EFF3008868BD /* MVKCodec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MVKCodec.cpp; sourceTree = "<group>"; };
 		45557A5121C9EFF3008868BD /* MVKCodec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCodec.h; sourceTree = "<group>"; };
 		45557A5721CD83C3008868BD /* MVKDXTnCodec.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKDXTnCodec.def; sourceTree = "<group>"; };
-		83A4AD2521BD75570006C935 /* MVKVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKVector.h; sourceTree = "<group>"; };
-		83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKVectorAllocator.h; sourceTree = "<group>"; };
 		A9096E5C1F81E16300DFBEA6 /* MVKCmdDispatch.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = MVKCmdDispatch.h; sourceTree = "<group>"; };
 		A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdDispatch.mm; sourceTree = "<group>"; };
 		A909F65A213B190600FCD6BE /* MVKExtensions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKExtensions.h; sourceTree = "<group>"; };
@@ -513,6 +508,7 @@
 		A981494B1FB6A3F7005F00B4 /* MVKWatermarkShaderSource.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKWatermarkShaderSource.h; sourceTree = "<group>"; };
 		A981494C1FB6A3F7005F00B4 /* MVKWatermarkTextureContent.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKWatermarkTextureContent.h; sourceTree = "<group>"; };
 		A981496A1FB6A998005F00B4 /* MVKStrings.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKStrings.h; sourceTree = "<group>"; };
+		A98C819F257AD5C0006A9F80 /* MVKBitArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKBitArray.h; sourceTree = "<group>"; };
 		A99C90EC229455B200A061DA /* MVKCmdDebug.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdDebug.h; sourceTree = "<group>"; };
 		A99C90ED229455B300A061DA /* MVKCmdDebug.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdDebug.mm; sourceTree = "<group>"; };
 		A99C91002295FAC500A061DA /* MVKVulkanAPIObject.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKVulkanAPIObject.mm; sourceTree = "<group>"; };
@@ -682,6 +678,7 @@
 			children = (
 				A98149421FB6A3F7005F00B4 /* MVKBaseObject.h */,
 				A98149411FB6A3F7005F00B4 /* MVKBaseObject.mm */,
+				A98C819F257AD5C0006A9F80 /* MVKBitArray.h */,
 				4553AEFA2251617100E8EBCD /* MVKBlockObserver.h */,
 				4553AEF62251617100E8EBCD /* MVKBlockObserver.m */,
 				45557A4D21C9EFF3008868BD /* MVKCodec.cpp */,
@@ -693,8 +690,6 @@
 				A98149461FB6A3F7005F00B4 /* MVKObjectPool.h */,
 				A9F3D9DB24732A4D00745190 /* MVKSmallVector.h */,
 				A9F3D9D924732A4C00745190 /* MVKSmallVectorAllocator.h */,
-				83A4AD2521BD75570006C935 /* MVKVector.h */,
-				83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */,
 				A98149491FB6A3F7005F00B4 /* MVKWatermark.h */,
 				A981494A1FB6A3F7005F00B4 /* MVKWatermark.mm */,
 				A981494B1FB6A3F7005F00B4 /* MVKWatermarkShaderSource.h */,
@@ -807,15 +802,14 @@
 				2FEA0A5024902F9F00EEF3AD /* MVKCodec.h in Headers */,
 				2FEA0A5124902F9F00EEF3AD /* MVKRenderPass.h in Headers */,
 				2FEA0A5224902F9F00EEF3AD /* MVKLogging.h in Headers */,
+				A98C81A2257AD5C1006A9F80 /* MVKBitArray.h in Headers */,
 				2FEA0A5324902F9F00EEF3AD /* MVKQueue.h in Headers */,
 				2FEA0A5424902F9F00EEF3AD /* MVKFramebuffer.h in Headers */,
-				2FEA0A5524902F9F00EEF3AD /* MVKVectorAllocator.h in Headers */,
 				2FEA0A5624902F9F00EEF3AD /* MVKWatermarkShaderSource.h in Headers */,
 				2FEA0A5724902F9F00EEF3AD /* MTLSamplerDescriptor+MoltenVK.h in Headers */,
 				2FEA0A5824902F9F00EEF3AD /* MVKSync.h in Headers */,
 				2FEA0A5924902F9F00EEF3AD /* MVKDevice.h in Headers */,
 				2FEA0A5A24902F9F00EEF3AD /* MVKSmallVector.h in Headers */,
-				2FEA0A5B24902F9F00EEF3AD /* MVKVector.h in Headers */,
 				2FEA0A5C24902F9F00EEF3AD /* MVKCommandPool.h in Headers */,
 				2FEA0A5D24902F9F00EEF3AD /* MVKShaderModule.h in Headers */,
 				2FEA0A5E24902F9F00EEF3AD /* MVKVulkanAPIObject.h in Headers */,
@@ -883,19 +877,18 @@
 				453638322508A4C7000EFFD3 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.h in Headers */,
 				A94FB8001C7DFB4800632CA3 /* MVKQueue.h in Headers */,
 				A94FB7EC1C7DFB4800632CA3 /* MVKFramebuffer.h in Headers */,
-				83A4AD2C21BD75570006C935 /* MVKVectorAllocator.h in Headers */,
 				A98149611FB6A3F7005F00B4 /* MVKWatermarkShaderSource.h in Headers */,
 				A9E53DE32100B197002781DD /* MTLSamplerDescriptor+MoltenVK.h in Headers */,
 				A94FB8181C7DFB4800632CA3 /* MVKSync.h in Headers */,
 				A94FB7E41C7DFB4800632CA3 /* MVKDevice.h in Headers */,
 				A9F3D9DE24732A4D00745190 /* MVKSmallVector.h in Headers */,
-				83A4AD2A21BD75570006C935 /* MVKVector.h in Headers */,
 				A94FB7D41C7DFB4800632CA3 /* MVKCommandPool.h in Headers */,
 				A94FB80C1C7DFB4800632CA3 /* MVKShaderModule.h in Headers */,
 				A99C91042295FAC600A061DA /* MVKVulkanAPIObject.h in Headers */,
 				A94FB7C01C7DFB4800632CA3 /* MVKCmdQueries.h in Headers */,
 				A94FB7CC1C7DFB4800632CA3 /* MVKCommand.h in Headers */,
 				A981494F1FB6A3F7005F00B4 /* MVKBaseObject.h in Headers */,
+				A98C81A1257AD5C1006A9F80 /* MVKBitArray.h in Headers */,
 				A9C96DD01DDC20C20053187F /* MVKMTLBufferAllocation.h in Headers */,
 				A98149571FB6A3F7005F00B4 /* MVKObjectPool.h in Headers */,
 				A94FB8141C7DFB4800632CA3 /* MVKSwapchain.h in Headers */,
@@ -957,19 +950,18 @@
 				453638342508A4C7000EFFD3 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.h in Headers */,
 				A94FB8011C7DFB4800632CA3 /* MVKQueue.h in Headers */,
 				A94FB7ED1C7DFB4800632CA3 /* MVKFramebuffer.h in Headers */,
-				83A4AD2D21BD75570006C935 /* MVKVectorAllocator.h in Headers */,
 				A98149621FB6A3F7005F00B4 /* MVKWatermarkShaderSource.h in Headers */,
 				A9E53DE42100B197002781DD /* MTLSamplerDescriptor+MoltenVK.h in Headers */,
 				A94FB8191C7DFB4800632CA3 /* MVKSync.h in Headers */,
 				A94FB7E51C7DFB4800632CA3 /* MVKDevice.h in Headers */,
 				A9F3D9DF24732A4D00745190 /* MVKSmallVector.h in Headers */,
-				83A4AD2B21BD75570006C935 /* MVKVector.h in Headers */,
 				A94FB7D51C7DFB4800632CA3 /* MVKCommandPool.h in Headers */,
 				A94FB80D1C7DFB4800632CA3 /* MVKShaderModule.h in Headers */,
 				A99C91052295FAC600A061DA /* MVKVulkanAPIObject.h in Headers */,
 				A94FB7C11C7DFB4800632CA3 /* MVKCmdQueries.h in Headers */,
 				A94FB7CD1C7DFB4800632CA3 /* MVKCommand.h in Headers */,
 				A98149501FB6A3F7005F00B4 /* MVKBaseObject.h in Headers */,
+				A98C81A3257AD5C1006A9F80 /* MVKBitArray.h in Headers */,
 				A9C96DD11DDC20C20053187F /* MVKMTLBufferAllocation.h in Headers */,
 				A98149581FB6A3F7005F00B4 /* MVKObjectPool.h in Headers */,
 				A94FB8151C7DFB4800632CA3 /* MVKSwapchain.h in Headers */,
diff --git a/MoltenVK/MoltenVK/API/mvk_datatypes.h b/MoltenVK/MoltenVK/API/mvk_datatypes.h
index 611e9d6..3cf55e7 100644
--- a/MoltenVK/MoltenVK/API/mvk_datatypes.h
+++ b/MoltenVK/MoltenVK/API/mvk_datatypes.h
@@ -323,7 +323,7 @@
 	kMVKShaderStageTessEval,
 	kMVKShaderStageFragment,
 	kMVKShaderStageCompute,
-	kMVKShaderStageMax
+	kMVKShaderStageCount
 } MVKShaderStage;
 
 /** Returns the Metal MTLColorWriteMask corresponding to the specified Vulkan VkColorComponentFlags. */
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index 9b667a7..56578fd 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -50,12 +50,12 @@
  */
 #define MVK_VERSION_MAJOR   1
 #define MVK_VERSION_MINOR   1
-#define MVK_VERSION_PATCH   1
+#define MVK_VERSION_PATCH   2
 
 #define MVK_MAKE_VERSION(major, minor, patch)    (((major) * 10000) + ((minor) * 100) + (patch))
 #define MVK_VERSION     MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH)
 
-#define VK_MVK_MOLTENVK_SPEC_VERSION            29
+#define VK_MVK_MOLTENVK_SPEC_VERSION            30
 #define VK_MVK_MOLTENVK_EXTENSION_NAME          "VK_MVK_moltenvk"
 
 /**
@@ -137,14 +137,26 @@
  *     Xcode user interface.
  *       0: No automatic GPU capture.
  *       1: Capture all GPU commands issued during the lifetime of the VkDevice.
- *     If MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE is also set, it is a filename where the automatic
- *     GPU capture should be saved. In this case, the Xcode scheme need not have Metal GPU capture
- *     enabled, and in fact the app need not be run under Xcode's control at all. This is useful
- *     in case the app cannot be run under Xcode's control. A path starting with '~' can be used
- *     to place it in a user's home directory, as in the shell. This feature requires Metal 3.0
- *     (macOS 10.15, iOS 13).
+ *       2: Capture all GPU commands issued during the rendering of the first frame.
  *     If none of these is set, no automatic GPU capture will occur.
  *
+ *     If MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE is set to 2, to capture the first frame, the
+ *     command queue from which the frame is captured is determined by the values of the
+ *     defaultGPUCaptureScopeQueueFamilyIndex and defaultGPUCaptureScopeQueueIndex members
+ *     of MVKConfiguration, or by the corresponding MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX
+ *     and MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX runtime environment variables or
+ *     MoltenVK compile-time build settings.
+ *
+ *     If MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE is also set, it is a filename (with a file
+ *     extension of .gputrace) where the automatic GPU capture should be saved. In this case,
+ *     the Xcode scheme need not have Metal GPU capture enabled, and in fact the app need not
+ *     be run under Xcode's control at all. This is useful in case the app cannot be run under
+ *     Xcode's control. A path starting with '~' can be used to place it in a user's home
+ *     directory, as in the shell. This feature requires Metal 3.0 (macOS 10.15, iOS 13).
+ *     In addition, for automatic file capture, the app requires at least a minimal Info.plist
+ *     file with the MetalCaptureEnabled key set to true. For command line executables (like
+ *     Vulkan CTS), the Info.plist file can be placed in the same directory as the executable.
+ *
  * 6.  The MVK_CONFIG_TEXTURE_1D_AS_2D runtime environment variable or MoltenVK compile-time build
  *     setting controls whether MoltenVK should use a Metal 2D texture with a height of 1 for a
  *     Vulkan 1D image, or use a native Metal 1D texture. Metal imposes significant restrictions
@@ -152,14 +164,14 @@
  *     Using a Metal 2D texture allows Vulkan 1D textures to support this additional functionality.
  *     This setting is enabled by default, and MoltenVK will use a Metal 2D texture for each Vulkan 1D image.
  *
- * 7.  The MVK_CONFIG_PREALLOCATE_DESCRIPTORS runtime environment variable or MoltenVK compile-time
- *     build setting controls whether MoltenVK should preallocate memory in each VkDescriptorPool
- *     according to the values of the VkDescriptorPoolSize parameters. Doing so may improve
- *     descriptor set allocation performance at a cost of preallocated application memory.
- *     If this setting is disabled, the descriptors required for a descriptor set will
- *     be dynamically allocated in application memory when the descriptor set itself is allocated.
- *     This setting is disabled by default, and MoltenVK will dynamically allocate descriptors
- *     when the containing descriptor set is allocated.
+ * 7.  The MVK_CONFIG_PREALLOCATE_DESCRIPTORS runtime environment variable or MoltenVK
+ *     compile-time build setting controls whether MoltenVK should preallocate memory during
+ *     vkCreateDescriptorPool() according to the values of the VkDescriptorPoolSize parameters.
+ *     Doing so may improve descriptor set allocation performance at a cost of preallocated
+ *     application memory. If this setting is disabled, the descriptors required for
+ *     a descriptor set will be dynamically allocated in application memory when the
+ *     descriptor set itself is allocated. This setting is enabled by default,
+ *     and MoltenVK will preallocate descriptors during vkCreateDescriptorPool().
  *
  * 8.  The MVK_CONFIG_USE_COMMAND_POOLING runtime environment variable or MoltenVK compile-time
  *     build setting controls whether MoltenVK should use pools to manage memory used when
@@ -190,6 +202,29 @@
  *     MVK_CONFIG_PERFORMANCE_LOGGING_FRAME_COUNT environment variable or MoltenVK
  *     compile-time build setting. This setting is disabled by default, and activity
  *     performance will be logged only when frame activity is logged.
+ *
+ * 11. The MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS runtime environment variable or MoltenVK
+ *     compile-time build setting controls whether MoltenVK should use Metal argument
+ *     buffers for resources defined in descriptor sets. Using Metal argument buffers
+ *     dramatically increases the number of buffers, textures and samplers that can be
+ *     bound to a pipeline shader, and in most cases improves performance.
+ *     If this setting is enabled, MoltenVK will use Metal argument buffers to bind
+ *     resources to the shaders. If this setting is disabled, MoltenVK will bind
+ *     resources to shaders discretely. This setting is enabled by default, and MoltenVK
+ *     will use Metal argument buffers.
+ *
+ * 12. The MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER runtime environment variable
+ *     or MoltenVK compile-time build setting controls whether MoltenVK should embed the contents
+ *     of inline-block descriptors directly in the Metal argument buffer, instead of writing the
+ *     contents of the descriptor in an intermediary MTLBuffer, which is then inserted into the
+ *     Metal argument buffer. Embedding inline-block descriptor content directly into the Metal
+ *     argument buffer improves efficiency and reduces resources, but currently does not cover
+ *     all types of possible inline content, and may cause errors in some cases. If this setting
+ *     is enabled, MoltenVK will embed inline-block descriptor content directly into the Metal
+ *     argument buffers. If this setting is disabled, MoltenVK will write inline-block content
+ *     to an intermediary MTLBuffer, and then insert that MTLBuffer into the Metal argument buffer.
+ *     This setting is disabled by default, and MoltenVK will use an intermediary MTLBuffer.
+ *     This setting only takes effect if the MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS setting is enabled.
  */
 typedef struct {
 
@@ -648,6 +683,7 @@
 	VkBool32 simdPermute;						/**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */
 	VkBool32 simdReduction;						/**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */
     uint32_t minSubgroupSize;			        /**< The minimum number of threads in a SIMD-group. */
+	VkBool32 argumentBuffers;					/**< If true, argument buffers are supported and will be used for descriptor sets. */
 } MVKPhysicalDeviceMetalFeatures;
 
 /** MoltenVK performance of a particular type of activity. */
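
The new `argumentBuffers` member added to `MVKPhysicalDeviceMetalFeatures` above can be queried through the existing `vkGetPhysicalDeviceMetalFeaturesMVK()` call from the `VK_MVK_moltenvk` extension. A minimal sketch (not part of this diff; assumes a valid `VkPhysicalDevice` and that the MoltenVK header is on the include path):

```objc
#include "vk_mvk_moltenvk.h"

// Returns whether MoltenVK is using Metal argument buffers for descriptor sets.
static bool usesMetalArgumentBuffers(VkPhysicalDevice physDev) {
	MVKPhysicalDeviceMetalFeatures mtlFeats = {};
	size_t featsSize = sizeof(mtlFeats);
	VkResult rslt = vkGetPhysicalDeviceMetalFeaturesMVK(physDev, &mtlFeats, &featsSize);
	// VK_INCOMPLETE indicates a struct-size mismatch; populated fields are still valid.
	return (rslt == VK_SUCCESS || rslt == VK_INCOMPLETE) && mtlFeats.argumentBuffers;
}
```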
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index 9e2ef99..3fe84d1 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -302,6 +302,18 @@
     /** Binds a pipeline to a bind point. */
     void bindPipeline(VkPipelineBindPoint pipelineBindPoint, MVKPipeline* pipeline);
 
+	/** Indicates to either the graphics or compute encoder that a resource is being used within an argument buffer. */
+	void useArgumentBufferResource(const MVKMTLArgumentBufferResourceUsage& resourceUsage, bool isComputeStage);
+
+	/** Binds a buffer to either the graphics or compute encoder, based on the shader stage. */
+	void bindBuffer(const MVKMTLBufferBinding& binding, MVKShaderStage stage);
+
+	/** Binds a texture to either the graphics or compute encoder, based on the shader stage. */
+	void bindTexture(const MVKMTLTextureBinding& binding, MVKShaderStage stage);
+
+	/** Binds a sampler to either the graphics or compute encoder, based on the shader stage. */
+	void bindSamplerState(const MVKMTLSamplerStateBinding& binding, MVKShaderStage stage);
+
 	/** Encodes an operation to signal an event to a status. */
 	void signalEvent(MVKEvent* mvkEvent, bool status);
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index 65f06a6..19585da 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -422,6 +422,38 @@
     }
 }
 
+void MVKCommandEncoder::useArgumentBufferResource(const MVKMTLArgumentBufferResourceUsage& resourceUsage, bool isComputeStage) {
+	if (isComputeStage) {
+		_computeResourcesState.useArgumentBufferResource(resourceUsage);
+	} else {
+		_graphicsResourcesState.useArgumentBufferResource(resourceUsage);
+	}
+}
+
+void MVKCommandEncoder::bindBuffer(const MVKMTLBufferBinding& binding, MVKShaderStage stage) {
+	if (stage == kMVKShaderStageCompute) {
+		_computeResourcesState.bindBuffer(binding);
+	} else {
+		_graphicsResourcesState.bindBuffer(stage, binding);
+	}
+}
+
+void MVKCommandEncoder::bindTexture(const MVKMTLTextureBinding& binding, MVKShaderStage stage) {
+	if (stage == kMVKShaderStageCompute) {
+		_computeResourcesState.bindTexture(binding);
+	} else {
+		_graphicsResourcesState.bindTexture(stage, binding);
+	}
+}
+
+void MVKCommandEncoder::bindSamplerState(const MVKMTLSamplerStateBinding& binding, MVKShaderStage stage) {
+	if (stage == kMVKShaderStageCompute) {
+		_computeResourcesState.bindSamplerState(binding);
+	} else {
+		_graphicsResourcesState.bindSamplerState(stage, binding);
+	}
+}
+
 void MVKCommandEncoder::signalEvent(MVKEvent* mvkEvent, bool status) {
 	endCurrentMetalEncoding();
 	mvkEvent->encodeSignal(_mtlCmdBuffer, status);
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
index 62d4a8a..cfdd69b 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
@@ -352,7 +352,9 @@
 
 public:
 
-    /** Constructs this instance for the specified command encoder. */
+	/** Indicates that a resource is being used within an argument buffer. */
+	void useArgumentBufferResource(const MVKMTLArgumentBufferResourceUsage& resourceUsage);
+
     MVKResourcesCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {}
 
 protected:
@@ -454,6 +456,11 @@
 		}
 	};
 
+	void resetImpl() override;
+	void markDirty() override;
+
+	MVKSmallVector<MVKMTLArgumentBufferResourceUsage, 8> _argumentBufferResourceUsage;
+	bool _areArgumentBufferResourceUsageDirty = false;
 };
 
 
@@ -521,6 +528,7 @@
     void encodeImpl(uint32_t stage) override;
     void resetImpl() override;
     void markDirty() override;
+	void encodeArgumentBufferResources();
 
     ResourceBindings<8> _shaderStageResourceBindings[4];
 };
@@ -559,6 +567,7 @@
 protected:
     void encodeImpl(uint32_t) override;
     void resetImpl() override;
+	void encodeArgumentBufferResources();
 
 	ResourceBindings<4> _resourceBindings;
 };
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index 671ed45..caa896b 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -513,6 +513,38 @@
 	}
 }
 
+#pragma mark -
+#pragma mark MVKResourcesCommandEncoderState
+
+void MVKResourcesCommandEncoderState::useArgumentBufferResource(const MVKMTLArgumentBufferResourceUsage& resourceUsage) {
+
+	if ( !resourceUsage.mtlResource ) { return; }
+
+	MVKMTLArgumentBufferResourceUsage dru = resourceUsage;   // Copy that can be marked dirty
+	MVKCommandEncoderState::markDirty();
+	_areArgumentBufferResourceUsageDirty = true;
+	dru.isDirty = true;
+
+	for (auto iter = _argumentBufferResourceUsage.begin(), end = _argumentBufferResourceUsage.end(); iter != end; ++iter) {
+		if (iter->mtlResource == dru.mtlResource) {
+			*iter = dru;
+			return;
+		}
+	}
+	_argumentBufferResourceUsage.push_back(dru);
+}
+
+// Mark everything as dirty
+void MVKResourcesCommandEncoderState::markDirty() {
+	MVKCommandEncoderState::markDirty();
+	markDirty(_argumentBufferResourceUsage, _areArgumentBufferResourceUsageDirty);
+}
+
+void MVKResourcesCommandEncoderState::resetImpl() {
+	_argumentBufferResourceUsage.clear();
+	_areArgumentBufferResourceUsageDirty = false;
+}
+
 
 #pragma mark -
 #pragma mark MVKGraphicsResourcesCommandEncoderState
@@ -636,7 +668,7 @@
 
 // Mark everything as dirty
 void MVKGraphicsResourcesCommandEncoderState::markDirty() {
-    MVKCommandEncoderState::markDirty();
+	MVKResourcesCommandEncoderState::markDirty();
     for (uint32_t i = kMVKShaderStageVertex; i <= kMVKShaderStageFragment; i++) {
         MVKResourcesCommandEncoderState::markDirty(_shaderStageResourceBindings[i].bufferBindings, _shaderStageResourceBindings[i].areBufferBindingsDirty);
         MVKResourcesCommandEncoderState::markDirty(_shaderStageResourceBindings[i].textureBindings, _shaderStageResourceBindings[i].areTextureBindingsDirty);
@@ -646,7 +678,9 @@
 
 void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
 
-    MVKGraphicsPipeline* pipeline = (MVKGraphicsPipeline*)_cmdEncoder->_graphicsPipelineState.getPipeline();
+	encodeArgumentBufferResources();
+
+	MVKGraphicsPipeline* pipeline = (MVKGraphicsPipeline*)_cmdEncoder->_graphicsPipelineState.getPipeline();
     bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || _cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle;
     bool forTessellation = pipeline->isTessellationPipeline();
 
@@ -809,7 +843,24 @@
     }
 }
 
+void MVKGraphicsResourcesCommandEncoderState::encodeArgumentBufferResources() {
+
+	encodeBinding<MVKMTLArgumentBufferResourceUsage>(_argumentBufferResourceUsage,
+													 _areArgumentBufferResourceUsageDirty,
+													 [](MVKCommandEncoder* cmdEncoder, MVKMTLArgumentBufferResourceUsage& abru)->void {
+														 if (abru.mtlStages) {
+															 auto* mtlEnc = cmdEncoder->_mtlRenderEncoder;
+															 if ([mtlEnc respondsToSelector: @selector(useResource:usage:stages:)]) {
+																 [mtlEnc useResource: abru.mtlResource usage: abru.mtlUsage stages: abru.mtlStages];
+															 } else {
+																 [mtlEnc useResource: abru.mtlResource usage: abru.mtlUsage];
+															 }
+														 }
+													 });
+}
+
 void MVKGraphicsResourcesCommandEncoderState::resetImpl() {
+	MVKResourcesCommandEncoderState::resetImpl();
 	for (uint32_t i = kMVKShaderStageVertex; i <= kMVKShaderStageFragment; i++) {
 		_shaderStageResourceBindings[i].reset();
 	}
@@ -845,7 +896,7 @@
 
 // Mark everything as dirty
 void MVKComputeResourcesCommandEncoderState::markDirty() {
-    MVKCommandEncoderState::markDirty();
+	MVKResourcesCommandEncoderState::markDirty();
     MVKResourcesCommandEncoderState::markDirty(_resourceBindings.bufferBindings, _resourceBindings.areBufferBindingsDirty);
     MVKResourcesCommandEncoderState::markDirty(_resourceBindings.textureBindings, _resourceBindings.areTextureBindingsDirty);
     MVKResourcesCommandEncoderState::markDirty(_resourceBindings.samplerStateBindings, _resourceBindings.areSamplerStateBindingsDirty);
@@ -853,6 +904,8 @@
 
 void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) {
 
+	encodeArgumentBufferResources();
+
     MVKPipeline* pipeline = _cmdEncoder->_computePipelineState.getPipeline();
 	bool fullImageViewSwizzle = pipeline ? pipeline->fullImageViewSwizzle() : false;
 
@@ -908,8 +961,18 @@
 																												   atIndex: b.index];
                                              });
 }
+
+void MVKComputeResourcesCommandEncoderState::encodeArgumentBufferResources() {
+
+	encodeBinding<MVKMTLArgumentBufferResourceUsage>(_argumentBufferResourceUsage,
+													 _areArgumentBufferResourceUsageDirty,
+													 [](MVKCommandEncoder* cmdEncoder, MVKMTLArgumentBufferResourceUsage& abru)->void {
+														 auto* mtlEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch);
+														 [mtlEnc useResource: abru.mtlResource usage: abru.mtlUsage];
+													 });
+}
 
 void MVKComputeResourcesCommandEncoderState::resetImpl() {
+	MVKResourcesCommandEncoderState::resetImpl();
 	_resourceBindings.reset();
 }
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
index a09386b..789aa0a 100644
--- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
@@ -18,6 +18,7 @@
 
 #include "MVKMTLBufferAllocation.h"
 #include "MVKLogging.h"
+#include <algorithm>
 
 
 #pragma mark -
@@ -80,6 +81,9 @@
 const MVKMTLBufferAllocation* MVKMTLBufferAllocator::acquireMTLBufferRegion(NSUInteger length) {
 	MVKAssert(length <= _maxAllocationLength, "This MVKMTLBufferAllocator has been configured to dispense MVKMTLBufferRegions no larger than %lu bytes.", (unsigned long)_maxAllocationLength);
 
+	// Can't allocate a segment smaller than the minimum MTLBuffer alignment.
+	length = std::max<NSUInteger>(length, _device->_pMetalFeatures->mtlBufferAlignment);
+
     // Convert max length to the next power-of-two exponent to use as a lookup
     NSUInteger p2Exp = mvkPowerOfTwoExponent(length);
 	MVKMTLBufferAllocationPool* pRP = _regionPools[p2Exp];
@@ -87,7 +91,7 @@
 }
 
 MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe) : MVKBaseDeviceObject(device) {
-    _maxAllocationLength = maxRegionLength;
+	_maxAllocationLength = std::max<NSUInteger>(maxRegionLength, _device->_pMetalFeatures->mtlBufferAlignment);
 	_makeThreadSafe = makeThreadSafe;
 
     // Convert max length to the next power-of-two exponent
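
A worked example (not part of this diff; numbers illustrative) of why `acquireMTLBufferRegion()` now clamps to the minimum MTLBuffer alignment: without the clamp, a small request would be routed to a pool whose regions are too small to keep every allocation properly aligned within the shared MTLBuffer.

```objc
#include <algorithm>
#include <cstdint>

// Mirrors the clamp-then-round-to-power-of-two pool lookup above, in isolation.
static uint64_t pooledRegionLength(uint64_t requestedLength, uint64_t mtlBufferAlignment) {
	uint64_t len = std::max(requestedLength, mtlBufferAlignment);  // the new clamp
	uint64_t p2 = 1;
	while (p2 < len) { p2 <<= 1; }  // pool sizes are powers of two
	return p2;                      // e.g. (16, 256) -> 256; (300, 256) -> 512
}
```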
diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h
index a530288..bf96e5e 100644
--- a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h
+++ b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h
@@ -53,6 +53,14 @@
     bool isInline = false;
 } MVKMTLBufferBinding;
 
+/** Describes a MTLResource binding used in a Metal argument buffer. */
+typedef struct {
+	id<MTLResource> mtlResource = nil;
+	MTLResourceUsage mtlUsage = 0;
+	MTLRenderStages mtlStages = 0;
+	bool isDirty = true;
+} MVKMTLArgumentBufferResourceUsage;
+
 /** Describes a MTLBuffer resource binding as used for an index buffer. */
 typedef struct {
     union { id<MTLBuffer> mtlBuffer = nil; id<MTLBuffer> mtlResource; }; // aliases
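
A sketch (not part of this diff; the helper is hypothetical) of how the new struct above pairs with the `useArgumentBufferResource()` API added to `MVKCommandEncoder` earlier in this diff: a descriptor fills one usage record per Metal resource it places in an argument buffer.

```objc
#import <Metal/Metal.h>

// Hypothetical helper: declare that a fragment shader samples this texture
// through an argument buffer, so Metal makes it resident before the draw.
static void useSampledTexture(MVKCommandEncoder* cmdEncoder, id<MTLTexture> mtlTexture) {
	MVKMTLArgumentBufferResourceUsage abru;
	abru.mtlResource = mtlTexture;            // any id<MTLResource>
	abru.mtlUsage = MTLResourceUsageRead;     // read-only sampling
	abru.mtlStages = MTLRenderStageFragment;  // ignored for compute encoding
	cmdEncoder->useArgumentBufferResource(abru, false /*isComputeStage*/);
}
```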
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
index 1339972..0065408 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "MVKImage.h"
+#include "MVKMTLBufferAllocation.h"
 #include "MVKSmallVector.h"
 
 class MVKDescriptorSet;
@@ -30,12 +31,14 @@
 
 /** Indicates the Metal resource indexes used by a single shader stage in a descriptor. */
 typedef struct MVKShaderStageResourceBinding {
+	uint16_t resourceIndex = 0;
 	uint16_t bufferIndex = 0;
 	uint16_t textureIndex = 0;
 	uint16_t samplerIndex = 0;
 
 	MVKShaderStageResourceBinding operator+ (const MVKShaderStageResourceBinding& rhs);
 	MVKShaderStageResourceBinding& operator+= (const MVKShaderStageResourceBinding& rhs);
+	void addArgumentBuffer(const MVKShaderStageResourceBinding& rhs);
 
 } MVKShaderStageResourceBinding;
 
@@ -44,7 +47,7 @@
 
 /** Indicates the Metal resource indexes used by each shader stage in a descriptor. */
 typedef struct MVKShaderResourceBinding {
-	MVKShaderStageResourceBinding stages[kMVKShaderStageMax];
+	MVKShaderStageResourceBinding stages[kMVKShaderStageCount];
 
 	uint16_t getMaxBufferIndex();
 	uint16_t getMaxTextureIndex();
@@ -52,9 +55,25 @@
 
 	MVKShaderResourceBinding operator+ (const MVKShaderResourceBinding& rhs);
 	MVKShaderResourceBinding& operator+= (const MVKShaderResourceBinding& rhs);
+	void addArgumentBuffer(const MVKShaderResourceBinding& rhs);
 
 } MVKShaderResourceBinding;
 
+/**
+ * If the shader stage resource binding applies to the specified shader stage, populates
+ * the shader converter context at the descriptor set binding from that resource binding.
+ */
+void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
+									   MVKShaderStageResourceBinding& ssRB,
+									   spv::ExecutionModel stage,
+									   uint32_t descriptorSetIndex,
+									   uint32_t bindingIndex,
+									   uint32_t count,
+									   MVKSampler* immutableSampler);
+
+/** Returns a bitwise OR of Metal render stages from the array of shader stages, which must have a length of kMVKShaderStageCount. */
+MTLRenderStages mvkMTLRenderStagesFromMVKShaderStages(bool stageEnabled[]);
+
 
 #pragma mark -
 #pragma mark MVKDescriptorSetLayoutBinding
@@ -83,7 +102,7 @@
 	 * count provided to that descriptor set is returned. Otherwise returns the value
 	 * defined in VkDescriptorSetLayoutBinding::descriptorCount.
 	 */
-	uint32_t getDescriptorCount(MVKDescriptorSet* descSet);
+	uint32_t getDescriptorCount(MVKDescriptorSet* descSet = nullptr);
 
 	/** Returns the descriptor type of this layout. */
 	inline VkDescriptorType getDescriptorType() { return _info.descriptorType; }
@@ -108,33 +127,68 @@
               const void* pData,
               MVKShaderResourceBinding& dslMTLRezIdxOffsets);
 
-	/** Populates the specified shader converter context, at the specified descriptor set binding. */
-	void populateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
-                                        MVKShaderResourceBinding& dslMTLRezIdxOffsets,
-                                        uint32_t dslIndex);
+	/** Returns whether this layout binding is using a Metal argument buffer. */
+	bool isUsingMetalArgumentBuffer() const;
+
+	/** Writes the buffer content to the Metal argument buffer. */
+	void writeToMetalArgumentBuffer(id<MTLBuffer> mtlBuffer,
+									NSUInteger offset,
+									uint32_t elementIndex);
+
+	/** Writes the texture content to the Metal argument buffer. */
+	void writeToMetalArgumentBuffer(id<MTLTexture> mtlTexture,
+									uint32_t planeCount,
+									uint32_t planeIndex,
+									uint32_t elementIndex);
+
+	/** Writes the sampler content to the Metal argument buffer. */
+	void writeToMetalArgumentBuffer(id<MTLSamplerState> mtlSamplerState,
+									uint32_t elementIndex);
+
+	/** Writes the inline data content to the Metal argument buffer. */
+	void writeToMetalArgumentBuffer(uint8_t* pSrcData,
+									NSUInteger dstOffset,
+									NSUInteger dataLen);
 
 	MVKDescriptorSetLayoutBinding(MVKDevice* device,
 								  MVKDescriptorSetLayout* layout,
 								  const VkDescriptorSetLayoutBinding* pBinding,
-								  VkDescriptorBindingFlagsEXT bindingFlags);
+								  VkDescriptorBindingFlagsEXT bindingFlags,
+								  uint32_t descriptorIndex);
 
 	MVKDescriptorSetLayoutBinding(const MVKDescriptorSetLayoutBinding& binding);
 
 	~MVKDescriptorSetLayoutBinding() override;
 
 protected:
-    friend class MVKInlineUniformBlockDescriptor;
+    friend class MVKDescriptorSetLayout;
+	friend class MVKInlineUniformBlockDescriptor;
+
+	inline uint32_t getDescriptorIndex(uint32_t elementIndex = 0) { return _descriptorIndex + elementIndex; }
+	inline uint32_t getMTLArgumentBufferIndex(uint32_t mvkShaderStage, uint32_t elementIndex = 0) {
+		return _argumentBufferIndex[mvkShaderStage] + elementIndex;
+	}
 	void initMetalResourceIndexOffsets(MVKShaderStageResourceBinding* pBindingIndexes,
 									   MVKShaderStageResourceBinding* pDescSetCounts,
 									   const VkDescriptorSetLayoutBinding* pBinding);
+	void addMTLArgumentDescriptors(uint32_t stage, NSMutableArray<MTLArgumentDescriptor*>* args, uint32_t& argIdx);
+	void addMTLArgumentDescriptor(NSMutableArray<MTLArgumentDescriptor*>* args,
+								  MTLDataType dataType,
+								  MTLArgumentAccess access,
+								  uint32_t& argIdx);
 	bool validate(MVKSampler* mvkSampler);
+	void populateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
+										MVKShaderResourceBinding& dslMTLRezIdxOffsets,
+										uint32_t dslIndex);
 
 	MVKDescriptorSetLayout* _layout;
 	VkDescriptorSetLayoutBinding _info;
 	VkDescriptorBindingFlagsEXT _flags;
 	MVKSmallVector<MVKSampler*> _immutableSamplers;
+	uint32_t _descriptorIndex;
 	MVKShaderResourceBinding _mtlResourceIndexOffsets;
-	bool _applyToStage[kMVKShaderStageMax];
+	uint32_t _argumentBufferIndex[kMVKShaderStageCount];
+	bool _applyToStage[kMVKShaderStageCount];
 };
 
 
@@ -153,6 +207,7 @@
 
 	/** Encodes this descriptor (based on its layout binding index) on the command encoder. */
 	virtual void bind(MVKCommandEncoder* cmdEncoder,
+					  MVKDescriptorSetLayoutBinding* mvkDSLBind,
 					  uint32_t descriptorIndex,
 					  bool stages[],
 					  MVKShaderResourceBinding& mtlIndexes,
@@ -161,11 +216,13 @@
 
 	/**
 	 * Updates the internal binding from the specified content. The format of the content depends
-	 * on the descriptor type, and is extracted from pData at the location given by index * stride.
+	 * on the descriptor type, and is extracted from pData at the location given by srcIndex * stride.
 	 * MVKInlineUniformBlockDescriptor uses the index as a byte offset to write to.
 	 */
-	virtual void write(MVKDescriptorSet* mvkDescSet,
-					   uint32_t index,
+	virtual void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+					   MVKDescriptorSet* descSet,
+					   uint32_t srcIndex,
+					   uint32_t dstIndex,
 					   size_t stride,
 					   const void* pData) = 0;
 
@@ -180,8 +237,7 @@
 	 * at which to start writing.
 	 * MVKInlineUniformBlockDescriptor uses the index as a byte offset to read from.
 	 */
-	virtual void read(MVKDescriptorSet* mvkDescSet,
-					  uint32_t index,
+	virtual void read(uint32_t index,
 					  VkDescriptorImageInfo* pImageInfo,
 					  VkDescriptorBufferInfo* pBufferInfo,
 					  VkBufferView* pTexelBufferView,
@@ -195,9 +251,13 @@
 
 	~MVKDescriptor() { reset(); }
 
+protected:
+	MTLResourceUsage getMTLResourceUsage();
+
 };
 
 
+
 #pragma mark -
 #pragma mark MVKBufferDescriptor
 
@@ -206,19 +266,21 @@
 
 public:
 	void bind(MVKCommandEncoder* cmdEncoder,
+			  MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex) override;
 
-	void write(MVKDescriptorSet* mvkDescSet,
+	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+			   MVKDescriptorSet* descSet,
 			   uint32_t srcIndex,
+			   uint32_t dstIndex,
 			   size_t stride,
 			   const void* pData) override;
 
-	void read(MVKDescriptorSet* mvkDescSet,
-			  uint32_t dstIndex,
+	void read(uint32_t dstIndex,
 			  VkDescriptorImageInfo* pImageInfo,
 			  VkDescriptorBufferInfo* pBufferInfo,
 			  VkBufferView* pTexelBufferView,
@@ -281,33 +343,42 @@
 	VkDescriptorType getDescriptorType() override { return VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT; }
 
 	void bind(MVKCommandEncoder* cmdEncoder,
+			  MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex) override;
 
-	void write(MVKDescriptorSet* mvkDescSet,
+	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+			   MVKDescriptorSet* descSet,
 			   uint32_t dstOffset, // For inline buffers we are using this parameter as dst offset not as src descIdx
+			   uint32_t dstIndex,
 			   size_t stride,
 			   const void* pData) override;
 
-	void read(MVKDescriptorSet* mvkDescSet,
-			  uint32_t srcOffset, // For inline buffers we are using this parameter as src offset not as dst descIdx
+	void read(uint32_t srcOffset, // For inline buffers we are using this parameter as src offset not as dst descIdx
 			  VkDescriptorImageInfo* pImageInfo,
 			  VkDescriptorBufferInfo* pBufferInfo,
 			  VkBufferView* pTexelBufferView,
 			  VkWriteDescriptorSetInlineUniformBlockEXT* inlineUniformBlock) override;
-    
-    void setLayout(MVKDescriptorSetLayoutBinding* dslBinding, uint32_t index) override;
 
 	void reset() override;
 
+	/**
+	 * Returns whether inline blocks should be embedded directly into a Metal argument buffer, instead of
+	 * being held in an intermediary MTLBuffer, with that MTLBuffer inserted into the Metal argument buffer.
+	 */
+	static bool shouldEmbedInlineBlocksInMetalAgumentBuffer();
+
 	~MVKInlineUniformBlockDescriptor() { reset(); }
 
 protected:
-	uint8_t* _buffer = nullptr;
-    uint32_t _length;
+	uint8_t* getData();
+
+	void* _buffer = nullptr;
+    uint32_t _length = 0;
+	bool _isUsingIntermediaryMTLBuffer = false;
 };
 
 
@@ -319,19 +390,21 @@
 
 public:
 	void bind(MVKCommandEncoder* cmdEncoder,
+			  MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex) override;
 
-	void write(MVKDescriptorSet* mvkDescSet,
+	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+			   MVKDescriptorSet* descSet,
 			   uint32_t srcIndex,
+			   uint32_t dstIndex,
 			   size_t stride,
 			   const void* pData) override;
 
-	void read(MVKDescriptorSet* mvkDescSet,
-			  uint32_t dstIndex,
+	void read(uint32_t dstIndex,
 			  VkDescriptorImageInfo* pImageInfo,
 			  VkDescriptorBufferInfo* pBufferInfo,
 			  VkBufferView* pTexelBufferView,
@@ -388,19 +461,21 @@
 
 protected:
 	void bind(MVKCommandEncoder* cmdEncoder,
+			  MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex);
 
-	void write(MVKDescriptorSet* mvkDescSet,
+	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+			   MVKDescriptorSet* descSet,
 			   uint32_t srcIndex,
+			   uint32_t dstIndex,
 			   size_t stride,
 			   const void* pData);
 
-	void read(MVKDescriptorSet* mvkDescSet,
-			  uint32_t dstIndex,
+	void read(uint32_t dstIndex,
 			  VkDescriptorImageInfo* pImageInfo,
 			  VkDescriptorBufferInfo* pBufferInfo,
 			  VkBufferView* pTexelBufferView,
@@ -408,6 +483,12 @@
 
 	void setLayout(MVKDescriptorSetLayoutBinding* dslBinding, uint32_t index);
 
+	/**
+	 * Offset to the first sampler index in the argument buffer. Defaults to zero for simple sampler
+	 * descriptors, but combined image/sampler descriptors override this to index samplers after the textures.
+	 */
+	virtual uint32_t getSamplerArgBufferIndexOffset(MVKDescriptorSetLayoutBinding* dslBinding) { return 0; }
+
 	void reset();
 
 	~MVKSamplerDescriptorMixin() { reset(); }
@@ -427,19 +508,21 @@
 	VkDescriptorType getDescriptorType() override { return VK_DESCRIPTOR_TYPE_SAMPLER; }
 
 	void bind(MVKCommandEncoder* cmdEncoder,
+			  MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex) override;
 
-	void write(MVKDescriptorSet* mvkDescSet,
+	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+			   MVKDescriptorSet* descSet,
 			   uint32_t srcIndex,
+			   uint32_t dstIndex,
 			   size_t stride,
 			   const void* pData) override;
 
-	void read(MVKDescriptorSet* mvkDescSet,
-			  uint32_t dstIndex,
+	void read(uint32_t dstIndex,
 			  VkDescriptorImageInfo* pImageInfo,
 			  VkDescriptorBufferInfo* pBufferInfo,
 			  VkBufferView* pTexelBufferView,
@@ -464,19 +547,21 @@
 	VkDescriptorType getDescriptorType() override { return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; }
 
 	void bind(MVKCommandEncoder* cmdEncoder,
+			  MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex) override;
 
-	void write(MVKDescriptorSet* mvkDescSet,
+	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+			   MVKDescriptorSet* descSet,
 			   uint32_t srcIndex,
+			   uint32_t dstIndex,
 			   size_t stride,
 			   const void* pData) override;
 
-	void read(MVKDescriptorSet* mvkDescSet,
-			  uint32_t dstIndex,
+	void read(uint32_t dstIndex,
 			  VkDescriptorImageInfo* pImageInfo,
 			  VkDescriptorBufferInfo* pBufferInfo,
 			  VkBufferView* pTexelBufferView,
@@ -484,6 +569,8 @@
 
 	void setLayout(MVKDescriptorSetLayoutBinding* dslBinding, uint32_t index) override;
 
+	uint32_t getSamplerArgBufferIndexOffset(MVKDescriptorSetLayoutBinding* dslBinding) override;
+
 	void reset() override;
 
 	~MVKCombinedImageSamplerDescriptor() { reset(); }
@@ -499,19 +586,21 @@
 
 public:
 	void bind(MVKCommandEncoder* cmdEncoder,
+			  MVKDescriptorSetLayoutBinding* mvkDSLBind,
 			  uint32_t descriptorIndex,
 			  bool stages[],
 			  MVKShaderResourceBinding& mtlIndexes,
 			  MVKArrayRef<uint32_t> dynamicOffsets,
 			  uint32_t& dynamicOffsetIndex) override;
 
-	void write(MVKDescriptorSet* mvkDescSet,
+	void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+			   MVKDescriptorSet* descSet,
 			   uint32_t srcIndex,
+			   uint32_t dstIndex,
 			   size_t stride,
 			   const void* pData) override;
 
-	void read(MVKDescriptorSet* mvkDescSet,
-			  uint32_t dstIndex,
+	void read(uint32_t dstIndex,
 			  VkDescriptorImageInfo* pImageInfo,
 			  VkDescriptorBufferInfo* pBufferInfo,
 			  VkBufferView* pTexelBufferView,
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
index 189be4d..88ae6c5 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
@@ -18,6 +18,7 @@
 
 #include "MVKDescriptor.h"
 #include "MVKDescriptorSet.h"
+#include "MVKCommandBuffer.h"
 #include "MVKBuffer.h"
 
 
@@ -25,6 +26,7 @@
 
 MVKShaderStageResourceBinding MVKShaderStageResourceBinding::operator+ (const MVKShaderStageResourceBinding& rhs) {
 	MVKShaderStageResourceBinding rslt;
+	rslt.resourceIndex = this->resourceIndex + rhs.resourceIndex;
 	rslt.bufferIndex = this->bufferIndex + rhs.bufferIndex;
 	rslt.textureIndex = this->textureIndex + rhs.textureIndex;
 	rslt.samplerIndex = this->samplerIndex + rhs.samplerIndex;
@@ -32,12 +34,19 @@
 }
 
 MVKShaderStageResourceBinding& MVKShaderStageResourceBinding::operator+= (const MVKShaderStageResourceBinding& rhs) {
+	this->resourceIndex += rhs.resourceIndex;
 	this->bufferIndex += rhs.bufferIndex;
 	this->textureIndex += rhs.textureIndex;
 	this->samplerIndex += rhs.samplerIndex;
 	return *this;
 }
 
+void MVKShaderStageResourceBinding::addArgumentBuffer(const MVKShaderStageResourceBinding& rhs) {
+	bool isUsed = rhs.resourceIndex > 0;
+	this->bufferIndex += isUsed;
+	this->resourceIndex += isUsed;
+}
+
 
 #pragma mark MVKShaderResourceBinding
 
@@ -55,19 +64,71 @@
 
 MVKShaderResourceBinding MVKShaderResourceBinding::operator+ (const MVKShaderResourceBinding& rhs) {
 	MVKShaderResourceBinding rslt;
-	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
 		rslt.stages[i] = this->stages[i] + rhs.stages[i];
 	}
 	return rslt;
 }
 
 MVKShaderResourceBinding& MVKShaderResourceBinding::operator+= (const MVKShaderResourceBinding& rhs) {
-	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
 		this->stages[i] += rhs.stages[i];
 	}
 	return *this;
 }
 
+void MVKShaderResourceBinding::addArgumentBuffer(const MVKShaderResourceBinding& rhs) {
+	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
+		this->stages[i].addArgumentBuffer(rhs.stages[i]);
+	}
+}
+
+void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
+									   MVKShaderStageResourceBinding& ssRB,
+									   spv::ExecutionModel stage,
+									   uint32_t descriptorSetIndex,
+									   uint32_t bindingIndex,
+									   uint32_t count,
+									   MVKSampler* immutableSampler) {
+	mvk::MSLResourceBinding rb;
+
+	auto& rbb = rb.resourceBinding;
+	rbb.stage = stage;
+	rbb.desc_set = descriptorSetIndex;
+	rbb.binding = bindingIndex;
+	rbb.count = count;
+	rbb.msl_buffer = ssRB.bufferIndex;
+	rbb.msl_texture = ssRB.textureIndex;
+	rbb.msl_sampler = ssRB.samplerIndex;
+
+	if (immutableSampler) { immutableSampler->getConstexprSampler(rb); }
+
+	context.resourceBindings.push_back(rb);
+}
+
+MTLRenderStages mvkMTLRenderStagesFromMVKShaderStages(bool stageEnabled[]) {
+	MTLRenderStages mtlStages = 0;
+	for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+		if (stageEnabled[stage]) {
+			switch (stage) {
+				case kMVKShaderStageVertex:
+				case kMVKShaderStageTessCtl:
+				case kMVKShaderStageTessEval:
+					mtlStages |= MTLRenderStageVertex;
+					break;
+
+				case kMVKShaderStageFragment:
+					mtlStages |= MTLRenderStageFragment;
+					break;
+
+				default:
+					break;
+			}
+		}
+	}
+	return mtlStages;
+}
+
 
 #pragma mark -
 #pragma mark MVKDescriptorSetLayoutBinding
@@ -80,7 +141,7 @@
 		return 1;
 	}
 
-	if (descSet && hasVariableDescriptorCount()) {
+	if (descSet && mvkIsAnyFlagEnabled(_flags, VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
 		return descSet->_variableDescriptorCount;
 	}
 
@@ -106,7 +167,7 @@
     for (uint32_t descIdx = 0; descIdx < descCnt; descIdx++) {
 		MVKDescriptor* mvkDesc = descSet->getDescriptor(getBinding(), descIdx);
 		if (mvkDesc->getDescriptorType() == descType) {
-			mvkDesc->bind(cmdEncoder, descIdx, _applyToStage, mtlIdxs, dynamicOffsets, dynamicOffsetIndex);
+			mvkDesc->bind(cmdEncoder, this, descIdx, _applyToStage, mtlIdxs, dynamicOffsets, dynamicOffsetIndex);
 		}
     }
 }
@@ -166,14 +227,10 @@
                 else
                     bb.size = (uint32_t)bufferInfo.range;
 
-                for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+                for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
                     if (_applyToStage[i]) {
                         bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx;
-                        if (i == kMVKShaderStageCompute) {
-							if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
-                        } else {
-							if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
-                        }
+						if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
                     }
                 }
                 break;
@@ -184,14 +241,10 @@
                 bb.mtlBytes = inlineUniformBlock.pData;
                 bb.size = inlineUniformBlock.dataSize;
                 bb.isInline = true;
-                for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+                for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
                     if (_applyToStage[i]) {
                         bb.index = mtlIdxs.stages[i].bufferIndex;
-                        if (i == kMVKShaderStageCompute) {
-                            if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
-                        } else {
-                            if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
-                        }
+						if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
                     }
                 }
                 break;
@@ -213,21 +266,13 @@
                         bb.offset = mtlTex.bufferOffset;
                         bb.size = (uint32_t)(mtlTex.height * mtlTex.bufferBytesPerRow);
                     }
-                    for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+                    for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
                         if (_applyToStage[i]) {
                             tb.index = mtlIdxs.stages[i].textureIndex + rezIdx + planeIndex;
-                            if (i == kMVKShaderStageCompute) {
-                                if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindTexture(tb); }
-                            } else {
-                                if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindTexture(MVKShaderStage(i), tb); }
-                            }
+							if (cmdEncoder) { cmdEncoder->bindTexture(tb, MVKShaderStage(i)); }
                             if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
                                 bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx;
-                                if (i == kMVKShaderStageCompute) {
-                                    if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
-                                } else {
-                                    if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
-                                }
+								if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
                             }
                         }
                     }
@@ -246,21 +291,13 @@
                     bb.offset = mtlTex.bufferOffset;
                     bb.size = (uint32_t)(mtlTex.height * mtlTex.bufferBytesPerRow);
                 }
-                for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+                for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
                     if (_applyToStage[i]) {
                         tb.index = mtlIdxs.stages[i].textureIndex + rezIdx;
-                        if (i == kMVKShaderStageCompute) {
-							if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindTexture(tb); }
-                        } else {
-							if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindTexture(MVKShaderStage(i), tb); }
-                        }
+						if (cmdEncoder) { cmdEncoder->bindTexture(tb, MVKShaderStage(i)); }
                         if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
                             bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx;
-                            if (i == kMVKShaderStageCompute) {
-                                if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
-                            } else {
-                                if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
-                            }
+							if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
                         }
                     }
                 }
@@ -276,14 +313,10 @@
                     sampler = _immutableSamplers[rezIdx];
 				}
                 sb.mtlSamplerState = sampler->getMTLSamplerState();
-                for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+                for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
                     if (_applyToStage[i]) {
                         sb.index = mtlIdxs.stages[i].samplerIndex + rezIdx;
-                        if (i == kMVKShaderStageCompute) {
-							if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindSamplerState(sb); }
-                        } else {
-							if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindSamplerState(MVKShaderStage(i), sb); }
-                        }
+						if (cmdEncoder) { cmdEncoder->bindSamplerState(sb, MVKShaderStage(i)); }
                     }
                 }
                 break;
@@ -304,17 +337,14 @@
                         sampler = _immutableSamplers[rezIdx];
                     }
                     sb.mtlSamplerState = sampler->getMTLSamplerState();
-                    for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+                    for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
                         if (_applyToStage[i]) {
                             tb.index = mtlIdxs.stages[i].textureIndex + rezIdx + planeIndex;
                             sb.index = mtlIdxs.stages[i].samplerIndex + rezIdx;
-                            if (i == kMVKShaderStageCompute) {
-                                if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindTexture(tb); }
-                                if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindSamplerState(sb); }
-                            } else {
-                                if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindTexture(MVKShaderStage(i), tb); }
-                                if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindSamplerState(MVKShaderStage(i), sb); }
-                            }
+							if (cmdEncoder) {
+								cmdEncoder->bindTexture(tb, MVKShaderStage(i));
+								cmdEncoder->bindSamplerState(sb, MVKShaderStage(i));
+							}
                         }
                     }
                 }
@@ -335,6 +365,146 @@
     }
 }
 
+bool MVKDescriptorSetLayoutBinding::isUsingMetalArgumentBuffer() const { return _layout->isUsingMetalArgumentBuffer(); }
+
+// Adds this binding's MTLArgumentDescriptors to the array, and advances the argument index consumed.
+void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(uint32_t stage,
+															  NSMutableArray<MTLArgumentDescriptor*>* args,
+															  uint32_t& argIdx) {
+	if ( !_applyToStage[stage]) { return; }
+
+	_argumentBufferIndex[stage] = argIdx;
+
+	switch (getDescriptorType()) {
+
+		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+			addMTLArgumentDescriptor(args, MTLDataTypePointer, MTLArgumentAccessReadOnly, argIdx);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+			addMTLArgumentDescriptor(args, MTLDataTypePointer, MTLArgumentAccessReadWrite, argIdx);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+			if (MVKInlineUniformBlockDescriptor::shouldEmbedInlineBlocksInMetalAgumentBuffer()) {
+				addMTLArgumentDescriptor(args, MTLDataTypeUChar, MTLArgumentAccessReadOnly, argIdx);
+			} else {
+				addMTLArgumentDescriptor(args, MTLDataTypePointer, MTLArgumentAccessReadOnly, argIdx);
+			}
+			break;
+
+		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+		case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+			addMTLArgumentDescriptor(args, MTLDataTypeTexture, MTLArgumentAccessReadOnly, argIdx);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+			addMTLArgumentDescriptor(args, MTLDataTypeTexture, MTLArgumentAccessReadWrite, argIdx);
+//			addMTLArgumentDescriptor(args, MTLDataTypePointer, MTLArgumentAccessReadWrite, argIdx);		// Needed for atomic operations
+			break;
+
+		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+			addMTLArgumentDescriptor(args, MTLDataTypeTexture, MTLArgumentAccessReadOnly, argIdx);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+			addMTLArgumentDescriptor(args, MTLDataTypeTexture, MTLArgumentAccessReadWrite, argIdx);
+//			addMTLArgumentDescriptor(args, MTLDataTypePointer, MTLArgumentAccessReadWrite, argIdx);		// Needed for atomic operations
+			break;
+
+		case VK_DESCRIPTOR_TYPE_SAMPLER:
+			addMTLArgumentDescriptor(args, MTLDataTypeSampler, MTLArgumentAccessReadOnly, argIdx);
+			break;
+
+		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+			addMTLArgumentDescriptor(args, MTLDataTypeTexture, MTLArgumentAccessReadOnly, argIdx);
+			addMTLArgumentDescriptor(args, MTLDataTypeSampler, MTLArgumentAccessReadOnly, argIdx);
+			break;
+
+		default:
+			break;
+	}
+}
+
+// Adds an MTLArgumentDescriptor of the specified type to the array, and advances the argument index consumed.
+void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptor(NSMutableArray<MTLArgumentDescriptor*>* args,
+															 MTLDataType dataType,
+															 MTLArgumentAccess access,
+															 uint32_t& argIdx) {
+
+	NSUInteger mtlArgDescAryLen = ((_info.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT &&
+									MVKInlineUniformBlockDescriptor::shouldEmbedInlineBlocksInMetalAgumentBuffer())
+								   ? _info.descriptorCount : getDescriptorCount());
+
+	auto* argDesc = [MTLArgumentDescriptor argumentDescriptor];
+	argDesc.dataType = dataType;
+	argDesc.access = access;
+	argDesc.index = argIdx;
+	argDesc.arrayLength = mtlArgDescAryLen;
+	argDesc.textureType = MTLTextureType2D;
+
+	[args addObject: argDesc];
+	argIdx += getDescriptorCount();
+}
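
For orientation, a minimal sketch (not part of the patch) of the descriptor this method produces for a hypothetical sampled-image binding with `descriptorCount = 4` starting at argument index 10. Note the inline-uniform-block special case above: there `arrayLength` becomes the block's byte count, while `argIdx` still advances by `getDescriptorCount()`.

```objc
// Hypothetical values: VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, descriptorCount = 4, argIdx = 10.
MTLArgumentDescriptor* argDesc = [MTLArgumentDescriptor argumentDescriptor];
argDesc.dataType = MTLDataTypeTexture;          // a texture argument...
argDesc.access = MTLArgumentAccessReadOnly;     // ...read-only for sampled images...
argDesc.index = 10;                             // ...starting at this binding's first slot...
argDesc.arrayLength = 4;                        // ...with one array element per descriptor.
argDesc.textureType = MTLTextureType2D;
[args addObject: argDesc];                      // argIdx then advances to 14 for the next binding.
```
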
+
+void MVKDescriptorSetLayoutBinding::writeToMetalArgumentBuffer(id<MTLBuffer> mtlBuffer,
+															   NSUInteger offset,
+															   uint32_t elementIndex) {
+	if ( !isUsingMetalArgumentBuffer() || !mtlBuffer ) { return; }
+
+	for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+		if (_applyToStage[stage]) {
+			uint32_t argIdx = getMTLArgumentBufferIndex(stage, elementIndex);
+			[_layout->_argumentEncoder[stage].mtlArgumentEncoder setBuffer: mtlBuffer offset: offset atIndex: argIdx];
+		}
+	}
+}
+
+void MVKDescriptorSetLayoutBinding::writeToMetalArgumentBuffer(id<MTLTexture> mtlTexture,
+															   uint32_t planeCount,
+															   uint32_t planeIndex,
+															   uint32_t elementIndex) {
+	if ( !isUsingMetalArgumentBuffer() || !mtlTexture ) { return; }
+
+	for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+		if (_applyToStage[stage]) {
+			uint32_t argIdx = getMTLArgumentBufferIndex(stage, elementIndex * planeCount + planeIndex);
+			[_layout->_argumentEncoder[stage].mtlArgumentEncoder setTexture: mtlTexture atIndex: argIdx];
+		}
+	}
+}
+
+void MVKDescriptorSetLayoutBinding::writeToMetalArgumentBuffer(id<MTLSamplerState> mtlSamplerState,
+															   uint32_t elementIndex) {
+	if ( !isUsingMetalArgumentBuffer() ) { return; }
+
+	// Metal requires a sampler, so use the default if one is not provided.
+	if ( !mtlSamplerState ) { mtlSamplerState = getDevice()->getDefaultMTLSamplerState(); }
+
+	for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+		if (_applyToStage[stage]) {
+			uint32_t argIdx = getMTLArgumentBufferIndex(stage, elementIndex);
+			[_layout->_argumentEncoder[stage].mtlArgumentEncoder setSamplerState: mtlSamplerState atIndex: argIdx];
+		}
+	}
+}
+
+void MVKDescriptorSetLayoutBinding::writeToMetalArgumentBuffer(uint8_t* pSrcData,
+															   NSUInteger dstOffset,
+															   NSUInteger dataLen) {
+	if ( !isUsingMetalArgumentBuffer() || !pSrcData ) { return; }
+
+	for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+		if (_applyToStage[stage]) {
+			uint32_t argIdx = getMTLArgumentBufferIndex(stage);
+			uint8_t* pDstData = (uint8_t*)[_layout->_argumentEncoder[stage].mtlArgumentEncoder constantDataAtIndex: argIdx];
+			if (pDstData) { memcpy(pDstData + dstOffset, pSrcData, dataLen); }
+		}
+	}
+}
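
Taken together, these overloads are thin wrappers over the per-stage `MTLArgumentEncoder`. A condensed sketch of the underlying Metal calls, assuming the encoder is already attached to the pool's `MTLBuffer` (variable names hypothetical):

```objc
[argEnc setArgumentBuffer: mtlArgBuffer offset: descSetOffset];   // aim encoder at this set's slice
[argEnc setBuffer: mtlBuffer offset: buffOffset atIndex: 0];      // buffer descriptor
[argEnc setTexture: mtlTexture atIndex: 1];                       // image descriptor
[argEnc setSamplerState: mtlSamplerState atIndex: 2];             // sampler descriptor
uint8_t* pDst = (uint8_t*)[argEnc constantDataAtIndex: 3];        // embedded inline uniform block
if (pDst) { memcpy(pDst + dstOffset, pSrcData, dataLen); }
```
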
+
 // If depth compare is required, but unavailable on the device, the sampler can only be used as an immutable sampler
 bool MVKDescriptorSetLayoutBinding::validate(MVKSampler* mvkSampler) {
 	if (mvkSampler->getRequiresConstExprSampler()) {
@@ -360,31 +530,44 @@
         spv::ExecutionModelFragment,
         spv::ExecutionModelGLCompute
     };
-    for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+    for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
         if (_applyToStage[i]) {
             mvkPopulateShaderConverterContext(context,
                                               mtlIdxs.stages[i],
                                               models[i],
                                               dslIndex,
                                               _info.binding,
-											  getDescriptorCount(nullptr),
+											  getDescriptorCount(),
 											  mvkSamp);
-        }
+
+			// If Metal argument buffers are in use, identify any inline uniform block bindings.
+			if (_info.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT &&
+				isUsingMetalArgumentBuffer() &&
+				MVKInlineUniformBlockDescriptor::shouldEmbedInlineBlocksInMetalAgumentBuffer()) {
+
+				mvk::DescriptorBinding db;
+				db.descriptorSet = dslIndex;
+				db.binding = _info.binding;
+				context.inlineUniformBlocks.push_back(db);
+			}
+		}
     }
 }
 
 MVKDescriptorSetLayoutBinding::MVKDescriptorSetLayoutBinding(MVKDevice* device,
 															 MVKDescriptorSetLayout* layout,
 															 const VkDescriptorSetLayoutBinding* pBinding,
-															 VkDescriptorBindingFlagsEXT bindingFlags) :
+															 VkDescriptorBindingFlagsEXT bindingFlags,
+															 uint32_t descriptorIndex) :
 	MVKBaseDeviceObject(device),
 	_layout(layout),
 	_info(*pBinding),
-	_flags(bindingFlags) {
+	_flags(bindingFlags),
+	_descriptorIndex(descriptorIndex) {
 
 	_info.pImmutableSamplers = nullptr;     // Remove dangling pointer
 
-	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
         // Determine if this binding is used by this shader stage
         _applyToStage[i] = mvkAreAllFlagsEnabled(pBinding->stageFlags, mvkVkShaderStageFlagBitsFromMVKShaderStage(MVKShaderStage(i)));
 	    // If this binding is used by the shader, set the Metal resource index
@@ -412,10 +595,11 @@
 	_layout(binding._layout),
 	_info(binding._info),
 	_flags(binding._flags),
+	_descriptorIndex(binding._descriptorIndex),
 	_immutableSamplers(binding._immutableSamplers),
 	_mtlResourceIndexOffsets(binding._mtlResourceIndexOffsets) {
 
-	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
         _applyToStage[i] = binding._applyToStage[i];
     }
 	for (MVKSampler* sampler : _immutableSamplers) {
@@ -434,10 +618,21 @@
 void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(MVKShaderStageResourceBinding* pBindingIndexes,
 																  MVKShaderStageResourceBinding* pDescSetCounts,
 																  const VkDescriptorSetLayoutBinding* pBinding) {
+
+	// Sets an index offset and updates both that index and the general resource index.
+	// Can be invoked multiple times for descriptor types that combine multiple resources.
+#	define setResourceIndexOffset(rezIdx) \
+	do { \
+		pBindingIndexes->rezIdx = isUsingMetalArgumentBuffer() ?  pDescSetCounts->resourceIndex : pDescSetCounts->rezIdx; \
+		pDescSetCounts->rezIdx += descCnt; \
+		pBindingIndexes->resourceIndex = pDescSetCounts->resourceIndex; \
+		pDescSetCounts->resourceIndex += descCnt; \
+	} while(false)
+
+	uint32_t descCnt = pBinding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT ? 1 : pBinding->descriptorCount;
     switch (pBinding->descriptorType) {
         case VK_DESCRIPTOR_TYPE_SAMPLER:
-            pBindingIndexes->samplerIndex = pDescSetCounts->samplerIndex;
-            pDescSetCounts->samplerIndex += pBinding->descriptorCount;
+			setResourceIndexOffset(samplerIndex);
 
 			if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfSamplers) {
 				_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of samplers.", _device->getName()));
@@ -445,10 +640,8 @@
             break;
 
         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-            pBindingIndexes->textureIndex = pDescSetCounts->textureIndex;
-            pDescSetCounts->textureIndex += pBinding->descriptorCount;
-            pBindingIndexes->samplerIndex = pDescSetCounts->samplerIndex;
-            pDescSetCounts->samplerIndex += pBinding->descriptorCount;
+			setResourceIndexOffset(textureIndex);
+			setResourceIndexOffset(samplerIndex);
 
 			if (pBinding->descriptorCount > 1) {
 				if ( !_device->_pMetalFeatures->arrayOfTextures ) {
@@ -474,14 +667,12 @@
 
         case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
         case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-            pBindingIndexes->bufferIndex = pDescSetCounts->bufferIndex;
-            pDescSetCounts->bufferIndex += pBinding->descriptorCount;
+			setResourceIndexOffset(bufferIndex);
             // fallthrough
         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
         case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-            pBindingIndexes->textureIndex = pDescSetCounts->textureIndex;
-            pDescSetCounts->textureIndex += pBinding->descriptorCount;
+			setResourceIndexOffset(textureIndex);
 
 			if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) {
 				_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
@@ -492,13 +683,8 @@
         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-            pBindingIndexes->bufferIndex = pDescSetCounts->bufferIndex;
-            pDescSetCounts->bufferIndex += pBinding->descriptorCount;
-            break;
-
-        case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
-            pBindingIndexes->bufferIndex = pDescSetCounts->bufferIndex;
-            pDescSetCounts->bufferIndex += 1;
+		case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+			setResourceIndexOffset(bufferIndex);
             break;
 
         default:
@@ -508,10 +694,36 @@
 
 
 #pragma mark -
+#pragma mark MVKDescriptor
+
+MTLResourceUsage MVKDescriptor::getMTLResourceUsage() {
+	MTLResourceUsage mtlUsage = MTLResourceUsageRead;
+	switch (getDescriptorType()) {
+		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+			mtlUsage |= MTLResourceUsageSample;
+			break;
+
+		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+			mtlUsage |= MTLResourceUsageWrite;
+			break;
+
+		default:
+			break;
+	}
+	return mtlUsage;
+}
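
This usage mask feeds the `useArgumentBufferResource()` calls that replace discrete binding when argument buffers are active. A hedged sketch of how such a mask is ultimately consumed by a Metal encoder (variable names hypothetical):

```objc
// Resources referenced only indirectly, through an argument buffer,
// must be made resident on the encoder before the draw or dispatch.
MTLResourceUsage mtlUsage = mvkDesc->getMTLResourceUsage();
if (isComputeStage) {
	[mtlComputeEncoder useResource: mtlResource usage: mtlUsage];
} else {
	[mtlRenderEncoder useResource: mtlResource usage: mtlUsage];
}
```
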
+
+
+#pragma mark -
 #pragma mark MVKBufferDescriptor
 
 // A null cmdEncoder can be passed to perform a validation pass
 void MVKBufferDescriptor::bind(MVKCommandEncoder* cmdEncoder,
+							   MVKDescriptorSetLayoutBinding* mvkDSLBind,
 							   uint32_t descriptorIndex,
 							   bool stages[],
 							   MVKShaderResourceBinding& mtlIndexes,
@@ -534,20 +746,33 @@
 		else
 			bb.size = (uint32_t)_buffRange;
 	}
-	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
-		if (stages[i]) {
-			bb.index = mtlIndexes.stages[i].bufferIndex + descriptorIndex;
-			if (i == kMVKShaderStageCompute) {
-				if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
-			} else {
-				if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
+
+	// Update the Metal argument buffer entry with the dynamic offset
+	if (descType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || descType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+		mvkDSLBind->writeToMetalArgumentBuffer(bb.mtlBuffer, bb.offset, descriptorIndex);
+	}
+
+	if (mvkDSLBind->isUsingMetalArgumentBuffer()) {
+		MVKMTLArgumentBufferResourceUsage abru;
+		abru.mtlResource = bb.mtlResource;
+		abru.mtlUsage = getMTLResourceUsage();
+		abru.mtlStages = mvkMTLRenderStagesFromMVKShaderStages(stages);
+		if (cmdEncoder) { cmdEncoder->useArgumentBufferResource(abru, stages[kMVKShaderStageCompute]); }
+	} else {
+		// If not using a Metal argument buffer, bind discretely
+		for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
+			if (stages[i]) {
+				bb.index = mtlIndexes.stages[i].bufferIndex + descriptorIndex;
+				if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
 			}
 		}
 	}
 }
 
-void MVKBufferDescriptor::write(MVKDescriptorSet* mvkDescSet,
+void MVKBufferDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+								MVKDescriptorSet* descSet,
 								uint32_t srcIndex,
+								uint32_t dstIndex,
 								size_t stride,
 								const void* pData) {
 	auto* oldBuff = _mvkBuffer;
@@ -559,10 +784,14 @@
 
 	if (_mvkBuffer) { _mvkBuffer->retain(); }
 	if (oldBuff) { oldBuff->release(); }
+
+	// Update the Metal argument buffer entry
+	id<MTLBuffer> mtlBuffer = _mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil;
+	NSUInteger mtlBuffOffset = _mvkBuffer ? _mvkBuffer->getMTLBufferOffset() + _buffOffset : 0;
+	mvkDSLBind->writeToMetalArgumentBuffer(mtlBuffer, mtlBuffOffset, dstIndex);
 }
 
-void MVKBufferDescriptor::read(MVKDescriptorSet* mvkDescSet,
-							   uint32_t dstIndex,
+void MVKBufferDescriptor::read(uint32_t dstIndex,
 							   VkDescriptorImageInfo* pImageInfo,
 							   VkDescriptorBufferInfo* pBufferInfo,
 							   VkBufferView* pTexelBufferView,
@@ -587,66 +816,120 @@
 
 // A null cmdEncoder can be passed to perform a validation pass
 void MVKInlineUniformBlockDescriptor::bind(MVKCommandEncoder* cmdEncoder,
+										   MVKDescriptorSetLayoutBinding* mvkDSLBind,
 										   uint32_t descriptorIndex,
 										   bool stages[],
 										   MVKShaderResourceBinding& mtlIndexes,
 										   MVKArrayRef<uint32_t> dynamicOffsets,
 										   uint32_t& dynamicOffsetIndex) {
-	MVKMTLBufferBinding bb;
-	bb.mtlBytes = _buffer;
-	bb.size = _length;
-	bb.isInline = true;
-	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
-		if (stages[i]) {
-			bb.index = mtlIndexes.stages[i].bufferIndex;
-			if (i == kMVKShaderStageCompute) {
-				if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
-			} else {
-				if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
+
+	if (mvkDSLBind->isUsingMetalArgumentBuffer()) {
+		if (cmdEncoder && _isUsingIntermediaryMTLBuffer) {
+			MVKMTLArgumentBufferResourceUsage abru;
+			abru.mtlResource = ((MVKMTLBufferAllocation*)_buffer)->_mtlBuffer;
+			abru.mtlUsage = getMTLResourceUsage();
+			abru.mtlStages = mvkMTLRenderStagesFromMVKShaderStages(stages);
+			cmdEncoder->useArgumentBufferResource(abru, stages[kMVKShaderStageCompute]);
+		}
+	} else {
+		// If not using a Metal argument buffer, bind discretely
+		MVKMTLBufferBinding bb;
+		bb.mtlBytes = getData();
+		bb.size = _length;
+		bb.isInline = true;
+		for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
+			if (stages[i]) {
+				bb.index = mtlIndexes.stages[i].bufferIndex;
+				if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
 			}
 		}
 	}
 }
 
-void MVKInlineUniformBlockDescriptor::write(MVKDescriptorSet* mvkDescSet,
+void MVKInlineUniformBlockDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+											MVKDescriptorSet* descSet,
                                             uint32_t dstOffset,
+											uint32_t dstIndex,
                                             size_t stride,
                                             const void* pData) {
+	// Ensure there is a destination to write to
+	if ( !_buffer ) {
+		_length = mvkDSLBind->_info.descriptorCount;
+		_isUsingIntermediaryMTLBuffer = mvkDSLBind->supportsMetalArgumentBuffers() && !shouldEmbedInlineBlocksInMetalAgumentBuffer();
+		if (_isUsingIntermediaryMTLBuffer) {
+			// Acquire an intermediary buffer and write it to the Metal argument buffer
+			auto* mtlBuffRgn = descSet->acquireMTLBufferRegion(_length);
+			_buffer = (void*)mtlBuffRgn;
+			mvkDSLBind->writeToMetalArgumentBuffer(mtlBuffRgn->_mtlBuffer, mtlBuffRgn->_offset, dstIndex);
+		} else {
+			_buffer = malloc(_length);
+		}
+	}
+
 	const auto& pInlineUniformBlock = *(VkWriteDescriptorSetInlineUniformBlockEXT*)pData;
-	if (pInlineUniformBlock.pData && _buffer) {
-		memcpy(_buffer + dstOffset, pInlineUniformBlock.pData, pInlineUniformBlock.dataSize);
+	uint8_t* data = getData();
+	if (data && pInlineUniformBlock.pData && dstOffset < _length) {
+		uint32_t dataLen = std::min(pInlineUniformBlock.dataSize, _length - dstOffset);
+		memcpy(data + dstOffset, pInlineUniformBlock.pData, dataLen);
+
+		// If using an intermediary buffer, it only needs to be written to the Metal argument buffer once.
+		// If writing content directly into the Metal argument buffer, update that content now.
+		if ( !_isUsingIntermediaryMTLBuffer ) {
+			mvkDSLBind->writeToMetalArgumentBuffer((uint8_t*)pInlineUniformBlock.pData, dstOffset, dataLen);
+		}
 	}
 }
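
The write path above supports two backings for the block's bytes. A condensed sketch of the distinction, using names from the patch where possible (`mtlArgEnc` and `argIdx` are stand-ins):

```objc
if ( !_isUsingIntermediaryMTLBuffer ) {
	// Embedded: the bytes live directly in the Metal argument buffer,
	// so each write is copied into the encoder's constant data.
	uint8_t* pDst = (uint8_t*)[mtlArgEnc constantDataAtIndex: argIdx];
	if (pDst) { memcpy(pDst + dstOffset, pSrcData, dataLen); }
} else {
	// Intermediary: an MTLBuffer region holds the bytes; its pointer was
	// encoded once at allocation, so later writes just update its contents.
	memcpy((uint8_t*)mtlBuffRgn->getContents() + dstOffset, pSrcData, dataLen);
}
```
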
 
-void MVKInlineUniformBlockDescriptor::read(MVKDescriptorSet* mvkDescSet,
-                                           uint32_t srcOffset,
+void MVKInlineUniformBlockDescriptor::read(uint32_t srcOffset,
                                            VkDescriptorImageInfo* pImageInfo,
                                            VkDescriptorBufferInfo* pBufferInfo,
                                            VkBufferView* pTexelBufferView,
                                            VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock) {
-	if (_buffer && pInlineUniformBlock->pData) {
-		memcpy((void*)pInlineUniformBlock->pData, _buffer + srcOffset, pInlineUniformBlock->dataSize);
+	uint8_t* data = getData();
+	if (data && pInlineUniformBlock->pData && srcOffset < _length) {
+		uint32_t dataLen = std::min(pInlineUniformBlock->dataSize, _length - srcOffset);
+		memcpy((void*)pInlineUniformBlock->pData, data + srcOffset, dataLen);
 	}
 }
 
-void MVKInlineUniformBlockDescriptor::setLayout(MVKDescriptorSetLayoutBinding* dslBinding, uint32_t index) {
-    _length = dslBinding->_info.descriptorCount;
-    _buffer = (uint8_t*)malloc(_length);
-}
-
 void MVKInlineUniformBlockDescriptor::reset() {
-    free(_buffer);
+	if (_isUsingIntermediaryMTLBuffer) {
+		if (_buffer) { ((MVKMTLBufferAllocation*)_buffer)->returnToPool(); }
+	} else {
+		free(_buffer);
+	}
 	_buffer = nullptr;
     _length = 0;
+	_isUsingIntermediaryMTLBuffer = false;
 	MVKDescriptor::reset();
 }
 
+uint8_t* MVKInlineUniformBlockDescriptor::getData() {
+	return (uint8_t*)((_isUsingIntermediaryMTLBuffer && _buffer) ? ((MVKMTLBufferAllocation*)_buffer)->getContents() : _buffer);
+}
+
+// We do this once lazily instead of in a library constructor function to
+// ensure the NSProcessInfo environment is available when called upon.
+bool MVKInlineUniformBlockDescriptor::shouldEmbedInlineBlocksInMetalAgumentBuffer() {
+#	ifndef MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER
+#   	define MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER    0
+#	endif
+	static bool _shouldEmbedInlineBlocksInMetalAgumentBuffer = MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER;
+	static bool _shouldEmbedInlineBlocksInMetalAgumentBufferInitialized = false;
+	if ( !_shouldEmbedInlineBlocksInMetalAgumentBufferInitialized ) {
+		_shouldEmbedInlineBlocksInMetalAgumentBufferInitialized = true;
+		MVK_SET_FROM_ENV_OR_BUILD_BOOL(_shouldEmbedInlineBlocksInMetalAgumentBuffer, MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER);
+	}
+	return _shouldEmbedInlineBlocksInMetalAgumentBuffer;
+}
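
`MVK_SET_FROM_ENV_OR_BUILD_BOOL` is defined elsewhere in MoltenVK; as a rough approximation only (not the actual macro), it behaves like a lazy environment lookup with a build-time fallback:

```objc
#include <cstdlib>

// Approximation only: read a boolean config value from the environment once.
static bool readEnvBool(const char* name, bool buildDefault) {
	const char* val = std::getenv(name);   // NSProcessInfo environment is available by now
	return val ? (std::atoi(val) != 0) : buildDefault;
}
```
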
+
 
 #pragma mark -
 #pragma mark MVKImageDescriptor
 
 // A null cmdEncoder can be passed to perform a validation pass
 void MVKImageDescriptor::bind(MVKCommandEncoder* cmdEncoder,
+							  MVKDescriptorSetLayoutBinding* mvkDSLBind,
 							  uint32_t descriptorIndex,
 							  bool stages[],
 							  MVKShaderResourceBinding& mtlIndexes,
@@ -654,7 +937,7 @@
 							  uint32_t& dynamicOffsetIndex) {
 
 	VkDescriptorType descType = getDescriptorType();
-	uint8_t planeCount = (_mvkImageView) ? _mvkImageView->getPlaneCount() : 1;
+	uint8_t planeCount = _mvkImageView ? _mvkImageView->getPlaneCount() : 1;
     for (uint8_t planeIndex = 0; planeIndex < planeCount; planeIndex++) {
         MVKMTLTextureBinding tb;
         MVKMTLBufferBinding bb;
@@ -672,29 +955,39 @@
             bb.offset = mtlTex.bufferOffset;
             bb.size = (uint32_t)(mtlTex.height * mtlTex.bufferBytesPerRow);
         }
-        for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
-            if (stages[i]) {
-                tb.index = mtlIndexes.stages[i].textureIndex + descriptorIndex + planeIndex;
-                if (i == kMVKShaderStageCompute) {
-                    if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindTexture(tb); }
-                } else {
-                    if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindTexture(MVKShaderStage(i), tb); }
-                }
-                if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
-                    bb.index = mtlIndexes.stages[i].bufferIndex + descriptorIndex + planeIndex;
-                    if (i == kMVKShaderStageCompute) {
-                        if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
-                    } else {
-                        if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
-                    }
-                }
-            }
-        }
+
+		if (mvkDSLBind->isUsingMetalArgumentBuffer()) {
+			MVKMTLArgumentBufferResourceUsage abru;
+			abru.mtlResource = tb.mtlResource;
+			abru.mtlUsage = getMTLResourceUsage();
+			abru.mtlStages = mvkMTLRenderStagesFromMVKShaderStages(stages);
+			if (cmdEncoder) { cmdEncoder->useArgumentBufferResource(abru, stages[kMVKShaderStageCompute]); }
+
+// Needed for atomic operations
+//			if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
+//				abru.mtlResource = bb.mtlResource;
+//				if (cmdEncoder) { cmdEncoder->useArgumentBufferResource(abru, stages[kMVKShaderStageCompute]); }
+//			}
+		} else {
+			// If not using a Metal argument buffer, bind discretely
+			for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
+				if (stages[i]) {
+					tb.index = mtlIndexes.stages[i].textureIndex + (descriptorIndex * planeCount) + planeIndex;
+					if (cmdEncoder) { cmdEncoder->bindTexture(tb, MVKShaderStage(i)); }
+					if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
+						bb.index = mtlIndexes.stages[i].bufferIndex + (descriptorIndex * planeCount) + planeIndex;
+						if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
+					}
+				}
+			}
+		}
     }
 }
 
-void MVKImageDescriptor::write(MVKDescriptorSet* mvkDescSet,
+void MVKImageDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+							   MVKDescriptorSet* descSet,
 							   uint32_t srcIndex,
+							   uint32_t dstIndex,
 							   size_t stride,
 							   const void* pData) {
 	auto* oldImgView = _mvkImageView;
@@ -705,10 +998,23 @@
 
 	if (_mvkImageView) { _mvkImageView->retain(); }
 	if (oldImgView) { oldImgView->release(); }
+
+	// Update the Metal argument buffer entry
+	uint8_t planeCount = _mvkImageView ? _mvkImageView->getPlaneCount() : 1;
+	for (uint8_t planeIndex = 0; planeIndex < planeCount; planeIndex++) {
+		id<MTLTexture> mtlTexture = _mvkImageView ? _mvkImageView->getMTLTexture(planeIndex) : nil;
+		mvkDSLBind->writeToMetalArgumentBuffer(mtlTexture, planeCount, planeIndex, dstIndex);
+
+// Needed for atomic operations
+//		if (getDescriptorType() == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
+//			id<MTLTexture> baseMTLTex = mtlTexture.parentTexture ? mtlTexture.parentTexture : mtlTexture;
+//			uint32_t buffArgIdx = mvkDSLBind->getDescriptorCount() * planeCount + dstIndex;
+//			mvkDSLBind->writeToMetalArgumentBuffer(baseMTLTex.buffer, baseMTLTex.bufferOffset, buffArgIdx);
+//		}
+	}
 }
 
-void MVKImageDescriptor::read(MVKDescriptorSet* mvkDescSet,
-							  uint32_t dstIndex,
+void MVKImageDescriptor::read(uint32_t dstIndex,
 							  VkDescriptorImageInfo* pImageInfo,
 							  VkDescriptorBufferInfo* pBufferInfo,
 							  VkBufferView* pTexelBufferView,
@@ -733,29 +1039,38 @@
 // Metal validation requires each sampler in an array of samplers to be populated,
 // even if not used, so populate a default if one hasn't been set.
 void MVKSamplerDescriptorMixin::bind(MVKCommandEncoder* cmdEncoder,
+									 MVKDescriptorSetLayoutBinding* mvkDSLBind,
 									 uint32_t descriptorIndex,
 									 bool stages[],
 									 MVKShaderResourceBinding& mtlIndexes,
 									 MVKArrayRef<uint32_t> dynamicOffsets,
 									 uint32_t& dynamicOffsetIndex) {
-	MVKMTLSamplerStateBinding sb;
-	sb.mtlSamplerState = (_mvkSampler
-						  ? _mvkSampler->getMTLSamplerState()
-						  : cmdEncoder->getDevice()->getDefaultMTLSamplerState());
-	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
-		if (stages[i]) {
-			sb.index = mtlIndexes.stages[i].samplerIndex + descriptorIndex;
-			if (i == kMVKShaderStageCompute) {
-				if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindSamplerState(sb); }
-			} else {
-				if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindSamplerState(MVKShaderStage(i), sb); }
+
+	if (mvkDSLBind->isUsingMetalArgumentBuffer()) {
+		// Write any immutable sampler to the argument buffer now
+		if ( !_hasDynamicSampler ) {
+			id<MTLSamplerState> mtlSampler = _mvkSampler ? _mvkSampler->getMTLSamplerState() : nil;
+			uint32_t argBuffIdx = getSamplerArgBufferIndexOffset(mvkDSLBind) + descriptorIndex;
+			mvkDSLBind->writeToMetalArgumentBuffer(mtlSampler, argBuffIdx);
+		}
+	} else {
+		MVKMTLSamplerStateBinding sb;
+		sb.mtlSamplerState = (_mvkSampler
+							  ? _mvkSampler->getMTLSamplerState()
+							  : cmdEncoder->getDevice()->getDefaultMTLSamplerState());
+		for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
+			if (stages[i]) {
+				sb.index = mtlIndexes.stages[i].samplerIndex + descriptorIndex;
+				if (cmdEncoder) { cmdEncoder->bindSamplerState(sb, MVKShaderStage(i)); }
 			}
 		}
 	}
 }
 
-void MVKSamplerDescriptorMixin::write(MVKDescriptorSet* mvkDescSet,
+void MVKSamplerDescriptorMixin::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+									  MVKDescriptorSet* descSet,
 									  uint32_t srcIndex,
+									  uint32_t dstIndex,
 									  size_t stride,
 									  const void* pData) {
 	if (_hasDynamicSampler) {
@@ -769,11 +1084,15 @@
 
 		if (_mvkSampler) { _mvkSampler->retain(); }
 		if (oldSamp) { oldSamp->release(); }
+
+		// Update the Metal argument buffer entry
+		id<MTLSamplerState> mtlSampler = _mvkSampler ? _mvkSampler->getMTLSamplerState() : nil;
+		uint32_t argBuffIdx = getSamplerArgBufferIndexOffset(mvkDSLBind) + dstIndex;
+		mvkDSLBind->writeToMetalArgumentBuffer(mtlSampler, argBuffIdx);
 	}
 }
 
-void MVKSamplerDescriptorMixin::read(MVKDescriptorSet* mvkDescSet,
-									 uint32_t dstIndex,
+void MVKSamplerDescriptorMixin::read(uint32_t dstIndex,
 									 VkDescriptorImageInfo* pImageInfo,
 									 VkDescriptorBufferInfo* pBufferInfo,
 									 VkBufferView* pTexelBufferView,
@@ -806,28 +1125,30 @@
 
 // A null cmdEncoder can be passed to perform a validation pass
 void MVKSamplerDescriptor::bind(MVKCommandEncoder* cmdEncoder,
+								MVKDescriptorSetLayoutBinding* mvkDSLBind,
 								uint32_t descriptorIndex,
 								bool stages[],
 								MVKShaderResourceBinding& mtlIndexes,
 								MVKArrayRef<uint32_t> dynamicOffsets,
 								uint32_t& dynamicOffsetIndex) {
-	MVKSamplerDescriptorMixin::bind(cmdEncoder, descriptorIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
+	MVKSamplerDescriptorMixin::bind(cmdEncoder, mvkDSLBind, descriptorIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
 }
 
-void MVKSamplerDescriptor::write(MVKDescriptorSet* mvkDescSet,
+void MVKSamplerDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+								 MVKDescriptorSet* descSet,
 								 uint32_t srcIndex,
+								 uint32_t dstIndex,
 								 size_t stride,
 								 const void* pData) {
-	MVKSamplerDescriptorMixin::write(mvkDescSet, srcIndex, stride, pData);
+	MVKSamplerDescriptorMixin::write(mvkDSLBind, descSet, srcIndex, dstIndex, stride, pData);
 }
 
-void MVKSamplerDescriptor::read(MVKDescriptorSet* mvkDescSet,
-								uint32_t dstIndex,
+void MVKSamplerDescriptor::read(uint32_t dstIndex,
 								VkDescriptorImageInfo* pImageInfo,
 								VkDescriptorBufferInfo* pBufferInfo,
 								VkBufferView* pTexelBufferView,
 								VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock) {
-	MVKSamplerDescriptorMixin::read(mvkDescSet, dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
+	MVKSamplerDescriptorMixin::read(dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
 }
 
 void MVKSamplerDescriptor::setLayout(MVKDescriptorSetLayoutBinding* dslBinding, uint32_t index) {
@@ -846,31 +1167,33 @@
 
 // A null cmdEncoder can be passed to perform a validation pass
 void MVKCombinedImageSamplerDescriptor::bind(MVKCommandEncoder* cmdEncoder,
+											 MVKDescriptorSetLayoutBinding* mvkDSLBind,
 											 uint32_t descriptorIndex,
 											 bool stages[],
 											 MVKShaderResourceBinding& mtlIndexes,
 											 MVKArrayRef<uint32_t> dynamicOffsets,
 											 uint32_t& dynamicOffsetIndex) {
-	MVKImageDescriptor::bind(cmdEncoder, descriptorIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
-	MVKSamplerDescriptorMixin::bind(cmdEncoder, descriptorIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
+	MVKImageDescriptor::bind(cmdEncoder, mvkDSLBind, descriptorIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
+	MVKSamplerDescriptorMixin::bind(cmdEncoder, mvkDSLBind, descriptorIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
 }
 
-void MVKCombinedImageSamplerDescriptor::write(MVKDescriptorSet* mvkDescSet,
+void MVKCombinedImageSamplerDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+											  MVKDescriptorSet* descSet,
 											  uint32_t srcIndex,
+											  uint32_t dstIndex,
 											  size_t stride,
 											  const void* pData) {
-	MVKImageDescriptor::write(mvkDescSet, srcIndex, stride, pData);
-	MVKSamplerDescriptorMixin::write(mvkDescSet, srcIndex, stride, pData);
+	MVKImageDescriptor::write(mvkDSLBind, descSet, srcIndex, dstIndex, stride, pData);
+	MVKSamplerDescriptorMixin::write(mvkDSLBind, descSet, srcIndex, dstIndex, stride, pData);
 }
 
-void MVKCombinedImageSamplerDescriptor::read(MVKDescriptorSet* mvkDescSet,
-											 uint32_t dstIndex,
+void MVKCombinedImageSamplerDescriptor::read(uint32_t dstIndex,
 											 VkDescriptorImageInfo* pImageInfo,
 											 VkDescriptorBufferInfo* pBufferInfo,
 											 VkBufferView* pTexelBufferView,
 											 VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock) {
-	MVKImageDescriptor::read(mvkDescSet, dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
-	MVKSamplerDescriptorMixin::read(mvkDescSet, dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
+	MVKImageDescriptor::read(dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
+	MVKSamplerDescriptorMixin::read(dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
 }
 
 void MVKCombinedImageSamplerDescriptor::setLayout(MVKDescriptorSetLayoutBinding* dslBinding, uint32_t index) {
@@ -878,6 +1201,11 @@
 	MVKSamplerDescriptorMixin::setLayout(dslBinding, index);
 }
 
+uint32_t MVKCombinedImageSamplerDescriptor::getSamplerArgBufferIndexOffset(MVKDescriptorSetLayoutBinding* dslBinding) {
+	uint8_t planeCount = _mvkImageView ? _mvkImageView->getPlaneCount() : 1;
+	return dslBinding->getDescriptorCount() * planeCount;
+}
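
A worked example of this offset arithmetic: for a hypothetical combined image sampler binding with `descriptorCount = 4` on a 2-plane image view, the texture arguments occupy eight slots before the first sampler:

```objc
// Hypothetical: descriptorCount = 4, planeCount = 2, binding starts at argument index base.
// Textures: base + (elementIndex * planeCount + planeIndex)      -> slots base+0 .. base+7
// Samplers: base + (descriptorCount * planeCount) + elementIndex -> slots base+8 .. base+11
```
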
+
 void MVKCombinedImageSamplerDescriptor::reset() {
 	MVKSamplerDescriptorMixin::reset();
 	MVKImageDescriptor::reset();
@@ -889,6 +1217,7 @@
 
 // A null cmdEncoder can be passed to perform a validation pass
 void MVKTexelBufferDescriptor::bind(MVKCommandEncoder* cmdEncoder,
+									MVKDescriptorSetLayoutBinding* mvkDSLBind,
 									uint32_t descriptorIndex,
 									bool stages[],
 									MVKShaderResourceBinding& mtlIndexes,
@@ -906,28 +1235,37 @@
 			bb.size = (uint32_t)(mtlTex.height * mtlTex.bufferBytesPerRow);
 		}
 	}
-	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
-		if (stages[i]) {
-			tb.index = mtlIndexes.stages[i].textureIndex + descriptorIndex;
-			if (i == kMVKShaderStageCompute) {
-				if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindTexture(tb); }
-			} else {
-				if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindTexture(MVKShaderStage(i), tb); }
-			}
-			if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
-				bb.index = mtlIndexes.stages[i].bufferIndex + descriptorIndex;
-				if (i == kMVKShaderStageCompute) {
-					if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
-				} else {
-					if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
+	if (mvkDSLBind->isUsingMetalArgumentBuffer()) {
+		MVKMTLArgumentBufferResourceUsage abru;
+		abru.mtlResource = tb.mtlResource;
+		abru.mtlUsage = getMTLResourceUsage();
+		abru.mtlStages = mvkMTLRenderStagesFromMVKShaderStages(stages);
+		if (cmdEncoder) { cmdEncoder->useArgumentBufferResource(abru, stages[kMVKShaderStageCompute]); }
+
+// Needed for atomic operations
+//		if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
+//			abru.mtlResource = bb.mtlResource;
+//			if (cmdEncoder) { cmdEncoder->useArgumentBufferResource(abru, stages[kMVKShaderStageCompute]); }
+//		}
+	} else {
+		// If not using a Metal argument buffer, bind discretely
+		for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
+			if (stages[i]) {
+				tb.index = mtlIndexes.stages[i].textureIndex + descriptorIndex;
+				if (cmdEncoder) { cmdEncoder->bindTexture(tb, MVKShaderStage(i)); }
+				if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
+					bb.index = mtlIndexes.stages[i].bufferIndex + descriptorIndex;
+					if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
 				}
 			}
 		}
 	}
 }
 
-void MVKTexelBufferDescriptor::write(MVKDescriptorSet* mvkDescSet,
+void MVKTexelBufferDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+									 MVKDescriptorSet* descSet,
 									 uint32_t srcIndex,
+									 uint32_t dstIndex,
 									 size_t stride,
 									 const void* pData) {
 	auto* oldBuffView = _mvkBufferView;
@@ -937,10 +1275,19 @@
 
 	if (_mvkBufferView) { _mvkBufferView->retain(); }
 	if (oldBuffView) { oldBuffView->release(); }
+
+	// Update the Metal argument buffer entry
+	id<MTLTexture> mtlTexture = _mvkBufferView ? _mvkBufferView->getMTLTexture() : nil;
+	mvkDSLBind->writeToMetalArgumentBuffer(mtlTexture, 1, 0, dstIndex);
+
+// Needed for atomic operations
+//	if (getDescriptorType() == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
+//		uint32_t buffArgIdx = mvkDSLBind->getDescriptorCount() + dstIndex;
+//		mvkDSLBind->writeToMetalArgumentBuffer(mtlTexture.buffer, mtlTexture.bufferOffset, buffArgIdx);
+//	}
 }
 
-void MVKTexelBufferDescriptor::read(MVKDescriptorSet* mvkDescSet,
-									uint32_t dstIndex,
+void MVKTexelBufferDescriptor::read(uint32_t dstIndex,
 									VkDescriptorImageInfo* pImageInfo,
 									VkDescriptorBufferInfo* pBufferInfo,
 									VkBufferView* pTexelBufferView,
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
index 6bbf686..e80fe32 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
@@ -20,9 +20,11 @@
 
 #include "MVKDescriptor.h"
 #include "MVKSmallVector.h"
+#include "MVKBitArray.h"
 #include <unordered_set>
 #include <unordered_map>
 #include <vector>
+#include <mutex>
 
 class MVKDescriptorPool;
 class MVKPipelineLayout;
@@ -32,6 +34,13 @@
 #pragma mark -
 #pragma mark MVKDescriptorSetLayout
 
+/** Tracks an MTLArgumentEncoder and its offset into a Metal argument buffer. */
+typedef struct MVKMTLArgumentEncoder {
+	id<MTLArgumentEncoder> mtlArgumentEncoder = nil;
+	NSUInteger argumentBufferOffset = 0;
+	~MVKMTLArgumentEncoder() { [mtlArgumentEncoder release]; }
+} MVKMTLArgumentEncoder;
+
 /** Represents a Vulkan descriptor set layout. */
 class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject {
 
@@ -46,6 +55,7 @@
 	/** Encodes this descriptor set layout and the specified descriptor set on the specified command encoder. */
 	void bindDescriptorSet(MVKCommandEncoder* cmdEncoder,
 						   MVKDescriptorSet* descSet,
+						   uint32_t descSetLayoutIndex,
 						   MVKShaderResourceBinding& dslMTLRezIdxOffsets,
 						   MVKArrayRef<uint32_t> dynamicOffsets,
 						   uint32_t& dynamicOffsetIndex);
@@ -69,27 +79,34 @@
                                         uint32_t dslIndex);
 
 	/** Returns true if this layout is for push descriptors only. */
-	bool isPushDescriptorLayout() const { return _isPushDescriptorLayout; }
+	inline bool isPushDescriptorLayout() const { return _isPushDescriptorLayout; }
+
+	/** Returns whether this layout is using an argument buffer. */
+	inline bool isUsingMetalArgumentBuffer() const { return supportsMetalArgumentBuffers() && !isPushDescriptorLayout(); }
 
 	MVKDescriptorSetLayout(MVKDevice* device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo);
 
 protected:
-
-	friend class MVKDescriptorSetLayoutBinding;
 	friend class MVKPipelineLayout;
-	friend class MVKDescriptorSet;
 	friend class MVKDescriptorPool;
+	friend class MVKDescriptorSetLayoutBinding;
+	friend class MVKDescriptorSet;
 
 	void propagateDebugName() override {}
 	inline uint32_t getDescriptorCount() { return _descriptorCount; }
-	inline uint32_t getDescriptorIndex(uint32_t binding, uint32_t elementIndex = 0) { return _bindingToDescriptorIndex[binding] + elementIndex; }
 	inline MVKDescriptorSetLayoutBinding* getBinding(uint32_t binding) { return &_bindings[_bindingToIndex[binding]]; }
+	inline uint32_t getDescriptorIndex(uint32_t binding, uint32_t elementIndex = 0) { return getBinding(binding)->getDescriptorIndex(elementIndex); }
+	inline NSUInteger getArgumentBufferSize() { return _argumentBufferSize; }
 	const VkDescriptorBindingFlags* getBindingFlags(const VkDescriptorSetLayoutCreateInfo* pCreateInfo);
+	void bindMetalArgumentBuffer(MVKDescriptorSet* descSet);
+	void initMTLArgumentEncoders();
 
 	MVKSmallVector<MVKDescriptorSetLayoutBinding> _bindings;
 	std::unordered_map<uint32_t, uint32_t> _bindingToIndex;
-	std::unordered_map<uint32_t, uint32_t> _bindingToDescriptorIndex;
 	MVKShaderResourceBinding _mtlResourceCounts;
+	MVKMTLArgumentEncoder _argumentEncoder[kMVKShaderStageCount];
+	NSUInteger _argumentBufferSize;
+	std::mutex _argEncodingLock;
 	uint32_t _descriptorCount;
 	bool _isPushDescriptorLayout;
 };
@@ -126,22 +143,29 @@
 			  VkBufferView* pTexelBufferView,
 			  VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock);
 
-	MVKDescriptorSet(MVKDescriptorSetLayout* layout,
-					 uint32_t variableDescriptorCount,
-					 MVKDescriptorPool* pool);
+	/** Acquires and returns an MTLBuffer region allocation of the specified length. */
+	const MVKMTLBufferAllocation* acquireMTLBufferRegion(NSUInteger length);
 
-	~MVKDescriptorSet() override;
+	MVKDescriptorSet(MVKDescriptorPool* pool);
 
 protected:
-	friend class MVKDescriptorSetLayoutBinding;
 	friend class MVKDescriptorPool;
+	friend class MVKDescriptorSetLayout;
+	friend class MVKDescriptorSetLayoutBinding;
 
 	void propagateDebugName() override {}
 	MVKDescriptor* getDescriptor(uint32_t binding, uint32_t elementIndex = 0);
+	VkResult allocate(MVKDescriptorSetLayout* layout,
+					  uint32_t variableDescriptorCount,
+					  NSUInteger mtlArgumentBufferOffset);
+	void free(bool isPoolReset);
+	id<MTLBuffer> getMetalArgumentBuffer();
+	inline NSUInteger getMetalArgumentBufferOffset() { return _mtlArgumentBufferOffset; }
 
+	MVKSmallVector<MVKDescriptor*> _descriptors;
 	MVKDescriptorSetLayout* _layout;
 	MVKDescriptorPool* _pool;
-	MVKSmallVector<MVKDescriptor*> _descriptors;
+	NSUInteger _mtlArgumentBufferOffset;
 	uint32_t _variableDescriptorCount;
 };
 
@@ -162,7 +186,7 @@
 								   VkDescriptorType descriptorType);
 
 protected:
-	friend class MVKPreallocatedDescriptors;
+	friend class MVKDescriptorPool;
 
 	VkResult allocateDescriptor(MVKDescriptor** pMVKDesc);
 	bool findDescriptor(uint32_t endIndex, MVKDescriptor** pMVKDesc);
@@ -177,41 +201,6 @@
 
 
 #pragma mark -
-#pragma mark MVKPreallocatedDescriptors
-
-/** Support class for MVKDescriptorPool that holds preallocated instances of all concrete descriptor classes. */
-class MVKPreallocatedDescriptors : public MVKBaseObject {
-
-public:
-
-	/** Returns the Vulkan API opaque object controlling this object. */
-	MVKVulkanAPIObject* getVulkanAPIObject() override { return nullptr; };
-
-	MVKPreallocatedDescriptors(const VkDescriptorPoolCreateInfo* pCreateInfo);
-
-protected:
-	friend class MVKDescriptorPool;
-
-	VkResult allocateDescriptor(VkDescriptorType descriptorType, MVKDescriptor** pMVKDesc);
-	void freeDescriptor(MVKDescriptor* mvkDesc);
-	void reset();
-
-	MVKDescriptorTypePreallocation<MVKUniformBufferDescriptor> _uniformBufferDescriptors;
-	MVKDescriptorTypePreallocation<MVKStorageBufferDescriptor> _storageBufferDescriptors;
-	MVKDescriptorTypePreallocation<MVKUniformBufferDynamicDescriptor> _uniformBufferDynamicDescriptors;
-	MVKDescriptorTypePreallocation<MVKStorageBufferDynamicDescriptor> _storageBufferDynamicDescriptors;
-	MVKDescriptorTypePreallocation<MVKInlineUniformBlockDescriptor> _inlineUniformBlockDescriptors;
-	MVKDescriptorTypePreallocation<MVKSampledImageDescriptor> _sampledImageDescriptors;
-	MVKDescriptorTypePreallocation<MVKStorageImageDescriptor> _storageImageDescriptors;
-	MVKDescriptorTypePreallocation<MVKInputAttachmentDescriptor> _inputAttachmentDescriptors;
-	MVKDescriptorTypePreallocation<MVKSamplerDescriptor> _samplerDescriptors;
-	MVKDescriptorTypePreallocation<MVKCombinedImageSamplerDescriptor> _combinedImageSamplerDescriptors;
-	MVKDescriptorTypePreallocation<MVKUniformTexelBufferDescriptor> _uniformTexelBufferDescriptors;
-	MVKDescriptorTypePreallocation<MVKStorageTexelBufferDescriptor> _storageTexelBufferDescriptors;
-};
-
-
-#pragma mark -
 #pragma mark MVKDescriptorPool
 
 /** Represents a Vulkan descriptor pool. */
@@ -245,13 +234,30 @@
 	void propagateDebugName() override {}
 	VkResult allocateDescriptorSet(MVKDescriptorSetLayout* mvkDSL, uint32_t variableDescriptorCount, VkDescriptorSet* pVKDS);
 	const uint32_t* getVariableDecriptorCounts(const VkDescriptorSetAllocateInfo* pAllocateInfo);
-	void freeDescriptorSet(MVKDescriptorSet* mvkDS);
+	void freeDescriptorSet(MVKDescriptorSet* mvkDS, bool isPoolReset);
 	VkResult allocateDescriptor(VkDescriptorType descriptorType, MVKDescriptor** pMVKDesc);
 	void freeDescriptor(MVKDescriptor* mvkDesc);
+	static NSUInteger getDescriptorByteCountForMetalArgumentBuffer(VkDescriptorType descriptorType);
+	static NSUInteger getMaxInlineBlockSize(MVKDevice* device, const VkDescriptorPoolCreateInfo* pCreateInfo);
 
-	uint32_t _maxSets;
-	std::unordered_set<MVKDescriptorSet*> _allocatedSets;
-	MVKPreallocatedDescriptors* _preallocatedDescriptors;
+	MVKSmallVector<MVKDescriptorSet> _descriptorSets;
+	MVKBitArray _descriptorSetAvailablility;
+	id<MTLBuffer> _mtlArgumentBuffer;
+	NSUInteger _nextMTLArgumentBufferOffset;
+	MVKMTLBufferAllocator _inlineBlockMTLBufferAllocator;
+
+	MVKDescriptorTypePreallocation<MVKUniformBufferDescriptor> _uniformBufferDescriptors;
+	MVKDescriptorTypePreallocation<MVKStorageBufferDescriptor> _storageBufferDescriptors;
+	MVKDescriptorTypePreallocation<MVKUniformBufferDynamicDescriptor> _uniformBufferDynamicDescriptors;
+	MVKDescriptorTypePreallocation<MVKStorageBufferDynamicDescriptor> _storageBufferDynamicDescriptors;
+	MVKDescriptorTypePreallocation<MVKInlineUniformBlockDescriptor> _inlineUniformBlockDescriptors;
+	MVKDescriptorTypePreallocation<MVKSampledImageDescriptor> _sampledImageDescriptors;
+	MVKDescriptorTypePreallocation<MVKStorageImageDescriptor> _storageImageDescriptors;
+	MVKDescriptorTypePreallocation<MVKInputAttachmentDescriptor> _inputAttachmentDescriptors;
+	MVKDescriptorTypePreallocation<MVKSamplerDescriptor> _samplerDescriptors;
+	MVKDescriptorTypePreallocation<MVKCombinedImageSamplerDescriptor> _combinedImageSamplerDescriptors;
+	MVKDescriptorTypePreallocation<MVKUniformTexelBufferDescriptor> _uniformTexelBufferDescriptors;
+	MVKDescriptorTypePreallocation<MVKStorageTexelBufferDescriptor> _storageTexelBufferDescriptors;
 };
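
The pool now owns a single `MTLBuffer` (`_mtlArgumentBuffer`) carved linearly into per-set slices via `_nextMTLArgumentBufferOffset`. A hedged sketch of the carving step (simplified; the actual sizing and reset logic lives in the pool implementation):

```objc
// Simplified sketch: reserve the next slice of the pool's argument buffer for a set.
NSUInteger mtlArgBuffOffset = _nextMTLArgumentBufferOffset;
_nextMTLArgumentBufferOffset += mvkDSL->getArgumentBufferSize();
// The set records its offset and encodes all of its arguments relative to it.
mvkDS->allocate(mvkDSL, variableDescriptorCount, mtlArgBuffOffset);
```
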
 
 
@@ -304,15 +310,3 @@
 void mvkUpdateDescriptorSetWithTemplate(VkDescriptorSet descriptorSet,
 										VkDescriptorUpdateTemplateKHR updateTemplate,
 										const void* pData);
-
-/**
- * If the shader stage binding has a binding defined for the specified stage, populates
- * the context at the descriptor set binding from the shader stage resource binding.
- */
-void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
-									   MVKShaderStageResourceBinding& ssRB,
-									   spv::ExecutionModel stage,
-									   uint32_t descriptorSetIndex,
-									   uint32_t bindingIndex,
-									   uint32_t count,
-									   MVKSampler* immutableSampler);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
index ed9d5e2..a81c71d 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
@@ -17,9 +17,12 @@
  */
 
 #include "MVKDescriptorSet.h"
+#include "MVKCommandBuffer.h"
 #include "MVKInstance.h"
 #include "MVKOSExtensions.h"
 
+using namespace std;
+
 
 #pragma mark -
 #pragma mark MVKDescriptorSetLayout
@@ -27,17 +30,51 @@
 // A null cmdEncoder can be passed to perform a validation pass
 void MVKDescriptorSetLayout::bindDescriptorSet(MVKCommandEncoder* cmdEncoder,
 											   MVKDescriptorSet* descSet,
+											   uint32_t descSetLayoutIndex,
 											   MVKShaderResourceBinding& dslMTLRezIdxOffsets,
 											   MVKArrayRef<uint32_t> dynamicOffsets,
 											   uint32_t& dynamicOffsetIndex) {
 	if (!cmdEncoder) { clearConfigurationResult(); }
-	if ( !_isPushDescriptorLayout ) {
-		for (auto& dslBind : _bindings) {
-			dslBind.bind(cmdEncoder, descSet, dslMTLRezIdxOffsets, dynamicOffsets, dynamicOffsetIndex);
+	if (_isPushDescriptorLayout) { return; }
+
+	lock_guard<mutex> lock(_argEncodingLock);
+	bindMetalArgumentBuffer(descSet);
+
+	for (auto& dslBind : _bindings) {
+		dslBind.bind(cmdEncoder, descSet, dslMTLRezIdxOffsets, dynamicOffsets, dynamicOffsetIndex);
+	}
+
+	bindMetalArgumentBuffer(nullptr);
+
+	// If we're using a Metal argument buffer, bind it to the command encoder for each stage that will use it.
+	if (cmdEncoder) {
+		id<MTLBuffer> mtlArgBuff = descSet->getMetalArgumentBuffer();
+		NSUInteger descSetOffset = descSet->getMetalArgumentBufferOffset();
+		if (mtlArgBuff) {
+			MVKMTLBufferBinding bb;
+			bb.mtlBuffer = mtlArgBuff;
+			bb.index = descSetLayoutIndex;
+			for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+				auto& argEnc = _argumentEncoder[stage];
+				if (argEnc.mtlArgumentEncoder) {
+					bb.offset = descSetOffset + argEnc.argumentBufferOffset;
+					cmdEncoder->bindBuffer(bb, MVKShaderStage(stage));
+				}
+			}
 		}
 	}
 }
 
+void MVKDescriptorSetLayout::bindMetalArgumentBuffer(MVKDescriptorSet* descSet) {
+	id<MTLBuffer> mtlArgBuff = descSet ? descSet->getMetalArgumentBuffer() : nil;
+	NSUInteger descSetOffset = descSet ? descSet->getMetalArgumentBufferOffset() : 0;
+	for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+		auto& argEnc = _argumentEncoder[stage];
+		[argEnc.mtlArgumentEncoder setArgumentBuffer: mtlArgBuff
+											  offset: (descSetOffset + argEnc.argumentBufferOffset)];
+	}
+}
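
Because each layout owns a single `MTLArgumentEncoder` per stage, re-pointing the encoders at a different set's slice of the pool buffer must be serialized. The critical section used by `bindDescriptorSet()` (and similarly by `MVKDescriptorSet::write()`) follows this shape:

```objc
std::lock_guard<std::mutex> lock(_argEncodingLock);
bindMetalArgumentBuffer(descSet);    // aim the shared per-stage encoders at this set's slice
// ... encode descriptors through _argumentEncoder[stage].mtlArgumentEncoder ...
bindMetalArgumentBuffer(nullptr);    // detach the encoders before releasing the lock
```
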
+
 static const void* getWriteParameters(VkDescriptorType type, const VkDescriptorImageInfo* pImageInfo,
                                       const VkDescriptorBufferInfo* pBufferInfo, const VkBufferView* pTexelBufferView,
                                       const VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock,
@@ -166,6 +203,11 @@
 	for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
 		_bindings[bindIdx].populateShaderConverterContext(context, dslMTLRezIdxOffsets, dslIndex);
 	}
+
+	// If Metal argument buffers are in use overall, but not by this descriptor set layout, mark this descriptor set as discrete.
+	if (supportsMetalArgumentBuffers() && !isUsingMetalArgumentBuffer()) {
+		context.discreteDescriptorSets.push_back(dslIndex);
+	}
 }
 
 MVKDescriptorSetLayout::MVKDescriptorSetLayout(MVKDevice* device,
@@ -186,20 +228,21 @@
 	for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
 		sortedBindings.push_back( { &pCreateInfo->pBindings[bindIdx], pBindingFlags ? pBindingFlags[bindIdx] : 0 } );
 	}
-	std::sort(sortedBindings.begin(), sortedBindings.end(), [](BindInfo bindInfo1, BindInfo bindInfo2) {
+	sort(sortedBindings.begin(), sortedBindings.end(), [](BindInfo bindInfo1, BindInfo bindInfo2) {
 		return bindInfo1.pBinding->binding < bindInfo2.pBinding->binding;
 	});
 
-	_isPushDescriptorLayout = (pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) != 0;
+	_isPushDescriptorLayout = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
 	_descriptorCount = 0;
     _bindings.reserve(bindCnt);
     for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
 		BindInfo& bindInfo = sortedBindings[bindIdx];
-        _bindings.emplace_back(_device, this, bindInfo.pBinding, bindInfo.bindingFlags);
+        _bindings.emplace_back(_device, this, bindInfo.pBinding, bindInfo.bindingFlags, _descriptorCount);
 		_bindingToIndex[bindInfo.pBinding->binding] = bindIdx;
-		_bindingToDescriptorIndex[bindInfo.pBinding->binding] = _descriptorCount;
-		_descriptorCount += _bindings.back().getDescriptorCount(nullptr);
+		_descriptorCount += _bindings.back().getDescriptorCount();
 	}
+
+	initMTLArgumentEncoders();
 }
 
 // Find and return an array of binding flags from the pNext chain of pCreateInfo,
@@ -218,6 +261,32 @@
 	return nullptr;
 }
 
+void MVKDescriptorSetLayout::initMTLArgumentEncoders() {
+	_argumentBufferSize = 0;
+
+	if ( !isUsingMetalArgumentBuffer() ) { return; }
+
+	auto* mvkDvc = getDevice();
+	@autoreleasepool {
+		id<MTLDevice> mtlDvc = mvkDvc->getMTLDevice();
+		NSMutableArray<MTLArgumentDescriptor*>* args = [NSMutableArray arrayWithCapacity: _bindings.size()];
+		for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+			[args removeAllObjects];
+			uint32_t argIdx = 0;
+			for (auto& dslBind : _bindings) {
+				dslBind.addMTLArgumentDescriptors(stage, args, argIdx);
+			}
+			if (args.count) {
+				auto& argEnc = _argumentEncoder[stage];
+				argEnc.mtlArgumentEncoder = [mtlDvc newArgumentEncoderWithArguments: args];		// retained
+				argEnc.argumentBufferOffset = _argumentBufferSize;
+				_argumentBufferSize += mvkAlignByteCount(argEnc.mtlArgumentEncoder.encodedLength,
+														 mvkDvc->_pMetalFeatures->mtlBufferAlignment);
+			}
+		}
+	}
+}
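
As a sketch of the arithmetic above: the per-stage encoder regions are packed back-to-back, each padded to the device's buffer alignment. The following standalone snippet uses an assumed 256-byte alignment and invented encodedLength values; it is illustrative only, not part of this change.

    // Illustrative only; alignment and lengths are assumptions, not queried values.
    #include <cstdint>
    #include <cstdio>

    static uint64_t alignUp(uint64_t count, uint64_t alignment) {
        return (count + alignment - 1) & ~(alignment - 1);
    }

    int main() {
        const uint64_t kAlign = 256;                              // assumed mtlBufferAlignment
        const uint64_t encodedLengths[] = { 136, 0, 0, 72, 24 };  // hypothetical per-stage sizes
        uint64_t layoutSize = 0;
        for (uint64_t len : encodedLengths) {
            if ( !len ) { continue; }                             // stage does not use this layout
            printf("stage region at offset %llu\n", (unsigned long long)layoutSize);
            layoutSize += alignUp(len, kAlign);
        }
        printf("total bytes per descriptor set: %llu\n", (unsigned long long)layoutSize);
        return 0;
    }
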
+
 
 #pragma mark -
 #pragma mark MVKDescriptorSet
@@ -230,28 +299,43 @@
 	return _descriptors[_layout->getDescriptorIndex(binding, elementIndex)];
 }
 
+id<MTLBuffer> MVKDescriptorSet::getMetalArgumentBuffer() { return _pool->_mtlArgumentBuffer; }
+
 template<typename DescriptorAction>
 void MVKDescriptorSet::write(const DescriptorAction* pDescriptorAction,
 							 size_t stride,
 							 const void* pData) {
 
-	VkDescriptorType descType = getDescriptorType(pDescriptorAction->dstBinding);
+	lock_guard<mutex> lock(_layout->_argEncodingLock);
+	_layout->bindMetalArgumentBuffer(this);
+
+	MVKDescriptorSetLayoutBinding* mvkDSLBind = _layout->getBinding(pDescriptorAction->dstBinding);
+	VkDescriptorType descType = mvkDSLBind->getDescriptorType();
     if (descType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
 		// For inline buffers dstArrayElement is a byte offset
 		MVKDescriptor* mvkDesc = getDescriptor(pDescriptorAction->dstBinding);
 		if (mvkDesc->getDescriptorType() == descType) {
-			mvkDesc->write(this, pDescriptorAction->dstArrayElement, stride, pData);
+			mvkDesc->write(mvkDSLBind, this, pDescriptorAction->dstArrayElement, 0, stride, pData);
 		}
     } else {
-        uint32_t dstStartIdx = _layout->getDescriptorIndex(pDescriptorAction->dstBinding, pDescriptorAction->dstArrayElement);
+		uint32_t descStartIdx = _layout->getDescriptorIndex(pDescriptorAction->dstBinding, pDescriptorAction->dstArrayElement);
 		uint32_t descCnt = pDescriptorAction->descriptorCount;
-        for (uint32_t descIdx = 0; descIdx < descCnt; descIdx++) {
-			MVKDescriptor* mvkDesc = _descriptors[dstStartIdx + descIdx];
+		for (uint32_t srcIdx = 0; srcIdx < descCnt; srcIdx++) {
+			MVKDescriptor* mvkDesc = _descriptors[descStartIdx + srcIdx];
 			if (mvkDesc->getDescriptorType() == descType) {
-				mvkDesc->write(this, descIdx, stride, pData);
+				uint32_t dstIdx = pDescriptorAction->dstArrayElement + srcIdx;
+				mvkDesc->write(mvkDSLBind, this, srcIdx, dstIdx, stride, pData);
 			}
-        }
-    }
+		}
+	}
+
+	// For some unexpected reason, GPU capture on Xcode 12 doesn't always correctly expose
+	// the contents of Metal argument buffers. Triggering an extraction of the arg buffer
+	// contents here, after filling it, seems to correct that.
+	// Sigh. A bug report has been filed with Apple.
+	if (getInstance()->isCurrentlyAutoGPUCapturing()) { [_pool->_mtlArgumentBuffer contents]; }
+
+	_layout->bindMetalArgumentBuffer(nullptr);
 }
 
 // Create concrete implementations of the three variations of the write() function.
@@ -274,26 +358,31 @@
 		// For inline buffers srcArrayElement is a byte offset
 		MVKDescriptor* mvkDesc = getDescriptor(pDescriptorCopy->srcBinding);
 		if (mvkDesc->getDescriptorType() == descType) {
-			mvkDesc->read(this, pDescriptorCopy->srcArrayElement, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
+			mvkDesc->read(pDescriptorCopy->srcArrayElement, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
 		}
     } else {
         uint32_t srcStartIdx = _layout->getDescriptorIndex(pDescriptorCopy->srcBinding, pDescriptorCopy->srcArrayElement);
         for (uint32_t descIdx = 0; descIdx < descCnt; descIdx++) {
 			MVKDescriptor* mvkDesc = _descriptors[srcStartIdx + descIdx];
 			if (mvkDesc->getDescriptorType() == descType) {
-				mvkDesc->read(this, descIdx, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
+				mvkDesc->read(descIdx, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
 			}
         }
     }
 }
 
-MVKDescriptorSet::MVKDescriptorSet(MVKDescriptorSetLayout* layout,
-								   uint32_t variableDescriptorCount,
-								   MVKDescriptorPool* pool) :
-	MVKVulkanAPIDeviceObject(pool->_device),
-	_layout(layout),
-	_variableDescriptorCount(variableDescriptorCount),
-	_pool(pool) {
+const MVKMTLBufferAllocation* MVKDescriptorSet::acquireMTLBufferRegion(NSUInteger length) {
+	return _pool->_inlineBlockMTLBufferAllocator.acquireMTLBufferRegion(length);
+}
+
+VkResult MVKDescriptorSet::allocate(MVKDescriptorSetLayout* layout,
+									uint32_t variableDescriptorCount,
+									NSUInteger mtlArgumentBufferOffset) {
+	_layout = layout;
+	_variableDescriptorCount = variableDescriptorCount;
+
+	// If the Metal argument buffer offset has not been set yet, set it now.
+	if ( !_mtlArgumentBufferOffset ) { _mtlArgumentBufferOffset = mtlArgumentBufferOffset; }
 
 	_descriptors.reserve(layout->getDescriptorCount());
 	uint32_t bindCnt = (uint32_t)layout->_bindings.size();
@@ -310,10 +399,24 @@
 		}
 		if ( !wasConfigurationSuccessful() ) { break; }
 	}
+	return getConfigurationResult();
 }
 
-MVKDescriptorSet::~MVKDescriptorSet() {
+void MVKDescriptorSet::free(bool isPoolReset) {
+	_layout = nullptr;
+	_variableDescriptorCount = 0;
+
+	// Only reset the Metal arg buffer offset if the entire pool is being reset
+	if (isPoolReset) { _mtlArgumentBufferOffset = 0; }
+
 	for (auto mvkDesc : _descriptors) { _pool->freeDescriptor(mvkDesc); }
+	_descriptors.clear();
+
+	clearConfigurationResult();
+}
+
+MVKDescriptorSet::MVKDescriptorSet(MVKDescriptorPool* pool) : MVKVulkanAPIDeviceObject(pool->_device), _pool(pool) {
+	free(true);
 }
 
 
@@ -321,26 +424,31 @@
 #pragma mark MVKDescriptorTypePreallocation
 
 #ifndef MVK_CONFIG_PREALLOCATE_DESCRIPTORS
-#   define MVK_CONFIG_PREALLOCATE_DESCRIPTORS    0
+#   define MVK_CONFIG_PREALLOCATE_DESCRIPTORS    1
 #endif
 
-static bool _mvkPreallocateDescriptors = MVK_CONFIG_PREALLOCATE_DESCRIPTORS;
-static bool _mvkPreallocateDescriptorsInitialized = false;
-
 // Returns whether descriptors should be preallocated in the descriptor pools
 // We do this once lazily instead of in a library constructor function to
 // ensure the NSProcessInfo environment is available when called upon.
-static inline bool getMVKPreallocateDescriptors() {
-	if ( !_mvkPreallocateDescriptorsInitialized ) {
-		_mvkPreallocateDescriptorsInitialized = true;
-		MVK_SET_FROM_ENV_OR_BUILD_BOOL(_mvkPreallocateDescriptors, MVK_CONFIG_PREALLOCATE_DESCRIPTORS);
+static inline bool mvkShouldPreallocateDescriptors() {
+	static bool _mvkShouldPreallocateDescriptors = MVK_CONFIG_PREALLOCATE_DESCRIPTORS;
+	static bool _mvkShouldPreallocateDescriptorsInitialized = false;
+	if ( !_mvkShouldPreallocateDescriptorsInitialized ) {
+		_mvkShouldPreallocateDescriptorsInitialized = true;
+		MVK_SET_FROM_ENV_OR_BUILD_BOOL(_mvkShouldPreallocateDescriptors, MVK_CONFIG_PREALLOCATE_DESCRIPTORS);
 	}
-	return _mvkPreallocateDescriptors;
+	return _mvkShouldPreallocateDescriptors;
 }
 
 template<class DescriptorClass>
 VkResult MVKDescriptorTypePreallocation<DescriptorClass>::allocateDescriptor(MVKDescriptor** pMVKDesc) {
 
+	// If we don't preallocate, create and return an instance on the fly.
+	if ( !mvkShouldPreallocateDescriptors() ) {
+		*pMVKDesc = new DescriptorClass();
+		return VK_SUCCESS;
+	}
+
 	uint32_t descCnt = (uint32_t)_descriptors.size();
 
 	// Preallocated descriptors that CANNOT be freed.
@@ -389,6 +497,12 @@
 template<typename DescriptorClass>
 void MVKDescriptorTypePreallocation<DescriptorClass>::freeDescriptor(MVKDescriptor* mvkDesc) {
 
+	// If we don't preallocate, simply destroy the instance.
+	if ( !mvkShouldPreallocateDescriptors() ) {
+		mvkDesc->destroy();
+		return;
+	}
+
 	mvkDesc->reset();
 
 	if (_supportAvailability) {
@@ -411,29 +525,155 @@
 template<typename DescriptorClass>
 MVKDescriptorTypePreallocation<DescriptorClass>::MVKDescriptorTypePreallocation(const VkDescriptorPoolCreateInfo* pCreateInfo,
 																				VkDescriptorType descriptorType) {
-	// There may be more than  one poolSizeCount instance for the desired VkDescriptorType.
-	// Accumulate the descriptor count for the desired VkDescriptorType, and size the collections accordingly.
-	uint32_t descriptorCount = 0;
-	uint32_t poolCnt = pCreateInfo->poolSizeCount;
-	for (uint32_t poolIdx = 0; poolIdx < poolCnt; poolIdx++) {
-		auto& poolSize = pCreateInfo->pPoolSizes[poolIdx];
-		if (poolSize.type == descriptorType) { descriptorCount += poolSize.descriptorCount; }
-	}
-	_descriptors.resize(descriptorCount);
-
 	// Determine whether we need to track the availability of previously freed descriptors.
 	_supportAvailability = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT);
-	if (_supportAvailability) { _availability.resize(descriptorCount, true); }
 	_nextAvailableIndex = 0;
+
+	if (mvkShouldPreallocateDescriptors()) {
+		// There may be more than one pPoolSizes entry for the desired VkDescriptorType.
+		// Accumulate the descriptor count for that type, and size the collections accordingly.
+		uint32_t descriptorCount = 0;
+		uint32_t poolCnt = pCreateInfo->poolSizeCount;
+		for (uint32_t poolIdx = 0; poolIdx < poolCnt; poolIdx++) {
+			auto& poolSize = pCreateInfo->pPoolSizes[poolIdx];
+			if (poolSize.type == descriptorType) { descriptorCount += poolSize.descriptorCount; }
+		}
+
+		_descriptors.resize(descriptorCount);
+		if (_supportAvailability) { _availability.resize(descriptorCount, true); }
+	}
 }
 
 
 #pragma mark -
-#pragma mark MVKPreallocatedDescriptors
+#pragma mark MVKDescriptorPool
+
+VkResult MVKDescriptorPool::allocateDescriptorSets(const VkDescriptorSetAllocateInfo* pAllocateInfo,
+												   VkDescriptorSet* pDescriptorSets) {
+	VkResult rslt = VK_SUCCESS;
+	const auto* pVarDescCounts = getVariableDecriptorCounts(pAllocateInfo);
+	for (uint32_t dsIdx = 0; dsIdx < pAllocateInfo->descriptorSetCount; dsIdx++) {
+		MVKDescriptorSetLayout* mvkDSL = (MVKDescriptorSetLayout*)pAllocateInfo->pSetLayouts[dsIdx];
+		if ( !mvkDSL->isPushDescriptorLayout() ) {
+			rslt = allocateDescriptorSet(mvkDSL, (pVarDescCounts ? pVarDescCounts[dsIdx] : 0), &pDescriptorSets[dsIdx]);
+			if (rslt) { break; }
+		}
+	}
+	return rslt;
+}
+
+// Find and return an array of variable descriptor counts from the pNext chain of pAllocateInfo,
+// or return nullptr if the chain does not include variable descriptor counts.
+const uint32_t* MVKDescriptorPool::getVariableDecriptorCounts(const VkDescriptorSetAllocateInfo* pAllocateInfo) {
+	for (const auto* next = (VkBaseInStructure*)pAllocateInfo->pNext; next; next = next->pNext) {
+		switch (next->sType) {
+			case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT: {
+				auto* pVarDescSetVarCounts = (VkDescriptorSetVariableDescriptorCountAllocateInfoEXT*)next;
+				return pVarDescSetVarCounts->descriptorSetCount ? pVarDescSetVarCounts->pDescriptorCounts : nullptr;
+			}
+			default:
+				break;
+		}
+	}
+	return nullptr;
+}
+
+// Return the descriptor sets to this pool, verifying that each was actually allocated by this pool.
+VkResult MVKDescriptorPool::freeDescriptorSets(uint32_t count, const VkDescriptorSet* pDescriptorSets) {
+	for (uint32_t dsIdx = 0; dsIdx < count; dsIdx++) {
+		freeDescriptorSet((MVKDescriptorSet*)pDescriptorSets[dsIdx], false);
+	}
+	return VK_SUCCESS;
+}
+
+// Free all descriptor sets.
+VkResult MVKDescriptorPool::reset(VkDescriptorPoolResetFlags flags) {
+	for (auto& mvkDS : _descriptorSets) { freeDescriptorSet(&mvkDS, true); }
+
+	_uniformBufferDescriptors.reset();
+	_storageBufferDescriptors.reset();
+	_uniformBufferDynamicDescriptors.reset();
+	_storageBufferDynamicDescriptors.reset();
+	_inlineUniformBlockDescriptors.reset();
+	_sampledImageDescriptors.reset();
+	_storageImageDescriptors.reset();
+	_inputAttachmentDescriptors.reset();
+	_samplerDescriptors.reset();
+	_combinedImageSamplerDescriptors.reset();
+	_uniformTexelBufferDescriptors.reset();
+	_storageTexelBufferDescriptors.reset();
+
+	_nextMTLArgumentBufferOffset = 0;
+
+	return VK_SUCCESS;
+}
+
+// Retrieves the first available descriptor set, and configures it.
+// If none are available, returns an error.
+VkResult MVKDescriptorPool::allocateDescriptorSet(MVKDescriptorSetLayout* mvkDSL,
+												  uint32_t variableDescriptorCount,
+												  VkDescriptorSet* pVKDS) {
+	NSUInteger mtlArgBuffAllocSize = mvkDSL->getArgumentBufferSize();
+	size_t dsCnt = _descriptorSets.size();
+	size_t dsIdx = 0;
+	while (true) {
+		dsIdx = _descriptorSetAvailablility.getIndexOfFirstSetBit(dsIdx, true);
+		if (dsIdx >= dsCnt) { return VK_ERROR_OUT_OF_POOL_MEMORY; }
+
+		bool isSpaceAvail = true;		// If not using Metal arg buffers, space will always be available.
+		MVKDescriptorSet* mvkDS = &_descriptorSets[dsIdx];
+		NSUInteger mtlArgBuffOffset = mvkDS->getMetalArgumentBufferOffset();
+
+		// If the desc set is using a Metal argument buffer, we also need to check whether the desc
+		// set will fit in the slot that may already have been allocated for it in the Metal argument
+		// buffer by a previous allocation that was returned. If this pool has been reset, the desc
+		// sets will not yet have Metal argument buffer allocations assigned.
+		if (mvkDSL->isUsingMetalArgumentBuffer()) {
+
+			// If the offset has not been set, set it now and advance the next available offset.
+			// An offset of zero is valid only for the first desc set in a freshly reset pool,
+			// where the next available offset is also still zero.
+			if ( !mtlArgBuffOffset && (dsIdx || !_nextMTLArgumentBufferOffset)) {
+				mtlArgBuffOffset = _nextMTLArgumentBufferOffset;
+				_nextMTLArgumentBufferOffset += mtlArgBuffAllocSize;
+			}
+
+			// Determine the end of this slot: the offset of the next desc set, if one
+			// exists and its offset has been set, otherwise the end of the arg buffer.
+			size_t nextDSIdx = dsIdx + 1;
+			NSUInteger nextOffset = (nextDSIdx < dsCnt ? _descriptorSets[nextDSIdx].getMetalArgumentBufferOffset() : 0);
+			if ( !nextOffset ) { nextOffset = _mtlArgumentBuffer.length; }
+
+			isSpaceAvail = (mtlArgBuffOffset + mtlArgBuffAllocSize) <= nextOffset;
+		}
+
+		if (isSpaceAvail) {
+			mvkDS->allocate(mvkDSL, variableDescriptorCount, mtlArgBuffOffset);
+			if (mvkDS->wasConfigurationSuccessful()) {
+				*pVKDS = (VkDescriptorSet)mvkDS;
+			} else {
+				freeDescriptorSet(mvkDS, false);
+			}
+			return mvkDS->getConfigurationResult();
+		}
+		dsIdx++;	// Skip to next desc set and resume looking
+	}
+}
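
A self-contained sketch of the slot-fitting search above, with invented sizes (a four-slot pool packed into a 4096-byte argument buffer); the real implementation consults an MVKBitArray of free slots, modeled here by a plain vector of flags.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
        const uint64_t buffLen   = 4096;   // hypothetical argument buffer length
        const uint64_t allocSize = 1536;   // layout size of the set being allocated
        uint64_t nextOffset      = 3584;   // next unassigned byte in the buffer
        std::vector<uint64_t> slotOffsets = { 0, 1024, 2048, 0 };  // 0 == not yet assigned
        std::vector<bool>     slotFree    = { true, false, true, true };

        for (size_t i = 0; i < slotOffsets.size(); i++) {
            if ( !slotFree[i] ) { continue; }
            uint64_t offset = slotOffsets[i];
            if ( !offset && (i || !nextOffset)) {          // claim a fresh offset
                offset = nextOffset;
                nextOffset += allocSize;
            }
            // Slot limit: the next assigned offset, or the end of the buffer.
            uint64_t limit = (i + 1 < slotOffsets.size() && slotOffsets[i + 1])
                                 ? slotOffsets[i + 1] : buffLen;
            if (offset + allocSize <= limit) {
                printf("set fits in slot %zu at offset %llu\n", i, (unsigned long long)offset);
                return 0;
            }
        }
        printf("VK_ERROR_OUT_OF_POOL_MEMORY\n");
        return 1;
    }
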
+
+// Descriptor sets are held in contiguous memory, so the index of the returned descriptor
+// set can be calculated from the pointer difference, and the set can be marked as available.
+void MVKDescriptorPool::freeDescriptorSet(MVKDescriptorSet* mvkDS, bool isPoolReset) {
+	if ( !mvkDS ) { return; }	// Vulkan allows NULL refs.
+
+	if (mvkDS->_pool != this) { reportError(VK_ERROR_INITIALIZATION_FAILED, "A descriptor set is being returned to a descriptor pool that did not allocate it."); }
+
+	mvkDS->free(isPoolReset);
+	size_t dsIdx = mvkDS - _descriptorSets.data();
+	_descriptorSetAvailablility.setBit(dsIdx);
+}
 
 // Allocate a descriptor of the specified type
-VkResult MVKPreallocatedDescriptors::allocateDescriptor(VkDescriptorType descriptorType,
-														MVKDescriptor** pMVKDesc) {
+VkResult MVKDescriptorPool::allocateDescriptor(VkDescriptorType descriptorType,
+											   MVKDescriptor** pMVKDesc) {
 	switch (descriptorType) {
 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
 			return _uniformBufferDescriptors.allocateDescriptor(pMVKDesc);
@@ -476,7 +716,7 @@
 	}
 }
 
-void MVKPreallocatedDescriptors::freeDescriptor(MVKDescriptor* mvkDesc) {
+void MVKDescriptorPool::freeDescriptor(MVKDescriptor* mvkDesc) {
 	VkDescriptorType descriptorType = mvkDesc->getDescriptorType();
 	switch (descriptorType) {
 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
@@ -520,22 +760,10 @@
 	}
 }
 
-void MVKPreallocatedDescriptors::reset() {
-	_uniformBufferDescriptors.reset();
-	_storageBufferDescriptors.reset();
-	_uniformBufferDynamicDescriptors.reset();
-	_storageBufferDynamicDescriptors.reset();
-	_inlineUniformBlockDescriptors.reset();
-	_sampledImageDescriptors.reset();
-	_storageImageDescriptors.reset();
-	_inputAttachmentDescriptors.reset();
-	_samplerDescriptors.reset();
-	_combinedImageSamplerDescriptors.reset();
-	_uniformTexelBufferDescriptors.reset();
-	_storageTexelBufferDescriptors.reset();
-}
-
-MVKPreallocatedDescriptors::MVKPreallocatedDescriptors(const VkDescriptorPoolCreateInfo* pCreateInfo) :
+MVKDescriptorPool::MVKDescriptorPool(MVKDevice* device, const VkDescriptorPoolCreateInfo* pCreateInfo) :
+	MVKVulkanAPIDeviceObject(device),
+	_descriptorSets(pCreateInfo->maxSets, MVKDescriptorSet(this)),
+	_descriptorSetAvailablility(pCreateInfo->maxSets, true),
 	_uniformBufferDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER),
 	_storageBufferDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER),
 	_uniformBufferDynamicDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC),
@@ -547,172 +775,84 @@
 	_samplerDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_SAMPLER),
 	_combinedImageSamplerDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER),
 	_uniformTexelBufferDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER),
-	_storageTexelBufferDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
-}
+	_storageTexelBufferDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER),
+	_inlineBlockMTLBufferAllocator(device, getMaxInlineBlockSize(device, pCreateInfo), true) {
 
+	_mtlArgumentBuffer = nil;
+	_nextMTLArgumentBufferOffset = 0;
+	if (supportsMetalArgumentBuffers()) {
+		NSUInteger mtlArgBuffSize = 0;
+		uint32_t poolCnt = pCreateInfo->poolSizeCount;
+		for (uint32_t poolIdx = 0; poolIdx < poolCnt; poolIdx++) {
+			auto& poolSize = pCreateInfo->pPoolSizes[poolIdx];
+			mtlArgBuffSize += getDescriptorByteCountForMetalArgumentBuffer(poolSize.type) * poolSize.descriptorCount;
+		}
+		mtlArgBuffSize += pCreateInfo->maxSets * _device->_pMetalFeatures->mtlBufferAlignment;	// Leave room for each desc set to be aligned
 
-#pragma mark -
-#pragma mark MVKDescriptorPool
+		// Each shader stage uses its own arg buffer layout. As a result, we need to significantly
+		// overallocate space here, since we don't yet know how the descriptor set layouts will make
+		// use of the descriptors across each pipeline stage. Ideally, the same MVKMTLArgumentEncoder
+		// should be used across all pipeline stages, but that doesn't seem to be possible with the
+		// present combination of SPIRV-Cross and Metal behaviour.
+		mtlArgBuffSize *= kMVKShaderStageCount;
 
-VkResult MVKDescriptorPool::allocateDescriptorSets(const VkDescriptorSetAllocateInfo* pAllocateInfo,
-												   VkDescriptorSet* pDescriptorSets) {
-	if (_allocatedSets.size() + pAllocateInfo->descriptorSetCount > _maxSets) {
-		if (_device->_enabledExtensions.vk_KHR_maintenance1.enabled ||
-			_device->getInstance()->getAPIVersion() >= VK_API_VERSION_1_1) {
-			return VK_ERROR_OUT_OF_POOL_MEMORY;		// Failure is an acceptable test...don't log as error.
-		} else {
-			return reportError(VK_ERROR_INITIALIZATION_FAILED, "The maximum number of descriptor sets that can be allocated by this descriptor pool is %d.", _maxSets);
+		if (mtlArgBuffSize) {
+			_mtlArgumentBuffer = [getMTLDevice() newBufferWithLength: mtlArgBuffSize options: MTLResourceStorageModeShared];	// retained
+			_mtlArgumentBuffer.label = @"Argument buffer";
 		}
 	}
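
Worked example of the sizing arithmetic above, with invented pool counts; the stage count and alignment are assumptions for illustration.

    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint64_t kStageCount = 5;    // vertex, tess-ctl, tess-eval, fragment, compute
        const uint64_t kAlign      = 256;  // assumed mtlBufferAlignment
        const uint64_t maxSets     = 16;
        // Hypothetical pool sizes: 64 uniform buffers and 32 sampled images,
        // at 8 bytes each for an id<MTLBuffer> or id<MTLTexture> argument.
        uint64_t size = (64 * 8) + (32 * 8);
        size += maxSets * kAlign;          // room to align each descriptor set
        size *= kStageCount;               // one argument buffer layout per shader stage
        printf("argument buffer bytes: %llu\n", (unsigned long long)size);  // 24320
        return 0;
    }
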
-
-	VkResult rslt = VK_SUCCESS;
-	const auto* pVarDescCounts = getVariableDecriptorCounts(pAllocateInfo);
-	for (uint32_t dsIdx = 0; dsIdx < pAllocateInfo->descriptorSetCount; dsIdx++) {
-		MVKDescriptorSetLayout* mvkDSL = (MVKDescriptorSetLayout*)pAllocateInfo->pSetLayouts[dsIdx];
-		if ( !mvkDSL->isPushDescriptorLayout() ) {
-			rslt = allocateDescriptorSet(mvkDSL, (pVarDescCounts ? pVarDescCounts[dsIdx] : 0), &pDescriptorSets[dsIdx]);
-			if (rslt) { break; }
-		}
-	}
-	return rslt;
 }
 
-// Find and return an array of variable descriptor counts from the pNext chain of pCreateInfo,
-// or return nullptr if the chain does not include variable descriptor counts.
-const uint32_t* MVKDescriptorPool::getVariableDecriptorCounts(const VkDescriptorSetAllocateInfo* pAllocateInfo) {
-	for (const auto* next = (VkBaseInStructure*)pAllocateInfo->pNext; next; next = next->pNext) {
-		switch (next->sType) {
-			case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT: {
-				auto* pVarDescSetVarCounts = (VkDescriptorSetVariableDescriptorCountAllocateInfoEXT*)next;
-				return pVarDescSetVarCounts->descriptorSetCount ? pVarDescSetVarCounts->pDescriptorCounts : nullptr;
-			}
-			default:
-				break;
-		}
-	}
-	return nullptr;
-}
-
-// Ensure descriptor set was actually allocated, then return to pool
-VkResult MVKDescriptorPool::freeDescriptorSets(uint32_t count, const VkDescriptorSet* pDescriptorSets) {
-	for (uint32_t dsIdx = 0; dsIdx < count; dsIdx++) {
-		MVKDescriptorSet* mvkDS = (MVKDescriptorSet*)pDescriptorSets[dsIdx];
-		if (_allocatedSets.erase(mvkDS)) {
-			freeDescriptorSet(mvkDS);
-		}
-	}
-	return VK_SUCCESS;
-}
-
-// Destroy all allocated descriptor sets
-VkResult MVKDescriptorPool::reset(VkDescriptorPoolResetFlags flags) {
-	for (auto& mvkDS : _allocatedSets) { freeDescriptorSet(mvkDS); }
-	_allocatedSets.clear();
-	if (_preallocatedDescriptors) { _preallocatedDescriptors->reset(); }
-	return VK_SUCCESS;
-}
-
-VkResult MVKDescriptorPool::allocateDescriptorSet(MVKDescriptorSetLayout* mvkDSL,
-												  uint32_t variableDescriptorCount,
-												  VkDescriptorSet* pVKDS) {
-	MVKDescriptorSet* mvkDS = new MVKDescriptorSet(mvkDSL, variableDescriptorCount, this);
-	VkResult rslt = mvkDS->getConfigurationResult();
-
-	if (mvkDS->wasConfigurationSuccessful()) {
-		_allocatedSets.insert(mvkDS);
-		*pVKDS = (VkDescriptorSet)mvkDS;
-	} else {
-		freeDescriptorSet(mvkDS);
-	}
-	return rslt;
-}
-
-void MVKDescriptorPool::freeDescriptorSet(MVKDescriptorSet* mvkDS) { mvkDS->destroy(); }
-
-// Allocate a descriptor of the specified type
-VkResult MVKDescriptorPool::allocateDescriptor(VkDescriptorType descriptorType,
-											   MVKDescriptor** pMVKDesc) {
-
-	// If descriptors are preallocated allocate from the preallocated pools
-	if (_preallocatedDescriptors) {
-		return _preallocatedDescriptors->allocateDescriptor(descriptorType, pMVKDesc);
-	}
-
-	// Otherwise instantiate one of the appropriate type now
+NSUInteger MVKDescriptorPool::getDescriptorByteCountForMetalArgumentBuffer(VkDescriptorType descriptorType) {
 	switch (descriptorType) {
 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
-			*pMVKDesc = new MVKUniformBufferDescriptor();
-			break;
-
-		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-			*pMVKDesc = new MVKStorageBufferDescriptor();
-			break;
-
 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
-			*pMVKDesc = new MVKUniformBufferDynamicDescriptor();
-			break;
-
+		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
 		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-			*pMVKDesc = new MVKStorageBufferDynamicDescriptor();
-			break;
+			return sizeof(id<MTLBuffer>);
 
 		case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
-			*pMVKDesc = new MVKInlineUniformBlockDescriptor();
-			break;
+			return 1;
 
 		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-			*pMVKDesc = new MVKSampledImageDescriptor();
-			break;
+		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+		case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+			return sizeof(id<MTLTexture>);
 
 		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-			*pMVKDesc = new MVKStorageImageDescriptor();
-			break;
-
-		case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-			*pMVKDesc = new MVKInputAttachmentDescriptor();
-			break;
+		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+			return sizeof(id<MTLTexture>) + sizeof(id<MTLBuffer>);
 
 		case VK_DESCRIPTOR_TYPE_SAMPLER:
-			*pMVKDesc = new MVKSamplerDescriptor();
-			break;
+			return sizeof(id<MTLSamplerState>);
 
 		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-			*pMVKDesc = new MVKCombinedImageSamplerDescriptor();
-			break;
-
-		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-			*pMVKDesc = new MVKUniformTexelBufferDescriptor();
-			break;
-
-		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-			*pMVKDesc = new MVKStorageTexelBufferDescriptor();
-			break;
+			return sizeof(id<MTLTexture>) + sizeof(id<MTLSamplerState>);
 
 		default:
-			return reportError(VK_ERROR_INITIALIZATION_FAILED, "Unrecognized VkDescriptorType %d.", descriptorType);
-	}
-	return VK_SUCCESS;
-}
-
-// Free a descriptor, either through the preallocated pool, or directly destroy it
-void MVKDescriptorPool::freeDescriptor(MVKDescriptor* mvkDesc) {
-	if (_preallocatedDescriptors) {
-		_preallocatedDescriptors->freeDescriptor(mvkDesc);
-	} else {
-		mvkDesc->destroy();
+			return 0;
 	}
 }
 
-MVKDescriptorPool::MVKDescriptorPool(MVKDevice* device,
-									 const VkDescriptorPoolCreateInfo* pCreateInfo) : MVKVulkanAPIDeviceObject(device) {
-	_maxSets = pCreateInfo->maxSets;
-	_preallocatedDescriptors = getMVKPreallocateDescriptors() ? new MVKPreallocatedDescriptors(pCreateInfo) : nullptr;
+NSUInteger MVKDescriptorPool::getMaxInlineBlockSize(MVKDevice* device, const VkDescriptorPoolCreateInfo* pCreateInfo) {
+	if ( !device->_pMetalFeatures->argumentBuffers || MVKInlineUniformBlockDescriptor::shouldEmbedInlineBlocksInMetalAgumentBuffer()) { return 0; }
+
+	NSUInteger maxInlineBlockSize = 0;
+	uint32_t poolCnt = pCreateInfo->poolSizeCount;
+	for (uint32_t poolIdx = 0; poolIdx < poolCnt; poolIdx++) {
+		auto& poolSize = pCreateInfo->pPoolSizes[poolIdx];
+		if (poolSize.type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+			NSUInteger iubSize = getDescriptorByteCountForMetalArgumentBuffer(poolSize.type) * poolSize.descriptorCount;
+			maxInlineBlockSize = std::max(iubSize, maxInlineBlockSize);
+		}
+	}
+	return std::min<NSUInteger>(maxInlineBlockSize, device->_pMetalFeatures->maxMTLBufferSize);
 }
 
 // Destroy all allocated descriptor sets and preallocated descriptors
 MVKDescriptorPool::~MVKDescriptorPool() {
 	reset(0);
-	if (_preallocatedDescriptors) { _preallocatedDescriptors->destroy(); }
+	[_mtlArgumentBuffer release];
 }
 
 
@@ -819,26 +959,3 @@
 		dstSet->write(pEntry, pEntry->stride, pCurData);
 	}
 }
-
-void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
-									   MVKShaderStageResourceBinding& ssRB,
-									   spv::ExecutionModel stage,
-									   uint32_t descriptorSetIndex,
-									   uint32_t bindingIndex,
-									   uint32_t count,
-									   MVKSampler* immutableSampler) {
-	mvk::MSLResourceBinding rb;
-
-	auto& rbb = rb.resourceBinding;
-	rbb.stage = stage;
-	rbb.desc_set = descriptorSetIndex;
-	rbb.binding = bindingIndex;
-	rbb.count = count;
-	rbb.msl_buffer = ssRB.bufferIndex;
-	rbb.msl_texture = ssRB.textureIndex;
-	rbb.msl_sampler = ssRB.samplerIndex;
-
-	if (immutableSampler) { immutableSampler->getConstexprSampler(rb); }
-
-	context.resourceBindings.push_back(rb);
-}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 484db4a..624918a 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -819,6 +819,9 @@
 	/** Returns info about the pixel format supported by the physical device. */
 	inline MVKPixelFormats* getPixelFormats() { return _device->getPixelFormats(); }
 
+	/** Returns whether the device supports using Metal argument buffers. */
+	inline bool supportsMetalArgumentBuffers() const { return _device->_pMetalFeatures->argumentBuffers; }
+
 	/** Constructs an instance for the specified device. */
     MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); }
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 6545444..37a1cb9 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -377,6 +377,7 @@
                 break;
             }
 			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
+				bool isTier2 = _metalFeatures.argumentBuffers && _mtlDevice.argumentBuffersSupport == MTLArgumentBuffersTier2;
 				auto* pDescIdxProps = (VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)next;
 				pDescIdxProps->maxUpdateAfterBindDescriptorsInAllPools				= kMVKUndefinedLargeUInt32;
 				pDescIdxProps->shaderUniformBufferArrayNonUniformIndexingNative		= false;
@@ -386,20 +387,20 @@
 				pDescIdxProps->shaderInputAttachmentArrayNonUniformIndexingNative	= _metalFeatures.arrayOfTextures;
 				pDescIdxProps->robustBufferAccessUpdateAfterBind					= _features.robustBufferAccess;
 				pDescIdxProps->quadDivergentImplicitLod								= false;
-				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSamplers			= _properties.limits.maxPerStageDescriptorSamplers;
-				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindUniformBuffers	= _properties.limits.maxPerStageDescriptorUniformBuffers;
-				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageBuffers	= _properties.limits.maxPerStageDescriptorStorageBuffers;
-				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSampledImages	= _properties.limits.maxPerStageDescriptorSampledImages;
-				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageImages	= _properties.limits.maxPerStageDescriptorStorageImages;
+				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSamplers			= isTier2 ? 2048 : _properties.limits.maxPerStageDescriptorSamplers;
+				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindUniformBuffers	= isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorUniformBuffers;
+				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageBuffers	= isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageBuffers;
+				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSampledImages	= isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorSampledImages;
+				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageImages	= isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageImages;
 				pDescIdxProps->maxPerStageDescriptorUpdateAfterBindInputAttachments	= _properties.limits.maxPerStageDescriptorInputAttachments;
-				pDescIdxProps->maxPerStageUpdateAfterBindResources					= _properties.limits.maxPerStageResources;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindSamplers				= _properties.limits.maxDescriptorSetSamplers;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffers		= _properties.limits.maxDescriptorSetUniformBuffers;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic	= _properties.limits.maxDescriptorSetUniformBuffersDynamic;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffers		= _properties.limits.maxDescriptorSetStorageBuffers;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic	= _properties.limits.maxDescriptorSetStorageBuffersDynamic;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindSampledImages			= _properties.limits.maxDescriptorSetSampledImages;
-				pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageImages			= _properties.limits.maxDescriptorSetStorageImages;
+				pDescIdxProps->maxPerStageUpdateAfterBindResources					= isTier2 ? 500000 : _properties.limits.maxPerStageResources;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindSamplers				= isTier2 ? 2048 : _properties.limits.maxDescriptorSetSamplers;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffers		= isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffers;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic	= isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffersDynamic;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffers		= isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffers;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic	= isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffersDynamic;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindSampledImages			= isTier2 ? 500000 : _properties.limits.maxDescriptorSetSampledImages;
+				pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageImages			= isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageImages;
 				pDescIdxProps->maxDescriptorSetUpdateAfterBindInputAttachments		= _properties.limits.maxDescriptorSetInputAttachments;
 				break;
 			}
@@ -456,54 +457,51 @@
 // Populates the device ID properties structure
 void MVKPhysicalDevice::populate(VkPhysicalDeviceIDProperties* pDevIdProps) {
 
-	uint8_t* uuid;
 	size_t uuidComponentOffset;
 
 	//  ---- Device ID ----------------------------------------------
-	uuid = pDevIdProps->deviceUUID;
 	uuidComponentOffset = 0;
-	mvkClear(uuid, VK_UUID_SIZE);
+	mvkClear(&pDevIdProps->deviceUUID);
 
 	// First 4 bytes contains GPU vendor ID
 	uint32_t vendorID = _properties.vendorID;
-	*(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(vendorID);
+	*(uint32_t*)&pDevIdProps->deviceUUID[uuidComponentOffset] = NSSwapHostIntToBig(vendorID);
 	uuidComponentOffset += sizeof(vendorID);
 
 	// Next 4 bytes contains GPU device ID
 	uint32_t deviceID = _properties.deviceID;
-	*(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(deviceID);
+	*(uint32_t*)&pDevIdProps->deviceUUID[uuidComponentOffset] = NSSwapHostIntToBig(deviceID);
 	uuidComponentOffset += sizeof(deviceID);
 
 	// Last 8 bytes contain the GPU registry ID
 	uint64_t regID = mvkGetRegistryID(_mtlDevice);
-	*(uint64_t*)&uuid[uuidComponentOffset] = NSSwapHostLongLongToBig(regID);
+	*(uint64_t*)&pDevIdProps->deviceUUID[uuidComponentOffset] = NSSwapHostLongLongToBig(regID);
 	uuidComponentOffset += sizeof(regID);
 
 
 	// ---- Driver ID ----------------------------------------------
-	uuid = pDevIdProps->driverUUID;
 	uuidComponentOffset = 0;
-	mvkClear(uuid, VK_UUID_SIZE);
+	mvkClear(&pDevIdProps->driverUUID);
 
 	// First 4 bytes contains MoltenVK prefix
 	const char* mvkPfx = "MVK";
 	size_t mvkPfxLen = strlen(mvkPfx);
-	mvkCopy(&uuid[uuidComponentOffset], (uint8_t*)mvkPfx, mvkPfxLen);
+	mvkCopy(&pDevIdProps->driverUUID[uuidComponentOffset], (uint8_t*)mvkPfx, mvkPfxLen);
 	uuidComponentOffset += mvkPfxLen + 1;
 
 	// Next 4 bytes contains MoltenVK version
 	uint32_t mvkVersion = MVK_VERSION;
-	*(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(mvkVersion);
+	*(uint32_t*)&pDevIdProps->driverUUID[uuidComponentOffset] = NSSwapHostIntToBig(mvkVersion);
 	uuidComponentOffset += sizeof(mvkVersion);
 
 	// Next 4 bytes contains highest Metal feature set supported by this device
 	uint32_t mtlFeatSet = getHighestMTLFeatureSet();
-	*(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatSet);
+	*(uint32_t*)&pDevIdProps->driverUUID[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatSet);
 	uuidComponentOffset += sizeof(mtlFeatSet);
 
 
 	// ---- LUID ignored for Metal devices ------------------------
-	mvkClear(pDevIdProps->deviceLUID, VK_LUID_SIZE);
+	mvkClear(&pDevIdProps->deviceLUID);
 	pDevIdProps->deviceNodeMask = 0;
 	pDevIdProps->deviceLUIDValid = VK_FALSE;
 }
@@ -1111,8 +1109,8 @@
 		switch (next->sType) {
 			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
 				auto* budgetProps = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)next;
-				mvkClear(budgetProps->heapBudget, VK_MAX_MEMORY_HEAPS);
-				mvkClear(budgetProps->heapUsage, VK_MAX_MEMORY_HEAPS);
+				mvkClear(&budgetProps->heapBudget);
+				mvkClear(&budgetProps->heapUsage);
 				budgetProps->heapBudget[0] = (VkDeviceSize)getRecommendedMaxWorkingSetSize();
 				budgetProps->heapUsage[0] = (VkDeviceSize)getCurrentAllocatedSize();
 				if (!getHasUnifiedMemory()) {
@@ -1144,7 +1142,8 @@
 	initExtensions();
 	initMemoryProperties();
 	initExternalMemoryProperties();
-	logGPUInfo();
+	initPipelineCacheUUID();			// Call penultimate
+	logGPUInfo();						// Call last
 }
 
 // Initializes the physical device properties (except limits).
@@ -1155,7 +1154,6 @@
 	_properties.driverVersion = MVK_VERSION;
 
 	initGPUInfoProperties();
-	initPipelineCacheUUID();
 }
 
 // Initializes the Metal-specific physical device features of this instance.
@@ -1214,6 +1212,7 @@
 	if (supportsMTLFeatureSet(tvOS_GPUFamily1_v3)) {
 		_metalFeatures.mslVersionEnum = MTLLanguageVersion2_0;
         _metalFeatures.renderWithoutAttachments = true;
+		MVK_SET_FROM_ENV_OR_BUILD_BOOL(_metalFeatures.argumentBuffers, MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS);
 	}
 
 	if (supportsMTLFeatureSet(tvOS_GPUFamily1_v4)) {
@@ -1287,6 +1286,7 @@
     if (supportsMTLFeatureSet(iOS_GPUFamily1_v4)) {
 		_metalFeatures.mslVersionEnum = MTLLanguageVersion2_0;
         _metalFeatures.renderWithoutAttachments = true;
+		MVK_SET_FROM_ENV_OR_BUILD_BOOL(_metalFeatures.argumentBuffers, MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS);
     }
 
 	if (supportsMTLFeatureSet(iOS_GPUFamily1_v5)) {
@@ -1395,6 +1395,7 @@
 		_metalFeatures.presentModeImmediate = true;
 		_metalFeatures.fences = true;
 		_metalFeatures.nonUniformThreadgroups = true;
+		MVK_SET_FROM_ENV_OR_BUILD_BOOL(_metalFeatures.argumentBuffers, MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS);
     }
 
     if (supportsMTLFeatureSet(macOS_GPUFamily1_v4)) {
@@ -2336,24 +2337,26 @@
 void MVKPhysicalDevice::initPipelineCacheUUID() {
 
 	// Clear the UUID
-	mvkClear(&_properties.pipelineCacheUUID, VK_UUID_SIZE);
+	mvkClear(&_properties.pipelineCacheUUID);
 
 	size_t uuidComponentOffset = 0;
 
-	// First 4 bytes contains MoltenVK version
-	uint32_t mvkVersion = MVK_VERSION;
-	*(uint32_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostIntToBig(mvkVersion);
-	uuidComponentOffset += sizeof(mvkVersion);
+	// First 8 bytes contain the first part of the MoltenVK Git revision
+	uint64_t mvkRev = getMoltenVKGitRevision();
+	*(uint64_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostLongLongToBig(mvkRev);
+	uuidComponentOffset += sizeof(mvkRev);
 
 	// Next 4 bytes contains highest Metal feature set supported by this device
 	uint32_t mtlFeatSet = getHighestMTLFeatureSet();
 	*(uint32_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatSet);
 	uuidComponentOffset += sizeof(mtlFeatSet);
 
-	// Last 8 bytes contain the first part of the MoltenVK Git revision
-	uint64_t mvkRev = getMoltenVKGitRevision();
-	*(uint64_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostLongLongToBig(mvkRev);
-	uuidComponentOffset += sizeof(mvkRev);
+	// Last 4 bytes contain flags based on enabled Metal features that
+	// might affect the contents of the pipeline cache (mostly MSL content).
+	uint32_t mtlFeatures = 0;
+	mtlFeatures |= ((bool)_metalFeatures.argumentBuffers) << 0;
+	*(uint32_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatures);
+	uuidComponentOffset += sizeof(mtlFeatures);
 }
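
A standalone sketch of the resulting 16-byte UUID layout; the revision, feature set, and flag values are invented for illustration.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <libkern/OSByteOrder.h>

    int main() {
        uint8_t uuid[16] = {};
        uint64_t gitRev      = OSSwapHostToBigInt64(0x0123456789abcdefULL);  // bytes 0-7
        uint32_t mtlFeatSet  = OSSwapHostToBigInt32(10002);                  // bytes 8-11
        uint32_t mtlFeatures = OSSwapHostToBigInt32(0x1);                    // bytes 12-15; bit 0 = arg buffers
        memcpy(&uuid[0],  &gitRev,      sizeof(gitRev));
        memcpy(&uuid[8],  &mtlFeatSet,  sizeof(mtlFeatSet));
        memcpy(&uuid[12], &mtlFeatures, sizeof(mtlFeatures));
        for (uint8_t b : uuid) { printf("%02x", b); }
        printf("\n");   // 0123456789abcdef0000271200000001
        return 0;
    }
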
 
 uint32_t MVKPhysicalDevice::getHighestMTLFeatureSet() {
@@ -2953,7 +2956,7 @@
 }
 
 VkResult MVKDevice::getDeviceGroupPresentCapabilities(VkDeviceGroupPresentCapabilitiesKHR* pDeviceGroupPresentCapabilities) {
-	mvkClear(pDeviceGroupPresentCapabilities->presentMask, VK_MAX_DEVICE_GROUP_SIZE);
+	mvkClear(&pDeviceGroupPresentCapabilities->presentMask);
 	pDeviceGroupPresentCapabilities->presentMask[0] = 0x1;
 
 	pDeviceGroupPresentCapabilities->modes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR;
@@ -3688,40 +3691,7 @@
 
 	_commandResourceFactory = new MVKCommandResourceFactory(this);
 
-// This code will be refactored in an upcoming release, but for now,
-// suppress deprecation warnings for startCaptureWithDevice: on MacCatalyst.
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wdeprecated-declarations"
-	if (getInstance()->_autoGPUCaptureScope == MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE) {
-		MTLCaptureManager *captureMgr = [MTLCaptureManager sharedCaptureManager];
-		if (!getInstance()->_autoGPUCaptureOutputFile.empty()) {
-			if ( ![captureMgr respondsToSelector: @selector(supportsDestination:)] ||
-				 ![captureMgr supportsDestination: MTLCaptureDestinationGPUTraceDocument] ) {
-				reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Capturing GPU traces to a file requires macOS 10.15 or iOS 13.0. Falling back to Xcode GPU capture.");
-				[captureMgr startCaptureWithDevice: getMTLDevice()];
-			} else {
-				NSError *err = nil;
-				NSString *path, *expandedPath;
-				MTLCaptureDescriptor *captureDesc = [MTLCaptureDescriptor new];
-				captureDesc.captureObject = getMTLDevice();
-				captureDesc.destination = MTLCaptureDestinationGPUTraceDocument;
-				path = [NSString stringWithUTF8String: getInstance()->_autoGPUCaptureOutputFile.c_str()];
-				expandedPath = path.stringByExpandingTildeInPath;
-				captureDesc.outputURL = [NSURL fileURLWithPath: expandedPath];
-				if (![captureMgr startCaptureWithDescriptor: captureDesc error: &err]) {
-					reportError(VK_ERROR_INITIALIZATION_FAILED, "Failed to start GPU capture session to %s (Error code %li): %s", getInstance()->_autoGPUCaptureOutputFile.c_str(), (long)err.code, err.localizedDescription.UTF8String);
-					[err release];
-				}
-				[captureDesc.outputURL release];
-				[captureDesc release];
-				[expandedPath release];
-				[path release];
-			}
-		} else {
-			[captureMgr startCaptureWithDevice: getMTLDevice()];
-		}
-	}
-#pragma clang diagnostic pop
+	getInstance()->startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE, getMTLDevice());
 
 	MVKLogInfo("Created VkDevice to run on GPU %s with the following %d Vulkan extensions enabled:%s",
 			   _pProperties->deviceName,
@@ -4099,9 +4069,7 @@
     [_globalVisibilityResultMTLBuffer release];
 	[_defaultMTLSamplerState release];
 
-	if (getInstance()->_autoGPUCaptureScope == MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE) {
-		[[MTLCaptureManager sharedCaptureManager] stopCapture];
-	}
+	getInstance()->stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE);
 
 	mvkDestroyContainerContents(_privateDataSlots);
 }
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index 16771af..0a2c3ba 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -1930,6 +1930,7 @@
 								 ? mvkClamp(pCreateInfo->maxAnisotropy, 1.0f, _device->_pProperties->limits.maxSamplerAnisotropy)
 								 : 1);
 	mtlSampDesc.normalizedCoordinates = !pCreateInfo->unnormalizedCoordinates;
+	mtlSampDesc.supportArgumentBuffers = supportsMetalArgumentBuffers();
 
 	// If compareEnable is true, but dynamic samplers with depth compare are not available
 	// on this device, this sampler must only be used as an immutable sampler, and will
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h
index 3ff1a95..9bb0175 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h
@@ -158,6 +158,37 @@
 	/** The list of Vulkan extensions, indicating whether each has been enabled by the app. */
 	const MVKExtensionList _enabledExtensions;
 
+	/**
+	 * Checks if automatic GPU capture is enabled for the specified auto capture
+	 * scope, and if so, starts capturing from the specified Metal capture object.
+	 * The capture will be made either to Xcode, or to a file if the
+	 * MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE environment variable has been set.
+	 *
+	 * The autoGPUCaptureScope parameter must be one of:
+	 *   - MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_NONE
+	 *   - MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE
+	 *   - MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME
+	 *
+	 * The mtlCaptureObject parameter must be one of:
+	 *   - MTLDevice for scope MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE
+	 *   - MTLCommandQueue for scope MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME.
+	 */
+	void startAutoGPUCapture(int32_t autoGPUCaptureScope, id mtlCaptureObject);
+
+	/**
+	 * Checks if automatic GPU capture is enabled for the specified
+	 * auto capture scope, and if so, stops capturing.
+	 *
+	 * The autoGPUCaptureScope parameter must be one of:
+	 *   - MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_NONE
+	 *   - MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE
+	 *   - MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME
+	 */
+	void stopAutoGPUCapture(int32_t autoGPUCaptureScope);
+
+	/** Returns whether this instance is currently automatically capturing a GPU trace. */
+	inline bool isCurrentlyAutoGPUCapturing() { return _isCurrentlyAutoGPUCapturing; }
+
 
 #pragma mark Object Creation
 
@@ -205,6 +236,7 @@
 	bool _useCreationCallbacks;
 	const char* _debugReportCallbackLayerPrefix;
 	int32_t _autoGPUCaptureScope;
+	bool _isCurrentlyAutoGPUCapturing;
 	std::string _autoGPUCaptureOutputFile;
 };
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
index 78efef5..9f1d555 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
@@ -280,6 +280,49 @@
 	}
 }
 
+void MVKInstance::startAutoGPUCapture(int32_t autoGPUCaptureScope, id mtlCaptureObject) {
+
+	if (_isCurrentlyAutoGPUCapturing || (_autoGPUCaptureScope != autoGPUCaptureScope)) { return; }
+
+	_isCurrentlyAutoGPUCapturing = true;
+
+	@autoreleasepool {
+		MTLCaptureManager *captureMgr = [MTLCaptureManager sharedCaptureManager];
+
+		MTLCaptureDescriptor *captureDesc = [[MTLCaptureDescriptor new] autorelease];
+		captureDesc.captureObject = mtlCaptureObject;
+		captureDesc.destination = MTLCaptureDestinationDeveloperTools;
+
+		if ( !_autoGPUCaptureOutputFile.empty() ) {
+			if ([captureMgr respondsToSelector: @selector(supportsDestination:)] &&
+				[captureMgr supportsDestination: MTLCaptureDestinationGPUTraceDocument] ) {
+
+				NSString* filePath = [[NSString stringWithUTF8String: _autoGPUCaptureOutputFile.c_str()] stringByExpandingTildeInPath];
+				MVKLogInfo("Capturing GPU trace to file %s.", filePath.UTF8String);
+
+				captureDesc.destination = MTLCaptureDestinationGPUTraceDocument;
+				captureDesc.outputURL = [NSURL fileURLWithPath: filePath];
+
+			} else {
+				reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Capturing GPU traces to a file requires macOS 10.15 or iOS 13.0. Falling back to Xcode GPU capture.");
+			}
+		} else {
+			MVKLogInfo("Capturing GPU trace to Xcode.");
+		}
+
+		NSError *err = nil;
+		if ( ![captureMgr startCaptureWithDescriptor: captureDesc error: &err] ) {
+			reportError(VK_ERROR_INITIALIZATION_FAILED, "Failed to automatically start GPU capture session (Error code %li): %s", (long)err.code, err.localizedDescription.UTF8String);
+		}
+	}
+}
+
+void MVKInstance::stopAutoGPUCapture(int32_t autoGPUCaptureScope) {
+	if (_isCurrentlyAutoGPUCapturing && _autoGPUCaptureScope == autoGPUCaptureScope) {
+		[[MTLCaptureManager sharedCaptureManager] stopCapture];
+		_isCurrentlyAutoGPUCapturing = false;
+	}
+}
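
For reference, a minimal standalone sketch of the same MTLCaptureManager sequence, capturing to an assumed trace file path and using manual reference counting as elsewhere in this codebase; it is illustrative, not MoltenVK's actual call site.

    #import <Metal/Metal.h>

    static void captureOneTrace(id<MTLDevice> mtlDevice) {
        MTLCaptureManager* mgr = [MTLCaptureManager sharedCaptureManager];
        MTLCaptureDescriptor* desc = [[MTLCaptureDescriptor new] autorelease];
        desc.captureObject = mtlDevice;
        desc.destination = MTLCaptureDestinationGPUTraceDocument;   // requires macOS 10.15 / iOS 13
        desc.outputURL = [NSURL fileURLWithPath: @"/tmp/example.gputrace"];  // assumed path
        NSError* err = nil;
        if ( ![mgr startCaptureWithDescriptor: desc error: &err] ) {
            NSLog(@"Failed to start capture: %@", err.localizedDescription);
            return;
        }
        // ... create, encode, and commit MTLCommandBuffers here ...
        [mgr stopCapture];
    }
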
 
 #pragma mark Object Creation
 
@@ -364,6 +407,8 @@
 		setConfigurationResult(reportError(VK_ERROR_INCOMPATIBLE_DRIVER, "Vulkan is not supported on this device. MoltenVK requires Metal, which is not available on this device."));
 	}
 
+	_isCurrentlyAutoGPUCapturing = false;
+
 	if (MVK_MACCAT && !mvkOSVersionIsAtLeast(11.0)) {
 		setConfigurationResult(reportError(VK_ERROR_INCOMPATIBLE_DRIVER, "To support Mac Catalyst, MoltenVK requires macOS 11.0 or above."));
 	}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
index 45161b5..f6c2f93 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
@@ -39,7 +39,7 @@
 #pragma mark MVKPipelineLayout
 
 struct MVKShaderImplicitRezBinding {
-	uint32_t stages[kMVKShaderStageMax];
+	uint32_t stages[kMVKShaderStageCount];
 };
 
 /** Represents a Vulkan pipeline layout. */
@@ -111,7 +111,7 @@
 protected:
 	void propagateDebugName() override {}
 
-	MVKSmallVector<MVKDescriptorSetLayout*, 1> _descriptorSetLayouts;
+	MVKSmallVector<MVKDescriptorSetLayout*, 8> _descriptorSetLayouts;
 	MVKSmallVector<MVKShaderResourceBinding, 1> _dslMTLResourceIndexOffsets;
 	MVKSmallVector<VkPushConstantRange> _pushConstants;
 	MVKShaderResourceBinding _pushConstantsMTLResourceIndexes;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index 7a160fa..4bc3481 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -48,7 +48,8 @@
 		MVKDescriptorSet* descSet = descriptorSets[dsIdx];
 		uint32_t dslIdx = firstSet + dsIdx;
 		MVKDescriptorSetLayout* dsl = _descriptorSetLayouts[dslIdx];
-		dsl->bindDescriptorSet(cmdEncoder, descSet, _dslMTLResourceIndexOffsets[dslIdx],
+		dsl->bindDescriptorSet(cmdEncoder, descSet, dslIdx,
+							   _dslMTLResourceIndexOffsets[dslIdx],
 							   dynamicOffsets, dynamicOffsetIndex);
 		if (!cmdEncoder) { setConfigurationResult(dsl->getConfigurationResult()); }
 	}
@@ -77,6 +78,8 @@
 
 void MVKPipelineLayout::populateShaderConverterContext(SPIRVToMSLConversionConfiguration& context) {
 	context.resourceBindings.clear();
+	context.discreteDescriptorSets.clear();
+	context.inlineUniformBlocks.clear();
 
     // Add resource bindings defined in the descriptor set layouts
 	uint32_t dslCnt = (uint32_t)_descriptorSetLayouts.size();
@@ -94,7 +97,7 @@
 		spv::ExecutionModelFragment,
 		spv::ExecutionModelGLCompute
 	};
-	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
 		mvkPopulateShaderConverterContext(context,
 										  _pushConstantsMTLResourceIndexes.stages[i],
 										  models[i],
@@ -105,40 +108,40 @@
 	}
 }
 
+// Add descriptor set layouts.
+// According to the Vulkan spec, VkDescriptorSetLayout is intended to be consumed when passed
+// to any Vulkan function, and may be safely destroyed by the app immediately afterwards. In order
+// for this pipeline layout to retain the VkDescriptorSetLayout, the MVKDescriptorSetLayout
+// instance is retained, so that it will live on here after it has been destroyed by the API.
+
+// If we're not using Metal argument buffers, accumulate the resource index offsets used
+// by the corresponding DSL, associating the current accumulated resource index offsets
+// with each DSL as it is added. If we're using Metal argument buffers, just accumulate the
+// number of Metal argument buffers we need for each stage.
+// The final accumulation of resource index offsets becomes the resource index offsets that
+// will be used for push constants and any additional auxiliary buffers.
 MVKPipelineLayout::MVKPipelineLayout(MVKDevice* device,
                                      const VkPipelineLayoutCreateInfo* pCreateInfo) : MVKVulkanAPIDeviceObject(device) {
-
-    // Add descriptor set layouts, accumulating the resource index offsets used by the
-    // corresponding DSL, and associating the current accumulated resource index offsets
-    // with each DSL as it is added. The final accumulation of resource index offsets
-    // becomes the resource index offsets that will be used for push contants.
-
-    // According to the Vulkan spec, VkDescriptorSetLayout is intended to be consumed when passed
-	// to any Vulkan function, and may be safely destroyed by app immediately after. In order for
-	// this pipeline layout to retain the VkDescriptorSetLayout, the MVKDescriptorSetLayout
-	// instance is retained, so that it will live on here after it has been destroyed by the API.
-
-	_descriptorSetLayouts.reserve(pCreateInfo->setLayoutCount);
-	for (uint32_t i = 0; i < pCreateInfo->setLayoutCount; i++) {
-		MVKDescriptorSetLayout* pDescSetLayout = (MVKDescriptorSetLayout*)pCreateInfo->pSetLayouts[i];
-		pDescSetLayout->retain();
-		_descriptorSetLayouts.push_back(pDescSetLayout);
-		_dslMTLResourceIndexOffsets.push_back(_pushConstantsMTLResourceIndexes);
-		_pushConstantsMTLResourceIndexes += pDescSetLayout->_mtlResourceCounts;
-	}
-
-	// Add push constants
-	_pushConstants.reserve(pCreateInfo->pushConstantRangeCount);
-	for (uint32_t i = 0; i < pCreateInfo->pushConstantRangeCount; i++) {
-		_pushConstants.push_back(pCreateInfo->pPushConstantRanges[i]);
+	uint32_t dslCnt = pCreateInfo->setLayoutCount;
+	_descriptorSetLayouts.resize(dslCnt);
+	_dslMTLResourceIndexOffsets.resize(dslCnt);
+	for (uint32_t dslIdx = 0; dslIdx < dslCnt; dslIdx++) {
+		MVKDescriptorSetLayout* mvkDSL = (MVKDescriptorSetLayout*)pCreateInfo->pSetLayouts[dslIdx];
+		mvkDSL->retain();
+		_descriptorSetLayouts[dslIdx] = mvkDSL;
+		if (mvkDSL->isUsingMetalArgumentBuffer()) {
+			_pushConstantsMTLResourceIndexes.addArgumentBuffer(mvkDSL->_mtlResourceCounts);
+		} else {
+			_dslMTLResourceIndexOffsets[dslIdx] = _pushConstantsMTLResourceIndexes;
+			_pushConstantsMTLResourceIndexes += mvkDSL->_mtlResourceCounts;
+		}
 	}
 
 	// Set implicit buffer indices
 	// FIXME: Many of these are optional. We shouldn't set the ones that aren't
-	// present--or at least, we should move the ones that are down to avoid
-	// running over the limit of available buffers. But we can't know that
-	// until we compile the shaders.
-	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+	// present--or at least, we should move the ones that are down to avoid running over
+	// the limit of available buffers. But we can't know that until we compile the shaders.
+	for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
 		_swizzleBufferIndex.stages[i] = _pushConstantsMTLResourceIndexes.stages[i].bufferIndex + 1;
 		_bufferSizeBufferIndex.stages[i] = _swizzleBufferIndex.stages[i] + 1;
 		_indirectParamsIndex.stages[i] = _bufferSizeBufferIndex.stages[i] + 1;
@@ -148,6 +151,13 @@
 			_tessCtlLevelBufferIndex = _tessCtlPatchOutputBufferIndex + 1;
 		}
 	}
+
+	// Add push constants
+	_pushConstants.reserve(pCreateInfo->pushConstantRangeCount);
+	for (uint32_t i = 0; i < pCreateInfo->pushConstantRangeCount; i++) {
+		_pushConstants.push_back(pCreateInfo->pPushConstantRanges[i]);
+	}
+
 	// Since we currently can't use multiview with tessellation or geometry shaders,
 	// to conserve the number of buffer bindings, use the same bindings for the
 	// view range buffer as for the indirect parameters buffer.
@@ -164,7 +174,7 @@
 
 void MVKPipeline::bindPushConstants(MVKCommandEncoder* cmdEncoder) {
 	if (cmdEncoder) {
-		for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+		for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
 			cmdEncoder->getPushConstants(mvkVkShaderStageFlagBitsFromMVKShaderStage(MVKShaderStage(i)))->setMTLBufferIndex(_pushConstantsMTLResourceIndexes.stages[i].bufferIndex);
 		}
 	}
@@ -367,7 +377,7 @@
 	}
 
 	// Track dynamic state in _dynamicStateEnabled array
-	mvkClear(_dynamicStateEnabled, kMVKVkDynamicStateCount);	// start with all dynamic state disabled
+	mvkClear(&_dynamicStateEnabled);	// start with all dynamic state disabled
 	const VkPipelineDynamicStateCreateInfo* pDS = pCreateInfo->pDynamicState;
 	if (pDS) {
 		for (uint32_t i = 0; i < pDS->dynamicStateCount; i++) {
@@ -1451,6 +1461,8 @@
     shaderContext.options.mslOptions.texel_buffer_texture_width = _device->_pMetalFeatures->maxTextureDimension;
     shaderContext.options.mslOptions.r32ui_linear_texture_alignment = (uint32_t)_device->getVkFormatTexelBufferAlignment(VK_FORMAT_R32_UINT, this);
 	shaderContext.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers;
+	shaderContext.options.mslOptions.argument_buffers = supportsMetalArgumentBuffers();
+	shaderContext.options.mslOptions.force_active_argument_buffer_resources = supportsMetalArgumentBuffers();
 
     MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout;
     layout->populateShaderConverterContext(shaderContext);
@@ -1693,6 +1705,8 @@
 	shaderContext.options.mslOptions.dispatch_base = _allowsDispatchBase;
 	shaderContext.options.mslOptions.texture_1D_as_2D = mvkTreatTexture1DAs2D();
     shaderContext.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize;
+	shaderContext.options.mslOptions.argument_buffers = supportsMetalArgumentBuffers();
+	shaderContext.options.mslOptions.force_active_argument_buffer_resources = supportsMetalArgumentBuffers();
 #if MVK_MACOS
     shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
 #endif
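
Editor's note: both the render and compute paths now forward the same two MSL options to SPIRV-Cross. A standalone sketch of what those options do when driving SPIRV-Cross directly (illustrative usage, not MoltenVK code):

    #include <spirv_msl.hpp>
    #include <string>
    #include <vector>

    // Compile SPIR-V to MSL with Metal argument buffers enabled, forcing even
    // statically-unused resources into the argument buffer so its layout stays
    // stable across pipeline variants that use different subsets of resources.
    static std::string compileToMSL(const std::vector<uint32_t>& spirv) {
        spirv_cross::CompilerMSL msl(spirv);
        auto opts = msl.get_msl_options();
        opts.argument_buffers = true;
        opts.force_active_argument_buffer_resources = true;
        msl.set_msl_options(opts);
        return msl.compile();
    }
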
@@ -2102,10 +2116,18 @@
 	}
 
 	template<class Archive>
+	void serialize(Archive & archive, DescriptorBinding& db) {
+		archive(db.descriptorSet,
+				db.binding);
+	}
+
+	template<class Archive>
 	void serialize(Archive & archive, SPIRVToMSLConversionConfiguration& ctx) {
 		archive(ctx.options,
 				ctx.shaderInputs,
-				ctx.resourceBindings);
+				ctx.resourceBindings,
+				ctx.discreteDescriptorSets,
+				ctx.inlineUniformBlocks);
 	}
 
 	template<class Archive>
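
Editor's note: these serialize() overloads follow the cereal-style free-function pattern, where one function handles both reading and writing an archive. A minimal sketch of the pattern for a hypothetical struct:

    #include <cstdint>
    // (With cereal: #include <cereal/archives/binary.hpp>)

    struct ExampleBinding { uint32_t descriptorSet; uint32_t binding; };

    // The argument order passed to the archive defines the serialized layout,
    // and the same function is used for both serialization directions.
    template<class Archive>
    void serialize(Archive& archive, ExampleBinding& b) {
        archive(b.descriptorSet, b.binding);
    }

Because the field order defines the layout, adding members (as discreteDescriptorSets and inlineUniformBlocks are added above) changes the serialized format and invalidates previously written caches.
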
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm
index 4ca6b4f..9cde2b0 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm
@@ -754,7 +754,7 @@
 
 void MVKPixelFormats::initVkFormatCapabilities() {
 
-	mvkClear(_vkFormatDescriptions, _vkFormatCount);
+	mvkClear(&_vkFormatDescriptions);
 
 	uint32_t fmtIdx = 0;
 
@@ -1066,7 +1066,7 @@
 
 void MVKPixelFormats::initMTLPixelFormatCapabilities() {
 
-	mvkClear(_mtlPixelFormatDescriptions, _mtlPixelFormatCount);
+	mvkClear(&_mtlPixelFormatDescriptions);
 
 	uint32_t fmtIdx = 0;
 
@@ -1250,7 +1250,7 @@
 
 void MVKPixelFormats::initMTLVertexFormatCapabilities() {
 
-	mvkClear(_mtlVertexFormatDescriptions, _mtlVertexFormatCount);
+	mvkClear(&_mtlVertexFormatDescriptions);
 
 	uint32_t fmtIdx = 0;
 
@@ -1331,8 +1331,8 @@
 void MVKPixelFormats::buildMTLFormatMaps() {
 
 	// Set all MTLPixelFormats and MTLVertexFormats to undefined/invalid
-	mvkClear(_mtlFormatDescIndicesByMTLPixelFormatsCore, _mtlPixelFormatCoreCount);
-	mvkClear(_mtlFormatDescIndicesByMTLVertexFormats, _mtlVertexFormatCount);
+	mvkClear(&_mtlFormatDescIndicesByMTLPixelFormatsCore);
+	mvkClear(&_mtlFormatDescIndicesByMTLVertexFormats);
 
 	// Build lookup table for MTLPixelFormat specs.
 	// For most Metal format values, which are small and consecutive, use a simple lookup array.
@@ -1933,7 +1933,7 @@
 void MVKPixelFormats::buildVkFormatMaps() {
 
 	// Set the VkFormats to undefined/invalid
-	mvkClear(_vkFormatDescIndicesByVkFormatsCore, _vkFormatCoreCount);
+	mvkClear(&_vkFormatDescIndicesByVkFormatsCore);
 
 	// Iterate through the VkFormat descriptions, populate the lookup maps and back pointers,
 	// and validate the Metal formats for the platform and OS.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index 5df8401..91b66c0 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -208,7 +208,6 @@
 	id<MTLCommandBuffer> _activeMTLCommandBuffer;
 };
 
-
 /**
  * Submits the commands in a set of command buffers to the queue.
  * Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
@@ -253,6 +252,7 @@
 
 protected:
 	id<MTLCommandBuffer> getMTLCommandBuffer();
+	void stopAutoGPUCapture();
 
 	MVKSmallVector<MVKPresentTimingInfo, 4> _presentInfo;
 };
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index caec05e..0fc8979 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -162,7 +162,7 @@
 	initName();
 	initExecQueue();
 	initMTLCommandQueue();
-	initGPUCaptureScopes();
+	initGPUCaptureScopes();		// After initMTLCommandQueue()
 }
 
 void MVKQueue::initName() {
@@ -203,6 +203,8 @@
 		_submissionCaptureScope->makeDefault();
 	}
 	_submissionCaptureScope->beginScope();	// Allow Xcode to capture the first frame if desired.
+
+	getInstance()->startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME, _mtlQueue);
 }
 
 MVKQueue::~MVKQueue() {
@@ -402,6 +404,7 @@
 	auto cs = _queue->_submissionCaptureScope;
 	cs->endScope();
 	cs->beginScope();
+	stopAutoGPUCapture();
 
 	this->destroy();
 }
@@ -413,6 +416,16 @@
 	return mtlCmdBuff;
 }
 
+
+void MVKQueuePresentSurfaceSubmission::stopAutoGPUCapture() {
+	MVKInstance* mvkInst = _queue->getInstance();
+	const MVKConfiguration* pMVKConfig = mvkInst->getMoltenVKConfiguration();
+	if (_queue->_queueFamily->getIndex() == pMVKConfig->defaultGPUCaptureScopeQueueFamilyIndex &&
+		_queue->_index == pMVKConfig->defaultGPUCaptureScopeQueueIndex) {
+		mvkInst->stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME);
+	}
+}
+
 MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKQueue* queue,
 																   const VkPresentInfoKHR* pPresentInfo)
 	: MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores) {
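
Editor's note: the frame-scoped auto-capture added here ultimately drives Metal's capture machinery. A rough standalone sketch, assuming the MTLCaptureManager API (macOS 10.15+ form; not MoltenVK's internal code), of capturing one frame on a queue:

    #import <Metal/Metal.h>

    static void captureOneFrame(id<MTLCommandQueue> mtlQueue) {
        MTLCaptureManager* mgr = [MTLCaptureManager sharedCaptureManager];
        MTLCaptureDescriptor* desc = [MTLCaptureDescriptor new];
        desc.captureObject = mtlQueue;      // capture all work on this queue
        NSError* err = nil;
        if ([mgr startCaptureWithDescriptor: desc error: &err]) {
            // ... encode and present one frame here ...
            [mgr stopCapture];              // as stopAutoGPUCapture() does above
        }
    }
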
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
index 0e0eba8..fcf304e 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
@@ -33,7 +33,7 @@
 // Returns a VkExtensionProperties struct populated with a name and version
 static VkExtensionProperties mvkMakeExtProps(const char* extensionName, uint32_t specVersion) {
 	VkExtensionProperties extProps;
-	mvkClear(extProps.extensionName, VK_MAX_EXTENSION_NAME_SIZE);
+	mvkClear(&extProps.extensionName);
 	if (extensionName) { strcpy(extProps.extensionName, extensionName); }
 	extProps.specVersion = specVersion;
 	return extProps;
diff --git a/MoltenVK/MoltenVK/Layers/MVKLayers.mm b/MoltenVK/MoltenVK/Layers/MVKLayers.mm
index aefaef3..3a2bc71 100644
--- a/MoltenVK/MoltenVK/Layers/MVKLayers.mm
+++ b/MoltenVK/MoltenVK/Layers/MVKLayers.mm
@@ -41,9 +41,9 @@
 MVKLayer::MVKLayer() : _supportedInstanceExtensions(nullptr, true) {
 
 	// The core driver layer
-	mvkClear(_layerProperties.layerName, VK_MAX_EXTENSION_NAME_SIZE);
+	mvkClear(&_layerProperties.layerName);
 	strcpy(_layerProperties.layerName, "MoltenVK");
-	mvkClear(_layerProperties.description, VK_MAX_DESCRIPTION_SIZE);
+	mvkClear(&_layerProperties.description);
 	strcpy(_layerProperties.description, "MoltenVK driver layer");
 	_layerProperties.specVersion = MVK_VULKAN_API_VERSION;
 	_layerProperties.implementationVersion = MVK_VERSION;
diff --git a/MoltenVK/MoltenVK/Utility/MVKBitArray.h b/MoltenVK/MoltenVK/Utility/MVKBitArray.h
new file mode 100755
index 0000000..6f2226d
--- /dev/null
+++ b/MoltenVK/MoltenVK/Utility/MVKBitArray.h
@@ -0,0 +1,179 @@
+/*
+ * MVKBitArray.h
+ *
+ * Copyright (c) 2020-2020 The Brenwill Workshop Ltd. (http://www.brenwill.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "MVKFoundation.h"
+#include <algorithm>
+#include <cstdlib>
+
+
+#pragma mark -
+#pragma mark MVKBitArray
+
+/** Represents an array of bits, optimized for storage and fast scanning for bits that are set. */
+class MVKBitArray {
+
+	static constexpr size_t SectionMaskSize = 6;
+	static constexpr size_t SectionSize = 1U << SectionMaskSize;
+
+public:
+
+	/** Returns the value of the bit. */
+	inline bool getBit(size_t bitIndex) {
+		return mvkIsAnyFlagEnabled(_pSections[getIndexOfSection(bitIndex)], getSectionSetMask(bitIndex));
+	}
+
+	/** Sets the value of the bit to 1. */
+	inline void setBit(size_t bitIndex) {
+		size_t secIdx = getIndexOfSection(bitIndex);
+		mvkEnableFlags(_pSections[secIdx], getSectionSetMask(bitIndex));
+
+		if (secIdx < _minUnclearedSectionIndex) { _minUnclearedSectionIndex = secIdx; }
+	}
+
+	/** Sets the value of the bit to 0. */
+	inline void clearBit(size_t bitIndex) {
+		size_t secIdx = getIndexOfSection(bitIndex);
+		mvkDisableFlags(_pSections[secIdx], getSectionSetMask(bitIndex));
+
+		if (secIdx == _minUnclearedSectionIndex && !_pSections[secIdx]) { _minUnclearedSectionIndex++; }
+	}
+
+	/** Sets the value of the bit to the value. */
+	inline void setBit(size_t bitIndex, bool val) {
+		if (val) {
+			setBit(bitIndex);
+		} else {
+			clearBit(bitIndex);
+		}
+	}
+
+	/** Sets all bits in the array to 1. */
+	inline void setAllBits() { setAllSections(~0); }
+
+	/** Clears all bits in the array to 0. */
+	inline void clearAllBits() { setAllSections(0); }
+
+	/**
+	 * Returns the index of the first bit that is set, at or after the specified index,
+	 * and optionally clears that bit. If no bits are set, returns the size() of this bit array.
+	 */
+	size_t getIndexOfFirstSetBit(size_t startIndex, bool shouldClear) {
+		size_t startSecIdx = std::max(getIndexOfSection(startIndex), _minUnclearedSectionIndex);
+		size_t bitIdx = startSecIdx << SectionMaskSize;
+		size_t secCnt = getSectionCount();
+		for (size_t secIdx = startSecIdx; secIdx < secCnt; secIdx++) {
+			// Only the first section visited may skip bits below the start index.
+			size_t lclStartBitIdx = (secIdx == startSecIdx) ? getBitIndexInSection(startIndex) : 0;
+			size_t lclBitIdx = getIndexOfFirstSetBitInSection(_pSections[secIdx], lclStartBitIdx);
+			bitIdx += lclBitIdx;
+			if (lclBitIdx < SectionSize) {
+				if (startSecIdx == _minUnclearedSectionIndex && !_pSections[startSecIdx]) { _minUnclearedSectionIndex = secIdx; }
+				if (shouldClear) { clearBit(bitIdx); }
+				return bitIdx;
+			}
+		}
+		return std::min(bitIdx, _bitCount);
+	}
+
+	/**
+	 * Returns the index of the first bit that is set, at or after the specified index.
+	 * If no bits are set, returns the size() of this bit array.
+	 */
+	inline size_t getIndexOfFirstSetBit(size_t startIndex) {
+		return getIndexOfFirstSetBit(startIndex, false);
+	}
+
+	/**
+	 * Returns the index of the first bit that is set and optionally clears that bit.
+	 * If no bits are set, returns the size() of this bit array.
+	 */
+	inline size_t getIndexOfFirstSetBit(bool shouldClear) {
+		return getIndexOfFirstSetBit(0, shouldClear);
+	}
+
+	/**
+	 * Returns the index of the first bit that is set.
+	 * If no bits are set, returns the size() of this bit array.
+	 */
+	inline size_t getIndexOfFirstSetBit() {
+		return getIndexOfFirstSetBit(0, false);
+	}
+
+	/** Returns the number of bits in this array. */
+	inline size_t size() { return _bitCount; }
+
+	/** Returns whether this array is empty. */
+	inline bool empty() { return !_bitCount; }
+
+	/** Constructs an instance for the specified number of bits, and sets the initial value of all the bits. */
+	MVKBitArray(size_t size, bool val = false) {
+		_bitCount = size;
+		_pSections = _bitCount ? (uint64_t*)malloc(getSectionCount() * sizeof(uint64_t)) : nullptr;	// one uint64_t per section
+		if (val) {
+			setAllBits();
+		} else {
+			clearAllBits();
+		}
+	}
+
+	~MVKBitArray() { free(_pSections); }
+
+protected:
+
+	// Returns the number of sections.
+	inline size_t getSectionCount() {
+		return _bitCount ? getIndexOfSection(_bitCount - 1) + 1 : 0;
+	}
+
+	// Returns the index of the section that contains the specified bit.
+	static inline size_t getIndexOfSection(size_t bitIndex) {
+		return bitIndex >> SectionMaskSize;
+	}
+
+	// Converts the bit index to a local bit index within a section, and returns that local bit index.
+	static inline size_t getBitIndexInSection(size_t bitIndex) {
+		return bitIndex & (SectionSize - 1);
+	}
+
+	// Returns a section mask containing a single 1 value in the bit in the section that
+	// corresponds to the specified global bit index, and 0 values in all other bits.
+	static inline uint64_t getSectionSetMask(size_t bitIndex) {
+		return (uint64_t)1U << ((SectionSize - 1) - getBitIndexInSection(bitIndex));
+	}
+
+	// Returns the local index of the first set bit in the section, starting from the highest order bit.
+	// Clears all bits ahead of the start bit so they will be ignored, then counts the number of zeros
+	// ahead of the set bit. If there are no set bits, returns the number of bits in a section.
+	static size_t getIndexOfFirstSetBitInSection(uint64_t section, size_t lclStartBitIndex) {
+		uint64_t lclStartMask = ~(uint64_t)0;
+		lclStartMask >>= lclStartBitIndex;
+		section &= lclStartMask;
+		return section ? __builtin_clzll(section) : SectionSize;
+	}
+
+	// Sets the content of all sections to the value.
+	void setAllSections(uint64_t sectionValue) {
+		size_t secCnt = getSectionCount();
+		for (size_t secIdx = 0; secIdx < secCnt; secIdx++) {
+			_pSections[secIdx] = sectionValue;
+		}
+		_minUnclearedSectionIndex = sectionValue ? 0 : secCnt;
+	}
+
+	uint64_t* _pSections;
+	size_t _bitCount;
+	size_t _minUnclearedSectionIndex;	// Tracks where to start looking for bits that are set
+};
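
Editor's note: a brief usage sketch of the class above, in the spirit of the descriptor-pool slot tracking this PR introduces (illustrative only):

    // 256 slots, all initially free (set bit == free slot).
    MVKBitArray freeSlots(256, true);

    // Claim the lowest free slot, clearing its bit in the same scan.
    size_t slot = freeSlots.getIndexOfFirstSetBit(true);
    if (slot < freeSlots.size()) {
        // ... use descriptor slot ...
        freeSlots.setBit(slot);     // release the slot when done
    }
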
diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
index 63cc2eb..008b6fc 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
@@ -167,6 +167,7 @@
  */
 #define MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_NONE		0
 #define MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE	1
+#define MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME		2
 #ifndef MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE
 #   define MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE    	MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_NONE
 #endif
@@ -205,6 +206,11 @@
 #   define MVK_CONFIG_TEXTURE_1D_AS_2D    1
 #endif
 
+/** Support Metal argument buffers. Enabled by default. */
+#ifndef MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS
+#   define MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS    1
+#endif
+
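+
+Editor's note: as with the other defaults in this header, the new setting can be
+overridden at build time; a sketch, assuming the macro is predefined before this
+header is processed (e.g. via GCC_PREPROCESSOR_DEFINITIONS or a -D compiler flag):
+
+    #define MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS 0   // opt out of Metal argument buffers
+    #include "MVKEnvironment.h"
+
+Per the release notes, the environment variable of the same name can also override
+the built-in default at runtime.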
 /**
  * IOSurfaces are supported on macOS, and on iOS starting with iOS 11.
  *
diff --git a/MoltenVK/MoltenVK/Utility/MVKSmallVector.h b/MoltenVK/MoltenVK/Utility/MVKSmallVector.h
index 1d1612b..ff3042d 100755
--- a/MoltenVK/MoltenVK/Utility/MVKSmallVector.h
+++ b/MoltenVK/MoltenVK/Utility/MVKSmallVector.h
@@ -43,11 +43,11 @@
 // If you don't need any inline storage use:
 //  MVKSmallVector<int> v;   // this is essentially the same as using std::vector
 //
-// The per-instance memory overhead of MVKSmallVector (16 bytes) is smaller than MVKVector (40 bytes)
-// and std::vector (24 bytes), but MVKSmallVector lacks the polymorphism of MVKVector (or std::vector),
-// that allows them to be passed around to functions without reference to the pre-allocation size.
-// MVKSmallVector supports the contents() function to derive an MVKArrayRef from its contents,
-// which can be passed around without reference to the pre-allocaton size.
+// The per-instance memory overhead of MVKSmallVector (16 bytes) is smaller than that of
+// std::vector (24 bytes). However, unlike std::vector, an MVKSmallVector cannot be passed
+// to functions without reference to its pre-allocation size, because the inline capacity is
+// part of its type. Instead, MVKSmallVector supports the contents() function to derive an
+// MVKArrayRef from its contents, which can be passed around without the pre-allocation size.
 
 #include "MVKSmallVectorAllocator.h"
 #include "MVKFoundation.h"
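
Editor's note: a short sketch of the contents() idiom described above, assuming MVKArrayRef supports iteration (names illustrative):

    // The inline capacity appears only at the declaration site; callees
    // receive an MVKArrayRef<int> regardless of the template parameters.
    static int sum(MVKArrayRef<int> nums) {
        int total = 0;
        for (int n : nums) { total += n; }
        return total;
    }

    static int demo() {
        MVKSmallVector<int, 8> v;
        v.push_back(1);
        v.push_back(2);
        return sum(v.contents());   // no <int, 8> leaks into sum()'s signature
    }
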
diff --git a/MoltenVK/MoltenVK/Utility/MVKVector.h b/MoltenVK/MoltenVK/Utility/MVKVector.h
deleted file mode 100755
index 60a4871..0000000
--- a/MoltenVK/MoltenVK/Utility/MVKVector.h
+++ /dev/null
@@ -1,1000 +0,0 @@
-/*

- * MVKVector.h

- *

- * Copyright (c) 2012-2020 Dr. Torsten Hans (hans@ipacs.de)

- *

- * Licensed under the Apache License, Version 2.0 (the "License");

- * you may not use this file except in compliance with the License.

- * You may obtain a copy of the License at

- * 

- *     http://www.apache.org/licenses/LICENSE-2.0

- * 

- * Unless required by applicable law or agreed to in writing, software

- * distributed under the License is distributed on an "AS IS" BASIS,

- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- * See the License for the specific language governing permissions and

- * limitations under the License.

- */

-

-#pragma once

-

-//

-// in case MVKVector should use std::vector

-//

-#if 0

-

-template<typename T, size_t N = 0>

-using MVKVectorInline = std::vector<T>;

-

-template<typename T>

-using MVKVectorDefault = std::vector<T>;

-

-template<typename T>

-using MVKVector = std::vector<T>;

-

-#else

-

-//

-// MVKVector.h is a sequence container that (optionally) implements a small

-// buffer optimization.

-// It behaves similarly to std::vector, except until a certain number of

-// elements are reserved, it does not use the heap.

-// Like std::vector, MVKVector is guaranteed to use contiguous memory, so if the

-// preallocated number of elements are exceeded, all elements are then in heap.

-// MVKVector supports just the necessary members to be compatible with MoltenVK

-// If C++17 will be the default in the future, code can be simplified quite a bit.

-//

-// Example:

-//

-//  MVKVectorInline<int, 3> vector;

-//  vector.emplace_back( 1 );

-//  vector.emplace_back( 2 );

-//  vector.emplace_back( 3 );

-//  // adding another element now reserves memory from heap

-//  vector.emplace_back( 4 );

-//

-// If you don't need any inline storage use

-//  MVKVectorDefault<int> vector;   // this is essentially the same as using std::vector

-//

-// Passing MVKVectorInline to a function would require to use the same template

-// parameters that have been used for declaration. To avoid this MVKVectorInline

-// is derived from MVKVector. If you want to pass MVKVectorInline to a function

-// use MVKVector.

-//

-#include "MVKVectorAllocator.h"

-#include "MVKFoundation.h"

-#include <type_traits>

-#include <initializer_list>

-#include <utility>

-

-

-template<class Type> class MVKVector

-{

-  mvk_vector_allocator_base<Type> *alc_ptr;

-

-public:

-  class iterator : public std::iterator<std::forward_iterator_tag, Type>

-  {

-    const MVKVector *vector;

-    size_t           index;

-

-  public:

-    iterator() = delete;

-    iterator( const size_t _index, const MVKVector &_vector ) : vector{ &_vector }, index{ _index } { }

-    iterator &operator=( const iterator &it ) = delete;

-

-    Type *operator->() const { return &vector->alc_ptr->ptr[index]; }

-    Type &operator*()  const { return  vector->alc_ptr->ptr[index]; }

-    operator Type*( )  const { return &vector->alc_ptr->ptr[index]; }

-

-    bool operator==( const iterator &it ) const { return vector == it.vector && index == it.index; }

-    bool operator!=( const iterator &it ) const { return vector != it.vector || index != it.index; }

-

-    iterator& operator++()      {                 ++index; return *this; }

-    iterator  operator++( int ) { auto t = *this; ++index; return t; }

-

-    bool   is_valid()     const { return index < vector->size(); }

-    size_t get_position() const { return index; }

-  };

-

-public:

-  typedef Type value_type;

-

-  MVKVector() = delete;

-  MVKVector( mvk_vector_allocator_base<Type> *a ) : alc_ptr{ a } { }

-  virtual ~MVKVector() { }

-

-  iterator begin() const { return iterator( 0,               *this ); }

-  iterator end()   const { return iterator( alc_ptr->size(), *this ); }

-

-  const MVKArrayRef<Type> contents() const { return MVKArrayRef<Type>(data(), size()); }

-        MVKArrayRef<Type> contents()       { return MVKArrayRef<Type>(data(), size()); }

-

-  virtual const Type &operator[]( const size_t i ) const                  = 0;

-  virtual       Type &operator[]( const size_t i )                        = 0;

-  virtual const Type &at( const size_t i ) const                          = 0;

-  virtual       Type &at( const size_t i )                                = 0;

-  virtual const Type &front() const                                       = 0;

-  virtual       Type &front()                                             = 0;

-  virtual const Type &back() const                                        = 0;

-  virtual       Type &back()                                              = 0;

-  virtual const Type *data() const                                        = 0;

-  virtual       Type *data()                                              = 0;

-

-  virtual size_t      size()     const                                    = 0;

-  virtual bool        empty()    const                                    = 0;

-  virtual size_t      capacity() const                                    = 0;

-

-  virtual void        pop_back()                                          = 0;

-  virtual void        clear()                                             = 0;

-  virtual void        reset()                                             = 0;

-  virtual void        reserve( const size_t new_size )                    = 0;

-  virtual void        assign( const size_t new_size, const Type &t )      = 0;

-  virtual void        resize( const size_t new_size, const Type t = { } ) = 0;

-  virtual void        shrink_to_fit()                                     = 0;

-  virtual void        push_back( const Type &t )                          = 0;

-  virtual void        push_back( Type &&t )                               = 0;

-};

-

-

-template<class Type> class MVKVector<Type *>

-{

-  mvk_vector_allocator_base<Type*> *alc_ptr;

-

-  class iterator : public std::iterator<std::forward_iterator_tag, Type*>

-  {

-    const MVKVector *vector;

-    size_t           index;

-

-  public:

-    iterator() = delete;

-    iterator( const size_t _index, const MVKVector &_vector ) : vector{ &_vector }, index{ _index } { }

-    iterator &operator=( const iterator &it ) = delete;

-

-    Type *operator->() const { return vector->alc_ptr->ptr[index]; }

-    Type *&operator*()       { return vector->alc_ptr->ptr[index]; }

-    operator Type*&()  const { return &vector->alc_ptr->ptr[index]; }

-

-    bool operator==( const iterator &it ) const { return vector == it.vector && index == it.index; }

-    bool operator!=( const iterator &it ) const { return vector != it.vector || index != it.index; }

-

-    iterator& operator++()      {                 ++index; return *this; }

-    iterator  operator++( int ) { auto t = *this; ++index; return t; }

-

-    bool   is_valid()     const { return index < vector->size(); }

-    size_t get_position() const { return index; }

-  };

-

-public:

-  typedef Type* value_type;

-

-  MVKVector() = delete;

-  MVKVector( mvk_vector_allocator_base<Type*> *a ) : alc_ptr{ a } { }

-  virtual ~MVKVector() { }

-

-  iterator begin() const { return iterator( 0,               *this ); }

-  iterator end()   const { return iterator( alc_ptr->size(), *this ); }

-

-  const MVKArrayRef<Type*> contents() const { return MVKArrayRef<Type*>(data(), size()); }

-        MVKArrayRef<Type*> contents()       { return MVKArrayRef<Type*>(data(), size()); }

-

-  virtual const Type * const  operator[]( const size_t i ) const             = 0;

-  virtual       Type *       &operator[]( const size_t i )                   = 0;

-  virtual const Type * const  at( const size_t i ) const                     = 0;

-  virtual       Type *       &at( const size_t i )                           = 0;

-  virtual const Type * const  front() const                                  = 0;

-  virtual       Type *       &front()                                        = 0;

-  virtual const Type * const  back() const                                   = 0;

-  virtual       Type *       &back()                                         = 0;

-  virtual const Type * const *data() const                                   = 0;

-  virtual       Type *       *data()                                         = 0;

-

-  virtual size_t              size() const                                   = 0;

-  virtual bool                empty() const                                  = 0;

-  virtual size_t              capacity() const                               = 0;

-

-  virtual void                pop_back()                                     = 0;

-  virtual void                clear()                                        = 0;

-  virtual void                reset()                                        = 0;

-  virtual void                reserve( const size_t new_size )               = 0;

-  virtual void                assign( const size_t new_size, const Type *t ) = 0;

-  virtual void                resize( const size_t new_size, const Type *t = nullptr ) = 0;

-  virtual void                shrink_to_fit()                                = 0;

-  virtual void                push_back( const Type *t )                     = 0;

-};

-

-

-// this is the actual implementation of MVKVector

-template<class Type, typename Allocator = mvk_vector_allocator_default<Type>> class MVKVectorImpl : public MVKVector<Type>

-{

-  friend class MVKVectorImpl;

-

-  Allocator  alc;

-  

-public:

-  class iterator : public std::iterator<std::forward_iterator_tag, Type>

-  {

-    const MVKVectorImpl *vector;

-    size_t               index;

-

-  public:

-    iterator() = delete;

-    iterator( const size_t _index, const MVKVectorImpl &_vector ) : vector{ &_vector }, index{ _index } { }

-

-    iterator &operator=( const iterator &it )

-    {

-      vector = it.vector;

-      index  = it.index;

-      return *this;

-    }

-

-    Type *operator->() { return &vector->alc.ptr[index]; }

-    Type &operator*()  { return  vector->alc.ptr[index]; }

-    operator Type*()   { return &vector->alc.ptr[index]; }

-

-    bool operator==( const iterator &it ) const { return vector == it.vector && index == it.index; }

-    bool operator!=( const iterator &it ) const { return vector != it.vector || index != it.index; }

-

-    iterator& operator++()      {                 ++index; return *this; }

-    iterator  operator++( int ) { auto t = *this; ++index; return t; }

-

-    bool   is_valid()     const { return index < vector->alc.size(); }

-    size_t get_position() const { return index; }

-  };

-

-private:

-  // this is the growth strategy -> adjust to your needs

-  size_t vector_GetNextCapacity() const

-  {

-    constexpr auto ELEMENTS_FOR_64_BYTES = 64 / sizeof( Type );

-    constexpr auto MINIMUM_CAPACITY = ELEMENTS_FOR_64_BYTES > 4 ? ELEMENTS_FOR_64_BYTES : 4;

-    const auto current_capacity = capacity();

-    return MINIMUM_CAPACITY + ( 3 * current_capacity ) / 2;

-  }

-

-  void vector_Allocate( const size_t s )

-  {

-    const auto new_reserved_size = s > size() ? s : size();

-

-    alc.allocate( new_reserved_size );

-  }

-

-  void vector_ReAllocate( const size_t s )

-  {

-    alc.re_allocate( s );

-  }

-

-public:

-  MVKVectorImpl() : MVKVector<Type>{ &alc }

-  {

-  }

-

-  MVKVectorImpl( const size_t n, const Type t ) : MVKVector<Type>{ &alc }

-  {

-    if( n > 0 )

-    {

-      alc.allocate( n );

-

-      for( size_t i = 0; i < n; ++i )

-      {

-        alc.construct( &alc.ptr[i], t );

-      }

-

-      alc.num_elements_used = n;

-    }

-  }

-

-  MVKVectorImpl( const MVKVectorImpl &a ) : MVKVector<Type>{ &alc }

-  {

-    const size_t n = a.size();

-

-    if( n > 0 )

-    {

-      alc.allocate( n );

-

-      for( size_t i = 0; i < n; ++i )

-      {

-        alc.construct( &alc.ptr[i], a.alc.ptr[i] );

-      }

-

-      alc.num_elements_used = n;

-    }

-  }

-

-  template<typename U>

-  MVKVectorImpl( const U &a ) : MVKVector<Type>{ &alc }

-  {

-    const size_t n = a.size();

-

-    if( n > 0 )

-    {

-      alc.allocate( n );

-

-      for( size_t i = 0; i < n; ++i )

-      {

-        alc.construct( &alc.ptr[i], a[i] );

-      }

-

-      alc.num_elements_used = n;

-    }

-  }

-

-  MVKVectorImpl( MVKVectorImpl &&a ) : MVKVector<Type>{ &alc }, alc{ std::move( a.alc ) }

-  {

-  }

-

-  MVKVectorImpl( std::initializer_list<Type> vector ) : MVKVector<Type>{ &alc }

-  {

-    if( vector.size() > capacity() )

-    {

-      vector_Allocate( vector.size() );

-    }

-

-    // std::initializer_list does not yet support std::move, we use it anyway but it has no effect

-    for( auto &&element : vector )

-    {

-      alc.construct( &alc.ptr[alc.num_elements_used], std::move( element ) );

-      ++alc.num_elements_used;

-    }

-  }

-

-  ~MVKVectorImpl()

-  {

-  }

-

-  template<typename U>

-  MVKVectorImpl& operator=( const U &a )

-  {

-    static_assert( std::is_base_of<MVKVector<Type>, U>::value, "argument is not of type MVKVector" );

-

-    if( this != reinterpret_cast<const MVKVector<Type>*>( &a ) )

-    {

-      const auto n = a.size();

-

-      if( alc.num_elements_used == n )

-      {

-        for( size_t i = 0; i < n; ++i )

-        {

-          alc.ptr[i] = a.alc.ptr[i];

-        }

-      }

-      else

-      {

-        if( n > capacity() )

-        {

-          vector_ReAllocate( n );

-        }

-        else

-        {

-          alc.template destruct_all<Type>();

-        }

-

-        for( size_t i = 0; i < n; ++i )

-        {

-          alc.construct( &alc.ptr[i], a[i] );

-        }

-

-        alc.num_elements_used = n;

-      }

-    }

-

-    return *this;

-  }

-

-  MVKVectorImpl& operator=( MVKVectorImpl &&a )

-  {

-    alc.swap( a.alc );

-    return *this;

-  }

-

-  bool operator==( const MVKVectorImpl &a ) const

-  {

-    if( alc.num_elements_used != a.alc.num_elements_used )

-      return false;

-    for( size_t i = 0; i < alc.num_elements_used; ++i )

-    {

-      if( alc[i] != a.alc[i] )

-        return false;

-    }

-    return true;

-  }

-

-  bool operator!=( const MVKVectorImpl &a ) const

-  {

-    if( alc.num_elements_used != a.alc.num_elements_used )

-      return true;

-    for( size_t i = 0; i < alc.num_elements_used; ++i )

-    {

-      if( alc.ptr[i] != a.alc[i] )

-        return true;

-    }

-    return false;

-  }

-

-  void swap( MVKVectorImpl &a )

-  {

-    alc.swap( a.alc );

-  }

-

-  iterator begin() const { return iterator( 0, *this ); }

-  iterator end()   const { return iterator( alc.num_elements_used, *this ); }

-

-  const Type &operator[]( const size_t i ) const override { return alc[i]; }

-        Type &operator[]( const size_t i )       override { return alc[i]; }

-  const Type &at( const size_t i )         const override { return alc[i]; }

-        Type &at( const size_t i )               override { return alc[i]; }

-  const Type &front()                      const override { return alc[0]; }

-        Type &front()                            override { return alc[0]; }

-  const Type &back()                       const override { return alc[alc.num_elements_used - 1]; }

-        Type &back()                             override { return alc[alc.num_elements_used - 1]; }

-  const Type *data()                       const override { return alc.ptr; }

-        Type *data()                             override { return alc.ptr; }

-

-  size_t      size()                       const override { return alc.num_elements_used; }

-  bool        empty()                      const override { return alc.num_elements_used == 0; }

-  size_t      capacity()                   const override { return alc.get_capacity(); }

-

-  void pop_back() override

-  {

-    if( alc.num_elements_used > 0 )

-    {

-      --alc.num_elements_used;

-      alc.destruct( &alc.ptr[alc.num_elements_used] );

-    }

-  }

-

-  void clear() override

-  {

-    alc.template destruct_all<Type>();

-  }

-

-  void reset() override

-  {

-    alc.deallocate();

-  }

-

-  void reserve( const size_t new_size ) override

-  {

-    if( new_size > capacity() )

-    {

-      vector_ReAllocate( new_size );

-    }

-  }

-

-  void assign( const size_t new_size, const Type &t ) override

-  {

-    if( new_size <= capacity() )

-    {

-      clear();

-    }

-    else

-    {

-      vector_Allocate( new_size );

-    }

-

-    for( size_t i = 0; i < new_size; ++i )

-    {

-      alc.construct( &alc.ptr[i], t );

-    }

-

-    alc.num_elements_used = new_size;

-  }

-

-  template <class InputIterator>

-  void assign( InputIterator first, InputIterator last )

-  {

-    clear();

-

-    while( first != last )

-    {

-      emplace_back( *first );

-      ++first;

-    }

-  }

-

-  void resize( const size_t new_size, const Type t = { } ) override

-  {

-    if( new_size == alc.num_elements_used )

-    {

-      return;

-    }

-

-    if( new_size == 0 )

-    {

-      clear();

-      return;

-    }

-

-    if( new_size > alc.num_elements_used )

-    {

-      if( new_size > capacity() )

-      {

-        vector_ReAllocate( new_size );

-      }

-

-      while( alc.num_elements_used < new_size )

-      {

-        alc.construct( &alc.ptr[alc.num_elements_used], t );

-        ++alc.num_elements_used;

-      }

-    }

-    else

-    {

-      //if constexpr( !std::is_trivially_destructible<Type>::value )

-      {

-        while( alc.num_elements_used > new_size )

-        {

-          --alc.num_elements_used;

-          alc.destruct( &alc.ptr[alc.num_elements_used] );

-        }

-      }

-      //else

-      //{

-      //  alc.num_elements_used = new_size;

-      //}

-    }

-  }

-

-  // trims the capacity of the slist to the number of alc.ptr

-  void shrink_to_fit() override

-  {

-    alc.shrink_to_fit();

-  }

-

-  void erase( const iterator it )

-  {

-    if( it.is_valid() )

-    {

-      --alc.num_elements_used;

-

-      for( size_t i = it.get_position(); i < alc.num_elements_used; ++i )

-      {

-        alc.ptr[i] = std::move( alc.ptr[i + 1] );

-      }

-

-      // this is required for types with a destructor

-      alc.destruct( &alc.ptr[alc.num_elements_used] );

-    }

-  }

-

-  void erase( const iterator first, const iterator last )

-  {

-    if( first.is_valid() )

-    {

-      size_t last_pos = last.is_valid() ? last.get_position() : size();

-      size_t n = last_pos - first.get_position();

-      alc.num_elements_used -= n;

-

-      for( size_t i = first.get_position(), e = last_pos; i < alc.num_elements_used && e < alc.num_elements_used + n; ++i, ++e )

-      {

-        alc.ptr[i] = std::move( alc.ptr[e] );

-      }

-

-      // this is required for types with a destructor

-      for( size_t i = alc.num_elements_used; i < alc.num_elements_used + n; ++i )

-      {

-        alc.destruct( &alc.ptr[i] );

-      }

-    }

-  }

-

-  // adds t before it and automatically resizes vector if necessary

-  void insert( const iterator it, Type t )

-  {

-    if( !it.is_valid() || alc.num_elements_used == 0 )

-    {

-      push_back( std::move( t ) );

-    }

-    else

-    {

-      if( alc.num_elements_used == capacity() )

-        vector_ReAllocate( vector_GetNextCapacity() );

-

-      // move construct last element

-      alc.construct( &alc.ptr[alc.num_elements_used], std::move( alc.ptr[alc.num_elements_used - 1] ) );

-

-      // move the remaining elements

-      const size_t it_position = it.get_position();

-      for( size_t i = alc.num_elements_used - 1; i > it_position; --i )

-      {

-        alc.ptr[i] = std::move( alc.ptr[i - 1] );

-      }

-

-      alc.ptr[it_position] = std::move( t );

-      ++alc.num_elements_used;

-    }

-  }

-

-  void push_back( const Type &t ) override

-  {

-    if( alc.num_elements_used == capacity() )

-      vector_ReAllocate( vector_GetNextCapacity() );

-

-    alc.construct( &alc.ptr[alc.num_elements_used], t );

-    ++alc.num_elements_used;

-  }

-

-  void push_back( Type &&t ) override

-  {

-    if( alc.num_elements_used == capacity() )

-      vector_ReAllocate( vector_GetNextCapacity() );

-

-    alc.construct( &alc.ptr[alc.num_elements_used], std::forward<Type>( t ) );

-    ++alc.num_elements_used;

-  }

-

-  template<class... Args>

-  Type &emplace_back( Args&&... args )

-  {

-    if( alc.num_elements_used == capacity() )

-      vector_ReAllocate( vector_GetNextCapacity() );

-

-    alc.construct( &alc.ptr[alc.num_elements_used], std::forward<Args>( args )... );

-    ++alc.num_elements_used;

-

-    return alc.ptr[alc.num_elements_used - 1];

-  }

-};

-

-// specialization for pointer types

-template<class Type, typename Allocator> class MVKVectorImpl<Type*, Allocator> : public MVKVector<Type*>

-{

-  friend class MVKVectorImpl;

-

-  Allocator  alc;

-

-public:

-  class iterator : public std::iterator<std::forward_iterator_tag, Type*>

-  {

-    MVKVectorImpl *vector;

-    size_t         index;

-

-  public:

-    iterator() = delete;

-    iterator( const size_t _index, MVKVectorImpl &_vector ) : vector{ &_vector }, index{ _index } { }

-

-    iterator &operator=( const iterator &it )

-    {

-      vector = it.vector;

-      index = it.index;

-      return *this;

-    }

-

-    Type *&operator*() { return vector->alc[index]; }

-

-    bool operator==( const iterator &it ) const { return vector == it.vector && index == it.index; }

-    bool operator!=( const iterator &it ) const { return vector != it.vector || index != it.index; }

-

-    iterator& operator++() { ++index; return *this; }

-    iterator  operator++( int ) { auto t = *this; ++index; return t; }

-

-    bool   is_valid()     const { return index < vector->alc.size(); }

-    size_t get_position() const { return index; }

-  };

-

-private:

-  // this is the growth strategy -> adjust to your needs

-  size_t vector_GetNextCapacity() const

-  {

-    constexpr auto ELEMENTS_FOR_64_BYTES = 64 / sizeof( Type* );

-    constexpr auto MINIMUM_CAPACITY = ELEMENTS_FOR_64_BYTES > 4 ? ELEMENTS_FOR_64_BYTES : 4;

-    const auto current_capacity = capacity();

-    return MINIMUM_CAPACITY + ( 3 * current_capacity ) / 2;

-  }

-

-  void vector_Allocate( const size_t s )

-  {

-    const auto new_reserved_size = s > size() ? s : size();

-

-    alc.allocate( new_reserved_size );

-  }

-

-  void vector_ReAllocate( const size_t s )

-  {

-    alc.re_allocate( s );

-  }

-

-public:

-  MVKVectorImpl() : MVKVector<Type*>{ &alc }

-  {

-  }

-

-  MVKVectorImpl( const size_t n, const Type *t ) : MVKVector<Type*>{ &alc }

-  {

-    if ( n > 0 )

-    {

-      alc.allocate( n );

-

-      for ( size_t i = 0; i < n; ++i )

-      {

-        alc.ptr[i] = t;

-      }

-

-      alc.num_elements_used = n;

-    }

-  }

-

-  MVKVectorImpl( const MVKVectorImpl &a ) : MVKVector<Type*>{ &alc }

-  {

-    const size_t n = a.size();

-

-    if ( n > 0 )

-    {

-      alc.allocate( n );

-

-      for ( size_t i = 0; i < n; ++i )

-      {

-        alc.ptr[i] = a.alc.ptr[i];

-      }

-

-      alc.num_elements_used = n;

-    }

-  }

-

-  MVKVectorImpl( MVKVectorImpl &&a ) : MVKVector<Type*>{ &alc }, alc{ std::move( a.alc ) }

-  {

-  }

-

-  MVKVectorImpl( std::initializer_list<Type*> vector ) : MVKVector<Type*>{ &alc }

-  {

-    if ( vector.size() > capacity() )

-    {

-      vector_Allocate( vector.size() );

-    }

-

-    // std::initializer_list does not yet support std::move, we use it anyway but it has no effect

-    for ( auto element : vector )

-    {

-      alc.ptr[alc.num_elements_used] = element;

-      ++alc.num_elements_used;

-    }

-  }

-

-  ~MVKVectorImpl()

-  {

-  }

-

-  template<typename U>

-  MVKVectorImpl& operator=( const U &a )

-  {

-    static_assert( std::is_base_of<MVKVector<U>, U>::value, "argument is not of type MVKVector" );

-

-    if ( this != reinterpret_cast< const MVKVector<Type>* >( &a ) )

-    {

-      const auto n = a.size();

-

-      if ( alc.num_elements_used == n )

-      {

-        for ( size_t i = 0; i < n; ++i )

-        {

-          alc.ptr[i] = a.alc.ptr[i];

-        }

-      }

-      else

-      {

-        if ( n > capacity() )

-        {

-          vector_ReAllocate( n );

-        }

-

-        for ( size_t i = 0; i < n; ++i )

-        {

-          alc.ptr[i] = a[i];

-        }

-

-        alc.num_elements_used = n;

-      }

-    }

-

-    return *this;

-  }

-

-  MVKVectorImpl& operator=( MVKVectorImpl &&a )

-  {

-    alc.swap( a.alc );

-    return *this;

-  }

-

-  bool operator==( const MVKVectorImpl &a ) const

-  {

-    if ( alc.num_elements_used != a.alc.num_elements_used )

-      return false;

-    for ( size_t i = 0; i < alc.num_elements_used; ++i )

-    {

-      if ( alc[i] != a.alc[i] )

-        return false;

-    }

-    return true;

-  }

-

-  bool operator!=( const MVKVectorImpl &a ) const

-  {

-    if ( alc.num_elements_used != a.alc.num_elements_used )

-      return true;

-    for ( size_t i = 0; i < alc.num_elements_used; ++i )

-    {

-      if ( alc.ptr[i] != a.alc[i] )

-        return true;

-    }

-    return false;

-  }

-

-  void swap( MVKVectorImpl &a )

-  {

-    alc.swap( a.alc );

-  }

-

-  iterator begin()        { return iterator( 0, *this ); }

-  iterator end()          { return iterator( alc.num_elements_used, *this ); }

-

-  const Type * const  at( const size_t i )         const override { return alc[i]; }

-        Type *       &at( const size_t i )               override { return alc[i]; }

-  const Type * const  operator[]( const size_t i ) const override { return alc[i]; }

-        Type *       &operator[]( const size_t i )       override { return alc[i]; }

-  const Type * const  front()                      const override { return alc[0]; }

-        Type *       &front()                            override { return alc[0]; }

-  const Type * const  back()                       const override { return alc[alc.num_elements_used - 1]; }

-        Type *       &back()                             override { return alc[alc.num_elements_used - 1]; }

-  const Type * const *data()                       const override { return alc.ptr; }

-        Type *       *data()                             override { return alc.ptr; }

-

-  size_t   size()                                  const override { return alc.num_elements_used; }

-  bool     empty()                                 const override { return alc.num_elements_used == 0; }

-  size_t   capacity()                              const override { return alc.get_capacity(); }

-

-  void pop_back() override

-  {

-    if ( alc.num_elements_used > 0 )

-    {

-      --alc.num_elements_used;

-    }

-  }

-

-  void clear() override

-  {

-    alc.num_elements_used = 0;

-  }

-

-  void reset() override

-  {

-    alc.deallocate();

-  }

-

-  void reserve( const size_t new_size ) override

-  {

-    if ( new_size > capacity() )

-    {

-      vector_ReAllocate( new_size );

-    }

-  }

-

-  void assign( const size_t new_size, const Type *t ) override

-  {

-    if ( new_size <= capacity() )

-    {

-      clear();

-    }

-    else

-    {

-      vector_Allocate( new_size );

-    }

-

-    for ( size_t i = 0; i < new_size; ++i )

-    {

-      alc.ptr[i] = const_cast< Type* >( t );

-    }

-

-    alc.num_elements_used = new_size;

-  }

-

-  void resize( const size_t new_size, const Type *t = nullptr ) override

-  {

-    if ( new_size == alc.num_elements_used )

-    {

-      return;

-    }

-

-    if ( new_size == 0 )

-    {

-      clear();

-      return;

-    }

-

-    if ( new_size > alc.num_elements_used )

-    {

-      if ( new_size > capacity() )

-      {

-        vector_ReAllocate( new_size );

-      }

-

-      while ( alc.num_elements_used < new_size )

-      {

-        alc.ptr[alc.num_elements_used] = const_cast< Type* >( t );

-        ++alc.num_elements_used;

-      }

-    }

-    else

-    {

-      alc.num_elements_used = new_size;

-    }

-  }

-

-  // trims the capacity of the MVKVector to the number of used elements

-  void shrink_to_fit() override

-  {

-    alc.shrink_to_fit();

-  }

-

-  void erase( const iterator it )

-  {

-    if ( it.is_valid() )

-    {

-      --alc.num_elements_used;

-

-      for ( size_t i = it.get_position(); i < alc.num_elements_used; ++i )

-      {

-        alc.ptr[i] = alc.ptr[i + 1];

-      }

-    }

-  }

-

-  void erase( const iterator first, const iterator last )

-  {

-    if( first.is_valid() )

-    {

-      size_t last_pos = last.is_valid() ? last.get_position() : size();

-      size_t n = last_pos - first.get_position();

-      alc.num_elements_used -= n;

-

-      for( size_t i = first.get_position(), e = last_pos; i < alc.num_elements_used && e < alc.num_elements_used + n; ++i, ++e )

-      {

-        alc.ptr[i] = alc.ptr[e];

-      }

-    }

-  }

-

-  // adds t before position it and automatically resizes vector if necessary

-  void insert( const iterator it, const Type *t )

-  {

-    if ( !it.is_valid() || alc.num_elements_used == 0 )

-    {

-      push_back( t );

-    }

-    else

-    {

-      if ( alc.num_elements_used == capacity() )

-        vector_ReAllocate( vector_GetNextCapacity() );

-

-      // move the remaining elements

-      const size_t it_position = it.get_position();

-      for ( size_t i = alc.num_elements_used; i > it_position; --i )

-      {

-        alc.ptr[i] = alc.ptr[i - 1];

-      }

-

-      alc.ptr[it_position] = const_cast< Type* >( t );

-      ++alc.num_elements_used;

-    }

-  }

-

-  void push_back( const Type *t ) override

-  {

-    if ( alc.num_elements_used == capacity() )

-      vector_ReAllocate( vector_GetNextCapacity() );

-

-    alc.ptr[alc.num_elements_used] = const_cast< Type* >( t );

-    ++alc.num_elements_used;

-  }

-};

-

-

-template<typename Type>

-using MVKVectorDefault = MVKVectorImpl<Type, mvk_vector_allocator_default<Type>>;

-

-template<typename Type, size_t N = 8>

-using MVKVectorInline  = MVKVectorImpl<Type, mvk_vector_allocator_with_stack<Type, N>>;

-

-

-#endif

-

-

diff --git a/MoltenVK/MoltenVK/Utility/MVKVectorAllocator.h b/MoltenVK/MoltenVK/Utility/MVKVectorAllocator.h
deleted file mode 100755
index 38623c0..0000000
--- a/MoltenVK/MoltenVK/Utility/MVKVectorAllocator.h
+++ /dev/null
@@ -1,552 +0,0 @@
-/*

- * MVKVectorAllocator.h

- *

- * Copyright (c) 2012-2020 Dr. Torsten Hans (hans@ipacs.de)

- *

- * Licensed under the Apache License, Version 2.0 (the "License");

- * you may not use this file except in compliance with the License.

- * You may obtain a copy of the License at

- * 

- *     http://www.apache.org/licenses/LICENSE-2.0

- * 

- * Unless required by applicable law or agreed to in writing, software

- * distributed under the License is distributed on an "AS IS" BASIS,

- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- * See the License for the specific language governing permissions and

- * limitations under the License.

- */

-

-#pragma once

-

-#include <new>

-#include <type_traits>

-

-

-#define MVK_VECTOR_CHECK_BOUNDS if (i >= num_elements_used) { throw std::out_of_range("Index out of range"); }

-

-

-namespace mvk_memory_allocator

-{

-  inline char *alloc( const size_t num_bytes )

-  {

-    return new char[num_bytes];

-  }

-

-  inline void free( void *ptr )

-  {

-    delete[] (char*)ptr;

-  }

-};

-

-

-//////////////////////////////////////////////////////////////////////////////////////////

-//

-// mvk_vector_allocator_base -> base class so we can use MVKVector with template parameter

-//

-//////////////////////////////////////////////////////////////////////////////////////////

-template<typename T>

-class mvk_vector_allocator_base

-{

-public:

-  typedef T value_type;

-  T      *ptr;

-  size_t  num_elements_used;

-

-public:

-  mvk_vector_allocator_base()                                           : ptr{ nullptr }, num_elements_used{ 0 }                  { }

-  mvk_vector_allocator_base( T *_ptr, const size_t _num_elements_used ) : ptr{ _ptr },    num_elements_used{ _num_elements_used } { }

-  virtual ~mvk_vector_allocator_base() { }

-

-  const T &operator[]( const size_t i ) const { MVK_VECTOR_CHECK_BOUNDS return ptr[i]; }

-  T       &operator[]( const size_t i )       { MVK_VECTOR_CHECK_BOUNDS return ptr[i]; }

-

-  size_t size() const { return num_elements_used; }

-

-  virtual size_t get_capacity() const = 0;

-  virtual void   allocate( const size_t num_elements_to_reserve ) = 0;

-  virtual void   re_allocate( const size_t num_elements_to_reserve ) = 0;

-  virtual void   shrink_to_fit() = 0;

-  virtual void   deallocate() = 0;

-};

-

-

-//////////////////////////////////////////////////////////////////////////////////////////

-//

-// mvk_vector_allocator_default -> malloc based allocator for MVKVector

-//

-//////////////////////////////////////////////////////////////////////////////////////////

-template <typename T>

-class mvk_vector_allocator_default final : public mvk_vector_allocator_base<T>

-{

-private:

-  size_t  num_elements_reserved;

-

-public:

-  template<class S, class... Args> typename std::enable_if< !std::is_trivially_constructible<S>::value >::type

-    construct( S *_ptr, Args&&... _args )

-  {

-    new ( _ptr ) S( std::forward<Args>( _args )... );

-  }

-

-  template<class S, class... Args> typename std::enable_if< std::is_trivially_constructible<S>::value >::type

-    construct( S *_ptr, Args&&... _args )

-  {

-    *_ptr = S( std::forward<Args>( _args )... );

-  }

-

-  template<class S> typename std::enable_if< !std::is_trivially_destructible<S>::value >::type

-    destruct( S *_ptr )

-  {

-    _ptr->~S();

-  }

-

-  template<class S> typename std::enable_if< std::is_trivially_destructible<S>::value >::type

-    destruct( S *_ptr )

-  {

-  }

-

-  template<class S> typename std::enable_if< !std::is_trivially_destructible<S>::value >::type

-    destruct_all()

-  {

-    for( size_t i = 0; i < mvk_vector_allocator_base<S>::num_elements_used; ++i )

-    {

-      mvk_vector_allocator_base<S>::ptr[i].~S();

-    }

-

-    mvk_vector_allocator_base<S>::num_elements_used = 0;

-  }

-

-  template<class S> typename std::enable_if< std::is_trivially_destructible<S>::value >::type

-    destruct_all()

-  {

-    mvk_vector_allocator_base<T>::num_elements_used = 0;

-  }

-

-public:

-  constexpr mvk_vector_allocator_default() : mvk_vector_allocator_base<T>{}, num_elements_reserved{ 0 }

-  {

-  }

-

-  mvk_vector_allocator_default( mvk_vector_allocator_default &&a ) : mvk_vector_allocator_base<T>{ a.ptr, a.num_elements_used }, num_elements_reserved{ a.num_elements_reserved }

-  {

-    a.ptr                   = nullptr;

-    a.num_elements_used     = 0;

-    a.num_elements_reserved = 0;

-  }

-

-  virtual ~mvk_vector_allocator_default()

-  {

-    deallocate();

-  }

-

-  size_t get_capacity() const override

-  {

-    return num_elements_reserved;

-  }

-

-  void swap( mvk_vector_allocator_default &a )

-  {

-    const auto copy_ptr                   = a.ptr;

-    const auto copy_num_elements_used     = a.num_elements_used;

-    const auto copy_num_elements_reserved = a.num_elements_reserved;

-

-    a.ptr                   = mvk_vector_allocator_base<T>::ptr;

-    a.num_elements_used     = mvk_vector_allocator_base<T>::num_elements_used;

-    a.num_elements_reserved = num_elements_reserved;

-

-    mvk_vector_allocator_base<T>::ptr                = copy_ptr;

-    mvk_vector_allocator_base<T>::num_elements_used  = copy_num_elements_used;

-    num_elements_reserved = copy_num_elements_reserved;

-  }

-

-  void allocate( const size_t num_elements_to_reserve ) override

-  {

-    deallocate();

-

-    mvk_vector_allocator_base<T>::ptr                = reinterpret_cast< T* >( mvk_memory_allocator::alloc( num_elements_to_reserve * sizeof( T ) ) );

-    mvk_vector_allocator_base<T>::num_elements_used  = 0;

-    num_elements_reserved = num_elements_to_reserve;

-  }

-

-  void re_allocate( const size_t num_elements_to_reserve ) override

-  {

-    //if constexpr( std::is_trivially_copyable<T>::value )

-    //{

-    //  ptr = reinterpret_cast< T* >( mvk_memory_allocator::tm_memrealloc( ptr, num_elements_to_reserve * sizeof( T ) );

-    //}

-    //else

-    {

-      auto *new_ptr = reinterpret_cast< T* >( mvk_memory_allocator::alloc( num_elements_to_reserve * sizeof( T ) ) );

-

-      for( size_t i = 0; i < mvk_vector_allocator_base<T>::num_elements_used; ++i )

-      {

-        construct( &new_ptr[i], std::move( mvk_vector_allocator_base<T>::ptr[i] ) );

-        destruct( &mvk_vector_allocator_base<T>::ptr[i] );

-      }

-

-      //if ( ptr != nullptr )

-      {

-        mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );

-      }

-

-      mvk_vector_allocator_base<T>::ptr = new_ptr;

-    }

-

-    num_elements_reserved = num_elements_to_reserve;

-  }

-

-  void shrink_to_fit() override

-  {

-    if( mvk_vector_allocator_base<T>::num_elements_used == 0 )

-    {

-      deallocate();

-    }

-    else

-    {

-      auto *new_ptr = reinterpret_cast< T* >( mvk_memory_allocator::alloc( mvk_vector_allocator_base<T>::num_elements_used * sizeof( T ) ) );

-

-      for( size_t i = 0; i < mvk_vector_allocator_base<T>::num_elements_used; ++i )

-      {

-        construct( &new_ptr[i], std::move( mvk_vector_allocator_base<T>::ptr[i] ) );

-        destruct( &mvk_vector_allocator_base<T>::ptr[i] );
-      }
-
-      mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
-
-      mvk_vector_allocator_base<T>::ptr = new_ptr;
-      num_elements_reserved = mvk_vector_allocator_base<T>::num_elements_used;
-    }
-  }
-
-  void deallocate() override
-  {
-    destruct_all<T>();
-
-    mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
-
-    mvk_vector_allocator_base<T>::ptr = nullptr;
-    num_elements_reserved = 0;
-  }
-};
-
-
-//////////////////////////////////////////////////////////////////////////////////////////
-//
-// mvk_vector_allocator_with_stack -> malloc based MVKVector allocator with preallocated storage
-//
-//////////////////////////////////////////////////////////////////////////////////////////
-template <typename T, int N>
-class mvk_vector_allocator_with_stack final : public mvk_vector_allocator_base<T>
-{
-private:
-  //size_t  num_elements_reserved; // uhh, num_elements_reserved is mapped onto the stack elements, let the fun begin
-  alignas( alignof( T ) ) unsigned char   elements_stack[N * sizeof( T )];
-
-  static_assert( N * sizeof( T ) >= sizeof( size_t ), "Initial static allocation must be at least 8 bytes. Increase the count of pre-allocated elements." );
-
-  void set_num_elements_reserved( const size_t num_elements_reserved )
-  {
-    *reinterpret_cast<size_t*>( &elements_stack[0] ) = num_elements_reserved;
-  }
-
-public:
-  //
-  // faster element construction and destruction using type traits
-  //
-  template<class S, class... Args> typename std::enable_if< !std::is_trivially_constructible<S, Args...>::value >::type
-    construct( S *_ptr, Args&&... _args )
-  {
-    new ( _ptr ) S( std::forward<Args>( _args )... );
-  }
-
-  template<class S, class... Args> typename std::enable_if< std::is_trivially_constructible<S, Args...>::value >::type
-    construct( S *_ptr, Args&&... _args )
-  {
-    *_ptr = S( std::forward<Args>( _args )... );
-  }
-
-  template<class S> typename std::enable_if< !std::is_trivially_destructible<S>::value >::type
-    destruct( S *_ptr )
-  {
-    _ptr->~S();
-  }
-
-  template<class S> typename std::enable_if< std::is_trivially_destructible<S>::value >::type
-    destruct( S *_ptr )
-  {
-  }
-
-  template<class S> typename std::enable_if< !std::is_trivially_destructible<S>::value >::type
-    destruct_all()
-  {
-    for( size_t i = 0; i < mvk_vector_allocator_base<S>::num_elements_used; ++i )
-    {
-      mvk_vector_allocator_base<S>::ptr[i].~S();
-    }
-
-    mvk_vector_allocator_base<S>::num_elements_used = 0;
-  }
-
-  template<class S> typename std::enable_if< std::is_trivially_destructible<S>::value >::type
-    destruct_all()
-  {
-    mvk_vector_allocator_base<S>::num_elements_used = 0;
-  }
-
-  template<class S> typename std::enable_if< !std::is_trivially_destructible<S>::value >::type
-    swap_stack( mvk_vector_allocator_with_stack &a )
-  {
-    T stack_copy[N];
-
-    for( size_t i = 0; i < mvk_vector_allocator_base<S>::num_elements_used; ++i )
-    {
-      construct( &stack_copy[i], std::move( S::ptr[i] ) );
-      destruct( &mvk_vector_allocator_base<S>::ptr[i] );
-    }
-
-    for( size_t i = 0; i < a.num_elements_used; ++i )
-    {
-      construct( &mvk_vector_allocator_base<S>::ptr[i], std::move( a.ptr[i] ) );
-      destruct( &mvk_vector_allocator_base<S>::ptr[i] );
-    }
-
-    for( size_t i = 0; i < mvk_vector_allocator_base<S>::num_elements_used; ++i )
-    {
-      construct( &a.ptr[i], std::move( stack_copy[i] ) );
-      destruct( &stack_copy[i] );
-    }
-  }
-
-  template<class S> typename std::enable_if< std::is_trivially_destructible<S>::value >::type
-    swap_stack( mvk_vector_allocator_with_stack &a )
-  {
-    constexpr int STACK_SIZE = N * sizeof( T );
-    for( int i = 0; i < STACK_SIZE; ++i )
-    {
-      const auto v = elements_stack[i];
-      elements_stack[i] = a.elements_stack[i];
-      a.elements_stack[i] = v;
-    }
-  }
-
-public:
-  mvk_vector_allocator_with_stack() : mvk_vector_allocator_base<T>{ reinterpret_cast<T*>( &elements_stack[0] ), 0 }
-  {
-  }
-
-  mvk_vector_allocator_with_stack( mvk_vector_allocator_with_stack &&a ) : mvk_vector_allocator_base<T>{ nullptr, a.num_elements_used }
-  {
-    // is a heap based -> steal ptr from a
-    if( !a.get_data_on_stack() )
-    {
-      mvk_vector_allocator_base<T>::ptr = a.ptr;
-      set_num_elements_reserved( a.get_capacity() );
-
-      a.ptr = a.get_default_ptr();
-    }
-    else
-    {
-      mvk_vector_allocator_base<T>::ptr = get_default_ptr();
-      for( size_t i = 0; i < a.num_elements_used; ++i )
-      {
-        construct( &mvk_vector_allocator_base<T>::ptr[i], std::move( a.ptr[i] ) );
-        destruct( &a.ptr[i] );
-      }
-    }
-
-    a.num_elements_used = 0;
-  }
-
-  ~mvk_vector_allocator_with_stack()
-  {
-    deallocate();
-  }
-
-  size_t get_capacity() const override
-  {
-    return get_data_on_stack() ? N : *reinterpret_cast<const size_t*>( &elements_stack[0] );
-  }
-
-  constexpr T *get_default_ptr() const
-  {
-    return reinterpret_cast< T* >( const_cast< unsigned char * >( &elements_stack[0] ) );
-  }
-
-  bool get_data_on_stack() const
-  {
-    return mvk_vector_allocator_base<T>::ptr == get_default_ptr();
-  }
-
-  void swap( mvk_vector_allocator_with_stack &a )
-  {
-    // both allocators on heap -> easy case
-    if( !get_data_on_stack() && !a.get_data_on_stack() )
-    {
-      auto copy_ptr = mvk_vector_allocator_base<T>::ptr;
-      auto copy_num_elements_reserved = get_capacity();
-      mvk_vector_allocator_base<T>::ptr = a.ptr;
-      set_num_elements_reserved( a.get_capacity() );
-      a.ptr = copy_ptr;
-      a.set_num_elements_reserved( copy_num_elements_reserved );
-    }
-    // both allocators on stack -> just switch the stack contents
-    else if( get_data_on_stack() && a.get_data_on_stack() )
-    {
-      swap_stack<T>( a );
-    }
-    else if( get_data_on_stack() && !a.get_data_on_stack() )
-    {
-      auto copy_ptr = a.ptr;
-      auto copy_num_elements_reserved = a.get_capacity();
-
-      a.ptr = a.get_default_ptr();
-      for( size_t i = 0; i < mvk_vector_allocator_base<T>::num_elements_used; ++i )
-      {
-        construct( &a.ptr[i], std::move( mvk_vector_allocator_base<T>::ptr[i] ) );
-        destruct( &mvk_vector_allocator_base<T>::ptr[i] );
-      }
-
-      mvk_vector_allocator_base<T>::ptr = copy_ptr;
-      set_num_elements_reserved( copy_num_elements_reserved );
-    }
-    else if( !get_data_on_stack() && a.get_data_on_stack() )
-    {
-      auto copy_ptr = mvk_vector_allocator_base<T>::ptr;
-      auto copy_num_elements_reserved = get_capacity();
-
-      mvk_vector_allocator_base<T>::ptr = get_default_ptr();
-      for( size_t i = 0; i < a.num_elements_used; ++i )
-      {
-        construct( &mvk_vector_allocator_base<T>::ptr[i], std::move( a.ptr[i] ) );
-        destruct( &a.ptr[i] );
-      }
-
-      a.ptr = copy_ptr;
-      a.set_num_elements_reserved( copy_num_elements_reserved );
-    }
-
-    auto copy_num_elements_used = mvk_vector_allocator_base<T>::num_elements_used;
-    mvk_vector_allocator_base<T>::num_elements_used = a.num_elements_used;
-    a.num_elements_used = copy_num_elements_used;
-  }
-
-  //
-  // allocates rounded up to the defined alignment the number of bytes / if the system cannot allocate the specified amount of memory then a null block is returned
-  //
-  void allocate( const size_t num_elements_to_reserve ) override
-  {
-    deallocate();
-
-    // check if enough memory on stack space is left
-    if( num_elements_to_reserve <= N )
-    {
-      return;
-    }
-
-    mvk_vector_allocator_base<T>::ptr               = reinterpret_cast< T* >( mvk_memory_allocator::alloc( num_elements_to_reserve * sizeof( T ) ) );
-    mvk_vector_allocator_base<T>::num_elements_used = 0;
-    set_num_elements_reserved( num_elements_to_reserve );
-  }
-
-  //template<class S> typename std::enable_if< !std::is_trivially_copyable<S>::value >::type
-  void _re_allocate( const size_t num_elements_to_reserve )
-  {
-    auto *new_ptr = reinterpret_cast< T* >( mvk_memory_allocator::alloc( num_elements_to_reserve * sizeof( T ) ) );
-
-    for( size_t i = 0; i < mvk_vector_allocator_base<T>::num_elements_used; ++i )
-    {
-      construct( &new_ptr[i], std::move( mvk_vector_allocator_base<T>::ptr[i] ) );
-      destruct( &mvk_vector_allocator_base<T>::ptr[i] );
-    }
-
-    if( mvk_vector_allocator_base<T>::ptr != get_default_ptr() )
-    {
-      mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
-    }
-
-    mvk_vector_allocator_base<T>::ptr = new_ptr;
-    set_num_elements_reserved( num_elements_to_reserve );
-  }
-
-  //template<class S> typename std::enable_if< std::is_trivially_copyable<S>::value >::type
-  //  _re_allocate( const size_t num_elements_to_reserve )
-  //{
-  //  const bool data_is_on_stack = get_data_on_stack();
-  //
-  //  auto *new_ptr = reinterpret_cast< S* >( mvk_memory_allocator::tm_memrealloc( data_is_on_stack ? nullptr : ptr, num_elements_to_reserve * sizeof( S ) ) );
-  //  if( data_is_on_stack )
-  //  {
-  //    for( int i = 0; i < N; ++i )
-  //    {
-  //      new_ptr[i] = ptr[i];
-  //    }
-  //  }
-  //
-  //  ptr = new_ptr;
-  //  set_num_elements_reserved( num_elements_to_reserve );
-  //}
-
-  void re_allocate( const size_t num_elements_to_reserve ) override
-  {
-    //TM_ASSERT( num_elements_to_reserve > get_capacity() );
-
-    if( num_elements_to_reserve > N )
-    {
-      _re_allocate( num_elements_to_reserve );
-    }
-  }
-
-  void shrink_to_fit() override
-  {
-    // nothing to do if data is on stack already
-    if( get_data_on_stack() )
-      return;
-
-    // move elements to stack space
-    if( mvk_vector_allocator_base<T>::num_elements_used <= N )
-    {
-      //const auto num_elements_reserved = get_capacity();
-
-      auto *stack_ptr = get_default_ptr();
-      for( size_t i = 0; i < mvk_vector_allocator_base<T>::num_elements_used; ++i )
-      {
-        construct( &stack_ptr[i], std::move( mvk_vector_allocator_base<T>::ptr[i] ) );
-        destruct( &mvk_vector_allocator_base<T>::ptr[i] );
-      }
-
-      mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
-
-      mvk_vector_allocator_base<T>::ptr = stack_ptr;
-    }
-    else
-    {
-      auto *new_ptr = reinterpret_cast< T* >( mvk_memory_allocator::alloc( mvk_vector_allocator_base<T>::num_elements_used * sizeof( T ) ) );
-
-      for( size_t i = 0; i < mvk_vector_allocator_base<T>::num_elements_used; ++i )
-      {
-        construct( &new_ptr[i], std::move( mvk_vector_allocator_base<T>::ptr[i] ) );
-        destruct( &mvk_vector_allocator_base<T>::ptr[i] );
-      }
-
-      mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
-
-      mvk_vector_allocator_base<T>::ptr = new_ptr;
-      set_num_elements_reserved( mvk_vector_allocator_base<T>::num_elements_used );
-    }
-  }
-
-  void deallocate() override
-  {
-    destruct_all<T>();
-
-    if( !get_data_on_stack() )
-    {
-      mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
-    }
-
-    mvk_vector_allocator_base<T>::ptr = get_default_ptr();
-    mvk_vector_allocator_base<T>::num_elements_used = 0;
-  }
-};
-
-
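For context, the allocator removed above relies on type-trait dispatch so that construction and destruction compile down to no-ops (or plain assignments) for trivial element types. A minimal standalone sketch of that idiom, with illustrative names that are not part of MoltenVK:

```cpp
#include <cstddef>
#include <type_traits>

// Destroy a range of elements, emitting destructor calls only
// when the element type actually has a non-trivial destructor.
template <class T>
typename std::enable_if<!std::is_trivially_destructible<T>::value>::type
destroy_range(T* p, std::size_t n) {
	for (std::size_t i = 0; i < n; ++i) { p[i].~T(); }
}

template <class T>
typename std::enable_if<std::is_trivially_destructible<T>::value>::type
destroy_range(T*, std::size_t) { }	// trivial types need no teardown
```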
diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
index f348b54..0d22e17 100644
--- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
+++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
@@ -638,7 +638,7 @@
 		case VK_SHADER_STAGE_COMPUTE_BIT:					return kMVKShaderStageCompute;
 		default:
 			MVKBaseObject::reportError(mvkObj, VK_ERROR_FORMAT_NOT_SUPPORTED, "VkShaderStage %x is not supported.", vkStage);
-			return kMVKShaderStageMax;
+			return kMVKShaderStageCount;
 	}
 }
 
@@ -650,8 +650,8 @@
 		/* FIXME: kMVKShaderStageGeometry */
 		case kMVKShaderStageFragment:	return VK_SHADER_STAGE_FRAGMENT_BIT;
 		case kMVKShaderStageCompute:	return VK_SHADER_STAGE_COMPUTE_BIT;
-		case kMVKShaderStageMax:
-			assert(!"This function should never be called with kMVKShaderStageMax!");
+		case kMVKShaderStageCount:
+			assert(!"This function should never be called with kMVKShaderStageCount!");
 			return VK_SHADER_STAGE_ALL;
 	}
 }
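The `kMVKShaderStageMax` to `kMVKShaderStageCount` rename follows the common trailing-enumerator idiom: the final value counts the stages and doubles as the not-a-stage sentinel returned on error above. A simplified sketch (the enumerator list here is illustrative, not a copy of MoltenVK's definition):

```cpp
typedef enum {
	kMVKShaderStageVertex = 0,
	kMVKShaderStageTessCtl,
	kMVKShaderStageTessEval,
	kMVKShaderStageFragment,
	kMVKShaderStageCompute,
	kMVKShaderStageCount	// one past the last stage; usable as an array size
} MVKShaderStage;

// Hypothetical usage: per-stage state sized by the count.
// SomePerStageState stageState[kMVKShaderStageCount];
```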
diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
index fb1bb93..36c803f 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
@@ -31,10 +31,17 @@
 #pragma mark -
 #pragma mark SPIRVToMSLConversionConfiguration
 
+// Returns whether the container contains an item equal to the value.
+template<class C, class T>
+bool contains(const C& container, const T& val) {
+	for (const T& cVal : container) { if (cVal == val) { return true; } }
+	return false;
+}
+
 // Returns whether the container contains the value (using a matches(T&) comparison member function).
-template<class T>
-bool containsMatching(const vector<T>& vec, const T& val) {
-    for (const T& vecVal : vec) { if (vecVal.matches(val)) { return true; } }
+template<class C, class T>
+bool containsMatching(const C& container, const T& val) {
+    for (const T& cVal : container) { if (cVal.matches(val)) { return true; } }
     return false;
 }
 
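A hypothetical usage sketch of the two helpers above: `contains` compares with `operator==`, while `containsMatching` defers to the element's `matches()` member function:

```cpp
#include <cstdint>
#include <vector>

void example() {
	std::vector<uint32_t> discreteSets = { 0, 2 };
	bool hasSet2 = contains(discreteSets, uint32_t(2));		// true, via ==

	mvk::DescriptorBinding db;
	db.descriptorSet = 0;
	db.binding = 3;
	std::vector<mvk::DescriptorBinding> blocks = { db };
	bool hasBlock = containsMatching(blocks, db);			// true, via matches()
	(void)hasSet2; (void)hasBlock;
}
```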
@@ -143,6 +150,12 @@
 	return true;
 }
 
+MVK_PUBLIC_SYMBOL bool mvk::DescriptorBinding::matches(const mvk::DescriptorBinding& other) const {
+	if (descriptorSet != other.descriptorSet) { return false; }
+	if (binding != other.binding) { return false; }
+	return true;
+}
+
 MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::stageSupportsVertexAttributes() const {
 	return (options.entryPointStage == spv::ExecutionModelVertex ||
 			options.entryPointStage == spv::ExecutionModelTessellationControl ||
@@ -182,6 +195,14 @@
         if (rb.isUsedByShader && !containsMatching(other.resourceBindings, rb)) { return false; }
     }
 
+	for (uint32_t dsIdx : discreteDescriptorSets) {
+		if ( !contains(other.discreteDescriptorSets, dsIdx)) { return false; }
+	}
+
+	for (const auto& db : inlineUniformBlocks) {
+		if ( !containsMatching(other.inlineUniformBlocks, db)) { return false; }
+	}
+
     return true;
 }
 
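Note that the checks above are one-directional: every discrete set and inline block recorded in this configuration must appear in `other`, but `other` may carry more. A sketch of that containment rule in isolation (the helper name is illustrative):

```cpp
#include <cstdint>
#include <vector>

// True when every element of 'subset' also appears in 'superset'.
static bool isSubsetOf(const std::vector<uint32_t>& subset,
                       const std::vector<uint32_t>& superset) {
	for (uint32_t v : subset) {
		if ( !contains(superset, v) ) { return false; }
	}
	return true;
}
```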
@@ -277,6 +298,18 @@
 			}
 		}
 
+		// Add any descriptor sets that are not using Metal argument buffers.
+		// This only has an effect when SPIRVToMSLConversionConfiguration::options.mslOptions.argument_buffers is enabled.
+		for (uint32_t dsIdx : context.discreteDescriptorSets) {
+			pMSLCompiler->add_discrete_descriptor_set(dsIdx);
+		}
+
+		// Add any inline block bindings.
+		// This only has an effect when SPIRVToMSLConversionConfiguration::options.mslOptions.argument_buffers is enabled.
+		for (auto& db : context.inlineUniformBlocks) {
+			pMSLCompiler->add_inline_uniform_block(db.descriptorSet, db.binding);
+		}
+
 		_msl = pMSLCompiler->compile();
 
         if (shouldLogMSL) { logSource(_msl, "MSL", "Converted"); }
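For reference, the two calls added above are existing SPIRV-Cross `CompilerMSL` APIs: one opts a descriptor set out of argument-buffer packing, the other embeds a specific binding's content inline. A minimal standalone sketch of driving the compiler this way (the SPIR-V input and the set/binding numbers are placeholders):

```cpp
#include <spirv_msl.hpp>

#include <cstdint>
#include <string>
#include <vector>

std::string compileToMSL(const std::vector<uint32_t>& spirv) {
	spirv_cross::CompilerMSL msl(spirv);

	auto opts = msl.get_msl_options();
	opts.argument_buffers = true;		// pack descriptor sets into Metal argument buffers
	msl.set_msl_options(opts);

	msl.add_discrete_descriptor_set(1);	// set 1 keeps classic discrete bindings
	msl.add_inline_uniform_block(0, 2);	// set 0, binding 2 is embedded inline

	return msl.compile();
}
```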
diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
index f642644..a9d9199 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
@@ -120,6 +120,20 @@
 	} MSLResourceBinding;
 
 	/**
+	 * Identifies a descriptor set binding.
+	 *
+	 * THIS STRUCT IS STREAMED OUT AS PART OF THE PIPELINE CACHE.
+	 * CHANGES TO THIS STRUCT SHOULD BE CAPTURED IN THE STREAMING LOGIC OF THE PIPELINE CACHE.
+	 */
+	typedef struct DescriptorBinding {
+		uint32_t descriptorSet = 0;
+		uint32_t binding = 0;
+
+		bool matches(const DescriptorBinding& other) const;
+
+	} DescriptorBinding;
+
+	/**
 	 * Configuration passed to the SPIRVToMSLConverter.
 	 *
 	 * THIS STRUCT IS STREAMED OUT AS PART OF THE PIPELINE CACHE.
@@ -129,6 +143,8 @@
 		SPIRVToMSLConversionOptions options;
 		std::vector<MSLShaderInput> shaderInputs;
 		std::vector<MSLResourceBinding> resourceBindings;
+		std::vector<uint32_t> discreteDescriptorSets;
+		std::vector<DescriptorBinding> inlineUniformBlocks;
 
 		/** Returns whether the pipeline stage being converted supports vertex attributes. */
 		bool stageSupportsVertexAttributes() const;
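Since `DescriptorBinding` is streamed into the pipeline cache, both fields must round-trip through whatever archive format the cache uses; any field added later must be captured there too. A hypothetical writer/reader pair, purely to illustrate that obligation (not the pipeline cache's actual streaming logic):

```cpp
#include <istream>
#include <ostream>

// Hypothetical helpers; MoltenVK's cache uses its own archive format.
inline void write(std::ostream& out, const mvk::DescriptorBinding& db) {
	out.write(reinterpret_cast<const char*>(&db.descriptorSet), sizeof db.descriptorSet);
	out.write(reinterpret_cast<const char*>(&db.binding), sizeof db.binding);
}

inline void read(std::istream& in, mvk::DescriptorBinding& db) {
	in.read(reinterpret_cast<char*>(&db.descriptorSet), sizeof db.descriptorSet);
	in.read(reinterpret_cast<char*>(&db.binding), sizeof db.binding);
}
```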