Merge pull request #896 from billhollings/master

Support vertex attribute offsets larger than the vertex buffer stride.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 9afd2df..c1ce731 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -20,22 +20,54 @@
 Released 2020/06/01
 
 - Add support for extensions:
+	- `VK_GOOGLE_display_timing`
 	- `VK_KHR_external_memory` (non-functional groundwork for future 
 	  Metal-resource Vulkan extension).
 	- `VK_KHR_external_memory_capabilities` (non-functional groundwork 
 	   for future Metal-resource Vulkan extension).
-- Memory consumption improvements.
+- Memory consumption improvements in command handling and vector pre-allocation optimizations.
+- `vkQueuePresentKHR()` returns a `VkResult` for each swapchain.
+- `MVKPipeline` disable fragment shader outputs for unused attachments.
+- `MVKBuffer` support texel buffers in host-coherent memory on Mac.
+- `MVKDescriptor` pass buffers to shaders that do atomic image accesses.
+- Support vertex attribute offsets larger than the vertex buffer stride.
+- Fix crash when more than two GPUs are present.
+- Fix issue where `vkGetPhysicalDeviceFormatProperties()` incorrectly returned 
+  properties for unsupported formats.
+- Fix stack overflow when logging and reporting very long messages.
+- Fix situation where compute pipeline state not retained across copy and renderpass operations.
+- Fix buffer offset calculation.
+- Fixes to maximum FPS calculations.
+- Enable format atomic capabilities only when the format supports them.
+- Add `MVKSmallVector` as a more memory-efficient substitute for `MVKVector`.
 - Reinstate `VulkanSamples API-Samples` demo apps and add 
   `input_attachment` and `push_descriptors` demos.
-- `vkQueuePresentKHR()` returns a `VkResult` for each swapchain.
+- Add `MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE` environment variable 
+  to support capturing GPU traces to a file.
 - Consolidate frame and non-frame performance reporting.
 	- Remove `vkGetSwapchainPerformanceMVK()` from API.
 	- Swapchain performance can be retrieved with other activity performance 
 	  through `vkGetPerformanceStatisticsMVK()`.
 	- Add `MVK_CONFIG_PERFORMANCE_LOGGING_INLINE` env var to enable/disable
 	  logging of performance of each activity when it happens. 
-- Fix crash when more than two GPUs.
+	- Reduce thread locking on performance statistics collection.
+- Numerous documentation typo corrections.
 - Support Xcode 11.5.
+- Update to latest SPIRV-Cross version:
+	- MSL: mark `BuiltInFragCoord` as implicitly used for subpass reads.
+	- MSL: Deal with cases where builtin is implicitly needed, declared, but unused.
+	- MSL: Do not use base expression with PhysicalTypeID `OpCompositeExtract`.
+	- MSL: Add options to control emission of fragment outputs.
+	- MSL: Force disabled fragment builtins to have the right name.
+	- MSL: Allow removing clip distance user varyings.
+	- MSL: Support edge case with DX layout in scalar block layout.
+	- MSL: Deal correctly with initializers on Private variables.
+	- MSL: Fix case where `subpassInput` is passed to leaf functions.
+	- MSL: Redirect member indices when buffer has been sorted by Offset.
+	- MSL: If the packed type is scalar, don't emit "pack_" prefix.
+	- MSL: Avoid packed arrays in more cases.
+	- Do not add NonWritable/NonReadable decorations for regular images.
+	- Expose a query if samplers or images are comparison resources.
 
 
 
diff --git a/ExternalDependencies.xcodeproj/project.pbxproj b/ExternalDependencies.xcodeproj/project.pbxproj
index 1b0f51c..ec56c28 100644
--- a/ExternalDependencies.xcodeproj/project.pbxproj
+++ b/ExternalDependencies.xcodeproj/project.pbxproj
@@ -2980,7 +2980,7 @@
 		A9F55D25198BE6A7004EC31B /* Project object */ = {
 			isa = PBXProject;
 			attributes = {
-				LastUpgradeCheck = 1140;
+				LastUpgradeCheck = 1150;
 				ORGANIZATIONNAME = "The Brenwill Workshop Ltd.";
 				TargetAttributes = {
 					A972A7E421CEC72F0013AB25 = {
diff --git "a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies \050Debug\051.xcscheme" "b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies \050Debug\051.xcscheme"
index 18a6d08..9677c5a 100644
--- "a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies \050Debug\051.xcscheme"
+++ "b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies \050Debug\051.xcscheme"
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Scheme
-   LastUpgradeVersion = "1140"
+   LastUpgradeVersion = "1150"
    version = "2.0">
    <BuildAction
       parallelizeBuildables = "YES"
diff --git a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies-iOS.xcscheme b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies-iOS.xcscheme
index fab5380..f5912bb 100644
--- a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies-iOS.xcscheme
+++ b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies-iOS.xcscheme
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Scheme
-   LastUpgradeVersion = "1140"
+   LastUpgradeVersion = "1150"
    version = "2.0">
    <BuildAction
       parallelizeBuildables = "YES"
diff --git a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies-macOS.xcscheme b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies-macOS.xcscheme
index 2313055..72e1129 100644
--- a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies-macOS.xcscheme
+++ b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies-macOS.xcscheme
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Scheme
-   LastUpgradeVersion = "1140"
+   LastUpgradeVersion = "1150"
    version = "2.0">
    <BuildAction
       parallelizeBuildables = "YES"
diff --git a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies.xcscheme b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies.xcscheme
index 2ebf29f..7d3952c 100644
--- a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies.xcscheme
+++ b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/ExternalDependencies.xcscheme
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Scheme
-   LastUpgradeVersion = "1140"
+   LastUpgradeVersion = "1150"
    version = "2.0">
    <BuildAction
       parallelizeBuildables = "YES"
diff --git a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Cross-iOS.xcscheme b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Cross-iOS.xcscheme
index 944ddf4..50db954 100644
--- a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Cross-iOS.xcscheme
+++ b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Cross-iOS.xcscheme
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Scheme
-   LastUpgradeVersion = "1140"
+   LastUpgradeVersion = "1150"
    version = "2.0">
    <BuildAction
       parallelizeBuildables = "YES"
diff --git a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Cross-macOS.xcscheme b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Cross-macOS.xcscheme
index 9f89d19..a4ceebf 100644
--- a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Cross-macOS.xcscheme
+++ b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Cross-macOS.xcscheme
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Scheme
-   LastUpgradeVersion = "1140"
+   LastUpgradeVersion = "1150"
    version = "2.0">
    <BuildAction
       parallelizeBuildables = "YES"
diff --git a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Tools-iOS.xcscheme b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Tools-iOS.xcscheme
index 03437e5..915c7e2 100644
--- a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Tools-iOS.xcscheme
+++ b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Tools-iOS.xcscheme
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Scheme
-   LastUpgradeVersion = "1140"
+   LastUpgradeVersion = "1150"
    version = "2.0">
    <BuildAction
       parallelizeBuildables = "YES"
diff --git a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Tools-macOS.xcscheme b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Tools-macOS.xcscheme
index cbfbdd3..2f53c1a 100644
--- a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Tools-macOS.xcscheme
+++ b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/SPIRV-Tools-macOS.xcscheme
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Scheme
-   LastUpgradeVersion = "1140"
+   LastUpgradeVersion = "1150"
    version = "2.0">
    <BuildAction
       parallelizeBuildables = "YES"
diff --git a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/glslang-iOS.xcscheme b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/glslang-iOS.xcscheme
index dc8c0d1..62d3081 100644
--- a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/glslang-iOS.xcscheme
+++ b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/glslang-iOS.xcscheme
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Scheme
-   LastUpgradeVersion = "1140"
+   LastUpgradeVersion = "1150"
    version = "2.0">
    <BuildAction
       parallelizeBuildables = "YES"
diff --git a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/glslang-macOS.xcscheme b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/glslang-macOS.xcscheme
index 4a85559..4b203b9 100644
--- a/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/glslang-macOS.xcscheme
+++ b/ExternalDependencies.xcodeproj/xcshareddata/xcschemes/glslang-macOS.xcscheme
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <Scheme
-   LastUpgradeVersion = "1140"
+   LastUpgradeVersion = "1150"
    version = "2.0">
    <BuildAction
       parallelizeBuildables = "YES"
diff --git a/ExternalRevisions/SPIRV-Cross_repo_revision b/ExternalRevisions/SPIRV-Cross_repo_revision
index e20a35a..9fa21a3 100644
--- a/ExternalRevisions/SPIRV-Cross_repo_revision
+++ b/ExternalRevisions/SPIRV-Cross_repo_revision
@@ -1 +1 @@
-fbc560782c42bab4581dbc7705da5013bf864956
+d385bf096f5dabbc4cdaeb6872b0f64be1a63ad0
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index d63ba37..c06b32a 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -598,22 +598,33 @@
 
 void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
 
-    MVKPipeline* pipeline = _cmdEncoder->_graphicsPipelineState.getPipeline();
+    MVKGraphicsPipeline* pipeline = (MVKGraphicsPipeline*)_cmdEncoder->_graphicsPipelineState.getPipeline();
     bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || _cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle;
-    bool forTessellation = ((MVKGraphicsPipeline*)pipeline)->isTessellationPipeline();
+    bool forTessellation = pipeline->isTessellationPipeline();
 
     if (stage == (forTessellation ? kMVKGraphicsStageVertex : kMVKGraphicsStageRasterization)) {
         encodeBindings(kMVKShaderStageVertex, "vertex", fullImageViewSwizzle,
-                       [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
-                           if (b.isInline)
+                       [pipeline](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
+					       if (b.isInline) {
                                cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder,
                                                           b.mtlBytes,
                                                           b.size,
                                                           b.index);
-                           else
+					       } else {
                                [cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer
                                                                        offset: b.offset
                                                                       atIndex: b.index];
+
+							   // Add any translated vertex bindings for this binding
+							   auto xltdVtxBindings = pipeline->getTranslatedVertexBindings();
+							   for (auto& xltdBind : xltdVtxBindings) {
+								   if (b.index == pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.binding)) {
+									   [cmdEncoder->_mtlRenderEncoder setVertexBuffer: b.mtlBuffer
+																			   offset: b.offset + xltdBind.translationOffset
+																			  atIndex: pipeline->getMetalBufferIndexForVertexAttributeBinding(xltdBind.translationBinding)];
+								   }
+							   }
+					       }
                        },
                        [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef<uint32_t>& s)->void {
                            cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder,
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
index 9503ba9..5f01d27 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
@@ -184,6 +184,13 @@
 #pragma mark -
 #pragma mark MVKGraphicsPipeline
 
+/** Describes a buffer binding to accommodate vertex attributes with offsets greater than the stride. */
+struct MVKTranslatedVertexBinding {
+	uint16_t binding;
+	uint16_t translationBinding;
+	uint32_t translationOffset;
+};
+
 typedef MVKSmallVector<MVKGraphicsStage, 4> MVKPiplineStages;
 
 /** The number of dynamic states possible in Vulkan. */
@@ -230,6 +237,12 @@
 	/** Returns true if the tessellation control shader needs a buffer to store its per-patch output. */
 	bool needsTessCtlPatchOutputBuffer() { return _needsTessCtlPatchOutputBuffer; }
 
+	/** Returns the Metal vertex buffer index to use for the specified vertex attribute binding number.  */
+	uint32_t getMetalBufferIndexForVertexAttributeBinding(uint32_t binding) { return _device->getMetalBufferIndexForVertexAttributeBinding(binding); }
+
+	/** Returns the collection of translated vertex bindings. */
+	MVKArrayRef<MVKTranslatedVertexBinding> getTranslatedVertexBindings() { return _translatedVertexBindings.contents(); }
+
 	/** Constructs an instance for the device and parent (which may be NULL). */
 	MVKGraphicsPipeline(MVKDevice* device,
 						MVKPipelineCache* pipelineCache,
@@ -260,6 +273,7 @@
     void addFragmentOutputToPipeline(MTLRenderPipelineDescriptor* plDesc, const SPIRVTessReflectionData& reflectData, const VkGraphicsPipelineCreateInfo* pCreateInfo, bool isTessellationVertexPipeline = false);
     bool isRenderingPoints(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData);
 	bool verifyImplicitBuffer(bool needsBuffer, MVKShaderImplicitRezBinding& index, MVKShaderStage stage, const char* name, uint32_t reservedBuffers);
+	uint32_t getTranslatedVertexBinding(uint32_t binding, uint32_t translationOffset, uint32_t maxBinding);
 
 	const VkPipelineShaderStageCreateInfo* _pVertexSS = nullptr;
 	const VkPipelineShaderStageCreateInfo* _pTessCtlSS = nullptr;
@@ -272,6 +286,7 @@
 
 	MVKSmallVector<VkViewport, kMVKCachedViewportScissorCount> _viewports;
 	MVKSmallVector<VkRect2D, kMVKCachedViewportScissorCount> _scissors;
+	MVKSmallVector<MVKTranslatedVertexBinding> _translatedVertexBindings;
 
 	MTLComputePipelineDescriptor* _mtlTessControlStageDesc = nil;
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index 781681a..e2d61ec 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -953,40 +953,12 @@
         }
     }
 
-    // Vertex attributes
-    uint32_t vaCnt = pVI->vertexAttributeDescriptionCount;
-	uint32_t vbCnt = pVI->vertexBindingDescriptionCount;
-    for (uint32_t i = 0; i < vaCnt; i++) {
-        const VkVertexInputAttributeDescription* pVKVA = &pVI->pVertexAttributeDescriptions[i];
-        if (shaderContext.isVertexAttributeLocationUsed(pVKVA->location)) {
-
-      // Vulkan allows offsets to exceed the buffer stride, but Metal doesn't.
-			// Only check non-zero offsets, as it's common for both to be zero when step rate is instance.
-			if (pVKVA->offset > 0) {
-				const VkVertexInputBindingDescription* pVKVB = pVI->pVertexBindingDescriptions;
-				for (uint32_t j = 0; j < vbCnt; j++, pVKVB++) {
-					if (pVKVB->binding == pVKVA->binding) {
-						if (pVKVA->offset >= pVKVB->stride) {
-							setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Under Metal, vertex attribute offsets must not exceed the vertex buffer stride."));
-							return false;
-						}
-						break;
-					}
-				}
-			}
-
-			MTLVertexAttributeDescriptor* vaDesc = plDesc.vertexDescriptor.attributes[pVKVA->location];
-            vaDesc.format = getPixelFormats()->getMTLVertexFormat(pVKVA->format);
-            vaDesc.bufferIndex = _device->getMetalBufferIndexForVertexAttributeBinding(pVKVA->binding);
-            vaDesc.offset = pVKVA->offset;
-        }
-    }
-
     // Vertex buffer bindings
+	uint32_t vbCnt = pVI->vertexBindingDescriptionCount;
+	uint32_t maxBinding = 0;
     for (uint32_t i = 0; i < vbCnt; i++) {
         const VkVertexInputBindingDescription* pVKVB = &pVI->pVertexBindingDescriptions[i];
-        uint32_t vbIdx = _device->getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
-        if (shaderContext.isVertexBufferUsed(vbIdx)) {
+        if (shaderContext.isVertexBufferUsed(pVKVB->binding)) {
 
 			// Vulkan allows any stride, but Metal only allows multiples of 4.
             // TODO: We should try to expand the buffer to the required alignment in that case.
@@ -995,6 +967,8 @@
                 return false;
             }
 
+			maxBinding = max(pVKVB->binding, maxBinding);
+			uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
 			MTLVertexBufferLayoutDescriptor* vbDesc = plDesc.vertexDescriptor.layouts[vbIdx];
 			vbDesc.stride = (pVKVB->stride == 0) ? sizeof(simd::float4) : pVKVB->stride;      // Vulkan allows zero stride but Metal doesn't. Default to float4
             vbDesc.stepFunction = mvkMTLVertexStepFunctionFromVkVertexInputRate(pVKVB->inputRate);
@@ -1007,7 +981,7 @@
         vbCnt = pVertexInputDivisorState->vertexBindingDivisorCount;
         for (uint32_t i = 0; i < vbCnt; i++) {
             const VkVertexInputBindingDivisorDescriptionEXT* pVKVB = &pVertexInputDivisorState->pVertexBindingDivisors[i];
-            uint32_t vbIdx = _device->getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
+            uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
             if (shaderContext.isVertexBufferUsed(vbIdx)) {
                 MTLVertexBufferLayoutDescriptor* vbDesc = plDesc.vertexDescriptor.layouts[vbIdx];
                 if (vbDesc.stepFunction == MTLVertexStepFunctionPerInstance) {
@@ -1019,9 +993,93 @@
         }
     }
 
+	// Vertex attributes
+	uint32_t vaCnt = pVI->vertexAttributeDescriptionCount;
+	for (uint32_t i = 0; i < vaCnt; i++) {
+		const VkVertexInputAttributeDescription* pVKVA = &pVI->pVertexAttributeDescriptions[i];
+		if (shaderContext.isVertexAttributeLocationUsed(pVKVA->location)) {
+			uint32_t vaBinding = pVKVA->binding;
+			uint32_t vaOffset = pVKVA->offset;
+
+			// Vulkan allows offsets to exceed the buffer stride, but Metal doesn't.
+			// If this is the case, fetch a translated artificial buffer binding, using the same MTLBuffer,
+			// but that is translated so that the reduced VA offset fits into the binding stride.
+			// Only check non-zero offsets, as it's common for both to be zero when step rate is instance.
+			if (vaOffset > 0) {
+				const VkVertexInputBindingDescription* pVKVB = pVI->pVertexBindingDescriptions;
+				for (uint32_t j = 0; j < vbCnt; j++, pVKVB++) {
+					if (pVKVB->binding == pVKVA->binding) {
+						if (vaOffset >= pVKVB->stride) {
+							// Move vertex attribute offset into the stride. This vertex attribute may be
+							// combined with other vertex attributes into the same translated buffer binding.
+							// But if the reduced offset combined with the vertex attribute size still won't
+							// fit into the buffer binding stride, force the vertex attribute offset to zero,
+							// effectively dedicating this vertex attribute to its own buffer binding.
+							uint32_t origOffset = vaOffset;
+							vaOffset %= pVKVB->stride;
+							if (vaOffset + getPixelFormats()->getBytesPerBlock(pVKVA->format) > pVKVB->stride) {
+								vaOffset = 0;
+							}
+							vaBinding = getTranslatedVertexBinding(vaBinding, origOffset - vaOffset, maxBinding);
+						}
+						break;
+					}
+				}
+			}
+
+			MTLVertexAttributeDescriptor* vaDesc = plDesc.vertexDescriptor.attributes[pVKVA->location];
+			vaDesc.format = getPixelFormats()->getMTLVertexFormat(pVKVA->format);
+			vaDesc.bufferIndex = getMetalBufferIndexForVertexAttributeBinding(vaBinding);
+			vaDesc.offset = vaOffset;
+		}
+	}
+
+	// Run through the vertex bindings. Add a new Metal vertex layout for each translated binding,
+	// identical to the original layout. The translated binding will index into the same MTLBuffer,
+	// but at an offset that is one or more strides away from the original.
+	for (uint32_t i = 0; i < vbCnt; i++) {
+		const VkVertexInputBindingDescription* pVKVB = &pVI->pVertexBindingDescriptions[i];
+		uint32_t vbVACnt = shaderContext.countVertexAttributesAt(pVKVB->binding);
+		if (vbVACnt > 0) {
+			uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
+			MTLVertexBufferLayoutDescriptor* vbDesc = plDesc.vertexDescriptor.layouts[vbIdx];
+
+			uint32_t xldtVACnt = 0;
+			for (auto& xltdBind : _translatedVertexBindings) {
+				if (xltdBind.binding == pVKVB->binding) {
+					uint32_t vbXltdIdx = getMetalBufferIndexForVertexAttributeBinding(xltdBind.translationBinding);
+					MTLVertexBufferLayoutDescriptor* vbXltdDesc = plDesc.vertexDescriptor.layouts[vbXltdIdx];
+					vbXltdDesc.stride = vbDesc.stride;
+					vbXltdDesc.stepFunction = vbDesc.stepFunction;
+					vbXltdDesc.stepRate = vbDesc.stepRate;
+					xldtVACnt++;
+				}
+			}
+
+			// If all of the vertex attributes at this vertex buffer binding have been translated, remove it.
+			if (xldtVACnt == vbVACnt) { vbDesc.stride = 0; }
+		}
+	}
+
 	return true;
 }
 
+// Returns a translated binding for the existing binding and translation offset, creating it if needed.
+uint32_t MVKGraphicsPipeline::getTranslatedVertexBinding(uint32_t binding, uint32_t translationOffset, uint32_t maxBinding) {
+	// See if a translated binding already exists (for example if more than one VA needs the same translation).
+	for (auto& xltdBind : _translatedVertexBindings) {
+		if (xltdBind.binding == binding && xltdBind.translationOffset == translationOffset) {
+			return xltdBind.translationBinding;
+		}
+	}
+
+	// Get next available binding point and add a translation binding description for it
+	uint16_t xltdBindPt = (uint16_t)(maxBinding + _translatedVertexBindings.size() + 1);
+	_translatedVertexBindings.push_back( {.binding = (uint16_t)binding, .translationBinding = xltdBindPt, .translationOffset = translationOffset} );
+
+	return xltdBindPt;
+}
+
 void MVKGraphicsPipeline::addTessellationToPipeline(MTLRenderPipelineDescriptor* plDesc,
 													const SPIRVTessReflectionData& reflectData,
 													const VkPipelineTessellationStateCreateInfo* pTS) {
@@ -1186,8 +1244,7 @@
         // Set binding and offset from Vulkan vertex attribute
         MSLVertexAttribute va;
         va.vertexAttribute.location = pVKVA->location;
-        va.vertexAttribute.msl_buffer = _device->getMetalBufferIndexForVertexAttributeBinding(pVKVA->binding);
-        va.vertexAttribute.msl_offset = pVKVA->offset;
+        va.binding = pVKVA->binding;
 
         // Metal can't do signedness conversions on vertex buffers (rdar://45922847). If the shader
         // and the vertex attribute have mismatched signedness, we have to fix the shader
@@ -1223,17 +1280,6 @@
 
         }
 
-        // Set stride and input rate of vertex attribute from corresponding Vulkan vertex bindings
-        uint32_t vbCnt = pCreateInfo->pVertexInputState->vertexBindingDescriptionCount;
-        for (uint32_t vbIdx = 0; vbIdx < vbCnt; vbIdx++) {
-            const VkVertexInputBindingDescription* pVKVB = &pCreateInfo->pVertexInputState->pVertexBindingDescriptions[vbIdx];
-            if (pVKVB->binding == pVKVA->binding) {
-                va.vertexAttribute.msl_stride = pVKVB->stride;
-                va.vertexAttribute.per_instance = (pVKVB->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE);
-                break;
-            }
-        }
-
         shaderContext.vertexAttributes.push_back(va);
     }
 }
@@ -1652,10 +1698,6 @@
 	template<class Archive>
 	void serialize(Archive & archive, MSLVertexAttr& va) {
 		archive(va.location,
-				va.msl_buffer,
-				va.msl_offset,
-				va.msl_stride,
-				va.per_instance,
 				va.format,
 				va.builtin);
 	}
@@ -1731,6 +1773,7 @@
 	template<class Archive>
 	void serialize(Archive & archive, MSLVertexAttribute& va) {
 		archive(va.vertexAttribute,
+				va.binding,
 				va.isUsedByShader);
 	}
 
diff --git a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp
index 6ee2d97..19fc8f8 100644
--- a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp
+++ b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp
@@ -103,12 +103,9 @@
 
 MVK_PUBLIC_SYMBOL bool MSLVertexAttribute::matches(const MSLVertexAttribute& other) const {
 	if (vertexAttribute.location != other.vertexAttribute.location) { return false; }
-	if (vertexAttribute.msl_buffer != other.vertexAttribute.msl_buffer) { return false; }
-	if (vertexAttribute.msl_offset != other.vertexAttribute.msl_offset) { return false; }
-	if (vertexAttribute.msl_stride != other.vertexAttribute.msl_stride) { return false; }
 	if (vertexAttribute.format != other.vertexAttribute.format) { return false; }
 	if (vertexAttribute.builtin != other.vertexAttribute.builtin) { return false; }
-	if (!!vertexAttribute.per_instance != !!other.vertexAttribute.per_instance) { return false; }
+	if (binding != other.binding) { return false; }
 	return true;
 }
 
@@ -158,12 +155,12 @@
     return false;
 }
 
-// Check them all in case inactive VA's duplicate buffers used by active VA's.
-MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::isVertexBufferUsed(uint32_t mslBuffer) const {
-    for (auto& va : vertexAttributes) {
-        if ((va.vertexAttribute.msl_buffer == mslBuffer) && va.isUsedByShader) { return true; }
-    }
-    return false;
+MVK_PUBLIC_SYMBOL uint32_t SPIRVToMSLConversionConfiguration::countVertexAttributesAt(uint32_t binding) const {
+	uint32_t vaCnt = 0;
+	for (auto& va : vertexAttributes) {
+		if ((va.binding == binding) && va.isUsedByShader) { vaCnt++; }
+	}
+	return vaCnt;
 }
 
 MVK_PUBLIC_SYMBOL void SPIRVToMSLConversionConfiguration::markAllAttributesAndResourcesUsed() {
diff --git a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h
index 6cb0cea..0d5ddb7 100644
--- a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h
+++ b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h
@@ -74,6 +74,7 @@
 	typedef struct MSLVertexAttribute {
 		SPIRV_CROSS_NAMESPACE::MSLVertexAttr vertexAttribute;
 
+		uint32_t binding = 0;
 		bool isUsedByShader = false;
 
 		/**
@@ -135,8 +136,11 @@
         /** Returns whether the vertex attribute at the specified location is used by the shader. */
         bool isVertexAttributeLocationUsed(uint32_t location) const;
 
-        /** Returns whether the vertex buffer at the specified Metal binding index is used by the shader. */
-        bool isVertexBufferUsed(uint32_t mslBuffer) const;
+		/** Returns the number of vertex attributes bound to the specified Vulkan buffer binding, and used by the shader. */
+		uint32_t countVertexAttributesAt(uint32_t binding) const;
+
+        /** Returns whether the vertex buffer at the specified Vulkan binding is used by the shader. */
+		bool isVertexBufferUsed(uint32_t binding) const { return countVertexAttributesAt(binding) > 0; }
 
 		/** Marks all vertex attributes and resources as being used by the shader. */
 		void markAllAttributesAndResourcesUsed();