Merge pull request #935 from billhollings/master

Support iOS Simulator and tvOS Simulator, plus further tvOS integration
diff --git a/ExternalRevisions/SPIRV-Cross_repo_revision b/ExternalRevisions/SPIRV-Cross_repo_revision
index 9fa21a3..46387f8 100644
--- a/ExternalRevisions/SPIRV-Cross_repo_revision
+++ b/ExternalRevisions/SPIRV-Cross_repo_revision
@@ -1 +1 @@
-d385bf096f5dabbc4cdaeb6872b0f64be1a63ad0
+a64484f62b55d2ded4639fb248e21e835606d2ee
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index 71e165e..37c27d0 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -611,6 +611,7 @@
 	VkBool32 placementHeaps;					/**< If true, MTLHeap objects support placement of resources. */
 	VkDeviceSize pushConstantSizeAlignment;		/**< The alignment used internally when allocating memory for push constants. Must be PoT. */
 	uint32_t maxTextureLayers;					/**< The maximum number of layers in an array texture. */
+    uint32_t subgroupSize;			            /**< The number of threads in a SIMD-group. */
 } MVKPhysicalDeviceMetalFeatures;
 
 /** MoltenVK performance of a particular type of activity. */
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
index 04f1427..ee10bdd 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
@@ -72,10 +72,13 @@
 #pragma mark Metal
 
 	/** Returns the Metal buffer underlying this memory allocation. */
-	id<MTLBuffer> getMTLBuffer();
+    id<MTLBuffer> getMTLBuffer();
 
 	/** Returns the offset at which the contents of this instance starts within the underlying Metal buffer. */
-	inline NSUInteger getMTLBufferOffset() { return !_deviceMemory || _deviceMemory->getMTLHeap() || _isHostCoherentTexelBuffer ? 0 : _deviceMemoryOffset; }
+	inline NSUInteger getMTLBufferOffset() { return !_deviceMemory || _deviceMemory->getMTLHeap() ? 0 : _deviceMemoryOffset; }
+
+    /** Returns the Metal buffer used as a cache for host-coherent texel buffers. */
+    id<MTLBuffer> getMTLBufferCache();
 
 
 #pragma mark Construction
@@ -91,6 +94,7 @@
 	bool needsHostReadSync(VkPipelineStageFlags srcStageMask,
 						   VkPipelineStageFlags dstStageMask,
 						   MVKPipelineBarrier& barrier);
+    bool overlaps(VkDeviceSize offset, VkDeviceSize size, VkDeviceSize &overlapOffset, VkDeviceSize &overlapSize);
 	bool shouldFlushHostMemory();
 	VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size);
 	VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size);
@@ -98,7 +102,9 @@
 
 	VkBufferUsageFlags _usage;
 	bool _isHostCoherentTexelBuffer = false;
+    id<MTLBuffer> _mtlBufferCache = nil;
 	id<MTLBuffer> _mtlBuffer = nil;
+    std::mutex _lock;
 };
 
 
@@ -131,9 +137,9 @@
 	void propagateDebugName() override;
 
     MVKBuffer* _buffer;
+    NSUInteger _offset;
 	id<MTLTexture> _mtlTexture;
 	MTLPixelFormat _mtlPixelFormat;
-    NSUInteger _mtlBufferOffset;
 	NSUInteger _mtlBytesPerRow;
     VkExtent2D _textureSize;
 	std::mutex _lock;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
index d22c994..91054c6 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
@@ -136,6 +136,18 @@
 #endif
 }
 
+bool MVKBuffer::overlaps(VkDeviceSize offset, VkDeviceSize size, VkDeviceSize &overlapOffset, VkDeviceSize &overlapSize) {	// Intersects the range [offset, offset + size) with the range [_deviceMemoryOffset, _deviceMemoryOffset + _byteCount) of this buffer.
+    VkDeviceSize end = offset + size;	// One past the last requested byte. NOTE(review): not checked for wraparound - confirm callers never pass VK_WHOLE_SIZE here.
+    VkDeviceSize bufferEnd = _deviceMemoryOffset + _byteCount;	// One past the last byte of this buffer's range.
+    if (offset < bufferEnd && end > _deviceMemoryOffset) {	// Two half-open ranges intersect only if each one starts before the other ends.
+        overlapOffset = max(_deviceMemoryOffset, offset);	// Expressed in the same coordinates as offset, not relative to the start of this buffer.
+        overlapSize = min(bufferEnd, end) - overlapOffset;
+        return true;
+    }
+
+    return false;	// No intersection: overlapOffset and overlapSize are left unmodified.
+}
+
 #if MVK_MACOS
 bool MVKBuffer::shouldFlushHostMemory() { return _isHostCoherentTexelBuffer; }
 #endif
@@ -143,9 +155,10 @@
 // Flushes the device memory at the specified memory range into the MTLBuffer.
 VkResult MVKBuffer::flushToDevice(VkDeviceSize offset, VkDeviceSize size) {
 #if MVK_MACOS
-	if (shouldFlushHostMemory()) {
-		memcpy(getMTLBuffer().contents, reinterpret_cast<const char *>(_deviceMemory->getHostMemoryAddress()) + offset, size);
-		[getMTLBuffer() didModifyRange: NSMakeRange(0, size)];
+    VkDeviceSize flushOffset, flushSize;	// Part of the requested range that lies inside this buffer, in the same coordinates as offset.
+	if (shouldFlushHostMemory() && _mtlBufferCache && overlaps(offset, size, flushOffset, flushSize)) {
+		memcpy(reinterpret_cast<char *>(_mtlBufferCache.contents) + flushOffset - _deviceMemoryOffset, reinterpret_cast<const char *>(_deviceMemory->getHostMemoryAddress()) + flushOffset, flushSize);	// The cache begins at this buffer's first byte, so rebase the destination by _deviceMemoryOffset (mirrors pullFromDevice()).
+		[_mtlBufferCache didModifyRange: NSMakeRange(flushOffset - _deviceMemoryOffset, flushSize)];	// Report exactly the buffer-relative range that was just written.
 	}
 #endif
 	return VK_SUCCESS;
@@ -154,8 +167,9 @@
 // Pulls content from the MTLBuffer into the device memory at the specified memory range.
 VkResult MVKBuffer::pullFromDevice(VkDeviceSize offset, VkDeviceSize size) {
 #if MVK_MACOS
-	if (shouldFlushHostMemory()) {
-		memcpy(reinterpret_cast<char *>(_deviceMemory->getHostMemoryAddress()) + offset, getMTLBuffer().contents, size);
+    VkDeviceSize pullOffset, pullSize;	// Part of the requested range that lies inside this buffer, in the same coordinates as offset.
+	if (shouldFlushHostMemory() && _mtlBufferCache && overlaps(offset, size, pullOffset, pullSize)) {	// Nothing to copy when no cache exists or the range misses this buffer.
+		memcpy(reinterpret_cast<char *>(_deviceMemory->getHostMemoryAddress()) + pullOffset, reinterpret_cast<char *>(_mtlBufferCache.contents) + pullOffset - _deviceMemoryOffset, pullSize);	// Host memory is addressed by pullOffset directly; the cache is addressed relative to _deviceMemoryOffset.
 	}
 #endif
 	return VK_SUCCESS;
@@ -168,20 +182,13 @@
 	if (_mtlBuffer) { return _mtlBuffer; }
 	if (_deviceMemory) {
 		if (_deviceMemory->getMTLHeap()) {
+            lock_guard<mutex> lock(_lock);
+            if (_mtlBuffer) { return _mtlBuffer; }
 			_mtlBuffer = [_deviceMemory->getMTLHeap() newBufferWithLength: getByteCount()
 																  options: _deviceMemory->getMTLResourceOptions()
 																   offset: _deviceMemoryOffset];	// retained
 			propagateDebugName();
 			return _mtlBuffer;
-#if MVK_MACOS
-		} else if (_isHostCoherentTexelBuffer) {
-			// According to the Vulkan spec, buffers, like linear images, can always use host-coherent memory.
-			// But texel buffers on Mac cannot use shared memory. So we need to use host-cached memory here.
-			_mtlBuffer = [_device->getMTLDevice() newBufferWithLength: getByteCount()
-															  options: MTLResourceStorageModeManaged];	// retained
-			propagateDebugName();
-			return _mtlBuffer;
-#endif
 		} else {
 			return _deviceMemory->getMTLBuffer();
 		}
@@ -189,6 +196,19 @@
 	return nil;
 }
 
+id<MTLBuffer> MVKBuffer::getMTLBufferCache() {	// Returns _mtlBufferCache, creating it on first use on macOS for host-coherent texel buffers; otherwise returns whatever _mtlBufferCache currently holds.
+#if MVK_MACOS
+    if (_isHostCoherentTexelBuffer && !_mtlBufferCache) {	// Unlocked fast-path test; the cache is only ever created for host-coherent texel buffers.
+        lock_guard<mutex> lock(_lock);
+        if (_mtlBufferCache) { return _mtlBufferCache; }	// Re-test under the lock in case another caller created the cache in the meantime.
+
+        _mtlBufferCache = [_device->getMTLDevice() newBufferWithLength: getByteCount()
+                                                               options: MTLResourceStorageModeManaged];    // retained
+        flushToDevice(_deviceMemoryOffset, _byteCount);	// Fill the new cache by flushing this buffer's entire range.
+    }
+#endif
+    return _mtlBufferCache;
+}
 
 #pragma mark Construction
 
@@ -222,6 +242,7 @@
 MVKBuffer::~MVKBuffer() {
 	if (_deviceMemory) { _deviceMemory->removeBuffer(this); }
 	if (_mtlBuffer) { [_mtlBuffer release]; }
+    if (_mtlBufferCache) { [_mtlBufferCache release]; }
 }
 
 
@@ -245,7 +266,15 @@
         if ( mvkIsAnyFlagEnabled(_buffer->getUsage(), VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) ) {
             usage |= MTLTextureUsageShaderWrite;
         }
-        id<MTLBuffer> mtlBuff = _buffer->getMTLBuffer();
+        id<MTLBuffer> mtlBuff;
+        VkDeviceSize mtlBuffOffset;
+        if (MVK_MACOS && _buffer->isMemoryHostCoherent()) {
+            mtlBuff = _buffer->getMTLBufferCache();
+            mtlBuffOffset = _offset;
+        } else {
+            mtlBuff = _buffer->getMTLBuffer();
+            mtlBuffOffset = _buffer->getMTLBufferOffset() + _offset;
+        }
         MTLTextureDescriptor* mtlTexDesc;
         if ( _device->_pMetalFeatures->textureBuffers ) {
             mtlTexDesc = [MTLTextureDescriptor textureBufferDescriptorWithPixelFormat: _mtlPixelFormat
@@ -262,7 +291,7 @@
             mtlTexDesc.usage = usage;
         }
 		_mtlTexture = [mtlBuff newTextureWithDescriptor: mtlTexDesc
-												 offset: _mtlBufferOffset
+												 offset: mtlBuffOffset
 											bytesPerRow: _mtlBytesPerRow];
 		propagateDebugName();
     }
@@ -275,7 +304,7 @@
 MVKBufferView::MVKBufferView(MVKDevice* device, const VkBufferViewCreateInfo* pCreateInfo) : MVKVulkanAPIDeviceObject(device) {
 	MVKPixelFormats* pixFmts = getPixelFormats();
     _buffer = (MVKBuffer*)pCreateInfo->buffer;
-    _mtlBufferOffset = _buffer->getMTLBufferOffset() + pCreateInfo->offset;
+    _offset = pCreateInfo->offset;
     _mtlPixelFormat = pixFmts->getMTLPixelFormat(pCreateInfo->format);
     VkExtent2D fmtBlockSize = pixFmts->getBlockTexelSize(pCreateInfo->format);  // Pixel size of format
     size_t bytesPerBlock = pixFmts->getBytesPerBlock(pCreateInfo->format);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index bef2153..7e4ff30 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -233,6 +233,28 @@
                 inlineUniformBlockProps->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = _properties.limits.maxDescriptorSetUniformBuffers;
 				break;
 			}
+#if MVK_MACOS
+            case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES:
+                if (mvkOSVersionIsAtLeast(10.14)) {
+                    auto* subgroupProps = (VkPhysicalDeviceSubgroupProperties*)next;
+                    subgroupProps->subgroupSize = _metalFeatures.subgroupSize;
+                    subgroupProps->supportedStages =
+                        VK_SHADER_STAGE_COMPUTE_BIT |
+                        VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
+                        VK_SHADER_STAGE_FRAGMENT_BIT;
+                    subgroupProps->supportedOperations =
+                        VK_SUBGROUP_FEATURE_BASIC_BIT |
+                        VK_SUBGROUP_FEATURE_VOTE_BIT |
+                        VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
+                        VK_SUBGROUP_FEATURE_BALLOT_BIT |
+                        VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
+                        VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
+                        // VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
+                        VK_SUBGROUP_FEATURE_QUAD_BIT;
+                    subgroupProps->quadOperationsInAllStages = true;
+                }
+				break;
+#endif
 			default:
 				break;
 		}
@@ -1094,6 +1116,13 @@
         }
     }
 
+#if MVK_MACOS
+    if (mvkOSVersionIsAtLeast(10.14)) {
+        static const uint32_t kAMDVendorId = 0x1002;
+        _metalFeatures.subgroupSize = (_properties.vendorID == kAMDVendorId) ? 64 : 32;
+    }
+#endif
+
 #define setMSLVersion(maj, min)	\
 	_metalFeatures.mslVersion = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::make_msl_version(maj, min);
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index 05da8a8..0f3523b 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -1419,12 +1419,9 @@
 
         // VK_KHR_maintenance1 supports taking 2D image views of 3D slices. No dice in Metal.
         if ((viewType == VK_IMAGE_VIEW_TYPE_2D || viewType == VK_IMAGE_VIEW_TYPE_2D_ARRAY) && (imgType == VK_IMAGE_TYPE_3D)) {
-            if (pCreateInfo->subresourceRange.layerCount != _image->_extent.depth) {
-                reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImageView(): Metal does not fully support views on a subset of a 3D texture.");
-            }
             if ( !mvkIsAnyFlagEnabled(_usage, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ) {
                 setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImageView(): 2D views on 3D images can only be used as color attachments."));
-            } else if (mvkIsOnlyAnyFlagEnabled(_usage, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) {
+            } else if (!mvkIsOnlyAnyFlagEnabled(_usage, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) {
                 reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImageView(): 2D views on 3D images can only be used as color attachments.");
             }
         }
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
index 5f01d27..a8c48eb 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
@@ -267,7 +267,7 @@
 	bool addVertexShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderContext);
 	bool addTessCtlShaderToPipeline(MTLComputePipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderContext, SPIRVShaderOutputs& prevOutput);
 	bool addTessEvalShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderContext, SPIRVShaderOutputs& prevOutput);
-    bool addFragmentShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderContext);
+    bool addFragmentShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderContext, SPIRVShaderOutputs& prevOutput);
 	bool addVertexInputToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkPipelineVertexInputStateCreateInfo* pVI, const SPIRVToMSLConversionConfiguration& shaderContext);
     void addTessellationToPipeline(MTLRenderPipelineDescriptor* plDesc, const SPIRVTessReflectionData& reflectData, const VkPipelineTessellationStateCreateInfo* pTS);
     void addFragmentOutputToPipeline(MTLRenderPipelineDescriptor* plDesc, const SPIRVTessReflectionData& reflectData, const VkGraphicsPipelineCreateInfo* pCreateInfo, bool isTessellationVertexPipeline = false);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index 2afcc78..026a968 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -466,15 +466,23 @@
 
 	MTLRenderPipelineDescriptor* plDesc = [MTLRenderPipelineDescriptor new];	// retained
 
+	SPIRVShaderOutputs vtxOutputs;	// Vertex-stage outputs, handed to addFragmentShaderToPipeline() below.
+	std::string errorLog;
+	if (!getShaderOutputs(((MVKShaderModule*)_pVertexSS->module)->getSPIRV(), spv::ExecutionModelVertex, _pVertexSS->pName, vtxOutputs, errorLog) ) {
+		setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Failed to get vertex outputs: %s", errorLog.c_str()));
+		[plDesc release]; return nil;	// plDesc was created with +new above (retained); release it on this early exit so it is not leaked.
+	}
+
 	// Add shader stages. Compile vertex shader before others just in case conversion changes anything...like rasterizaion disable.
 	if (!addVertexShaderToPipeline(plDesc, pCreateInfo, shaderContext)) { return nil; }
 
-	// Fragment shader - only add if rasterization is enabled
-	if (!addFragmentShaderToPipeline(plDesc, pCreateInfo, shaderContext)) { return nil; }
-
 	// Vertex input
+	// This needs to happen before compiling the fragment shader, or we'll lose information on vertex attributes.
 	if (!addVertexInputToPipeline(plDesc, pCreateInfo->pVertexInputState, shaderContext)) { return nil; }
 
+	// Fragment shader - only add if rasterization is enabled
+	if (!addFragmentShaderToPipeline(plDesc, pCreateInfo, shaderContext, vtxOutputs)) { return nil; }
+
 	// Output
 	addFragmentOutputToPipeline(plDesc, reflectData, pCreateInfo);
 
@@ -625,7 +633,7 @@
 	for (const SPIRVShaderOutput& output : vtxOutputs) {
 		if (output.builtin == spv::BuiltInPointSize && !reflectData.pointMode) { continue; }
 		offset = (uint32_t)mvkAlignByteCount(offset, sizeOfOutput(output));
-		if (shaderContext.isVertexAttributeLocationUsed(output.location)) {
+		if (shaderContext.isShaderInputLocationUsed(output.location)) {
 			plDesc.stageInputDescriptor.attributes[output.location].bufferIndex = kMVKTessCtlInputBufferIndex;
 			plDesc.stageInputDescriptor.attributes[output.location].format = (MTLAttributeFormat)getPixelFormats()->getMTLVertexFormat(mvkFormatFromOutput(output));
 			plDesc.stageInputDescriptor.attributes[output.location].offset = offset;
@@ -653,12 +661,16 @@
 																				  SPIRVToMSLConversionConfiguration& shaderContext) {
 	MTLRenderPipelineDescriptor* plDesc = [MTLRenderPipelineDescriptor new];	// retained
 
-	SPIRVShaderOutputs tcOutputs;
+	SPIRVShaderOutputs tcOutputs, teOutputs;
 	std::string errorLog;
 	if (!getShaderOutputs(((MVKShaderModule*)_pTessCtlSS->module)->getSPIRV(), spv::ExecutionModelTessellationControl, _pTessCtlSS->pName, tcOutputs, errorLog) ) {
 		setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Failed to get tessellation control outputs: %s", errorLog.c_str()));
 		return nil;
 	}
+	if (!getShaderOutputs(((MVKShaderModule*)_pTessEvalSS->module)->getSPIRV(), spv::ExecutionModelTessellationEvaluation, _pTessEvalSS->pName, teOutputs, errorLog) ) {
+		setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Failed to get tessellation evaluation outputs: %s", errorLog.c_str()));
+		[plDesc release]; return nil;	// plDesc was created with +new above (retained); release it on this early exit so it is not leaked.
+	}
 
 	// Add shader stages. Compile tessellation evaluation shader before others just in case conversion changes anything...like rasterizaion disable.
 	if (!addTessEvalShaderToPipeline(plDesc, pCreateInfo, shaderContext, tcOutputs)) {
@@ -666,20 +678,15 @@
 		return nil;
 	}
 
-	// Fragment shader - only add if rasterization is enabled
-	if (!addFragmentShaderToPipeline(plDesc, pCreateInfo, shaderContext)) {
-		[plDesc release];
-		return nil;
-	}
-
-	// Stage input
+	// Tessellation evaluation stage input
+	// This needs to happen before compiling the fragment shader, or we'll lose information on shader inputs.
 	plDesc.vertexDescriptor = [MTLVertexDescriptor vertexDescriptor];
 	uint32_t offset = 0, patchOffset = 0, outerLoc = -1, innerLoc = -1;
 	bool usedPerVertex = false, usedPerPatch = false;
 	const SPIRVShaderOutput* firstVertex = nullptr, * firstPatch = nullptr;
 	for (const SPIRVShaderOutput& output : tcOutputs) {
 		if (output.builtin == spv::BuiltInPointSize && !reflectData.pointMode) { continue; }
-		if (!shaderContext.isVertexAttributeLocationUsed(output.location)) {
+		if (!shaderContext.isShaderInputLocationUsed(output.location)) {
 			if (output.perPatch && !(output.builtin == spv::BuiltInTessLevelOuter || output.builtin == spv::BuiltInTessLevelInner) ) {
 				if (!firstPatch) { firstPatch = &output; }
 				patchOffset += sizeOfOutput(output);
@@ -749,6 +756,12 @@
 																   sizeof(MTLQuadTessellationFactorsHalf);
 	}
 
+	// Fragment shader - only add if rasterization is enabled
+	if (!addFragmentShaderToPipeline(plDesc, pCreateInfo, shaderContext, teOutputs)) {
+		[plDesc release];
+		return nil;
+	}
+
 	// Tessellation state
 	addTessellationToPipeline(plDesc, reflectData, pCreateInfo->pTessellationState);
 
@@ -909,13 +922,15 @@
 
 bool MVKGraphicsPipeline::addFragmentShaderToPipeline(MTLRenderPipelineDescriptor* plDesc,
 													  const VkGraphicsPipelineCreateInfo* pCreateInfo,
-													  SPIRVToMSLConversionConfiguration& shaderContext) {
+													  SPIRVToMSLConversionConfiguration& shaderContext,
+													  SPIRVShaderOutputs& shaderOutputs) {
 	if (_pFragmentSS) {
 		shaderContext.options.entryPointStage = spv::ExecutionModelFragment;
 		shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageFragment];
 		shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageFragment];
 		shaderContext.options.entryPointName = _pFragmentSS->pName;
 		shaderContext.options.mslOptions.capture_output_to_buffer = false;
+		addPrevStageOutputToShaderConverterContext(shaderContext, shaderOutputs);
 
 		MVKMTLFunction func = ((MVKShaderModule*)_pFragmentSS->module)->getMTLFunction(&shaderContext, _pFragmentSS->pSpecializationInfo, _pipelineCache);
 		id<MTLFunction> mtlFunc = func.getMTLFunction();
@@ -1005,7 +1020,7 @@
 	uint32_t vaCnt = pVI->vertexAttributeDescriptionCount;
 	for (uint32_t i = 0; i < vaCnt; i++) {
 		const VkVertexInputAttributeDescription* pVKVA = &pVI->pVertexAttributeDescriptions[i];
-		if (shaderContext.isVertexAttributeLocationUsed(pVKVA->location)) {
+		if (shaderContext.isShaderInputLocationUsed(pVKVA->location)) {
 			uint32_t vaBinding = pVKVA->binding;
 			uint32_t vaOffset = pVKVA->offset;
 
@@ -1052,7 +1067,7 @@
 	// but at an offset that is one or more strides away from the original.
 	for (uint32_t i = 0; i < vbCnt; i++) {
 		const VkVertexInputBindingDescription* pVKVB = &pVI->pVertexBindingDescriptions[i];
-		uint32_t vbVACnt = shaderContext.countVertexAttributesAt(pVKVB->binding);
+		uint32_t vbVACnt = shaderContext.countShaderInputsAt(pVKVB->binding);
 		if (vbVACnt > 0) {
 			uint32_t vbIdx = getMetalBufferIndexForVertexAttributeBinding(pVKVB->binding);
 			MTLVertexBufferLayoutDescriptor* vbDesc = plDesc.vertexDescriptor.layouts[vbIdx];
@@ -1249,15 +1264,15 @@
 void MVKGraphicsPipeline::addVertexInputToShaderConverterContext(SPIRVToMSLConversionConfiguration& shaderContext,
                                                                  const VkGraphicsPipelineCreateInfo* pCreateInfo) {
     // Set the shader context vertex attribute information
-    shaderContext.vertexAttributes.clear();
+    shaderContext.shaderInputs.clear();
     uint32_t vaCnt = pCreateInfo->pVertexInputState->vertexAttributeDescriptionCount;
     for (uint32_t vaIdx = 0; vaIdx < vaCnt; vaIdx++) {
         const VkVertexInputAttributeDescription* pVKVA = &pCreateInfo->pVertexInputState->pVertexAttributeDescriptions[vaIdx];
 
         // Set binding and offset from Vulkan vertex attribute
-        MSLVertexAttribute va;
-        va.vertexAttribute.location = pVKVA->location;
-        va.binding = pVKVA->binding;
+        mvk::MSLShaderInput si;
+        si.shaderInput.location = pVKVA->location;
+        si.binding = pVKVA->binding;
 
         // Metal can't do signedness conversions on vertex buffers (rdar://45922847). If the shader
         // and the vertex attribute have mismatched signedness, we have to fix the shader
@@ -1266,11 +1281,11 @@
         // declared type. Programs that try to invoke undefined behavior are on their own.
         switch (getPixelFormats()->getFormatType(pVKVA->format) ) {
         case kMVKFormatColorUInt8:
-            va.vertexAttribute.format = MSL_VERTEX_FORMAT_UINT8;
+            si.shaderInput.format = MSL_VERTEX_FORMAT_UINT8;
             break;
 
         case kMVKFormatColorUInt16:
-            va.vertexAttribute.format = MSL_VERTEX_FORMAT_UINT16;
+            si.shaderInput.format = MSL_VERTEX_FORMAT_UINT16;
             break;
 
         case kMVKFormatDepthStencil:
@@ -1280,7 +1295,7 @@
             case VK_FORMAT_D16_UNORM_S8_UINT:
             case VK_FORMAT_D24_UNORM_S8_UINT:
             case VK_FORMAT_D32_SFLOAT_S8_UINT:
-                va.vertexAttribute.format = MSL_VERTEX_FORMAT_UINT8;
+                si.shaderInput.format = MSL_VERTEX_FORMAT_UINT8;
                 break;
 
             default:
@@ -1293,35 +1308,36 @@
 
         }
 
-        shaderContext.vertexAttributes.push_back(va);
+        shaderContext.shaderInputs.push_back(si);
     }
 }
 
-// Initializes the vertex attributes in a shader converter context from the previous stage output.
+// Initializes the shader inputs in a shader converter context from the previous stage output.
 void MVKGraphicsPipeline::addPrevStageOutputToShaderConverterContext(SPIRVToMSLConversionConfiguration& shaderContext,
                                                                      SPIRVShaderOutputs& shaderOutputs) {
-    // Set the shader context vertex attribute information
-    shaderContext.vertexAttributes.clear();
-    uint32_t vaCnt = (uint32_t)shaderOutputs.size();
-    for (uint32_t vaIdx = 0; vaIdx < vaCnt; vaIdx++) {
-        MSLVertexAttribute va;
-        va.vertexAttribute.location = shaderOutputs[vaIdx].location;
-        va.vertexAttribute.builtin = shaderOutputs[vaIdx].builtin;
+    // Set the shader context input variable information
+    shaderContext.shaderInputs.clear();
+    uint32_t siCnt = (uint32_t)shaderOutputs.size();
+    for (uint32_t siIdx = 0; siIdx < siCnt; siIdx++) {
+        mvk::MSLShaderInput si;
+        si.shaderInput.location = shaderOutputs[siIdx].location;
+        si.shaderInput.builtin = shaderOutputs[siIdx].builtin;
+        si.shaderInput.vecsize = shaderOutputs[siIdx].vecWidth;
 
-        switch (getPixelFormats()->getFormatType(mvkFormatFromOutput(shaderOutputs[vaIdx]) ) ) {
+        switch (getPixelFormats()->getFormatType(mvkFormatFromOutput(shaderOutputs[siIdx]) ) ) {
             case kMVKFormatColorUInt8:
-                va.vertexAttribute.format = MSL_VERTEX_FORMAT_UINT8;
+                si.shaderInput.format = MSL_VERTEX_FORMAT_UINT8;
                 break;
 
             case kMVKFormatColorUInt16:
-                va.vertexAttribute.format = MSL_VERTEX_FORMAT_UINT16;
+                si.shaderInput.format = MSL_VERTEX_FORMAT_UINT16;
                 break;
 
             default:
                 break;
         }
 
-        shaderContext.vertexAttributes.push_back(va);
+        shaderContext.shaderInputs.push_back(si);
     }
 }
 
@@ -1709,10 +1725,11 @@
 	}
 
 	template<class Archive>
-	void serialize(Archive & archive, MSLVertexAttr& va) {
-		archive(va.location,
-				va.format,
-				va.builtin);
+	void serialize(Archive & archive, MSLShaderInput& si) {
+		archive(si.location,
+				si.format,
+				si.builtin,
+				si.vecsize);
 	}
 
 	template<class Archive>
@@ -1784,10 +1801,10 @@
 	}
 
 	template<class Archive>
-	void serialize(Archive & archive, MSLVertexAttribute& va) {
-		archive(va.vertexAttribute,
-				va.binding,
-				va.isUsedByShader);
+	void serialize(Archive & archive, MSLShaderInput& si) {
+		archive(si.shaderInput,
+				si.binding,
+				si.isUsedByShader);
 	}
 
 	template<class Archive>
@@ -1801,7 +1818,7 @@
 	template<class Archive>
 	void serialize(Archive & archive, SPIRVToMSLConversionConfiguration& ctx) {
 		archive(ctx.options,
-				ctx.vertexAttributes,
+				ctx.shaderInputs,
 				ctx.resourceBindings);
 	}
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
index b0ebe66..4d02a67 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm
@@ -278,7 +278,7 @@
 		_device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.shaderLibraryFromCache, startTime);
 	} else {
 		mvkLib->setEntryPointName(pContext->options.entryPointName);
-		pContext->markAllAttributesAndResourcesUsed();
+		pContext->markAllInputsAndResourcesUsed();
 	}
 
 	return mvkLib ? mvkLib->getMTLFunction(pSpecializationInfo, this) : MVKMTLFunctionNull;
diff --git a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp
index cfbe20a..ef03423 100644
--- a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp
+++ b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp
@@ -104,10 +104,11 @@
 #endif
 }
 
-MVK_PUBLIC_SYMBOL bool MSLVertexAttribute::matches(const MSLVertexAttribute& other) const {
-	if (vertexAttribute.location != other.vertexAttribute.location) { return false; }
-	if (vertexAttribute.format != other.vertexAttribute.format) { return false; }
-	if (vertexAttribute.builtin != other.vertexAttribute.builtin) { return false; }
+MVK_PUBLIC_SYMBOL bool mvk::MSLShaderInput::matches(const mvk::MSLShaderInput& other) const {
+	if (shaderInput.location != other.shaderInput.location) { return false; }
+	if (shaderInput.format != other.shaderInput.format) { return false; }
+	if (shaderInput.builtin != other.shaderInput.builtin) { return false; }
+	if (shaderInput.vecsize != other.shaderInput.vecsize) { return false; }
 	if (binding != other.binding) { return false; }
 	return true;
 }
@@ -164,26 +165,23 @@
 }
 
 // Check them all in case inactive VA's duplicate locations used by active VA's.
-MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::isVertexAttributeLocationUsed(uint32_t location) const {
-    for (auto& va : vertexAttributes) {
-        if ((va.vertexAttribute.location == location) && va.isUsedByShader) { return true; }
+MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::isShaderInputLocationUsed(uint32_t location) const {
+    for (auto& si : shaderInputs) {
+        if ((si.shaderInput.location == location) && si.isUsedByShader) { return true; }
     }
     return false;
 }
 
-MVK_PUBLIC_SYMBOL uint32_t SPIRVToMSLConversionConfiguration::countVertexAttributesAt(uint32_t binding) const {
-	uint32_t vaCnt = 0;
-	for (auto& va : vertexAttributes) {
-		if ((va.binding == binding) && va.isUsedByShader) { vaCnt++; }
+MVK_PUBLIC_SYMBOL uint32_t SPIRVToMSLConversionConfiguration::countShaderInputsAt(uint32_t binding) const {
+	uint32_t siCnt = 0;
+	for (auto& si : shaderInputs) {
+		if ((si.binding == binding) && si.isUsedByShader) { siCnt++; }
 	}
-	return vaCnt;
+	return siCnt;
 }
 
-MVK_PUBLIC_SYMBOL void SPIRVToMSLConversionConfiguration::markAllAttributesAndResourcesUsed() {
-	if (stageSupportsVertexAttributes()) {
-		for (auto& va : vertexAttributes) { va.isUsedByShader = true; }
-	}
-
+MVK_PUBLIC_SYMBOL void SPIRVToMSLConversionConfiguration::markAllInputsAndResourcesUsed() {
+	for (auto& si : shaderInputs) { si.isUsedByShader = true; }
 	for (auto& rb : resourceBindings) { rb.isUsedByShader = true; }
 }
 
@@ -191,10 +189,8 @@
 
     if ( !options.matches(other.options) ) { return false; }
 
-	if (stageSupportsVertexAttributes()) {
-		for (const auto& va : vertexAttributes) {
-			if (va.isUsedByShader && !containsMatching(other.vertexAttributes, va)) { return false; }
-		}
+	for (const auto& si : shaderInputs) {
+		if (si.isUsedByShader && !containsMatching(other.shaderInputs, si)) { return false; }
 	}
 
     for (const auto& rb : resourceBindings) {
@@ -207,12 +203,10 @@
 
 MVK_PUBLIC_SYMBOL void SPIRVToMSLConversionConfiguration::alignWith(const SPIRVToMSLConversionConfiguration& srcContext) {
 
-	if (stageSupportsVertexAttributes()) {
-		for (auto& va : vertexAttributes) {
-			va.isUsedByShader = false;
-			for (auto& srcVA : srcContext.vertexAttributes) {
-				if (va.matches(srcVA)) { va.isUsedByShader = srcVA.isUsedByShader; }
-			}
+	for (auto& si : shaderInputs) {
+		si.isUsedByShader = false;
+		for (auto& srcSI : srcContext.shaderInputs) {
+			if (si.matches(srcSI)) { si.isUsedByShader = srcSI.isUsedByShader; }
 		}
 	}
 
@@ -283,11 +277,9 @@
 		scOpts.vertex.flip_vert_y = context.options.shouldFlipVertexY;
 		pMSLCompiler->set_common_options(scOpts);
 
-		// Add vertex attributes
-		if (context.stageSupportsVertexAttributes()) {
-			for (auto& va : context.vertexAttributes) {
-				pMSLCompiler->add_msl_vertex_attribute(va.vertexAttribute);
-			}
+		// Add shader inputs
+		for (auto& si : context.shaderInputs) {
+			pMSLCompiler->add_msl_shader_input(si.shaderInput);
 		}
 
 		// Add resource bindings and hardcoded constexpr samplers
@@ -327,10 +319,8 @@
 	_shaderConversionResults.needsInputThreadgroupMem = pMSLCompiler && pMSLCompiler->needs_input_threadgroup_mem();
 	_shaderConversionResults.needsDispatchBaseBuffer = pMSLCompiler && pMSLCompiler->needs_dispatch_base_buffer();
 
-	if (context.stageSupportsVertexAttributes()) {
-		for (auto& ctxVA : context.vertexAttributes) {
-			ctxVA.isUsedByShader = pMSLCompiler->is_msl_vertex_attribute_used(ctxVA.vertexAttribute.location);
-		}
+	for (auto& ctxSI : context.shaderInputs) {
+		ctxSI.isUsedByShader = pMSLCompiler->is_msl_shader_input_used(ctxSI.shaderInput.location);
 	}
 	for (auto& ctxRB : context.resourceBindings) {
 		ctxRB.isUsedByShader = pMSLCompiler->is_msl_resource_binding_used(ctxRB.resourceBinding.stage,
diff --git a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h
index 0d5ddb7..22d405b 100644
--- a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h
+++ b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h
@@ -68,11 +68,11 @@
 	 * shader conversion configurations to be compared only against the attributes that are
 	 * actually used by the shader.
 	 *
-	 * THIS STRUCT IS STREAMED OUT AS PART OF THE PIEPLINE CACHE.
+	 * THIS STRUCT IS STREAMED OUT AS PART OF THE PIPELINE CACHE.
 	 * CHANGES TO THIS STRUCT SHOULD BE CAPTURED IN THE STREAMING LOGIC OF THE PIPELINE CACHE.
 	 */
-	typedef struct MSLVertexAttribute {
-		SPIRV_CROSS_NAMESPACE::MSLVertexAttr vertexAttribute;
+	typedef struct MSLShaderInput {
+		SPIRV_CROSS_NAMESPACE::MSLShaderInput shaderInput;
 
 		uint32_t binding = 0;
 		bool isUsedByShader = false;
@@ -81,9 +81,9 @@
 		 * Returns whether the specified vertex attribute match this one.
 		 * It does if all corresponding elements except isUsedByShader are equal.
 		 */
-		bool matches(const MSLVertexAttribute& other) const;
+		bool matches(const MSLShaderInput& other) const;
 
-	} MSLVertexAttribute;
+	} MSLShaderInput;
 
 	/**
 	 * Matches the binding index of a MSL resource for a binding within a descriptor set.
@@ -127,23 +127,23 @@
 	 */
 	typedef struct SPIRVToMSLConversionConfiguration {
 		SPIRVToMSLConversionOptions options;
-		std::vector<MSLVertexAttribute> vertexAttributes;
+		std::vector<MSLShaderInput> shaderInputs;
 		std::vector<MSLResourceBinding> resourceBindings;
 
 		/** Returns whether the pipeline stage being converted supports vertex attributes. */
 		bool stageSupportsVertexAttributes() const;
 
-        /** Returns whether the vertex attribute at the specified location is used by the shader. */
-        bool isVertexAttributeLocationUsed(uint32_t location) const;
+        /** Returns whether the shader input variable at the specified location is used by the shader. */
+        bool isShaderInputLocationUsed(uint32_t location) const;
 
-		/** Returns the number of vertex attributes bound to the specified Vulkan buffer binding, and used by the shader. */
-		uint32_t countVertexAttributesAt(uint32_t binding) const;
+		/** Returns the number of shader input variables bound to the specified Vulkan buffer binding, and used by the shader. */
+		uint32_t countShaderInputsAt(uint32_t binding) const;
 
         /** Returns whether the vertex buffer at the specified Vulkan binding is used by the shader. */
-		bool isVertexBufferUsed(uint32_t binding) const { return countVertexAttributesAt(binding) > 0; }
+		bool isVertexBufferUsed(uint32_t binding) const { return countShaderInputsAt(binding) > 0; }
 
-		/** Marks all vertex attributes and resources as being used by the shader. */
-		void markAllAttributesAndResourcesUsed();
+		/** Marks all input variables and resources as being used by the shader. */
+		void markAllInputsAndResourcesUsed();
 
         /**
          * Returns whether this configuration matches the other context. It does if the