Merge pull request #671 from cdavis5e/texel-buffer-alignment

Support the VK_EXT_texel_buffer_alignment extension.
diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md
index 4d891cc..ab4c480 100644
--- a/Docs/MoltenVK_Runtime_UserGuide.md
+++ b/Docs/MoltenVK_Runtime_UserGuide.md
@@ -253,12 +253,13 @@
 - `VK_EXT_debug_report`
 - `VK_EXT_debug_utils`
 - `VK_EXT_host_query_reset`
-- `VK_EXT_memory_budget`
+- `VK_EXT_memory_budget` *(requires Metal 2.0)*
 - `VK_EXT_metal_surface`
 - `VK_EXT_shader_stencil_export` *(requires Mac GPU family 2 or iOS GPU family 5)*
 - `VK_EXT_shader_viewport_index_layer`
 - `VK_EXT_swapchain_colorspace` *(macOS)*
 - `VK_EXT_vertex_attribute_divisor`
+- `VK_EXT_texel_buffer_alignment` *(requires Metal 2.0)*
 - `VK_EXTX_portability_subset`
 - `VK_MVK_ios_surface` *(iOS) (Obsolete. Use `VK_EXT_metal_surface` instead.)*
 - `VK_MVK_macos_surface` *(macOS) (Obsolete. Use `VK_EXT_metal_surface` instead.)*
diff --git a/ExternalRevisions/Vulkan-Headers_repo_revision b/ExternalRevisions/Vulkan-Headers_repo_revision
index 6dd277b..eb32d0f 100644
--- a/ExternalRevisions/Vulkan-Headers_repo_revision
+++ b/ExternalRevisions/Vulkan-Headers_repo_revision
@@ -1 +1 @@
-097a1045098213919fd56442f52c716fc78eeb27
+737f4c1cd96283747967c2024a0108b742214455
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 233d360..e22b724 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -336,6 +336,7 @@
 	VkPhysicalDeviceFeatures _features;
 	MVKPhysicalDeviceMetalFeatures _metalFeatures;
 	VkPhysicalDeviceProperties _properties;
+	VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT _texelBuffAlignProperties;
 	VkPhysicalDeviceMemoryProperties _memoryProperties;
 	std::vector<MVKQueueFamily*> _queueFamilies;
 	uint32_t _allMemoryTypes;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 4e8357b..3f3d823 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -69,8 +69,7 @@
     if (features) {
         features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
         features->features = _features;
-        auto* next = (VkBaseOutStructure*)features->pNext;
-        while (next) {
+        for (auto* next = (VkBaseOutStructure*)features->pNext; next; next = next->pNext) {
             switch ((uint32_t)next->sType) {
                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
                     auto* storageFeatures = (VkPhysicalDevice16BitStorageFeatures*)next;
@@ -109,6 +108,11 @@
                     hostQueryResetFeatures->hostQueryReset = true;
                     break;
                 }
+                case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
+                    auto* texelBuffAlignFeatures = (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT*)next;
+                    texelBuffAlignFeatures->texelBufferAlignment = _metalFeatures.texelBuffers && [_mtlDevice respondsToSelector: @selector(minimumLinearTextureAlignmentForPixelFormat:)];
+                    break;
+                }
                 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
                     auto* divisorFeatures = (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT*)next;
                     divisorFeatures->vertexAttributeInstanceRateDivisor = true;
@@ -127,7 +131,6 @@
                 default:
                     break;
             }
-            next = next->pNext;
         }
     }
 }
@@ -140,42 +143,44 @@
     if (properties) {
         properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
         properties->properties = _properties;
-		auto* next = (MVKVkAPIStructHeader*)properties->pNext;
-		while (next) {
+		for (auto* next = (VkBaseOutStructure*)properties->pNext; next; next = next->pNext) {
 			switch ((uint32_t)next->sType) {
             case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
                 auto* pointClipProps = (VkPhysicalDevicePointClippingProperties*)next;
                 pointClipProps->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
-                next = (MVKVkAPIStructHeader*)pointClipProps->pNext;
                 break;
             }
             case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
                 auto* maint3Props = (VkPhysicalDeviceMaintenance3Properties*)next;
-                maint3Props->maxPerSetDescriptors = (_metalFeatures.maxPerStageBufferCount + _metalFeatures.maxPerStageTextureCount + _metalFeatures.maxPerStageSamplerCount) * 2;
+                maint3Props->maxPerSetDescriptors = (_metalFeatures.maxPerStageBufferCount + _metalFeatures.maxPerStageTextureCount + _metalFeatures.maxPerStageSamplerCount) * 4;
                 maint3Props->maxMemoryAllocationSize = _metalFeatures.maxMTLBufferSize;
-                next = (MVKVkAPIStructHeader*)maint3Props->pNext;
                 break;
             }
             case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
                 auto* pushDescProps = (VkPhysicalDevicePushDescriptorPropertiesKHR*)next;
                 pushDescProps->maxPushDescriptors = _properties.limits.maxPerStageResources;
-                next = (MVKVkAPIStructHeader*)pushDescProps->pNext;
+                break;
+            }
+            case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
+                auto* texelBuffAlignProps = (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT*)next;
+                // Save the 'next' pointer; we'll unintentionally overwrite it
+                // on the next line. Put it back when we're done.
+                void* savedNext = texelBuffAlignProps->pNext;
+                *texelBuffAlignProps = _texelBuffAlignProperties;
+                texelBuffAlignProps->pNext = savedNext;
                 break;
             }
             case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
                 auto* divisorProps = (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT*)next;
                 divisorProps->maxVertexAttribDivisor = kMVKUndefinedLargeUInt32;
-                next = (MVKVkAPIStructHeader*)divisorProps->pNext;
                 break;
             }
 			case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PORTABILITY_SUBSET_PROPERTIES_EXTX: {
 				auto* portabilityProps = (VkPhysicalDevicePortabilitySubsetPropertiesEXTX*)next;
 				portabilityProps->minVertexInputBindingStrideAlignment = 4;
-				next = (MVKVkAPIStructHeader*)portabilityProps->pNext;
 				break;
 			}
             default:
-                next = (MVKVkAPIStructHeader*)next->pNext;
                 break;
             }
         }
@@ -1113,14 +1118,86 @@
     _properties.limits.bufferImageGranularity = _metalFeatures.mtlBufferAlignment;
     _properties.limits.nonCoherentAtomSize = _metalFeatures.mtlBufferAlignment;
 
+    if ([_mtlDevice respondsToSelector: @selector(minimumLinearTextureAlignmentForPixelFormat:)]) {
+        // Figure out the greatest alignment required by all supported formats, and
+        // whether or not they only require alignment to a single texel. We'll use this
+        // information to fill out the VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT
+        // struct.
+        uint32_t maxStorage = 0, maxUniform = 0;
+        bool singleTexelStorage = true, singleTexelUniform = true;
+        mvkEnumerateSupportedFormats({0, 0, VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT}, true, [&](VkFormat vk) {
+            NSUInteger alignment;
+            if ([_mtlDevice respondsToSelector: @selector(minimumTextureBufferAlignmentForPixelFormat:)]) {
+                alignment = [_mtlDevice minimumTextureBufferAlignmentForPixelFormat: mvkMTLPixelFormatFromVkFormat(vk)];
+            } else {
+                alignment = [_mtlDevice minimumLinearTextureAlignmentForPixelFormat: mvkMTLPixelFormatFromVkFormat(vk)];
+            }
+            VkFormatProperties props = mvkVkFormatProperties(vk, getFormatIsSupported(vk));
+            // For uncompressed formats, this is the size of a single texel.
+            // Note that no implementations of Metal support compressed formats
+            // in a linear texture (including texture buffers). It's likely that even
+            // if they did, this would be the absolute minimum alignment.
+            uint32_t texelSize = mvkVkFormatBytesPerBlock(vk);
+            // From the spec:
+            //   "If the size of a single texel is a multiple of three bytes, then
+            //    the size of a single component of the format is used instead."
+            if (texelSize % 3 == 0) {
+                switch (mvkFormatTypeFromVkFormat(vk)) {
+                case kMVKFormatColorInt8:
+                case kMVKFormatColorUInt8:
+                    texelSize = 1;
+                    break;
+                case kMVKFormatColorHalf:
+                case kMVKFormatColorInt16:
+                case kMVKFormatColorUInt16:
+                    texelSize = 2;
+                    break;
+                case kMVKFormatColorFloat:
+                case kMVKFormatColorInt32:
+                case kMVKFormatColorUInt32:
+                default:
+                    texelSize = 4;
+                    break;
+                }
+            }
+            if (mvkAreAllFlagsEnabled(props.bufferFeatures, VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) {
+                maxStorage = max(maxStorage, uint32_t(alignment));
+                if (alignment % texelSize != 0) { singleTexelStorage = false; }
+            }
+            if (mvkAreAllFlagsEnabled(props.bufferFeatures, VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT)) {
+                maxUniform = max(maxUniform, uint32_t(alignment));
+                if (alignment % texelSize != 0) { singleTexelUniform = false; }
+            }
+            return true;
+        });
+        _texelBuffAlignProperties.storageTexelBufferOffsetAlignmentBytes = maxStorage;
+        _texelBuffAlignProperties.storageTexelBufferOffsetSingleTexelAlignment = singleTexelStorage;
+        _texelBuffAlignProperties.uniformTexelBufferOffsetAlignmentBytes = maxUniform;
+        _texelBuffAlignProperties.uniformTexelBufferOffsetSingleTexelAlignment = singleTexelUniform;
+        _properties.limits.minTexelBufferOffsetAlignment = max(maxStorage, maxUniform);
+    } else {
+#if MVK_IOS
+        if ([_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily3_v1]) {
+            _properties.limits.minTexelBufferOffsetAlignment = 16;
+        } else {
+            _properties.limits.minTexelBufferOffsetAlignment = 64;
+        }
+#endif
+#if MVK_MACOS
+        _properties.limits.minTexelBufferOffsetAlignment = 256;
+#endif
+        _texelBuffAlignProperties.storageTexelBufferOffsetAlignmentBytes = _properties.limits.minTexelBufferOffsetAlignment;
+        _texelBuffAlignProperties.storageTexelBufferOffsetSingleTexelAlignment = VK_FALSE;
+        _texelBuffAlignProperties.uniformTexelBufferOffsetAlignmentBytes = _properties.limits.minTexelBufferOffsetAlignment;
+        _texelBuffAlignProperties.uniformTexelBufferOffsetSingleTexelAlignment = VK_FALSE;
+    }
+
 #if MVK_IOS
     _properties.limits.maxFragmentInputComponents = 60;
 
     if ([_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily3_v1]) {
-        _properties.limits.minTexelBufferOffsetAlignment = 16;
         _properties.limits.optimalBufferCopyOffsetAlignment = 16;
     } else {
-        _properties.limits.minTexelBufferOffsetAlignment = 64;
         _properties.limits.optimalBufferCopyOffsetAlignment = 64;
     }
 
@@ -1137,7 +1214,6 @@
 #endif
 #if MVK_MACOS
     _properties.limits.maxFragmentInputComponents = 128;
-    _properties.limits.minTexelBufferOffsetAlignment = 256;
     _properties.limits.optimalBufferCopyOffsetAlignment = 256;
 
     if ([_mtlDevice supportsFeatureSet: MTLFeatureSet_macOS_GPUFamily1_v2]) {
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
index 57c8328..b7bf198 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
@@ -62,6 +62,7 @@
 MVK_EXTENSION(EXT_shader_stencil_export, EXT_SHADER_STENCIL_EXPORT)
 MVK_EXTENSION(EXT_shader_viewport_index_layer, EXT_SHADER_VIEWPORT_INDEX_LAYER)
 MVK_EXTENSION(EXT_swapchain_colorspace, EXT_SWAPCHAIN_COLOR_SPACE)
+MVK_EXTENSION(EXT_texel_buffer_alignment, EXT_TEXEL_BUFFER_ALIGNMENT)
 MVK_EXTENSION(EXT_vertex_attribute_divisor, EXT_VERTEX_ATTRIBUTE_DIVISOR)
 MVK_EXTENSION(EXTX_portability_subset, EXTX_PORTABILITY_SUBSET)
 MVK_EXTENSION(MVK_ios_surface, MVK_IOS_SURFACE)
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
index 16d8e21..1ba1a70 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
@@ -53,6 +53,9 @@
 	if (pProperties == &kVkExtProps_EXT_SHADER_STENCIL_EXPORT) {
 		return mvkOSVersion() >= 10.14;
 	}
+	if (pProperties == &kVkExtProps_EXT_TEXEL_BUFFER_ALIGNMENT) {
+		return mvkOSVersion() >= 10.13;
+	}
 	if (pProperties == &kVkExtProps_MVK_IOS_SURFACE) { return false; }
 	if (pProperties == &kVkExtProps_AMD_SHADER_IMAGE_LOAD_STORE_LOD) { return false; }
 	if (pProperties == &kVkExtProps_IMG_FORMAT_PVRTC) { return false; }
@@ -66,6 +69,9 @@
 		return mvkOSVersion() >= 12.0;
 	}
 	if (pProperties == &kVkExtProps_EXT_SWAPCHAIN_COLOR_SPACE) { return false; }
+	if (pProperties == &kVkExtProps_EXT_TEXEL_BUFFER_ALIGNMENT) {
+		return mvkOSVersion() >= 11.0;
+	}
 	if (pProperties == &kVkExtProps_MVK_MACOS_SURFACE) { return false; }
 #endif
 
diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.hpp b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.hpp
index 6cc918f..62eeae7 100644
--- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.hpp
+++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.hpp
@@ -22,6 +22,8 @@
 
 #include "mvk_datatypes.h"
 
+#include <functional>
+
 class MVKBaseObject;
 
 /*
@@ -75,4 +77,13 @@
 MTLTessellationPartitionMode mvkMTLTessellationPartitionModeFromSpvExecutionModeInObj(uint32_t spvMode, MVKBaseObject* mvkObj);
 #define mvkMTLTessellationPartitionModeFromSpvExecutionMode(spvMode) mvkMTLTessellationPartitionModeFromSpvExecutionModeInObj(spvMode, this)
 
+
+#pragma mark -
+#pragma mark Image properties
+
+#pragma mark Texture formats
+
+/** Enumerates all formats that support the given features, calling a specified function for each one. */
+void mvkEnumerateSupportedFormats(VkFormatProperties properties, bool any, std::function<bool(VkFormat)> func);
+
 #endif
diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
index b58a603..b69f09a 100644
--- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
+++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
@@ -665,6 +665,25 @@
     return formatDescForMTLPixelFormat(mtlFormat).mtlName;
 }
 
+void mvkEnumerateSupportedFormats(VkFormatProperties properties, bool any, std::function<bool(VkFormat)> func) {
+    static const auto areFeaturesSupported = [any](uint32_t a, uint32_t b) {
+        if (any)
+            return mvkIsAnyFlagEnabled(a, b);
+        else
+            return mvkAreAllFlagsEnabled(a, b);
+    };
+    for (auto& formatDesc : _formatDescriptions) {
+        if (formatDesc.isSupported() &&
+            areFeaturesSupported(formatDesc.properties.linearTilingFeatures, properties.linearTilingFeatures) &&
+            areFeaturesSupported(formatDesc.properties.optimalTilingFeatures, properties.optimalTilingFeatures) &&
+            areFeaturesSupported(formatDesc.properties.bufferFeatures, properties.bufferFeatures)) {
+            if (!func(formatDesc.vk)) {
+                break;
+            }
+        }
+    }
+}
+
 #undef mvkMTLVertexFormatFromVkFormat
 MVK_PUBLIC_SYMBOL MTLVertexFormat mvkMTLVertexFormatFromVkFormat(VkFormat vkFormat) {
 	return mvkMTLVertexFormatFromVkFormatInObj(vkFormat, nullptr);