Merge pull request #1185 from billhollings/argument-buffers
WIP: Initial implementation of Metal argument buffers for Vulkan descriptor sets.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 4271434..f2b1c38 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -13,6 +13,28 @@
+MoltenVK 1.1.2
+--------------
+
+Released TBD
+
+- Add support for using Metal argument buffers for shader resources, by setting the
+ `MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS` environment variable (enabled by default).
+- To improve performance during descriptor set allocations, preallocate memory for descriptor sets, descriptors,
+ and the Metal argument buffer (one `MTLBuffer` per descriptor pool) during `vkCreateDescriptorPool()`.
+- Add support for embedding inline uniform content directly in Metal argument buffers, by setting the
+ `MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER` environment variable (disabled by default).
+- Support fast math on the MSL compiler via the `MVKConfiguration::fastMathEnabled` configuration
+ setting and `MVK_CONFIG_FAST_MATH_ENABLED` environment variable (both disabled by default).
+- Add the ability to automatically capture the first GPU frame by setting `MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE` to `2`.
+- Add `MVKBitArray` and remove `MVKVector`.
+- Support _GitHub Actions_ for CI builds on pull requests.
+- Remove support for _Travis-CI_.
+- `Makefile` and `fetchDependencies` support `xcpretty` (if available).
+- Update `VK_MVK_MOLTENVK_SPEC_VERSION` to `30`.
+
+
+
MoltenVK 1.1.1
--------------
@@ -21,7 +43,7 @@
- Add support for extensions:
- `VK_KHR_sampler_mirror_clamp_to_edge` (iOS)
- `VK_KHR_timeline_semaphore`
- - `VK_EXT_descriptor_indexing` (initial release limited to Metal Tier 1: 96/128 textures, 16 samplers)
+ - `VK_EXT_descriptor_indexing`
- `VK_EXT_post_depth_coverage` (macOS)
- `VK_EXT_private_data`
- `VK_EXT_subgroup_size_control`
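
The configuration options listed in the 1.1.2 notes above are read from environment variables when the
MoltenVK library initializes. As a minimal sketch of opting in or out at launch (the variable names come
from the notes above; the 0/1 values and the need to set them before the first Vulkan call follow the
usual MoltenVK configuration convention and should be treated as assumptions):

    #include <cstdlib>

    // Hypothetical launcher helper: set MoltenVK options before any Vulkan call,
    // so the library picks them up when it initializes its configuration.
    static void configureMoltenVK() {
        setenv("MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS", "1", 1);                   // enabled by default
        setenv("MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER", "0", 1); // disabled by default
        setenv("MVK_CONFIG_FAST_MATH_ENABLED", "1", 1);                            // opt in to MSL fast math
        setenv("MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE", "2", 1);                       // capture the first frame
    }
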
diff --git a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
index d84c521..2cafeea 100644
--- a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
+++ b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj
@@ -27,13 +27,11 @@
2FEA0A5224902F9F00EEF3AD /* MVKLogging.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F0429E1FB4CF82009FCCB8 /* MVKLogging.h */; };
2FEA0A5324902F9F00EEF3AD /* MVKQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7911C7DFB4800632CA3 /* MVKQueue.h */; };
2FEA0A5424902F9F00EEF3AD /* MVKFramebuffer.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7871C7DFB4800632CA3 /* MVKFramebuffer.h */; };
- 2FEA0A5524902F9F00EEF3AD /* MVKVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */; };
2FEA0A5624902F9F00EEF3AD /* MVKWatermarkShaderSource.h in Headers */ = {isa = PBXBuildFile; fileRef = A981494B1FB6A3F7005F00B4 /* MVKWatermarkShaderSource.h */; };
2FEA0A5724902F9F00EEF3AD /* MTLSamplerDescriptor+MoltenVK.h in Headers */ = {isa = PBXBuildFile; fileRef = A9E53DD32100B197002781DD /* MTLSamplerDescriptor+MoltenVK.h */; };
2FEA0A5824902F9F00EEF3AD /* MVKSync.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB79D1C7DFB4800632CA3 /* MVKSync.h */; };
2FEA0A5924902F9F00EEF3AD /* MVKDevice.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7831C7DFB4800632CA3 /* MVKDevice.h */; };
2FEA0A5A24902F9F00EEF3AD /* MVKSmallVector.h in Headers */ = {isa = PBXBuildFile; fileRef = A9F3D9DB24732A4D00745190 /* MVKSmallVector.h */; };
- 2FEA0A5B24902F9F00EEF3AD /* MVKVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2521BD75570006C935 /* MVKVector.h */; };
2FEA0A5C24902F9F00EEF3AD /* MVKCommandPool.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB77A1C7DFB4800632CA3 /* MVKCommandPool.h */; };
2FEA0A5D24902F9F00EEF3AD /* MVKShaderModule.h in Headers */ = {isa = PBXBuildFile; fileRef = A94FB7971C7DFB4800632CA3 /* MVKShaderModule.h */; };
2FEA0A5E24902F9F00EEF3AD /* MVKVulkanAPIObject.h in Headers */ = {isa = PBXBuildFile; fileRef = A99C91012295FAC500A061DA /* MVKVulkanAPIObject.h */; };
@@ -142,10 +140,6 @@
45557A5321C9EFF3008868BD /* MVKCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 45557A4D21C9EFF3008868BD /* MVKCodec.cpp */; };
45557A5421C9EFF3008868BD /* MVKCodec.h in Headers */ = {isa = PBXBuildFile; fileRef = 45557A5121C9EFF3008868BD /* MVKCodec.h */; };
45557A5521C9EFF3008868BD /* MVKCodec.h in Headers */ = {isa = PBXBuildFile; fileRef = 45557A5121C9EFF3008868BD /* MVKCodec.h */; };
- 83A4AD2A21BD75570006C935 /* MVKVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2521BD75570006C935 /* MVKVector.h */; };
- 83A4AD2B21BD75570006C935 /* MVKVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2521BD75570006C935 /* MVKVector.h */; };
- 83A4AD2C21BD75570006C935 /* MVKVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */; };
- 83A4AD2D21BD75570006C935 /* MVKVectorAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */; };
A9096E5E1F81E16300DFBEA6 /* MVKCmdDispatch.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */; };
A9096E5F1F81E16300DFBEA6 /* MVKCmdDispatch.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */; };
A909F65F213B190700FCD6BE /* MVKExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A909F65A213B190600FCD6BE /* MVKExtensions.h */; };
@@ -310,6 +304,9 @@
A98149641FB6A3F7005F00B4 /* MVKWatermarkTextureContent.h in Headers */ = {isa = PBXBuildFile; fileRef = A981494C1FB6A3F7005F00B4 /* MVKWatermarkTextureContent.h */; };
A981496B1FB6A998005F00B4 /* MVKStrings.h in Headers */ = {isa = PBXBuildFile; fileRef = A981496A1FB6A998005F00B4 /* MVKStrings.h */; };
A981496C1FB6A998005F00B4 /* MVKStrings.h in Headers */ = {isa = PBXBuildFile; fileRef = A981496A1FB6A998005F00B4 /* MVKStrings.h */; };
+ A98C81A1257AD5C1006A9F80 /* MVKBitArray.h in Headers */ = {isa = PBXBuildFile; fileRef = A98C819F257AD5C0006A9F80 /* MVKBitArray.h */; };
+ A98C81A2257AD5C1006A9F80 /* MVKBitArray.h in Headers */ = {isa = PBXBuildFile; fileRef = A98C819F257AD5C0006A9F80 /* MVKBitArray.h */; };
+ A98C81A3257AD5C1006A9F80 /* MVKBitArray.h in Headers */ = {isa = PBXBuildFile; fileRef = A98C819F257AD5C0006A9F80 /* MVKBitArray.h */; };
A99C90EE229455B300A061DA /* MVKCmdDebug.h in Headers */ = {isa = PBXBuildFile; fileRef = A99C90EC229455B200A061DA /* MVKCmdDebug.h */; };
A99C90EF229455B300A061DA /* MVKCmdDebug.h in Headers */ = {isa = PBXBuildFile; fileRef = A99C90EC229455B200A061DA /* MVKCmdDebug.h */; };
A99C90F0229455B300A061DA /* MVKCmdDebug.mm in Sources */ = {isa = PBXBuildFile; fileRef = A99C90ED229455B300A061DA /* MVKCmdDebug.mm */; };
@@ -428,8 +425,6 @@
45557A4D21C9EFF3008868BD /* MVKCodec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MVKCodec.cpp; sourceTree = "<group>"; };
45557A5121C9EFF3008868BD /* MVKCodec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCodec.h; sourceTree = "<group>"; };
45557A5721CD83C3008868BD /* MVKDXTnCodec.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKDXTnCodec.def; sourceTree = "<group>"; };
- 83A4AD2521BD75570006C935 /* MVKVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKVector.h; sourceTree = "<group>"; };
- 83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKVectorAllocator.h; sourceTree = "<group>"; };
A9096E5C1F81E16300DFBEA6 /* MVKCmdDispatch.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = MVKCmdDispatch.h; sourceTree = "<group>"; };
A9096E5D1F81E16300DFBEA6 /* MVKCmdDispatch.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdDispatch.mm; sourceTree = "<group>"; };
A909F65A213B190600FCD6BE /* MVKExtensions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKExtensions.h; sourceTree = "<group>"; };
@@ -513,6 +508,7 @@
A981494B1FB6A3F7005F00B4 /* MVKWatermarkShaderSource.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKWatermarkShaderSource.h; sourceTree = "<group>"; };
A981494C1FB6A3F7005F00B4 /* MVKWatermarkTextureContent.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKWatermarkTextureContent.h; sourceTree = "<group>"; };
A981496A1FB6A998005F00B4 /* MVKStrings.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKStrings.h; sourceTree = "<group>"; };
+ A98C819F257AD5C0006A9F80 /* MVKBitArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKBitArray.h; sourceTree = "<group>"; };
A99C90EC229455B200A061DA /* MVKCmdDebug.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKCmdDebug.h; sourceTree = "<group>"; };
A99C90ED229455B300A061DA /* MVKCmdDebug.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKCmdDebug.mm; sourceTree = "<group>"; };
A99C91002295FAC500A061DA /* MVKVulkanAPIObject.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKVulkanAPIObject.mm; sourceTree = "<group>"; };
@@ -682,6 +678,7 @@
children = (
A98149421FB6A3F7005F00B4 /* MVKBaseObject.h */,
A98149411FB6A3F7005F00B4 /* MVKBaseObject.mm */,
+ A98C819F257AD5C0006A9F80 /* MVKBitArray.h */,
4553AEFA2251617100E8EBCD /* MVKBlockObserver.h */,
4553AEF62251617100E8EBCD /* MVKBlockObserver.m */,
45557A4D21C9EFF3008868BD /* MVKCodec.cpp */,
@@ -693,8 +690,6 @@
A98149461FB6A3F7005F00B4 /* MVKObjectPool.h */,
A9F3D9DB24732A4D00745190 /* MVKSmallVector.h */,
A9F3D9D924732A4C00745190 /* MVKSmallVectorAllocator.h */,
- 83A4AD2521BD75570006C935 /* MVKVector.h */,
- 83A4AD2921BD75570006C935 /* MVKVectorAllocator.h */,
A98149491FB6A3F7005F00B4 /* MVKWatermark.h */,
A981494A1FB6A3F7005F00B4 /* MVKWatermark.mm */,
A981494B1FB6A3F7005F00B4 /* MVKWatermarkShaderSource.h */,
@@ -807,15 +802,14 @@
2FEA0A5024902F9F00EEF3AD /* MVKCodec.h in Headers */,
2FEA0A5124902F9F00EEF3AD /* MVKRenderPass.h in Headers */,
2FEA0A5224902F9F00EEF3AD /* MVKLogging.h in Headers */,
+ A98C81A2257AD5C1006A9F80 /* MVKBitArray.h in Headers */,
2FEA0A5324902F9F00EEF3AD /* MVKQueue.h in Headers */,
2FEA0A5424902F9F00EEF3AD /* MVKFramebuffer.h in Headers */,
- 2FEA0A5524902F9F00EEF3AD /* MVKVectorAllocator.h in Headers */,
2FEA0A5624902F9F00EEF3AD /* MVKWatermarkShaderSource.h in Headers */,
2FEA0A5724902F9F00EEF3AD /* MTLSamplerDescriptor+MoltenVK.h in Headers */,
2FEA0A5824902F9F00EEF3AD /* MVKSync.h in Headers */,
2FEA0A5924902F9F00EEF3AD /* MVKDevice.h in Headers */,
2FEA0A5A24902F9F00EEF3AD /* MVKSmallVector.h in Headers */,
- 2FEA0A5B24902F9F00EEF3AD /* MVKVector.h in Headers */,
2FEA0A5C24902F9F00EEF3AD /* MVKCommandPool.h in Headers */,
2FEA0A5D24902F9F00EEF3AD /* MVKShaderModule.h in Headers */,
2FEA0A5E24902F9F00EEF3AD /* MVKVulkanAPIObject.h in Headers */,
@@ -883,19 +877,18 @@
453638322508A4C7000EFFD3 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.h in Headers */,
A94FB8001C7DFB4800632CA3 /* MVKQueue.h in Headers */,
A94FB7EC1C7DFB4800632CA3 /* MVKFramebuffer.h in Headers */,
- 83A4AD2C21BD75570006C935 /* MVKVectorAllocator.h in Headers */,
A98149611FB6A3F7005F00B4 /* MVKWatermarkShaderSource.h in Headers */,
A9E53DE32100B197002781DD /* MTLSamplerDescriptor+MoltenVK.h in Headers */,
A94FB8181C7DFB4800632CA3 /* MVKSync.h in Headers */,
A94FB7E41C7DFB4800632CA3 /* MVKDevice.h in Headers */,
A9F3D9DE24732A4D00745190 /* MVKSmallVector.h in Headers */,
- 83A4AD2A21BD75570006C935 /* MVKVector.h in Headers */,
A94FB7D41C7DFB4800632CA3 /* MVKCommandPool.h in Headers */,
A94FB80C1C7DFB4800632CA3 /* MVKShaderModule.h in Headers */,
A99C91042295FAC600A061DA /* MVKVulkanAPIObject.h in Headers */,
A94FB7C01C7DFB4800632CA3 /* MVKCmdQueries.h in Headers */,
A94FB7CC1C7DFB4800632CA3 /* MVKCommand.h in Headers */,
A981494F1FB6A3F7005F00B4 /* MVKBaseObject.h in Headers */,
+ A98C81A1257AD5C1006A9F80 /* MVKBitArray.h in Headers */,
A9C96DD01DDC20C20053187F /* MVKMTLBufferAllocation.h in Headers */,
A98149571FB6A3F7005F00B4 /* MVKObjectPool.h in Headers */,
A94FB8141C7DFB4800632CA3 /* MVKSwapchain.h in Headers */,
@@ -957,19 +950,18 @@
453638342508A4C7000EFFD3 /* MTLRenderPassStencilAttachmentDescriptor+MoltenVK.h in Headers */,
A94FB8011C7DFB4800632CA3 /* MVKQueue.h in Headers */,
A94FB7ED1C7DFB4800632CA3 /* MVKFramebuffer.h in Headers */,
- 83A4AD2D21BD75570006C935 /* MVKVectorAllocator.h in Headers */,
A98149621FB6A3F7005F00B4 /* MVKWatermarkShaderSource.h in Headers */,
A9E53DE42100B197002781DD /* MTLSamplerDescriptor+MoltenVK.h in Headers */,
A94FB8191C7DFB4800632CA3 /* MVKSync.h in Headers */,
A94FB7E51C7DFB4800632CA3 /* MVKDevice.h in Headers */,
A9F3D9DF24732A4D00745190 /* MVKSmallVector.h in Headers */,
- 83A4AD2B21BD75570006C935 /* MVKVector.h in Headers */,
A94FB7D51C7DFB4800632CA3 /* MVKCommandPool.h in Headers */,
A94FB80D1C7DFB4800632CA3 /* MVKShaderModule.h in Headers */,
A99C91052295FAC600A061DA /* MVKVulkanAPIObject.h in Headers */,
A94FB7C11C7DFB4800632CA3 /* MVKCmdQueries.h in Headers */,
A94FB7CD1C7DFB4800632CA3 /* MVKCommand.h in Headers */,
A98149501FB6A3F7005F00B4 /* MVKBaseObject.h in Headers */,
+ A98C81A3257AD5C1006A9F80 /* MVKBitArray.h in Headers */,
A9C96DD11DDC20C20053187F /* MVKMTLBufferAllocation.h in Headers */,
A98149581FB6A3F7005F00B4 /* MVKObjectPool.h in Headers */,
A94FB8151C7DFB4800632CA3 /* MVKSwapchain.h in Headers */,
diff --git a/MoltenVK/MoltenVK/API/mvk_datatypes.h b/MoltenVK/MoltenVK/API/mvk_datatypes.h
index 611e9d6..3cf55e7 100644
--- a/MoltenVK/MoltenVK/API/mvk_datatypes.h
+++ b/MoltenVK/MoltenVK/API/mvk_datatypes.h
@@ -323,7 +323,7 @@
kMVKShaderStageTessEval,
kMVKShaderStageFragment,
kMVKShaderStageCompute,
- kMVKShaderStageMax
+ kMVKShaderStageCount
} MVKShaderStage;
/** Returns the Metal MTLColorWriteMask corresponding to the specified Vulkan VkColorComponentFlags. */
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index 9b667a7..56578fd 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -50,12 +50,12 @@
*/
#define MVK_VERSION_MAJOR 1
#define MVK_VERSION_MINOR 1
-#define MVK_VERSION_PATCH 1
+#define MVK_VERSION_PATCH 2
#define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch))
#define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH)
-#define VK_MVK_MOLTENVK_SPEC_VERSION 29
+#define VK_MVK_MOLTENVK_SPEC_VERSION 30
#define VK_MVK_MOLTENVK_EXTENSION_NAME "VK_MVK_moltenvk"
/**
@@ -137,14 +137,26 @@
* Xcode user interface.
* 0: No automatic GPU capture.
* 1: Capture all GPU commands issued during the lifetime of the VkDevice.
- * If MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE is also set, it is a filename where the automatic
- * GPU capture should be saved. In this case, the Xcode scheme need not have Metal GPU capture
- * enabled, and in fact the app need not be run under Xcode's control at all. This is useful
- * in case the app cannot be run under Xcode's control. A path starting with '~' can be used
- * to place it in a user's home directory, as in the shell. This feature requires Metal 3.0
- * (macOS 10.15, iOS 13).
+ * 2: Capture all GPU commands issued during the rendering of the first frame.
* If none of these is set, no automatic GPU capture will occur.
*
+ * If MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE is set to 2, to capture the first frame, the
+ * command queue from which the frame is captured is determined by the values of the
+ * defaultGPUCaptureScopeQueueFamilyIndex and defaultGPUCaptureScopeQueueIndex members
+ * of MVKConfiguration, or the corresponding MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX
+ * and MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX runtime environment variables, or
+ * MoltenVK compile-time build settings.
+ *
+ * If MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE is also set, it is a filename (with a file
+ * extension of .gputrace) where the automatic GPU capture should be saved. In this case,
+ * the Xcode scheme need not have Metal GPU capture enabled, and in fact the app need not
+ * be run under Xcode's control at all. This is useful in case the app cannot be run under
+ * Xcode's control. A path starting with '~' can be used to place it in a user's home
+ * directory, as in the shell. This feature requires Metal 3.0 (macOS 10.15, iOS 13).
+ * In addition, for automatic file capture, the app requires at least a minimal Info.plist
+ * file with the MetalCaptureEnabled key set to true. For command line executables (like
+ * Vulkan CTS), the Info.plist file can be placed in the same directory as the executable.
+ *
* 6. The MVK_CONFIG_TEXTURE_1D_AS_2D runtime environment variable or MoltenVK compile-time build
* setting controls whether MoltenVK should use a Metal 2D texture with a height of 1 for a
* Vulkan 1D image, or use a native Metal 1D texture. Metal imposes significant restrictions
@@ -152,14 +164,14 @@
* Using a Metal 2D texture allows Vulkan 1D textures to support this additional functionality.
* This setting is enabled by default, and MoltenVK will use a Metal 2D texture for each Vulkan 1D image.
*
- * 7. The MVK_CONFIG_PREALLOCATE_DESCRIPTORS runtime environment variable or MoltenVK compile-time
- * build setting controls whether MoltenVK should preallocate memory in each VkDescriptorPool
- * according to the values of the VkDescriptorPoolSize parameters. Doing so may improve
- * descriptor set allocation performance at a cost of preallocated application memory.
- * If this setting is disabled, the descriptors required for a descriptor set will
- * be dynamically allocated in application memory when the descriptor set itself is allocated.
- * This setting is disabled by default, and MoltenVK will dynamically allocate descriptors
- * when the containing descriptor set is allocated.
+ * 7. The MVK_CONFIG_PREALLOCATE_DESCRIPTORS runtime environment variable or MoltenVK
+ * compile-time build setting controls whether MoltenVK should preallocate memory during
+ * vkCreateDescriptorPool() according to the values of the VkDescriptorPoolSize parameters.
+ * Doing so may improve descriptor set allocation performance at a cost of preallocated
+ * application memory. If this setting is disabled, the descriptors required for
+ * a descriptor set will be dynamically allocated in application memory when the
+ * descriptor set itself is allocated. This setting is enabled by default,
+ * and MoltenVK will preallocate descriptors during vkCreateDescriptorPool().
*
* 8. The MVK_CONFIG_USE_COMMAND_POOLING runtime environment variable or MoltenVK compile-time
* build setting controls whether MoltenVK should use pools to manage memory used when
@@ -190,6 +202,29 @@
* MVK_CONFIG_PERFORMANCE_LOGGING_FRAME_COUNT environment variable or MoltenVK
* compile-time build setting. This setting is disabled by default, and activity
* performance will be logged only when frame activity is logged.
+ *
+ * 11. The MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS runtime environment variable or MoltenVK
+ * compile-time build setting controls whether MoltenVK should use Metal argument
+ * buffers for resources defined in descriptor sets. Using Metal argument buffers
+ * dramatically increases the number of buffers, textures and samplers that can be
+ * bound to a pipeline shader, and in most cases improves performance.
+ * If this setting is enabled, MoltenVK will use Metal argument buffers to bind
+ * resources to the shaders. If this setting is disabled, MoltenVK will bind
+ * resources to shaders discretely. This setting is enabled by default, and MoltenVK
+ * will use Metal argument buffers.
+ *
+ * 12. The MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER runtime environment variable
+ * or MoltenVK compile-time build setting controls whether MoltenVK should embed the contents
+ * of inline-block descriptors directly in the Metal argument buffer, instead of writing the
+ * contents of the descriptor in an intermediary MTLBuffer, which is then inserted into the
+ * Metal argument buffer. Embedding inline-block descriptor content directly into the Metal
+ * argument buffer improves efficiency and reduces resources, but currently does not cover
+ * all types of possible inline content, and may cause errors in some cases. If this setting
+ * is enabled, MoltenVK will embed inline-block descriptor content directly into the Metal
+ * argument buffers. If this setting is disabled, MoltenVK will write inline-block content
+ * to an intermediary MTLBuffer, and then insert that MTLBuffer into the Metal argument buffer.
+ * This setting is disabled by default, and MoltenVK will use an intermediary MTLBuffer.
+ * This setting only takes effect if the MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS setting is enabled.
*/
typedef struct {
@@ -648,6 +683,7 @@
VkBool32 simdPermute; /**< If true, SIMD-group permutation functions (vote, ballot, shuffle) are supported in shaders. */
VkBool32 simdReduction; /**< If true, SIMD-group reduction functions (arithmetic) are supported in shaders. */
uint32_t minSubgroupSize; /**< The minimum number of threads in a SIMD-group. */
+ VkBool32 argumentBuffers; /**< If true, argument buffers are supported and will be used for descriptor sets. */
} MVKPhysicalDeviceMetalFeatures;
/** MoltenVK performance of a particular type of activity. */
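
The settings documented above can also be changed programmatically through the MVKConfiguration struct
declared in this header. A minimal sketch, assuming a valid VkInstance, the vkGetMoltenVKConfigurationMVK()
and vkSetMoltenVKConfigurationMVK() functions from this header, and the fastMathEnabled member named in the
release notes (error handling omitted; settings that affect MSL compilation, such as fast math, should be
changed before pipelines are created):

    #include <MoltenVK/vk_mvk_moltenvk.h>

    void enableFastMath(VkInstance instance) {
        MVKConfiguration mvkConfig;
        size_t configSize = sizeof(mvkConfig);

        // Read the current configuration, toggle the MSL fast-math setting, and write it back.
        vkGetMoltenVKConfigurationMVK(instance, &mvkConfig, &configSize);
        mvkConfig.fastMathEnabled = VK_TRUE;
        vkSetMoltenVKConfigurationMVK(instance, &mvkConfig, &configSize);
    }
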
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index 9e2ef99..3fe84d1 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -302,6 +302,18 @@
/** Binds a pipeline to a bind point. */
void bindPipeline(VkPipelineBindPoint pipelineBindPoint, MVKPipeline* pipeline);
+	/** Indicates to either the graphics or compute encoder that a resource is being used within an argument buffer. */
+ void useArgumentBufferResource(const MVKMTLArgumentBufferResourceUsage& resourceUsage, bool isComputeStage);
+
+	/** Binds a buffer to either the graphics or compute encoder, based on the shader stage. */
+ void bindBuffer(const MVKMTLBufferBinding& binding, MVKShaderStage stage);
+
+	/** Binds a texture to either the graphics or compute encoder, based on the shader stage. */
+ void bindTexture(const MVKMTLTextureBinding& binding, MVKShaderStage stage);
+
+	/** Binds a sampler to either the graphics or compute encoder, based on the shader stage. */
+ void bindSamplerState(const MVKMTLSamplerStateBinding& binding, MVKShaderStage stage);
+
/** Encodes an operation to signal an event to a status. */
void signalEvent(MVKEvent* mvkEvent, bool status);
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index 65f06a6..19585da 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -422,6 +422,38 @@
}
}
+void MVKCommandEncoder::useArgumentBufferResource(const MVKMTLArgumentBufferResourceUsage& resourceUsage, bool isComputeStage) {
+ if (isComputeStage) {
+ _computeResourcesState.useArgumentBufferResource(resourceUsage);
+ } else {
+ _graphicsResourcesState.useArgumentBufferResource(resourceUsage);
+ }
+}
+
+void MVKCommandEncoder::bindBuffer(const MVKMTLBufferBinding& binding, MVKShaderStage stage) {
+ if (stage == kMVKShaderStageCompute) {
+ _computeResourcesState.bindBuffer(binding);
+ } else {
+ _graphicsResourcesState.bindBuffer(stage, binding);
+ }
+}
+
+void MVKCommandEncoder::bindTexture(const MVKMTLTextureBinding& binding, MVKShaderStage stage) {
+ if (stage == kMVKShaderStageCompute) {
+ _computeResourcesState.bindTexture(binding);
+ } else {
+ _graphicsResourcesState.bindTexture(stage, binding);
+ }
+}
+
+void MVKCommandEncoder::bindSamplerState(const MVKMTLSamplerStateBinding& binding, MVKShaderStage stage) {
+ if (stage == kMVKShaderStageCompute) {
+ _computeResourcesState.bindSamplerState(binding);
+ } else {
+ _graphicsResourcesState.bindSamplerState(stage, binding);
+ }
+}
+
void MVKCommandEncoder::signalEvent(MVKEvent* mvkEvent, bool status) {
endCurrentMetalEncoding();
mvkEvent->encodeSignal(_mtlCmdBuffer, status);
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
index 62d4a8a..cfdd69b 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
@@ -352,7 +352,9 @@
public:
- /** Constructs this instance for the specified command encoder. */
+ /** Indicates that a resource is being used within an argument buffer. */
+ void useArgumentBufferResource(const MVKMTLArgumentBufferResourceUsage& resourceUsage);
+
MVKResourcesCommandEncoderState(MVKCommandEncoder* cmdEncoder) : MVKCommandEncoderState(cmdEncoder) {}
protected:
@@ -454,6 +456,11 @@
}
};
+ void resetImpl() override;
+ void markDirty() override;
+
+ MVKSmallVector<MVKMTLArgumentBufferResourceUsage, 8> _argumentBufferResourceUsage;
+ bool _areArgumentBufferResourceUsageDirty = false;
};
@@ -521,6 +528,7 @@
void encodeImpl(uint32_t stage) override;
void resetImpl() override;
void markDirty() override;
+ void encodeArgumentBufferResources();
ResourceBindings<8> _shaderStageResourceBindings[4];
};
@@ -559,6 +567,7 @@
protected:
void encodeImpl(uint32_t) override;
void resetImpl() override;
+ void encodeArgumentBufferResources();
ResourceBindings<4> _resourceBindings;
};
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index 671ed45..caa896b 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -513,6 +513,38 @@
}
}
+#pragma mark -
+#pragma mark MVKResourcesCommandEncoderState
+
+void MVKResourcesCommandEncoderState::useArgumentBufferResource(const MVKMTLArgumentBufferResourceUsage& resourceUsage) {
+
+ if ( !resourceUsage.mtlResource ) { return; }
+
+ MVKMTLArgumentBufferResourceUsage dru = resourceUsage; // Copy that can be marked dirty
+ MVKCommandEncoderState::markDirty();
+ _areArgumentBufferResourceUsageDirty = true;
+ dru.isDirty = true;
+
+ for (auto iter = _argumentBufferResourceUsage.begin(), end = _argumentBufferResourceUsage.end(); iter != end; ++iter) {
+ if( iter->mtlResource == dru.mtlResource ) {
+ *iter = dru;
+ return;
+ }
+ }
+ _argumentBufferResourceUsage.push_back(dru);
+}
+
+// Mark everything as dirty
+void MVKResourcesCommandEncoderState::markDirty() {
+ MVKCommandEncoderState::markDirty();
+ markDirty(_argumentBufferResourceUsage, _areArgumentBufferResourceUsageDirty);
+}
+
+void MVKResourcesCommandEncoderState::resetImpl() {
+ _argumentBufferResourceUsage.clear();
+ _areArgumentBufferResourceUsageDirty = false;
+}
+
#pragma mark -
#pragma mark MVKGraphicsResourcesCommandEncoderState
@@ -636,7 +668,7 @@
// Mark everything as dirty
void MVKGraphicsResourcesCommandEncoderState::markDirty() {
- MVKCommandEncoderState::markDirty();
+ MVKResourcesCommandEncoderState::markDirty();
for (uint32_t i = kMVKShaderStageVertex; i <= kMVKShaderStageFragment; i++) {
MVKResourcesCommandEncoderState::markDirty(_shaderStageResourceBindings[i].bufferBindings, _shaderStageResourceBindings[i].areBufferBindingsDirty);
MVKResourcesCommandEncoderState::markDirty(_shaderStageResourceBindings[i].textureBindings, _shaderStageResourceBindings[i].areTextureBindingsDirty);
@@ -646,7 +678,9 @@
void MVKGraphicsResourcesCommandEncoderState::encodeImpl(uint32_t stage) {
- MVKGraphicsPipeline* pipeline = (MVKGraphicsPipeline*)_cmdEncoder->_graphicsPipelineState.getPipeline();
+ encodeArgumentBufferResources();
+
+ MVKGraphicsPipeline* pipeline = (MVKGraphicsPipeline*)_cmdEncoder->_graphicsPipelineState.getPipeline();
bool fullImageViewSwizzle = pipeline->fullImageViewSwizzle() || _cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle;
bool forTessellation = pipeline->isTessellationPipeline();
@@ -809,7 +843,24 @@
}
}
+void MVKGraphicsResourcesCommandEncoderState::encodeArgumentBufferResources() {
+
+ encodeBinding<MVKMTLArgumentBufferResourceUsage>(_argumentBufferResourceUsage,
+ _areArgumentBufferResourceUsageDirty,
+ [](MVKCommandEncoder* cmdEncoder, MVKMTLArgumentBufferResourceUsage& abru)->void {
+ if (abru.mtlStages) {
+ auto* mtlEnc = cmdEncoder->_mtlRenderEncoder;
+ if ([mtlEnc respondsToSelector: @selector(useResource:usage:stages:)]) {
+ [mtlEnc useResource: abru.mtlResource usage: abru.mtlUsage stages: abru.mtlStages];
+ } else {
+ [mtlEnc useResource: abru.mtlResource usage: abru.mtlUsage];
+ }
+ }
+ });
+}
+
void MVKGraphicsResourcesCommandEncoderState::resetImpl() {
+ MVKResourcesCommandEncoderState::resetImpl();
for (uint32_t i = kMVKShaderStageVertex; i <= kMVKShaderStageFragment; i++) {
_shaderStageResourceBindings[i].reset();
}
@@ -845,7 +896,7 @@
// Mark everything as dirty
void MVKComputeResourcesCommandEncoderState::markDirty() {
- MVKCommandEncoderState::markDirty();
+ MVKResourcesCommandEncoderState::markDirty();
MVKResourcesCommandEncoderState::markDirty(_resourceBindings.bufferBindings, _resourceBindings.areBufferBindingsDirty);
MVKResourcesCommandEncoderState::markDirty(_resourceBindings.textureBindings, _resourceBindings.areTextureBindingsDirty);
MVKResourcesCommandEncoderState::markDirty(_resourceBindings.samplerStateBindings, _resourceBindings.areSamplerStateBindingsDirty);
@@ -853,6 +904,8 @@
void MVKComputeResourcesCommandEncoderState::encodeImpl(uint32_t) {
+ encodeArgumentBufferResources();
+
MVKPipeline* pipeline = _cmdEncoder->_computePipelineState.getPipeline();
bool fullImageViewSwizzle = pipeline ? pipeline->fullImageViewSwizzle() : false;
@@ -908,8 +961,18 @@
atIndex: b.index];
});
}
+void MVKComputeResourcesCommandEncoderState::encodeArgumentBufferResources() {
+
+ encodeBinding<MVKMTLArgumentBufferResourceUsage>(_argumentBufferResourceUsage,
+ _areArgumentBufferResourceUsageDirty,
+ [](MVKCommandEncoder* cmdEncoder, MVKMTLArgumentBufferResourceUsage& abru)->void {
+ auto* mtlEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch);
+ [mtlEnc useResource: abru.mtlResource usage: abru.mtlUsage];
+ });
+}
void MVKComputeResourcesCommandEncoderState::resetImpl() {
+ MVKResourcesCommandEncoderState::resetImpl();
_resourceBindings.reset();
}
diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
index a09386b..789aa0a 100644
--- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
@@ -18,6 +18,7 @@
#include "MVKMTLBufferAllocation.h"
#include "MVKLogging.h"
+#include <algorithm>
#pragma mark -
@@ -80,6 +81,9 @@
const MVKMTLBufferAllocation* MVKMTLBufferAllocator::acquireMTLBufferRegion(NSUInteger length) {
MVKAssert(length <= _maxAllocationLength, "This MVKMTLBufferAllocator has been configured to dispense MVKMTLBufferRegions no larger than %lu bytes.", (unsigned long)_maxAllocationLength);
+ // Can't allocate a segment smaller than the minimum MTLBuffer alignment.
+ length = std::max<NSUInteger>(length, _device->_pMetalFeatures->mtlBufferAlignment);
+
// Convert max length to the next power-of-two exponent to use as a lookup
NSUInteger p2Exp = mvkPowerOfTwoExponent(length);
MVKMTLBufferAllocationPool* pRP = _regionPools[p2Exp];
@@ -87,7 +91,7 @@
}
MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe) : MVKBaseDeviceObject(device) {
- _maxAllocationLength = maxRegionLength;
+ _maxAllocationLength = std::max<NSUInteger>(maxRegionLength, _device->_pMetalFeatures->mtlBufferAlignment);
_makeThreadSafe = makeThreadSafe;
// Convert max length to the next power-of-two exponent
diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h
index a530288..bf96e5e 100644
--- a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h
+++ b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h
@@ -53,6 +53,14 @@
bool isInline = false;
} MVKMTLBufferBinding;
+/** Describes the usage of a MTLResource within a Metal argument buffer. */
+typedef struct {
+ id<MTLResource> mtlResource = nil;
+ MTLResourceUsage mtlUsage = 0;
+ MTLRenderStages mtlStages = 0;
+ bool isDirty = true;
+} MVKMTLArgumentBufferResourceUsage;
+
/** Describes a MTLBuffer resource binding as used for an index buffer. */
typedef struct {
union { id<MTLBuffer> mtlBuffer = nil; id<MTLBuffer> mtlResource; }; // aliases
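
For context, the MVKMTLArgumentBufferResourceUsage struct added above is what descriptor-binding code fills
in so the encoder state can later call useResource:usage:stages: on the Metal encoder for each resource
referenced through an argument buffer. A rough sketch of the intended call pattern, using the
useArgumentBufferResource() helper added to MVKCommandEncoder earlier in this diff (the wrapper function
and the concrete usage and stage flags are illustrative assumptions, not part of this change):

    #import <Metal/Metal.h>
    #include "MVKCommandBuffer.h"
    #include "MVKMTLResourceBindings.h"

    // Declare to the graphics encoder state that a buffer referenced from a Metal
    // argument buffer will be read by the vertex and fragment stages.
    static void markResidentForRender(MVKCommandEncoder* cmdEncoder, id<MTLBuffer> mtlBuff) {
        MVKMTLArgumentBufferResourceUsage ru;
        ru.mtlResource = mtlBuff;
        ru.mtlUsage    = MTLResourceUsageRead;
        ru.mtlStages   = MTLRenderStageVertex | MTLRenderStageFragment;
        cmdEncoder->useArgumentBufferResource(ru, false);   // false: graphics, not compute
    }
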
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
index 1339972..0065408 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
@@ -19,6 +19,7 @@
#pragma once
#include "MVKImage.h"
+#include "MVKMTLBufferAllocation.h"
#include "MVKSmallVector.h"
class MVKDescriptorSet;
@@ -30,12 +31,14 @@
/** Indicates the Metal resource indexes used by a single shader stage in a descriptor. */
typedef struct MVKShaderStageResourceBinding {
+ uint16_t resourceIndex = 0;
uint16_t bufferIndex = 0;
uint16_t textureIndex = 0;
uint16_t samplerIndex = 0;
MVKShaderStageResourceBinding operator+ (const MVKShaderStageResourceBinding& rhs);
MVKShaderStageResourceBinding& operator+= (const MVKShaderStageResourceBinding& rhs);
+ void addArgumentBuffer(const MVKShaderStageResourceBinding& rhs);
} MVKShaderStageResourceBinding;
@@ -44,7 +47,7 @@
/** Indicates the Metal resource indexes used by each shader stage in a descriptor. */
typedef struct MVKShaderResourceBinding {
- MVKShaderStageResourceBinding stages[kMVKShaderStageMax];
+ MVKShaderStageResourceBinding stages[kMVKShaderStageCount];
uint16_t getMaxBufferIndex();
uint16_t getMaxTextureIndex();
@@ -52,9 +55,25 @@
MVKShaderResourceBinding operator+ (const MVKShaderResourceBinding& rhs);
MVKShaderResourceBinding& operator+= (const MVKShaderResourceBinding& rhs);
+ void addArgumentBuffer(const MVKShaderResourceBinding& rhs);
} MVKShaderResourceBinding;
+/**
+ * Populates the specified shader converter context, at the specified descriptor set binding,
+ * from the specified shader stage resource binding.
+ */
+void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
+ MVKShaderStageResourceBinding& ssRB,
+ spv::ExecutionModel stage,
+ uint32_t descriptorSetIndex,
+ uint32_t bindingIndex,
+ uint32_t count,
+ MVKSampler* immutableSampler);
+
+/** Returns a bitwise OR of Metal render stages from the array of shader stages, which must have a length of kMVKShaderStageCount. */
+MTLRenderStages mvkMTLRenderStagesFromMVKShaderStages(bool stageEnabled[]);
+
#pragma mark -
#pragma mark MVKDescriptorSetLayoutBinding
@@ -83,7 +102,7 @@
* count provided to that descriptor set is returned. Otherwise returns the value
* defined in VkDescriptorSetLayoutBinding::descriptorCount.
*/
- uint32_t getDescriptorCount(MVKDescriptorSet* descSet);
+ uint32_t getDescriptorCount(MVKDescriptorSet* descSet = nullptr);
/** Returns the descriptor type of this layout. */
inline VkDescriptorType getDescriptorType() { return _info.descriptorType; }
@@ -108,33 +127,68 @@
const void* pData,
MVKShaderResourceBinding& dslMTLRezIdxOffsets);
- /** Populates the specified shader converter context, at the specified descriptor set binding. */
- void populateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
- MVKShaderResourceBinding& dslMTLRezIdxOffsets,
- uint32_t dslIndex);
+ /** Returns whether this binding layout is using an argument buffer. */
+ bool isUsingMetalArgumentBuffer() const;
+
+ /** Writes the buffer content to the Metal argument buffer. */
+ void writeToMetalArgumentBuffer(id<MTLBuffer> mtlBuffer,
+ NSUInteger offset,
+ uint32_t elementIndex);
+
+ /** Writes the texture content to the Metal argument buffer. */
+ void writeToMetalArgumentBuffer(id<MTLTexture> mtlTexture,
+ uint32_t planeCount,
+ uint32_t planeIndex,
+ uint32_t elementIndex);
+
+ /** Writes the sampler content to the Metal argument buffer. */
+ void writeToMetalArgumentBuffer(id<MTLSamplerState> mtlSamplerState,
+ uint32_t elementIndex);
+
+ /** Writes the inline data content to the Metal argument buffer. */
+ void writeToMetalArgumentBuffer(uint8_t* pSrcData,
+ NSUInteger dstOffset,
+ NSUInteger dataLen);
MVKDescriptorSetLayoutBinding(MVKDevice* device,
MVKDescriptorSetLayout* layout,
const VkDescriptorSetLayoutBinding* pBinding,
- VkDescriptorBindingFlagsEXT bindingFlags);
+ VkDescriptorBindingFlagsEXT bindingFlags,
+ uint32_t descriptorIndex);
MVKDescriptorSetLayoutBinding(const MVKDescriptorSetLayoutBinding& binding);
~MVKDescriptorSetLayoutBinding() override;
protected:
- friend class MVKInlineUniformBlockDescriptor;
+ friend class MVKDescriptorSetLayout;
+ friend class MVKInlineUniformBlockDescriptor;
+
+ inline uint32_t getDescriptorIndex(uint32_t elementIndex = 0) { return _descriptorIndex + elementIndex; }
+ inline uint32_t getMTLArgumentBufferIndex(uint32_t mvkShaderStage, uint32_t elementIndex = 0) {
+ return _argumentBufferIndex[mvkShaderStage] + elementIndex;
+ }
void initMetalResourceIndexOffsets(MVKShaderStageResourceBinding* pBindingIndexes,
MVKShaderStageResourceBinding* pDescSetCounts,
const VkDescriptorSetLayoutBinding* pBinding);
+ void addMTLArgumentDescriptors(uint32_t stage, NSMutableArray<MTLArgumentDescriptor*>* args, uint32_t& argIdx);
+ void addMTLArgumentDescriptor(NSMutableArray<MTLArgumentDescriptor*>* args,
+ MTLDataType dataType,
+ MTLArgumentAccess access,
+ uint32_t& argIdx);
bool validate(MVKSampler* mvkSampler);
+ void populateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
+ MVKShaderResourceBinding& dslMTLRezIdxOffsets,
+ uint32_t dslIndex);
MVKDescriptorSetLayout* _layout;
VkDescriptorSetLayoutBinding _info;
VkDescriptorBindingFlagsEXT _flags;
MVKSmallVector<MVKSampler*> _immutableSamplers;
+ uint32_t _descriptorIndex;
MVKShaderResourceBinding _mtlResourceIndexOffsets;
- bool _applyToStage[kMVKShaderStageMax];
+ uint32_t _argumentBufferIndex[kMVKShaderStageCount];
+ bool _applyToStage[kMVKShaderStageCount];
};
@@ -153,6 +207,7 @@
 	/** Encodes this descriptor (based on its layout binding index) on the command encoder. */
virtual void bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
@@ -161,11 +216,13 @@
/**
* Updates the internal binding from the specified content. The format of the content depends
- * on the descriptor type, and is extracted from pData at the location given by index * stride.
+ * on the descriptor type, and is extracted from pData at the location given by srcIndex * stride.
* MVKInlineUniformBlockDescriptor uses the index as byte offset to write to.
*/
- virtual void write(MVKDescriptorSet* mvkDescSet,
- uint32_t index,
+ virtual void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
+ uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData) = 0;
@@ -180,8 +237,7 @@
* at which to start writing.
* MVKInlineUniformBlockDescriptor uses the index as byte offset to read from.
*/
- virtual void read(MVKDescriptorSet* mvkDescSet,
- uint32_t index,
+ virtual void read(uint32_t index,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
@@ -195,9 +251,13 @@
~MVKDescriptor() { reset(); }
+protected:
+ MTLResourceUsage getMTLResourceUsage();
+
};
+
#pragma mark -
#pragma mark MVKBufferDescriptor
@@ -206,19 +266,21 @@
public:
void bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) override;
- void write(MVKDescriptorSet* mvkDescSet,
+ void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData) override;
- void read(MVKDescriptorSet* mvkDescSet,
- uint32_t dstIndex,
+ void read(uint32_t dstIndex,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
@@ -281,33 +343,42 @@
VkDescriptorType getDescriptorType() override { return VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT; }
void bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) override;
- void write(MVKDescriptorSet* mvkDescSet,
+ void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t dstOffset, // For inline buffers we are using this parameter as dst offset not as src descIdx
+ uint32_t dstIndex,
size_t stride,
const void* pData) override;
- void read(MVKDescriptorSet* mvkDescSet,
- uint32_t srcOffset, // For inline buffers we are using this parameter as src offset not as dst descIdx
+ void read(uint32_t srcOffset, // For inline buffers we are using this parameter as src offset not as dst descIdx
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
VkWriteDescriptorSetInlineUniformBlockEXT* inlineUniformBlock) override;
-
- void setLayout(MVKDescriptorSetLayoutBinding* dslBinding, uint32_t index) override;
void reset() override;
+ /**
+ * Returns whether inline blocks should be embedded directly into a Metal argument buffer, instead of
+	 * being held in an intermediary MTLBuffer, with that MTLBuffer inserted into the Metal argument buffer.
+ */
+ static bool shouldEmbedInlineBlocksInMetalAgumentBuffer();
+
~MVKInlineUniformBlockDescriptor() { reset(); }
protected:
- uint8_t* _buffer = nullptr;
- uint32_t _length;
+ uint8_t* getData();
+
+ void* _buffer = nullptr;
+ uint32_t _length = 0;
+ bool _isUsingIntermediaryMTLBuffer = false;
};
@@ -319,19 +390,21 @@
public:
void bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) override;
- void write(MVKDescriptorSet* mvkDescSet,
+ void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData) override;
- void read(MVKDescriptorSet* mvkDescSet,
- uint32_t dstIndex,
+ void read(uint32_t dstIndex,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
@@ -388,19 +461,21 @@
protected:
void bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex);
- void write(MVKDescriptorSet* mvkDescSet,
+ void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData);
- void read(MVKDescriptorSet* mvkDescSet,
- uint32_t dstIndex,
+ void read(uint32_t dstIndex,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
@@ -408,6 +483,12 @@
void setLayout(MVKDescriptorSetLayoutBinding* dslBinding, uint32_t index);
+ /**
+ * Offset to the first sampler index in the argument buffer. Defaults to zero for simple sampler
+	 * descriptors, but the combined image/sampler descriptor overrides this to index samplers after textures.
+ */
+ virtual uint32_t getSamplerArgBufferIndexOffset(MVKDescriptorSetLayoutBinding* dslBinding) { return 0; }
+
void reset();
~MVKSamplerDescriptorMixin() { reset(); }
@@ -427,19 +508,21 @@
VkDescriptorType getDescriptorType() override { return VK_DESCRIPTOR_TYPE_SAMPLER; }
void bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) override;
- void write(MVKDescriptorSet* mvkDescSet,
+ void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData) override;
- void read(MVKDescriptorSet* mvkDescSet,
- uint32_t dstIndex,
+ void read(uint32_t dstIndex,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
@@ -464,19 +547,21 @@
VkDescriptorType getDescriptorType() override { return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; }
void bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) override;
- void write(MVKDescriptorSet* mvkDescSet,
+ void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData) override;
- void read(MVKDescriptorSet* mvkDescSet,
- uint32_t dstIndex,
+ void read(uint32_t dstIndex,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
@@ -484,6 +569,8 @@
void setLayout(MVKDescriptorSetLayoutBinding* dslBinding, uint32_t index) override;
+ uint32_t getSamplerArgBufferIndexOffset(MVKDescriptorSetLayoutBinding* dslBinding) override;
+
void reset() override;
~MVKCombinedImageSamplerDescriptor() { reset(); }
@@ -499,19 +586,21 @@
public:
void bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) override;
- void write(MVKDescriptorSet* mvkDescSet,
+ void write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData) override;
- void read(MVKDescriptorSet* mvkDescSet,
- uint32_t dstIndex,
+ void read(uint32_t dstIndex,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
index 189be4d..88ae6c5 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm
@@ -18,6 +18,7 @@
#include "MVKDescriptor.h"
#include "MVKDescriptorSet.h"
+#include "MVKCommandBuffer.h"
#include "MVKBuffer.h"
@@ -25,6 +26,7 @@
MVKShaderStageResourceBinding MVKShaderStageResourceBinding::operator+ (const MVKShaderStageResourceBinding& rhs) {
MVKShaderStageResourceBinding rslt;
+ rslt.resourceIndex = this->resourceIndex + rhs.resourceIndex;
rslt.bufferIndex = this->bufferIndex + rhs.bufferIndex;
rslt.textureIndex = this->textureIndex + rhs.textureIndex;
rslt.samplerIndex = this->samplerIndex + rhs.samplerIndex;
@@ -32,12 +34,19 @@
}
MVKShaderStageResourceBinding& MVKShaderStageResourceBinding::operator+= (const MVKShaderStageResourceBinding& rhs) {
+ this->resourceIndex += rhs.resourceIndex;
this->bufferIndex += rhs.bufferIndex;
this->textureIndex += rhs.textureIndex;
this->samplerIndex += rhs.samplerIndex;
return *this;
}
+void MVKShaderStageResourceBinding::addArgumentBuffer(const MVKShaderStageResourceBinding& rhs) {
+ bool isUsed = rhs.resourceIndex > 0;
+ this->bufferIndex += isUsed;
+ this->resourceIndex += isUsed;
+}
+
#pragma mark MVKShaderResourceBinding
@@ -55,19 +64,71 @@
MVKShaderResourceBinding MVKShaderResourceBinding::operator+ (const MVKShaderResourceBinding& rhs) {
MVKShaderResourceBinding rslt;
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
rslt.stages[i] = this->stages[i] + rhs.stages[i];
}
return rslt;
}
MVKShaderResourceBinding& MVKShaderResourceBinding::operator+= (const MVKShaderResourceBinding& rhs) {
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
this->stages[i] += rhs.stages[i];
}
return *this;
}
+void MVKShaderResourceBinding::addArgumentBuffer(const MVKShaderResourceBinding& rhs) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
+ this->stages[i].addArgumentBuffer(rhs.stages[i]);
+ }
+}
+
+void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
+ MVKShaderStageResourceBinding& ssRB,
+ spv::ExecutionModel stage,
+ uint32_t descriptorSetIndex,
+ uint32_t bindingIndex,
+ uint32_t count,
+ MVKSampler* immutableSampler) {
+ mvk::MSLResourceBinding rb;
+
+ auto& rbb = rb.resourceBinding;
+ rbb.stage = stage;
+ rbb.desc_set = descriptorSetIndex;
+ rbb.binding = bindingIndex;
+ rbb.count = count;
+ rbb.msl_buffer = ssRB.bufferIndex;
+ rbb.msl_texture = ssRB.textureIndex;
+ rbb.msl_sampler = ssRB.samplerIndex;
+
+ if (immutableSampler) { immutableSampler->getConstexprSampler(rb); }
+
+ context.resourceBindings.push_back(rb);
+}
+
+MTLRenderStages mvkMTLRenderStagesFromMVKShaderStages(bool stageEnabled[]) {
+ MTLRenderStages mtlStages = 0;
+ for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+ if (stageEnabled[stage]) {
+ switch (stage) {
+ case kMVKShaderStageVertex:
+ case kMVKShaderStageTessCtl:
+ case kMVKShaderStageTessEval:
+ mtlStages |= MTLRenderStageVertex;
+ break;
+
+ case kMVKShaderStageFragment:
+ mtlStages |= MTLRenderStageFragment;
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+ return mtlStages;
+}
+
#pragma mark -
#pragma mark MVKDescriptorSetLayoutBinding
@@ -80,7 +141,7 @@
return 1;
}
- if (descSet && hasVariableDescriptorCount()) {
+ if (descSet && mvkIsAnyFlagEnabled(_flags, VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
return descSet->_variableDescriptorCount;
}
@@ -106,7 +167,7 @@
for (uint32_t descIdx = 0; descIdx < descCnt; descIdx++) {
MVKDescriptor* mvkDesc = descSet->getDescriptor(getBinding(), descIdx);
if (mvkDesc->getDescriptorType() == descType) {
- mvkDesc->bind(cmdEncoder, descIdx, _applyToStage, mtlIdxs, dynamicOffsets, dynamicOffsetIndex);
+ mvkDesc->bind(cmdEncoder, this, descIdx, _applyToStage, mtlIdxs, dynamicOffsets, dynamicOffsetIndex);
}
}
}
@@ -166,14 +227,10 @@
else
bb.size = (uint32_t)bufferInfo.range;
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
if (_applyToStage[i]) {
bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
- }
+ if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
}
}
break;
@@ -184,14 +241,10 @@
bb.mtlBytes = inlineUniformBlock.pData;
bb.size = inlineUniformBlock.dataSize;
bb.isInline = true;
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
if (_applyToStage[i]) {
bb.index = mtlIdxs.stages[i].bufferIndex;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
- }
+ if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
}
}
break;
@@ -213,21 +266,13 @@
bb.offset = mtlTex.bufferOffset;
bb.size = (uint32_t)(mtlTex.height * mtlTex.bufferBytesPerRow);
}
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
if (_applyToStage[i]) {
tb.index = mtlIdxs.stages[i].textureIndex + rezIdx + planeIndex;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindTexture(tb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindTexture(MVKShaderStage(i), tb); }
- }
+ if (cmdEncoder) { cmdEncoder->bindTexture(tb, MVKShaderStage(i)); }
if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
- }
+ if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
}
}
}
@@ -246,21 +291,13 @@
bb.offset = mtlTex.bufferOffset;
bb.size = (uint32_t)(mtlTex.height * mtlTex.bufferBytesPerRow);
}
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
if (_applyToStage[i]) {
tb.index = mtlIdxs.stages[i].textureIndex + rezIdx;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindTexture(tb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindTexture(MVKShaderStage(i), tb); }
- }
+ if (cmdEncoder) { cmdEncoder->bindTexture(tb, MVKShaderStage(i)); }
if (_info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
bb.index = mtlIdxs.stages[i].bufferIndex + rezIdx;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
- }
+ if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
}
}
}
@@ -276,14 +313,10 @@
sampler = _immutableSamplers[rezIdx];
}
sb.mtlSamplerState = sampler->getMTLSamplerState();
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
if (_applyToStage[i]) {
sb.index = mtlIdxs.stages[i].samplerIndex + rezIdx;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindSamplerState(sb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindSamplerState(MVKShaderStage(i), sb); }
- }
+ if (cmdEncoder) { cmdEncoder->bindSamplerState(sb, MVKShaderStage(i)); }
}
}
break;
@@ -304,17 +337,14 @@
sampler = _immutableSamplers[rezIdx];
}
sb.mtlSamplerState = sampler->getMTLSamplerState();
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
if (_applyToStage[i]) {
tb.index = mtlIdxs.stages[i].textureIndex + rezIdx + planeIndex;
sb.index = mtlIdxs.stages[i].samplerIndex + rezIdx;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindTexture(tb); }
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindSamplerState(sb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindTexture(MVKShaderStage(i), tb); }
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindSamplerState(MVKShaderStage(i), sb); }
- }
+ if (cmdEncoder) {
+ cmdEncoder->bindTexture(tb, MVKShaderStage(i));
+ cmdEncoder->bindSamplerState(sb, MVKShaderStage(i));
+ }
}
}
}
@@ -335,6 +365,146 @@
}
}
+bool MVKDescriptorSetLayoutBinding::isUsingMetalArgumentBuffer() const { return _layout->isUsingMetalArgumentBuffer(); };
+
+// Adds MTLArgumentDescriptors to the array, and updates resource indexes consumed.
+void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptors(uint32_t stage,
+ NSMutableArray<MTLArgumentDescriptor*>* args,
+ uint32_t& argIdx) {
+ if ( !_applyToStage[stage]) { return; }
+
+ _argumentBufferIndex[stage] = argIdx;
+
+ switch (getDescriptorType()) {
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ addMTLArgumentDescriptor(args, MTLDataTypePointer, MTLArgumentAccessReadOnly, argIdx);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ addMTLArgumentDescriptor(args, MTLDataTypePointer, MTLArgumentAccessReadWrite, argIdx);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ if (MVKInlineUniformBlockDescriptor::shouldEmbedInlineBlocksInMetalAgumentBuffer()) {
+ addMTLArgumentDescriptor(args, MTLDataTypeUChar, MTLArgumentAccessReadOnly, argIdx);
+ } else {
+ addMTLArgumentDescriptor(args, MTLDataTypePointer, MTLArgumentAccessReadOnly, argIdx);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ addMTLArgumentDescriptor(args, MTLDataTypeTexture, MTLArgumentAccessReadOnly, argIdx);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ addMTLArgumentDescriptor(args, MTLDataTypeTexture, MTLArgumentAccessReadWrite, argIdx);
+// addMTLArgumentDescriptor(args, MTLDataTypePointer, MTLArgumentAccessReadWrite, argIdx); // Needed for atomic operations
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ addMTLArgumentDescriptor(args, MTLDataTypeTexture, MTLArgumentAccessReadOnly, argIdx);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ addMTLArgumentDescriptor(args, MTLDataTypeTexture, MTLArgumentAccessReadWrite, argIdx);
+// addMTLArgumentDescriptor(args, MTLDataTypePointer, MTLArgumentAccessReadWrite, argIdx); // Needed for atomic operations
+ break;
+
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ addMTLArgumentDescriptor(args, MTLDataTypeSampler, MTLArgumentAccessReadOnly, argIdx);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ addMTLArgumentDescriptor(args, MTLDataTypeTexture, MTLArgumentAccessReadOnly, argIdx);
+ addMTLArgumentDescriptor(args, MTLDataTypeSampler, MTLArgumentAccessReadOnly, argIdx);
+ break;
+
+ default:
+ break;
+ }
+}
+
+// Adds an MTLArgumentDescriptor of the specified type to the array, and advances the argument index by the descriptors consumed.
+void MVKDescriptorSetLayoutBinding::addMTLArgumentDescriptor(NSMutableArray<MTLArgumentDescriptor*>* args,
+ MTLDataType dataType,
+ MTLArgumentAccess access,
+ uint32_t& argIdx) {
+
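+	// For an inline uniform block embedded directly in the Metal argument buffer, descriptorCount
+	// is the block's size in bytes, so the argument is declared as an array of that many bytes.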
+ NSUInteger mtlArgDescAryLen = ((_info.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT &&
+ MVKInlineUniformBlockDescriptor::shouldEmbedInlineBlocksInMetalAgumentBuffer())
+ ? _info.descriptorCount : getDescriptorCount());
+
+ auto* argDesc = [MTLArgumentDescriptor argumentDescriptor];
+ argDesc.dataType = dataType;
+ argDesc.access = access;
+ argDesc.index = argIdx;
+ argDesc.arrayLength = mtlArgDescAryLen;
+ argDesc.textureType = MTLTextureType2D;
+
+ [args addObject: argDesc];
+ argIdx += getDescriptorCount();
+}
+
+void MVKDescriptorSetLayoutBinding::writeToMetalArgumentBuffer(id<MTLBuffer> mtlBuffer,
+ NSUInteger offset,
+ uint32_t elementIndex) {
+ if ( !isUsingMetalArgumentBuffer() || !mtlBuffer ) { return; }
+
+ for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+ if (_applyToStage[stage]) {
+ uint32_t argIdx = getMTLArgumentBufferIndex(stage, elementIndex);
+ [_layout->_argumentEncoder[stage].mtlArgumentEncoder setBuffer: mtlBuffer offset: offset atIndex: argIdx];
+ }
+ }
+}
+
+void MVKDescriptorSetLayoutBinding::writeToMetalArgumentBuffer(id<MTLTexture> mtlTexture,
+ uint32_t planeCount,
+ uint32_t planeIndex,
+ uint32_t elementIndex) {
+ if ( !isUsingMetalArgumentBuffer() || !mtlTexture ) { return; }
+
+ for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+ if (_applyToStage[stage]) {
+ uint32_t argIdx = getMTLArgumentBufferIndex(stage, elementIndex * planeCount + planeIndex);
+ [_layout->_argumentEncoder[stage].mtlArgumentEncoder setTexture: mtlTexture atIndex: argIdx];
+ }
+ }
+}
+
+void MVKDescriptorSetLayoutBinding::writeToMetalArgumentBuffer(id<MTLSamplerState> mtlSamplerState,
+ uint32_t elementIndex) {
+ if ( !isUsingMetalArgumentBuffer() ) { return; }
+
+	// Metal requires a sampler, so use the default sampler if none is provided.
+ if ( !mtlSamplerState ) { mtlSamplerState = getDevice()->getDefaultMTLSamplerState(); }
+
+ for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+ if (_applyToStage[stage]) {
+ uint32_t argIdx = getMTLArgumentBufferIndex(stage, elementIndex);
+ [_layout->_argumentEncoder[stage].mtlArgumentEncoder setSamplerState: mtlSamplerState atIndex: argIdx];
+ }
+ }
+}
+
+void MVKDescriptorSetLayoutBinding::writeToMetalArgumentBuffer(uint8_t* pSrcData,
+ NSUInteger dstOffset,
+ NSUInteger dataLen) {
+ if ( !isUsingMetalArgumentBuffer() || !pSrcData ) { return; }
+
+ for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+ if (_applyToStage[stage]) {
+ uint32_t argIdx = getMTLArgumentBufferIndex(stage);
+ uint8_t* pDstData = (uint8_t*)[_layout->_argumentEncoder[stage].mtlArgumentEncoder constantDataAtIndex: argIdx];
+ if (pDstData) { memcpy(pDstData + dstOffset, pSrcData, dataLen); }
+ }
+ }
+}
+
// If depth compare is required, but unavailable on the device, the sampler can only be used as an immutable sampler
bool MVKDescriptorSetLayoutBinding::validate(MVKSampler* mvkSampler) {
if (mvkSampler->getRequiresConstExprSampler()) {
@@ -360,31 +530,44 @@
spv::ExecutionModelFragment,
spv::ExecutionModelGLCompute
};
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
if (_applyToStage[i]) {
mvkPopulateShaderConverterContext(context,
mtlIdxs.stages[i],
models[i],
dslIndex,
_info.binding,
- getDescriptorCount(nullptr),
+ getDescriptorCount(),
mvkSamp);
- }
+
+ // If Metal argument buffers are in use, identify any inline uniform block bindings.
+ if (_info.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT &&
+ isUsingMetalArgumentBuffer() &&
+ MVKInlineUniformBlockDescriptor::shouldEmbedInlineBlocksInMetalAgumentBuffer()) {
+
+ mvk::DescriptorBinding db;
+ db.descriptorSet = dslIndex;
+ db.binding = _info.binding;
+ context.inlineUniformBlocks.push_back(db);
+ }
+ }
}
}
MVKDescriptorSetLayoutBinding::MVKDescriptorSetLayoutBinding(MVKDevice* device,
MVKDescriptorSetLayout* layout,
const VkDescriptorSetLayoutBinding* pBinding,
- VkDescriptorBindingFlagsEXT bindingFlags) :
+ VkDescriptorBindingFlagsEXT bindingFlags,
+ uint32_t descriptorIndex) :
MVKBaseDeviceObject(device),
_layout(layout),
_info(*pBinding),
- _flags(bindingFlags) {
+ _flags(bindingFlags),
+ _descriptorIndex(descriptorIndex) {
_info.pImmutableSamplers = nullptr; // Remove dangling pointer
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
// Determine if this binding is used by this shader stage
_applyToStage[i] = mvkAreAllFlagsEnabled(pBinding->stageFlags, mvkVkShaderStageFlagBitsFromMVKShaderStage(MVKShaderStage(i)));
// If this binding is used by the shader, set the Metal resource index
@@ -412,10 +595,11 @@
_layout(binding._layout),
_info(binding._info),
_flags(binding._flags),
+ _descriptorIndex(binding._descriptorIndex),
_immutableSamplers(binding._immutableSamplers),
_mtlResourceIndexOffsets(binding._mtlResourceIndexOffsets) {
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
_applyToStage[i] = binding._applyToStage[i];
}
for (MVKSampler* sampler : _immutableSamplers) {
@@ -434,10 +618,21 @@
void MVKDescriptorSetLayoutBinding::initMetalResourceIndexOffsets(MVKShaderStageResourceBinding* pBindingIndexes,
MVKShaderStageResourceBinding* pDescSetCounts,
const VkDescriptorSetLayoutBinding* pBinding) {
+
+ // Sets an index offset and updates both that index and the general resource index.
+	// Can be used multiple times for combined multi-resource descriptor types.
+# define setResourceIndexOffset(rezIdx) \
+ do { \
+ pBindingIndexes->rezIdx = isUsingMetalArgumentBuffer() ? pDescSetCounts->resourceIndex : pDescSetCounts->rezIdx; \
+ pDescSetCounts->rezIdx += descCnt; \
+ pBindingIndexes->resourceIndex = pDescSetCounts->resourceIndex; \
+ pDescSetCounts->resourceIndex += descCnt; \
+ } while(false)
+
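+	// An inline uniform block consumes a single buffer slot, regardless of its size in bytes.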
+ uint32_t descCnt = pBinding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT ? 1 : pBinding->descriptorCount;
switch (pBinding->descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
- pBindingIndexes->samplerIndex = pDescSetCounts->samplerIndex;
- pDescSetCounts->samplerIndex += pBinding->descriptorCount;
+ setResourceIndexOffset(samplerIndex);
if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfSamplers) {
_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of samplers.", _device->getName()));
@@ -445,10 +640,8 @@
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- pBindingIndexes->textureIndex = pDescSetCounts->textureIndex;
- pDescSetCounts->textureIndex += pBinding->descriptorCount;
- pBindingIndexes->samplerIndex = pDescSetCounts->samplerIndex;
- pDescSetCounts->samplerIndex += pBinding->descriptorCount;
+ setResourceIndexOffset(textureIndex);
+ setResourceIndexOffset(samplerIndex);
if (pBinding->descriptorCount > 1) {
if ( !_device->_pMetalFeatures->arrayOfTextures ) {
@@ -474,14 +667,12 @@
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- pBindingIndexes->bufferIndex = pDescSetCounts->bufferIndex;
- pDescSetCounts->bufferIndex += pBinding->descriptorCount;
+ setResourceIndexOffset(bufferIndex);
// fallthrough
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- pBindingIndexes->textureIndex = pDescSetCounts->textureIndex;
- pDescSetCounts->textureIndex += pBinding->descriptorCount;
+ setResourceIndexOffset(textureIndex);
if (pBinding->descriptorCount > 1 && !_device->_pMetalFeatures->arrayOfTextures) {
_layout->setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Device %s does not support arrays of textures.", _device->getName()));
@@ -492,13 +683,8 @@
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- pBindingIndexes->bufferIndex = pDescSetCounts->bufferIndex;
- pDescSetCounts->bufferIndex += pBinding->descriptorCount;
- break;
-
- case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
- pBindingIndexes->bufferIndex = pDescSetCounts->bufferIndex;
- pDescSetCounts->bufferIndex += 1;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ setResourceIndexOffset(bufferIndex);
break;
default:
@@ -508,10 +694,36 @@
#pragma mark -
+#pragma mark MVKDescriptor
+
+MTLResourceUsage MVKDescriptor::getMTLResourceUsage() {
+ MTLResourceUsage mtlUsage = MTLResourceUsageRead;
+ switch (getDescriptorType()) {
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ mtlUsage |= MTLResourceUsageSample;
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ mtlUsage |= MTLResourceUsageWrite;
+ break;
+
+ default:
+ break;
+ }
+ return mtlUsage;
+}
+
+
+#pragma mark -
#pragma mark MVKBufferDescriptor
// A null cmdEncoder can be passed to perform a validation pass
void MVKBufferDescriptor::bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
@@ -534,20 +746,33 @@
else
bb.size = (uint32_t)_buffRange;
}
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
- if (stages[i]) {
- bb.index = mtlIndexes.stages[i].bufferIndex + descriptorIndex;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
+
+ // Update the Metal argument buffer entry with the dynamic offset
+ if (descType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || descType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+ mvkDSLBind->writeToMetalArgumentBuffer(bb.mtlBuffer, bb.offset, descriptorIndex);
+ }
+
+ if (mvkDSLBind->isUsingMetalArgumentBuffer()) {
+ MVKMTLArgumentBufferResourceUsage abru;
+ abru.mtlResource = bb.mtlResource;
+ abru.mtlUsage = getMTLResourceUsage();
+ abru.mtlStages = mvkMTLRenderStagesFromMVKShaderStages(stages);
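+		// Resources referenced only indirectly through an argument buffer must be made resident
+		// on the command encoder (via useResource:), so record this usage for the encoder.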
+ if (cmdEncoder) { cmdEncoder->useArgumentBufferResource(abru, stages[kMVKShaderStageCompute]); }
+ } else {
+ // If not using Metal argument buffer, bind discretely
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
+ if (stages[i]) {
+ bb.index = mtlIndexes.stages[i].bufferIndex + descriptorIndex;
+ if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
}
}
}
}
-void MVKBufferDescriptor::write(MVKDescriptorSet* mvkDescSet,
+void MVKBufferDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData) {
auto* oldBuff = _mvkBuffer;
@@ -559,10 +784,14 @@
if (_mvkBuffer) { _mvkBuffer->retain(); }
if (oldBuff) { oldBuff->release(); }
+
+ // Update the Metal argument buffer entry
+ id<MTLBuffer> mtlBuffer = _mvkBuffer ? _mvkBuffer->getMTLBuffer() : nil;
+ NSUInteger mtlBuffOffset = _mvkBuffer ? _mvkBuffer->getMTLBufferOffset() + _buffOffset : 0;
+ mvkDSLBind->writeToMetalArgumentBuffer(mtlBuffer, mtlBuffOffset, dstIndex);
}
-void MVKBufferDescriptor::read(MVKDescriptorSet* mvkDescSet,
- uint32_t dstIndex,
+void MVKBufferDescriptor::read(uint32_t dstIndex,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
@@ -587,66 +816,120 @@
// A null cmdEncoder can be passed to perform a validation pass
void MVKInlineUniformBlockDescriptor::bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) {
- MVKMTLBufferBinding bb;
- bb.mtlBytes = _buffer;
- bb.size = _length;
- bb.isInline = true;
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
- if (stages[i]) {
- bb.index = mtlIndexes.stages[i].bufferIndex;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
+
+ if (mvkDSLBind->isUsingMetalArgumentBuffer()) {
+ if (cmdEncoder && _isUsingIntermediaryMTLBuffer) {
+ MVKMTLArgumentBufferResourceUsage abru;
+ abru.mtlResource = ((MVKMTLBufferAllocation*)_buffer)->_mtlBuffer;
+ abru.mtlUsage = getMTLResourceUsage();
+ abru.mtlStages = mvkMTLRenderStagesFromMVKShaderStages(stages);
+ cmdEncoder->useArgumentBufferResource(abru, stages[kMVKShaderStageCompute]);
+ }
+ } else {
+ // If not using Metal argument buffer, bind discretely
+ MVKMTLBufferBinding bb;
+ bb.mtlBytes = getData();
+ bb.size = _length;
+ bb.isInline = true;
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
+ if (stages[i]) {
+ bb.index = mtlIndexes.stages[i].bufferIndex;
+ if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
}
}
}
}
-void MVKInlineUniformBlockDescriptor::write(MVKDescriptorSet* mvkDescSet,
+void MVKInlineUniformBlockDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t dstOffset,
+ uint32_t dstIndex,
size_t stride,
const void* pData) {
+ // Ensure there is a destination to write to
+ if ( !_buffer ) {
+ _length = mvkDSLBind->_info.descriptorCount;
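+		// The backing store is either an intermediary MTLBuffer referenced from the Metal argument
+		// buffer, or host memory; when embedding is enabled, each write is also copied directly
+		// into the argument buffer's constant data.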
+ _isUsingIntermediaryMTLBuffer = mvkDSLBind->supportsMetalArgumentBuffers() && !shouldEmbedInlineBlocksInMetalAgumentBuffer();
+ if (_isUsingIntermediaryMTLBuffer) {
+ // Acquire an intermediary buffer and write it to the Metal argument buffer
+ auto* mtlBuffRgn = descSet->acquireMTLBufferRegion(_length);
+ _buffer = (void*)mtlBuffRgn;
+ mvkDSLBind->writeToMetalArgumentBuffer(mtlBuffRgn->_mtlBuffer, mtlBuffRgn->_offset, dstIndex);
+ } else {
+ _buffer = malloc(_length);
+ }
+ }
+
const auto& pInlineUniformBlock = *(VkWriteDescriptorSetInlineUniformBlockEXT*)pData;
- if (pInlineUniformBlock.pData && _buffer) {
- memcpy(_buffer + dstOffset, pInlineUniformBlock.pData, pInlineUniformBlock.dataSize);
+ uint8_t* data = getData();
+ if (data && pInlineUniformBlock.pData && dstOffset < _length) {
+ uint32_t dataLen = std::min(pInlineUniformBlock.dataSize, _length - dstOffset);
+ memcpy(data + dstOffset, pInlineUniformBlock.pData, dataLen);
+
+ // If using intermediary buffer, it only needs to be written to Metal argument buffer once.
+ // If writing content directly to Metal argument buffer, update that content.
+ if ( !_isUsingIntermediaryMTLBuffer ) {
+ mvkDSLBind->writeToMetalArgumentBuffer((uint8_t*)pInlineUniformBlock.pData, dstOffset, dataLen);
+ }
}
}
-void MVKInlineUniformBlockDescriptor::read(MVKDescriptorSet* mvkDescSet,
- uint32_t srcOffset,
+void MVKInlineUniformBlockDescriptor::read(uint32_t srcOffset,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock) {
- if (_buffer && pInlineUniformBlock->pData) {
- memcpy((void*)pInlineUniformBlock->pData, _buffer + srcOffset, pInlineUniformBlock->dataSize);
+ uint8_t* data = getData();
+ if (data && pInlineUniformBlock->pData && srcOffset < _length) {
+ uint32_t dataLen = std::min(pInlineUniformBlock->dataSize, _length - srcOffset);
+ memcpy((void*)pInlineUniformBlock->pData, data + srcOffset, dataLen);
}
}
-void MVKInlineUniformBlockDescriptor::setLayout(MVKDescriptorSetLayoutBinding* dslBinding, uint32_t index) {
- _length = dslBinding->_info.descriptorCount;
- _buffer = (uint8_t*)malloc(_length);
-}
-
void MVKInlineUniformBlockDescriptor::reset() {
- free(_buffer);
+ if (_isUsingIntermediaryMTLBuffer) {
+ if (_buffer) { ((MVKMTLBufferAllocation*)_buffer)->returnToPool(); }
+ } else {
+ free(_buffer);
+ }
_buffer = nullptr;
_length = 0;
+ _isUsingIntermediaryMTLBuffer = false;
MVKDescriptor::reset();
}
+uint8_t* MVKInlineUniformBlockDescriptor::getData() {
+ return (uint8_t*)((_isUsingIntermediaryMTLBuffer && _buffer) ? ((MVKMTLBufferAllocation*)_buffer)->getContents() : _buffer);
+}
+
+// We do this once lazily instead of in a library constructor function to
+// ensure the NSProcessInfo environment is available when called upon.
+bool MVKInlineUniformBlockDescriptor::shouldEmbedInlineBlocksInMetalAgumentBuffer() {
+# ifndef MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER
+# define MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER 0
+# endif
+ static bool _shouldEmbedInlineBlocksInMetalAgumentBuffer = MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER;
+ static bool _shouldEmbedInlineBlocksInMetalAgumentBufferInitialized = false;
+ if ( !_shouldEmbedInlineBlocksInMetalAgumentBufferInitialized ) {
+ _shouldEmbedInlineBlocksInMetalAgumentBufferInitialized = true;
+ MVK_SET_FROM_ENV_OR_BUILD_BOOL(_shouldEmbedInlineBlocksInMetalAgumentBuffer, MVK_CONFIG_EMBED_INLINE_BLOCKS_IN_METAL_ARGUMENT_BUFFER);
+ }
+ return _shouldEmbedInlineBlocksInMetalAgumentBuffer;
+}
+
#pragma mark -
#pragma mark MVKImageDescriptor
// A null cmdEncoder can be passed to perform a validation pass
void MVKImageDescriptor::bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
@@ -654,7 +937,7 @@
uint32_t& dynamicOffsetIndex) {
VkDescriptorType descType = getDescriptorType();
- uint8_t planeCount = (_mvkImageView) ? _mvkImageView->getPlaneCount() : 1;
+ uint8_t planeCount = _mvkImageView ? _mvkImageView->getPlaneCount() : 1;
for (uint8_t planeIndex = 0; planeIndex < planeCount; planeIndex++) {
MVKMTLTextureBinding tb;
MVKMTLBufferBinding bb;
@@ -672,29 +955,39 @@
bb.offset = mtlTex.bufferOffset;
bb.size = (uint32_t)(mtlTex.height * mtlTex.bufferBytesPerRow);
}
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
- if (stages[i]) {
- tb.index = mtlIndexes.stages[i].textureIndex + descriptorIndex + planeIndex;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindTexture(tb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindTexture(MVKShaderStage(i), tb); }
- }
- if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
- bb.index = mtlIndexes.stages[i].bufferIndex + descriptorIndex + planeIndex;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
- }
- }
- }
- }
+
+ if (mvkDSLBind->isUsingMetalArgumentBuffer()) {
+ MVKMTLArgumentBufferResourceUsage abru;
+ abru.mtlResource = tb.mtlResource;
+ abru.mtlUsage = getMTLResourceUsage();
+ abru.mtlStages = mvkMTLRenderStagesFromMVKShaderStages(stages);
+ if (cmdEncoder) { cmdEncoder->useArgumentBufferResource(abru, stages[kMVKShaderStageCompute]); }
+
+// Needed for atomic operations
+// if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
+// abru.mtlResource = bb.mtlResource;
+// if (cmdEncoder) { cmdEncoder->useArgumentBufferResource(abru, stages[kMVKShaderStageCompute]); }
+// }
+ } else {
+ // If not using Metal argument buffer, bind discretely
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
+ if (stages[i]) {
+ tb.index = mtlIndexes.stages[i].textureIndex + (descriptorIndex * planeCount) + planeIndex;
+ if (cmdEncoder) { cmdEncoder->bindTexture(tb, MVKShaderStage(i)); }
+ if (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
+ bb.index = mtlIndexes.stages[i].bufferIndex + (descriptorIndex * planeCount) + planeIndex;
+ if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
+ }
+ }
+ }
+ }
}
}
-void MVKImageDescriptor::write(MVKDescriptorSet* mvkDescSet,
+void MVKImageDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData) {
auto* oldImgView = _mvkImageView;
@@ -705,10 +998,23 @@
if (_mvkImageView) { _mvkImageView->retain(); }
if (oldImgView) { oldImgView->release(); }
+
+ // Update the Metal argument buffer entry
+ uint8_t planeCount = _mvkImageView ? _mvkImageView->getPlaneCount() : 1;
+ for (uint8_t planeIndex = 0; planeIndex < planeCount; planeIndex++) {
+ id<MTLTexture> mtlTexture = _mvkImageView ? _mvkImageView->getMTLTexture(planeIndex) : nil;
+ mvkDSLBind->writeToMetalArgumentBuffer(mtlTexture, planeCount, planeIndex, dstIndex);
+
+// Needed for atomic operations
+// if (getDescriptorType() == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
+// id<MTLTexture> baseMTLTex = mtlTexture.parentTexture ? mtlTexture.parentTexture : mtlTexture;
+// uint32_t buffArgIdx = mvkDSLBind->getDescriptorCount() * planeCount + dstIndex;
+// mvkDSLBind->writeToMetalArgumentBuffer(baseMTLTex.buffer, baseMTLTex.bufferOffset, buffArgIdx);
+// }
+ }
}
-void MVKImageDescriptor::read(MVKDescriptorSet* mvkDescSet,
- uint32_t dstIndex,
+void MVKImageDescriptor::read(uint32_t dstIndex,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
@@ -733,29 +1039,38 @@
// Metal validation requires each sampler in an array of samplers to be populated,
// even if not used, so populate a default if one hasn't been set.
void MVKSamplerDescriptorMixin::bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) {
- MVKMTLSamplerStateBinding sb;
- sb.mtlSamplerState = (_mvkSampler
- ? _mvkSampler->getMTLSamplerState()
- : cmdEncoder->getDevice()->getDefaultMTLSamplerState());
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
- if (stages[i]) {
- sb.index = mtlIndexes.stages[i].samplerIndex + descriptorIndex;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindSamplerState(sb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindSamplerState(MVKShaderStage(i), sb); }
+
+ if (mvkDSLBind->isUsingMetalArgumentBuffer()) {
+ // Write any immutable sampler to the argument buffer now
+ if ( !_hasDynamicSampler ) {
+ id<MTLSamplerState> mtlSampler = _mvkSampler ? _mvkSampler->getMTLSamplerState() : nil;
+ uint32_t argBuffIdx = getSamplerArgBufferIndexOffset(mvkDSLBind) + descriptorIndex;
+ mvkDSLBind->writeToMetalArgumentBuffer(mtlSampler, argBuffIdx);
+ }
+ } else {
+ MVKMTLSamplerStateBinding sb;
+ sb.mtlSamplerState = (_mvkSampler
+ ? _mvkSampler->getMTLSamplerState()
+ : cmdEncoder->getDevice()->getDefaultMTLSamplerState());
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
+ if (stages[i]) {
+ sb.index = mtlIndexes.stages[i].samplerIndex + descriptorIndex;
+ if (cmdEncoder) { cmdEncoder->bindSamplerState(sb, MVKShaderStage(i)); }
}
}
}
}
-void MVKSamplerDescriptorMixin::write(MVKDescriptorSet* mvkDescSet,
+void MVKSamplerDescriptorMixin::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData) {
if (_hasDynamicSampler) {
@@ -769,11 +1084,15 @@
if (_mvkSampler) { _mvkSampler->retain(); }
if (oldSamp) { oldSamp->release(); }
+
+ // Update the Metal argument buffer entry
+ id<MTLSamplerState> mtlSampler = _mvkSampler ? _mvkSampler->getMTLSamplerState() : nil;
+ uint32_t argBuffIdx = getSamplerArgBufferIndexOffset(mvkDSLBind) + dstIndex;
+ mvkDSLBind->writeToMetalArgumentBuffer(mtlSampler, argBuffIdx);
}
}
-void MVKSamplerDescriptorMixin::read(MVKDescriptorSet* mvkDescSet,
- uint32_t dstIndex,
+void MVKSamplerDescriptorMixin::read(uint32_t dstIndex,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
@@ -806,28 +1125,30 @@
// A null cmdEncoder can be passed to perform a validation pass
void MVKSamplerDescriptor::bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) {
- MVKSamplerDescriptorMixin::bind(cmdEncoder, descriptorIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
+ MVKSamplerDescriptorMixin::bind(cmdEncoder, mvkDSLBind, descriptorIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
}
-void MVKSamplerDescriptor::write(MVKDescriptorSet* mvkDescSet,
+void MVKSamplerDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData) {
- MVKSamplerDescriptorMixin::write(mvkDescSet, srcIndex, stride, pData);
+ MVKSamplerDescriptorMixin::write(mvkDSLBind, descSet, srcIndex, dstIndex, stride, pData);
}
-void MVKSamplerDescriptor::read(MVKDescriptorSet* mvkDescSet,
- uint32_t dstIndex,
+void MVKSamplerDescriptor::read(uint32_t dstIndex,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock) {
- MVKSamplerDescriptorMixin::read(mvkDescSet, dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
+ MVKSamplerDescriptorMixin::read(dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
}
void MVKSamplerDescriptor::setLayout(MVKDescriptorSetLayoutBinding* dslBinding, uint32_t index) {
@@ -846,31 +1167,33 @@
// A null cmdEncoder can be passed to perform a validation pass
void MVKCombinedImageSamplerDescriptor::bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) {
- MVKImageDescriptor::bind(cmdEncoder, descriptorIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
- MVKSamplerDescriptorMixin::bind(cmdEncoder, descriptorIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
+ MVKImageDescriptor::bind(cmdEncoder, mvkDSLBind, descriptorIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
+ MVKSamplerDescriptorMixin::bind(cmdEncoder, mvkDSLBind, descriptorIndex, stages, mtlIndexes, dynamicOffsets, dynamicOffsetIndex);
}
-void MVKCombinedImageSamplerDescriptor::write(MVKDescriptorSet* mvkDescSet,
+void MVKCombinedImageSamplerDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData) {
- MVKImageDescriptor::write(mvkDescSet, srcIndex, stride, pData);
- MVKSamplerDescriptorMixin::write(mvkDescSet, srcIndex, stride, pData);
+ MVKImageDescriptor::write(mvkDSLBind, descSet, srcIndex, dstIndex, stride, pData);
+ MVKSamplerDescriptorMixin::write(mvkDSLBind, descSet, srcIndex, dstIndex, stride, pData);
}
-void MVKCombinedImageSamplerDescriptor::read(MVKDescriptorSet* mvkDescSet,
- uint32_t dstIndex,
+void MVKCombinedImageSamplerDescriptor::read(uint32_t dstIndex,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock) {
- MVKImageDescriptor::read(mvkDescSet, dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
- MVKSamplerDescriptorMixin::read(mvkDescSet, dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
+ MVKImageDescriptor::read(dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
+ MVKSamplerDescriptorMixin::read(dstIndex, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
}
void MVKCombinedImageSamplerDescriptor::setLayout(MVKDescriptorSetLayoutBinding* dslBinding, uint32_t index) {
@@ -878,6 +1201,11 @@
MVKSamplerDescriptorMixin::setLayout(dslBinding, index);
}
+uint32_t MVKCombinedImageSamplerDescriptor::getSamplerArgBufferIndexOffset(MVKDescriptorSetLayoutBinding* dslBinding) {
+ uint8_t planeCount = _mvkImageView ? _mvkImageView->getPlaneCount() : 1;
+ return dslBinding->getDescriptorCount() * planeCount;
+}
+
void MVKCombinedImageSamplerDescriptor::reset() {
MVKSamplerDescriptorMixin::reset();
MVKImageDescriptor::reset();
@@ -889,6 +1217,7 @@
// A null cmdEncoder can be passed to perform a validation pass
void MVKTexelBufferDescriptor::bind(MVKCommandEncoder* cmdEncoder,
+ MVKDescriptorSetLayoutBinding* mvkDSLBind,
uint32_t descriptorIndex,
bool stages[],
MVKShaderResourceBinding& mtlIndexes,
@@ -906,28 +1235,37 @@
bb.size = (uint32_t)(mtlTex.height * mtlTex.bufferBytesPerRow);
}
}
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
- if (stages[i]) {
- tb.index = mtlIndexes.stages[i].textureIndex + descriptorIndex;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindTexture(tb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindTexture(MVKShaderStage(i), tb); }
- }
- if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
- bb.index = mtlIndexes.stages[i].bufferIndex + descriptorIndex;
- if (i == kMVKShaderStageCompute) {
- if (cmdEncoder) { cmdEncoder->_computeResourcesState.bindBuffer(bb); }
- } else {
- if (cmdEncoder) { cmdEncoder->_graphicsResourcesState.bindBuffer(MVKShaderStage(i), bb); }
+ if (mvkDSLBind->isUsingMetalArgumentBuffer()) {
+ MVKMTLArgumentBufferResourceUsage abru;
+ abru.mtlResource = tb.mtlResource;
+ abru.mtlUsage = getMTLResourceUsage();
+ abru.mtlStages = mvkMTLRenderStagesFromMVKShaderStages(stages);
+ if (cmdEncoder) { cmdEncoder->useArgumentBufferResource(abru, stages[kMVKShaderStageCompute]); }
+
+// Needed for atomic operations
+// if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
+// abru.mtlResource = bb.mtlResource;
+// if (cmdEncoder) { cmdEncoder->useArgumentBufferResource(abru, stages[kMVKShaderStageCompute]); }
+// }
+ } else {
+ // If not using Metal argument buffer, bind discretely
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
+ if (stages[i]) {
+ tb.index = mtlIndexes.stages[i].textureIndex + descriptorIndex;
+ if (cmdEncoder) { cmdEncoder->bindTexture(tb, MVKShaderStage(i)); }
+ if (descType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
+ bb.index = mtlIndexes.stages[i].bufferIndex + descriptorIndex;
+ if (cmdEncoder) { cmdEncoder->bindBuffer(bb, MVKShaderStage(i)); }
}
}
}
}
}
-void MVKTexelBufferDescriptor::write(MVKDescriptorSet* mvkDescSet,
+void MVKTexelBufferDescriptor::write(MVKDescriptorSetLayoutBinding* mvkDSLBind,
+ MVKDescriptorSet* descSet,
uint32_t srcIndex,
+ uint32_t dstIndex,
size_t stride,
const void* pData) {
auto* oldBuffView = _mvkBufferView;
@@ -937,10 +1275,19 @@
if (_mvkBufferView) { _mvkBufferView->retain(); }
if (oldBuffView) { oldBuffView->release(); }
+
+ // Update the Metal argument buffer entry
+ id<MTLTexture> mtlTexture = _mvkBufferView ? _mvkBufferView->getMTLTexture() : nil;
+ mvkDSLBind->writeToMetalArgumentBuffer(mtlTexture, 1, 0, dstIndex);
+
+// Needed for atomic operations
+// if (getDescriptorType() == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
+// uint32_t buffArgIdx = mvkDSLBind->getDescriptorCount() + dstIndex;
+// mvkDSLBind->writeToMetalArgumentBuffer(mtlTexture.buffer, mtlTexture.bufferOffset, buffArgIdx);
+// }
}
-void MVKTexelBufferDescriptor::read(MVKDescriptorSet* mvkDescSet,
- uint32_t dstIndex,
+void MVKTexelBufferDescriptor::read(uint32_t dstIndex,
VkDescriptorImageInfo* pImageInfo,
VkDescriptorBufferInfo* pBufferInfo,
VkBufferView* pTexelBufferView,
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
index 6bbf686..e80fe32 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
@@ -20,9 +20,11 @@
#include "MVKDescriptor.h"
#include "MVKSmallVector.h"
+#include "MVKBitArray.h"
#include <unordered_set>
#include <unordered_map>
#include <vector>
+#include <mutex>
class MVKDescriptorPool;
class MVKPipelineLayout;
@@ -32,6 +34,13 @@
#pragma mark -
#pragma mark MVKDescriptorSetLayout
+/** Tracks a MTLArgumentEncoder and its offset into a Metal argument buffer. */
+typedef struct MVKMTLArgumentEncoder {
+ id<MTLArgumentEncoder> mtlArgumentEncoder = nil;
+ NSUInteger argumentBufferOffset = 0;
+ ~MVKMTLArgumentEncoder() { [mtlArgumentEncoder release]; }
+} MVKMTLArgumentEncoder;
+
/** Represents a Vulkan descriptor set layout. */
class MVKDescriptorSetLayout : public MVKVulkanAPIDeviceObject {
@@ -46,6 +55,7 @@
/** Encodes this descriptor set layout and the specified descriptor set on the specified command encoder. */
void bindDescriptorSet(MVKCommandEncoder* cmdEncoder,
MVKDescriptorSet* descSet,
+ uint32_t descSetLayoutIndex,
MVKShaderResourceBinding& dslMTLRezIdxOffsets,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex);
@@ -69,27 +79,34 @@
uint32_t dslIndex);
/** Returns true if this layout is for push descriptors only. */
- bool isPushDescriptorLayout() const { return _isPushDescriptorLayout; }
+ inline bool isPushDescriptorLayout() const { return _isPushDescriptorLayout; }
+
+ /** Returns whether this layout is using an argument buffer. */
+ inline bool isUsingMetalArgumentBuffer() const { return supportsMetalArgumentBuffers() && !isPushDescriptorLayout(); };
MVKDescriptorSetLayout(MVKDevice* device, const VkDescriptorSetLayoutCreateInfo* pCreateInfo);
protected:
-
- friend class MVKDescriptorSetLayoutBinding;
friend class MVKPipelineLayout;
- friend class MVKDescriptorSet;
friend class MVKDescriptorPool;
+ friend class MVKDescriptorSetLayoutBinding;
+ friend class MVKDescriptorSet;
void propagateDebugName() override {}
inline uint32_t getDescriptorCount() { return _descriptorCount; }
- inline uint32_t getDescriptorIndex(uint32_t binding, uint32_t elementIndex = 0) { return _bindingToDescriptorIndex[binding] + elementIndex; }
inline MVKDescriptorSetLayoutBinding* getBinding(uint32_t binding) { return &_bindings[_bindingToIndex[binding]]; }
+ inline uint32_t getDescriptorIndex(uint32_t binding, uint32_t elementIndex = 0) { return getBinding(binding)->getDescriptorIndex(elementIndex); }
+ inline NSUInteger getArgumentBufferSize() { return _argumentBufferSize; }
const VkDescriptorBindingFlags* getBindingFlags(const VkDescriptorSetLayoutCreateInfo* pCreateInfo);
+ void bindMetalArgumentBuffer(MVKDescriptorSet* descSet);
+ void initMTLArgumentEncoders();
MVKSmallVector<MVKDescriptorSetLayoutBinding> _bindings;
std::unordered_map<uint32_t, uint32_t> _bindingToIndex;
- std::unordered_map<uint32_t, uint32_t> _bindingToDescriptorIndex;
MVKShaderResourceBinding _mtlResourceCounts;
+ MVKMTLArgumentEncoder _argumentEncoder[kMVKShaderStageCount];
+ NSUInteger _argumentBufferSize;
+ std::mutex _argEncodingLock;
uint32_t _descriptorCount;
bool _isPushDescriptorLayout;
};
@@ -126,22 +143,29 @@
VkBufferView* pTexelBufferView,
VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock);
- MVKDescriptorSet(MVKDescriptorSetLayout* layout,
- uint32_t variableDescriptorCount,
- MVKDescriptorPool* pool);
+ /** Returns an MTLBuffer region allocation. */
+ const MVKMTLBufferAllocation* acquireMTLBufferRegion(NSUInteger length);
- ~MVKDescriptorSet() override;
+ MVKDescriptorSet(MVKDescriptorPool* pool);
protected:
- friend class MVKDescriptorSetLayoutBinding;
friend class MVKDescriptorPool;
+ friend class MVKDescriptorSetLayout;
+ friend class MVKDescriptorSetLayoutBinding;
void propagateDebugName() override {}
MVKDescriptor* getDescriptor(uint32_t binding, uint32_t elementIndex = 0);
+ VkResult allocate(MVKDescriptorSetLayout* layout,
+ uint32_t variableDescriptorCount,
+ NSUInteger mtlArgumentBufferOffset);
+ void free(bool isPoolReset);
+ id<MTLBuffer> getMetalArgumentBuffer();
+ inline NSUInteger getMetalArgumentBufferOffset() { return _mtlArgumentBufferOffset; }
+ MVKSmallVector<MVKDescriptor*> _descriptors;
MVKDescriptorSetLayout* _layout;
MVKDescriptorPool* _pool;
- MVKSmallVector<MVKDescriptor*> _descriptors;
+ NSUInteger _mtlArgumentBufferOffset;
uint32_t _variableDescriptorCount;
};
@@ -162,7 +186,7 @@
VkDescriptorType descriptorType);
protected:
- friend class MVKPreallocatedDescriptors;
+ friend class MVKDescriptorPool;
VkResult allocateDescriptor(MVKDescriptor** pMVKDesc);
bool findDescriptor(uint32_t endIndex, MVKDescriptor** pMVKDesc);
@@ -177,41 +201,6 @@
#pragma mark -
-#pragma mark MVKPreallocatedDescriptors
-
-/** Support class for MVKDescriptorPool that holds preallocated instances of all concrete descriptor classes. */
-class MVKPreallocatedDescriptors : public MVKBaseObject {
-
-public:
-
- /** Returns the Vulkan API opaque object controlling this object. */
- MVKVulkanAPIObject* getVulkanAPIObject() override { return nullptr; };
-
- MVKPreallocatedDescriptors(const VkDescriptorPoolCreateInfo* pCreateInfo);
-
-protected:
- friend class MVKDescriptorPool;
-
- VkResult allocateDescriptor(VkDescriptorType descriptorType, MVKDescriptor** pMVKDesc);
- void freeDescriptor(MVKDescriptor* mvkDesc);
- void reset();
-
- MVKDescriptorTypePreallocation<MVKUniformBufferDescriptor> _uniformBufferDescriptors;
- MVKDescriptorTypePreallocation<MVKStorageBufferDescriptor> _storageBufferDescriptors;
- MVKDescriptorTypePreallocation<MVKUniformBufferDynamicDescriptor> _uniformBufferDynamicDescriptors;
- MVKDescriptorTypePreallocation<MVKStorageBufferDynamicDescriptor> _storageBufferDynamicDescriptors;
- MVKDescriptorTypePreallocation<MVKInlineUniformBlockDescriptor> _inlineUniformBlockDescriptors;
- MVKDescriptorTypePreallocation<MVKSampledImageDescriptor> _sampledImageDescriptors;
- MVKDescriptorTypePreallocation<MVKStorageImageDescriptor> _storageImageDescriptors;
- MVKDescriptorTypePreallocation<MVKInputAttachmentDescriptor> _inputAttachmentDescriptors;
- MVKDescriptorTypePreallocation<MVKSamplerDescriptor> _samplerDescriptors;
- MVKDescriptorTypePreallocation<MVKCombinedImageSamplerDescriptor> _combinedImageSamplerDescriptors;
- MVKDescriptorTypePreallocation<MVKUniformTexelBufferDescriptor> _uniformTexelBufferDescriptors;
- MVKDescriptorTypePreallocation<MVKStorageTexelBufferDescriptor> _storageTexelBufferDescriptors;
-};
-
-
-#pragma mark -
#pragma mark MVKDescriptorPool
/** Represents a Vulkan descriptor pool. */
@@ -245,13 +234,30 @@
void propagateDebugName() override {}
VkResult allocateDescriptorSet(MVKDescriptorSetLayout* mvkDSL, uint32_t variableDescriptorCount, VkDescriptorSet* pVKDS);
const uint32_t* getVariableDecriptorCounts(const VkDescriptorSetAllocateInfo* pAllocateInfo);
- void freeDescriptorSet(MVKDescriptorSet* mvkDS);
+ void freeDescriptorSet(MVKDescriptorSet* mvkDS, bool isPoolReset);
VkResult allocateDescriptor(VkDescriptorType descriptorType, MVKDescriptor** pMVKDesc);
void freeDescriptor(MVKDescriptor* mvkDesc);
+ static NSUInteger getDescriptorByteCountForMetalArgumentBuffer(VkDescriptorType descriptorType);
+ static NSUInteger getMaxInlineBlockSize(MVKDevice* device, const VkDescriptorPoolCreateInfo* pCreateInfo);
- uint32_t _maxSets;
- std::unordered_set<MVKDescriptorSet*> _allocatedSets;
- MVKPreallocatedDescriptors* _preallocatedDescriptors;
+ MVKSmallVector<MVKDescriptorSet> _descriptorSets;
+ MVKBitArray _descriptorSetAvailablility;
+ id<MTLBuffer> _mtlArgumentBuffer;
+ NSUInteger _nextMTLArgumentBufferOffset;
+ MVKMTLBufferAllocator _inlineBlockMTLBufferAllocator;
+
+ MVKDescriptorTypePreallocation<MVKUniformBufferDescriptor> _uniformBufferDescriptors;
+ MVKDescriptorTypePreallocation<MVKStorageBufferDescriptor> _storageBufferDescriptors;
+ MVKDescriptorTypePreallocation<MVKUniformBufferDynamicDescriptor> _uniformBufferDynamicDescriptors;
+ MVKDescriptorTypePreallocation<MVKStorageBufferDynamicDescriptor> _storageBufferDynamicDescriptors;
+ MVKDescriptorTypePreallocation<MVKInlineUniformBlockDescriptor> _inlineUniformBlockDescriptors;
+ MVKDescriptorTypePreallocation<MVKSampledImageDescriptor> _sampledImageDescriptors;
+ MVKDescriptorTypePreallocation<MVKStorageImageDescriptor> _storageImageDescriptors;
+ MVKDescriptorTypePreallocation<MVKInputAttachmentDescriptor> _inputAttachmentDescriptors;
+ MVKDescriptorTypePreallocation<MVKSamplerDescriptor> _samplerDescriptors;
+ MVKDescriptorTypePreallocation<MVKCombinedImageSamplerDescriptor> _combinedImageSamplerDescriptors;
+ MVKDescriptorTypePreallocation<MVKUniformTexelBufferDescriptor> _uniformTexelBufferDescriptors;
+ MVKDescriptorTypePreallocation<MVKStorageTexelBufferDescriptor> _storageTexelBufferDescriptors;
};
@@ -304,15 +310,3 @@
void mvkUpdateDescriptorSetWithTemplate(VkDescriptorSet descriptorSet,
VkDescriptorUpdateTemplateKHR updateTemplate,
const void* pData);
-
-/**
- * If the shader stage binding has a binding defined for the specified stage, populates
- * the context at the descriptor set binding from the shader stage resource binding.
- */
-void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
- MVKShaderStageResourceBinding& ssRB,
- spv::ExecutionModel stage,
- uint32_t descriptorSetIndex,
- uint32_t bindingIndex,
- uint32_t count,
- MVKSampler* immutableSampler);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
index ed9d5e2..a81c71d 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
@@ -17,9 +17,12 @@
*/
#include "MVKDescriptorSet.h"
+#include "MVKCommandBuffer.h"
#include "MVKInstance.h"
#include "MVKOSExtensions.h"
+using namespace std;
+
#pragma mark -
#pragma mark MVKDescriptorSetLayout
@@ -27,17 +30,51 @@
// A null cmdEncoder can be passed to perform a validation pass
void MVKDescriptorSetLayout::bindDescriptorSet(MVKCommandEncoder* cmdEncoder,
MVKDescriptorSet* descSet,
+ uint32_t descSetLayoutIndex,
MVKShaderResourceBinding& dslMTLRezIdxOffsets,
MVKArrayRef<uint32_t> dynamicOffsets,
uint32_t& dynamicOffsetIndex) {
if (!cmdEncoder) { clearConfigurationResult(); }
- if ( !_isPushDescriptorLayout ) {
- for (auto& dslBind : _bindings) {
- dslBind.bind(cmdEncoder, descSet, dslMTLRezIdxOffsets, dynamicOffsets, dynamicOffsetIndex);
+ if (_isPushDescriptorLayout ) { return; }
+
+ lock_guard<mutex> lock(_argEncodingLock);
+ bindMetalArgumentBuffer(descSet);
+
+ for (auto& dslBind : _bindings) {
+ dslBind.bind(cmdEncoder, descSet, dslMTLRezIdxOffsets, dynamicOffsets, dynamicOffsetIndex);
+ }
+
+ bindMetalArgumentBuffer(nullptr);
+
+ // If we're using Metal argument buffer, bind it to the command encoder in each stage that will use it.
+ if (cmdEncoder) {
+ id<MTLBuffer> mtlArgBuff = descSet->getMetalArgumentBuffer();
+ NSUInteger descSetOffset = descSet->getMetalArgumentBufferOffset();
+ if (mtlArgBuff) {
+ MVKMTLBufferBinding bb;
+ bb.mtlBuffer = mtlArgBuff;
+ bb.index = descSetLayoutIndex;
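+			// The argument buffer is bound as an ordinary Metal buffer at an index equal to the
+			// descriptor set index, which is where MSL generated with argument buffers expects it.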
+ for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+ auto& argEnc = _argumentEncoder[stage];
+ if (argEnc.mtlArgumentEncoder) {
+ bb.offset = descSetOffset + argEnc.argumentBufferOffset;
+ cmdEncoder->bindBuffer(bb, MVKShaderStage(stage));
+ }
+ }
}
}
}
+void MVKDescriptorSetLayout::bindMetalArgumentBuffer(MVKDescriptorSet* descSet) {
+ id<MTLBuffer> mtlArgBuff = descSet ? descSet->getMetalArgumentBuffer() : nil;
+ NSUInteger descSetOffset = descSet ? descSet->getMetalArgumentBufferOffset() : 0;
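+	// Passing a null descriptor set effectively detaches the stage encoders from any argument buffer.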
+ for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+ auto& argEnc = _argumentEncoder[stage];
+ [argEnc.mtlArgumentEncoder setArgumentBuffer: mtlArgBuff
+ offset: (descSetOffset + argEnc.argumentBufferOffset)];
+ }
+}
+
static const void* getWriteParameters(VkDescriptorType type, const VkDescriptorImageInfo* pImageInfo,
const VkDescriptorBufferInfo* pBufferInfo, const VkBufferView* pTexelBufferView,
const VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock,
@@ -166,6 +203,11 @@
for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
_bindings[bindIdx].populateShaderConverterContext(context, dslMTLRezIdxOffsets, dslIndex);
}
+
+	// If Metal argument buffers are in use, but this descriptor set layout is not using them, mark this descriptor set as discrete.
+ if (supportsMetalArgumentBuffers() && !isUsingMetalArgumentBuffer()) {
+ context.discreteDescriptorSets.push_back(dslIndex);
+ }
}
MVKDescriptorSetLayout::MVKDescriptorSetLayout(MVKDevice* device,
@@ -186,20 +228,21 @@
for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
sortedBindings.push_back( { &pCreateInfo->pBindings[bindIdx], pBindingFlags ? pBindingFlags[bindIdx] : 0 } );
}
- std::sort(sortedBindings.begin(), sortedBindings.end(), [](BindInfo bindInfo1, BindInfo bindInfo2) {
+ sort(sortedBindings.begin(), sortedBindings.end(), [](BindInfo bindInfo1, BindInfo bindInfo2) {
return bindInfo1.pBinding->binding < bindInfo2.pBinding->binding;
});
- _isPushDescriptorLayout = (pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) != 0;
+ _isPushDescriptorLayout = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
_descriptorCount = 0;
_bindings.reserve(bindCnt);
for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) {
BindInfo& bindInfo = sortedBindings[bindIdx];
- _bindings.emplace_back(_device, this, bindInfo.pBinding, bindInfo.bindingFlags);
+ _bindings.emplace_back(_device, this, bindInfo.pBinding, bindInfo.bindingFlags, _descriptorCount);
_bindingToIndex[bindInfo.pBinding->binding] = bindIdx;
- _bindingToDescriptorIndex[bindInfo.pBinding->binding] = _descriptorCount;
- _descriptorCount += _bindings.back().getDescriptorCount(nullptr);
+ _descriptorCount += _bindings.back().getDescriptorCount();
}
+
+ initMTLArgumentEncoders();
}
// Find and return an array of binding flags from the pNext chain of pCreateInfo,
@@ -218,6 +261,32 @@
return nullptr;
}
+void MVKDescriptorSetLayout::initMTLArgumentEncoders() {
+ _argumentBufferSize = 0;
+
+ if ( !isUsingMetalArgumentBuffer() ) { return; }
+
+ auto* mvkDvc = getDevice();
+ @autoreleasepool {
+ id<MTLDevice> mtlDvc = mvkDvc->getMTLDevice();
+ NSMutableArray<MTLArgumentDescriptor*>* args = [NSMutableArray arrayWithCapacity: _bindings.size()];
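+		// Build a separate MTLArgumentEncoder for each shader stage, packing each stage's encoded
+		// arguments sequentially into the descriptor set's region of the pool's argument buffer.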
+ for (uint32_t stage = kMVKShaderStageVertex; stage < kMVKShaderStageCount; stage++) {
+ [args removeAllObjects];
+ uint32_t argIdx = 0;
+ for (auto& dslBind : _bindings) {
+ dslBind.addMTLArgumentDescriptors(stage, args, argIdx);
+ }
+ if (args.count) {
+ auto& argEnc = _argumentEncoder[stage];
+ argEnc.mtlArgumentEncoder = [mtlDvc newArgumentEncoderWithArguments: args]; // retained
+ argEnc.argumentBufferOffset = _argumentBufferSize;
+ _argumentBufferSize += mvkAlignByteCount(argEnc.mtlArgumentEncoder.encodedLength,
+ mvkDvc->_pMetalFeatures->mtlBufferAlignment);
+ }
+ }
+ }
+}
+
#pragma mark -
#pragma mark MVKDescriptorSet
@@ -230,28 +299,43 @@
return _descriptors[_layout->getDescriptorIndex(binding, elementIndex)];
}
+id<MTLBuffer> MVKDescriptorSet::getMetalArgumentBuffer() { return _pool->_mtlArgumentBuffer; }
+
template<typename DescriptorAction>
void MVKDescriptorSet::write(const DescriptorAction* pDescriptorAction,
size_t stride,
const void* pData) {
- VkDescriptorType descType = getDescriptorType(pDescriptorAction->dstBinding);
+ lock_guard<mutex> lock(_layout->_argEncodingLock);
+ _layout->bindMetalArgumentBuffer(this);
+
+ MVKDescriptorSetLayoutBinding* mvkDSLBind = _layout->getBinding(pDescriptorAction->dstBinding);
+ VkDescriptorType descType = mvkDSLBind->getDescriptorType();
if (descType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
// For inline buffers dstArrayElement is a byte offset
MVKDescriptor* mvkDesc = getDescriptor(pDescriptorAction->dstBinding);
if (mvkDesc->getDescriptorType() == descType) {
- mvkDesc->write(this, pDescriptorAction->dstArrayElement, stride, pData);
+ mvkDesc->write(mvkDSLBind, this, pDescriptorAction->dstArrayElement, 0, stride, pData);
}
} else {
- uint32_t dstStartIdx = _layout->getDescriptorIndex(pDescriptorAction->dstBinding, pDescriptorAction->dstArrayElement);
+ uint32_t descStartIdx = _layout->getDescriptorIndex(pDescriptorAction->dstBinding, pDescriptorAction->dstArrayElement);
uint32_t descCnt = pDescriptorAction->descriptorCount;
- for (uint32_t descIdx = 0; descIdx < descCnt; descIdx++) {
- MVKDescriptor* mvkDesc = _descriptors[dstStartIdx + descIdx];
+ for (uint32_t srcIdx = 0; srcIdx < descCnt; srcIdx++) {
+ MVKDescriptor* mvkDesc = _descriptors[descStartIdx + srcIdx];
if (mvkDesc->getDescriptorType() == descType) {
- mvkDesc->write(this, descIdx, stride, pData);
+ uint32_t dstIdx = pDescriptorAction->dstArrayElement + srcIdx;
+ mvkDesc->write(mvkDSLBind, this, srcIdx, dstIdx, stride, pData);
}
- }
- }
+ }
+ }
+
+ // For some unexpected reason, GPU capture on Xcode 12 doesn't always correctly expose
+ // the contents of Metal argument buffers. Triggering an extraction of the arg buffer
+ // contents here, after filling it, seems to correct that.
+ // Sigh. A bug report has been filed with Apple.
+ if (getInstance()->isCurrentlyAutoGPUCapturing()) { [_pool->_mtlArgumentBuffer contents]; }
+
+ _layout->bindMetalArgumentBuffer(nullptr);
}
// Create concrete implementations of the three variations of the write() function.
@@ -274,26 +358,31 @@
// For inline buffers srcArrayElement is a byte offset
MVKDescriptor* mvkDesc = getDescriptor(pDescriptorCopy->srcBinding);
if (mvkDesc->getDescriptorType() == descType) {
- mvkDesc->read(this, pDescriptorCopy->srcArrayElement, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
+ mvkDesc->read(pDescriptorCopy->srcArrayElement, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
}
} else {
uint32_t srcStartIdx = _layout->getDescriptorIndex(pDescriptorCopy->srcBinding, pDescriptorCopy->srcArrayElement);
for (uint32_t descIdx = 0; descIdx < descCnt; descIdx++) {
MVKDescriptor* mvkDesc = _descriptors[srcStartIdx + descIdx];
if (mvkDesc->getDescriptorType() == descType) {
- mvkDesc->read(this, descIdx, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
+ mvkDesc->read(descIdx, pImageInfo, pBufferInfo, pTexelBufferView, pInlineUniformBlock);
}
}
}
}
-MVKDescriptorSet::MVKDescriptorSet(MVKDescriptorSetLayout* layout,
- uint32_t variableDescriptorCount,
- MVKDescriptorPool* pool) :
- MVKVulkanAPIDeviceObject(pool->_device),
- _layout(layout),
- _variableDescriptorCount(variableDescriptorCount),
- _pool(pool) {
+const MVKMTLBufferAllocation* MVKDescriptorSet::acquireMTLBufferRegion(NSUInteger length) {
+ return _pool->_inlineBlockMTLBufferAllocator.acquireMTLBufferRegion(length);
+}
+
+VkResult MVKDescriptorSet::allocate(MVKDescriptorSetLayout* layout,
+ uint32_t variableDescriptorCount,
+ NSUInteger mtlArgumentBufferOffset) {
+ _layout = layout;
+ _variableDescriptorCount = variableDescriptorCount;
+
+ // If the Metal argument buffer offset has not been set yet, set it now.
+ if ( !_mtlArgumentBufferOffset ) { _mtlArgumentBufferOffset = mtlArgumentBufferOffset; }
_descriptors.reserve(layout->getDescriptorCount());
uint32_t bindCnt = (uint32_t)layout->_bindings.size();
@@ -310,10 +399,24 @@
}
if ( !wasConfigurationSuccessful() ) { break; }
}
+ return getConfigurationResult();
}
-MVKDescriptorSet::~MVKDescriptorSet() {
+void MVKDescriptorSet::free(bool isPoolReset) {
+ _layout = nullptr;
+ _variableDescriptorCount = 0;
+
+ // Only reset the Metal arg buffer offset if the entire pool is being reset
+ if (isPoolReset) { _mtlArgumentBufferOffset = 0; }
+
for (auto mvkDesc : _descriptors) { _pool->freeDescriptor(mvkDesc); }
+ _descriptors.clear();
+
+ clearConfigurationResult();
+}
+
+MVKDescriptorSet::MVKDescriptorSet(MVKDescriptorPool* pool) : MVKVulkanAPIDeviceObject(pool->_device), _pool(pool) {
+ free(true);
}
@@ -321,26 +424,31 @@
#pragma mark MVKDescriptorTypePreallocation
#ifndef MVK_CONFIG_PREALLOCATE_DESCRIPTORS
-# define MVK_CONFIG_PREALLOCATE_DESCRIPTORS 0
+# define MVK_CONFIG_PREALLOCATE_DESCRIPTORS 1
#endif
-static bool _mvkPreallocateDescriptors = MVK_CONFIG_PREALLOCATE_DESCRIPTORS;
-static bool _mvkPreallocateDescriptorsInitialized = false;
-
// Returns whether descriptors should be preallocated in the descriptor pools
// We do this once lazily instead of in a library constructor function to
// ensure the NSProcessInfo environment is available when called upon.
-static inline bool getMVKPreallocateDescriptors() {
- if ( !_mvkPreallocateDescriptorsInitialized ) {
- _mvkPreallocateDescriptorsInitialized = true;
- MVK_SET_FROM_ENV_OR_BUILD_BOOL(_mvkPreallocateDescriptors, MVK_CONFIG_PREALLOCATE_DESCRIPTORS);
+static inline bool mvkShouldPreallocateDescriptors() {
+ static bool _mvkShouldPreallocateDescriptors = MVK_CONFIG_PREALLOCATE_DESCRIPTORS;
+ static bool _mvkShouldPreallocateDescriptorsInitialized = false;
+ if ( !_mvkShouldPreallocateDescriptorsInitialized ) {
+ _mvkShouldPreallocateDescriptorsInitialized = true;
+ MVK_SET_FROM_ENV_OR_BUILD_BOOL(_mvkShouldPreallocateDescriptors, MVK_CONFIG_PREALLOCATE_DESCRIPTORS);
}
- return _mvkPreallocateDescriptors;
+ return _mvkShouldPreallocateDescriptors;
}
template<class DescriptorClass>
VkResult MVKDescriptorTypePreallocation<DescriptorClass>::allocateDescriptor(MVKDescriptor** pMVKDesc) {
+ // If we don't preallocate, create and return an instance on the fly.
+ if ( !mvkShouldPreallocateDescriptors() ) {
+ *pMVKDesc = new DescriptorClass();
+ return VK_SUCCESS;
+ }
+
uint32_t descCnt = (uint32_t)_descriptors.size();
// Preallocated descriptors that CANNOT be freed.
@@ -389,6 +497,12 @@
template<typename DescriptorClass>
void MVKDescriptorTypePreallocation<DescriptorClass>::freeDescriptor(MVKDescriptor* mvkDesc) {
+	// If we don't preallocate, the descriptor was created on the fly, so destroy it here.
+ if ( !mvkShouldPreallocateDescriptors() ) {
+ mvkDesc->destroy();
+ return;
+ }
+
mvkDesc->reset();
if (_supportAvailability) {
@@ -411,29 +525,155 @@
template<typename DescriptorClass>
MVKDescriptorTypePreallocation<DescriptorClass>::MVKDescriptorTypePreallocation(const VkDescriptorPoolCreateInfo* pCreateInfo,
VkDescriptorType descriptorType) {
- // There may be more than one poolSizeCount instance for the desired VkDescriptorType.
- // Accumulate the descriptor count for the desired VkDescriptorType, and size the collections accordingly.
- uint32_t descriptorCount = 0;
- uint32_t poolCnt = pCreateInfo->poolSizeCount;
- for (uint32_t poolIdx = 0; poolIdx < poolCnt; poolIdx++) {
- auto& poolSize = pCreateInfo->pPoolSizes[poolIdx];
- if (poolSize.type == descriptorType) { descriptorCount += poolSize.descriptorCount; }
- }
- _descriptors.resize(descriptorCount);
-
// Determine whether we need to track the availability of previously freed descriptors.
_supportAvailability = mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT);
- if (_supportAvailability) { _availability.resize(descriptorCount, true); }
_nextAvailableIndex = 0;
+
+ if (mvkShouldPreallocateDescriptors()) {
+ // There may be more than one poolSizeCount instance for the desired VkDescriptorType.
+ // Accumulate the descriptor count for the desired VkDescriptorType, and size the collections accordingly.
+ uint32_t descriptorCount = 0;
+ uint32_t poolCnt = pCreateInfo->poolSizeCount;
+ for (uint32_t poolIdx = 0; poolIdx < poolCnt; poolIdx++) {
+ auto& poolSize = pCreateInfo->pPoolSizes[poolIdx];
+ if (poolSize.type == descriptorType) { descriptorCount += poolSize.descriptorCount; }
+ }
+
+ _descriptors.resize(descriptorCount);
+ if (_supportAvailability) { _availability.resize(descriptorCount, true); }
+ }
}
#pragma mark -
-#pragma mark MVKPreallocatedDescriptors
+#pragma mark MVKDescriptorPool
+
+VkResult MVKDescriptorPool::allocateDescriptorSets(const VkDescriptorSetAllocateInfo* pAllocateInfo,
+ VkDescriptorSet* pDescriptorSets) {
+ VkResult rslt = VK_SUCCESS;
+ const auto* pVarDescCounts = getVariableDecriptorCounts(pAllocateInfo);
+ for (uint32_t dsIdx = 0; dsIdx < pAllocateInfo->descriptorSetCount; dsIdx++) {
+ MVKDescriptorSetLayout* mvkDSL = (MVKDescriptorSetLayout*)pAllocateInfo->pSetLayouts[dsIdx];
+ if ( !mvkDSL->isPushDescriptorLayout() ) {
+ rslt = allocateDescriptorSet(mvkDSL, (pVarDescCounts ? pVarDescCounts[dsIdx] : 0), &pDescriptorSets[dsIdx]);
+ if (rslt) { break; }
+ }
+ }
+ return rslt;
+}
+
+// Find and return an array of variable descriptor counts from the pNext chain of pAllocateInfo,
+// or return nullptr if the chain does not include variable descriptor counts.
+const uint32_t* MVKDescriptorPool::getVariableDecriptorCounts(const VkDescriptorSetAllocateInfo* pAllocateInfo) {
+ for (const auto* next = (VkBaseInStructure*)pAllocateInfo->pNext; next; next = next->pNext) {
+ switch (next->sType) {
+ case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT: {
+ auto* pVarDescSetVarCounts = (VkDescriptorSetVariableDescriptorCountAllocateInfoEXT*)next;
+ return pVarDescSetVarCounts->descriptorSetCount ? pVarDescSetVarCounts->pDescriptorCounts : nullptr;
+ }
+ default:
+ break;
+ }
+ }
+ return nullptr;
+}
+
+// Ensure descriptor set was actually allocated, then return to pool
+VkResult MVKDescriptorPool::freeDescriptorSets(uint32_t count, const VkDescriptorSet* pDescriptorSets) {
+ for (uint32_t dsIdx = 0; dsIdx < count; dsIdx++) {
+ freeDescriptorSet((MVKDescriptorSet*)pDescriptorSets[dsIdx], false);
+ }
+ return VK_SUCCESS;
+}
+
+// Free all descriptor sets.
+VkResult MVKDescriptorPool::reset(VkDescriptorPoolResetFlags flags) {
+ for (auto& mvkDS : _descriptorSets) { freeDescriptorSet(&mvkDS, true); }
+
+ _uniformBufferDescriptors.reset();
+ _storageBufferDescriptors.reset();
+ _uniformBufferDynamicDescriptors.reset();
+ _storageBufferDynamicDescriptors.reset();
+ _inlineUniformBlockDescriptors.reset();
+ _sampledImageDescriptors.reset();
+ _storageImageDescriptors.reset();
+ _inputAttachmentDescriptors.reset();
+ _samplerDescriptors.reset();
+ _combinedImageSamplerDescriptors.reset();
+ _uniformTexelBufferDescriptors.reset();
+ _storageTexelBufferDescriptors.reset();
+
+ _nextMTLArgumentBufferOffset = 0;
+
+ return VK_SUCCESS;
+}
+
+// Retrieves the first available descriptor set, and configures it.
+// If none are available, returns an error.
+VkResult MVKDescriptorPool::allocateDescriptorSet(MVKDescriptorSetLayout* mvkDSL,
+ uint32_t variableDescriptorCount,
+ VkDescriptorSet* pVKDS) {
+ NSUInteger mtlArgBuffAllocSize = mvkDSL->getArgumentBufferSize();
+ size_t dsCnt = _descriptorSets.size();
+ size_t dsIdx = 0;
+ while (true) {
+ dsIdx = _descriptorSetAvailablility.getIndexOfFirstSetBit(dsIdx, true);
+ if (dsIdx >= dsCnt) { return VK_ERROR_OUT_OF_POOL_MEMORY; }
+
+ bool isSpaceAvail = true; // If not using Metal arg buffers, space will always be available.
+ MVKDescriptorSet* mvkDS = &_descriptorSets[dsIdx];
+ NSUInteger mtlArgBuffOffset = mvkDS->getMetalArgumentBufferOffset();
+
+ // If the desc set is using a Metal argument buffer, we also need to see if the desc set
+ // will fit in the slot that might already have been allocated for it in the Metal argument
+ // buffer from a previous allocation that was returned. If this pool has been reset recently,
+ // then the desc sets will not have had a Metal argument buffer allocation assigned yet.
+ if (mvkDSL->isUsingMetalArgumentBuffer()) {
+
+			// If the offset has not been set, set it now and update the next available offset value.
+			// (For the first desc set, an offset of zero may be a valid prior assignment, so only
+			// treat it as unset if the pool has not yet handed out any argument buffer space.)
+ if ( !mtlArgBuffOffset && (dsIdx || !_nextMTLArgumentBufferOffset)) {
+ mtlArgBuffOffset = _nextMTLArgumentBufferOffset;
+ _nextMTLArgumentBufferOffset += mtlArgBuffAllocSize;
+ }
+
+ // Get the offset of the next desc set, if one exists and
+ // its offset has been set, or the end of the arg buffer.
+ size_t nextDSIdx = dsIdx + 1;
+ NSUInteger nextOffset = (nextDSIdx < dsCnt ? _descriptorSets[nextDSIdx].getMetalArgumentBufferOffset() : 0);
+ if ( !nextOffset ) { nextOffset = _mtlArgumentBuffer.length; }
+
+ isSpaceAvail = (mtlArgBuffOffset + mtlArgBuffAllocSize) <= nextOffset;
+ }
+
+ if (isSpaceAvail) {
+ mvkDS->allocate(mvkDSL, variableDescriptorCount, mtlArgBuffOffset);
+ if (mvkDS->wasConfigurationSuccessful()) {
+ *pVKDS = (VkDescriptorSet)mvkDS;
+ } else {
+ freeDescriptorSet(mvkDS, false);
+ }
+ return mvkDS->getConfigurationResult();
+ }
+ dsIdx++; // Skip to next desc set and resume looking
+ }
+}
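The space check above boils down to an interval test: a free descriptor set can be reused only if its candidate offset plus the new layout's argument buffer size stays within the next set's slot (or the end of the MTLBuffer when no later slot has been assigned). A minimal C++ sketch of that test, with hypothetical names:

    #include <cstdint>

    // Returns whether an argument-buffer slot starting at slotOffset can hold argBufSize
    // bytes, bounded by the next assigned slot offset (0 meaning "none assigned yet",
    // in which case the buffer length is the limit). Hypothetical helper, not MoltenVK API.
    static bool fitsInArgumentBufferSlot(uint64_t slotOffset, uint64_t argBufSize,
                                         uint64_t nextSlotOffset, uint64_t bufferLength) {
        const uint64_t slotEnd = nextSlotOffset ? nextSlotOffset : bufferLength;
        return (slotOffset + argBufSize) <= slotEnd;
    }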
+
+// Descriptor sets are held in contiguous memory, so the index of the returned descriptor
+// set can be calculated by pointer differences, and it can be marked as available.
+void MVKDescriptorPool::freeDescriptorSet(MVKDescriptorSet* mvkDS, bool isPoolReset) {
+ if ( !mvkDS ) { return; } // Vulkan allows NULL refs.
+
+ if (mvkDS->_pool != this) { reportError(VK_ERROR_INITIALIZATION_FAILED, "A descriptor set is being returned to a descriptor pool that did not allocate it."); }
+
+ mvkDS->free(isPoolReset);
+ size_t dsIdx = mvkDS - _descriptorSets.data();
+ _descriptorSetAvailablility.setBit(dsIdx);
+}
// Allocate a descriptor of the specified type
-VkResult MVKPreallocatedDescriptors::allocateDescriptor(VkDescriptorType descriptorType,
- MVKDescriptor** pMVKDesc) {
+VkResult MVKDescriptorPool::allocateDescriptor(VkDescriptorType descriptorType,
+ MVKDescriptor** pMVKDesc) {
switch (descriptorType) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
return _uniformBufferDescriptors.allocateDescriptor(pMVKDesc);
@@ -476,7 +716,7 @@
}
}
-void MVKPreallocatedDescriptors::freeDescriptor(MVKDescriptor* mvkDesc) {
+void MVKDescriptorPool::freeDescriptor(MVKDescriptor* mvkDesc) {
VkDescriptorType descriptorType = mvkDesc->getDescriptorType();
switch (descriptorType) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
@@ -520,22 +760,10 @@
}
}
-void MVKPreallocatedDescriptors::reset() {
- _uniformBufferDescriptors.reset();
- _storageBufferDescriptors.reset();
- _uniformBufferDynamicDescriptors.reset();
- _storageBufferDynamicDescriptors.reset();
- _inlineUniformBlockDescriptors.reset();
- _sampledImageDescriptors.reset();
- _storageImageDescriptors.reset();
- _inputAttachmentDescriptors.reset();
- _samplerDescriptors.reset();
- _combinedImageSamplerDescriptors.reset();
- _uniformTexelBufferDescriptors.reset();
- _storageTexelBufferDescriptors.reset();
-}
-
-MVKPreallocatedDescriptors::MVKPreallocatedDescriptors(const VkDescriptorPoolCreateInfo* pCreateInfo) :
+MVKDescriptorPool::MVKDescriptorPool(MVKDevice* device, const VkDescriptorPoolCreateInfo* pCreateInfo) :
+ MVKVulkanAPIDeviceObject(device),
+ _descriptorSets(pCreateInfo->maxSets, MVKDescriptorSet(this)),
+ _descriptorSetAvailablility(pCreateInfo->maxSets, true),
_uniformBufferDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER),
_storageBufferDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER),
_uniformBufferDynamicDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC),
@@ -547,172 +775,84 @@
_samplerDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_SAMPLER),
_combinedImageSamplerDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER),
_uniformTexelBufferDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER),
- _storageTexelBufferDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) {
-}
+ _storageTexelBufferDescriptors(pCreateInfo, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER),
+ _inlineBlockMTLBufferAllocator(device, getMaxInlineBlockSize(device, pCreateInfo), true) {
+ _mtlArgumentBuffer = nil;
+ _nextMTLArgumentBufferOffset = 0;
+ if (supportsMetalArgumentBuffers()) {
+ NSUInteger mtlArgBuffSize = 0;
+ uint32_t poolCnt = pCreateInfo->poolSizeCount;
+ for (uint32_t poolIdx = 0; poolIdx < poolCnt; poolIdx++) {
+ auto& poolSize = pCreateInfo->pPoolSizes[poolIdx];
+ mtlArgBuffSize += getDescriptorByteCountForMetalArgumentBuffer(poolSize.type) * poolSize.descriptorCount;
+ }
+ mtlArgBuffSize += pCreateInfo->maxSets * _device->_pMetalFeatures->mtlBufferAlignment; // Leave room for each desc set to be aligned
-#pragma mark -
-#pragma mark MVKDescriptorPool
+	// Each shader stage uses its own arg buffer layout. As a result, we need to significantly
+	// overallocate space here, since we don't yet know how the descriptor set layouts will make
+	// use of the descriptors across each pipeline stage. Ideally, the same MVKMTLArgumentEncoder
+	// should be used across all pipeline stages, but that doesn't seem to be possible with the
+	// present combination of SPIRV-Cross and Metal behaviour.
+ mtlArgBuffSize *= kMVKShaderStageCount;
-VkResult MVKDescriptorPool::allocateDescriptorSets(const VkDescriptorSetAllocateInfo* pAllocateInfo,
- VkDescriptorSet* pDescriptorSets) {
- if (_allocatedSets.size() + pAllocateInfo->descriptorSetCount > _maxSets) {
- if (_device->_enabledExtensions.vk_KHR_maintenance1.enabled ||
- _device->getInstance()->getAPIVersion() >= VK_API_VERSION_1_1) {
- return VK_ERROR_OUT_OF_POOL_MEMORY; // Failure is an acceptable test...don't log as error.
- } else {
- return reportError(VK_ERROR_INITIALIZATION_FAILED, "The maximum number of descriptor sets that can be allocated by this descriptor pool is %d.", _maxSets);
+ if (mtlArgBuffSize) {
+ _mtlArgumentBuffer = [getMTLDevice() newBufferWithLength: mtlArgBuffSize options: MTLResourceStorageModeShared]; // retained
+ _mtlArgumentBuffer.label = @"Argument buffer";
}
}
-
- VkResult rslt = VK_SUCCESS;
- const auto* pVarDescCounts = getVariableDecriptorCounts(pAllocateInfo);
- for (uint32_t dsIdx = 0; dsIdx < pAllocateInfo->descriptorSetCount; dsIdx++) {
- MVKDescriptorSetLayout* mvkDSL = (MVKDescriptorSetLayout*)pAllocateInfo->pSetLayouts[dsIdx];
- if ( !mvkDSL->isPushDescriptorLayout() ) {
- rslt = allocateDescriptorSet(mvkDSL, (pVarDescCounts ? pVarDescCounts[dsIdx] : 0), &pDescriptorSets[dsIdx]);
- if (rslt) { break; }
- }
- }
- return rslt;
}
-// Find and return an array of variable descriptor counts from the pNext chain of pCreateInfo,
-// or return nullptr if the chain does not include variable descriptor counts.
-const uint32_t* MVKDescriptorPool::getVariableDecriptorCounts(const VkDescriptorSetAllocateInfo* pAllocateInfo) {
- for (const auto* next = (VkBaseInStructure*)pAllocateInfo->pNext; next; next = next->pNext) {
- switch (next->sType) {
- case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT: {
- auto* pVarDescSetVarCounts = (VkDescriptorSetVariableDescriptorCountAllocateInfoEXT*)next;
- return pVarDescSetVarCounts->descriptorSetCount ? pVarDescSetVarCounts->pDescriptorCounts : nullptr;
- }
- default:
- break;
- }
- }
- return nullptr;
-}
-
-// Ensure descriptor set was actually allocated, then return to pool
-VkResult MVKDescriptorPool::freeDescriptorSets(uint32_t count, const VkDescriptorSet* pDescriptorSets) {
- for (uint32_t dsIdx = 0; dsIdx < count; dsIdx++) {
- MVKDescriptorSet* mvkDS = (MVKDescriptorSet*)pDescriptorSets[dsIdx];
- if (_allocatedSets.erase(mvkDS)) {
- freeDescriptorSet(mvkDS);
- }
- }
- return VK_SUCCESS;
-}
-
-// Destroy all allocated descriptor sets
-VkResult MVKDescriptorPool::reset(VkDescriptorPoolResetFlags flags) {
- for (auto& mvkDS : _allocatedSets) { freeDescriptorSet(mvkDS); }
- _allocatedSets.clear();
- if (_preallocatedDescriptors) { _preallocatedDescriptors->reset(); }
- return VK_SUCCESS;
-}
-
-VkResult MVKDescriptorPool::allocateDescriptorSet(MVKDescriptorSetLayout* mvkDSL,
- uint32_t variableDescriptorCount,
- VkDescriptorSet* pVKDS) {
- MVKDescriptorSet* mvkDS = new MVKDescriptorSet(mvkDSL, variableDescriptorCount, this);
- VkResult rslt = mvkDS->getConfigurationResult();
-
- if (mvkDS->wasConfigurationSuccessful()) {
- _allocatedSets.insert(mvkDS);
- *pVKDS = (VkDescriptorSet)mvkDS;
- } else {
- freeDescriptorSet(mvkDS);
- }
- return rslt;
-}
-
-void MVKDescriptorPool::freeDescriptorSet(MVKDescriptorSet* mvkDS) { mvkDS->destroy(); }
-
-// Allocate a descriptor of the specified type
-VkResult MVKDescriptorPool::allocateDescriptor(VkDescriptorType descriptorType,
- MVKDescriptor** pMVKDesc) {
-
- // If descriptors are preallocated allocate from the preallocated pools
- if (_preallocatedDescriptors) {
- return _preallocatedDescriptors->allocateDescriptor(descriptorType, pMVKDesc);
- }
-
- // Otherwise instantiate one of the appropriate type now
+NSUInteger MVKDescriptorPool::getDescriptorByteCountForMetalArgumentBuffer(VkDescriptorType descriptorType) {
switch (descriptorType) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- *pMVKDesc = new MVKUniformBufferDescriptor();
- break;
-
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- *pMVKDesc = new MVKStorageBufferDescriptor();
- break;
-
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- *pMVKDesc = new MVKUniformBufferDynamicDescriptor();
- break;
-
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- *pMVKDesc = new MVKStorageBufferDynamicDescriptor();
- break;
+ return sizeof(id<MTLBuffer>);
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
- *pMVKDesc = new MVKInlineUniformBlockDescriptor();
- break;
+ return 1;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- *pMVKDesc = new MVKSampledImageDescriptor();
- break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ return sizeof(id<MTLTexture>);
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- *pMVKDesc = new MVKStorageImageDescriptor();
- break;
-
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- *pMVKDesc = new MVKInputAttachmentDescriptor();
- break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ return sizeof(id<MTLTexture>) + sizeof(id<MTLBuffer>);
case VK_DESCRIPTOR_TYPE_SAMPLER:
- *pMVKDesc = new MVKSamplerDescriptor();
- break;
+ return sizeof(id<MTLSamplerState>);
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- *pMVKDesc = new MVKCombinedImageSamplerDescriptor();
- break;
-
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- *pMVKDesc = new MVKUniformTexelBufferDescriptor();
- break;
-
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- *pMVKDesc = new MVKStorageTexelBufferDescriptor();
- break;
+ return sizeof(id<MTLTexture>) + sizeof(id<MTLSamplerState>);
default:
- return reportError(VK_ERROR_INITIALIZATION_FAILED, "Unrecognized VkDescriptorType %d.", descriptorType);
- }
- return VK_SUCCESS;
-}
-
-// Free a descriptor, either through the preallocated pool, or directly destroy it
-void MVKDescriptorPool::freeDescriptor(MVKDescriptor* mvkDesc) {
- if (_preallocatedDescriptors) {
- _preallocatedDescriptors->freeDescriptor(mvkDesc);
- } else {
- mvkDesc->destroy();
+ return 0;
}
}
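Combining these byte counts with the pool creation logic above, the single pool-level MTLBuffer is sized roughly as the sum of per-descriptor byte counts over all pool sizes, plus alignment padding for each set, multiplied by the number of shader stages (since each stage currently gets its own argument buffer layout). A simplified C++ sketch of that arithmetic, with hypothetical names:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct PoolSizeSketch { uint32_t descriptorType; uint32_t descriptorCount; };

    // Hypothetical stand-in for getDescriptorByteCountForMetalArgumentBuffer();
    // most descriptor types encode to one or two pointer-sized Metal handles.
    static size_t descriptorByteCountSketch(uint32_t /*descriptorType*/) { return sizeof(void*); }

    // Sketch of the argument-buffer sizing performed when the descriptor pool is created.
    static size_t estimateArgumentBufferSize(const std::vector<PoolSizeSketch>& poolSizes,
                                             uint32_t maxSets,
                                             size_t bufferAlignment,    // device MTLBuffer alignment
                                             size_t shaderStageCount) { // e.g. kMVKShaderStageCount
        size_t size = 0;
        for (const auto& ps : poolSizes) {
            size += descriptorByteCountSketch(ps.descriptorType) * ps.descriptorCount;
        }
        size += size_t(maxSets) * bufferAlignment;  // room to align each descriptor set
        return size * shaderStageCount;             // one layout per shader stage (overallocation)
    }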
-MVKDescriptorPool::MVKDescriptorPool(MVKDevice* device,
- const VkDescriptorPoolCreateInfo* pCreateInfo) : MVKVulkanAPIDeviceObject(device) {
- _maxSets = pCreateInfo->maxSets;
- _preallocatedDescriptors = getMVKPreallocateDescriptors() ? new MVKPreallocatedDescriptors(pCreateInfo) : nullptr;
+NSUInteger MVKDescriptorPool::getMaxInlineBlockSize(MVKDevice* device, const VkDescriptorPoolCreateInfo* pCreateInfo) {
+ if ( !device->_pMetalFeatures->argumentBuffers || MVKInlineUniformBlockDescriptor::shouldEmbedInlineBlocksInMetalAgumentBuffer()) { return 0; }
+
+ NSUInteger maxInlineBlockSize = 0;
+ uint32_t poolCnt = pCreateInfo->poolSizeCount;
+ for (uint32_t poolIdx = 0; poolIdx < poolCnt; poolIdx++) {
+ auto& poolSize = pCreateInfo->pPoolSizes[poolIdx];
+ if (poolSize.type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ NSUInteger iubSize = getDescriptorByteCountForMetalArgumentBuffer(poolSize.type) * poolSize.descriptorCount;
+ maxInlineBlockSize = std::max(iubSize, maxInlineBlockSize);
+ }
+ }
+ return std::min<NSUInteger>(maxInlineBlockSize, device->_pMetalFeatures->maxMTLBufferSize);
}
// Destroy all allocated descriptor sets and preallocated descriptors
MVKDescriptorPool::~MVKDescriptorPool() {
reset(0);
- if (_preallocatedDescriptors) { _preallocatedDescriptors->destroy(); }
+ [_mtlArgumentBuffer release];
}
@@ -819,26 +959,3 @@
dstSet->write(pEntry, pEntry->stride, pCurData);
}
}
-
-void mvkPopulateShaderConverterContext(mvk::SPIRVToMSLConversionConfiguration& context,
- MVKShaderStageResourceBinding& ssRB,
- spv::ExecutionModel stage,
- uint32_t descriptorSetIndex,
- uint32_t bindingIndex,
- uint32_t count,
- MVKSampler* immutableSampler) {
- mvk::MSLResourceBinding rb;
-
- auto& rbb = rb.resourceBinding;
- rbb.stage = stage;
- rbb.desc_set = descriptorSetIndex;
- rbb.binding = bindingIndex;
- rbb.count = count;
- rbb.msl_buffer = ssRB.bufferIndex;
- rbb.msl_texture = ssRB.textureIndex;
- rbb.msl_sampler = ssRB.samplerIndex;
-
- if (immutableSampler) { immutableSampler->getConstexprSampler(rb); }
-
- context.resourceBindings.push_back(rb);
-}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 484db4a..624918a 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -819,6 +819,9 @@
/** Returns info about the pixel format supported by the physical device. */
inline MVKPixelFormats* getPixelFormats() { return _device->getPixelFormats(); }
+ /** Returns whether the device supports using Metal argument buffers. */
+ inline bool supportsMetalArgumentBuffers() const { return _device->_pMetalFeatures->argumentBuffers; };
+
/** Constructs an instance for the specified device. */
MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); }
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 6545444..37a1cb9 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -377,6 +377,7 @@
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
+ bool isTier2 = _metalFeatures.argumentBuffers && _mtlDevice.argumentBuffersSupport == MTLArgumentBuffersTier2;
auto* pDescIdxProps = (VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)next;
pDescIdxProps->maxUpdateAfterBindDescriptorsInAllPools = kMVKUndefinedLargeUInt32;
pDescIdxProps->shaderUniformBufferArrayNonUniformIndexingNative = false;
@@ -386,20 +387,20 @@
pDescIdxProps->shaderInputAttachmentArrayNonUniformIndexingNative = _metalFeatures.arrayOfTextures;
pDescIdxProps->robustBufferAccessUpdateAfterBind = _features.robustBufferAccess;
pDescIdxProps->quadDivergentImplicitLod = false;
- pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSamplers = _properties.limits.maxPerStageDescriptorSamplers;
- pDescIdxProps->maxPerStageDescriptorUpdateAfterBindUniformBuffers = _properties.limits.maxPerStageDescriptorUniformBuffers;
- pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageBuffers = _properties.limits.maxPerStageDescriptorStorageBuffers;
- pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSampledImages = _properties.limits.maxPerStageDescriptorSampledImages;
- pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageImages = _properties.limits.maxPerStageDescriptorStorageImages;
+ pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSamplers = isTier2 ? 2048 : _properties.limits.maxPerStageDescriptorSamplers;
+ pDescIdxProps->maxPerStageDescriptorUpdateAfterBindUniformBuffers = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorUniformBuffers;
+ pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageBuffers = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageBuffers;
+ pDescIdxProps->maxPerStageDescriptorUpdateAfterBindSampledImages = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorSampledImages;
+ pDescIdxProps->maxPerStageDescriptorUpdateAfterBindStorageImages = isTier2 ? 500000 : _properties.limits.maxPerStageDescriptorStorageImages;
pDescIdxProps->maxPerStageDescriptorUpdateAfterBindInputAttachments = _properties.limits.maxPerStageDescriptorInputAttachments;
- pDescIdxProps->maxPerStageUpdateAfterBindResources = _properties.limits.maxPerStageResources;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindSamplers = _properties.limits.maxDescriptorSetSamplers;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffers = _properties.limits.maxDescriptorSetUniformBuffers;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = _properties.limits.maxDescriptorSetUniformBuffersDynamic;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffers = _properties.limits.maxDescriptorSetStorageBuffers;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = _properties.limits.maxDescriptorSetStorageBuffersDynamic;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindSampledImages = _properties.limits.maxDescriptorSetSampledImages;
- pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageImages = _properties.limits.maxDescriptorSetStorageImages;
+ pDescIdxProps->maxPerStageUpdateAfterBindResources = isTier2 ? 500000 : _properties.limits.maxPerStageResources;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindSamplers = isTier2 ? 2048 : _properties.limits.maxDescriptorSetSamplers;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffers = isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffers;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = isTier2 ? 500000 : _properties.limits.maxDescriptorSetUniformBuffersDynamic;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffers = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffers;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageBuffersDynamic;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindSampledImages = isTier2 ? 500000 : _properties.limits.maxDescriptorSetSampledImages;
+ pDescIdxProps->maxDescriptorSetUpdateAfterBindStorageImages = isTier2 ? 500000 : _properties.limits.maxDescriptorSetStorageImages;
pDescIdxProps->maxDescriptorSetUpdateAfterBindInputAttachments = _properties.limits.maxDescriptorSetInputAttachments;
break;
}
@@ -456,54 +457,51 @@
// Populates the device ID properties structure
void MVKPhysicalDevice::populate(VkPhysicalDeviceIDProperties* pDevIdProps) {
- uint8_t* uuid;
size_t uuidComponentOffset;
// ---- Device ID ----------------------------------------------
- uuid = pDevIdProps->deviceUUID;
uuidComponentOffset = 0;
- mvkClear(uuid, VK_UUID_SIZE);
+ mvkClear(&pDevIdProps->deviceUUID);
// First 4 bytes contains GPU vendor ID
uint32_t vendorID = _properties.vendorID;
- *(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(vendorID);
+ *(uint32_t*)&pDevIdProps->deviceUUID[uuidComponentOffset] = NSSwapHostIntToBig(vendorID);
uuidComponentOffset += sizeof(vendorID);
// Next 4 bytes contains GPU device ID
uint32_t deviceID = _properties.deviceID;
- *(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(deviceID);
+ *(uint32_t*)&pDevIdProps->deviceUUID[uuidComponentOffset] = NSSwapHostIntToBig(deviceID);
uuidComponentOffset += sizeof(deviceID);
// Last 8 bytes contain the GPU registry ID
uint64_t regID = mvkGetRegistryID(_mtlDevice);
- *(uint64_t*)&uuid[uuidComponentOffset] = NSSwapHostLongLongToBig(regID);
+ *(uint64_t*)&pDevIdProps->deviceUUID[uuidComponentOffset] = NSSwapHostLongLongToBig(regID);
uuidComponentOffset += sizeof(regID);
// ---- Driver ID ----------------------------------------------
- uuid = pDevIdProps->driverUUID;
uuidComponentOffset = 0;
- mvkClear(uuid, VK_UUID_SIZE);
+ mvkClear(&pDevIdProps->driverUUID);
// First 4 bytes contains MoltenVK prefix
const char* mvkPfx = "MVK";
size_t mvkPfxLen = strlen(mvkPfx);
- mvkCopy(&uuid[uuidComponentOffset], (uint8_t*)mvkPfx, mvkPfxLen);
+ mvkCopy(&pDevIdProps->driverUUID[uuidComponentOffset], (uint8_t*)mvkPfx, mvkPfxLen);
uuidComponentOffset += mvkPfxLen + 1;
// Next 4 bytes contains MoltenVK version
uint32_t mvkVersion = MVK_VERSION;
- *(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(mvkVersion);
+ *(uint32_t*)&pDevIdProps->driverUUID[uuidComponentOffset] = NSSwapHostIntToBig(mvkVersion);
uuidComponentOffset += sizeof(mvkVersion);
// Next 4 bytes contains highest Metal feature set supported by this device
uint32_t mtlFeatSet = getHighestMTLFeatureSet();
- *(uint32_t*)&uuid[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatSet);
+ *(uint32_t*)&pDevIdProps->driverUUID[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatSet);
uuidComponentOffset += sizeof(mtlFeatSet);
// ---- LUID ignored for Metal devices ------------------------
- mvkClear(pDevIdProps->deviceLUID, VK_LUID_SIZE);
+ mvkClear(&pDevIdProps->deviceLUID);
pDevIdProps->deviceNodeMask = 0;
pDevIdProps->deviceLUIDValid = VK_FALSE;
}
@@ -1111,8 +1109,8 @@
switch (next->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
auto* budgetProps = (VkPhysicalDeviceMemoryBudgetPropertiesEXT*)next;
- mvkClear(budgetProps->heapBudget, VK_MAX_MEMORY_HEAPS);
- mvkClear(budgetProps->heapUsage, VK_MAX_MEMORY_HEAPS);
+ mvkClear(&budgetProps->heapBudget);
+ mvkClear(&budgetProps->heapUsage);
budgetProps->heapBudget[0] = (VkDeviceSize)getRecommendedMaxWorkingSetSize();
budgetProps->heapUsage[0] = (VkDeviceSize)getCurrentAllocatedSize();
if (!getHasUnifiedMemory()) {
@@ -1144,7 +1142,8 @@
initExtensions();
initMemoryProperties();
initExternalMemoryProperties();
- logGPUInfo();
+ initPipelineCacheUUID(); // Call penultimate
+ logGPUInfo(); // Call last
}
// Initializes the physical device properties (except limits).
@@ -1155,7 +1154,6 @@
_properties.driverVersion = MVK_VERSION;
initGPUInfoProperties();
- initPipelineCacheUUID();
}
// Initializes the Metal-specific physical device features of this instance.
@@ -1214,6 +1212,7 @@
if (supportsMTLFeatureSet(tvOS_GPUFamily1_v3)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_0;
_metalFeatures.renderWithoutAttachments = true;
+ MVK_SET_FROM_ENV_OR_BUILD_BOOL(_metalFeatures.argumentBuffers, MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS);
}
if (supportsMTLFeatureSet(tvOS_GPUFamily1_v4)) {
@@ -1287,6 +1286,7 @@
if (supportsMTLFeatureSet(iOS_GPUFamily1_v4)) {
_metalFeatures.mslVersionEnum = MTLLanguageVersion2_0;
_metalFeatures.renderWithoutAttachments = true;
+ MVK_SET_FROM_ENV_OR_BUILD_BOOL(_metalFeatures.argumentBuffers, MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS);
}
if (supportsMTLFeatureSet(iOS_GPUFamily1_v5)) {
@@ -1395,6 +1395,7 @@
_metalFeatures.presentModeImmediate = true;
_metalFeatures.fences = true;
_metalFeatures.nonUniformThreadgroups = true;
+ MVK_SET_FROM_ENV_OR_BUILD_BOOL(_metalFeatures.argumentBuffers, MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS);
}
if (supportsMTLFeatureSet(macOS_GPUFamily1_v4)) {
@@ -2336,24 +2337,26 @@
void MVKPhysicalDevice::initPipelineCacheUUID() {
// Clear the UUID
- mvkClear(&_properties.pipelineCacheUUID, VK_UUID_SIZE);
+ mvkClear(&_properties.pipelineCacheUUID);
size_t uuidComponentOffset = 0;
- // First 4 bytes contains MoltenVK version
- uint32_t mvkVersion = MVK_VERSION;
- *(uint32_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostIntToBig(mvkVersion);
- uuidComponentOffset += sizeof(mvkVersion);
+ // First 8 bytes contain the first part of the MoltenVK Git revision
+ uint64_t mvkRev = getMoltenVKGitRevision();
+ *(uint64_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostLongLongToBig(mvkRev);
+ uuidComponentOffset += sizeof(mvkRev);
// Next 4 bytes contains highest Metal feature set supported by this device
uint32_t mtlFeatSet = getHighestMTLFeatureSet();
*(uint32_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatSet);
uuidComponentOffset += sizeof(mtlFeatSet);
- // Last 8 bytes contain the first part of the MoltenVK Git revision
- uint64_t mvkRev = getMoltenVKGitRevision();
- *(uint64_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostLongLongToBig(mvkRev);
- uuidComponentOffset += sizeof(mvkRev);
+	// Last 4 bytes contain flags based on enabled Metal features that
+ // might affect the contents of the pipeline cache (mostly MSL content).
+ uint32_t mtlFeatures = 0;
+ mtlFeatures |= ((bool)_metalFeatures.argumentBuffers) << 0;
+ *(uint32_t*)&_properties.pipelineCacheUUID[uuidComponentOffset] = NSSwapHostIntToBig(mtlFeatures);
+ uuidComponentOffset += sizeof(mtlFeatures);
}
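After this change the 16-byte pipeline cache UUID is laid out as 8 bytes of Git revision, then 4 bytes of the highest Metal feature set, then 4 bytes of Metal-feature flags, all stored big-endian. A framework-free C++ sketch of that packing (manual byte swaps instead of the NSSwap* calls; names are hypothetical):

    #include <cstdint>
    #include <cstring>

    // Write values into a byte buffer in big-endian order.
    static void putBE64(uint8_t* p, uint64_t v) { for (int i = 0; i < 8; ++i) { p[i] = uint8_t(v >> (56 - 8 * i)); } }
    static void putBE32(uint8_t* p, uint32_t v) { for (int i = 0; i < 4; ++i) { p[i] = uint8_t(v >> (24 - 8 * i)); } }

    // Sketch of the UUID layout: [ 8B Git revision | 4B highest Metal feature set | 4B feature flags ].
    static void packPipelineCacheUUID(uint8_t uuid[16], uint64_t gitRevision,
                                      uint32_t highestMTLFeatureSet, uint32_t mtlFeatureFlags) {
        std::memset(uuid, 0, 16);
        putBE64(uuid + 0,  gitRevision);
        putBE32(uuid + 8,  highestMTLFeatureSet);
        putBE32(uuid + 12, mtlFeatureFlags);
    }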
uint32_t MVKPhysicalDevice::getHighestMTLFeatureSet() {
@@ -2953,7 +2956,7 @@
}
VkResult MVKDevice::getDeviceGroupPresentCapabilities(VkDeviceGroupPresentCapabilitiesKHR* pDeviceGroupPresentCapabilities) {
- mvkClear(pDeviceGroupPresentCapabilities->presentMask, VK_MAX_DEVICE_GROUP_SIZE);
+ mvkClear(&pDeviceGroupPresentCapabilities->presentMask);
pDeviceGroupPresentCapabilities->presentMask[0] = 0x1;
pDeviceGroupPresentCapabilities->modes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR;
@@ -3688,40 +3691,7 @@
_commandResourceFactory = new MVKCommandResourceFactory(this);
-// This code will be refactored in an upcoming release, but for now,
-// suppress deprecation warnings for startCaptureWithDevice: on MacCatalyst.
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wdeprecated-declarations"
- if (getInstance()->_autoGPUCaptureScope == MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE) {
- MTLCaptureManager *captureMgr = [MTLCaptureManager sharedCaptureManager];
- if (!getInstance()->_autoGPUCaptureOutputFile.empty()) {
- if ( ![captureMgr respondsToSelector: @selector(supportsDestination:)] ||
- ![captureMgr supportsDestination: MTLCaptureDestinationGPUTraceDocument] ) {
- reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Capturing GPU traces to a file requires macOS 10.15 or iOS 13.0. Falling back to Xcode GPU capture.");
- [captureMgr startCaptureWithDevice: getMTLDevice()];
- } else {
- NSError *err = nil;
- NSString *path, *expandedPath;
- MTLCaptureDescriptor *captureDesc = [MTLCaptureDescriptor new];
- captureDesc.captureObject = getMTLDevice();
- captureDesc.destination = MTLCaptureDestinationGPUTraceDocument;
- path = [NSString stringWithUTF8String: getInstance()->_autoGPUCaptureOutputFile.c_str()];
- expandedPath = path.stringByExpandingTildeInPath;
- captureDesc.outputURL = [NSURL fileURLWithPath: expandedPath];
- if (![captureMgr startCaptureWithDescriptor: captureDesc error: &err]) {
- reportError(VK_ERROR_INITIALIZATION_FAILED, "Failed to start GPU capture session to %s (Error code %li): %s", getInstance()->_autoGPUCaptureOutputFile.c_str(), (long)err.code, err.localizedDescription.UTF8String);
- [err release];
- }
- [captureDesc.outputURL release];
- [captureDesc release];
- [expandedPath release];
- [path release];
- }
- } else {
- [captureMgr startCaptureWithDevice: getMTLDevice()];
- }
- }
-#pragma clang diagnostic pop
+ getInstance()->startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE, getMTLDevice());
MVKLogInfo("Created VkDevice to run on GPU %s with the following %d Vulkan extensions enabled:%s",
_pProperties->deviceName,
@@ -4099,9 +4069,7 @@
[_globalVisibilityResultMTLBuffer release];
[_defaultMTLSamplerState release];
- if (getInstance()->_autoGPUCaptureScope == MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE) {
- [[MTLCaptureManager sharedCaptureManager] stopCapture];
- }
+ getInstance()->stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE);
mvkDestroyContainerContents(_privateDataSlots);
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index 16771af..0a2c3ba 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -1930,6 +1930,7 @@
? mvkClamp(pCreateInfo->maxAnisotropy, 1.0f, _device->_pProperties->limits.maxSamplerAnisotropy)
: 1);
mtlSampDesc.normalizedCoordinates = !pCreateInfo->unnormalizedCoordinates;
+ mtlSampDesc.supportArgumentBuffers = supportsMetalArgumentBuffers();
// If compareEnable is true, but dynamic samplers with depth compare are not available
// on this device, this sampler must only be used as an immutable sampler, and will
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h
index 3ff1a95..9bb0175 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h
@@ -158,6 +158,37 @@
/** The list of Vulkan extensions, indicating whether each has been enabled by the app. */
const MVKExtensionList _enabledExtensions;
+ /**
+ * Checks if automatic GPU capture is enabled for the specified auto capture
+ * scope, and if so, starts capturing from the specified Metal capture object.
+ * The capture will be made to either Xcode, or to a file if the
+ * MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE environment variable has been specified.
+ *
+ * The autoGPUCaptureScope parameter must be one of:
+ * - MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_NONE
+ * - MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE
+ * - MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME
+ *
+ * The mtlCaptureObject must be one of:
+ * - MTLDevice for scope MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE
+ * - MTLCommandQueue for scope MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME.
+ */
+ void startAutoGPUCapture(int32_t autoGPUCaptureScope, id mtlCaptureObject);
+
+ /**
+ * Checks if automatic GPU capture is enabled for the specified
+ * auto capture scope, and if so, stops capturing.
+ *
+ * The autoGPUCaptureScope parameter must be one of:
+ * - MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_NONE
+ * - MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE
+ * - MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME
+ */
+ void stopAutoGPUCapture(int32_t autoGPUCaptureScope);
+
+ /** Returns whether this instance is currently automatically capturing a GPU trace. */
+ inline bool isCurrentlyAutoGPUCapturing() { return _isCurrentlyAutoGPUCapturing; }
+
#pragma mark Object Creation
@@ -205,6 +236,7 @@
bool _useCreationCallbacks;
const char* _debugReportCallbackLayerPrefix;
int32_t _autoGPUCaptureScope;
+ bool _isCurrentlyAutoGPUCapturing;
std::string _autoGPUCaptureOutputFile;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
index 78efef5..9f1d555 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
@@ -280,6 +280,49 @@
}
}
+void MVKInstance::startAutoGPUCapture(int32_t autoGPUCaptureScope, id mtlCaptureObject) {
+
+ if (_isCurrentlyAutoGPUCapturing || (_autoGPUCaptureScope != autoGPUCaptureScope)) { return; }
+
+ _isCurrentlyAutoGPUCapturing = true;
+
+ @autoreleasepool {
+ MTLCaptureManager *captureMgr = [MTLCaptureManager sharedCaptureManager];
+
+ MTLCaptureDescriptor *captureDesc = [[MTLCaptureDescriptor new] autorelease];
+ captureDesc.captureObject = mtlCaptureObject;
+ captureDesc.destination = MTLCaptureDestinationDeveloperTools;
+
+ if ( !_autoGPUCaptureOutputFile.empty() ) {
+ if ([captureMgr respondsToSelector: @selector(supportsDestination:)] &&
+ [captureMgr supportsDestination: MTLCaptureDestinationGPUTraceDocument] ) {
+
+ NSString* filePath = [[NSString stringWithUTF8String: _autoGPUCaptureOutputFile.c_str()] stringByExpandingTildeInPath];
+ MVKLogInfo("Capturing GPU trace to file %s.", filePath.UTF8String);
+
+ captureDesc.destination = MTLCaptureDestinationGPUTraceDocument;
+ captureDesc.outputURL = [NSURL fileURLWithPath: filePath];
+
+ } else {
+ reportError(VK_ERROR_FEATURE_NOT_PRESENT, "Capturing GPU traces to a file requires macOS 10.15 or iOS 13.0. Falling back to Xcode GPU capture.");
+ }
+ } else {
+ MVKLogInfo("Capturing GPU trace to Xcode.");
+ }
+
+ NSError *err = nil;
+ if ( ![captureMgr startCaptureWithDescriptor: captureDesc error: &err] ) {
+ reportError(VK_ERROR_INITIALIZATION_FAILED, "Failed to automatically start GPU capture session (Error code %li): %s", (long)err.code, err.localizedDescription.UTF8String);
+ }
+ }
+}
+
+void MVKInstance::stopAutoGPUCapture(int32_t autoGPUCaptureScope) {
+ if (_isCurrentlyAutoGPUCapturing && _autoGPUCaptureScope == autoGPUCaptureScope) {
+ [[MTLCaptureManager sharedCaptureManager] stopCapture];
+ _isCurrentlyAutoGPUCapturing = false;
+ }
+}
#pragma mark Object Creation
@@ -364,6 +407,8 @@
setConfigurationResult(reportError(VK_ERROR_INCOMPATIBLE_DRIVER, "Vulkan is not supported on this device. MoltenVK requires Metal, which is not available on this device."));
}
+ _isCurrentlyAutoGPUCapturing = false;
+
if (MVK_MACCAT && !mvkOSVersionIsAtLeast(11.0)) {
setConfigurationResult(reportError(VK_ERROR_INCOMPATIBLE_DRIVER, "To support Mac Catalyst, MoltenVK requires macOS 11.0 or above."));
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
index 45161b5..f6c2f93 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
@@ -39,7 +39,7 @@
#pragma mark MVKPipelineLayout
struct MVKShaderImplicitRezBinding {
- uint32_t stages[kMVKShaderStageMax];
+ uint32_t stages[kMVKShaderStageCount];
};
/** Represents a Vulkan pipeline layout. */
@@ -111,7 +111,7 @@
protected:
void propagateDebugName() override {}
- MVKSmallVector<MVKDescriptorSetLayout*, 1> _descriptorSetLayouts;
+ MVKSmallVector<MVKDescriptorSetLayout*, 8> _descriptorSetLayouts;
MVKSmallVector<MVKShaderResourceBinding, 1> _dslMTLResourceIndexOffsets;
MVKSmallVector<VkPushConstantRange> _pushConstants;
MVKShaderResourceBinding _pushConstantsMTLResourceIndexes;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index 7a160fa..4bc3481 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -48,7 +48,8 @@
MVKDescriptorSet* descSet = descriptorSets[dsIdx];
uint32_t dslIdx = firstSet + dsIdx;
MVKDescriptorSetLayout* dsl = _descriptorSetLayouts[dslIdx];
- dsl->bindDescriptorSet(cmdEncoder, descSet, _dslMTLResourceIndexOffsets[dslIdx],
+ dsl->bindDescriptorSet(cmdEncoder, descSet, dslIdx,
+ _dslMTLResourceIndexOffsets[dslIdx],
dynamicOffsets, dynamicOffsetIndex);
if (!cmdEncoder) { setConfigurationResult(dsl->getConfigurationResult()); }
}
@@ -77,6 +78,8 @@
void MVKPipelineLayout::populateShaderConverterContext(SPIRVToMSLConversionConfiguration& context) {
context.resourceBindings.clear();
+ context.discreteDescriptorSets.clear();
+ context.inlineUniformBlocks.clear();
// Add resource bindings defined in the descriptor set layouts
uint32_t dslCnt = (uint32_t)_descriptorSetLayouts.size();
@@ -94,7 +97,7 @@
spv::ExecutionModelFragment,
spv::ExecutionModelGLCompute
};
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
mvkPopulateShaderConverterContext(context,
_pushConstantsMTLResourceIndexes.stages[i],
models[i],
@@ -105,40 +108,40 @@
}
}
+// Add descriptor set layouts.
+// According to the Vulkan spec, VkDescriptorSetLayout is intended to be consumed when passed
+// to any Vulkan function, and may be safely destroyed by app immediately after. In order for
+// this pipeline layout to retain the VkDescriptorSetLayout, the MVKDescriptorSetLayout
+// instance is retained, so that it will live on here after it has been destroyed by the API.
+
+// If we're not using Metal argument buffers, accumulate the resource index offsets used
+// by the corresponding DSL, and associate the current accumulated resource index offsets
+// with each DSL as it is added. If we're using Metal argument buffers, just accumulate the
+// number of Metal argument buffers we need for each stage.
+// The final accumulation of resource index offsets becomes the resource index offsets that
+// will be used for push constants and any additional auxiliary buffers.
MVKPipelineLayout::MVKPipelineLayout(MVKDevice* device,
const VkPipelineLayoutCreateInfo* pCreateInfo) : MVKVulkanAPIDeviceObject(device) {
-
- // Add descriptor set layouts, accumulating the resource index offsets used by the
- // corresponding DSL, and associating the current accumulated resource index offsets
- // with each DSL as it is added. The final accumulation of resource index offsets
- // becomes the resource index offsets that will be used for push contants.
-
- // According to the Vulkan spec, VkDescriptorSetLayout is intended to be consumed when passed
- // to any Vulkan function, and may be safely destroyed by app immediately after. In order for
- // this pipeline layout to retain the VkDescriptorSetLayout, the MVKDescriptorSetLayout
- // instance is retained, so that it will live on here after it has been destroyed by the API.
-
- _descriptorSetLayouts.reserve(pCreateInfo->setLayoutCount);
- for (uint32_t i = 0; i < pCreateInfo->setLayoutCount; i++) {
- MVKDescriptorSetLayout* pDescSetLayout = (MVKDescriptorSetLayout*)pCreateInfo->pSetLayouts[i];
- pDescSetLayout->retain();
- _descriptorSetLayouts.push_back(pDescSetLayout);
- _dslMTLResourceIndexOffsets.push_back(_pushConstantsMTLResourceIndexes);
- _pushConstantsMTLResourceIndexes += pDescSetLayout->_mtlResourceCounts;
- }
-
- // Add push constants
- _pushConstants.reserve(pCreateInfo->pushConstantRangeCount);
- for (uint32_t i = 0; i < pCreateInfo->pushConstantRangeCount; i++) {
- _pushConstants.push_back(pCreateInfo->pPushConstantRanges[i]);
+ uint32_t dslCnt = pCreateInfo->setLayoutCount;
+ _descriptorSetLayouts.resize(dslCnt);
+ _dslMTLResourceIndexOffsets.resize(dslCnt);
+ for (uint32_t dslIdx = 0; dslIdx < dslCnt; dslIdx++) {
+ MVKDescriptorSetLayout* mvkDSL = (MVKDescriptorSetLayout*)pCreateInfo->pSetLayouts[dslIdx];
+ mvkDSL->retain();
+ _descriptorSetLayouts[dslIdx] = mvkDSL;
+ if (mvkDSL->isUsingMetalArgumentBuffer()) {
+ _pushConstantsMTLResourceIndexes.addArgumentBuffer(mvkDSL->_mtlResourceCounts);
+ } else {
+ _dslMTLResourceIndexOffsets[dslIdx] = _pushConstantsMTLResourceIndexes;
+ _pushConstantsMTLResourceIndexes += mvkDSL->_mtlResourceCounts;
+ }
}
// Set implicit buffer indices
// FIXME: Many of these are optional. We shouldn't set the ones that aren't
- // present--or at least, we should move the ones that are down to avoid
- // running over the limit of available buffers. But we can't know that
- // until we compile the shaders.
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ // present--or at least, we should move the ones that are down to avoid running over
+ // the limit of available buffers. But we can't know that until we compile the shaders.
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
_swizzleBufferIndex.stages[i] = _pushConstantsMTLResourceIndexes.stages[i].bufferIndex + 1;
_bufferSizeBufferIndex.stages[i] = _swizzleBufferIndex.stages[i] + 1;
_indirectParamsIndex.stages[i] = _bufferSizeBufferIndex.stages[i] + 1;
@@ -148,6 +151,13 @@
_tessCtlLevelBufferIndex = _tessCtlPatchOutputBufferIndex + 1;
}
}
+
+ // Add push constants
+ _pushConstants.reserve(pCreateInfo->pushConstantRangeCount);
+ for (uint32_t i = 0; i < pCreateInfo->pushConstantRangeCount; i++) {
+ _pushConstants.push_back(pCreateInfo->pPushConstantRanges[i]);
+ }
+
// Since we currently can't use multiview with tessellation or geometry shaders,
// to conserve the number of buffer bindings, use the same bindings for the
// view range buffer as for the indirect paramters buffer.
@@ -164,7 +174,7 @@
void MVKPipeline::bindPushConstants(MVKCommandEncoder* cmdEncoder) {
if (cmdEncoder) {
- for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageMax; i++) {
+ for (uint32_t i = kMVKShaderStageVertex; i < kMVKShaderStageCount; i++) {
cmdEncoder->getPushConstants(mvkVkShaderStageFlagBitsFromMVKShaderStage(MVKShaderStage(i)))->setMTLBufferIndex(_pushConstantsMTLResourceIndexes.stages[i].bufferIndex);
}
}
@@ -367,7 +377,7 @@
}
// Track dynamic state in _dynamicStateEnabled array
- mvkClear(_dynamicStateEnabled, kMVKVkDynamicStateCount); // start with all dynamic state disabled
+ mvkClear(&_dynamicStateEnabled); // start with all dynamic state disabled
const VkPipelineDynamicStateCreateInfo* pDS = pCreateInfo->pDynamicState;
if (pDS) {
for (uint32_t i = 0; i < pDS->dynamicStateCount; i++) {
@@ -1451,6 +1461,8 @@
shaderContext.options.mslOptions.texel_buffer_texture_width = _device->_pMetalFeatures->maxTextureDimension;
shaderContext.options.mslOptions.r32ui_linear_texture_alignment = (uint32_t)_device->getVkFormatTexelBufferAlignment(VK_FORMAT_R32_UINT, this);
shaderContext.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers;
+ shaderContext.options.mslOptions.argument_buffers = supportsMetalArgumentBuffers();
+ shaderContext.options.mslOptions.force_active_argument_buffer_resources = supportsMetalArgumentBuffers();
MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout;
layout->populateShaderConverterContext(shaderContext);
@@ -1693,6 +1705,8 @@
shaderContext.options.mslOptions.dispatch_base = _allowsDispatchBase;
shaderContext.options.mslOptions.texture_1D_as_2D = mvkTreatTexture1DAs2D();
shaderContext.options.mslOptions.fixed_subgroup_size = mvkIsAnyFlagEnabled(pSS->flags, VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) ? 0 : _device->_pMetalFeatures->maxSubgroupSize;
+ shaderContext.options.mslOptions.argument_buffers = supportsMetalArgumentBuffers();
+ shaderContext.options.mslOptions.force_active_argument_buffer_resources = supportsMetalArgumentBuffers();
#if MVK_MACOS
shaderContext.options.mslOptions.emulate_subgroups = !_device->_pMetalFeatures->simdPermute;
#endif
@@ -2102,10 +2116,18 @@
}
template<class Archive>
+ void serialize(Archive & archive, DescriptorBinding& db) {
+ archive(db.descriptorSet,
+ db.binding);
+ }
+
+ template<class Archive>
void serialize(Archive & archive, SPIRVToMSLConversionConfiguration& ctx) {
archive(ctx.options,
ctx.shaderInputs,
- ctx.resourceBindings);
+ ctx.resourceBindings,
+ ctx.discreteDescriptorSets,
+ ctx.inlineUniformBlocks);
}
template<class Archive>
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm
index 4ca6b4f..9cde2b0 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm
@@ -754,7 +754,7 @@
void MVKPixelFormats::initVkFormatCapabilities() {
- mvkClear(_vkFormatDescriptions, _vkFormatCount);
+ mvkClear(&_vkFormatDescriptions);
uint32_t fmtIdx = 0;
@@ -1066,7 +1066,7 @@
void MVKPixelFormats::initMTLPixelFormatCapabilities() {
- mvkClear(_mtlPixelFormatDescriptions, _mtlPixelFormatCount);
+ mvkClear(&_mtlPixelFormatDescriptions);
uint32_t fmtIdx = 0;
@@ -1250,7 +1250,7 @@
void MVKPixelFormats::initMTLVertexFormatCapabilities() {
- mvkClear(_mtlVertexFormatDescriptions, _mtlVertexFormatCount);
+ mvkClear(&_mtlVertexFormatDescriptions);
uint32_t fmtIdx = 0;
@@ -1331,8 +1331,8 @@
void MVKPixelFormats::buildMTLFormatMaps() {
// Set all MTLPixelFormats and MTLVertexFormats to undefined/invalid
- mvkClear(_mtlFormatDescIndicesByMTLPixelFormatsCore, _mtlPixelFormatCoreCount);
- mvkClear(_mtlFormatDescIndicesByMTLVertexFormats, _mtlVertexFormatCount);
+ mvkClear(&_mtlFormatDescIndicesByMTLPixelFormatsCore);
+ mvkClear(&_mtlFormatDescIndicesByMTLVertexFormats);
// Build lookup table for MTLPixelFormat specs.
// For most Metal format values, which are small and consecutive, use a simple lookup array.
@@ -1933,7 +1933,7 @@
void MVKPixelFormats::buildVkFormatMaps() {
// Set the VkFormats to undefined/invalid
- mvkClear(_vkFormatDescIndicesByVkFormatsCore, _vkFormatCoreCount);
+ mvkClear(&_vkFormatDescIndicesByVkFormatsCore);
// Iterate through the VkFormat descriptions, populate the lookup maps and back pointers,
// and validate the Metal formats for the platform and OS.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index 5df8401..91b66c0 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -208,7 +208,6 @@
id<MTLCommandBuffer> _activeMTLCommandBuffer;
};
-
/**
* Submits the commands in a set of command buffers to the queue.
* Template class to balance vector pre-allocations between very common low counts and fewer larger counts.
@@ -253,6 +252,7 @@
protected:
id<MTLCommandBuffer> getMTLCommandBuffer();
+ void stopAutoGPUCapture();
MVKSmallVector<MVKPresentTimingInfo, 4> _presentInfo;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index caec05e..0fc8979 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -162,7 +162,7 @@
initName();
initExecQueue();
initMTLCommandQueue();
- initGPUCaptureScopes();
+ initGPUCaptureScopes(); // After initMTLCommandQueue()
}
void MVKQueue::initName() {
@@ -203,6 +203,8 @@
_submissionCaptureScope->makeDefault();
}
_submissionCaptureScope->beginScope(); // Allow Xcode to capture the first frame if desired.
+
+ getInstance()->startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME, _mtlQueue);
}
MVKQueue::~MVKQueue() {
@@ -402,6 +404,7 @@
auto cs = _queue->_submissionCaptureScope;
cs->endScope();
cs->beginScope();
+ stopAutoGPUCapture();
this->destroy();
}
@@ -413,6 +416,16 @@
return mtlCmdBuff;
}
+
+void MVKQueuePresentSurfaceSubmission::stopAutoGPUCapture() {
+ MVKInstance* mvkInst = _queue->getInstance();
+ const MVKConfiguration* pMVKConfig = mvkInst->getMoltenVKConfiguration();
+ if (_queue->_queueFamily->getIndex() == pMVKConfig->defaultGPUCaptureScopeQueueFamilyIndex &&
+ _queue->_index == pMVKConfig->defaultGPUCaptureScopeQueueIndex) {
+ mvkInst->stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME);
+ }
+}
+
MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKQueue* queue,
const VkPresentInfoKHR* pPresentInfo)
: MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores) {
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
index 0e0eba8..fcf304e 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.mm
@@ -33,7 +33,7 @@
// Returns a VkExtensionProperties struct populated with a name and version
static VkExtensionProperties mvkMakeExtProps(const char* extensionName, uint32_t specVersion) {
VkExtensionProperties extProps;
- mvkClear(extProps.extensionName, VK_MAX_EXTENSION_NAME_SIZE);
+ mvkClear(&extProps.extensionName);
if (extensionName) { strcpy(extProps.extensionName, extensionName); }
extProps.specVersion = specVersion;
return extProps;
diff --git a/MoltenVK/MoltenVK/Layers/MVKLayers.mm b/MoltenVK/MoltenVK/Layers/MVKLayers.mm
index aefaef3..3a2bc71 100644
--- a/MoltenVK/MoltenVK/Layers/MVKLayers.mm
+++ b/MoltenVK/MoltenVK/Layers/MVKLayers.mm
@@ -41,9 +41,9 @@
MVKLayer::MVKLayer() : _supportedInstanceExtensions(nullptr, true) {
// The core driver layer
- mvkClear(_layerProperties.layerName, VK_MAX_EXTENSION_NAME_SIZE);
+ mvkClear(&_layerProperties.layerName);
strcpy(_layerProperties.layerName, "MoltenVK");
- mvkClear(_layerProperties.description, VK_MAX_DESCRIPTION_SIZE);
+ mvkClear(&_layerProperties.description);
strcpy(_layerProperties.description, "MoltenVK driver layer");
_layerProperties.specVersion = MVK_VULKAN_API_VERSION;
_layerProperties.implementationVersion = MVK_VERSION;
diff --git a/MoltenVK/MoltenVK/Utility/MVKBitArray.h b/MoltenVK/MoltenVK/Utility/MVKBitArray.h
new file mode 100755
index 0000000..6f2226d
--- /dev/null
+++ b/MoltenVK/MoltenVK/Utility/MVKBitArray.h
@@ -0,0 +1,179 @@
+/*
+ * MVKBitArray.h
+ *
+ * Copyright (c) 2020-2020 The Brenwill Workshop Ltd. (http://www.brenwill.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "MVKFoundation.h"
+
+
+#pragma mark -
+#pragma mark MVKBitArray
+
+/** Represents an array of bits, optimized for storage and fast scanning for bits that are set. */
+class MVKBitArray {
+
+ static constexpr size_t SectionMaskSize = 6;
+ static constexpr size_t SectionSize = 1U << SectionMaskSize;
+
+public:
+
+ /** Returns the value of the bit. */
+ inline bool getBit(size_t bitIndex) {
+ return mvkIsAnyFlagEnabled(_pSections[getIndexOfSection(bitIndex)], getSectionSetMask(bitIndex));
+ }
+
+ /** Sets the value of the bit to 1. */
+ inline void setBit(size_t bitIndex) {
+ size_t secIdx = getIndexOfSection(bitIndex);
+ mvkEnableFlags(_pSections[secIdx], getSectionSetMask(bitIndex));
+
+ if (secIdx < _minUnclearedSectionIndex) { _minUnclearedSectionIndex = secIdx; }
+ }
+
+ /** Sets the value of the bit to 0. */
+ inline void clearBit(size_t bitIndex) {
+ size_t secIdx = getIndexOfSection(bitIndex);
+ mvkDisableFlags(_pSections[secIdx], getSectionSetMask(bitIndex));
+
+ if (secIdx == _minUnclearedSectionIndex && !_pSections[secIdx]) { _minUnclearedSectionIndex++; }
+ }
+
+ /** Sets the value of the bit to the value. */
+ inline void setBit(size_t bitIndex, bool val) {
+ if (val) {
+ setBit(bitIndex);
+ } else {
+ clearBit(bitIndex);
+ }
+ }
+
+ /** Sets all bits in the array to 1. */
+ inline void setAllBits() { setAllSections(~0); }
+
+ /** Clears all bits in the array to 0. */
+ inline void clearAllBits() { setAllSections(0); }
+
+ /**
+ * Returns the index of the first bit that is set, at or after the specified index,
+ * and optionally clears that bit. If no bits are set, returns the size() of this bit array.
+ */
+ size_t getIndexOfFirstSetBit(size_t startIndex, bool shouldClear) {
+ size_t startSecIdx = std::max(getIndexOfSection(startIndex), _minUnclearedSectionIndex);
+ size_t bitIdx = startSecIdx << SectionMaskSize;
+ size_t secCnt = getSectionCount();
+ for (size_t secIdx = startSecIdx; secIdx < secCnt; secIdx++) {
+		// Only the first section scanned honors startIndex; subsequent sections start at their first bit.
+		size_t lclStartBit = (secIdx == startSecIdx) ? getBitIndexInSection(startIndex) : 0;
+		size_t lclBitIdx = getIndexOfFirstSetBitInSection(_pSections[secIdx], lclStartBit);
+ bitIdx += lclBitIdx;
+ if (lclBitIdx < SectionSize) {
+ if (startSecIdx == _minUnclearedSectionIndex && !_pSections[startSecIdx]) { _minUnclearedSectionIndex = secIdx; }
+ if (shouldClear) { clearBit(bitIdx); }
+ return bitIdx;
+ }
+ }
+ return std::min(bitIdx, _bitCount);
+ }
+
+ /**
+ * Returns the index of the first bit that is set, at or after the specified index.
+ * If no bits are set, returns the size() of this bit array.
+ */
+ inline size_t getIndexOfFirstSetBit(size_t startIndex) {
+ return getIndexOfFirstSetBit(startIndex, false);
+ }
+
+ /**
+ * Returns the index of the first bit that is set and optionally clears that bit.
+ * If no bits are set, returns the size() of this bit array.
+ */
+ inline size_t getIndexOfFirstSetBit(bool shouldClear) {
+ return getIndexOfFirstSetBit(0, shouldClear);
+ }
+
+ /**
+ * Returns the index of the first bit that is set.
+ * If no bits are set, returns the size() of this bit array.
+ */
+ inline size_t getIndexOfFirstSetBit() {
+ return getIndexOfFirstSetBit(0, false);
+ }
+
+ /** Returns the number of bits in this array. */
+ inline size_t size() { return _bitCount; }
+
+ /** Returns whether this array is empty. */
+ inline bool empty() { return !_bitCount; }
+
+ /** Constructs an instance for the specified number of bits, and sets the initial value of all the bits. */
+ MVKBitArray(size_t size, bool val = false) {
+ _bitCount = size;
+ _pSections = _bitCount ? (uint64_t*)malloc(getSectionCount() * SectionSize) : nullptr;
+ if (val) {
+ setAllBits();
+ } else {
+ clearAllBits();
+ }
+ }
+
+ ~MVKBitArray() { free(_pSections); }
+
+protected:
+
+ // Returns the number of sections.
+ inline size_t getSectionCount() {
+ return _bitCount ? getIndexOfSection(_bitCount - 1) + 1 : 0;
+ }
+
+ // Returns the index of the section that contains the specified bit.
+ static inline size_t getIndexOfSection(size_t bitIndex) {
+ return bitIndex >> SectionMaskSize;
+ }
+
+ // Converts the bit index to a local bit index within a section, and returns that local bit index.
+ static inline size_t getBitIndexInSection(size_t bitIndex) {
+ return bitIndex & (SectionSize - 1);
+ }
+
+ // Returns a section mask containing a single 1 value in the bit in the section that
+ // corresponds to the specified global bit index, and 0 values in all other bits.
+ static inline uint64_t getSectionSetMask(size_t bitIndex) {
+ return (uint64_t)1U << ((SectionSize - 1) - getBitIndexInSection(bitIndex));
+ }
+
+ // Returns the local index of the first set bit in the section, starting from the highest order bit.
+ // Clears all bits ahead of the start bit so they will be ignored, then counts the number of zeros
+ // ahead of the set bit. If there are no set bits, returns the number of bits in a section.
+ static size_t getIndexOfFirstSetBitInSection(uint64_t section, size_t lclStartBitIndex) {
+ uint64_t lclStartMask = ~(uint64_t)0;
+ lclStartMask >>= lclStartBitIndex;
+ section &= lclStartMask;
+ return section ? __builtin_clzll(section) : SectionSize;
+ }
+
+ // Sets the content of all sections to the given value.
+ void setAllSections(uint64_t sectionValue) {
+ size_t secCnt = getSectionCount();
+ for (size_t secIdx = 0; secIdx < secCnt; secIdx++) {
+ _pSections[secIdx] = sectionValue;
+ }
+ _minUnclearedSectionIndex = sectionValue ? 0 : secCnt;
+ }
+
+ uint64_t* _pSections;
+ size_t _bitCount;
+ size_t _minUnclearedSectionIndex; // Tracks where to start looking for bits that are set
+};
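
For reviewers, a minimal usage sketch of the new MVKBitArray follows. It is illustrative only and not part of the diff; the free-slot scenario and variable names are made up, but it uses only the members declared above (the constructor, getIndexOfFirstSetBit(bool), setBit(), and size()).

    // Track which of 256 slots are free (bit set = slot free).
    MVKBitArray freeSlots(256, true);

    // Claim the first free slot, clearing its bit in the same call.
    size_t slotIdx = freeSlots.getIndexOfFirstSetBit(true);
    if (slotIdx < freeSlots.size()) {
        // ... use slot slotIdx ...
    }

    // Release the slot again when it is no longer needed.
    freeSlots.setBit(slotIdx);
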
diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
index 63cc2eb..008b6fc 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
@@ -167,6 +167,7 @@
*/
#define MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_NONE 0
#define MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE 1
+#define MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME 2
#ifndef MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE
# define MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_NONE
#endif
@@ -205,6 +206,11 @@
# define MVK_CONFIG_TEXTURE_1D_AS_2D 1
#endif
+/** Support Metal argument buffers. Enabled by default. */
+#ifndef MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS
+# define MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS 1
+#endif
+
/**
* IOSurfaces are supported on macOS, and on iOS starting with iOS 11.
*
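
As a review aid: the new defaults above follow the existing MVKEnvironment.h convention of wrapping each `#define` in `#ifndef`, so a project can pin a different default at build time. A hedged sketch of such an override (the chosen values are arbitrary examples, not recommendations):

    // In the MoltenVK build settings or a prefix header, before MVKEnvironment.h is processed:
    #define MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS 0    // opt out of Metal argument buffers
    #define MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE 2        // MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME
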
diff --git a/MoltenVK/MoltenVK/Utility/MVKSmallVector.h b/MoltenVK/MoltenVK/Utility/MVKSmallVector.h
index 1d1612b..ff3042d 100755
--- a/MoltenVK/MoltenVK/Utility/MVKSmallVector.h
+++ b/MoltenVK/MoltenVK/Utility/MVKSmallVector.h
@@ -43,11 +43,11 @@
// If you don't need any inline storage use:
// MVKSmallVector<int> v; // this is essentially the same as using std::vector
//
-// The per-instance memory overhead of MVKSmallVector (16 bytes) is smaller than MVKVector (40 bytes)
-// and std::vector (24 bytes), but MVKSmallVector lacks the polymorphism of MVKVector (or std::vector),
-// that allows them to be passed around to functions without reference to the pre-allocation size.
-// MVKSmallVector supports the contents() function to derive an MVKArrayRef from its contents,
-// which can be passed around without reference to the pre-allocaton size.
+// The per-instance memory overhead of MVKSmallVector (16 bytes) is smaller than that of std::vector (24 bytes),
+// but MVKSmallVector lacks the polymorphism that would allow it to be passed to functions without
+// reference to the pre-allocation size. Instead, MVKSmallVector supports the contents() function to
+// derive an MVKArrayRef from its contents, which can be passed around without reference to the
+// pre-allocation size.
#include "MVKSmallVectorAllocator.h"
#include "MVKFoundation.h"
diff --git a/MoltenVK/MoltenVK/Utility/MVKVector.h b/MoltenVK/MoltenVK/Utility/MVKVector.h
deleted file mode 100755
index 60a4871..0000000
--- a/MoltenVK/MoltenVK/Utility/MVKVector.h
+++ /dev/null
@@ -1,1000 +0,0 @@
-/*
- * MVKVector.h
- *
- * Copyright (c) 2012-2020 Dr. Torsten Hans (hans@ipacs.de)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-//
-// in case MVKVector should use std::vector
-//
-#if 0
-
-template<typename T, size_t N = 0>
-using MVKVectorInline = std::vector<T>;
-
-template<typename T>
-using MVKVectorDefault = std::vector<T>;
-
-template<typename T>
-using MVKVector = std::vector<T>;
-
-#else
-
-//
-// MVKVector.h is a sequence container that (optionally) implements a small
-// buffer optimization.
-// It behaves similarly to std::vector, except until a certain number of
-// elements are reserved, it does not use the heap.
-// Like std::vector, MVKVector is guaranteed to use contiguous memory, so if the
-// preallocated number of elements are exceeded, all elements are then in heap.
-// MVKVector supports just the necessary members to be compatible with MoltenVK
-// If C++17 will be the default in the future, code can be simplified quite a bit.
-//
-// Example:
-//
-// MVKVectorInline<int, 3> vector;
-// vector.emplace_back( 1 );
-// vector.emplace_back( 2 );
-// vector.emplace_back( 3 );
-// // adding another element now reserves memory from heap
-// vector.emplace_back( 4 );
-//
-// If you don't need any inline storage use
-// MVKVectorDefault<int> vector; // this is essentially the same as using std::vector
-//
-// Passing MVKVectorInline to a function would require to use the same template
-// parameters that have been used for declaration. To avoid this MVKVectorInline
-// is derived from MVKVector. If you want to pass MVKVectorInline to a function
-// use MVKVector.
-//
-#include "MVKVectorAllocator.h"
-#include "MVKFoundation.h"
-#include <type_traits>
-#include <initializer_list>
-#include <utility>
-
-
-template<class Type> class MVKVector
-{
- mvk_vector_allocator_base<Type> *alc_ptr;
-
-public:
- class iterator : public std::iterator<std::forward_iterator_tag, Type>
- {
- const MVKVector *vector;
- size_t index;
-
- public:
- iterator() = delete;
- iterator( const size_t _index, const MVKVector &_vector ) : vector{ &_vector }, index{ _index } { }
- iterator &operator=( const iterator &it ) = delete;
-
- Type *operator->() const { return &vector->alc_ptr->ptr[index]; }
- Type &operator*() const { return vector->alc_ptr->ptr[index]; }
- operator Type*( ) const { return &vector->alc_ptr->ptr[index]; }
-
- bool operator==( const iterator &it ) const { return vector == it.vector && index == it.index; }
- bool operator!=( const iterator &it ) const { return vector != it.vector || index != it.index; }
-
- iterator& operator++() { ++index; return *this; }
- iterator operator++( int ) { auto t = *this; ++index; return t; }
-
- bool is_valid() const { return index < vector->size(); }
- size_t get_position() const { return index; }
- };
-
-public:
- typedef Type value_type;
-
- MVKVector() = delete;
- MVKVector( mvk_vector_allocator_base<Type> *a ) : alc_ptr{ a } { }
- virtual ~MVKVector() { }
-
- iterator begin() const { return iterator( 0, *this ); }
- iterator end() const { return iterator( alc_ptr->size(), *this ); }
-
- const MVKArrayRef<Type> contents() const { return MVKArrayRef<Type>(data(), size()); }
- MVKArrayRef<Type> contents() { return MVKArrayRef<Type>(data(), size()); }
-
- virtual const Type &operator[]( const size_t i ) const = 0;
- virtual Type &operator[]( const size_t i ) = 0;
- virtual const Type &at( const size_t i ) const = 0;
- virtual Type &at( const size_t i ) = 0;
- virtual const Type &front() const = 0;
- virtual Type &front() = 0;
- virtual const Type &back() const = 0;
- virtual Type &back() = 0;
- virtual const Type *data() const = 0;
- virtual Type *data() = 0;
-
- virtual size_t size() const = 0;
- virtual bool empty() const = 0;
- virtual size_t capacity() const = 0;
-
- virtual void pop_back() = 0;
- virtual void clear() = 0;
- virtual void reset() = 0;
- virtual void reserve( const size_t new_size ) = 0;
- virtual void assign( const size_t new_size, const Type &t ) = 0;
- virtual void resize( const size_t new_size, const Type t = { } ) = 0;
- virtual void shrink_to_fit() = 0;
- virtual void push_back( const Type &t ) = 0;
- virtual void push_back( Type &&t ) = 0;
-};
-
-
-template<class Type> class MVKVector<Type *>
-{
- mvk_vector_allocator_base<Type*> *alc_ptr;
-
- class iterator : public std::iterator<std::forward_iterator_tag, Type*>
- {
- const MVKVector *vector;
- size_t index;
-
- public:
- iterator() = delete;
- iterator( const size_t _index, const MVKVector &_vector ) : vector{ &_vector }, index{ _index } { }
- iterator &operator=( const iterator &it ) = delete;
-
- Type *operator->() const { return vector->alc_ptr->ptr[index]; }
- Type *&operator*() { return vector->alc_ptr->ptr[index]; }
- operator Type*&() const { return &vector->alc_ptr->ptr[index]; }
-
- bool operator==( const iterator &it ) const { return vector == it.vector && index == it.index; }
- bool operator!=( const iterator &it ) const { return vector != it.vector || index != it.index; }
-
- iterator& operator++() { ++index; return *this; }
- iterator operator++( int ) { auto t = *this; ++index; return t; }
-
- bool is_valid() const { return index < vector->size(); }
- size_t get_position() const { return index; }
- };
-
-public:
- typedef Type* value_type;
-
- MVKVector() = delete;
- MVKVector( mvk_vector_allocator_base<Type*> *a ) : alc_ptr{ a } { }
- virtual ~MVKVector() { }
-
- iterator begin() const { return iterator( 0, *this ); }
- iterator end() const { return iterator( alc_ptr->size(), *this ); }
-
- const MVKArrayRef<Type*> contents() const { return MVKArrayRef<Type*>(data(), size()); }
- MVKArrayRef<Type*> contents() { return MVKArrayRef<Type*>(data(), size()); }
-
- virtual const Type * const operator[]( const size_t i ) const = 0;
- virtual Type * &operator[]( const size_t i ) = 0;
- virtual const Type * const at( const size_t i ) const = 0;
- virtual Type * &at( const size_t i ) = 0;
- virtual const Type * const front() const = 0;
- virtual Type * &front() = 0;
- virtual const Type * const back() const = 0;
- virtual Type * &back() = 0;
- virtual const Type * const *data() const = 0;
- virtual Type * *data() = 0;
-
- virtual size_t size() const = 0;
- virtual bool empty() const = 0;
- virtual size_t capacity() const = 0;
-
- virtual void pop_back() = 0;
- virtual void clear() = 0;
- virtual void reset() = 0;
- virtual void reserve( const size_t new_size ) = 0;
- virtual void assign( const size_t new_size, const Type *t ) = 0;
- virtual void resize( const size_t new_size, const Type *t = nullptr ) = 0;
- virtual void shrink_to_fit() = 0;
- virtual void push_back( const Type *t ) = 0;
-};
-
-
-// this is the actual implementation of MVKVector
-template<class Type, typename Allocator = mvk_vector_allocator_default<Type>> class MVKVectorImpl : public MVKVector<Type>
-{
- friend class MVKVectorImpl;
-
- Allocator alc;
-
-public:
- class iterator : public std::iterator<std::forward_iterator_tag, Type>
- {
- const MVKVectorImpl *vector;
- size_t index;
-
- public:
- iterator() = delete;
- iterator( const size_t _index, const MVKVectorImpl &_vector ) : vector{ &_vector }, index{ _index } { }
-
- iterator &operator=( const iterator &it )
- {
- vector = it.vector;
- index = it.index;
- return *this;
- }
-
- Type *operator->() { return &vector->alc.ptr[index]; }
- Type &operator*() { return vector->alc.ptr[index]; }
- operator Type*() { return &vector->alc.ptr[index]; }
-
- bool operator==( const iterator &it ) const { return vector == it.vector && index == it.index; }
- bool operator!=( const iterator &it ) const { return vector != it.vector || index != it.index; }
-
- iterator& operator++() { ++index; return *this; }
- iterator operator++( int ) { auto t = *this; ++index; return t; }
-
- bool is_valid() const { return index < vector->alc.size(); }
- size_t get_position() const { return index; }
- };
-
-private:
- // this is the growth strategy -> adjust to your needs
- size_t vector_GetNextCapacity() const
- {
- constexpr auto ELEMENTS_FOR_64_BYTES = 64 / sizeof( Type );
- constexpr auto MINIMUM_CAPACITY = ELEMENTS_FOR_64_BYTES > 4 ? ELEMENTS_FOR_64_BYTES : 4;
- const auto current_capacity = capacity();
- return MINIMUM_CAPACITY + ( 3 * current_capacity ) / 2;
- }
-
- void vector_Allocate( const size_t s )
- {
- const auto new_reserved_size = s > size() ? s : size();
-
- alc.allocate( new_reserved_size );
- }
-
- void vector_ReAllocate( const size_t s )
- {
- alc.re_allocate( s );
- }
-
-public:
- MVKVectorImpl() : MVKVector<Type>{ &alc }
- {
- }
-
- MVKVectorImpl( const size_t n, const Type t ) : MVKVector<Type>{ &alc }
- {
- if( n > 0 )
- {
- alc.allocate( n );
-
- for( size_t i = 0; i < n; ++i )
- {
- alc.construct( &alc.ptr[i], t );
- }
-
- alc.num_elements_used = n;
- }
- }
-
- MVKVectorImpl( const MVKVectorImpl &a ) : MVKVector<Type>{ &alc }
- {
- const size_t n = a.size();
-
- if( n > 0 )
- {
- alc.allocate( n );
-
- for( size_t i = 0; i < n; ++i )
- {
- alc.construct( &alc.ptr[i], a.alc.ptr[i] );
- }
-
- alc.num_elements_used = n;
- }
- }
-
- template<typename U>
- MVKVectorImpl( const U &a ) : MVKVector<Type>{ &alc }
- {
- const size_t n = a.size();
-
- if( n > 0 )
- {
- alc.allocate( n );
-
- for( size_t i = 0; i < n; ++i )
- {
- alc.construct( &alc.ptr[i], a[i] );
- }
-
- alc.num_elements_used = n;
- }
- }
-
- MVKVectorImpl( MVKVectorImpl &&a ) : MVKVector<Type>{ &alc }, alc{ std::move( a.alc ) }
- {
- }
-
- MVKVectorImpl( std::initializer_list<Type> vector ) : MVKVector<Type>{ &alc }
- {
- if( vector.size() > capacity() )
- {
- vector_Allocate( vector.size() );
- }
-
- // std::initializer_list does not yet support std::move, we use it anyway but it has no effect
- for( auto &&element : vector )
- {
- alc.construct( &alc.ptr[alc.num_elements_used], std::move( element ) );
- ++alc.num_elements_used;
- }
- }
-
- ~MVKVectorImpl()
- {
- }
-
- template<typename U>
- MVKVectorImpl& operator=( const U &a )
- {
- static_assert( std::is_base_of<MVKVector<Type>, U>::value, "argument is not of type MVKVector" );
-
- if( this != reinterpret_cast<const MVKVector<Type>*>( &a ) )
- {
- const auto n = a.size();
-
- if( alc.num_elements_used == n )
- {
- for( size_t i = 0; i < n; ++i )
- {
- alc.ptr[i] = a.alc.ptr[i];
- }
- }
- else
- {
- if( n > capacity() )
- {
- vector_ReAllocate( n );
- }
- else
- {
- alc.template destruct_all<Type>();
- }
-
- for( size_t i = 0; i < n; ++i )
- {
- alc.construct( &alc.ptr[i], a[i] );
- }
-
- alc.num_elements_used = n;
- }
- }
-
- return *this;
- }
-
- MVKVectorImpl& operator=( MVKVectorImpl &&a )
- {
- alc.swap( a.alc );
- return *this;
- }
-
- bool operator==( const MVKVectorImpl &a ) const
- {
- if( alc.num_elements_used != a.alc.num_elements_used )
- return false;
- for( size_t i = 0; i < alc.num_elements_used; ++i )
- {
- if( alc[i] != a.alc[i] )
- return false;
- }
- return true;
- }
-
- bool operator!=( const MVKVectorImpl &a ) const
- {
- if( alc.num_elements_used != a.alc.num_elements_used )
- return true;
- for( size_t i = 0; i < alc.num_elements_used; ++i )
- {
- if( alc.ptr[i] != a.alc[i] )
- return true;
- }
- return false;
- }
-
- void swap( MVKVectorImpl &a )
- {
- alc.swap( a.alc );
- }
-
- iterator begin() const { return iterator( 0, *this ); }
- iterator end() const { return iterator( alc.num_elements_used, *this ); }
-
- const Type &operator[]( const size_t i ) const override { return alc[i]; }
- Type &operator[]( const size_t i ) override { return alc[i]; }
- const Type &at( const size_t i ) const override { return alc[i]; }
- Type &at( const size_t i ) override { return alc[i]; }
- const Type &front() const override { return alc[0]; }
- Type &front() override { return alc[0]; }
- const Type &back() const override { return alc[alc.num_elements_used - 1]; }
- Type &back() override { return alc[alc.num_elements_used - 1]; }
- const Type *data() const override { return alc.ptr; }
- Type *data() override { return alc.ptr; }
-
- size_t size() const override { return alc.num_elements_used; }
- bool empty() const override { return alc.num_elements_used == 0; }
- size_t capacity() const override { return alc.get_capacity(); }
-
- void pop_back() override
- {
- if( alc.num_elements_used > 0 )
- {
- --alc.num_elements_used;
- alc.destruct( &alc.ptr[alc.num_elements_used] );
- }
- }
-
- void clear() override
- {
- alc.template destruct_all<Type>();
- }
-
- void reset() override
- {
- alc.deallocate();
- }
-
- void reserve( const size_t new_size ) override
- {
- if( new_size > capacity() )
- {
- vector_ReAllocate( new_size );
- }
- }
-
- void assign( const size_t new_size, const Type &t ) override
- {
- if( new_size <= capacity() )
- {
- clear();
- }
- else
- {
- vector_Allocate( new_size );
- }
-
- for( size_t i = 0; i < new_size; ++i )
- {
- alc.construct( &alc.ptr[i], t );
- }
-
- alc.num_elements_used = new_size;
- }
-
- template <class InputIterator>
- void assign( InputIterator first, InputIterator last )
- {
- clear();
-
- while( first != last )
- {
- emplace_back( *first );
- ++first;
- }
- }
-
- void resize( const size_t new_size, const Type t = { } ) override
- {
- if( new_size == alc.num_elements_used )
- {
- return;
- }
-
- if( new_size == 0 )
- {
- clear();
- return;
- }
-
- if( new_size > alc.num_elements_used )
- {
- if( new_size > capacity() )
- {
- vector_ReAllocate( new_size );
- }
-
- while( alc.num_elements_used < new_size )
- {
- alc.construct( &alc.ptr[alc.num_elements_used], t );
- ++alc.num_elements_used;
- }
- }
- else
- {
- //if constexpr( !std::is_trivially_destructible<Type>::value )
- {
- while( alc.num_elements_used > new_size )
- {
- --alc.num_elements_used;
- alc.destruct( &alc.ptr[alc.num_elements_used] );
- }
- }
- //else
- //{
- // alc.num_elements_used = new_size;
- //}
- }
- }
-
- // trims the capacity of the slist to the number of alc.ptr
- void shrink_to_fit() override
- {
- alc.shrink_to_fit();
- }
-
- void erase( const iterator it )
- {
- if( it.is_valid() )
- {
- --alc.num_elements_used;
-
- for( size_t i = it.get_position(); i < alc.num_elements_used; ++i )
- {
- alc.ptr[i] = std::move( alc.ptr[i + 1] );
- }
-
- // this is required for types with a destructor
- alc.destruct( &alc.ptr[alc.num_elements_used] );
- }
- }
-
- void erase( const iterator first, const iterator last )
- {
- if( first.is_valid() )
- {
- size_t last_pos = last.is_valid() ? last.get_position() : size();
- size_t n = last_pos - first.get_position();
- alc.num_elements_used -= n;
-
- for( size_t i = first.get_position(), e = last_pos; i < alc.num_elements_used && e < alc.num_elements_used + n; ++i, ++e )
- {
- alc.ptr[i] = std::move( alc.ptr[e] );
- }
-
- // this is required for types with a destructor
- for( size_t i = alc.num_elements_used; i < alc.num_elements_used + n; ++i )
- {
- alc.destruct( &alc.ptr[i] );
- }
- }
- }
-
- // adds t before it and automatically resizes vector if necessary
- void insert( const iterator it, Type t )
- {
- if( !it.is_valid() || alc.num_elements_used == 0 )
- {
- push_back( std::move( t ) );
- }
- else
- {
- if( alc.num_elements_used == capacity() )
- vector_ReAllocate( vector_GetNextCapacity() );
-
- // move construct last element
- alc.construct( &alc.ptr[alc.num_elements_used], std::move( alc.ptr[alc.num_elements_used - 1] ) );
-
- // move the remaining elements
- const size_t it_position = it.get_position();
- for( size_t i = alc.num_elements_used - 1; i > it_position; --i )
- {
- alc.ptr[i] = std::move( alc.ptr[i - 1] );
- }
-
- alc.ptr[it_position] = std::move( t );
- ++alc.num_elements_used;
- }
- }
-
- void push_back( const Type &t ) override
- {
- if( alc.num_elements_used == capacity() )
- vector_ReAllocate( vector_GetNextCapacity() );
-
- alc.construct( &alc.ptr[alc.num_elements_used], t );
- ++alc.num_elements_used;
- }
-
- void push_back( Type &&t ) override
- {
- if( alc.num_elements_used == capacity() )
- vector_ReAllocate( vector_GetNextCapacity() );
-
- alc.construct( &alc.ptr[alc.num_elements_used], std::forward<Type>( t ) );
- ++alc.num_elements_used;
- }
-
- template<class... Args>
- Type &emplace_back( Args&&... args )
- {
- if( alc.num_elements_used == capacity() )
- vector_ReAllocate( vector_GetNextCapacity() );
-
- alc.construct( &alc.ptr[alc.num_elements_used], std::forward<Args>( args )... );
- ++alc.num_elements_used;
-
- return alc.ptr[alc.num_elements_used - 1];
- }
-};
-
-// specialization for pointer types
-template<class Type, typename Allocator> class MVKVectorImpl<Type*, Allocator> : public MVKVector<Type*>
-{
- friend class MVKVectorImpl;
-
- Allocator alc;
-
-public:
- class iterator : public std::iterator<std::forward_iterator_tag, Type*>
- {
- MVKVectorImpl *vector;
- size_t index;
-
- public:
- iterator() = delete;
- iterator( const size_t _index, MVKVectorImpl &_vector ) : vector{ &_vector }, index{ _index } { }
-
- iterator &operator=( const iterator &it )
- {
- vector = it.vector;
- index = it.index;
- return *this;
- }
-
- Type *&operator*() { return vector->alc[index]; }
-
- bool operator==( const iterator &it ) const { return vector == it.vector && index == it.index; }
- bool operator!=( const iterator &it ) const { return vector != it.vector || index != it.index; }
-
- iterator& operator++() { ++index; return *this; }
- iterator operator++( int ) { auto t = *this; ++index; return t; }
-
- bool is_valid() const { return index < vector->alc.size(); }
- size_t get_position() const { return index; }
- };
-
-private:
- // this is the growth strategy -> adjust to your needs
- size_t vector_GetNextCapacity() const
- {
- constexpr auto ELEMENTS_FOR_64_BYTES = 64 / sizeof( Type* );
- constexpr auto MINIMUM_CAPACITY = ELEMENTS_FOR_64_BYTES > 4 ? ELEMENTS_FOR_64_BYTES : 4;
- const auto current_capacity = capacity();
- return MINIMUM_CAPACITY + ( 3 * current_capacity ) / 2;
- }
-
- void vector_Allocate( const size_t s )
- {
- const auto new_reserved_size = s > size() ? s : size();
-
- alc.allocate( new_reserved_size );
- }
-
- void vector_ReAllocate( const size_t s )
- {
- alc.re_allocate( s );
- }
-
-public:
- MVKVectorImpl() : MVKVector<Type*>{ &alc }
- {
- }
-
- MVKVectorImpl( const size_t n, const Type *t ) : MVKVector<Type*>{ &alc }
- {
- if ( n > 0 )
- {
- alc.allocate( n );
-
- for ( size_t i = 0; i < n; ++i )
- {
- alc.ptr[i] = t;
- }
-
- alc.num_elements_used = n;
- }
- }
-
- MVKVectorImpl( const MVKVectorImpl &a ) : MVKVector<Type*>{ &alc }
- {
- const size_t n = a.size();
-
- if ( n > 0 )
- {
- alc.allocate( n );
-
- for ( size_t i = 0; i < n; ++i )
- {
- alc.ptr[i] = a.alc.ptr[i];
- }
-
- alc.num_elements_used = n;
- }
- }
-
- MVKVectorImpl( MVKVectorImpl &&a ) : MVKVector<Type*>{ &alc }, alc{ std::move( a.alc ) }
- {
- }
-
- MVKVectorImpl( std::initializer_list<Type*> vector ) : MVKVector<Type*>{ &alc }
- {
- if ( vector.size() > capacity() )
- {
- vector_Allocate( vector.size() );
- }
-
- // std::initializer_list does not yet support std::move, we use it anyway but it has no effect
- for ( auto element : vector )
- {
- alc.ptr[alc.num_elements_used] = element;
- ++alc.num_elements_used;
- }
- }
-
- ~MVKVectorImpl()
- {
- }
-
- template<typename U>
- MVKVectorImpl& operator=( const U &a )
- {
- static_assert( std::is_base_of<MVKVector<U>, U>::value, "argument is not of type MVKVector" );
-
- if ( this != reinterpret_cast< const MVKVector<Type>* >( &a ) )
- {
- const auto n = a.size();
-
- if ( alc.num_elements_used == n )
- {
- for ( size_t i = 0; i < n; ++i )
- {
- alc.ptr[i] = a.alc.ptr[i];
- }
- }
- else
- {
- if ( n > capacity() )
- {
- vector_ReAllocate( n );
- }
-
- for ( size_t i = 0; i < n; ++i )
- {
- alc.ptr[i] = a[i];
- }
-
- alc.num_elements_used = n;
- }
- }
-
- return *this;
- }
-
- MVKVectorImpl& operator=( MVKVectorImpl &&a )
- {
- alc.swap( a.alc );
- return *this;
- }
-
- bool operator==( const MVKVectorImpl &a ) const
- {
- if ( alc.num_elements_used != a.alc.num_elements_used )
- return false;
- for ( size_t i = 0; i < alc.num_elements_used; ++i )
- {
- if ( alc[i] != a.alc[i] )
- return false;
- }
- return true;
- }
-
- bool operator!=( const MVKVectorImpl &a ) const
- {
- if ( alc.num_elements_used != a.alc.num_elements_used )
- return true;
- for ( size_t i = 0; i < alc.num_elements_used; ++i )
- {
- if ( alc.ptr[i] != a.alc[i] )
- return true;
- }
- return false;
- }
-
- void swap( MVKVectorImpl &a )
- {
- alc.swap( a.alc );
- }
-
- iterator begin() { return iterator( 0, *this ); }
- iterator end() { return iterator( alc.num_elements_used, *this ); }
-
- const Type * const at( const size_t i ) const override { return alc[i]; }
- Type * &at( const size_t i ) override { return alc[i]; }
- const Type * const operator[]( const size_t i ) const override { return alc[i]; }
- Type * &operator[]( const size_t i ) override { return alc[i]; }
- const Type * const front() const override { return alc[0]; }
- Type * &front() override { return alc[0]; }
- const Type * const back() const override { return alc[alc.num_elements_used - 1]; }
- Type * &back() override { return alc[alc.num_elements_used - 1]; }
- const Type * const *data() const override { return alc.ptr; }
- Type * *data() override { return alc.ptr; }
-
- size_t size() const override { return alc.num_elements_used; }
- bool empty() const override { return alc.num_elements_used == 0; }
- size_t capacity() const override { return alc.get_capacity(); }
-
- void pop_back() override
- {
- if ( alc.num_elements_used > 0 )
- {
- --alc.num_elements_used;
- }
- }
-
- void clear() override
- {
- alc.num_elements_used = 0;
- }
-
- void reset() override
- {
- alc.deallocate();
- }
-
- void reserve( const size_t new_size ) override
- {
- if ( new_size > capacity() )
- {
- vector_ReAllocate( new_size );
- }
- }
-
- void assign( const size_t new_size, const Type *t ) override
- {
- if ( new_size <= capacity() )
- {
- clear();
- }
- else
- {
- vector_Allocate( new_size );
- }
-
- for ( size_t i = 0; i < new_size; ++i )
- {
- alc.ptr[i] = const_cast< Type* >( t );
- }
-
- alc.num_elements_used = new_size;
- }
-
- void resize( const size_t new_size, const Type *t = nullptr ) override
- {
- if ( new_size == alc.num_elements_used )
- {
- return;
- }
-
- if ( new_size == 0 )
- {
- clear();
- return;
- }
-
- if ( new_size > alc.num_elements_used )
- {
- if ( new_size > capacity() )
- {
- vector_ReAllocate( new_size );
- }
-
- while ( alc.num_elements_used < new_size )
- {
- alc.ptr[alc.num_elements_used] = const_cast< Type* >( t );
- ++alc.num_elements_used;
- }
- }
- else
- {
- alc.num_elements_used = new_size;
- }
- }
-
- // trims the capacity of the MVKVector to the number of used elements
- void shrink_to_fit() override
- {
- alc.shrink_to_fit();
- }
-
- void erase( const iterator it )
- {
- if ( it.is_valid() )
- {
- --alc.num_elements_used;
-
- for ( size_t i = it.get_position(); i < alc.num_elements_used; ++i )
- {
- alc.ptr[i] = alc.ptr[i + 1];
- }
- }
- }
-
- void erase( const iterator first, const iterator last )
- {
- if( first.is_valid() )
- {
- size_t last_pos = last.is_valid() ? last.get_position() : size();
- size_t n = last_pos - first.get_position();
- alc.num_elements_used -= n;
-
- for( size_t i = first.get_position(), e = last_pos; i < alc.num_elements_used && e < alc.num_elements_used + n; ++i, ++e )
- {
- alc.ptr[i] = alc.ptr[e];
- }
- }
- }
-
- // adds t before position it and automatically resizes vector if necessary
- void insert( const iterator it, const Type *t )
- {
- if ( !it.is_valid() || alc.num_elements_used == 0 )
- {
- push_back( t );
- }
- else
- {
- if ( alc.num_elements_used == capacity() )
- vector_ReAllocate( vector_GetNextCapacity() );
-
- // move the remaining elements
- const size_t it_position = it.get_position();
- for ( size_t i = alc.num_elements_used; i > it_position; --i )
- {
- alc.ptr[i] = alc.ptr[i - 1];
- }
-
- alc.ptr[it_position] = const_cast< Type* >( t );
- ++alc.num_elements_used;
- }
- }
-
- void push_back( const Type *t ) override
- {
- if ( alc.num_elements_used == capacity() )
- vector_ReAllocate( vector_GetNextCapacity() );
-
- alc.ptr[alc.num_elements_used] = const_cast< Type* >( t );
- ++alc.num_elements_used;
- }
-};
-
-
-template<typename Type>
-using MVKVectorDefault = MVKVectorImpl<Type, mvk_vector_allocator_default<Type>>;
-
-template<typename Type, size_t N = 8>
-using MVKVectorInline = MVKVectorImpl<Type, mvk_vector_allocator_with_stack<Type, N>>;
-
-
-#endif
-
-
diff --git a/MoltenVK/MoltenVK/Utility/MVKVectorAllocator.h b/MoltenVK/MoltenVK/Utility/MVKVectorAllocator.h
deleted file mode 100755
index 38623c0..0000000
--- a/MoltenVK/MoltenVK/Utility/MVKVectorAllocator.h
+++ /dev/null
@@ -1,552 +0,0 @@
-/*
- * MVKVectorAllocator.h
- *
- * Copyright (c) 2012-2020 Dr. Torsten Hans (hans@ipacs.de)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <new>
-#include <type_traits>
-
-
-#define MVK_VECTOR_CHECK_BOUNDS if (i >= num_elements_used) { throw std::out_of_range("Index out of range"); }
-
-
-namespace mvk_memory_allocator
-{
- inline char *alloc( const size_t num_bytes )
- {
- return new char[num_bytes];
- }
-
- inline void free( void *ptr )
- {
- delete[] (char*)ptr;
- }
-};
-
-
-//////////////////////////////////////////////////////////////////////////////////////////
-//
-// mvk_vector_allocator_base -> base class so we can use MVKVector with template parameter
-//
-//////////////////////////////////////////////////////////////////////////////////////////
-template<typename T>
-class mvk_vector_allocator_base
-{
-public:
- typedef T value_type;
- T *ptr;
- size_t num_elements_used;
-
-public:
- mvk_vector_allocator_base() : ptr{ nullptr }, num_elements_used{ 0 } { }
- mvk_vector_allocator_base( T *_ptr, const size_t _num_elements_used ) : ptr{ _ptr }, num_elements_used{ _num_elements_used } { }
- virtual ~mvk_vector_allocator_base() { }
-
- const T &operator[]( const size_t i ) const { MVK_VECTOR_CHECK_BOUNDS return ptr[i]; }
- T &operator[]( const size_t i ) { MVK_VECTOR_CHECK_BOUNDS return ptr[i]; }
-
- size_t size() const { return num_elements_used; }
-
- virtual size_t get_capacity() const = 0;
- virtual void allocate( const size_t num_elements_to_reserve ) = 0;
- virtual void re_allocate( const size_t num_elements_to_reserve ) = 0;
- virtual void shrink_to_fit() = 0;
- virtual void deallocate() = 0;
-};
-
-
-//////////////////////////////////////////////////////////////////////////////////////////
-//
-// mvk_vector_allocator_default -> malloc based allocator for MVKVector
-//
-//////////////////////////////////////////////////////////////////////////////////////////
-template <typename T>
-class mvk_vector_allocator_default final : public mvk_vector_allocator_base<T>
-{
-private:
- size_t num_elements_reserved;
-
-public:
- template<class S, class... Args> typename std::enable_if< !std::is_trivially_constructible<S>::value >::type
- construct( S *_ptr, Args&&... _args )
- {
- new ( _ptr ) S( std::forward<Args>( _args )... );
- }
-
- template<class S, class... Args> typename std::enable_if< std::is_trivially_constructible<S>::value >::type
- construct( S *_ptr, Args&&... _args )
- {
- *_ptr = S( std::forward<Args>( _args )... );
- }
-
- template<class S> typename std::enable_if< !std::is_trivially_destructible<S>::value >::type
- destruct( S *_ptr )
- {
- _ptr->~S();
- }
-
- template<class S> typename std::enable_if< std::is_trivially_destructible<S>::value >::type
- destruct( S *_ptr )
- {
- }
-
- template<class S> typename std::enable_if< !std::is_trivially_destructible<S>::value >::type
- destruct_all()
- {
- for( size_t i = 0; i < mvk_vector_allocator_base<S>::num_elements_used; ++i )
- {
- mvk_vector_allocator_base<S>::ptr[i].~S();
- }
-
- mvk_vector_allocator_base<S>::num_elements_used = 0;
- }
-
- template<class S> typename std::enable_if< std::is_trivially_destructible<S>::value >::type
- destruct_all()
- {
- mvk_vector_allocator_base<T>::num_elements_used = 0;
- }
-
-public:
- constexpr mvk_vector_allocator_default() : mvk_vector_allocator_base<T>{}, num_elements_reserved{ 0 }
- {
- }
-
- mvk_vector_allocator_default( mvk_vector_allocator_default &&a ) : mvk_vector_allocator_base<T>{ a.ptr, a.num_elements_used }, num_elements_reserved{ a.num_elements_reserved }
- {
- a.ptr = nullptr;
- a.num_elements_used = 0;
- a.num_elements_reserved = 0;
- }
-
- virtual ~mvk_vector_allocator_default()
- {
- deallocate();
- }
-
- size_t get_capacity() const override
- {
- return num_elements_reserved;
- }
-
- void swap( mvk_vector_allocator_default &a )
- {
- const auto copy_ptr = a.ptr;
- const auto copy_num_elements_used = a.num_elements_used;
- const auto copy_num_elements_reserved = a.num_elements_reserved;
-
- a.ptr = mvk_vector_allocator_base<T>::ptr;
- a.num_elements_used = mvk_vector_allocator_base<T>::num_elements_used;
- a.num_elements_reserved = num_elements_reserved;
-
- mvk_vector_allocator_base<T>::ptr = copy_ptr;
- mvk_vector_allocator_base<T>::num_elements_used = copy_num_elements_used;
- num_elements_reserved = copy_num_elements_reserved;
- }
-
- void allocate( const size_t num_elements_to_reserve ) override
- {
- deallocate();
-
- mvk_vector_allocator_base<T>::ptr = reinterpret_cast< T* >( mvk_memory_allocator::alloc( num_elements_to_reserve * sizeof( T ) ) );
- mvk_vector_allocator_base<T>::num_elements_used = 0;
- num_elements_reserved = num_elements_to_reserve;
- }
-
- void re_allocate( const size_t num_elements_to_reserve ) override
- {
- //if constexpr( std::is_trivially_copyable<T>::value )
- //{
- // ptr = reinterpret_cast< T* >( mvk_memory_allocator::tm_memrealloc( ptr, num_elements_to_reserve * sizeof( T ) );
- //}
- //else
- {
- auto *new_ptr = reinterpret_cast< T* >( mvk_memory_allocator::alloc( num_elements_to_reserve * sizeof( T ) ) );
-
- for( size_t i = 0; i < mvk_vector_allocator_base<T>::num_elements_used; ++i )
- {
- construct( &new_ptr[i], std::move( mvk_vector_allocator_base<T>::ptr[i] ) );
- destruct( &mvk_vector_allocator_base<T>::ptr[i] );
- }
-
- //if ( ptr != nullptr )
- {
- mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
- }
-
- mvk_vector_allocator_base<T>::ptr = new_ptr;
- }
-
- num_elements_reserved = num_elements_to_reserve;
- }
-
- void shrink_to_fit() override
- {
- if( mvk_vector_allocator_base<T>::num_elements_used == 0 )
- {
- deallocate();
- }
- else
- {
- auto *new_ptr = reinterpret_cast< T* >( mvk_memory_allocator::alloc( mvk_vector_allocator_base<T>::num_elements_used * sizeof( T ) ) );
-
- for( size_t i = 0; i < mvk_vector_allocator_base<T>::num_elements_used; ++i )
- {
- construct( &new_ptr[i], std::move( mvk_vector_allocator_base<T>::ptr[i] ) );
- destruct( &mvk_vector_allocator_base<T>::ptr[i] );
- }
-
- mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
-
- mvk_vector_allocator_base<T>::ptr = new_ptr;
- num_elements_reserved = mvk_vector_allocator_base<T>::num_elements_used;
- }
- }
-
- void deallocate() override
- {
- destruct_all<T>();
-
- mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
-
- mvk_vector_allocator_base<T>::ptr = nullptr;
- num_elements_reserved = 0;
- }
-};
-
-
-//////////////////////////////////////////////////////////////////////////////////////////
-//
-// mvk_vector_allocator_with_stack -> malloc based MVKVector allocator with preallocated storage
-//
-//////////////////////////////////////////////////////////////////////////////////////////
-template <typename T, int N>
-class mvk_vector_allocator_with_stack final : public mvk_vector_allocator_base<T>
-{
-private:
- //size_t num_elements_reserved; // uhh, num_elements_reserved is mapped onto the stack elements, let the fun begin
- alignas( alignof( T ) ) unsigned char elements_stack[N * sizeof( T )];
-
- static_assert( N * sizeof( T ) >= sizeof( size_t ), "Initial static allocation must be at least 8 bytes. Increase the count of pre-allocated elements." );
-
- void set_num_elements_reserved( const size_t num_elements_reserved )
- {
- *reinterpret_cast<size_t*>( &elements_stack[0] ) = num_elements_reserved;
- }
-
-public:
- //
- // faster element construction and destruction using type traits
- //
- template<class S, class... Args> typename std::enable_if< !std::is_trivially_constructible<S, Args...>::value >::type
- construct( S *_ptr, Args&&... _args )
- {
- new ( _ptr ) S( std::forward<Args>( _args )... );
- }
-
- template<class S, class... Args> typename std::enable_if< std::is_trivially_constructible<S, Args...>::value >::type
- construct( S *_ptr, Args&&... _args )
- {
- *_ptr = S( std::forward<Args>( _args )... );
- }
-
- template<class S> typename std::enable_if< !std::is_trivially_destructible<S>::value >::type
- destruct( S *_ptr )
- {
- _ptr->~S();
- }
-
- template<class S> typename std::enable_if< std::is_trivially_destructible<S>::value >::type
- destruct( S *_ptr )
- {
- }
-
- template<class S> typename std::enable_if< !std::is_trivially_destructible<S>::value >::type
- destruct_all()
- {
- for( size_t i = 0; i < mvk_vector_allocator_base<S>::num_elements_used; ++i )
- {
- mvk_vector_allocator_base<S>::ptr[i].~S();
- }
-
- mvk_vector_allocator_base<S>::num_elements_used = 0;
- }
-
- template<class S> typename std::enable_if< std::is_trivially_destructible<S>::value >::type
- destruct_all()
- {
- mvk_vector_allocator_base<S>::num_elements_used = 0;
- }
-
- template<class S> typename std::enable_if< !std::is_trivially_destructible<S>::value >::type
- swap_stack( mvk_vector_allocator_with_stack &a )
- {
- T stack_copy[N];
-
- for( size_t i = 0; i < mvk_vector_allocator_base<S>::num_elements_used; ++i )
- {
- construct( &stack_copy[i], std::move( S::ptr[i] ) );
- destruct( &mvk_vector_allocator_base<S>::ptr[i] );
- }
-
- for( size_t i = 0; i < a.num_elements_used; ++i )
- {
- construct( &mvk_vector_allocator_base<S>::ptr[i], std::move( a.ptr[i] ) );
- destruct( &mvk_vector_allocator_base<S>::ptr[i] );
- }
-
- for( size_t i = 0; i < mvk_vector_allocator_base<S>::num_elements_used; ++i )
- {
- construct( &a.ptr[i], std::move( stack_copy[i] ) );
- destruct( &stack_copy[i] );
- }
- }
-
- template<class S> typename std::enable_if< std::is_trivially_destructible<S>::value >::type
- swap_stack( mvk_vector_allocator_with_stack &a )
- {
- constexpr int STACK_SIZE = N * sizeof( T );
- for( int i = 0; i < STACK_SIZE; ++i )
- {
- const auto v = elements_stack[i];
- elements_stack[i] = a.elements_stack[i];
- a.elements_stack[i] = v;
- }
- }
-
-public:
- mvk_vector_allocator_with_stack() : mvk_vector_allocator_base<T>{ reinterpret_cast<T*>( &elements_stack[0] ), 0 }
- {
- }
-
- mvk_vector_allocator_with_stack( mvk_vector_allocator_with_stack &&a ) : mvk_vector_allocator_base<T>{ nullptr, a.num_elements_used }
- {
- // is a heap based -> steal ptr from a
- if( !a.get_data_on_stack() )
- {
- mvk_vector_allocator_base<T>::ptr = a.ptr;
- set_num_elements_reserved( a.get_capacity() );
-
- a.ptr = a.get_default_ptr();
- }
- else
- {
- mvk_vector_allocator_base<T>::ptr = get_default_ptr();
- for( size_t i = 0; i < a.num_elements_used; ++i )
- {
- construct( &mvk_vector_allocator_base<T>::ptr[i], std::move( a.ptr[i] ) );
- destruct( &a.ptr[i] );
- }
- }
-
- a.num_elements_used = 0;
- }
-
- ~mvk_vector_allocator_with_stack()
- {
- deallocate();
- }
-
- size_t get_capacity() const override
- {
- return get_data_on_stack() ? N : *reinterpret_cast<const size_t*>( &elements_stack[0] );
- }
-
- constexpr T *get_default_ptr() const
- {
- return reinterpret_cast< T* >( const_cast< unsigned char * >( &elements_stack[0] ) );
- }
-
- bool get_data_on_stack() const
- {
- return mvk_vector_allocator_base<T>::ptr == get_default_ptr();
- }
-
- void swap( mvk_vector_allocator_with_stack &a )
- {
- // both allocators on heap -> easy case
- if( !get_data_on_stack() && !a.get_data_on_stack() )
- {
- auto copy_ptr = mvk_vector_allocator_base<T>::ptr;
- auto copy_num_elements_reserved = get_capacity();
- mvk_vector_allocator_base<T>::ptr = a.ptr;
- set_num_elements_reserved( a.get_capacity() );
- a.ptr = copy_ptr;
- a.set_num_elements_reserved( copy_num_elements_reserved );
- }
- // both allocators on stack -> just switch the stack contents
- else if( get_data_on_stack() && a.get_data_on_stack() )
- {
- swap_stack<T>( a );
- }
- else if( get_data_on_stack() && !a.get_data_on_stack() )
- {
- auto copy_ptr = a.ptr;
- auto copy_num_elements_reserved = a.get_capacity();
-
- a.ptr = a.get_default_ptr();
- for( size_t i = 0; i < mvk_vector_allocator_base<T>::num_elements_used; ++i )
- {
- construct( &a.ptr[i], std::move( mvk_vector_allocator_base<T>::ptr[i] ) );
- destruct( &mvk_vector_allocator_base<T>::ptr[i] );
- }
-
- mvk_vector_allocator_base<T>::ptr = copy_ptr;
- set_num_elements_reserved( copy_num_elements_reserved );
- }
- else if( !get_data_on_stack() && a.get_data_on_stack() )
- {
- auto copy_ptr = mvk_vector_allocator_base<T>::ptr;
- auto copy_num_elements_reserved = get_capacity();
-
- mvk_vector_allocator_base<T>::ptr = get_default_ptr();
- for( size_t i = 0; i < a.num_elements_used; ++i )
- {
- construct( &mvk_vector_allocator_base<T>::ptr[i], std::move( a.ptr[i] ) );
- destruct( &a.ptr[i] );
- }
-
- a.ptr = copy_ptr;
- a.set_num_elements_reserved( copy_num_elements_reserved );
- }
-
- auto copy_num_elements_used = mvk_vector_allocator_base<T>::num_elements_used;
- mvk_vector_allocator_base<T>::num_elements_used = a.num_elements_used;
- a.num_elements_used = copy_num_elements_used;
- }
-
- //
- // allocates rounded up to the defined alignment the number of bytes / if the system cannot allocate the specified amount of memory then a null block is returned
- //
- void allocate( const size_t num_elements_to_reserve ) override
- {
- deallocate();
-
- // check if enough memory on stack space is left
- if( num_elements_to_reserve <= N )
- {
- return;
- }
-
- mvk_vector_allocator_base<T>::ptr = reinterpret_cast< T* >( mvk_memory_allocator::alloc( num_elements_to_reserve * sizeof( T ) ) );
- mvk_vector_allocator_base<T>::num_elements_used = 0;
- set_num_elements_reserved( num_elements_to_reserve );
- }
-
- //template<class S> typename std::enable_if< !std::is_trivially_copyable<S>::value >::type
- void _re_allocate( const size_t num_elements_to_reserve )
- {
- auto *new_ptr = reinterpret_cast< T* >( mvk_memory_allocator::alloc( num_elements_to_reserve * sizeof( T ) ) );
-
- for( size_t i = 0; i < mvk_vector_allocator_base<T>::num_elements_used; ++i )
- {
- construct( &new_ptr[i], std::move( mvk_vector_allocator_base<T>::ptr[i] ) );
- destruct( &mvk_vector_allocator_base<T>::ptr[i] );
- }
-
- if( mvk_vector_allocator_base<T>::ptr != get_default_ptr() )
- {
- mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
- }
-
- mvk_vector_allocator_base<T>::ptr = new_ptr;
- set_num_elements_reserved( num_elements_to_reserve );
- }
-
- //template<class S> typename std::enable_if< std::is_trivially_copyable<S>::value >::type
- // _re_allocate( const size_t num_elements_to_reserve )
- //{
- // const bool data_is_on_stack = get_data_on_stack();
- //
- // auto *new_ptr = reinterpret_cast< S* >( mvk_memory_allocator::tm_memrealloc( data_is_on_stack ? nullptr : ptr, num_elements_to_reserve * sizeof( S ) ) );
- // if( data_is_on_stack )
- // {
- // for( int i = 0; i < N; ++i )
- // {
- // new_ptr[i] = ptr[i];
- // }
- // }
- //
- // ptr = new_ptr;
- // set_num_elements_reserved( num_elements_to_reserve );
- //}
-
- void re_allocate( const size_t num_elements_to_reserve ) override
- {
- //TM_ASSERT( num_elements_to_reserve > get_capacity() );
-
- if( num_elements_to_reserve > N )
- {
- _re_allocate( num_elements_to_reserve );
- }
- }
-
- void shrink_to_fit() override
- {
- // nothing to do if data is on stack already
- if( get_data_on_stack() )
- return;
-
- // move elements to stack space
- if( mvk_vector_allocator_base<T>::num_elements_used <= N )
- {
- //const auto num_elements_reserved = get_capacity();
-
- auto *stack_ptr = get_default_ptr();
- for( size_t i = 0; i < mvk_vector_allocator_base<T>::num_elements_used; ++i )
- {
- construct( &stack_ptr[i], std::move( mvk_vector_allocator_base<T>::ptr[i] ) );
- destruct( &mvk_vector_allocator_base<T>::ptr[i] );
- }
-
- mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
-
- mvk_vector_allocator_base<T>::ptr = stack_ptr;
- }
- else
- {
- auto *new_ptr = reinterpret_cast< T* >( mvk_memory_allocator::alloc( mvk_vector_allocator_base<T>::num_elements_used * sizeof( T ) ) );
-
- for( size_t i = 0; i < mvk_vector_allocator_base<T>::num_elements_used; ++i )
- {
- construct( &new_ptr[i], std::move( mvk_vector_allocator_base<T>::ptr[i] ) );
- destruct( &mvk_vector_allocator_base<T>::ptr[i] );
- }
-
- mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
-
- mvk_vector_allocator_base<T>::ptr = new_ptr;
- set_num_elements_reserved( mvk_vector_allocator_base<T>::num_elements_used );
- }
- }
-
- void deallocate() override
- {
- destruct_all<T>();
-
- if( !get_data_on_stack() )
- {
- mvk_memory_allocator::free( mvk_vector_allocator_base<T>::ptr );
- }
-
- mvk_vector_allocator_base<T>::ptr = get_default_ptr();
- mvk_vector_allocator_base<T>::num_elements_used = 0;
- }
-};
-
-
diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
index f348b54..0d22e17 100644
--- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
+++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm
@@ -638,7 +638,7 @@
case VK_SHADER_STAGE_COMPUTE_BIT: return kMVKShaderStageCompute;
default:
MVKBaseObject::reportError(mvkObj, VK_ERROR_FORMAT_NOT_SUPPORTED, "VkShaderStage %x is not supported.", vkStage);
- return kMVKShaderStageMax;
+ return kMVKShaderStageCount;
}
}
@@ -650,8 +650,8 @@
/* FIXME: kMVKShaderStageGeometry */
case kMVKShaderStageFragment: return VK_SHADER_STAGE_FRAGMENT_BIT;
case kMVKShaderStageCompute: return VK_SHADER_STAGE_COMPUTE_BIT;
- case kMVKShaderStageMax:
- assert(!"This function should never be called with kMVKShaderStageMax!");
+ case kMVKShaderStageCount:
+ assert(!"This function should never be called with kMVKShaderStageCount!");
return VK_SHADER_STAGE_ALL;
}
}
diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
index fb1bb93..36c803f 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp
@@ -31,10 +31,17 @@
#pragma mark -
#pragma mark SPIRVToMSLConversionConfiguration
+// Returns whether the container contains an item equal to the value.
+template<class C, class T>
+bool contains(const C& container, const T& val) {
+ for (const T& cVal : container) { if (cVal == val) { return true; } }
+ return false;
+}
+
// Returns whether the vector contains the value (using a matches(T&) comparison member function). */
-template<class T>
-bool containsMatching(const vector<T>& vec, const T& val) {
- for (const T& vecVal : vec) { if (vecVal.matches(val)) { return true; } }
+template<class C, class T>
+bool containsMatching(const C& container, const T& val) {
+ for (const T& cVal : container) { if (cVal.matches(val)) { return true; } }
return false;
}
@@ -143,6 +150,12 @@
return true;
}
+MVK_PUBLIC_SYMBOL bool mvk::DescriptorBinding::matches(const mvk::DescriptorBinding& other) const {
+ if (descriptorSet != other.descriptorSet) { return false; }
+ if (binding != other.binding) { return false; }
+ return true;
+}
+
MVK_PUBLIC_SYMBOL bool SPIRVToMSLConversionConfiguration::stageSupportsVertexAttributes() const {
return (options.entryPointStage == spv::ExecutionModelVertex ||
options.entryPointStage == spv::ExecutionModelTessellationControl ||
@@ -182,6 +195,14 @@
if (rb.isUsedByShader && !containsMatching(other.resourceBindings, rb)) { return false; }
}
+ for (uint32_t dsIdx : discreteDescriptorSets) {
+ if ( !contains(other.discreteDescriptorSets, dsIdx)) { return false; }
+ }
+
+ for (const auto& db : inlineUniformBlocks) {
+ if ( !containsMatching(other.inlineUniformBlocks, db)) { return false; }
+ }
+
return true;
}
@@ -277,6 +298,18 @@
}
}
+ // Add any descriptor sets that are not using Metal argument buffers.
+ // This only has an effect if SPIRVToMSLConversionConfiguration::options::mslOptions::argument_buffers is enabled.
+ for (uint32_t dsIdx : context.discreteDescriptorSets) {
+ pMSLCompiler->add_discrete_descriptor_set(dsIdx);
+ }
+
+ // Add any inline block bindings.
+ // This only has an effect if SPIRVToMSLConversionConfiguration::options::mslOptions::argument_buffers is enabled.
+ for (auto& db : context.inlineUniformBlocks) {
+ pMSLCompiler->add_inline_uniform_block(db.descriptorSet, db.binding);
+ }
+
_msl = pMSLCompiler->compile();
if (shouldLogMSL) { logSource(_msl, "MSL", "Converted"); }
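
For context on how the two new configuration vectors are expected to be populated by the calling layer, a hedged sketch follows; the set and binding numbers are arbitrary, and the surrounding pipeline code is not part of this change.

    mvk::SPIRVToMSLConversionConfiguration shaderConfig;
    shaderConfig.options.mslOptions.argument_buffers = true;   // enable Metal argument buffers in SPIRV-Cross

    // Keep descriptor set 1 as a discrete (non-argument-buffer) set.
    shaderConfig.discreteDescriptorSets.push_back(1);

    // Embed the inline uniform block at set 0, binding 3 directly in the argument buffer.
    mvk::DescriptorBinding dslBind;
    dslBind.descriptorSet = 0;
    dslBind.binding = 3;
    shaderConfig.inlineUniformBlocks.push_back(dslBind);
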
diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
index f642644..a9d9199 100644
--- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
+++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h
@@ -120,6 +120,20 @@
} MSLResourceBinding;
/**
+ * Identifies a descriptor set binding.
+ *
+ * THIS STRUCT IS STREAMED OUT AS PART OF THE PIPELINE CACHE.
+ * CHANGES TO THIS STRUCT SHOULD BE CAPTURED IN THE STREAMING LOGIC OF THE PIPELINE CACHE.
+ */
+ typedef struct DescriptorBinding {
+ uint32_t descriptorSet = 0;
+ uint32_t binding = 0;
+
+ bool matches(const DescriptorBinding& other) const;
+
+ } DescriptorBinding;
+
+ /**
* Configuration passed to the SPIRVToMSLConverter.
*
* THIS STRUCT IS STREAMED OUT AS PART OF THE PIEPLINE CACHE.
@@ -129,6 +143,8 @@
SPIRVToMSLConversionOptions options;
std::vector<MSLShaderInput> shaderInputs;
std::vector<MSLResourceBinding> resourceBindings;
+ std::vector<uint32_t> discreteDescriptorSets;
+ std::vector<DescriptorBinding> inlineUniformBlocks;
/** Returns whether the pipeline stage being converted supports vertex attributes. */
bool stageSupportsVertexAttributes() const;