MVKBuffer: Support texel buffers in "host-coherent" memory on Mac.
According to the Vulkan spec:
> * If `buffer` is a `VkBuffer` not created with the
> `VK_BUFFER_CREATE_SPARSE_BINDING_BIT` bit set[...] then the
> `memoryTypeBits` member always contains at least one bit set
> corresponding to a `VkMemoryType` with a `propertyFlags` that has
> both the `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` bit and the
> `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT` bit set. In other words,
> mappable coherent memory **can** always be attached to these
> objects.
There is no exception for texel buffers. Even though desktop Metal
disallows textures in shared memory, even linear textures created from a
buffer, we have to advertise host-coherent memory for texel buffers.
Some applications actually depend on this behavior, so it's not just a
theoretical concern.
To support host-coherent texel buffers, we implicitly create a managed
buffer and copy data between the device memory and the managed buffer,
just like for a linear image.
Signed-off-by: Chip Davis <cdavis@codeweavers.com>
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
index fb59c4d..d3cbb05 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
@@ -72,7 +72,7 @@
id<MTLBuffer> getMTLBuffer();
/** Returns the offset at which the contents of this instance starts within the underlying Metal buffer. */
- inline NSUInteger getMTLBufferOffset() { return _deviceMemory && _deviceMemory->getMTLHeap() ? 0 : _deviceMemoryOffset; }
+	inline NSUInteger getMTLBufferOffset() { return (_deviceMemory && _deviceMemory->getMTLHeap()) || _isHostCoherentTexelBuffer ? 0 : _deviceMemoryOffset; }	// A host-coherent texel buffer owns a dedicated MTLBuffer, so its contents start at offset 0.
#pragma mark Construction
@@ -82,14 +82,19 @@
~MVKBuffer() override;
protected:
+ friend class MVKDeviceMemory;
using MVKResource::needsHostReadSync;
void propogateDebugName() override;
bool needsHostReadSync(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
VkBufferMemoryBarrier* pBufferMemoryBarrier);
+ bool shouldFlushHostMemory();
+ VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size);
+ VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size);
VkBufferUsageFlags _usage;
+ bool _isHostCoherentTexelBuffer = false;
id<MTLBuffer> _mtlBuffer = nil;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
index 9fdf6af..728fa61 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
@@ -52,12 +52,6 @@
pMemoryRequirements->alignment = _byteAlignment;
}
pMemoryRequirements->memoryTypeBits = _device->getPhysicalDevice()->getAllMemoryTypes();
-#if MVK_MACOS
- // Textures must not use shared memory
- if (mvkIsAnyFlagEnabled(_usage, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)) {
- mvkDisableFlags(pMemoryRequirements->memoryTypeBits, _device->getPhysicalDevice()->getHostCoherentMemoryTypes());
- }
-#endif
#if MVK_IOS
// Memoryless storage is not allowed for buffers
mvkDisableFlags(pMemoryRequirements->memoryTypeBits, _device->getPhysicalDevice()->getLazilyAllocatedMemoryTypes());
@@ -88,6 +82,12 @@
MVKResource::bindDeviceMemory(mvkMem, memOffset);
+#if MVK_MACOS
+ if (_deviceMemory) {
+ _isHostCoherentTexelBuffer = _deviceMemory->isMemoryHostCoherent() && mvkIsAnyFlagEnabled(_usage, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT);
+ }
+#endif
+
propogateDebugName();
return _deviceMemory ? _deviceMemory->addBuffer(this) : VK_SUCCESS;
@@ -117,6 +117,31 @@
#endif
}
+#if MVK_MACOS
+bool MVKBuffer::shouldFlushHostMemory() { return _isHostCoherentTexelBuffer; }	// Only host-coherent texel buffers need explicit copies between host memory and their MTLBuffer.
+#endif
+
+// Flushes the part of the device memory range [offset, offset + size) that overlaps this buffer into the MTLBuffer. The range is in device-memory coordinates; the MTLBuffer holds only this buffer's getByteCount() bytes.
+VkResult MVKBuffer::flushToDevice(VkDeviceSize offset, VkDeviceSize size) {
+#if MVK_MACOS
+	VkDeviceSize bufEnd = _deviceMemoryOffset + getByteCount(), start = (offset > _deviceMemoryOffset) ? offset : _deviceMemoryOffset, end = (offset < bufEnd && size < bufEnd - offset) ? offset + size : bufEnd;	// Overflow-safe clip to this buffer.
+	if ( !shouldFlushHostMemory() || start >= end ) { return VK_SUCCESS; }	// Nothing to copy unless the range touches a host-coherent texel buffer.
+	memcpy(reinterpret_cast<char *>(getMTLBuffer().contents) + (start - _deviceMemoryOffset), reinterpret_cast<const char *>(_deviceMemory->getHostMemoryAddress()) + start, end - start);
+	[getMTLBuffer() didModifyRange: NSMakeRange(start - _deviceMemoryOffset, end - start)];	// Tell Metal which bytes of the managed buffer the CPU changed.
+#endif
+	return VK_SUCCESS;
+}
+
+// Pulls content from the MTLBuffer into the part of the device memory range [offset, offset + size) that overlaps this buffer. The range is in device-memory coordinates; the MTLBuffer holds only this buffer's getByteCount() bytes.
+VkResult MVKBuffer::pullFromDevice(VkDeviceSize offset, VkDeviceSize size) {
+#if MVK_MACOS
+	VkDeviceSize bufEnd = _deviceMemoryOffset + getByteCount(), start = (offset > _deviceMemoryOffset) ? offset : _deviceMemoryOffset, end = (offset < bufEnd && size < bufEnd - offset) ? offset + size : bufEnd;	// Overflow-safe clip to this buffer.
+	if ( !shouldFlushHostMemory() || start >= end ) { return VK_SUCCESS; }	// Nothing to copy unless the range touches a host-coherent texel buffer.
+	memcpy(reinterpret_cast<char *>(_deviceMemory->getHostMemoryAddress()) + start, reinterpret_cast<const char *>(getMTLBuffer().contents) + (start - _deviceMemoryOffset), end - start);
+#endif
+	return VK_SUCCESS;
+}
+
// Returns whether the specified buffer memory barrier requires a sync between this
// buffer and host memory for the purpose of the host reading texture memory.
bool MVKBuffer::needsHostReadSync(VkPipelineStageFlags srcStageMask,
@@ -128,7 +153,7 @@
#if MVK_MACOS
return (mvkIsAnyFlagEnabled(dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) &&
mvkIsAnyFlagEnabled(pBufferMemoryBarrier->dstAccessMask, (VK_ACCESS_HOST_READ_BIT)) &&
- isMemoryHostAccessible() && !isMemoryHostCoherent());
+ isMemoryHostAccessible() && (!isMemoryHostCoherent() || _isHostCoherentTexelBuffer));
#endif
}
@@ -144,6 +169,16 @@
offset: _deviceMemoryOffset]; // retained
propogateDebugName();
return _mtlBuffer;
+#if MVK_MACOS
+ } else if (_isHostCoherentTexelBuffer) {
+ // According to the Vulkan spec, buffers, like linear images, can always use host-coherent memory.
+ // But texel buffers on Mac cannot use shared memory. So we need to use host-cached
+ // memory here.
+ _mtlBuffer = [_device->getMTLDevice() newBufferWithLength: getByteCount()
+ options: MTLResourceStorageModeManaged]; // retained
+ propogateDebugName();
+ return _mtlBuffer;
+#endif
} else {
return _deviceMemory->getMTLBuffer();
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
index 6f3a5d4..714a8f9 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
@@ -93,6 +93,7 @@
if (!_mtlHeap) {
lock_guard<mutex> lock(_rezLock);
for (auto& img : _images) { img->flushToDevice(offset, memSize); }
+ for (auto& buf : _buffers) { buf->flushToDevice(offset, memSize); }
}
}
return VK_SUCCESS;
@@ -107,6 +108,7 @@
if (memSize > 0 && isMemoryHostAccessible() && (evenIfCoherent || !isMemoryHostCoherent()) && !_mtlHeap) {
lock_guard<mutex> lock(_rezLock);
for (auto& img : _images) { img->pullFromDevice(offset, memSize); }
+ for (auto& buf : _buffers) { buf->pullFromDevice(offset, memSize); }
#if MVK_MACOS
if (pBlitEnc && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {