vkInvalidateMappedMemoryRanges() synchronizes managed device memory to CPU.
Add MVKDevice::invalidateMappedMemoryRanges().
Add MVKMTLBlitEncoder struct.
MVKDevice::getQueue() defaults to queue family zero and queue zero.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index fd79051..3239b8d 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -21,6 +21,7 @@
- Add support for extensions:
- `VK_KHR_device_group`
- Add support for `VkEvent`, using either native `MTLEvent` or emulation when `MTLEvent` not available.
+- `vkInvalidateMappedMemoryRanges()` synchronizes managed device memory to CPU.
- Revert to supporting host-coherent memory for linear images on macOS.
- Ensure Vulkan loader magic number is set every time before returning any dispatchable Vulkan handle.
- Fix crash when `VkDeviceCreateInfo` specifies queue families out of numerical order.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 5f267df..b4a7461 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -360,6 +360,11 @@
#pragma mark -
#pragma mark MVKDevice
+typedef struct {
+ id<MTLBlitCommandEncoder> mtlBlitEncoder = nil;
+ id<MTLCommandBuffer> mtlCmdBuffer = nil;
+} MVKMTLBlitEncoder;
+
/** Represents a Vulkan logical GPU device, associated with a physical device. */
class MVKDevice : public MVKDispatchableVulkanAPIObject {
@@ -387,7 +392,7 @@
PFN_vkVoidFunction getProcAddr(const char* pName);
/** Retrieves a queue at the specified index within the specified family. */
- MVKQueue* getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex);
+ MVKQueue* getQueue(uint32_t queueFamilyIndex = 0, uint32_t queueIndex = 0);
/** Block the current thread until all queues in this device are idle. */
VkResult waitIdle();
@@ -528,6 +533,9 @@
void freeMemory(MVKDeviceMemory* mvkDevMem,
const VkAllocationCallbacks* pAllocator);
+
+#pragma mark Operations
+
/** Applies the specified global memory barrier to all resource issued by this device. */
void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
@@ -565,6 +573,9 @@
/** Populates the specified statistics structure from the current activity performance statistics. */
void getPerformanceStatistics(MVKPerformanceStatistics* pPerf);
+	/** Synchronizes the specified mapped memory ranges from the device back to the CPU. */
+ VkResult invalidateMappedMemoryRanges(uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges);
+
#pragma mark Metal
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index ff3bf4c..170a6c4 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -2122,6 +2122,9 @@
mvkDevMem->destroy();
}
+
+#pragma mark Operations
+
// Adds the specified resource for tracking, and returns the added resource.
MVKResource* MVKDevice::addResource(MVKResource* rez) {
lock_guard<mutex> lock(_rezLock);
@@ -2197,6 +2200,25 @@
if (pPerf) { *pPerf = _performanceStatistics; }
}
+VkResult MVKDevice::invalidateMappedMemoryRanges(uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges) {
+ @autoreleasepool {
+ VkResult rslt = VK_SUCCESS;
+ MVKMTLBlitEncoder mvkBlitEnc;
+ for (uint32_t i = 0; i < memRangeCount; i++) {
+ const VkMappedMemoryRange* pMem = &pMemRanges[i];
+ MVKDeviceMemory* mvkMem = (MVKDeviceMemory*)pMem->memory;
+ VkResult r = mvkMem->pullFromDevice(pMem->offset, pMem->size, false, &mvkBlitEnc);
+ if (rslt == VK_SUCCESS) { rslt = r; }
+ }
+ if (mvkBlitEnc.mtlBlitEncoder) { [mvkBlitEnc.mtlBlitEncoder endEncoding]; }
+ if (mvkBlitEnc.mtlCmdBuffer) {
+ [mvkBlitEnc.mtlCmdBuffer commit];
+ [mvkBlitEnc.mtlCmdBuffer waitUntilCompleted];
+ }
+ return rslt;
+ }
+}
+
#pragma mark Metal
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
index 9d468ef..557c51c 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
@@ -85,8 +85,18 @@
* If this memory is host-visible, pulls the specified memory range from the device.
* Normally, pulling will only occur if the device memory is non-coherent, but pulling
* to coherent memory can be forced by setting evenIfCoherent to true.
+ *
+ * If pBlitEnc is not null, it points to a holder for a MTLBlitCommandEncoder and its
+	 * associated MTLCommandBuffer. If this instance has a MTLBuffer using managed memory,
+ * this function may call synchronizeResource: on the MTLBlitCommandEncoder to
+	 * synchronize the GPU contents to the CPU. If pBlitEnc does not already contain
+	 * a MTLBlitCommandEncoder and MTLCommandBuffer, this function will create them
+	 * and populate the MVKMTLBlitEncoder struct with them.
*/
- VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size, bool evenIfCoherent = false);
+ VkResult pullFromDevice(VkDeviceSize offset,
+ VkDeviceSize size,
+ bool evenIfCoherent = false,
+ MVKMTLBlitEncoder* pBlitEnc = nullptr);
#pragma mark Metal
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
index ee4aedf..5d5dd09 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
@@ -19,6 +19,7 @@
#include "MVKDeviceMemory.h"
#include "MVKBuffer.h"
#include "MVKImage.h"
+#include "MVKQueue.h"
#include "MVKEnvironment.h"
#include "mvk_datatypes.hpp"
#include "MVKFoundation.h"
@@ -91,12 +92,24 @@
return VK_SUCCESS;
}
-VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset, VkDeviceSize size, bool evenIfCoherent) {
+VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset,
+ VkDeviceSize size,
+ bool evenIfCoherent,
+ MVKMTLBlitEncoder* pBlitEnc) {
// Coherent memory is flushed on unmap(), so it is only flushed if forced
VkDeviceSize memSize = adjustMemorySize(size, offset);
if (memSize > 0 && isMemoryHostAccessible() && (evenIfCoherent || !isMemoryHostCoherent()) ) {
lock_guard<mutex> lock(_rezLock);
for (auto& img : _images) { img->pullFromDevice(offset, memSize); }
+
+#if MVK_MACOS
+ if (pBlitEnc && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
+ if ( !pBlitEnc->mtlCmdBuffer) { pBlitEnc->mtlCmdBuffer = [_device->getQueue()->getMTLCommandQueue() commandBufferWithUnretainedReferences]; }
+ if ( !pBlitEnc->mtlBlitEncoder) { pBlitEnc->mtlBlitEncoder = [pBlitEnc->mtlCmdBuffer blitCommandEncoder]; }
+ [pBlitEnc->mtlBlitEncoder synchronizeResource: _mtlBuffer];
+ }
+#endif
+
}
return VK_SUCCESS;
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
index e1e6467..b10aac6 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
@@ -166,14 +166,10 @@
signal(signaler);
if (_device->_useMTLEventsForSemaphores) {
// Unfortunately, we can't assume we have an MTLSharedEvent here.
- // This means we need to execute a command on the device to signal
- // the semaphore. Alternatively, we could always use an MTLSharedEvent,
- // but that might impose unacceptable performance costs just to handle
- // this one case.
- MVKQueue* queue = _device->getQueue(0, 0);
- id<MTLCommandQueue> mtlQ = queue->getMTLCommandQueue();
- id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
- [mtlCmdBuff enqueue];
+ // This means we need to execute a command on the device to signal the semaphore.
+ // Alternatively, we could always use an MTLSharedEvent, but that might impose
+ // unacceptable performance costs just to handle this one case.
+ id<MTLCommandBuffer> mtlCmdBuff = [_device->getQueue()->getMTLCommandQueue() commandBufferWithUnretainedReferences];
signaler.first->encodeSignal(mtlCmdBuff);
[mtlCmdBuff commit];
}
diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
index e44f368..a1e2fc9 100644
--- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm
+++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
@@ -429,15 +429,10 @@
VkDevice device,
uint32_t memRangeCount,
const VkMappedMemoryRange* pMemRanges) {
-
+
MVKTraceVulkanCallStart();
- VkResult rslt = VK_SUCCESS;
- for (uint32_t i = 0; i < memRangeCount; i++) {
- const VkMappedMemoryRange* pMem = &pMemRanges[i];
- MVKDeviceMemory* mvkMem = (MVKDeviceMemory*)pMem->memory;
- VkResult r = mvkMem->pullFromDevice(pMem->offset, pMem->size);
- if (rslt == VK_SUCCESS) { rslt = r; }
- }
+ MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
+ VkResult rslt = mvkDev->invalidateMappedMemoryRanges(memRangeCount, pMemRanges);
MVKTraceVulkanCallEnd();
return rslt;
}