Merge pull request #715 from billhollings/master
Add ability to automatically cause an Xcode GPU capture without developer intervention.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index fd3c8e8..3239b8d 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -21,6 +21,7 @@
- Add support for extensions:
- `VK_KHR_device_group`
- Add support for `VkEvent`, using either native `MTLEvent` or emulation when `MTLEvent` not available.
+- `vkInvalidateMappedMemoryRanges()` synchronizes managed device memory to CPU.
- Revert to supporting host-coherent memory for linear images on macOS.
- Ensure Vulkan loader magic number is set every time before returning any dispatchable Vulkan handle.
- Fix crash when `VkDeviceCreateInfo` specifies queue families out of numerical order.
@@ -32,6 +33,7 @@
- No longer prefer dedicated allocations for buffer memory, including buffer-backed images.
- Handle the `compositeAlpha` member of `VkSwapchainCreateInfoKHR`.
- `VkPhysicalDevicePortabilitySubsetFeaturesEXTX::events` set to `true`.
+- Add ability to automatically cause an *Xcode* GPU capture without developer intervention.
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index eef5e65..352b732 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -102,8 +102,8 @@
* 2: Log errors and informational messages.
* If neither is set, errors and informational messages are logged.
*
- * 2. Setting the MVK_CONFIG_TRACE_VULKAN_CALLS runtime environment variable or MoltenVK compile-time build
- * setting will cause MoltenVK to log the name of each Vulkan call made by the application. The logging
+ * 2. The MVK_CONFIG_TRACE_VULKAN_CALLS runtime environment variable or MoltenVK compile-time build
+ * setting causes MoltenVK to log the name of each Vulkan call made by the application. The logging
* format options can be controlled by setting the value of MVK_CONFIG_TRACE_VULKAN_CALLS as follows:
* 0: No Vulkan call logging.
* 1: Log the name of each Vulkan call when the call is entered.
@@ -117,6 +117,18 @@
* 4. Setting the MVK_ALLOW_METAL_EVENTS runtime environment variable or MoltenVK compile-time build
* setting to 1 will cause MoltenVK to use Metal events, if they are available on the device, for
* for VkSemaphore sychronization behaviour. This is disabled by default.
+ *
+ * 5. The MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE runtime environment variable or MoltenVK compile-time
+ * build setting controls whether Xcode should run an automatic GPU capture without the user
+ * having to trigger it manually via the Xcode user interface, and controls the scope under
+ * which that GPU capture will occur. This is useful when trying to capture a one-shot GPU
+ * trace, such as when running a Vulkan CTS test case. For the automatic GPU capture to occur,
+ * the Xcode scheme under which the app is run must have the Metal GPU capture option turned on.
 * To trigger a GPU capture manually via the Xcode user interface instead, leave
 * MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE unset, or set it to 0.
+ * 0: No automatic GPU capture.
+ * 1: Capture all GPU commands issued during the lifetime of the VkDevice.
 *    If MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE is not set, no automatic GPU capture will occur.
*/
typedef struct {
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
index dcf566d..adbf683 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
@@ -110,10 +110,8 @@
#endif
}
-/**
- * Returns whether the specified buffer memory barrier requires a sync between this
- * buffer and host memory for the purpose of the host reading texture memory.
- */
+// Returns whether the specified buffer memory barrier requires a sync between this
+// buffer and host memory for the purpose of the host reading texture memory.
bool MVKBuffer::needsHostReadSync(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
VkBufferMemoryBarrier* pBufferMemoryBarrier) {
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 5f267df..b4a7461 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -360,6 +360,11 @@
#pragma mark -
#pragma mark MVKDevice
+typedef struct {
+ id<MTLBlitCommandEncoder> mtlBlitEncoder = nil;
+ id<MTLCommandBuffer> mtlCmdBuffer = nil;
+} MVKMTLBlitEncoder;
+
/** Represents a Vulkan logical GPU device, associated with a physical device. */
class MVKDevice : public MVKDispatchableVulkanAPIObject {
@@ -387,7 +392,7 @@
PFN_vkVoidFunction getProcAddr(const char* pName);
/** Retrieves a queue at the specified index within the specified family. */
- MVKQueue* getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex);
+ MVKQueue* getQueue(uint32_t queueFamilyIndex = 0, uint32_t queueIndex = 0);
/** Block the current thread until all queues in this device are idle. */
VkResult waitIdle();
@@ -528,6 +533,9 @@
void freeMemory(MVKDeviceMemory* mvkDevMem,
const VkAllocationCallbacks* pAllocator);
+
+#pragma mark Operations
+
/** Applies the specified global memory barrier to all resource issued by this device. */
void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
@@ -565,6 +573,9 @@
/** Populates the specified statistics structure from the current activity performance statistics. */
void getPerformanceStatistics(MVKPerformanceStatistics* pPerf);
+	/** Pulls the contents of the specified mapped memory ranges from the device, making device writes visible to the host. */
+ VkResult invalidateMappedMemoryRanges(uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges);
+
#pragma mark Metal
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index e900a61..170a6c4 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -2122,6 +2122,9 @@
mvkDevMem->destroy();
}
+
+#pragma mark Operations
+
// Adds the specified resource for tracking, and returns the added resource.
MVKResource* MVKDevice::addResource(MVKResource* rez) {
lock_guard<mutex> lock(_rezLock);
@@ -2197,6 +2200,25 @@
if (pPerf) { *pPerf = _performanceStatistics; }
}
+VkResult MVKDevice::invalidateMappedMemoryRanges(uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges) {
+ @autoreleasepool {
+ VkResult rslt = VK_SUCCESS;
+ MVKMTLBlitEncoder mvkBlitEnc;
+ for (uint32_t i = 0; i < memRangeCount; i++) {
+ const VkMappedMemoryRange* pMem = &pMemRanges[i];
+ MVKDeviceMemory* mvkMem = (MVKDeviceMemory*)pMem->memory;
+ VkResult r = mvkMem->pullFromDevice(pMem->offset, pMem->size, false, &mvkBlitEnc);
+ if (rslt == VK_SUCCESS) { rslt = r; }
+ }
+ if (mvkBlitEnc.mtlBlitEncoder) { [mvkBlitEnc.mtlBlitEncoder endEncoding]; }
+ if (mvkBlitEnc.mtlCmdBuffer) {
+ [mvkBlitEnc.mtlCmdBuffer commit];
+ [mvkBlitEnc.mtlCmdBuffer waitUntilCompleted];
+ }
+ return rslt;
+ }
+}
+
#pragma mark Metal
@@ -2279,6 +2301,10 @@
initQueues(pCreateInfo);
+ if (getInstance()->_autoGPUCaptureScope == MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE) {
+ [[MTLCaptureManager sharedCaptureManager] startCaptureWithDevice: getMTLDevice()];
+ }
+
MVKLogInfo("Created VkDevice to run on GPU %s with the following %d Vulkan extensions enabled:%s",
_pProperties->deviceName,
_enabledExtensions.getEnabledCount(),
@@ -2562,6 +2588,10 @@
[_mtlCompileOptions release];
[_globalVisibilityResultMTLBuffer release];
+
+ if (getInstance()->_autoGPUCaptureScope == MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE) {
+ [[MTLCaptureManager sharedCaptureManager] stopCapture];
+ }
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
index 9d468ef..557c51c 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
@@ -85,8 +85,18 @@
* If this memory is host-visible, pulls the specified memory range from the device.
* Normally, pulling will only occur if the device memory is non-coherent, but pulling
* to coherent memory can be forced by setting evenIfCoherent to true.
+ *
+ * If pBlitEnc is not null, it points to a holder for a MTLBlitCommandEncoder and its
+ * associated MTLCommandBuffer. If this instance has a MTLBuffer using managed memory,
+ * this function may call synchronizeResource: on the MTLBlitCommandEncoder to
+ * synchronize the GPU contents to the CPU. If the contents of the pBlitEnc do not
+ * include a MTLBlitCommandEncoder and MTLCommandBuffer, this function will create
+ * them and populate the contents into the MVKMTLBlitEncoder struct.
*/
- VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size, bool evenIfCoherent = false);
+ VkResult pullFromDevice(VkDeviceSize offset,
+ VkDeviceSize size,
+ bool evenIfCoherent = false,
+ MVKMTLBlitEncoder* pBlitEnc = nullptr);
#pragma mark Metal
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
index ee4aedf..5d5dd09 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
@@ -19,6 +19,7 @@
#include "MVKDeviceMemory.h"
#include "MVKBuffer.h"
#include "MVKImage.h"
+#include "MVKQueue.h"
#include "MVKEnvironment.h"
#include "mvk_datatypes.hpp"
#include "MVKFoundation.h"
@@ -91,12 +92,24 @@
return VK_SUCCESS;
}
-VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset, VkDeviceSize size, bool evenIfCoherent) {
+VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset,
+ VkDeviceSize size,
+ bool evenIfCoherent,
+ MVKMTLBlitEncoder* pBlitEnc) {
// Coherent memory is flushed on unmap(), so it is only flushed if forced
VkDeviceSize memSize = adjustMemorySize(size, offset);
if (memSize > 0 && isMemoryHostAccessible() && (evenIfCoherent || !isMemoryHostCoherent()) ) {
lock_guard<mutex> lock(_rezLock);
for (auto& img : _images) { img->pullFromDevice(offset, memSize); }
+
+#if MVK_MACOS
+ if (pBlitEnc && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
+ if ( !pBlitEnc->mtlCmdBuffer) { pBlitEnc->mtlCmdBuffer = [_device->getQueue()->getMTLCommandQueue() commandBufferWithUnretainedReferences]; }
+ if ( !pBlitEnc->mtlBlitEncoder) { pBlitEnc->mtlBlitEncoder = [pBlitEnc->mtlCmdBuffer blitCommandEncoder]; }
+ [pBlitEnc->mtlBlitEncoder synchronizeResource: _mtlBuffer];
+ }
+#endif
+
}
return VK_SUCCESS;
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h
index 4bbef89..301a4f0 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h
@@ -196,6 +196,7 @@
bool _hasDebugUtilsMessengers;
bool _useCreationCallbacks;
const char* _debugReportCallbackLayerPrefix;
+ int32_t _autoGPUCaptureScope;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
index 7a32e7a..c184f7d 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
@@ -679,6 +679,8 @@
MVK_SET_FROM_ENV_OR_BUILD_BOOL( _mvkConfig.fullImageViewSwizzle, MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE);
MVK_SET_FROM_ENV_OR_BUILD_BOOL( _mvkConfig.defaultGPUCaptureScopeQueueFamilyIndex, MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX);
MVK_SET_FROM_ENV_OR_BUILD_BOOL( _mvkConfig.defaultGPUCaptureScopeQueueIndex, MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX);
+
+ MVK_SET_FROM_ENV_OR_BUILD_INT32(_autoGPUCaptureScope, MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE);
}
VkResult MVKInstance::verifyLayers(uint32_t count, const char* const* names) {
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
index e1e6467..b10aac6 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
@@ -166,14 +166,10 @@
signal(signaler);
if (_device->_useMTLEventsForSemaphores) {
// Unfortunately, we can't assume we have an MTLSharedEvent here.
- // This means we need to execute a command on the device to signal
- // the semaphore. Alternatively, we could always use an MTLSharedEvent,
- // but that might impose unacceptable performance costs just to handle
- // this one case.
- MVKQueue* queue = _device->getQueue(0, 0);
- id<MTLCommandQueue> mtlQ = queue->getMTLCommandQueue();
- id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
- [mtlCmdBuff enqueue];
+ // This means we need to execute a command on the device to signal the semaphore.
+ // Alternatively, we could always use an MTLSharedEvent, but that might impose
+ // unacceptable performance costs just to handle this one case.
+ id<MTLCommandBuffer> mtlCmdBuff = [_device->getQueue()->getMTLCommandQueue() commandBufferWithUnretainedReferences];
signaler.first->encodeSignal(mtlCmdBuff);
[mtlCmdBuff commit];
}
diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
index 56471ad..2114446 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
@@ -141,6 +141,17 @@
# define MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX 0
#endif
+/**
+ * The scope under which to automatically run a GPU capture within Xcode, without the
+ * developer having to trigger it manually via the Xcode UI. This is useful when trying
+ * to capture a one-shot trace, such as when running a Vulkan CTS test case.
+ */
+#define MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_NONE 0
+#define MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE 1
+#ifndef MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE
+# define MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_NONE
+#endif
+
/** Force the use of a low-power GPU if it exists. Disabled by default. */
#ifndef MVK_CONFIG_FORCE_LOW_POWER_GPU
# define MVK_CONFIG_FORCE_LOW_POWER_GPU 0
diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
index e44f368..a1e2fc9 100644
--- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm
+++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
@@ -429,15 +429,10 @@
VkDevice device,
uint32_t memRangeCount,
const VkMappedMemoryRange* pMemRanges) {
-
+
MVKTraceVulkanCallStart();
- VkResult rslt = VK_SUCCESS;
- for (uint32_t i = 0; i < memRangeCount; i++) {
- const VkMappedMemoryRange* pMem = &pMemRanges[i];
- MVKDeviceMemory* mvkMem = (MVKDeviceMemory*)pMem->memory;
- VkResult r = mvkMem->pullFromDevice(pMem->offset, pMem->size);
- if (rslt == VK_SUCCESS) { rslt = r; }
- }
+ MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
+ VkResult rslt = mvkDev->invalidateMappedMemoryRanges(memRangeCount, pMemRanges);
MVKTraceVulkanCallEnd();
return rslt;
}