Merge pull request #1693 from tellowkrinkle/SingleQueueSemaphore
Replace MTLFence semaphores with forcing a single queue
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index 12637c2..11aed6b 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -566,36 +566,34 @@
VkBool32 forceLowPowerGPU;
/**
- * Use MTLFence, if it is available on the device, for VkSemaphore synchronization behaviour.
+ * Use Metal's implicit guarantees that all operations submitted to a queue will give the same result as
+ * if they had been run in submission order to implement VkSemaphore synchronization as no-ops.
*
- * This parameter interacts with semaphoreUseMTLEvent. If both are enabled, on GPUs other than
- * NVIDIA, semaphoreUseMTLEvent takes priority and MTLEvent will be used if it is available,
- * otherwise MTLFence will be used if it is available. On NVIDIA GPUs, MTLEvent is disabled
- * for VkSemaphores, so CPU-based synchronization will be used unless semaphoreUseMTLFence
- * is enabled and MTLFence is available.
+ * This requires all submissions be made to the same queue, and to guarantee that, MoltenVK will expose
+ * only one queue to the application.
*
* In the special case of VK_SEMAPHORE_TYPE_TIMELINE semaphores, MoltenVK will always
* use MTLSharedEvent if it is available on the platform, regardless of the values of
* semaphoreUseMTLEvent or semaphoreUseMTLFence.
*
- * The value of this parameter must be changed before creating a VkDevice,
- * for the change to take effect.
+ * The value of this parameter must be changed before creating a VkDevice for the change to take effect.
*
* The initial value or this parameter is set by the
- * MVK_ALLOW_METAL_FENCES
+ * MVK_ALLOW_SINGLE_QUEUE_SEMAPHORE
* runtime environment variable or MoltenVK compile-time build setting.
- * If neither is set, this setting is disabled by default, and VkSemaphore will not use MTLFence.
+ * If neither is set, this setting is enabled by default, and VkSemaphore will force a single queue
+ * on NVIDIA GPUs, under Rosetta2, and whenever MTLEvent is unavailable or MVK_ALLOW_METAL_EVENTS is disabled.
*/
- VkBool32 semaphoreUseMTLFence;
+ VkBool32 semaphoreUseSingleQueue;
/**
* Use MTLEvent, if it is available on the device, for VkSemaphore synchronization behaviour.
*
- * This parameter interacts with semaphoreUseMTLFence. If both are enabled, on GPUs other than
+ * This parameter interacts with semaphoreUseSingleQueue. If both are enabled, on GPUs other than
* NVIDIA, semaphoreUseMTLEvent takes priority and MTLEvent will be used if it is available,
* otherwise MTLFence will be used if it is available. On NVIDIA GPUs, MTLEvent is disabled
- * for VkSemaphores, so CPU-based synchronization will be used unless semaphoreUseMTLFence
- * is enabled and MTLFence is available.
+ * for VkSemaphores, so CPU-based synchronization will be used unless semaphoreUseSingleQueue
+ * is enabled.
*
* In the special case of VK_SEMAPHORE_TYPE_TIMELINE semaphores, MoltenVK will always
* use MTLSharedEvent if it is available on the platform, regardless of the values of
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index e7fda7e..3ce2caa 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -90,6 +90,12 @@
#pragma mark -
#pragma mark MVKPhysicalDevice
+typedef enum {
+ MVKSemaphoreStyleUseMTLEvent,
+ MVKSemaphoreStyleUseEmulation,
+ MVKSemaphoreStyleSingleQueue,
+} MVKSemaphoreStyle;
+
/** VkPhysicalDeviceVulkan12Features entries that did not originate in a prior extension. */
typedef struct MVKPhysicalDeviceVulkan12FeaturesNoExt {
VkBool32 samplerMirrorClampToEdge;
@@ -407,6 +413,7 @@
void initExtensions();
void initCounterSets();
bool needsCounterSetRetained();
+ MVKSemaphoreStyle getSemaphoreStyle();
MVKArrayRef<MVKQueueFamily*> getQueueFamilies();
void initPipelineCacheUUID();
uint32_t getHighestGPUCapability();
@@ -445,12 +452,6 @@
id<MTLCommandBuffer> mtlCmdBuffer = nil;
} MVKMTLBlitEncoder;
-typedef enum {
- MVKSemaphoreStyleUseMTLEvent,
- MVKSemaphoreStyleUseMTLFence,
- MVKSemaphoreStyleUseEmulation
-} MVKSemaphoreStyle;
-
/** Represents a Vulkan logical GPU device, associated with a physical device. */
class MVKDevice : public MVKDispatchableVulkanAPIObject {
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index b2d2be8..dc13832 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -1309,17 +1309,22 @@
qfProps.queueFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT);
_queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
- // Dedicated graphics queue family...or another general-purpose queue family.
- if (specialize) { qfProps.queueFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT); }
- _queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
+ // Single queue semaphore requires using a single queue for everything
+ // So don't allow anyone to have more than one
+ if (getSemaphoreStyle() != MVKSemaphoreStyleSingleQueue)
+ {
+ // Dedicated graphics queue family...or another general-purpose queue family.
+ if (specialize) { qfProps.queueFlags = (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_TRANSFER_BIT); }
+ _queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
- // Dedicated compute queue family...or another general-purpose queue family.
- if (specialize) { qfProps.queueFlags = (VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT); }
- _queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
+ // Dedicated compute queue family...or another general-purpose queue family.
+ if (specialize) { qfProps.queueFlags = (VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT); }
+ _queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
- // Dedicated transfer queue family...or another general-purpose queue family.
- if (specialize) { qfProps.queueFlags = VK_QUEUE_TRANSFER_BIT; }
- _queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
+ // Dedicated transfer queue family...or another general-purpose queue family.
+ if (specialize) { qfProps.queueFlags = VK_QUEUE_TRANSFER_BIT; }
+ _queueFamilies.push_back(new MVKQueueFamily(this, qfIdx++, &qfProps));
+ }
MVKAssert(kMVKQueueFamilyCount >= _queueFamilies.size(), "Adjust value of kMVKQueueFamilyCount.");
}
@@ -3110,6 +3115,23 @@
}
}
+MVKSemaphoreStyle MVKPhysicalDevice::getSemaphoreStyle() {
+ // Decide whether Vulkan semaphores should use a MTLEvent, or rely on forcing a single queue.
+ // Prefer MTLEvent, because MTLEvent handles sync across MTLCommandBuffers and MTLCommandQueues.
+ // However, do not allow use of MTLEvents on Rosetta2 (x86 build on M1 runtime) or NVIDIA GPUs,
+ // which have demonstrated trouble with MTLEvents. In that case, a single queue will be used,
+ // unless that option has been disabled, in which case CPU emulation will be used.
+ bool isNVIDIA = _properties.vendorID == kNVVendorId;
+ bool isRosetta2 = _properties.vendorID == kAppleVendorId && !MVK_APPLE_SILICON;
+ if (_metalFeatures.events && mvkConfig().semaphoreUseMTLEvent && !(isRosetta2 || isNVIDIA)) {
+ return MVKSemaphoreStyleUseMTLEvent;
+ }
+ if (mvkConfig().semaphoreUseSingleQueue) {
+ return MVKSemaphoreStyleSingleQueue;
+ }
+ return MVKSemaphoreStyleUseEmulation;
+}
+
// Workaround for a bug in Intel Iris Plus Graphics driver where the counterSets array is
// not properly retained internally, and becomes a zombie when counterSets is called more
// than once, which occurs when an app creates more than one VkInstance. This workaround
@@ -3644,8 +3666,8 @@
} else {
switch (_vkSemaphoreStyle) {
case MVKSemaphoreStyleUseMTLEvent: return new MVKSemaphoreMTLEvent(this, pCreateInfo, pExportInfo, pImportInfo);
- case MVKSemaphoreStyleUseMTLFence: return new MVKSemaphoreMTLFence(this, pCreateInfo, pExportInfo, pImportInfo);
case MVKSemaphoreStyleUseEmulation: return new MVKSemaphoreEmulated(this, pCreateInfo, pExportInfo, pImportInfo);
+ case MVKSemaphoreStyleSingleQueue: return new MVKSemaphoreSingleQueue(this, pCreateInfo, pExportInfo, pImportInfo);
}
}
}
@@ -4438,26 +4460,17 @@
_pProperties = &_physicalDevice->_properties;
_pMemoryProperties = &_physicalDevice->_memoryProperties;
- // Decide whether Vulkan semaphores should use a MTLEvent or MTLFence if they are available.
- // Prefer MTLEvent, because MTLEvent handles sync across MTLCommandBuffers and MTLCommandQueues.
- // However, do not allow use of MTLEvents on Rosetta2 (x86 build on M1 runtime) or NVIDIA GPUs,
- // which have demonstrated trouble with MTLEvents. In that case, since MTLFence use is disabled
- // by default, unless MTLFence is deliberately enabled, CPU emulation will be used.
- bool isNVIDIA = _pProperties->vendorID == kNVVendorId;
- bool isRosetta2 = _pProperties->vendorID == kAppleVendorId && !MVK_APPLE_SILICON;
- bool canUseMTLEventForSem4 = _pMetalFeatures->events && mvkConfig().semaphoreUseMTLEvent && !(isRosetta2 || isNVIDIA);
- bool canUseMTLFenceForSem4 = _pMetalFeatures->fences && mvkConfig().semaphoreUseMTLFence;
- _vkSemaphoreStyle = canUseMTLEventForSem4 ? MVKSemaphoreStyleUseMTLEvent : (canUseMTLFenceForSem4 ? MVKSemaphoreStyleUseMTLFence : MVKSemaphoreStyleUseEmulation);
+ _vkSemaphoreStyle = _physicalDevice->getSemaphoreStyle();
switch (_vkSemaphoreStyle) {
case MVKSemaphoreStyleUseMTLEvent:
MVKLogInfo("Using MTLEvent for Vulkan semaphores.");
break;
- case MVKSemaphoreStyleUseMTLFence:
- MVKLogInfo("Using MTLFence for Vulkan semaphores.");
- break;
case MVKSemaphoreStyleUseEmulation:
MVKLogInfo("Using emulation for Vulkan semaphores.");
break;
+ case MVKSemaphoreStyleSingleQueue:
+ MVKLogInfo("Using Metal implicit guarantees within a single queue for Vulkan semaphores.");
+ break;
}
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
index 3562369..0a03499 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
@@ -201,27 +201,29 @@
#pragma mark -
-#pragma mark MVKSemaphoreMTLFence
+#pragma mark MVKSemaphoreSingleQueue
-/** An MVKSemaphore that uses MTLFence to provide synchronization. */
-class MVKSemaphoreMTLFence : public MVKSemaphore {
+/**
+ * An MVKSemaphore that uses Metal's built-in guarantees on single-queue submission to provide semaphore-like guarantees.
+ *
+ * Relies on Metal's enabled-by-default hazard tracking, and will need to start using MTLFences
+ * if MoltenVK ever starts using resources with MTLHazardTrackingModeUntracked.
+ */
+class MVKSemaphoreSingleQueue : public MVKSemaphore {
public:
void encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
void encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
uint64_t deferSignal() override;
void encodeDeferredSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) override;
- bool isUsingCommandEncoding() override { return true; }
+ bool isUsingCommandEncoding() override { return false; }
- MVKSemaphoreMTLFence(MVKDevice* device,
- const VkSemaphoreCreateInfo* pCreateInfo,
- const VkExportMetalObjectCreateInfoEXT* pExportInfo,
- const VkImportMetalSharedEventInfoEXT* pImportInfo);
+ MVKSemaphoreSingleQueue(MVKDevice* device,
+ const VkSemaphoreCreateInfo* pCreateInfo,
+ const VkExportMetalObjectCreateInfoEXT* pExportInfo,
+ const VkImportMetalSharedEventInfoEXT* pImportInfo);
- ~MVKSemaphoreMTLFence() override;
-
-protected:
- id<MTLFence> _mtlFence;
+ ~MVKSemaphoreSingleQueue() override;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
index 29d36d3..318e86a 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
@@ -77,47 +77,34 @@
#pragma mark -
-#pragma mark MVKSemaphoreMTLFence
+#pragma mark MVKSemaphoreSingleQueue
-// Could use any encoder. Assume BLIT is fastest and lightest.
-// Nil mtlCmdBuff will do nothing.
-void MVKSemaphoreMTLFence::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
- id<MTLBlitCommandEncoder> mtlCmdEnc = mtlCmdBuff.blitCommandEncoder;
- [mtlCmdEnc waitForFence: _mtlFence];
- [mtlCmdEnc endEncoding];
+void MVKSemaphoreSingleQueue::encodeWait(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
+ // Metal will handle all synchronization for us automatically
}
-// Could use any encoder. Assume BLIT is fastest and lightest.
-// Nil mtlCmdBuff will do nothing.
-void MVKSemaphoreMTLFence::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
- id<MTLBlitCommandEncoder> mtlCmdEnc = mtlCmdBuff.blitCommandEncoder;
- [mtlCmdEnc updateFence: _mtlFence];
- [mtlCmdEnc endEncoding];
+void MVKSemaphoreSingleQueue::encodeSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
+ // Metal will handle all synchronization for us automatically
}
-uint64_t MVKSemaphoreMTLFence::deferSignal() {
+uint64_t MVKSemaphoreSingleQueue::deferSignal() {
return 0;
}
-void MVKSemaphoreMTLFence::encodeDeferredSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
+void MVKSemaphoreSingleQueue::encodeDeferredSignal(id<MTLCommandBuffer> mtlCmdBuff, uint64_t) {
encodeSignal(mtlCmdBuff, 0);
}
-MVKSemaphoreMTLFence::MVKSemaphoreMTLFence(MVKDevice* device,
- const VkSemaphoreCreateInfo* pCreateInfo,
- const VkExportMetalObjectCreateInfoEXT* pExportInfo,
- const VkImportMetalSharedEventInfoEXT* pImportInfo) : MVKSemaphore(device, pCreateInfo) {
-
- _mtlFence = [device->getMTLDevice() newFence]; //retained
-
+MVKSemaphoreSingleQueue::MVKSemaphoreSingleQueue(MVKDevice* device,
+ const VkSemaphoreCreateInfo* pCreateInfo,
+ const VkExportMetalObjectCreateInfoEXT* pExportInfo,
+ const VkImportMetalSharedEventInfoEXT* pImportInfo) : MVKSemaphore(device, pCreateInfo) {
if ((pImportInfo && pImportInfo->mtlSharedEvent) || (pExportInfo && pExportInfo->exportObjectType == VK_EXPORT_METAL_OBJECT_TYPE_METAL_SHARED_EVENT_BIT_EXT)) {
- setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "vkCreateEvent(): MTLSharedEvent is not available with VkSemaphores that use MTLFence."));
+ setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "vkCreateEvent(): MTLSharedEvent is not available with VkSemaphores that use implicit synchronization."));
}
}
-MVKSemaphoreMTLFence::~MVKSemaphoreMTLFence() {
- [_mtlFence release];
-}
+MVKSemaphoreSingleQueue::~MVKSemaphoreSingleQueue() = default;
#pragma mark -
diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
index 97cafd0..56876d3 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp
@@ -50,7 +50,7 @@
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.logLevel, MVK_CONFIG_LOG_LEVEL);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.traceVulkanCalls, MVK_CONFIG_TRACE_VULKAN_CALLS);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.forceLowPowerGPU, MVK_CONFIG_FORCE_LOW_POWER_GPU);
- MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.semaphoreUseMTLFence, MVK_ALLOW_METAL_FENCES);
+ MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.semaphoreUseSingleQueue, MVK_ALLOW_SINGLE_QUEUE_SEMAPHORE);
MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.semaphoreUseMTLEvent, MVK_ALLOW_METAL_EVENTS);
MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.autoGPUCaptureScope, MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE);
MVK_SET_FROM_ENV_OR_BUILD_STRING(evCfg.autoGPUCaptureOutputFilepath, MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE, evGPUCapFileStrObj);
diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
index df8edf2..840381d 100644
--- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
+++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h
@@ -231,16 +231,16 @@
#endif
/**
- * Allow the use of MTLFence or MTLEvent for VkSemaphore synchronization behaviour.
+ * Allow the use of a single queue or MTLEvent for VkSemaphore synchronization behaviour.
* By default:
* - MVK_ALLOW_METAL_EVENTS is enabled
- * - MVK_ALLOW_METAL_FENCES is disabled
+ * - MVK_ALLOW_SINGLE_QUEUE_SEMAPHORE is enabled
* */
#ifndef MVK_ALLOW_METAL_EVENTS
# define MVK_ALLOW_METAL_EVENTS 1
#endif
-#ifndef MVK_ALLOW_METAL_FENCES
-# define MVK_ALLOW_METAL_FENCES 0
+#ifndef MVK_ALLOW_SINGLE_QUEUE_SEMAPHORE
+# define MVK_ALLOW_SINGLE_QUEUE_SEMAPHORE 1
#endif
/** Substitute Metal 2D textures for Vulkan 1D images. Enabled by default. */