Merge pull request #591 from cdavis5e/gpu-semaphores

MVKSemaphore: Use MTLEvent for device-side synchronization.
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index 2f57a2f..a97070d 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -55,7 +55,7 @@
 #define MVK_MAKE_VERSION(major, minor, patch)    (((major) * 10000) + ((minor) * 100) + (patch))
 #define MVK_VERSION     MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH)
 
-#define VK_MVK_MOLTENVK_SPEC_VERSION            19
+#define VK_MVK_MOLTENVK_SPEC_VERSION            20
 #define VK_MVK_MOLTENVK_EXTENSION_NAME          "VK_MVK_moltenvk"
 
 /**
@@ -524,6 +524,7 @@
 	VkBool32 arrayOfSamplers;			 	  	/**< If true, arrays of texture samplers is supported. */
 	MTLLanguageVersion mslVersionEnum;			/**< The version of the Metal Shading Language available on this device, as a Metal enumeration. */
 	VkBool32 depthSampleCompare;				/**< If true, depth texture samplers support the comparison of the pixel value against a reference value. */
+	VkBool32 events;							/**< If true, Metal synchronization events are supported. */
 } MVKPhysicalDeviceMetalFeatures;
 
 /**
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index b87cb40..123663a 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -735,6 +735,7 @@
 
 	if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily1_v5] ) {
 		_metalFeatures.mslVersionEnum = MTLLanguageVersion2_1;
+		_metalFeatures.events = true;
 	}
 
 	if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily3_v1] ) {
@@ -789,8 +790,9 @@
     }
 
     if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_macOS_GPUFamily1_v4] ) {
-		_metalFeatures.mslVersionEnum = MTLLanguageVersion2_1;
+        _metalFeatures.mslVersionEnum = MTLLanguageVersion2_1;
         _metalFeatures.multisampleArrayTextures = true;
+        _metalFeatures.events = true;
     }
 
 #endif
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index fdadf9d..d7b6891 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -17,6 +17,7 @@
  */
 
 #include "MVKImage.h"
+#include "MVKQueue.h"
 #include "MVKSwapchain.h"
 #include "MVKCommandBuffer.h"
 #include "mvk_datatypes.h"
@@ -1083,6 +1084,19 @@
 	if (_availability.isAvailable) {
 		_availability.isAvailable = false;
 		signal(signaler);
+		if (_device->_pMetalFeatures->events && signaler.first) {
+			// The signaler may carry only a fence, so a semaphore is required here.
+			// We can't assume we have an MTLSharedEvent, so we must execute a command
+			// on the device to signal the semaphore. Alternatively, we could always
+			// use an MTLSharedEvent, but that might impose unacceptable performance
+			// costs just to handle this one case.
+			MVKQueue* queue = _device->getQueue(0, 0);
+			id<MTLCommandQueue> mtlQ = queue->getMTLCommandQueue();
+			id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
+			[mtlCmdBuff enqueue];
+			signaler.first->encodeSignal(mtlCmdBuff);
+			[mtlCmdBuff commit];
+		}
 		_preSignaled = signaler;
 	} else {
 		_availabilitySignalers.push_back(signaler);
@@ -1094,7 +1108,7 @@
 
 // Signal either or both of the semaphore and fence in the specified tracker pair.
 void MVKSwapchainImage::signal(MVKSwapchainSignaler& signaler) {
-	if (signaler.first) { signaler.first->signal(); }
+	if (signaler.first && !_device->_pMetalFeatures->events) { signaler.first->signal(); }
 	if (signaler.second) { signaler.second->signal(); }
 }
 
@@ -1147,6 +1161,10 @@
     if (mtlCmdBuff) {
         [mtlCmdBuff presentDrawable: mtlDrawable];
         resetMetalSurface();
+        if (_device->_pMetalFeatures->events && !_availabilitySignalers.empty() && _availabilitySignalers.front().first) {
+            // Signal the semaphore device-side. The front signaler may carry only a fence, hence the null check.
+            _availabilitySignalers.front().first->encodeSignal(mtlCmdBuff);
+        }
         [mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mcb) { makeAvailable(); }];
     } else {
         [mtlDrawable present];
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index 6f74255..6ef0f08 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -206,6 +206,7 @@
 	MVKFence* _fence;
     MVKCommandUse _cmdBuffUse;
 	id<MTLCommandBuffer> _activeMTLCommandBuffer;
+	bool _isSignalingSemaphores;
 };
 
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index 8257f2b..e6ad5cf 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -227,12 +227,28 @@
 
 	_queue->_submissionCaptureScope->beginScope();
 
-    // Submit each command buffer.
+	// If the device supports it, wait for any semaphores on the device.
+	if (_device->_pMetalFeatures->events && _isAwaitingSemaphores) {
+		_isAwaitingSemaphores = false;
+		for (auto* ws : _waitSemaphores) {
+			ws->encodeWait(getActiveMTLCommandBuffer());
+		}
+	}
+
+	// Submit each command buffer.
 	for (auto& cb : _cmdBuffers) { cb->submit(this); }
 
 	// If a fence or semaphores were provided, ensure that a MTLCommandBuffer
 	// is available to trigger them, in case no command buffers were provided.
-	if (_fence || !_signalSemaphores.empty()) { getActiveMTLCommandBuffer(); }
+	if (_fence || _isSignalingSemaphores) { getActiveMTLCommandBuffer(); }
+
+	// If the device supports it, signal all semaphores on the device.
+	if (_device->_pMetalFeatures->events && _isSignalingSemaphores) {
+		_isSignalingSemaphores = false;
+		for (auto* ss : _signalSemaphores) {
+			ss->encodeSignal(getActiveMTLCommandBuffer());
+		}
+	}
 
 	// Commit the last MTLCommandBuffer.
 	// Nothing after this because callback might destroy this instance before this function ends.
@@ -289,7 +305,9 @@
 	_queue->_submissionCaptureScope->endScope();
 
 	// Signal each of the signal semaphores.
-    for (auto& ss : _signalSemaphores) { ss->signal(); }
+    if (_isSignalingSemaphores) {
+        for (auto& ss : _signalSemaphores) { ss->signal(); }
+    }
 
 	// If a fence exists, signal it.
 	if (_fence) { _fence->signal(); }
@@ -318,6 +336,7 @@
         }
 
         uint32_t ssCnt = pSubmit->signalSemaphoreCount;
+        _isSignalingSemaphores = ssCnt > 0;
         _signalSemaphores.reserve(ssCnt);
         for (uint32_t i = 0; i < ssCnt; i++) {
             _signalSemaphores.push_back((MVKSemaphore*)pSubmit->pSignalSemaphores[i]);
@@ -339,7 +358,20 @@
 void MVKQueuePresentSurfaceSubmission::execute() {
     id<MTLCommandQueue> mtlQ = _queue->getMTLCommandQueue();
 
-	if (_device->_pMVKConfig->presentWithCommandBuffer || _device->_pMVKConfig->displayWatermark) {
+	// If there are semaphores and this device supports MTLEvent, we must present
+	// with a command buffer in order to synchronize with the semaphores.
+	if (_device->_pMetalFeatures->events && !_waitSemaphores.empty()) {
+		// Create a command buffer, have it wait for the semaphores, then present
+		// surfaces via the command buffer.
+		id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
+		mtlCmdBuff.label = mvkMTLCommandBufferLabel(kMVKCommandUseQueuePresent);
+		[mtlCmdBuff enqueue];
+
+		for (auto& ws : _waitSemaphores) { ws->encodeWait(mtlCmdBuff); }
+		for (auto& si : _surfaceImages) { si->presentCAMetalDrawable(mtlCmdBuff); }
+
+		[mtlCmdBuff commit];
+	} else if (_device->_pMVKConfig->presentWithCommandBuffer || _device->_pMVKConfig->displayWatermark) {
 		// Create a command buffer, present surfaces via the command buffer,
 		// then wait on the semaphores before committing.
 		id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
index be0cd1e..034d473 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "MVKDevice.h"
+#include <atomic>
 #include <mutex>
 #include <condition_variable>
 #include <unordered_set>
@@ -121,14 +122,23 @@
 	/** Signals the semaphore. Unblocks all waiting threads to continue processing. */
 	void signal();
 
+	/** Encodes an operation that blocks execution of the command buffer until this semaphore is signaled. */
+	void encodeWait(id<MTLCommandBuffer> cmdBuff);
+
+	/** Encodes an operation to signal the semaphore. */
+	void encodeSignal(id<MTLCommandBuffer> cmdBuff);
+
 
 #pragma mark Construction
 
-    MVKSemaphore(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo)
-        : MVKRefCountedDeviceObject(device), _blocker(false, 1) {}
+    MVKSemaphore(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo);
+
+    ~MVKSemaphore() override;
 
 protected:
 	MVKSemaphoreImpl _blocker;
+	id<MTLEvent> _mtlEvent;
+	std::atomic<uint64_t> _mtlEventValue;
 };
 
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
index d887366..94679d7 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm
@@ -84,6 +84,33 @@
     _blocker.release();
 }
 
+// Encodes a device-side wait for this semaphore's MTLEvent on the command buffer.
+// The post-increment is a single atomic read-modify-write, so the value waited on
+// and the advance to the next event value cannot be separated by another thread.
+void MVKSemaphore::encodeWait(id<MTLCommandBuffer> cmdBuff) {
+    [cmdBuff encodeWaitForEvent: _mtlEvent value: _mtlEventValue++];
+}
+
+// Encodes a device-side signal of this semaphore's MTLEvent, using the current event value.
+void MVKSemaphore::encodeSignal(id<MTLCommandBuffer> cmdBuff) {
+    [cmdBuff encodeSignalEvent: _mtlEvent value: _mtlEventValue];
+}
+
+// Creates the MTLEvent only when the device reports event support; otherwise
+// _mtlEvent stays nil. The event value starts at 1.
+MVKSemaphore::MVKSemaphore(MVKDevice* device, const VkSemaphoreCreateInfo* pCreateInfo)
+    : MVKRefCountedDeviceObject(device), _blocker(false, 1), _mtlEvent(nil), _mtlEventValue(1) {
+
+    if (device->_pMetalFeatures->events) {
+        _mtlEvent = [device->getMTLDevice() newEvent];
+    }
+}
+
+// Releases the MTLEvent obtained in the constructor; messaging nil is a no-op when none was created.
+MVKSemaphore::~MVKSemaphore() {
+    [_mtlEvent release];
+}
+
 
 #pragma mark -
 #pragma mark MVKFence