Get rid of the queue idle event, too.

Put the fence back on the last submission.

This mostly reverts what remained of the original change, apart from a
few formatting changes and the removal of unused code.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index cead7ae..6f74255 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -127,24 +127,17 @@
 	void initMTLCommandQueue();
 	void initGPUCaptureScopes();
 	void destroyExecQueue();
-	void unlockQueue();
-	MVKSemaphoreImpl* addNewEvent(uint32_t submitCount);
-	void removeEvent(MVKSemaphoreImpl* event);
 	VkResult submit(MVKQueueSubmission* qSubmit);
 
 	MVKQueueFamily* _queueFamily;
 	uint32_t _index;
 	float _priority;
 	dispatch_queue_t _execQueue;
-	dispatch_queue_t _fenceQueue;
 	id<MTLCommandQueue> _mtlQueue;
 	std::string _name;
 	MVKMTLCommandBufferID _nextMTLCmdBuffID;
 	MVKGPUCaptureScope* _submissionCaptureScope;
 	MVKGPUCaptureScope* _presentationCaptureScope;
-	std::mutex _activeCountLock;
-	std::atomic<uint32_t> _activeCount;
-	std::unordered_set<MVKSemaphoreImpl*> _pendingSubmitDoneEvents;
 };
 
 
@@ -197,11 +190,9 @@
 	MVKQueueCommandBufferSubmission(MVKDevice* device,
 									MVKQueue* queue,
 									const VkSubmitInfo* pSubmit,
+									VkFence fence,
                                     MVKCommandUse cmdBuffUse);
 
-    /** Destructor. */
-    ~MVKQueueCommandBufferSubmission() override;
-
 protected:
 	friend MVKCommandBuffer;
 
@@ -212,6 +203,7 @@
 
 	MVKVectorInline<MVKCommandBuffer*, 16> _cmdBuffers;
 	MVKVectorInline<MVKSemaphore*, 16> _signalSemaphores;
+	MVKFence* _fence;
     MVKCommandUse _cmdBuffUse;
 	id<MTLCommandBuffer> _activeMTLCommandBuffer;
 };
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index 1e674c7..8257f2b 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -86,20 +86,16 @@
 VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits,
                           VkFence fence, MVKCommandUse cmdBuffUse) {
 
-    VkResult rslt = VK_SUCCESS;
-    MVKSemaphoreImpl* submitDoneEvent = addNewEvent(submitCount);
-    for (uint32_t sIdx = 0; sIdx < submitCount; sIdx++) {
-        VkResult subRslt = submit(new MVKQueueCommandBufferSubmission(_device, this, &pSubmits[sIdx], cmdBuffUse));
-		if (rslt == VK_SUCCESS) { rslt = subRslt; }
+    // Fence-only submission
+    if (submitCount == 0 && fence) {
+        return submit(new MVKQueueCommandBufferSubmission(_device, this, nullptr, fence, cmdBuffUse));
     }
-    if (rslt == VK_SUCCESS && fence) {
-        // Fence must wait for all completion blocks to finish.
-        dispatch_async(_execQueue ? _execQueue : _fenceQueue, ^{
-            submitDoneEvent->wait();
-            ((MVKFence*)fence)->signal();
-            this->removeEvent(submitDoneEvent);
-            delete submitDoneEvent;
-        });
+
+    VkResult rslt = VK_SUCCESS;
+    for (uint32_t sIdx = 0; sIdx < submitCount; sIdx++) {
+        VkFence fenceOrNil = (sIdx == (submitCount - 1)) ? fence : VK_NULL_HANDLE; // last one gets the fence
+        VkResult subRslt = submit(new MVKQueueCommandBufferSubmission(_device, this, &pSubmits[sIdx], fenceOrNil, cmdBuffUse));
+        if (rslt == VK_SUCCESS) { rslt = subRslt; }
     }
     return rslt;
 }
@@ -108,34 +104,6 @@
 	return submit(new MVKQueuePresentSurfaceSubmission(_device, this, pPresentInfo));
 }
 
-// Decrements the queue activation count and all known events.
-void MVKQueue::unlockQueue() {
-	lock_guard<mutex> lock(_activeCountLock);
-	for (auto iter = _pendingSubmitDoneEvents.begin(), end = _pendingSubmitDoneEvents.end(); iter != end; ) {
-		if ((*iter)->release()) {
-			iter = _pendingSubmitDoneEvents.erase(iter);
-		} else {
-			++iter;
-		}
-	}
-	--_activeCount;
-}
-
-// Adds a new event with the current active count plus any additional submissions.
-MVKSemaphoreImpl* MVKQueue::addNewEvent(uint32_t submitCount) {
-    lock_guard<mutex> lock(_activeCountLock);
-    _activeCount += submitCount;
-    auto* submitDoneEvent = new MVKSemaphoreImpl(true, _activeCount);
-    _pendingSubmitDoneEvents.insert(submitDoneEvent);
-    return submitDoneEvent;
-}
-
-// Removes an event from the set of pending submission completion events.
-void MVKQueue::removeEvent(MVKSemaphoreImpl* event) {
-    lock_guard<mutex> lock(_activeCountLock);
-    _pendingSubmitDoneEvents.erase(event);
-}
-
 // Create an empty submit struct and fence, submit to queue and wait on fence.
 VkResult MVKQueue::waitIdle(MVKCommandUse cmdBuffUse) {
 
@@ -148,11 +116,7 @@
 	MVKFence mvkFence(_device, &vkFenceInfo);
 	VkFence fence = (VkFence)&mvkFence;
 	submit(0, nullptr, fence, cmdBuffUse);
-	VkResult rslt = mvkWaitForFences(1, &fence, false);
-	if (rslt != VK_SUCCESS)
-		return rslt;
-
-	return VK_SUCCESS;
+	return mvkWaitForFences(1, &fence, false);
 }
 
 
@@ -161,7 +125,7 @@
 #define MVK_DISPATCH_QUEUE_QOS_CLASS		QOS_CLASS_USER_INITIATED
 
 MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority)
-        : MVKDispatchableDeviceObject(device), _activeCount(0) {
+        : MVKDispatchableDeviceObject(device) {
 
 	_queueFamily = queueFamily;
 	_index = index;
@@ -191,11 +155,6 @@
 
 		// Create the dispatch queue
 		_execQueue = dispatch_queue_create((getName() + "-Dispatch").c_str(), dqAttr);		// retained
-	} else {
-		// Create a queue just for fence updates.
-		dispatch_queue_attr_t dqAttr = dispatch_queue_attr_make_with_qos_class(DISPATCH_QUEUE_CONCURRENT, QOS_CLASS_UTILITY, QOS_MIN_RELATIVE_PRIORITY);
-
-		_fenceQueue = dispatch_queue_create((getName() + "-Fence").c_str(), dqAttr);	// retained
 	}
 }
 
@@ -271,9 +230,9 @@
     // Submit each command buffer.
 	for (auto& cb : _cmdBuffers) { cb->submit(this); }
 
-	// If semaphores were provided, ensure that a MTLCommandBuffer is available
-	// to trigger them, in case no command buffers were provided.
-	if (!_signalSemaphores.empty()) { getActiveMTLCommandBuffer(); }
+	// If a fence or semaphores were provided, ensure that a MTLCommandBuffer
+	// is available to trigger them, in case no command buffers were provided.
+	if (_fence || !_signalSemaphores.empty()) { getActiveMTLCommandBuffer(); }
 
 	// Commit the last MTLCommandBuffer.
 	// Nothing after this because callback might destroy this instance before this function ends.
@@ -332,12 +291,16 @@
 	// Signal each of the signal semaphores.
     for (auto& ss : _signalSemaphores) { ss->signal(); }
 
+	// If a fence exists, signal it.
+	if (_fence) { _fence->signal(); }
+
     this->destroy();
 }
 
 MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKDevice* device,
 																 MVKQueue* queue,
 																 const VkSubmitInfo* pSubmit,
+																 VkFence fence,
                                                                  MVKCommandUse cmdBuffUse)
         : MVKQueueSubmission(device,
 							 queue,
@@ -361,6 +324,7 @@
         }
     }
 
+	_fence = (MVKFence*)fence;
     _cmdBuffUse= cmdBuffUse;
 	_activeMTLCommandBuffer = nil;
 
@@ -368,10 +332,6 @@
 //	MVKLogDebug("Creating submission %p. Submission count %u.", this, ++_subCount);
 }
 
-MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() {
-	_queue->unlockQueue();
-}
-
 
 #pragma mark -
 #pragma mark MVKQueuePresentSurfaceSubmission