MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm - external/github.com/KhronosGroup/MoltenVK - Git at Google

 /*
  * MVKQueue.mm
  *
  * Copyright (c) 2014-2018 The Brenwill Workshop Ltd. (http://www.brenwill.com)
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "MVKQueue.h"
 #include "MVKSwapchain.h"
 #include "MVKSync.h"
 #include "MVKFoundation.h"
 #include "MVKOSExtensions.h"
 #include "MVKLogging.h"

 using namespace std;


 #pragma mark -
 #pragma mark MVKQueueFamily

 // Returns the MTLCommandQueue cached for the specified queue index, creating and caching
 // it on first request. MTLCommandQueues are cached in MVKQueueFamily/MVKPhysicalDevice
 // because they are very limited in number. An app that creates multiple VkDevices over
 // time (such as a test suite) will soon find 15 second delays when creating subsequent
 // MTLCommandQueues.
 id<MTLCommandQueue> MVKQueueFamily::getMTLCommandQueue(uint32_t queueIndex) {
 	lock_guard<mutex> cacheGuard(_qLock);		// serialize access to the cache

 	// A queue has already been created for this index.
 	if (_mtlQueues[queueIndex]) { return _mtlQueues[queueIndex]; }

 	// First request for this index: create the queue and remember it.
 	id<MTLCommandQueue> newQ = [_physicalDevice->getMTLDevice() newCommandQueue];	// retained
 	_mtlQueues[queueIndex] = newQ;
 	return newQ;
 }

 // Captures the physical device, the index of this queue family and a copy of its
 // properties, and sizes the MTLCommandQueue cache to one empty slot per queue.
 MVKQueueFamily::MVKQueueFamily(MVKPhysicalDevice* physicalDevice, uint32_t queueFamilyIndex, const VkQueueFamilyProperties* pProperties) {
 	_properties = *pProperties;
 	_queueFamilyIndex = queueFamilyIndex;
 	_physicalDevice = physicalDevice;

 	// Every slot starts out nil. getMTLCommandQueue() fills a slot when it is first requested.
 	_mtlQueues.assign(pProperties->queueCount, nil);
 }

 // Hands the cached MTLCommandQueues to mvkReleaseContainerContents(). That helper is
 // defined elsewhere; presumably it releases each element of the container.
 MVKQueueFamily::~MVKQueueFamily() {
 	mvkReleaseContainerContents(_mtlQueues);
 }


 #pragma mark -
 #pragma mark MVKQueue


 #pragma mark Queue submissions

 // Executes the submission, either immediately on the calling thread when there is no
 // execution queue, or asynchronously on the execution dispatch queue. Each dispatched
 // submission is wrapped in a dedicated autorelease pool, because relying on the dispatch
 // queue to find time to drain the autorelease pool can result in significant memory
 // creep under heavy workloads.
 // Returns the result held by the submission, or VK_SUCCESS if the submission is null.
 VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) {
 	if (qSubmit == nullptr) { return VK_SUCCESS; }		// Nothing to submit

 	// Read the result before starting execution, to avoid a race condition
 	// with the submission being destroyed early once it has executed.
 	const VkResult submitResult = qSubmit->_submissionResult;

 	if ( !_execQueue ) {
 		qSubmit->execute();
 		return submitResult;
 	}

 	dispatch_async(_execQueue, ^{
 		@autoreleasepool { qSubmit->execute(); }
 	});
 	return submitResult;
 }

 // Wraps each VkSubmitInfo in a MVKQueueCommandBufferSubmission and submits them in order.
 // The fence, if any, is attached only to the last submission. If there are no batches,
 // but a fence was supplied, a single fence-only submission is made instead.
 // Returns the first result that is not VK_SUCCESS, or VK_SUCCESS if there is none.
 VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits,
                           VkFence fence, MVKCommandUse cmdBuffUse) {

 	// No batches, but a fence that needs to be signalled
 	if (fence && submitCount == 0) {
 		MVKQueueCommandBufferSubmission* fenceOnlySub =
 			new MVKQueueCommandBufferSubmission(_device, this, VK_NULL_HANDLE, fence, cmdBuffUse);
 		return submit(fenceOnlySub);
 	}

 	VkResult firstFailure = VK_SUCCESS;
 	for (uint32_t i = 0; i < submitCount; i++) {
 		// Only the final batch carries the fence
 		const bool isLastBatch = (i + 1 == submitCount);
 		VkFence batchFence = isLastBatch ? fence : VK_NULL_HANDLE;

 		MVKQueueCommandBufferSubmission* batchSub =
 			new MVKQueueCommandBufferSubmission(_device, this, &pSubmits[i], batchFence, cmdBuffUse);
 		VkResult batchResult = submit(batchSub);

 		// Once a failure has been seen, later results do not replace it
 		if (firstFailure == VK_SUCCESS) { firstFailure = batchResult; }
 	}
 	return firstFailure;
 }

 // Wraps the presentation info in a new MVKQueuePresentSurfaceSubmission
 // and submits it, returning the result of that submission.
 VkResult MVKQueue::submit(const VkPresentInfoKHR* pPresentInfo) {
 	return submit(new MVKQueuePresentSurfaceSubmission(_device, this, pPresentInfo));
 }

 // Waits for the queue to become idle by submitting an empty batch that
 // signals a temporary fence, and then waiting on that fence.
 VkResult MVKQueue::waitIdle(MVKCommandUse cmdBuffUse) {

 	// An empty batch: no wait semaphores, no command buffers, no signal semaphores.
 	// Value-initialization zeroes every member, so only the type needs to be set.
 	VkSubmitInfo emptySubmit = {};
 	emptySubmit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;

 	// A fence created with no flags
 	VkFenceCreateInfo fenceInfo = {};
 	fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;

 	// The fence lives on the stack, and is only needed for the duration of this call
 	MVKFence idleFence(_device, &fenceInfo);
 	VkFence fenceHandle = (VkFence)&idleFence;

 	submit(1, &emptySubmit, fenceHandle, cmdBuffUse);
 	return mvkWaitForFences(1, &fenceHandle, false);
 }

 // Creates a labelled MTLCommandBuffer on the Metal queue, tags it with a unique ID,
 // arranges to be notified when it completes, and counts it as active.
 //
 // This function is guarded against conflict with the mtlCommandBufferHasCompleted()
 // function, but is not threadsafe against calls to this function itself, or to the
 // registerMTLCommandBufferCountdown() function from multiple threads. It is assumed
 // that this function and the registerMTLCommandBufferCountdown() function are called
 // from a single thread.
 id<MTLCommandBuffer> MVKQueue::makeMTLCommandBuffer(NSString* mtlCmdBuffLabel) {

 	id<MTLCommandBuffer> cmdBuff = [_mtlQueue commandBufferWithUnretainedReferences];
 	[cmdBuff setLabel: mtlCmdBuffLabel];

 	// The ID is captured by the completion handler, so that the
 	// completion can be matched to this particular MTLCommandBuffer.
 	MVKMTLCommandBufferID cmdBuffID = _nextMTLCmdBuffID++;
 	[cmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> completedCmdBuff) {
 		this->mtlCommandBufferHasCompleted(completedCmdBuff, cmdBuffID);
 	}];

 	// The active count is also modified when a MTLCommandBuffer completes,
 	// so it can only be changed while holding the completion lock.
 	{
 		lock_guard<mutex> lock(_completionLock);
 		_activeMTLCommandBufferCount++;
 	}

 	return cmdBuff;
 }

 // Gives the countdown the current active MTLCommandBuffer count and the next
 // MTLCommandBuffer ID, and tracks the countdown unless that call returns true.
 // NOTE(review): a true return presumably means the countdown has already finished - confirm.
 //
 // This function must be called after all corresponding calls
 // to makeMTLCommandBuffer() and from the same thread.
 void MVKQueue::registerMTLCommandBufferCountdown(MVKMTLCommandBufferCountdown* countdown) {
 	lock_guard<mutex> lock(_completionLock);

 	bool needsNoTracking = countdown->setActiveMTLCommandBufferCount(_activeMTLCommandBufferCount, _nextMTLCmdBuffID);
 	if (needsNoTracking) { return; }

 	_completionCountdowns.push_back(countdown);
 }

 // Called from the completion handler of a MTLCommandBuffer. Decrements the active count,
 // notifies every tracked countdown of the completion, and stops tracking the countdowns
 // that report that they are now complete.
 void MVKQueue::mtlCommandBufferHasCompleted(id<MTLCommandBuffer> mtlCmdBuff, MVKMTLCommandBufferID mtlCmdBuffID) {
 	lock_guard<mutex> lock(_completionLock);

 	_activeMTLCommandBufferCount--;

 	// Compact the array in place. keptCnt is both the number of countdowns
 	// kept so far, and the position at which the next kept one is written.
 	const uint32_t totalCnt = (uint32_t)_completionCountdowns.size();
 	uint32_t keptCnt = 0;
 	for (uint32_t idx = 0; idx < totalCnt; idx++) {
 		MVKMTLCommandBufferCountdown* cd = _completionCountdowns[idx];

 		// A countdown that has just completed is dropped, by not copying it forward.
 		if (cd->mtlCommandBufferHasCompleted(mtlCmdBuffID)) { continue; }

 		// Close the gap left by any countdowns dropped earlier.
 		if (keptCnt != idx) { _completionCountdowns[keptCnt] = cd; }
 		keptCnt++;
 	}

 	// Trim the stale entries left at the end, if any countdowns were dropped.
 	if (keptCnt < totalCnt) { _completionCountdowns.resize(keptCnt); }
 }


 #pragma mark Construction

 // The quality-of-service class requested for the execution dispatch queue of each MVKQueue.
 #define MVK_DISPATCH_QUEUE_QOS_CLASS		QOS_CLASS_USER_INITIATED

 // Records the queue family, index and priority of this queue, resets the MTLCommandBuffer
 // tracking state, and sets up the execution dispatch queue and the Metal command queue.
 MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority)
         : MVKDispatchableDeviceObject(device), _commandEncodingPool(device) {

 	// No MTLCommandBuffers are active yet, and their IDs are handed out starting at 1.
 	_nextMTLCmdBuffID = 1;
 	_activeMTLCommandBufferCount = 0;

 	// The init functions below read these, so they are set first.
 	_priority = priority;
 	_index = index;
 	_queueFamily = queueFamily;

 	initExecQueue();
 	initMTLCommandQueue();
 }

 // Unless synchronous submission processing was configured, creates and initializes
 // the prioritized execution dispatch queue. When synchronous submission is configured,
 // no dispatch queue is created, and submissions execute on the calling thread.
 void MVKQueue::initExecQueue() {
 	if (_device->_mvkConfig.synchronousQueueSubmits) {
 		_execQueue = nullptr;
 		return;
 	}

 	// Create a name for the dispatch queue. The output is bounded by snprintf(), so that
 	// an unexpectedly large priority value, which expands the %.1f field well beyond a
 	// few characters, truncates the name instead of writing past the end of the buffer.
 	const char* dqNameFmt = "MoltenVKDispatchQueue-%d-%d-%.1f";
 	char dqName[64];
 	snprintf(dqName, sizeof(dqName), dqNameFmt, _queueFamily->getIndex(), _index, _priority);

 	// Determine the dispatch queue priority. A queue priority of 1.0 maps to a relative
 	// priority of zero, and a queue priority of 0.0 maps to QOS_MIN_RELATIVE_PRIORITY.
 	dispatch_qos_class_t dqQOS = MVK_DISPATCH_QUEUE_QOS_CLASS;
 	int dqPriority = (1.0 - _priority) * QOS_MIN_RELATIVE_PRIORITY;
 	dispatch_queue_attr_t dqAttr = dispatch_queue_attr_make_with_qos_class(DISPATCH_QUEUE_SERIAL, dqQOS, dqPriority);

 	// Create the dispatch queue
 	_execQueue = dispatch_queue_create(dqName, dqAttr);		// retained
 }

 /**
  * Retrieves the Metal command queue for this queue from the queue family, and
  * records the time taken to do so in the performance statistics of the device.
  */
 void MVKQueue::initMTLCommandQueue() {
 	const uint64_t accessStart = _device->getPerformanceTimestamp();

 	// The MTLCommandQueue is cached in the queue family, and is not retained here.
 	_mtlQueue = _queueFamily->getMTLCommandQueue(_index);

 	_device->addActivityPerformance(_device->_performanceStatistics.queue.mtlQueueAccess, accessStart);

 	// Allow Xcode to capture the first frame if desired.
 	[_mtlQueue insertDebugCaptureBoundary];
 }

 MVKQueue::~MVKQueue() {
     // Acquire the completion lock, and hold it while the execution queue is released.
     //
     // Delay destroying this queue until registerMTLCommandBufferCountdown() is done.
     // registerMTLCommandBufferCountdown() can trigger a queue submission to finish(),
     // which may trigger semaphores that control a queue waitIdle(). If that waitIdle()
     // is being called by the app just prior to device and queue destruction, a rare race
     // condition exists if registerMTLCommandBufferCountdown() does not complete before
     // this queue is destroyed. If _completionLock is destroyed along with this queue,
     // before registerMTLCommandBufferCountdown() completes, a SIGABRT crash will arise
     // in the destructor of the lock created in registerMTLCommandBufferCountdown().
     lock_guard<mutex> lock(_completionLock);
 	destroyExecQueue();
 }

 // Releases the execution dispatch queue, if one was created.
 void MVKQueue::destroyExecQueue() {
 	if ( !_execQueue ) { return; }		// Synchronous submission: there is no queue
 	dispatch_release(_execQueue);
 }


 #pragma mark -
 #pragma mark MVKQueueCommandBufferSubmissionCountdown

 // Remembers the submission that is to be finished when this countdown finishes.
 MVKQueueCommandBufferSubmissionCountdown::MVKQueueCommandBufferSubmissionCountdown(MVKQueueCommandBufferSubmission* qSub) {
 	_qSub = qSub;
 }

 // Forwards to the finish() function of the submission this countdown was created with.
 void MVKQueueCommandBufferSubmissionCountdown::finish() { _qSub->finish(); }


 #pragma mark -
 #pragma mark MVKQueueSubmission

 // Associates the submission with its queue, starts it with a VK_SUCCESS result and no
 // neighbouring submissions, and takes a copy of the semaphores it must wait on.
 MVKQueueSubmission::MVKQueueSubmission(MVKDevice* device,
 									   MVKQueue* queue,
 									   uint32_t waitSemaphoreCount,
 									   const VkSemaphore* pWaitSemaphores) : MVKBaseDeviceObject(device) {
 	_submissionResult = VK_SUCCESS;
 	_queue = queue;
 	_next = VK_NULL_HANDLE;
 	_prev = VK_NULL_HANDLE;

 	// Copy the wait semaphore handles into this submission as MVKSemaphore pointers.
 	_waitSemaphores.reserve(waitSemaphoreCount);
 	for (uint32_t wsIdx = 0; wsIdx < waitSemaphoreCount; wsIdx++) {
 		MVKSemaphore* mvkSem = (MVKSemaphore*)pWaitSemaphores[wsIdx];
 		_waitSemaphores.push_back(mvkSem);
 	}

 	// There is only something to wait for if at least one semaphore was supplied.
 	_isAwaitingSemaphores = (waitSemaphoreCount != 0);
 }

 // Records the result against this submission, unless a result other than
 // VK_SUCCESS has already been recorded, in which case the earlier result is kept.
 void MVKQueueSubmission::recordResult(VkResult vkResult) {
 	bool hasFailedAlready = (_submissionResult != VK_SUCCESS);
 	if (hasFailedAlready) { return; }
 	_submissionResult = vkResult;
 }


 #pragma mark -
 #pragma mark MVKQueueCommandBufferSubmission

 // Counter of live submissions, for debugging. In the visible code, it is
 // referenced only by MVKLogDebug() calls that are currently commented out.
 std::atomic<uint32_t> _subCount;

 // Executes each command buffer of this submission in turn, commits the active
 // MTLCommandBuffer, and registers to be told when the MTLCommandBuffers have completed.
 void MVKQueueCommandBufferSubmission::execute() {

 //	MVKLogDebug("Executing submission %p.", this);

 	if (_cmdBuffers.empty()) {
 		// There are no command buffers to execute. If there is a fence or any signal semaphores,
 		// create an empty MTLCommandBuffer, so that its completion triggers them.
 		if (_fence || !_signalSemaphores.empty() ) { getActiveMTLCommandBuffer(); }
 	} else {
 		// Each command buffer is told its position within the batch, counting from 1.
 		MVKCommandBufferBatchPosition batchPos = {1, uint32_t(_cmdBuffers.size()), _cmdBuffUse};
 		for (auto& cmdBuff : _cmdBuffers) {
 			cmdBuff->execute(this, batchPos);
 			batchPos.index++;
 		}
 	}

 	commitActiveMTLCommandBuffer();

 	// Register for callback when MTLCommandBuffers have completed
 	_queue->registerMTLCommandBufferCountdown(&_cmdBuffCountdown);
 }

 // Returns the MTLCommandBuffer that is currently active for this submission.
 // If there is none, one is first created from the queue and enqueued.
 id<MTLCommandBuffer> MVKQueueCommandBufferSubmission::getActiveMTLCommandBuffer() {
 	if (_activeMTLCommandBuffer) { return _activeMTLCommandBuffer; }

 	_activeMTLCommandBuffer = _queue->makeMTLCommandBuffer(getMTLCommandBufferName());
 	[_activeMTLCommandBuffer enqueue];
 	return _activeMTLCommandBuffer;
 }

 // Waits on the wait semaphores, if that has not yet been done for this submission,
 // then commits the active MTLCommandBuffer and forgets it.
 void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer() {

 	// The wait is delayed until here, to allow as much filling of the MTLCommandBuffer as
 	// possible before forcing a wait. The semaphores are waited on in turn, and the order
 	// in which they are signalled does not matter. The flag is cleared first, so that each
 	// semaphore is waited on only once per submission.
 	if (_isAwaitingSemaphores) {
 		_isAwaitingSemaphores = false;
 		for (auto& sem : _waitSemaphores) {
 			sem->wait();
 		}
 	}

 	[_activeMTLCommandBuffer commit];

 	// The MTLCommandBuffer was not retained, so the reference is simply cleared.
 	_activeMTLCommandBuffer = nil;
 }

 // Returns an NSString suitable for use as a MTLCommandBuffer label. This is the label
 // for the command use, with a suffix appended when the use is kMVKCommandUseQueueSubmit.
 NSString* MVKQueueCommandBufferSubmission::getMTLCommandBufferName() {
 	NSString* useLabel = mvkMTLCommandBufferLabel(_cmdBuffUse);
 	if (_cmdBuffUse != kMVKCommandUseQueueSubmit) { return useLabel; }

 	return [NSString stringWithFormat: @"%@ (virtual for sync)", useLabel];
 }

 // Signals the signal semaphores and the fence of this submission, and then
 // destroys the submission. The submission must not be used after this call.
 void MVKQueueCommandBufferSubmission::finish() {

 //	MVKLogDebug("Finishing submission %p. Submission count %u.", this, _subCount--);

 	// Semaphores first...
 	for (auto& sem : _signalSemaphores) {
 		sem->signal();
 	}

 	// ...then the fence, if there is one.
 	if (_fence) {
 		_fence->signal();
 	}

 	this->destroy();
 }

 // Builds a submission from an optional VkSubmitInfo and an optional fence. The wait
 // semaphores are passed to the superclass. The command buffers and signal semaphores are
 // copied here, and the recording result of each command buffer is recorded against
 // this submission.
 MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKDevice* device,
 																 MVKQueue* queue,
 																 const VkSubmitInfo* pSubmit,
 																 VkFence fence,
 																 MVKCommandUse cmdBuffUse)
 		: MVKQueueSubmission(device,
 							 queue,
 							 (pSubmit ? pSubmit->waitSemaphoreCount : 0),
 							 (pSubmit ? pSubmit->pWaitSemaphores : nullptr)), _cmdBuffCountdown(this) {

 	_activeMTLCommandBuffer = nil;
 	_cmdBuffUse = cmdBuffUse;
 	_fence = (MVKFence*)fence;

 	// pSubmit can be null if just tracking the fence alone
 	if (pSubmit) {
 		const uint32_t cmdBuffCnt = pSubmit->commandBufferCount;
 		_cmdBuffers.reserve(cmdBuffCnt);
 		for (uint32_t cbIdx = 0; cbIdx < cmdBuffCnt; cbIdx++) {
 			MVKCommandBuffer* mvkCmdBuff = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[cbIdx]);
 			_cmdBuffers.push_back(mvkCmdBuff);
 			recordResult(mvkCmdBuff->getRecordingResult());
 		}

 		const uint32_t sigSemCnt = pSubmit->signalSemaphoreCount;
 		_signalSemaphores.reserve(sigSemCnt);
 		for (uint32_t ssIdx = 0; ssIdx < sigSemCnt; ssIdx++) {
 			MVKSemaphore* mvkSem = (MVKSemaphore*)pSubmit->pSignalSemaphores[ssIdx];
 			_signalSemaphores.push_back(mvkSem);
 		}
 	}

 //	MVKLogDebug("Creating submission %p. Submission count %u.", this, ++_subCount);
 }


 #pragma mark -
 #pragma mark MVKQueuePresentSurfaceSubmission

 // NOTE(review): this macro is not referenced anywhere in the visible code. Presentation via
 // a command buffer is selected at runtime from the device configuration - confirm it is unused.
 #define MVK_PRESENT_VIA_CMD_BUFFER		0

 // Presents each of the surface images, either directly, or through a MTLCommandBuffer,
 // depending on the device configuration, and then destroys this submission.
 // In both cases, the wait semaphores are waited on before the presentation takes effect.
 void MVKQueuePresentSurfaceSubmission::execute() {
 	id<MTLCommandQueue> mtlQ = _queue->getMTLCommandQueue();

 	const bool presentViaCmdBuff = (_device->_mvkConfig.presentWithCommandBuffer ||
 									_device->_mvkConfig.displayWatermark);
 	if ( !presentViaCmdBuff ) {
 		// Wait on semaphores, then present directly.
 		for (auto& sem : _waitSemaphores) { sem->wait(); }
 		for (auto& img : _surfaceImages) { img->presentCAMetalDrawable(nil); }
 	} else {
 		// Create a command buffer, present surfaces via the command buffer,
 		// then wait on the semaphores before committing.
 		id<MTLCommandBuffer> presentCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
 		[presentCmdBuff setLabel: mvkMTLCommandBufferLabel(kMVKCommandUseQueuePresent)];
 		[presentCmdBuff enqueue];

 		for (auto& img : _surfaceImages) { img->presentCAMetalDrawable(presentCmdBuff); }
 		for (auto& sem : _waitSemaphores) { sem->wait(); }

 		[presentCmdBuff commit];
 	}

 	// Let Xcode know the frame is done, in case command buffer is not used
 	if (_device->_mvkConfig.debugMode) {
 		[mtlQ insertDebugCaptureBoundary];
 	}

 	this->destroy();
 }

 // Builds a presentation submission from the VkPresentInfoKHR. The wait semaphores are
 // passed to the superclass, and the image to present is collected from each swapchain.
 // The submission result is set to VK_ERROR_OUT_OF_DATE_KHR if any of the swapchains
 // reports that its surface size has changed.
 MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKDevice* device,
 																   MVKQueue* queue,
 																   const VkPresentInfoKHR* pPresentInfo)
 		: MVKQueueSubmission(device,
 							 queue,
 							 pPresentInfo->waitSemaphoreCount,
 							 pPresentInfo->pWaitSemaphores) {

 	const uint32_t swapchainCnt = pPresentInfo->swapchainCount;
 	_surfaceImages.reserve(swapchainCnt);
 	for (uint32_t scIdx = 0; scIdx < swapchainCnt; scIdx++) {
 		MVKSwapchain* swapchain = (MVKSwapchain*)pPresentInfo->pSwapchains[scIdx];

 		// The image to present is identified by its index within its swapchain.
 		uint32_t imgIdx = pPresentInfo->pImageIndices[scIdx];
 		_surfaceImages.push_back(swapchain->getImage(imgIdx));

 		// Test each swapchain for a change in surface size.
 		if (swapchain->getHasSurfaceSizeChanged()) { _submissionResult = VK_ERROR_OUT_OF_DATE_KHR; }
 	}
 }
	/*
	* MVKQueue.mm
	*
	* Copyright (c) 2014-2018 The Brenwill Workshop Ltd. (http://www.brenwill.com)
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include "MVKQueue.h"
	#include "MVKSwapchain.h"
	#include "MVKSync.h"
	#include "MVKFoundation.h"
	#include "MVKOSExtensions.h"
	#include "MVKLogging.h"

	using namespace std;


	#pragma mark -
	#pragma mark MVKQueueFamily

	// Returns the MTLCommandQueue cached for the specified queue index, creating and caching
	// it on first request. MTLCommandQueues are cached in MVKQueueFamily/MVKPhysicalDevice
	// because they are very limited in number. An app that creates multiple VkDevices over
	// time (such as a test suite) will soon find 15 second delays when creating subsequent
	// MTLCommandQueues.
	id<MTLCommandQueue> MVKQueueFamily::getMTLCommandQueue(uint32_t queueIndex) {
		lock_guard<mutex> cacheGuard(_qLock);		// serialize access to the cache

		// A queue has already been created for this index.
		if (_mtlQueues[queueIndex]) { return _mtlQueues[queueIndex]; }

		// First request for this index: create the queue and remember it.
		id<MTLCommandQueue> newQ = [_physicalDevice->getMTLDevice() newCommandQueue];	// retained
		_mtlQueues[queueIndex] = newQ;
		return newQ;
	}

	// Captures the physical device, the index of this queue family and a copy of its
	// properties, and sizes the MTLCommandQueue cache to one empty slot per queue.
	MVKQueueFamily::MVKQueueFamily(MVKPhysicalDevice* physicalDevice, uint32_t queueFamilyIndex, const VkQueueFamilyProperties* pProperties) {
		_properties = *pProperties;
		_queueFamilyIndex = queueFamilyIndex;
		_physicalDevice = physicalDevice;

		// Every slot starts out nil. getMTLCommandQueue() fills a slot when it is first requested.
		_mtlQueues.assign(pProperties->queueCount, nil);
	}

	// Hands the cached MTLCommandQueues to mvkReleaseContainerContents(). That helper is
	// defined elsewhere; presumably it releases each element of the container.
	MVKQueueFamily::~MVKQueueFamily() {
	mvkReleaseContainerContents(_mtlQueues);
	}


	#pragma mark -
	#pragma mark MVKQueue


	#pragma mark Queue submissions

	// Executes the submission, either immediately on the calling thread when there is no
	// execution queue, or asynchronously on the execution dispatch queue. Each dispatched
	// submission is wrapped in a dedicated autorelease pool, because relying on the dispatch
	// queue to find time to drain the autorelease pool can result in significant memory
	// creep under heavy workloads.
	// Returns the result held by the submission, or VK_SUCCESS if the submission is null.
	VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) {
		if (qSubmit == nullptr) { return VK_SUCCESS; }		// Nothing to submit

		// Read the result before starting execution, to avoid a race condition
		// with the submission being destroyed early once it has executed.
		const VkResult submitResult = qSubmit->_submissionResult;

		if ( !_execQueue ) {
			qSubmit->execute();
			return submitResult;
		}

		dispatch_async(_execQueue, ^{
			@autoreleasepool { qSubmit->execute(); }
		});
		return submitResult;
	}

	// Wraps each VkSubmitInfo in a MVKQueueCommandBufferSubmission and submits them in order.
	// The fence, if any, is attached only to the last submission. If there are no batches,
	// but a fence was supplied, a single fence-only submission is made instead.
	// Returns the first result that is not VK_SUCCESS, or VK_SUCCESS if there is none.
	VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits,
							  VkFence fence, MVKCommandUse cmdBuffUse) {

		// No batches, but a fence that needs to be signalled
		if (fence && submitCount == 0) {
			MVKQueueCommandBufferSubmission* fenceOnlySub =
				new MVKQueueCommandBufferSubmission(_device, this, VK_NULL_HANDLE, fence, cmdBuffUse);
			return submit(fenceOnlySub);
		}

		VkResult firstFailure = VK_SUCCESS;
		for (uint32_t i = 0; i < submitCount; i++) {
			// Only the final batch carries the fence
			const bool isLastBatch = (i + 1 == submitCount);
			VkFence batchFence = isLastBatch ? fence : VK_NULL_HANDLE;

			MVKQueueCommandBufferSubmission* batchSub =
				new MVKQueueCommandBufferSubmission(_device, this, &pSubmits[i], batchFence, cmdBuffUse);
			VkResult batchResult = submit(batchSub);

			// Once a failure has been seen, later results do not replace it
			if (firstFailure == VK_SUCCESS) { firstFailure = batchResult; }
		}
		return firstFailure;
	}

	// Wraps the presentation info in a new MVKQueuePresentSurfaceSubmission
	// and submits it, returning the result of that submission.
	VkResult MVKQueue::submit(const VkPresentInfoKHR* pPresentInfo) {
	return submit(new MVKQueuePresentSurfaceSubmission(_device, this, pPresentInfo));
	}

	// Waits for the queue to become idle by submitting an empty batch that
	// signals a temporary fence, and then waiting on that fence.
	VkResult MVKQueue::waitIdle(MVKCommandUse cmdBuffUse) {

		// An empty batch: no wait semaphores, no command buffers, no signal semaphores.
		// Value-initialization zeroes every member, so only the type needs to be set.
		VkSubmitInfo emptySubmit = {};
		emptySubmit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;

		// A fence created with no flags
		VkFenceCreateInfo fenceInfo = {};
		fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;

		// The fence lives on the stack, and is only needed for the duration of this call
		MVKFence idleFence(_device, &fenceInfo);
		VkFence fenceHandle = (VkFence)&idleFence;

		submit(1, &emptySubmit, fenceHandle, cmdBuffUse);
		return mvkWaitForFences(1, &fenceHandle, false);
	}

	// Creates a labelled MTLCommandBuffer on the Metal queue, tags it with a unique ID,
	// arranges to be notified when it completes, and counts it as active.
	//
	// This function is guarded against conflict with the mtlCommandBufferHasCompleted()
	// function, but is not threadsafe against calls to this function itself, or to the
	// registerMTLCommandBufferCountdown() function from multiple threads. It is assumed
	// that this function and the registerMTLCommandBufferCountdown() function are called
	// from a single thread.
	id<MTLCommandBuffer> MVKQueue::makeMTLCommandBuffer(NSString* mtlCmdBuffLabel) {

		id<MTLCommandBuffer> cmdBuff = [_mtlQueue commandBufferWithUnretainedReferences];
		[cmdBuff setLabel: mtlCmdBuffLabel];

		// The ID is captured by the completion handler, so that the
		// completion can be matched to this particular MTLCommandBuffer.
		MVKMTLCommandBufferID cmdBuffID = _nextMTLCmdBuffID++;
		[cmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> completedCmdBuff) {
			this->mtlCommandBufferHasCompleted(completedCmdBuff, cmdBuffID);
		}];

		// The active count is also modified when a MTLCommandBuffer completes,
		// so it can only be changed while holding the completion lock.
		{
			lock_guard<mutex> lock(_completionLock);
			_activeMTLCommandBufferCount++;
		}

		return cmdBuff;
	}

	// Gives the countdown the current active MTLCommandBuffer count and the next
	// MTLCommandBuffer ID, and tracks the countdown unless that call returns true.
	// NOTE(review): a true return presumably means the countdown has already finished - confirm.
	//
	// This function must be called after all corresponding calls
	// to makeMTLCommandBuffer() and from the same thread.
	void MVKQueue::registerMTLCommandBufferCountdown(MVKMTLCommandBufferCountdown* countdown) {
		lock_guard<mutex> lock(_completionLock);

		bool needsNoTracking = countdown->setActiveMTLCommandBufferCount(_activeMTLCommandBufferCount, _nextMTLCmdBuffID);
		if (needsNoTracking) { return; }

		_completionCountdowns.push_back(countdown);
	}

	// Called from the completion handler of a MTLCommandBuffer. Decrements the active count,
	// notifies every tracked countdown of the completion, and stops tracking the countdowns
	// that report that they are now complete.
	void MVKQueue::mtlCommandBufferHasCompleted(id<MTLCommandBuffer> mtlCmdBuff, MVKMTLCommandBufferID mtlCmdBuffID) {
		lock_guard<mutex> lock(_completionLock);

		_activeMTLCommandBufferCount--;

		// Compact the array in place. keptCnt is both the number of countdowns
		// kept so far, and the position at which the next kept one is written.
		const uint32_t totalCnt = (uint32_t)_completionCountdowns.size();
		uint32_t keptCnt = 0;
		for (uint32_t idx = 0; idx < totalCnt; idx++) {
			MVKMTLCommandBufferCountdown* cd = _completionCountdowns[idx];

			// A countdown that has just completed is dropped, by not copying it forward.
			if (cd->mtlCommandBufferHasCompleted(mtlCmdBuffID)) { continue; }

			// Close the gap left by any countdowns dropped earlier.
			if (keptCnt != idx) { _completionCountdowns[keptCnt] = cd; }
			keptCnt++;
		}

		// Trim the stale entries left at the end, if any countdowns were dropped.
		if (keptCnt < totalCnt) { _completionCountdowns.resize(keptCnt); }
	}


	#pragma mark Construction

	// The quality-of-service class requested for the execution dispatch queue of each MVKQueue.
	#define MVK_DISPATCH_QUEUE_QOS_CLASS QOS_CLASS_USER_INITIATED

	// Records the queue family, index and priority of this queue, resets the MTLCommandBuffer
	// tracking state, and sets up the execution dispatch queue and the Metal command queue.
	MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority)
			: MVKDispatchableDeviceObject(device), _commandEncodingPool(device) {

		// No MTLCommandBuffers are active yet, and their IDs are handed out starting at 1.
		_nextMTLCmdBuffID = 1;
		_activeMTLCommandBufferCount = 0;

		// The init functions below read these, so they are set first.
		_priority = priority;
		_index = index;
		_queueFamily = queueFamily;

		initExecQueue();
		initMTLCommandQueue();
	}

	// Unless synchronous submission processing was configured, creates and initializes
	// the prioritized execution dispatch queue. When synchronous submission is configured,
	// no dispatch queue is created, and submissions execute on the calling thread.
	void MVKQueue::initExecQueue() {
		if (_device->_mvkConfig.synchronousQueueSubmits) {
			_execQueue = nullptr;
			return;
		}

		// Create a name for the dispatch queue. The output is bounded by snprintf(), so that
		// an unexpectedly large priority value, which expands the %.1f field well beyond a
		// few characters, truncates the name instead of writing past the end of the buffer.
		const char* dqNameFmt = "MoltenVKDispatchQueue-%d-%d-%.1f";
		char dqName[64];
		snprintf(dqName, sizeof(dqName), dqNameFmt, _queueFamily->getIndex(), _index, _priority);

		// Determine the dispatch queue priority. A queue priority of 1.0 maps to a relative
		// priority of zero, and a queue priority of 0.0 maps to QOS_MIN_RELATIVE_PRIORITY.
		dispatch_qos_class_t dqQOS = MVK_DISPATCH_QUEUE_QOS_CLASS;
		int dqPriority = (1.0 - _priority) * QOS_MIN_RELATIVE_PRIORITY;
		dispatch_queue_attr_t dqAttr = dispatch_queue_attr_make_with_qos_class(DISPATCH_QUEUE_SERIAL, dqQOS, dqPriority);

		// Create the dispatch queue
		_execQueue = dispatch_queue_create(dqName, dqAttr);		// retained
	}

	/**
	 * Retrieves the Metal command queue for this queue from the queue family, and
	 * records the time taken to do so in the performance statistics of the device.
	 */
	void MVKQueue::initMTLCommandQueue() {
		const uint64_t accessStart = _device->getPerformanceTimestamp();

		// The MTLCommandQueue is cached in the queue family, and is not retained here.
		_mtlQueue = _queueFamily->getMTLCommandQueue(_index);

		_device->addActivityPerformance(_device->_performanceStatistics.queue.mtlQueueAccess, accessStart);

		// Allow Xcode to capture the first frame if desired.
		[_mtlQueue insertDebugCaptureBoundary];
	}

	MVKQueue::~MVKQueue() {
	// Acquire the completion lock, and hold it while the execution queue is released.
	//
	// Delay destroying this queue until registerMTLCommandBufferCountdown() is done.
	// registerMTLCommandBufferCountdown() can trigger a queue submission to finish(),
	// which may trigger semaphores that control a queue waitIdle(). If that waitIdle()
	// is being called by the app just prior to device and queue destruction, a rare race
	// condition exists if registerMTLCommandBufferCountdown() does not complete before
	// this queue is destroyed. If _completionLock is destroyed along with this queue,
	// before registerMTLCommandBufferCountdown() completes, a SIGABRT crash will arise
	// in the destructor of the lock created in registerMTLCommandBufferCountdown().
	lock_guard<mutex> lock(_completionLock);
	destroyExecQueue();
	}

	// Releases the execution dispatch queue, if one was created.
	void MVKQueue::destroyExecQueue() {
		if ( !_execQueue ) { return; }		// Synchronous submission: there is no queue
		dispatch_release(_execQueue);
	}


	#pragma mark -
	#pragma mark MVKQueueCommandBufferSubmissionCountdown

	// Remembers the submission that is to be finished when this countdown finishes.
	MVKQueueCommandBufferSubmissionCountdown::MVKQueueCommandBufferSubmissionCountdown(MVKQueueCommandBufferSubmission* qSub) {
	_qSub = qSub;
	}

	// Forwards to the finish() function of the submission this countdown was created with.
	void MVKQueueCommandBufferSubmissionCountdown::finish() { _qSub->finish(); }


	#pragma mark -
	#pragma mark MVKQueueSubmission

	// Associates the submission with its queue, starts it with a VK_SUCCESS result and no
	// neighbouring submissions, and takes a copy of the semaphores it must wait on.
	MVKQueueSubmission::MVKQueueSubmission(MVKDevice* device,
										   MVKQueue* queue,
										   uint32_t waitSemaphoreCount,
										   const VkSemaphore* pWaitSemaphores) : MVKBaseDeviceObject(device) {
		_submissionResult = VK_SUCCESS;
		_queue = queue;
		_next = VK_NULL_HANDLE;
		_prev = VK_NULL_HANDLE;

		// Copy the wait semaphore handles into this submission as MVKSemaphore pointers.
		_waitSemaphores.reserve(waitSemaphoreCount);
		for (uint32_t wsIdx = 0; wsIdx < waitSemaphoreCount; wsIdx++) {
			MVKSemaphore* mvkSem = (MVKSemaphore*)pWaitSemaphores[wsIdx];
			_waitSemaphores.push_back(mvkSem);
		}

		// There is only something to wait for if at least one semaphore was supplied.
		_isAwaitingSemaphores = (waitSemaphoreCount != 0);
	}

	// Records the result against this submission, unless a result other than
	// VK_SUCCESS has already been recorded, in which case the earlier result is kept.
	void MVKQueueSubmission::recordResult(VkResult vkResult) {
		bool hasFailedAlready = (_submissionResult != VK_SUCCESS);
		if (hasFailedAlready) { return; }
		_submissionResult = vkResult;
	}


	#pragma mark -
	#pragma mark MVKQueueCommandBufferSubmission

	// Counter of live submissions, for debugging. In the visible code, it is
	// referenced only by MVKLogDebug() calls that are currently commented out.
	std::atomic<uint32_t> _subCount;

	// Executes each command buffer of this submission in turn, commits the active
	// MTLCommandBuffer, and registers to be told when the MTLCommandBuffers have completed.
	void MVKQueueCommandBufferSubmission::execute() {

		// MVKLogDebug("Executing submission %p.", this);

		// Execute each command buffer, or if no command buffers, but a fence or semaphores,
		// create an empty MTLCommandBuffer to trigger the semaphores and fence.
		if ( !_cmdBuffers.empty() ) {
			// Each command buffer is told its position within the batch, counting from 1.
			MVKCommandBufferBatchPosition cmdBuffPos = {1, uint32_t(_cmdBuffers.size()), _cmdBuffUse};
			for (auto& cb : _cmdBuffers) {
				cb->execute(this, cmdBuffPos);
				cmdBuffPos.index++;
			}
		} else {
			// The logical-OR operator here must be a plain ||. A backslash-escaped
			// form of the operator is not valid code, and does not compile.
			if (_fence || !_signalSemaphores.empty() ) {
				getActiveMTLCommandBuffer();
			}
		}

		commitActiveMTLCommandBuffer();

		// Register for callback when MTLCommandBuffers have completed
		_queue->registerMTLCommandBufferCountdown(&_cmdBuffCountdown);
	}

	// Returns the MTLCommandBuffer that is currently active for this submission.
	// If there is none, one is first created from the queue and enqueued.
	id<MTLCommandBuffer> MVKQueueCommandBufferSubmission::getActiveMTLCommandBuffer() {
		if (_activeMTLCommandBuffer) { return _activeMTLCommandBuffer; }

		_activeMTLCommandBuffer = _queue->makeMTLCommandBuffer(getMTLCommandBufferName());
		[_activeMTLCommandBuffer enqueue];
		return _activeMTLCommandBuffer;
	}

	// Waits on the wait semaphores, if that has not yet been done for this submission,
	// then commits the active MTLCommandBuffer and forgets it.
	void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer() {

		// The wait is delayed until here, to allow as much filling of the MTLCommandBuffer as
		// possible before forcing a wait. The semaphores are waited on in turn, and the order
		// in which they are signalled does not matter. The flag is cleared first, so that each
		// semaphore is waited on only once per submission.
		if (_isAwaitingSemaphores) {
			_isAwaitingSemaphores = false;
			for (auto& sem : _waitSemaphores) {
				sem->wait();
			}
		}

		[_activeMTLCommandBuffer commit];

		// The MTLCommandBuffer was not retained, so the reference is simply cleared.
		_activeMTLCommandBuffer = nil;
	}

	// Returns an NSString suitable for use as a MTLCommandBuffer label. This is the label
	// for the command use, with a suffix appended when the use is kMVKCommandUseQueueSubmit.
	NSString* MVKQueueCommandBufferSubmission::getMTLCommandBufferName() {
		NSString* useLabel = mvkMTLCommandBufferLabel(_cmdBuffUse);
		if (_cmdBuffUse != kMVKCommandUseQueueSubmit) { return useLabel; }

		return [NSString stringWithFormat: @"%@ (virtual for sync)", useLabel];
	}

	// Signals the signal semaphores and the fence of this submission, and then
	// destroys the submission. The submission must not be used after this call.
	void MVKQueueCommandBufferSubmission::finish() {

		// MVKLogDebug("Finishing submission %p. Submission count %u.", this, _subCount--);

		// Semaphores first...
		for (auto& sem : _signalSemaphores) {
			sem->signal();
		}

		// ...then the fence, if there is one.
		if (_fence) {
			_fence->signal();
		}

		this->destroy();
	}

	// Builds a submission from an optional VkSubmitInfo and an optional fence. The wait
	// semaphores are passed to the superclass. The command buffers and signal semaphores are
	// copied here, and the recording result of each command buffer is recorded against
	// this submission.
	MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKDevice* device,
																	 MVKQueue* queue,
																	 const VkSubmitInfo* pSubmit,
																	 VkFence fence,
																	 MVKCommandUse cmdBuffUse)
			: MVKQueueSubmission(device,
								 queue,
								 (pSubmit ? pSubmit->waitSemaphoreCount : 0),
								 (pSubmit ? pSubmit->pWaitSemaphores : nullptr)), _cmdBuffCountdown(this) {

		_activeMTLCommandBuffer = nil;
		_cmdBuffUse = cmdBuffUse;
		_fence = (MVKFence*)fence;

		// pSubmit can be null if just tracking the fence alone
		if (pSubmit) {
			const uint32_t cmdBuffCnt = pSubmit->commandBufferCount;
			_cmdBuffers.reserve(cmdBuffCnt);
			for (uint32_t cbIdx = 0; cbIdx < cmdBuffCnt; cbIdx++) {
				MVKCommandBuffer* mvkCmdBuff = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[cbIdx]);
				_cmdBuffers.push_back(mvkCmdBuff);
				recordResult(mvkCmdBuff->getRecordingResult());
			}

			const uint32_t sigSemCnt = pSubmit->signalSemaphoreCount;
			_signalSemaphores.reserve(sigSemCnt);
			for (uint32_t ssIdx = 0; ssIdx < sigSemCnt; ssIdx++) {
				MVKSemaphore* mvkSem = (MVKSemaphore*)pSubmit->pSignalSemaphores[ssIdx];
				_signalSemaphores.push_back(mvkSem);
			}
		}

		// MVKLogDebug("Creating submission %p. Submission count %u.", this, ++_subCount);
	}


	#pragma mark -
	#pragma mark MVKQueuePresentSurfaceSubmission

	// NOTE(review): this macro is not referenced anywhere in the visible code. Presentation via
	// a command buffer is selected at runtime from the device configuration - confirm it is unused.
	#define MVK_PRESENT_VIA_CMD_BUFFER 0

	// Presents each of the surface images, either directly, or through a MTLCommandBuffer,
	// depending on the device configuration, and then destroys this submission.
	// In both cases, the wait semaphores are waited on before the presentation takes effect.
	void MVKQueuePresentSurfaceSubmission::execute() {
		id<MTLCommandQueue> mtlQ = _queue->getMTLCommandQueue();

		// The logical-OR operator here must be a plain ||. A backslash-escaped
		// form of the operator is not valid code, and does not compile.
		if (_device->_mvkConfig.presentWithCommandBuffer || _device->_mvkConfig.displayWatermark) {
			// Create a command buffer, present surfaces via the command buffer,
			// then wait on the semaphores before committing.
			id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
			mtlCmdBuff.label = mvkMTLCommandBufferLabel(kMVKCommandUseQueuePresent);
			[mtlCmdBuff enqueue];

			for (auto& si : _surfaceImages) { si->presentCAMetalDrawable(mtlCmdBuff); }
			for (auto& ws : _waitSemaphores) { ws->wait(); }

			[mtlCmdBuff commit];
		} else {
			// Wait on semaphores, then present directly.
			for (auto& ws : _waitSemaphores) { ws->wait(); }
			for (auto& si : _surfaceImages) { si->presentCAMetalDrawable(nil); }
		}

		// Let Xcode know the frame is done, in case command buffer is not used
		if (_device->_mvkConfig.debugMode) { [mtlQ insertDebugCaptureBoundary]; }

		this->destroy();
	}

	// Builds a presentation submission from the VkPresentInfoKHR. The wait semaphores are
	// passed to the superclass, and the image to present is collected from each swapchain.
	// The submission result is set to VK_ERROR_OUT_OF_DATE_KHR if any of the swapchains
	// reports that its surface size has changed.
	MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKDevice* device,
																	   MVKQueue* queue,
																	   const VkPresentInfoKHR* pPresentInfo)
			: MVKQueueSubmission(device,
								 queue,
								 pPresentInfo->waitSemaphoreCount,
								 pPresentInfo->pWaitSemaphores) {

		const uint32_t swapchainCnt = pPresentInfo->swapchainCount;
		_surfaceImages.reserve(swapchainCnt);
		for (uint32_t scIdx = 0; scIdx < swapchainCnt; scIdx++) {
			MVKSwapchain* swapchain = (MVKSwapchain*)pPresentInfo->pSwapchains[scIdx];

			// The image to present is identified by its index within its swapchain.
			uint32_t imgIdx = pPresentInfo->pImageIndices[scIdx];
			_surfaceImages.push_back(swapchain->getImage(imgIdx));

			// Test each swapchain for a change in surface size.
			if (swapchain->getHasSurfaceSizeChanged()) { _submissionResult = VK_ERROR_OUT_OF_DATE_KHR; }
		}
	}