MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm - external/github.com/KhronosGroup/MoltenVK - Git at Google

 /*
  * MVKQueue.mm
  *
  * Copyright (c) 2014-2019 The Brenwill Workshop Ltd. (http://www.brenwill.com)
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "MVKInstance.h"
 #include "MVKQueue.h"
 #include "MVKSwapchain.h"
 #include "MVKSync.h"
 #include "MVKFoundation.h"
 #include "MVKOSExtensions.h"
 #include "MVKGPUCapture.h"
 #include "MVKLogging.h"

 using namespace std;


 #pragma mark -
 #pragma mark MVKQueueFamily

 // Returns the MTLCommandQueue at the specified index within this family, creating and caching
 // it on first request. MTLCommandQueues are cached in MVKQueueFamily/MVKPhysicalDevice because
 // they are very limited in number. An app that creates multiple VkDevices over time (such as a
 // test suite) will soon find 15 second delays when creating subsequent MTLCommandQueues.
 id<MTLCommandQueue> MVKQueueFamily::getMTLCommandQueue(uint32_t queueIndex) {
 	lock_guard<mutex> lock(_qLock);		// Held across both the cache lookup and the lazy creation

 	// Fast path: the queue at this index has already been created.
 	if (_mtlQueues[queueIndex]) { return _mtlQueues[queueIndex]; }

 	auto pMVKConfig = _physicalDevice->getInstance()->getMoltenVKConfiguration();
 	uint32_t maxCmdBuffs = pMVKConfig->maxActiveMetalCommandBuffersPerQueue;
 	id<MTLCommandQueue> newMTLQ = [_physicalDevice->getMTLDevice() newCommandQueueWithMaxCommandBufferCount: maxCmdBuffs];		// retained
 	_mtlQueues[queueIndex] = newMTLQ;
 	return newMTLQ;
 }

 // Constructs a queue family for the specified physical device, copying the family properties
 // and sizing the MTLCommandQueue cache with one nil slot per queue in the family.
 // The MTLCommandQueues themselves are created lazily by getMTLCommandQueue().
 MVKQueueFamily::MVKQueueFamily(MVKPhysicalDevice* physicalDevice, uint32_t queueFamilyIndex, const VkQueueFamilyProperties* pProperties) {
 	_properties = *pProperties;
 	_queueFamilyIndex = queueFamilyIndex;
 	_physicalDevice = physicalDevice;

 	// One empty cache slot per queue.
 	_mtlQueues.assign(_properties.queueCount, nil);
 }

 // Releases the contents of the MTLCommandQueue cache. The queues were returned retained
 // by newCommandQueueWithMaxCommandBufferCount: when they were lazily created.
 MVKQueueFamily::~MVKQueueFamily() {
 	mvkReleaseContainerContents(_mtlQueues);
 }


 #pragma mark -
 #pragma mark MVKQueue

 // Passes the MTLCommandQueue and this queue's debug name to setLabelIfNotNil().
 void MVKQueue::propogateDebugName() { setLabelIfNotNil(_mtlQueue, _debugName); }


 #pragma mark Queue submissions

 // Executes the queue submission inside a dedicated autorelease pool, to ensure transient Metal
 // objects autoreleased during execution are released when the pool drains at the end of the call.
 // This is critical for apps that don't use standard OS autoreleasing runloop threading.
 static inline void execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { qSubmit->execute(); } }

 // Executes the submission, either immediately on the calling thread, or by dispatching it
 // asynchronously to the execution dispatch queue when one exists. Either way, execution is
 // wrapped in a dedicated autorelease pool. Relying on the dispatch queue to find time to
 // drain the autorelease pool can result in significant memory creep under heavy workloads.
 // A null submission is ignored. Returns the configuration result of the submission.
 VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) {
 	if (qSubmit == nullptr) { return VK_SUCCESS; }

 	// Read the result before handing the submission off, to avoid a race
 	// condition with the submission being destroyed early once it executes.
 	const VkResult cfgRslt = qSubmit->getConfigurationResult();

 	if ( !_execQueue ) {
 		execute(qSubmit);
 		return cfgRslt;
 	}

 	dispatch_async(_execQueue, ^{ execute(qSubmit); } );
 	return cfgRslt;
 }

 // Creates and submits one command buffer submission per VkSubmitInfo. The fence, if any, is
 // attached to the last submission. With no submit infos, a fence-only submission is made
 // when a fence is provided. Returns the first non-success result, or VK_SUCCESS.
 VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) {

 	if (submitCount == 0) {
 		// Nothing to submit, unless a fence needs to be tracked on its own.
 		if (fence) { return submit(new MVKQueueCommandBufferSubmission(this, nullptr, fence)); }
 		return VK_SUCCESS;
 	}

 	const uint32_t lastIdx = submitCount - 1;
 	VkResult firstFailure = VK_SUCCESS;
 	for (uint32_t idx = 0; idx < submitCount; idx++) {
 		VkFence subFence = (idx == lastIdx) ? fence : VK_NULL_HANDLE;		// only the last one gets the fence
 		VkResult subRslt = submit(new MVKQueueCommandBufferSubmission(this, &pSubmits[idx], subFence));
 		if (firstFailure == VK_SUCCESS) { firstFailure = subRslt; }
 	}
 	return firstFailure;
 }

 // Wraps the presentation info in a surface presentation submission and submits it to this queue.
 VkResult MVKQueue::submit(const VkPresentInfoKHR* pPresentInfo) {
 	auto* presentSubmission = new MVKQueuePresentSurfaceSubmission(this, pPresentInfo);
 	return submit(presentSubmission);
 }

 // Submits a fence-only submission to this queue and blocks until that fence is signaled.
 // Returns the result of the fence wait.
 // NOTE(review): the fence lives on this stack frame; confirm that nothing is still touching
 // it once mvkWaitForFences() returns and the frame unwinds.
 VkResult MVKQueue::waitIdle() {

 	VkFenceCreateInfo fenceInfo = {};		// pNext and flags stay zeroed
 	fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;

 	MVKFence idleFence(_device, &fenceInfo);
 	VkFence vkFence = (VkFence)&idleFence;

 	// A submit count of zero with a fence produces a fence-only submission.
 	submit(0, nullptr, vkFence);
 	return mvkWaitForFences(_device, 1, &vkFence, false);
 }


 #pragma mark Construction

 #define MVK_DISPATCH_QUEUE_QOS_CLASS		QOS_CLASS_USER_INITIATED

 // Constructs a queue at the specified index within the queue family, then initializes its
 // name, execution dispatch queue, Metal command queue, and GPU capture scopes, in that order.
 MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority)
         : MVKDeviceTrackingMixin(device) {

 	_nextMTLCmdBuffID = 1;
 	_priority = priority;
 	_index = index;
 	_queueFamily = queueFamily;

 	initName();					// Reads the queue family, index and priority set above
 	initExecQueue();			// Labels the dispatch queue using getName()
 	initMTLCommandQueue();
 	initGPUCaptureScopes();
 }

 // Builds the name of this queue from the queue family index, the queue index,
 // and the queue priority, and stores it in _name.
 void MVKQueue::initName() {
 	const char* fmt = "MoltenVKQueue-%d-%d-%.1f";
 	char name[256];
 	// Bounded write: snprintf() never writes past the end of the buffer and always terminates it.
 	snprintf(name, sizeof(name), fmt, _queueFamily->getIndex(), _index, _priority);
 	_name = name;
 }

 // Creates the serial dispatch queue on which submissions are executed, unless the
 // configuration requests synchronous queue submits, in which case _execQueue stays nil.
 void MVKQueue::initExecQueue() {
 	_execQueue = nil;
 	if (_device->_pMVKConfig->synchronousQueueSubmits) { return; }

 	// Scale the queue priority into a relative priority within the QOS class.
 	int relPriority = (1.0 - _priority) * QOS_MIN_RELATIVE_PRIORITY;
 	const dispatch_qos_class_t qosClass = MVK_DISPATCH_QUEUE_QOS_CLASS;
 	dispatch_queue_attr_t queueAttr = dispatch_queue_attr_make_with_qos_class(DISPATCH_QUEUE_SERIAL, qosClass, relPriority);

 	// Label the dispatch queue after this queue's name.
 	const std::string label = getName() + "-Dispatch";
 	_execQueue = dispatch_queue_create(label.c_str(), queueAttr);		// retained
 }

 // Retrieves the Metal command queue for this queue's index from the queue family,
 // and records the time taken as an activity performance statistic on the device.
 void MVKQueue::initMTLCommandQueue() {
 	const uint64_t accessStart = _device->getPerformanceTimestamp();

 	// not retained (cached in queue family)
 	_mtlQueue = _queueFamily->getMTLCommandQueue(_index);

 	_device->addActivityPerformance(_device->_performanceStatistics.queue.mtlQueueAccess, accessStart);
 }

 // Creates the Xcode GPU capture scopes for command buffer submission and surface presentation.
 // The presentation scope is made the default scope when this queue matches the queue family
 // index and queue index named in the configuration, and is begun immediately.
 void MVKQueue::initGPUCaptureScopes() {
 	const MVKConfiguration* pCfg = getInstance()->getMoltenVKConfiguration();

 	_submissionCaptureScope = new MVKGPUCaptureScope(this, "CommandBuffer-Submission");
 	_presentationCaptureScope = new MVKGPUCaptureScope(this, "Surface-Presentation");

 	const bool isDefaultScopeQueue = (_queueFamily->getIndex() == pCfg->defaultGPUCaptureScopeQueueFamilyIndex &&
 									  _index == pCfg->defaultGPUCaptureScopeQueueIndex);
 	if (isDefaultScopeQueue) { _presentationCaptureScope->makeDefault(); }

 	// Allow Xcode to capture the first frame if desired.
 	_presentationCaptureScope->beginScope();
 }

 // Releases the execution dispatch queue, if one was created, and destroys both GPU capture
 // scopes. The MTLCommandQueue is not released here, because it is not retained by this
 // queue (it is cached in the queue family).
 MVKQueue::~MVKQueue() {
 	destroyExecQueue();
 	_submissionCaptureScope->destroy();
 	_presentationCaptureScope->destroy();
 }

 // Releases the execution dispatch queue and clears the reference to it.
 // Does nothing if no dispatch queue exists.
 void MVKQueue::destroyExecQueue() {
 	if ( !_execQueue ) { return; }

 	dispatch_release(_execQueue);
 	_execQueue = nullptr;
 }


 #pragma mark -
 #pragma mark MVKQueueSubmission

 // Constructs a submission on the specified queue, recording the semaphores that
 // must be waited on before the content of the submission is allowed to proceed.
 MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue,
 									   uint32_t waitSemaphoreCount,
 									   const VkSemaphore* pWaitSemaphores) {
 	_next = VK_NULL_HANDLE;
 	_prev = VK_NULL_HANDLE;
 	_queue = queue;

 	// Copy the wait semaphores, and remember whether any wait is still outstanding.
 	_waitSemaphores.reserve(waitSemaphoreCount);
 	for (uint32_t semIdx = 0; semIdx != waitSemaphoreCount; ++semIdx) {
 		MVKSemaphore* waitSem = (MVKSemaphore*)pWaitSemaphores[semIdx];
 		_waitSemaphores.push_back(waitSem);
 	}
 	_isAwaitingSemaphores = (waitSemaphoreCount != 0);
 }


 #pragma mark -
 #pragma mark MVKQueueCommandBufferSubmission

 // Executes this submission: encodes any semaphore waits, submits each command buffer,
 // encodes any semaphore signals, and commits the last MTLCommandBuffer with completion
 // signalling enabled.
 void MVKQueueCommandBufferSubmission::execute() {

 //	MVKLogDebug("Executing submission %p.", this);

 	_queue->_submissionCaptureScope->beginScope();

 	// When the device supports it, semaphore waits and signals are encoded
 	// into the MTLCommandBuffer, rather than being handled by this object.
 	const bool usesMTLEvents = _queue->getDevice()->_pMetalFeatures->events;

 	if (usesMTLEvents && _isAwaitingSemaphores) {
 		// Clear the flag so commitActiveMTLCommandBuffer() does not wait on them again.
 		_isAwaitingSemaphores = false;
 		for (auto* waitSem : _waitSemaphores) {
 			waitSem->encodeWait(getActiveMTLCommandBuffer());
 		}
 	}

 	// Submit each command buffer.
 	for (auto* cmdBuff : _cmdBuffers) { cmdBuff->submit(this); }

 	// A fence or signal semaphore needs a MTLCommandBuffer to trigger it,
 	// so ensure one is available in case no command buffers were provided.
 	const bool needsMTLCmdBuff = _fence || _isSignalingSemaphores;
 	if (needsMTLCmdBuff) { getActiveMTLCommandBuffer(); }

 	if (usesMTLEvents && _isSignalingSemaphores) {
 		// Clear the flag so finish() does not signal them again.
 		_isSignalingSemaphores = false;
 		for (auto* signalSem : _signalSemaphores) {
 			signalSem->encodeSignal(getActiveMTLCommandBuffer());
 		}
 	}

 	// Commit the last MTLCommandBuffer. This must remain the final statement,
 	// because the callback might destroy this instance before this function ends.
 	commitActiveMTLCommandBuffer(true);
 }

 // Returns the active MTLCommandBuffer. If there is none, a new MTLCommandBuffer
 // is first retrieved from the MTLCommandQueue and made the active one.
 id<MTLCommandBuffer> MVKQueueCommandBufferSubmission::getActiveMTLCommandBuffer() {
 	if (_activeMTLCommandBuffer) { return _activeMTLCommandBuffer; }

 	id<MTLCommandBuffer> freshCmdBuff = [_queue->_mtlQueue commandBufferWithUnretainedReferences];
 	setActiveMTLCommandBuffer(freshCmdBuff);
 	return _activeMTLCommandBuffer;
 }

 // Makes the specified MTLCommandBuffer the active one and enqueues it. Any MTLCommandBuffer
 // that is already active is committed first, without completion signalling.
 void MVKQueueCommandBufferSubmission::setActiveMTLCommandBuffer(id<MTLCommandBuffer> mtlCmdBuff) {

 	if (_activeMTLCommandBuffer != nil) { commitActiveMTLCommandBuffer(); }

 	[mtlCmdBuff enqueue];
 	_activeMTLCommandBuffer = mtlCmdBuff;	// not retained
 }

 // Commits and releases the currently active MTLCommandBuffer, optionally signalling
 // when the MTLCommandBuffer is done. The first time this is called, it will wait on
 // any semaphores that have not already been waited on. That wait is delayed as long as
 // possible to allow as much filling of the MTLCommandBuffer as possible before forcing a wait.
 //
 // If signalCompletion is true, finish() is called from the completion handler of the
 // committed MTLCommandBuffer, and a MTLCommandBuffer is created if none is active.
 void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool signalCompletion) {

 	if (_isAwaitingSemaphores) {
 		_isAwaitingSemaphores = false;
 		for (auto& ws : _waitSemaphores) { ws->wait(); }
 	}

 	// If completion must be signalled, ensure a MTLCommandBuffer exists to carry the completion
 	// handler. Otherwise, for a submission that produced no MTLCommandBuffer, the handler would
 	// be added to nil (a no-op), finish() would never run, and this instance would never be destroyed.
 	// Use temp var because callback may destroy this instance before this function ends.
 	id<MTLCommandBuffer> mtlCmdBuff = signalCompletion ? getActiveMTLCommandBuffer() : _activeMTLCommandBuffer;
 	_activeMTLCommandBuffer = nil;			// not retained

 	if (signalCompletion) {
 		[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mcb) {
 			this->finish();
 		}];
 	}

 	[mtlCmdBuff commit];
 }

 // Called from the completion handler added in commitActiveMTLCommandBuffer(). Ends the
 // submission capture scope, signals any semaphores and fence that are still to be signaled
 // by this object, and then destroys this instance.
 void MVKQueueCommandBufferSubmission::finish() {

 //	MVKLogDebug("Finishing submission %p. Submission count %u.", this, _subCount--);

 	// Performed here instead of as part of execute() for rare case where app destroys queue
 	// immediately after a waitIdle() is cleared by fence below, taking the capture scope with it.
 	_queue->_submissionCaptureScope->endScope();

 	// Signal each of the signal semaphores.
 	// The flag has been cleared by execute() if the signals were already encoded there.
     if (_isSignalingSemaphores) {
         for (auto& ss : _signalSemaphores) { ss->signal(); }
     }

 	// If a fence exists, signal it.
 	if (_fence) { _fence->signal(); }

 	// Must be last: this instance must not be used after this call.
     this->destroy();
 }

 // Constructs a command buffer submission on the specified queue from the submit info,
 // recording its command buffers, signal semaphores, and optional fence.
 // pSubmit may be null, in which case the submission only tracks the fence.
 MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue,
 																 const VkSubmitInfo* pSubmit,
 																 VkFence fence)
         : MVKQueueSubmission(queue,
 							 (pSubmit ? pSubmit->waitSemaphoreCount : 0),
 							 (pSubmit ? pSubmit->pWaitSemaphores : nullptr)) {

 	// Start from a well-defined state, because execute() and finish() read
 	// this flag even for fence-only submissions, where pSubmit is null.
 	_isSignalingSemaphores = false;

 	// pSubmit can be null if just tracking the fence alone
 	if (pSubmit) {
 		uint32_t cbCnt = pSubmit->commandBufferCount;
 		_cmdBuffers.reserve(cbCnt);
 		for (uint32_t i = 0; i < cbCnt; i++) {
 			MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]);
 			_cmdBuffers.push_back(cb);
 			// Pass the configuration result of each command buffer on to this submission.
 			setConfigurationResult(cb->getConfigurationResult());
 		}

 		uint32_t ssCnt = pSubmit->signalSemaphoreCount;
 		_isSignalingSemaphores = ssCnt > 0;
 		_signalSemaphores.reserve(ssCnt);
 		for (uint32_t i = 0; i < ssCnt; i++) {
 			_signalSemaphores.push_back((MVKSemaphore*)pSubmit->pSignalSemaphores[i]);
 		}
 	}

 	_fence = (MVKFence*)fence;
 	_activeMTLCommandBuffer = nil;		// Retrieved lazily by getActiveMTLCommandBuffer()

 //	static std::atomic<uint32_t> _subCount;
 //	MVKLogDebug("Creating submission %p. Submission count %u.", this, ++_subCount);
 }


 #pragma mark -
 #pragma mark MVKQueuePresentSurfaceSubmission

 // Presents each surface image, either through a MTLCommandBuffer or directly, after
 // honouring the wait semaphores. Then ends the current presentation capture scope,
 // begins the next one, and destroys this instance.
 void MVKQueuePresentSurfaceSubmission::execute() {
 	MVKDevice* mvkDev = _queue->getDevice();

 	// If there are semaphores and this device supports MTLEvent, we must present
 	// with a command buffer in order to synchronize with the semaphores.
 	const bool encodesWaits = mvkDev->_pMetalFeatures->events && !_waitSemaphores.empty();
 	const bool usesCmdBuff = encodesWaits || mvkDev->_pMVKConfig->presentWithCommandBuffer || mvkDev->_pMVKConfig->displayWatermark;

 	// Stays nil when presenting directly.
 	id<MTLCommandBuffer> mtlCmdBuff = nil;
 	if (usesCmdBuff) { mtlCmdBuff = getMTLCommandBuffer(); }

 	if (encodesWaits) {
 		// Have the command buffer wait for the semaphores, then present surfaces via the command buffer.
 		for (auto& waitSem : _waitSemaphores) { waitSem->encodeWait(mtlCmdBuff); }
 		for (auto& surfImg : _surfaceImages) { surfImg->presentCAMetalDrawable(mtlCmdBuff); }
 	} else if (usesCmdBuff) {
 		// Present surfaces via the command buffer, then wait on the semaphores before committing.
 		for (auto& surfImg : _surfaceImages) { surfImg->presentCAMetalDrawable(mtlCmdBuff); }
 		for (auto& waitSem : _waitSemaphores) { waitSem->wait(); }
 	} else {
 		// Wait on semaphores, then present directly.
 		for (auto& waitSem : _waitSemaphores) { waitSem->wait(); }
 		for (auto& surfImg : _surfaceImages) { surfImg->presentCAMetalDrawable(nil); }
 	}

 	// No-op when presenting directly, since the command buffer is nil in that case.
 	[mtlCmdBuff commit];

 	// Let Xcode know the current frame is done, then start a new frame
 	auto captureScope = _queue->_presentationCaptureScope;
 	captureScope->endScope();
 	captureScope->beginScope();

 	// Must be last: this instance must not be used after this call.
 	this->destroy();
 }

 // Retrieves a new MTLCommandBuffer from the queue's MTLCommandQueue,
 // labels it for presentation, enqueues it, and returns it.
 id<MTLCommandBuffer> MVKQueuePresentSurfaceSubmission::getMTLCommandBuffer() {
 	id<MTLCommandQueue> mtlQ = _queue->getMTLCommandQueue();
 	id<MTLCommandBuffer> presentCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
 	setLabelIfNotNil(presentCmdBuff, @"vkQueuePresentKHR CommandBuffer");
 	[presentCmdBuff enqueue];
 	return presentCmdBuff;
 }

 // Constructs a presentation submission on the specified queue, collecting the image to
 // present from each swapchain, and recording in the configuration result whether any
 // swapchain has lost its surface or has had its surface size changed.
 MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKQueue* queue,
 																   const VkPresentInfoKHR* pPresentInfo)
 		: MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores) {

 	const uint32_t scCnt = pPresentInfo->swapchainCount;
 	_surfaceImages.reserve(scCnt);
 	for (uint32_t scIdx = 0; scIdx < scCnt; scIdx++) {
 		MVKSwapchain* swapchain = (MVKSwapchain*)pPresentInfo->pSwapchains[scIdx];
 		_surfaceImages.push_back(swapchain->getImage(pPresentInfo->pImageIndices[scIdx]));

 		// Surface loss takes precedence over out-of-date errors.
 		if (swapchain->getIsSurfaceLost()) {
 			setConfigurationResult(VK_ERROR_SURFACE_LOST_KHR);
 			continue;
 		}

 		// Only report out-of-date if no surface loss has been recorded.
 		if (swapchain->getHasSurfaceSizeChanged() && getConfigurationResult() != VK_ERROR_SURFACE_LOST_KHR) {
 			setConfigurationResult(VK_ERROR_OUT_OF_DATE_KHR);
 		}
 	}
 }
	/*
	* MVKQueue.mm
	*
	* Copyright (c) 2014-2019 The Brenwill Workshop Ltd. (http://www.brenwill.com)
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include "MVKInstance.h"
	#include "MVKQueue.h"
	#include "MVKSwapchain.h"
	#include "MVKSync.h"
	#include "MVKFoundation.h"
	#include "MVKOSExtensions.h"
	#include "MVKGPUCapture.h"
	#include "MVKLogging.h"

	using namespace std;


	#pragma mark -
	#pragma mark MVKQueueFamily

	// Returns the MTLCommandQueue at the specified index within this family, creating and caching
	// it on first request. MTLCommandQueues are cached in MVKQueueFamily/MVKPhysicalDevice because
	// they are very limited in number. An app that creates multiple VkDevices over time (such as a
	// test suite) will soon find 15 second delays when creating subsequent MTLCommandQueues.
	id<MTLCommandQueue> MVKQueueFamily::getMTLCommandQueue(uint32_t queueIndex) {
		lock_guard<mutex> lock(_qLock);		// Held across both the cache lookup and the lazy creation

		// Fast path: the queue at this index has already been created.
		if (_mtlQueues[queueIndex]) { return _mtlQueues[queueIndex]; }

		auto pMVKConfig = _physicalDevice->getInstance()->getMoltenVKConfiguration();
		uint32_t maxCmdBuffs = pMVKConfig->maxActiveMetalCommandBuffersPerQueue;
		id<MTLCommandQueue> newMTLQ = [_physicalDevice->getMTLDevice() newCommandQueueWithMaxCommandBufferCount: maxCmdBuffs];		// retained
		_mtlQueues[queueIndex] = newMTLQ;
		return newMTLQ;
	}

	// Constructs a queue family for the specified physical device, copying the family properties
	// and sizing the MTLCommandQueue cache with one nil slot per queue in the family.
	// The MTLCommandQueues themselves are created lazily by getMTLCommandQueue().
	MVKQueueFamily::MVKQueueFamily(MVKPhysicalDevice* physicalDevice, uint32_t queueFamilyIndex, const VkQueueFamilyProperties* pProperties) {
		_properties = *pProperties;
		_queueFamilyIndex = queueFamilyIndex;
		_physicalDevice = physicalDevice;

		// One empty cache slot per queue.
		_mtlQueues.assign(_properties.queueCount, nil);
	}

	// Releases the contents of the MTLCommandQueue cache. The queues were returned retained
	// by newCommandQueueWithMaxCommandBufferCount: when they were lazily created.
	MVKQueueFamily::~MVKQueueFamily() {
	mvkReleaseContainerContents(_mtlQueues);
	}


	#pragma mark -
	#pragma mark MVKQueue

	// Passes the MTLCommandQueue and this queue's debug name to setLabelIfNotNil().
	void MVKQueue::propogateDebugName() { setLabelIfNotNil(_mtlQueue, _debugName); }


	#pragma mark Queue submissions

	// Executes the queue submission inside a dedicated autorelease pool, to ensure transient Metal
	// objects autoreleased during execution are released when the pool drains at the end of the call.
	// This is critical for apps that don't use standard OS autoreleasing runloop threading.
	static inline void execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { qSubmit->execute(); } }

	// Executes the submission, either immediately on the calling thread, or by dispatching it
	// asynchronously to the execution dispatch queue when one exists. Either way, execution is
	// wrapped in a dedicated autorelease pool. Relying on the dispatch queue to find time to
	// drain the autorelease pool can result in significant memory creep under heavy workloads.
	// A null submission is ignored. Returns the configuration result of the submission.
	VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) {
		if (qSubmit == nullptr) { return VK_SUCCESS; }

		// Read the result before handing the submission off, to avoid a race
		// condition with the submission being destroyed early once it executes.
		const VkResult cfgRslt = qSubmit->getConfigurationResult();

		if ( !_execQueue ) {
			execute(qSubmit);
			return cfgRslt;
		}

		dispatch_async(_execQueue, ^{ execute(qSubmit); } );
		return cfgRslt;
	}

	// Creates and submits one command buffer submission per VkSubmitInfo. The fence, if any, is
	// attached to the last submission. With no submit infos, a fence-only submission is made
	// when a fence is provided. Returns the first non-success result, or VK_SUCCESS.
	VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence) {

		if (submitCount == 0) {
			// Nothing to submit, unless a fence needs to be tracked on its own.
			if (fence) { return submit(new MVKQueueCommandBufferSubmission(this, nullptr, fence)); }
			return VK_SUCCESS;
		}

		const uint32_t lastIdx = submitCount - 1;
		VkResult firstFailure = VK_SUCCESS;
		for (uint32_t idx = 0; idx < submitCount; idx++) {
			VkFence subFence = (idx == lastIdx) ? fence : VK_NULL_HANDLE;		// only the last one gets the fence
			VkResult subRslt = submit(new MVKQueueCommandBufferSubmission(this, &pSubmits[idx], subFence));
			if (firstFailure == VK_SUCCESS) { firstFailure = subRslt; }
		}
		return firstFailure;
	}

	// Wraps the presentation info in a surface presentation submission and submits it to this queue.
	VkResult MVKQueue::submit(const VkPresentInfoKHR* pPresentInfo) {
		auto* presentSubmission = new MVKQueuePresentSurfaceSubmission(this, pPresentInfo);
		return submit(presentSubmission);
	}

	// Submits a fence-only submission to this queue and blocks until that fence is signaled.
	// Returns the result of the fence wait.
	// NOTE(review): the fence lives on this stack frame; confirm that nothing is still touching
	// it once mvkWaitForFences() returns and the frame unwinds.
	VkResult MVKQueue::waitIdle() {

		VkFenceCreateInfo fenceInfo = {};		// pNext and flags stay zeroed
		fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;

		MVKFence idleFence(_device, &fenceInfo);
		VkFence vkFence = (VkFence)&idleFence;

		// A submit count of zero with a fence produces a fence-only submission.
		submit(0, nullptr, vkFence);
		return mvkWaitForFences(_device, 1, &vkFence, false);
	}


	#pragma mark Construction

	#define MVK_DISPATCH_QUEUE_QOS_CLASS QOS_CLASS_USER_INITIATED

	// Constructs a queue at the specified index within the queue family, then initializes its
	// name, execution dispatch queue, Metal command queue, and GPU capture scopes, in that order.
	MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority)
			: MVKDeviceTrackingMixin(device) {

		_nextMTLCmdBuffID = 1;
		_priority = priority;
		_index = index;
		_queueFamily = queueFamily;

		initName();					// Reads the queue family, index and priority set above
		initExecQueue();			// Labels the dispatch queue using getName()
		initMTLCommandQueue();
		initGPUCaptureScopes();
	}

	// Builds the name of this queue from the queue family index, the queue index,
	// and the queue priority, and stores it in _name.
	void MVKQueue::initName() {
		const char* fmt = "MoltenVKQueue-%d-%d-%.1f";
		char name[256];
		// Bounded write: snprintf() never writes past the end of the buffer and always terminates it.
		snprintf(name, sizeof(name), fmt, _queueFamily->getIndex(), _index, _priority);
		_name = name;
	}

	// Creates the serial dispatch queue on which submissions are executed, unless the
	// configuration requests synchronous queue submits, in which case _execQueue stays nil.
	void MVKQueue::initExecQueue() {
		_execQueue = nil;
		if (_device->_pMVKConfig->synchronousQueueSubmits) { return; }

		// Scale the queue priority into a relative priority within the QOS class.
		int relPriority = (1.0 - _priority) * QOS_MIN_RELATIVE_PRIORITY;
		const dispatch_qos_class_t qosClass = MVK_DISPATCH_QUEUE_QOS_CLASS;
		dispatch_queue_attr_t queueAttr = dispatch_queue_attr_make_with_qos_class(DISPATCH_QUEUE_SERIAL, qosClass, relPriority);

		// Label the dispatch queue after this queue's name.
		const std::string label = getName() + "-Dispatch";
		_execQueue = dispatch_queue_create(label.c_str(), queueAttr);		// retained
	}

	// Retrieves the Metal command queue for this queue's index from the queue family,
	// and records the time taken as an activity performance statistic on the device.
	void MVKQueue::initMTLCommandQueue() {
		const uint64_t accessStart = _device->getPerformanceTimestamp();

		// not retained (cached in queue family)
		_mtlQueue = _queueFamily->getMTLCommandQueue(_index);

		_device->addActivityPerformance(_device->_performanceStatistics.queue.mtlQueueAccess, accessStart);
	}

	// Creates the Xcode GPU capture scopes for command buffer submission and surface presentation.
	// The presentation scope is made the default scope when this queue matches the queue family
	// index and queue index named in the configuration, and is begun immediately.
	void MVKQueue::initGPUCaptureScopes() {
		const MVKConfiguration* pCfg = getInstance()->getMoltenVKConfiguration();

		_submissionCaptureScope = new MVKGPUCaptureScope(this, "CommandBuffer-Submission");
		_presentationCaptureScope = new MVKGPUCaptureScope(this, "Surface-Presentation");

		const bool isDefaultScopeQueue = (_queueFamily->getIndex() == pCfg->defaultGPUCaptureScopeQueueFamilyIndex &&
										  _index == pCfg->defaultGPUCaptureScopeQueueIndex);
		if (isDefaultScopeQueue) { _presentationCaptureScope->makeDefault(); }

		// Allow Xcode to capture the first frame if desired.
		_presentationCaptureScope->beginScope();
	}

	// Releases the execution dispatch queue, if one was created, and destroys both GPU capture
	// scopes. The MTLCommandQueue is not released here, because it is not retained by this
	// queue (it is cached in the queue family).
	MVKQueue::~MVKQueue() {
	destroyExecQueue();
	_submissionCaptureScope->destroy();
	_presentationCaptureScope->destroy();
	}

	// Releases the execution dispatch queue and clears the reference to it.
	// Does nothing if no dispatch queue exists.
	void MVKQueue::destroyExecQueue() {
		if ( !_execQueue ) { return; }

		dispatch_release(_execQueue);
		_execQueue = nullptr;
	}


	#pragma mark -
	#pragma mark MVKQueueSubmission

	// Constructs a submission on the specified queue, recording the semaphores that
	// must be waited on before the content of the submission is allowed to proceed.
	MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue,
										   uint32_t waitSemaphoreCount,
										   const VkSemaphore* pWaitSemaphores) {
		_next = VK_NULL_HANDLE;
		_prev = VK_NULL_HANDLE;
		_queue = queue;

		// Copy the wait semaphores, and remember whether any wait is still outstanding.
		_waitSemaphores.reserve(waitSemaphoreCount);
		for (uint32_t semIdx = 0; semIdx != waitSemaphoreCount; ++semIdx) {
			MVKSemaphore* waitSem = (MVKSemaphore*)pWaitSemaphores[semIdx];
			_waitSemaphores.push_back(waitSem);
		}
		_isAwaitingSemaphores = (waitSemaphoreCount != 0);
	}


	#pragma mark -
	#pragma mark MVKQueueCommandBufferSubmission

	// Executes this submission: encodes any semaphore waits, submits each command buffer,
	// encodes any semaphore signals, and commits the last MTLCommandBuffer with completion
	// signalling enabled.
	void MVKQueueCommandBufferSubmission::execute() {

		// MVKLogDebug("Executing submission %p.", this);

		_queue->_submissionCaptureScope->beginScope();

		MVKDevice* mvkDev = _queue->getDevice();

		// If the device supports it, wait for any semaphores on the device.
		if (mvkDev->_pMetalFeatures->events && _isAwaitingSemaphores) {
			// Clear the flag so commitActiveMTLCommandBuffer() does not wait on them again.
			_isAwaitingSemaphores = false;
			for (auto* ws : _waitSemaphores) {
				ws->encodeWait(getActiveMTLCommandBuffer());
			}
		}

		// Submit each command buffer.
		for (auto& cb : _cmdBuffers) { cb->submit(this); }

		// If a fence or semaphores were provided, ensure that a MTLCommandBuffer
		// is available to trigger them, in case no command buffers were provided.
		if (_fence || _isSignalingSemaphores) { getActiveMTLCommandBuffer(); }

		// If the device supports it, signal all semaphores on the device.
		if (mvkDev->_pMetalFeatures->events && _isSignalingSemaphores) {
			// Clear the flag so finish() does not signal them again.
			_isSignalingSemaphores = false;
			for (auto* ss : _signalSemaphores) {
				ss->encodeSignal(getActiveMTLCommandBuffer());
			}
		}

		// Commit the last MTLCommandBuffer.
		// Nothing after this because callback might destroy this instance before this function ends.
		commitActiveMTLCommandBuffer(true);
	}

	// Returns the active MTLCommandBuffer. If there is none, a new MTLCommandBuffer
	// is first retrieved from the MTLCommandQueue and made the active one.
	id<MTLCommandBuffer> MVKQueueCommandBufferSubmission::getActiveMTLCommandBuffer() {
		if (_activeMTLCommandBuffer) { return _activeMTLCommandBuffer; }

		id<MTLCommandBuffer> freshCmdBuff = [_queue->_mtlQueue commandBufferWithUnretainedReferences];
		setActiveMTLCommandBuffer(freshCmdBuff);
		return _activeMTLCommandBuffer;
	}

	// Makes the specified MTLCommandBuffer the active one and enqueues it. Any MTLCommandBuffer
	// that is already active is committed first, without completion signalling.
	void MVKQueueCommandBufferSubmission::setActiveMTLCommandBuffer(id<MTLCommandBuffer> mtlCmdBuff) {

		if (_activeMTLCommandBuffer != nil) { commitActiveMTLCommandBuffer(); }

		[mtlCmdBuff enqueue];
		_activeMTLCommandBuffer = mtlCmdBuff;	// not retained
	}

	// Commits and releases the currently active MTLCommandBuffer, optionally signalling
	// when the MTLCommandBuffer is done. The first time this is called, it will wait on
	// any semaphores that have not already been waited on. That wait is delayed as long as
	// possible to allow as much filling of the MTLCommandBuffer as possible before forcing a wait.
	//
	// If signalCompletion is true, finish() is called from the completion handler of the
	// committed MTLCommandBuffer, and a MTLCommandBuffer is created if none is active.
	void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool signalCompletion) {

		if (_isAwaitingSemaphores) {
			_isAwaitingSemaphores = false;
			for (auto& ws : _waitSemaphores) { ws->wait(); }
		}

		// If completion must be signalled, ensure a MTLCommandBuffer exists to carry the completion
		// handler. Otherwise, for a submission that produced no MTLCommandBuffer, the handler would
		// be added to nil (a no-op), finish() would never run, and this instance would never be destroyed.
		// Use temp var because callback may destroy this instance before this function ends.
		id<MTLCommandBuffer> mtlCmdBuff = signalCompletion ? getActiveMTLCommandBuffer() : _activeMTLCommandBuffer;
		_activeMTLCommandBuffer = nil;			// not retained

		if (signalCompletion) {
			[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mcb) {
				this->finish();
			}];
		}

		[mtlCmdBuff commit];
	}

	// Called from the completion handler added in commitActiveMTLCommandBuffer(). Ends the
	// submission capture scope, signals any semaphores and fence that are still to be signaled
	// by this object, and then destroys this instance.
	void MVKQueueCommandBufferSubmission::finish() {

	// MVKLogDebug("Finishing submission %p. Submission count %u.", this, _subCount--);

	// Performed here instead of as part of execute() for rare case where app destroys queue
	// immediately after a waitIdle() is cleared by fence below, taking the capture scope with it.
	_queue->_submissionCaptureScope->endScope();

	// Signal each of the signal semaphores.
	// The flag has been cleared by execute() if the signals were already encoded there.
	if (_isSignalingSemaphores) {
	for (auto& ss : _signalSemaphores) { ss->signal(); }
	}

	// If a fence exists, signal it.
	if (_fence) { _fence->signal(); }

	// Must be last: this instance must not be used after this call.
	this->destroy();
	}

	// Constructs a command buffer submission on the specified queue from the submit info,
	// recording its command buffers, signal semaphores, and optional fence.
	// pSubmit may be null, in which case the submission only tracks the fence.
	MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue,
																	 const VkSubmitInfo* pSubmit,
																	 VkFence fence)
			: MVKQueueSubmission(queue,
								 (pSubmit ? pSubmit->waitSemaphoreCount : 0),
								 (pSubmit ? pSubmit->pWaitSemaphores : nullptr)) {

		// Start from a well-defined state, because execute() and finish() read
		// this flag even for fence-only submissions, where pSubmit is null.
		_isSignalingSemaphores = false;

		// pSubmit can be null if just tracking the fence alone
		if (pSubmit) {
			uint32_t cbCnt = pSubmit->commandBufferCount;
			_cmdBuffers.reserve(cbCnt);
			for (uint32_t i = 0; i < cbCnt; i++) {
				MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]);
				_cmdBuffers.push_back(cb);
				// Pass the configuration result of each command buffer on to this submission.
				setConfigurationResult(cb->getConfigurationResult());
			}

			uint32_t ssCnt = pSubmit->signalSemaphoreCount;
			_isSignalingSemaphores = ssCnt > 0;
			_signalSemaphores.reserve(ssCnt);
			for (uint32_t i = 0; i < ssCnt; i++) {
				_signalSemaphores.push_back((MVKSemaphore*)pSubmit->pSignalSemaphores[i]);
			}
		}

		_fence = (MVKFence*)fence;
		_activeMTLCommandBuffer = nil;		// Retrieved lazily by getActiveMTLCommandBuffer()

		// static std::atomic<uint32_t> _subCount;
		// MVKLogDebug("Creating submission %p. Submission count %u.", this, ++_subCount);
	}


	#pragma mark -
	#pragma mark MVKQueuePresentSurfaceSubmission

	// Presents each surface image, either through a MTLCommandBuffer or directly, after
	// honouring the wait semaphores. Then ends the current presentation capture scope,
	// begins the next one, and destroys this instance.
	void MVKQueuePresentSurfaceSubmission::execute() {
		// If there are semaphores and this device supports MTLEvent, we must present
		// with a command buffer in order to synchronize with the semaphores.
		MVKDevice* mvkDev = _queue->getDevice();
		if (mvkDev->_pMetalFeatures->events && !_waitSemaphores.empty()) {
			// Create a command buffer, have it wait for the semaphores, then present
			// surfaces via the command buffer.
			id<MTLCommandBuffer> mtlCmdBuff = getMTLCommandBuffer();
			for (auto& ws : _waitSemaphores) { ws->encodeWait(mtlCmdBuff); }
			for (auto& si : _surfaceImages) { si->presentCAMetalDrawable(mtlCmdBuff); }

			[mtlCmdBuff commit];
		} else if (mvkDev->_pMVKConfig->presentWithCommandBuffer || mvkDev->_pMVKConfig->displayWatermark) {
			// Create a command buffer, present surfaces via the command buffer,
			// then wait on the semaphores before committing.
			id<MTLCommandBuffer> mtlCmdBuff = getMTLCommandBuffer();
			for (auto& si : _surfaceImages) { si->presentCAMetalDrawable(mtlCmdBuff); }
			for (auto& ws : _waitSemaphores) { ws->wait(); }

			[mtlCmdBuff commit];
		} else {
			// Wait on semaphores, then present directly.
			for (auto& ws : _waitSemaphores) { ws->wait(); }
			for (auto& si : _surfaceImages) { si->presentCAMetalDrawable(nil); }
		}

		// Let Xcode know the current frame is done, then start a new frame
		auto cs = _queue->_presentationCaptureScope;
		cs->endScope();
		cs->beginScope();

		// Must be last: this instance must not be used after this call.
		this->destroy();
	}

	// Retrieves a new MTLCommandBuffer from the queue's MTLCommandQueue,
	// labels it for presentation, enqueues it, and returns it.
	id<MTLCommandBuffer> MVKQueuePresentSurfaceSubmission::getMTLCommandBuffer() {
		id<MTLCommandQueue> mtlQ = _queue->getMTLCommandQueue();
		id<MTLCommandBuffer> presentCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
		setLabelIfNotNil(presentCmdBuff, @"vkQueuePresentKHR CommandBuffer");
		[presentCmdBuff enqueue];
		return presentCmdBuff;
	}

	// Constructs a presentation submission on the specified queue, collecting the image to
	// present from each swapchain, and recording in the configuration result whether any
	// swapchain has lost its surface or has had its surface size changed.
	MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKQueue* queue,
																	   const VkPresentInfoKHR* pPresentInfo)
			: MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores) {

		const uint32_t scCnt = pPresentInfo->swapchainCount;
		_surfaceImages.reserve(scCnt);
		for (uint32_t scIdx = 0; scIdx < scCnt; scIdx++) {
			MVKSwapchain* swapchain = (MVKSwapchain*)pPresentInfo->pSwapchains[scIdx];
			_surfaceImages.push_back(swapchain->getImage(pPresentInfo->pImageIndices[scIdx]));

			// Surface loss takes precedence over out-of-date errors.
			if (swapchain->getIsSurfaceLost()) {
				setConfigurationResult(VK_ERROR_SURFACE_LOST_KHR);
				continue;
			}

			// Only report out-of-date if no surface loss has been recorded.
			if (swapchain->getHasSurfaceSizeChanged() && getConfigurationResult() != VK_ERROR_SURFACE_LOST_KHR) {
				setConfigurationResult(VK_ERROR_OUT_OF_DATE_KHR);
			}
		}
	}