blob: 82612d1a7505128b5d933781552d9d770b2ff82e [file] [log] [blame]
/*
* MVKQueue.mm
*
* Copyright (c) 2014-2018 The Brenwill Workshop Ltd. (http://www.brenwill.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "MVKQueue.h"
#include "MVKSwapchain.h"
#include "MVKSync.h"
#include "MVKFoundation.h"
#include "MVKOSExtensions.h"
#include "MVKLogging.h"
using namespace std;
#pragma mark -
#pragma mark MVKQueueFamily
// MTLCommandQueues are cached in MVKQueueFamily/MVKPhysicalDevice because they are very
// limited in number. An app that creates multiple VkDevices over time (such as a test suite)
// will soon find 15 second delays when creating subsequent MTLCommandQueues.
id<MTLCommandQueue> MVKQueueFamily::getMTLCommandQueue(uint32_t queueIndex) {
lock_guard<mutex> lock(_qLock);
id<MTLCommandQueue> mtlQ = _mtlQueues[queueIndex];
if ( !mtlQ ) {
mtlQ = [_physicalDevice->getMTLDevice() newCommandQueue]; // retained
_mtlQueues[queueIndex] = mtlQ;
}
return mtlQ;
}
MVKQueueFamily::MVKQueueFamily(MVKPhysicalDevice* physicalDevice, uint32_t queueFamilyIndex, const VkQueueFamilyProperties* pProperties) {
_physicalDevice = physicalDevice;
_queueFamilyIndex = queueFamilyIndex;
_properties = *pProperties;
_mtlQueues.assign(_properties.queueCount, nil);
}
MVKQueueFamily::~MVKQueueFamily() {
mvkReleaseContainerContents(_mtlQueues);
}
#pragma mark -
#pragma mark MVKQueue
#pragma mark Queue submissions
// Executes the submmission, either immediately, or by dispatching to an execution queue.
// Submissions to the execution queue are wrapped in a dedicated autorelease pool.
// Relying on the dispatch queue to find time to drain the autorelease pool can
// result in significant memory creep under heavy workloads.
VkResult MVKQueue::submit(MVKQueueSubmission* qSubmit) {
if ( !qSubmit ) { return VK_SUCCESS; } // Ignore nils
VkResult rslt = qSubmit->_submissionResult; // Extract result before submission to avoid race condition with early destruction
if (_execQueue) {
dispatch_async(_execQueue, ^{ @autoreleasepool { qSubmit->execute(); } } );
} else {
qSubmit->execute();
}
return rslt;
}
VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits,
VkFence fence, MVKCommandUse cmdBuffUse) {
// Fence-only submission
if (submitCount == 0 && fence) {
return submit(new MVKQueueCommandBufferSubmission(_device, this, VK_NULL_HANDLE, fence, cmdBuffUse));
}
VkResult rslt = VK_SUCCESS;
for (uint32_t sIdx = 0; sIdx < submitCount; sIdx++) {
VkFence fenceOrNil = (sIdx == (submitCount - 1)) ? fence : VK_NULL_HANDLE; // last one gets the fence
VkResult subRslt = submit(new MVKQueueCommandBufferSubmission(_device, this, &pSubmits[sIdx], fenceOrNil, cmdBuffUse));
if (rslt == VK_SUCCESS) { rslt = subRslt; }
}
return rslt;
}
VkResult MVKQueue::submit(const VkPresentInfoKHR* pPresentInfo) {
return submit(new MVKQueuePresentSurfaceSubmission(_device, this, pPresentInfo));
}
// Create an empty submit struct and fence, submit to queue and wait on fence.
VkResult MVKQueue::waitIdle(MVKCommandUse cmdBuffUse) {
VkSubmitInfo vkSbmtInfo = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = NULL,
.waitSemaphoreCount = 0,
.pWaitSemaphores = VK_NULL_HANDLE,
.commandBufferCount = 0,
.pCommandBuffers = VK_NULL_HANDLE,
.signalSemaphoreCount = 0,
.pSignalSemaphores = VK_NULL_HANDLE
};
VkFenceCreateInfo vkFenceInfo = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.pNext = NULL,
.flags = 0,
};
MVKFence mvkFence(_device, &vkFenceInfo);
VkFence fence = (VkFence)&mvkFence;
submit(1, &vkSbmtInfo, fence, cmdBuffUse);
return mvkWaitForFences(1, &fence, false);
}
// This function is guarded against conflict with the mtlCommandBufferHasCompleted()
// function, but is not theadsafe against calls to this function itself, or to the
// registerMTLCommandBufferCountdown() function from multiple threads. It is assumed
// that this function and the registerMTLCommandBufferCountdown() function are called
// from a single thread.
id<MTLCommandBuffer> MVKQueue::makeMTLCommandBuffer(NSString* mtlCmdBuffLabel) {
// Retrieve a MTLCommandBuffer from the MTLCommandQueue.
id<MTLCommandBuffer> mtlCmdBuffer = [_mtlQueue commandBufferWithUnretainedReferences];
mtlCmdBuffer.label = mtlCmdBuffLabel;
// Assign a unique ID to the MTLCommandBuffer, and track when it completes.
MVKMTLCommandBufferID mtlCmdBuffID = _nextMTLCmdBuffID++;
[mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer> mtlCmdBuff) {
this->mtlCommandBufferHasCompleted(mtlCmdBuff, mtlCmdBuffID);
}];
// Keep a running count of the active MTLCommandBuffers.
// This needs to be guarded against a race condition with a MTLCommandBuffer completing.
lock_guard<mutex> lock(_completionLock);
_activeMTLCommandBufferCount++;
return mtlCmdBuffer;
}
// This function must be called after all corresponding calls to makeMTLCommandBuffer() and from the same thead.
void MVKQueue::registerMTLCommandBufferCountdown(MVKMTLCommandBufferCountdown* countdown) {
lock_guard<mutex> lock(_completionLock);
if ( !countdown->setActiveMTLCommandBufferCount(_activeMTLCommandBufferCount, _nextMTLCmdBuffID) ) {
_completionCountdowns.push_back(countdown);
}
}
void MVKQueue::mtlCommandBufferHasCompleted(id<MTLCommandBuffer> mtlCmdBuff, MVKMTLCommandBufferID mtlCmdBuffID) {
lock_guard<mutex> lock(_completionLock);
_activeMTLCommandBufferCount--;
// Iterate through the countdowns, letting them know about the completion, and
// remove any countdowns that have completed by eliding them out of the array.
uint32_t ccCnt = (uint32_t)_completionCountdowns.size();
uint32_t currCCIdx = 0;
for (uint32_t ccIdx = 0; ccIdx < ccCnt; ccIdx++) {
MVKMTLCommandBufferCountdown* mvkCD = _completionCountdowns[ccIdx];
if ( !mvkCD->mtlCommandBufferHasCompleted(mtlCmdBuffID) ) {
// Only retain the countdown if it has not just completed.
// Move it forward in the array if any preceding countdowns have been removed.
if (currCCIdx != ccIdx) { _completionCountdowns[currCCIdx] = mvkCD; }
currCCIdx++;
}
}
// If any countdowns were removed, clear out the extras at the end
if (currCCIdx < ccCnt) { _completionCountdowns.resize(currCCIdx); }
}
#pragma mark Construction
#define MVK_DISPATCH_QUEUE_QOS_CLASS QOS_CLASS_USER_INITIATED
MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority)
: MVKDispatchableDeviceObject(device), _commandEncodingPool(device) {
_queueFamily = queueFamily;
_index = index;
_priority = priority;
initExecQueue();
initMTLCommandQueue();
_activeMTLCommandBufferCount = 0;
_nextMTLCmdBuffID = 1;
}
// Unless synchronous submission processing was configured,
// creates and initializes the prioritized execution dispatch queue.
void MVKQueue::initExecQueue() {
if (_device->_mvkConfig.synchronousQueueSubmits) {
_execQueue = nullptr;
} else {
// Create a name for the dispatch queue
const char* dqNameFmt = "MoltenVKDispatchQueue-%d-%d-%.1f";
char dqName[strlen(dqNameFmt) + 32];
sprintf(dqName, dqNameFmt, _queueFamily->getIndex(), _index, _priority);
// Determine the dispatch queue priority
dispatch_qos_class_t dqQOS = MVK_DISPATCH_QUEUE_QOS_CLASS;
int dqPriority = (1.0 - _priority) * QOS_MIN_RELATIVE_PRIORITY;
dispatch_queue_attr_t dqAttr = dispatch_queue_attr_make_with_qos_class(DISPATCH_QUEUE_SERIAL, dqQOS, dqPriority);
// Create the dispatch queue
_execQueue = dispatch_queue_create(dqName, dqAttr); // retained
}
}
/** Creates and initializes the Metal queue. */
void MVKQueue::initMTLCommandQueue() {
uint64_t startTime = _device->getPerformanceTimestamp();
_mtlQueue = _queueFamily->getMTLCommandQueue(_index); // not retained (cached in queue family)
_device->addActivityPerformance(_device->_performanceStatistics.queue.mtlQueueAccess, startTime);
[_mtlQueue insertDebugCaptureBoundary]; // Allow Xcode to capture the first frame if desired.
}
MVKQueue::~MVKQueue() {
// Delay destroying this queue until registerMTLCommandBufferCountdown() is done.
// registerMTLCommandBufferCountdown() can trigger a queue submission to finish(),
// which may trigger semaphores that control a queue waitIdle(). If that waitIdle()
// is being called by the app just prior to device and queue destruction, a rare race
// condition exists if registerMTLCommandBufferCountdown() does not complete before
// this queue is destroyed. If _completionLock is destroyed along with this queue,
// before registerMTLCommandBufferCountdown() completes, a SIGABRT crash will arise
// in the destructor of the lock created in registerMTLCommandBufferCountdown().
lock_guard<mutex> lock(_completionLock);
destroyExecQueue();
}
// Destroys the execution dispatch queue.
void MVKQueue::destroyExecQueue() {
if (_execQueue) { dispatch_release(_execQueue); }
}
#pragma mark -
#pragma mark MVKQueueCommandBufferSubmissionCountdown
MVKQueueCommandBufferSubmissionCountdown::MVKQueueCommandBufferSubmissionCountdown(MVKQueueCommandBufferSubmission* qSub) {
_qSub = qSub;
}
void MVKQueueCommandBufferSubmissionCountdown::finish() { _qSub->finish(); }
#pragma mark -
#pragma mark MVKQueueSubmission
MVKQueueSubmission::MVKQueueSubmission(MVKDevice* device,
MVKQueue* queue,
uint32_t waitSemaphoreCount,
const VkSemaphore* pWaitSemaphores) : MVKBaseDeviceObject(device) {
_queue = queue;
_prev = VK_NULL_HANDLE;
_next = VK_NULL_HANDLE;
_submissionResult = VK_SUCCESS;
_isAwaitingSemaphores = waitSemaphoreCount > 0;
_waitSemaphores.reserve(waitSemaphoreCount);
for (uint32_t i = 0; i < waitSemaphoreCount; i++) {
_waitSemaphores.push_back((MVKSemaphore*)pWaitSemaphores[i]);
}
}
void MVKQueueSubmission::recordResult(VkResult vkResult) {
if (_submissionResult == VK_SUCCESS) { _submissionResult = vkResult; }
}
#pragma mark -
#pragma mark MVKQueueCommandBufferSubmission
std::atomic<uint32_t> _subCount;
void MVKQueueCommandBufferSubmission::execute() {
// MVKLogDebug("Executing submission %p.", this);
// Execute each command buffer, or if no command buffers, but a fence or semaphores,
// create an empty MTLCommandBuffer to trigger the semaphores and fence.
if ( !_cmdBuffers.empty() ) {
MVKCommandBufferBatchPosition cmdBuffPos = {1, uint32_t(_cmdBuffers.size()), _cmdBuffUse};
for (auto& cb : _cmdBuffers) {
cb->execute(this, cmdBuffPos);
cmdBuffPos.index++;
}
} else {
if (_fence || !_signalSemaphores.empty() ) {
getActiveMTLCommandBuffer();
}
}
commitActiveMTLCommandBuffer();
// Register for callback when MTLCommandBuffers have completed
_queue->registerMTLCommandBufferCountdown(&_cmdBuffCountdown);
}
id<MTLCommandBuffer> MVKQueueCommandBufferSubmission::getActiveMTLCommandBuffer() {
if ( !_activeMTLCommandBuffer ) {
_activeMTLCommandBuffer = _queue->makeMTLCommandBuffer(getMTLCommandBufferName());
[_activeMTLCommandBuffer enqueue];
}
return _activeMTLCommandBuffer;
}
void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer() {
// Wait on each wait semaphore in turn. It doesn't matter which order they are signalled.
// We have delayed this as long as possible to allow as much filling of the MTLCommandBuffer
// as possible before forcing a wait. We only wait for each semaphore once per submission.
if (_isAwaitingSemaphores) {
_isAwaitingSemaphores = false;
for (auto& ws : _waitSemaphores) { ws->wait(); }
}
[_activeMTLCommandBuffer commit];
_activeMTLCommandBuffer = nil; // not retained
}
// Returns an NSString suitable for use as a label
NSString* MVKQueueCommandBufferSubmission::getMTLCommandBufferName() {
switch (_cmdBuffUse) {
case kMVKCommandUseQueueSubmit:
return [NSString stringWithFormat: @"%@ (virtual for sync)", mvkMTLCommandBufferLabel(_cmdBuffUse)];
default:
return mvkMTLCommandBufferLabel(_cmdBuffUse);
}
}
void MVKQueueCommandBufferSubmission::finish() {
// MVKLogDebug("Finishing submission %p. Submission count %u.", this, _subCount--);
// Signal each of the signal semaphores.
for (auto& ss : _signalSemaphores) { ss->signal(); }
// If a fence exists, signal it.
if (_fence) { _fence->signal(); }
this->destroy();
}
MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKDevice* device,
MVKQueue* queue,
const VkSubmitInfo* pSubmit,
VkFence fence,
MVKCommandUse cmdBuffUse)
: MVKQueueSubmission(device,
queue,
(pSubmit ? pSubmit->waitSemaphoreCount : 0),
(pSubmit ? pSubmit->pWaitSemaphores : nullptr)), _cmdBuffCountdown(this) {
// pSubmit can be null if just tracking the fence alone
if (pSubmit) {
uint32_t cbCnt = pSubmit->commandBufferCount;
_cmdBuffers.reserve(cbCnt);
for (uint32_t i = 0; i < cbCnt; i++) {
MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]);
_cmdBuffers.push_back(cb);
recordResult(cb->getRecordingResult());
}
uint32_t ssCnt = pSubmit->signalSemaphoreCount;
_signalSemaphores.reserve(ssCnt);
for (uint32_t i = 0; i < ssCnt; i++) {
_signalSemaphores.push_back((MVKSemaphore*)pSubmit->pSignalSemaphores[i]);
}
}
_fence = (MVKFence*)fence;
_cmdBuffUse= cmdBuffUse;
_activeMTLCommandBuffer = nil;
// MVKLogDebug("Creating submission %p. Submission count %u.", this, ++_subCount);
}
#pragma mark -
#pragma mark MVKQueuePresentSurfaceSubmission
#define MVK_PRESENT_VIA_CMD_BUFFER 0
void MVKQueuePresentSurfaceSubmission::execute() {
id<MTLCommandQueue> mtlQ = _queue->getMTLCommandQueue();
if (_device->_mvkConfig.presentWithCommandBuffer || _device->_mvkConfig.displayWatermark) {
// Create a command buffer, present surfaces via the command buffer,
// then wait on the semaphores before committing.
id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
mtlCmdBuff.label = mvkMTLCommandBufferLabel(kMVKCommandUseQueuePresent);
[mtlCmdBuff enqueue];
for (auto& si : _surfaceImages) { si->presentCAMetalDrawable(mtlCmdBuff); }
for (auto& ws : _waitSemaphores) { ws->wait(); }
[mtlCmdBuff commit];
} else {
// Wait on semaphores, then present directly.
for (auto& ws : _waitSemaphores) { ws->wait(); }
for (auto& si : _surfaceImages) { si->presentCAMetalDrawable(nil); }
}
// Let Xcode know the frame is done, in case command buffer is not used
if (_device->_mvkConfig.debugMode) { [mtlQ insertDebugCaptureBoundary]; }
this->destroy();
}
MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKDevice* device,
MVKQueue* queue,
const VkPresentInfoKHR* pPresentInfo)
: MVKQueueSubmission(device,
queue,
pPresentInfo->waitSemaphoreCount,
pPresentInfo->pWaitSemaphores) {
// Populate the array of swapchain images, testing each one for a change in surface size
_surfaceImages.reserve(pPresentInfo->swapchainCount);
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
MVKSwapchain* mvkSC = (MVKSwapchain*)pPresentInfo->pSwapchains[i];
_surfaceImages.push_back(mvkSC->getImage(pPresentInfo->pImageIndices[i]));
if (mvkSC->getHasSurfaceSizeChanged()) {
_submissionResult = VK_ERROR_OUT_OF_DATE_KHR;
}
}
}