Support the VK_KHR_device_group extension.
Largely minimal for now. Much of it, particularly most of the
interactions with `VK_KHR_swapchain`, was already implemented
previously. The only interesting bits are the `vkCmdDispatchBase()`
command, and the ability to create arbitrary swapchain images and bind
them to swapchain memory, which requires the use of the previously
implemented `VK_KHR_bind_memory2` extension. Almost everything else can be
safely ignored for now.
Non-zero dispatch bases use the compute stage-input region to pass the
dispatch base group to the shader, which must manually adjust the
`WorkgroupId` and `GlobalInvocationId` builtins, since Metal does not do
this for us. I have tested that this approach works well--at least, well
enough to pass the CTS.
Because of the ability to bind arbitrary images to swapchain memory,
I've moved the guts of `MVKSwapchainImage` into `MVKSwapchain`
itself. Availability and drawable management are now performed by the
swapchain object. `MVKSwapchainImage` is now just a specialized kind of
image, created when requested with a `VkImageCreateSwapchainInfoKHR`
structure.
Update SPIRV-Cross so we can support the `vkCmdDispatchBase()` command.
One more step towards Vulkan 1.1.
diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md
index e4cf172..e735323 100644
--- a/Docs/MoltenVK_Runtime_UserGuide.md
+++ b/Docs/MoltenVK_Runtime_UserGuide.md
@@ -231,6 +231,8 @@
- `VK_KHR_bind_memory2`
- `VK_KHR_dedicated_allocation`
- `VK_KHR_descriptor_update_template`
+- `VK_KHR_device_group`
+- `VK_KHR_device_group_creation`
- `VK_KHR_get_memory_requirements2`
- `VK_KHR_get_physical_device_properties2`
- `VK_KHR_get_surface_capabilities2`
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 9a5e72f..c3fe0f6 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -18,12 +18,16 @@
Released TBD
+- Add support for extensions:
+ - `VK_KHR_device_group`
- Add support for `VkEvent`, using either native `MTLEvent` or emulation when `MTLEvent` not available.
- Revert to supporting host-coherent memory for linear images on macOS.
- Ensure Vulkan loader magic number is set every time before returning any dispatchable Vulkan handle.
- Fix crash when `VkDeviceCreateInfo` specifies queue families out of numerical order.
- Consolidate the various linkable objects into a `MVKLinkableMixin` template base class.
- Use `MVKVector` whenever possible in MoltenVK, especially within render loop.
+- No longer prefer dedicated allocations for buffer memory, including buffer-backed images.
+- Handle the `compositeAlpha` member of `VkSwapchainCreateInfoKHR`.
MoltenVK 1.0.36
diff --git a/ExternalRevisions/SPIRV-Cross_repo_revision b/ExternalRevisions/SPIRV-Cross_repo_revision
index 337aa79..fc4d9eb 100644
--- a/ExternalRevisions/SPIRV-Cross_repo_revision
+++ b/ExternalRevisions/SPIRV-Cross_repo_revision
@@ -1 +1 @@
-07bb1a53e0cb86cfb9b116623493df974ad9ccee
+4ce04480ec5469fe7ebbdd66c3016090a704d81b
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.h b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.h
index 036605c..d6b45fa 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.h
@@ -31,14 +31,15 @@
class MVKCmdDispatch : public MVKCommand {
public:
- void setContent(uint32_t x, uint32_t y, uint32_t z);
+ void setContent(uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
+ uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
void encode(MVKCommandEncoder* cmdEncoder) override;
MVKCmdDispatch(MVKCommandTypePool<MVKCmdDispatch>* pool);
protected:
- MTLSize _mtlThreadgroupCount;
+ MTLRegion _mtlThreadgroupCount;
};
@@ -70,3 +71,6 @@
/** Adds an indirect compute threadgroup dispatch command to the specified command buffer. */
void mvkCmdDispatchIndirect(MVKCommandBuffer* cmdBuff, VkBuffer buffer, VkDeviceSize offset);
+/** Adds a compute threadgroup dispatch command to the specified command buffer, with thread IDs starting from the given base. */
+void mvkCmdDispatchBase(MVKCommandBuffer* cmdBuff, uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
+
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm
index d52d36a..f1f4e55 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm
@@ -20,6 +20,7 @@
#include "MVKCommandBuffer.h"
#include "MVKCommandPool.h"
#include "MVKBuffer.h"
+#include "MVKPipeline.h"
#include "MVKFoundation.h"
#include "mvk_datatypes.hpp"
@@ -27,18 +28,30 @@
#pragma mark -
#pragma mark MVKCmdDispatch
-void MVKCmdDispatch::setContent(uint32_t x, uint32_t y, uint32_t z) {
- _mtlThreadgroupCount.width = x;
- _mtlThreadgroupCount.height = y;
- _mtlThreadgroupCount.depth = z;
+void MVKCmdDispatch::setContent(uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
+ uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) {
+ _mtlThreadgroupCount = MTLRegionMake3D(baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
}
void MVKCmdDispatch::encode(MVKCommandEncoder* cmdEncoder) {
// MVKLogDebug("vkCmdDispatch() dispatching (%d, %d, %d) threadgroups.", _x, _y, _z);
cmdEncoder->finalizeDispatchState(); // Ensure all updated state has been submitted to Metal
- [cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch) dispatchThreadgroups: _mtlThreadgroupCount
- threadsPerThreadgroup: cmdEncoder->_mtlThreadgroupSize];
+ id<MTLComputeCommandEncoder> mtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch);
+ auto* pipeline = (MVKComputePipeline*)cmdEncoder->_computePipelineState.getPipeline();
+ if (pipeline->allowsDispatchBase()) {
+ if ([mtlEncoder respondsToSelector: @selector(setStageInRegion:)]) {
+ // We'll use the stage-input region to pass the base along to the shader.
+ // Hopefully Metal won't complain that we didn't set up a stage-input descriptor.
+ [mtlEncoder setStageInRegion: _mtlThreadgroupCount];
+ } else {
+ // We have to pass the base group in a buffer.
+ unsigned int base[3] = {(uint32_t)_mtlThreadgroupCount.origin.x, (uint32_t)_mtlThreadgroupCount.origin.y, (uint32_t)_mtlThreadgroupCount.origin.z};
+ cmdEncoder->setComputeBytes(mtlEncoder, base, sizeof(base), pipeline->getIndirectParamsIndex().stages[kMVKShaderStageCompute]);
+ }
+ }
+ [mtlEncoder dispatchThreadgroups: _mtlThreadgroupCount.size
+ threadsPerThreadgroup: cmdEncoder->_mtlThreadgroupSize];
}
MVKCmdDispatch::MVKCmdDispatch(MVKCommandTypePool<MVKCmdDispatch>* pool)
@@ -75,7 +88,7 @@
void mvkCmdDispatch(MVKCommandBuffer* cmdBuff, uint32_t x, uint32_t y, uint32_t z) {
MVKCmdDispatch* cmd = cmdBuff->_commandPool->_cmdDispatchPool.acquireObject();
- cmd->setContent(x, y, z);
+ cmd->setContent(0, 0, 0, x, y, z);
cmdBuff->addCommand(cmd);
}
@@ -85,4 +98,11 @@
cmdBuff->addCommand(cmd);
}
+void mvkCmdDispatchBase(MVKCommandBuffer* cmdBuff, uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
+ uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) {
+ MVKCmdDispatch* cmd = cmdBuff->_commandPool->_cmdDispatchPool.acquireObject();
+ cmd->setContent(baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
+ cmdBuff->addCommand(cmd);
+}
+
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
index 1c543aa..3812739 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
@@ -313,3 +313,6 @@
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers);
+
+/** Indicates that the following commands are to be recorded only for the devices in the given device mask. */
+void mvkCmdSetDeviceMask(MVKCommandBuffer* cmdBuff, uint32_t deviceMask);
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
index 7cd6e09..afab76a 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
@@ -503,5 +503,8 @@
bufferMemoryBarrierCount, pBufferMemoryBarriers,
imageMemoryBarrierCount, pImageMemoryBarriers);
cmdBuff->addCommand(cmd);
+}
+void mvkCmdSetDeviceMask(MVKCommandBuffer* cmdBuff, uint32_t deviceMask) {
+ // No-op for now...
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 17bdb02..5f267df 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -402,6 +402,9 @@
/** Populates the device group surface presentation modes. */
VkResult getDeviceGroupSurfacePresentModes(MVKSurface* surface, VkDeviceGroupPresentModeFlagsKHR* pModes);
+ /** Populates the device group peer memory features. */
+ void getPeerMemoryFeatures(uint32_t heapIndex, uint32_t localDevice, uint32_t remoteDevice, VkPeerMemoryFeatureFlags* pPeerMemoryFeatures);
+
#pragma mark Object lifecycle
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index d3d66a8..2f73064 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -242,7 +242,8 @@
if ( !pImageFormatProperties ) { return VK_SUCCESS; }
// Metal does not support creating uncompressed views of compressed formats.
- if (mvkIsAnyFlagEnabled(flags, VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT)) {
+ // Metal does not support split-instance images.
+ if (mvkIsAnyFlagEnabled(flags, VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT | VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT)) {
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
@@ -1780,6 +1781,10 @@
return VK_SUCCESS;
}
+void MVKDevice::getPeerMemoryFeatures(uint32_t heapIndex, uint32_t localDevice, uint32_t remoteDevice, VkPeerMemoryFeatureFlags* pPeerMemoryFeatures) {
+ *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | VK_PEER_MEMORY_FEATURE_COPY_DST_BIT;
+}
+
#pragma mark Object lifecycle
@@ -1837,6 +1842,20 @@
MVKImage* MVKDevice::createImage(const VkImageCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator) {
+ // If there's a VkImageSwapchainCreateInfoKHR, then we need to create a swapchain image.
+ const VkImageSwapchainCreateInfoKHR* swapchainInfo = nullptr;
+ for (const auto* next = (const VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) {
+ switch (next->sType) {
+ case VK_STRUCTURE_TYPE_IMAGE_SWAPCHAIN_CREATE_INFO_KHR:
+ swapchainInfo = (const VkImageSwapchainCreateInfoKHR*)next;
+ break;
+ default:
+ break;
+ }
+ }
+ if (swapchainInfo) {
+ return (MVKImage*)addResource(new MVKSwapchainImage(this, pCreateInfo, (MVKSwapchain*)swapchainInfo->swapchain));
+ }
return (MVKImage*)addResource(new MVKImage(this, pCreateInfo));
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
index 68fdd23..0358457 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
@@ -393,9 +393,6 @@
#pragma mark -
#pragma mark MVKSwapchainImage
-/** Tracks a semaphore and fence for later signaling. */
-typedef std::pair<MVKSemaphore*, MVKFence*> MVKSwapchainSignaler;
-
/** Indicates the relative availability of each image in the swapchain. */
typedef struct MVKSwapchainImageAvailability_t {
uint64_t acquisitionID; /**< When this image was last made available, relative to the other images in the swapchain. Smaller value is earlier. */
@@ -405,11 +402,18 @@
bool operator< (const MVKSwapchainImageAvailability_t& rhs) const;
} MVKSwapchainImageAvailability;
+
/** Represents a Vulkan image used as a rendering destination within a swapchain. */
class MVKSwapchainImage : public MVKImage {
public:
+ /** Binds this resource to the specified offset within the specified memory allocation. */
+ VkResult bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOffset) override;
+
+ /** Binds this resource according to the specified bind information. */
+ VkResult bindDeviceMemory2(const void* pBindInfo) override;
+
/** Returns the encompassing swapchain. */
inline MVKSwapchain* getSwapchain() { return _swapchain; }
@@ -431,7 +435,7 @@
/**
* Presents the contained drawable to the OS, releases the Metal drawable and its
- * texture back to the Metal layer's pool, and makes this image available for new use.
+ * texture back to the Metal layer's pool, and makes the image memory available for new use.
*
* If mtlCmdBuff is not nil, the contained drawable is scheduled for presentation using
* the presentDrawable: method of the command buffer. If mtlCmdBuff is nil, the contained
@@ -448,25 +452,18 @@
MVKSwapchain* swapchain,
uint32_t swapchainIndex);
- ~MVKSwapchainImage() override;
+ /** Constructs an instance for the specified device and swapchain, without binding to a particular swapchain image index. */
+ MVKSwapchainImage(MVKDevice* device,
+ const VkImageCreateInfo* pCreateInfo,
+ MVKSwapchain* swapchain);
protected:
id<MTLTexture> newMTLTexture() override;
id<CAMetalDrawable> getCAMetalDrawable();
- void resetCAMetalDrawable();
void resetMetalSurface();
- void signal(MVKSwapchainSignaler& signaler);
- void markAsTracked(MVKSwapchainSignaler& signaler);
- void unmarkAsTracked(MVKSwapchainSignaler& signaler);
- void makeAvailable();
void renderWatermark(id<MTLCommandBuffer> mtlCmdBuff);
MVKSwapchain* _swapchain;
uint32_t _swapchainIndex;
- id<CAMetalDrawable> _mtlDrawable;
- std::mutex _availabilityLock;
- MVKVectorInline<MVKSwapchainSignaler, 4> _availabilitySignalers;
- MVKSwapchainSignaler _preSignaled;
- MVKSwapchainImageAvailability _availability;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index 9cf0cce..2a20417 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -662,6 +662,9 @@
if (mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT)) {
setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Metal does not allow uncompressed views of compressed images."));
}
+ if (mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT)) {
+ setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Metal does not support split-instance memory binding."));
+ }
}
VkSampleCountFlagBits MVKImage::validateSamples(const VkImageCreateInfo* pCreateInfo, bool isAttachment) {
@@ -1232,101 +1235,36 @@
#pragma mark -
#pragma mark MVKSwapchainImage
-bool MVKSwapchainImageAvailability_t::operator< (const MVKSwapchainImageAvailability_t& rhs) const {
- if ( isAvailable && !rhs.isAvailable) { return true; }
- if ( !isAvailable && rhs.isAvailable) { return false; }
-
- if (waitCount < rhs.waitCount) { return true; }
- if (waitCount > rhs.waitCount) { return false; }
-
- return acquisitionID < rhs.acquisitionID;
+VkResult MVKSwapchainImage::bindDeviceMemory(MVKDeviceMemory*, VkDeviceSize) {
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
}
-// Makes this image available for acquisition by the app.
-// If any semaphores are waiting to be signaled when this image becomes available, the
-// earliest semaphore is signaled, and this image remains unavailable for other uses.
-void MVKSwapchainImage::makeAvailable() {
- lock_guard<mutex> lock(_availabilityLock);
-
- // Mark when this event happened, relative to that of other images
- _availability.acquisitionID = _swapchain->getNextAcquisitionID();
-
- // Mark this image as available if no semaphores or fences are waiting to be signaled.
- _availability.isAvailable = _availabilitySignalers.empty();
-
- MVKSwapchainSignaler signaler;
- if (_availability.isAvailable) {
- // If this image is now available, signal the semaphore and fence that were associated
- // with the last time this image was acquired while available. This is a workaround for
- // when an app uses a single semaphore or fence for more than one swapchain image.
- // Becuase the semaphore or fence will be signaled by more than one image, it will
- // get out of sync, and the final use of the image would not be signaled as a result.
-
- signaler = _preSignaled;
- } else {
- // If this image is not yet available, extract and signal the first semaphore and fence.
-
- signaler = _availabilitySignalers.front();
- _availabilitySignalers.erase( _availabilitySignalers.begin() );
+VkResult MVKSwapchainImage::bindDeviceMemory2(const void* pBindInfo) {
+ const auto* imageInfo = (const VkBindImageMemoryInfo*)pBindInfo;
+ const VkBindImageMemorySwapchainInfoKHR* swapchainInfo = nullptr;
+ for (const auto* next = (const VkBaseInStructure*)imageInfo->pNext; next; next = next->pNext) {
+ switch (next->sType) {
+ case VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR:
+ swapchainInfo = (const VkBindImageMemorySwapchainInfoKHR*)next;
+ break;
+ default:
+ break;
+ }
+ if (swapchainInfo) { break; }
}
-
- // Signal the semaphore and fence, and let them know they are no longer being tracked.
- signal(signaler);
- unmarkAsTracked(signaler);
-
-// MVKLogDebug("Signaling%s swapchain image %p semaphore %p from present, with %lu remaining semaphores.", (_availability.isAvailable ? " pre-signaled" : ""), this, signaler.first, _availabilitySignalers.size());
+ if (!swapchainInfo) {
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ }
+ _swapchainIndex = swapchainInfo->imageIndex;
+ return VK_SUCCESS;
}
void MVKSwapchainImage::signalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) {
- lock_guard<mutex> lock(_availabilityLock);
- auto signaler = make_pair(semaphore, fence);
- if (_availability.isAvailable) {
- _availability.isAvailable = false;
- signal(signaler);
- if (_device->_useMTLEventsForSemaphores) {
- // Unfortunately, we can't assume we have an MTLSharedEvent here.
- // This means we need to execute a command on the device to signal
- // the semaphore. Alternatively, we could always use an MTLSharedEvent,
- // but that might impose unacceptable performance costs just to handle
- // this one case.
- MVKQueue* queue = _device->getQueue(0, 0);
- id<MTLCommandQueue> mtlQ = queue->getMTLCommandQueue();
- id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
- [mtlCmdBuff enqueue];
- signaler.first->encodeSignal(mtlCmdBuff);
- [mtlCmdBuff commit];
- }
- _preSignaled = signaler;
- } else {
- _availabilitySignalers.push_back(signaler);
- }
- markAsTracked(signaler);
-
-// MVKLogDebug("%s swapchain image %p semaphore %p in acquire with %lu other semaphores.", (_availability.isAvailable ? "Signaling" : "Tracking"), this, semaphore, _availabilitySignalers.size());
-}
-
-// Signal either or both of the semaphore and fence in the specified tracker pair.
-void MVKSwapchainImage::signal(MVKSwapchainSignaler& signaler) {
- if (signaler.first && !_device->_useMTLEventsForSemaphores) { signaler.first->signal(); }
- if (signaler.second) { signaler.second->signal(); }
-}
-
-// Tell the semaphore and fence that they are being tracked for future signaling.
-void MVKSwapchainImage::markAsTracked(MVKSwapchainSignaler& signaler) {
- if (signaler.first) { signaler.first->retain(); }
- if (signaler.second) { signaler.second->retain(); }
-}
-
-// Tell the semaphore and fence that they are no longer being tracked for future signaling.
-void MVKSwapchainImage::unmarkAsTracked(MVKSwapchainSignaler& signaler) {
- if (signaler.first) { signaler.first->release(); }
- if (signaler.second) { signaler.second->release(); }
+ _swapchain->signalWhenAvailable( _swapchainIndex, semaphore, fence );
}
const MVKSwapchainImageAvailability* MVKSwapchainImage::getAvailability() {
- lock_guard<mutex> lock(_availabilityLock);
- _availability.waitCount = (uint32_t)_availabilitySignalers.size();
- return &_availability;
+ return _swapchain->getAvailability( _swapchainIndex );
}
@@ -1339,13 +1277,9 @@
}
id<CAMetalDrawable> MVKSwapchainImage::getCAMetalDrawable() {
- if ( !_mtlDrawable ) {
- @autoreleasepool { // Allow auto-released drawable object to be reclaimed before end of loop
- _mtlDrawable = [_swapchain->getNextCAMetalDrawable() retain]; // retained
- }
- MVKAssert(_mtlDrawable, "Could not aquire an available CAMetalDrawable from the CAMetalLayer in MVKSwapchain image: %p.", this);
- }
- return _mtlDrawable;
+ id<CAMetalDrawable> mtlDrawable = _swapchain->getCAMetalDrawable(_swapchainIndex);
+ MVKAssert(mtlDrawable, "Could not acquire an available CAMetalDrawable from the CAMetalLayer in MVKSwapchain image: %p.", this);
+ return mtlDrawable;
}
void MVKSwapchainImage::presentCAMetalDrawable(id<MTLCommandBuffer> mtlCmdBuff) {
@@ -1364,33 +1298,26 @@
if (scName) { [mtlCmdBuff popDebugGroup]; }
resetMetalSurface();
- if (_device->_useMTLEventsForSemaphores && !_availabilitySignalers.empty()) {
- // Signal the semaphore device-side.
- _availabilitySignalers.front().first->encodeSignal(mtlCmdBuff);
+ if (_device->_useMTLEventsForSemaphores) {
+ _swapchain->signalOnDevice(_swapchainIndex, mtlCmdBuff);
}
retain(); // Ensure this image is not destroyed while awaiting MTLCommandBuffer completion
[mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mcb) {
- makeAvailable();
+ _swapchain->makeAvailable(_swapchainIndex);
release();
}];
} else {
[mtlDrawable present];
resetMetalSurface();
- makeAvailable();
+ _swapchain->makeAvailable(_swapchainIndex);
}
}
-// Removes and releases the Metal drawable object, so that it can be lazily created by getCAMetalDrawable().
-void MVKSwapchainImage::resetCAMetalDrawable() {
- [_mtlDrawable release];
- _mtlDrawable = nil;
-}
-
// Resets the MTLTexture and CAMetalDrawable underlying this image.
void MVKSwapchainImage::resetMetalSurface() {
resetMTLTexture(); // Release texture first so drawable will be last to release it
- resetCAMetalDrawable();
+ _swapchain->resetCAMetalDrawable(_swapchainIndex);
}
@@ -1402,14 +1329,13 @@
uint32_t swapchainIndex) : MVKImage(device, pCreateInfo) {
_swapchain = swapchain;
_swapchainIndex = swapchainIndex;
- _availability.acquisitionID = _swapchain->getNextAcquisitionID();
- _availability.isAvailable = true;
- _preSignaled = make_pair(nullptr, nullptr);
- _mtlDrawable = nil;
}
-MVKSwapchainImage::~MVKSwapchainImage() {
- resetCAMetalDrawable();
+MVKSwapchainImage::MVKSwapchainImage(MVKDevice* device,
+ const VkImageCreateInfo* pCreateInfo,
+ MVKSwapchain* swapchain) : MVKImage(device, pCreateInfo) {
+ _swapchain = swapchain;
+ _swapchainIndex = uint32_t(-1);
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
index 31558bb..7a32e7a 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
@@ -609,6 +609,9 @@
ADD_DVC_EXT_ENTRY_POINT(vkCreateDescriptorUpdateTemplateKHR, KHR_DESCRIPTOR_UPDATE_TEMPLATE);
ADD_DVC_EXT_ENTRY_POINT(vkDestroyDescriptorUpdateTemplateKHR, KHR_DESCRIPTOR_UPDATE_TEMPLATE);
ADD_DVC_EXT_ENTRY_POINT(vkUpdateDescriptorSetWithTemplateKHR, KHR_DESCRIPTOR_UPDATE_TEMPLATE);
+ ADD_DVC_EXT_ENTRY_POINT(vkGetDeviceGroupPeerMemoryFeaturesKHR, KHR_DEVICE_GROUP);
+ ADD_DVC_EXT_ENTRY_POINT(vkCmdSetDeviceMaskKHR, KHR_DEVICE_GROUP);
+ ADD_DVC_EXT_ENTRY_POINT(vkCmdDispatchBaseKHR, KHR_DEVICE_GROUP);
ADD_DVC_EXT_ENTRY_POINT(vkGetBufferMemoryRequirements2KHR, KHR_GET_MEMORY_REQUIREMENTS_2);
ADD_DVC_EXT_ENTRY_POINT(vkGetImageMemoryRequirements2KHR, KHR_GET_MEMORY_REQUIREMENTS_2);
ADD_DVC_EXT_ENTRY_POINT(vkGetImageSparseMemoryRequirements2KHR, KHR_GET_MEMORY_REQUIREMENTS_2);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
index 99bb888..30d48c2 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
@@ -156,6 +156,9 @@
/** Returns the current buffer size buffer bindings. */
const MVKShaderImplicitRezBinding& getBufferSizeBufferIndex() { return _bufferSizeBufferIndex; }
+ /** Returns the current indirect parameter buffer bindings. */
+ const MVKShaderImplicitRezBinding& getIndirectParamsIndex() { return _indirectParamsIndex; }
+
/** Returns whether or not full image view swizzling is enabled for this pipeline. */
bool fullImageViewSwizzle() const { return _fullImageViewSwizzle; }
@@ -171,6 +174,7 @@
MVKPipelineCache* _pipelineCache;
MVKShaderImplicitRezBinding _swizzleBufferIndex;
MVKShaderImplicitRezBinding _bufferSizeBufferIndex;
+ MVKShaderImplicitRezBinding _indirectParamsIndex;
MVKShaderResourceBinding _pushConstantsMTLResourceIndexes;
bool _fullImageViewSwizzle;
bool _hasValidMTLPipelineStates = true;
@@ -204,9 +208,6 @@
/** Returns the number of output tessellation patch control points. */
uint32_t getOutputControlPointCount() { return _outputControlPointCount; }
- /** Returns the current indirect parameter buffer bindings. */
- const MVKShaderImplicitRezBinding& getIndirectParamsIndex() { return _indirectParamsIndex; }
-
/** Returns the current captured output buffer bindings. */
const MVKShaderImplicitRezBinding& getOutputBufferIndex() { return _outputBufferIndex; }
@@ -281,7 +282,6 @@
float _blendConstants[4] = { 0.0, 0.0, 0.0, 1.0 };
uint32_t _outputControlPointCount;
- MVKShaderImplicitRezBinding _indirectParamsIndex;
MVKShaderImplicitRezBinding _outputBufferIndex;
uint32_t _tessCtlPatchOutputBufferIndex = 0;
uint32_t _tessCtlLevelBufferIndex = 0;
@@ -317,6 +317,9 @@
/** Binds this pipeline to the specified command encoder. */
void encode(MVKCommandEncoder* cmdEncoder, uint32_t = 0) override;
+ /** Returns whether this pipeline allows non-zero dispatch bases in vkCmdDispatchBase(). */
+ bool allowsDispatchBase() { return _allowsDispatchBase; }
+
/** Constructs an instance for the device and parent (which may be NULL). */
MVKComputePipeline(MVKDevice* device,
MVKPipelineCache* pipelineCache,
@@ -332,6 +335,8 @@
MTLSize _mtlThreadgroupSize;
bool _needsSwizzleBuffer = false;
bool _needsBufferSizeBuffer = false;
+ bool _needsDispatchBaseBuffer = false;
+ bool _allowsDispatchBase = false;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index c21340d..265d922 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -1294,6 +1294,8 @@
const VkComputePipelineCreateInfo* pCreateInfo) :
MVKPipeline(device, pipelineCache, (MVKPipelineLayout*)pCreateInfo->layout, parent) {
+ _allowsDispatchBase = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_DISPATCH_BASE); // sic; drafters forgot the 'BIT' suffix
+
MVKMTLFunction func = getMTLFunction(pCreateInfo);
_mtlThreadgroupSize = func.threadGroupSize;
_mtlPipelineState = nil;
@@ -1324,6 +1326,9 @@
if (_needsBufferSizeBuffer && _bufferSizeBufferIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires buffer size buffer, but there is no free slot to pass it."));
}
+ if (_needsDispatchBaseBuffer && _indirectParamsIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
+ setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires dispatch base buffer, but there is no free slot to pass it."));
+ }
}
// Returns a MTLFunction to use when creating the MTLComputePipelineState.
@@ -1339,6 +1344,7 @@
shaderContext.options.mslOptions.texel_buffer_texture_width = _device->_pMetalFeatures->maxTextureDimension;
shaderContext.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle;
shaderContext.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers;
+ shaderContext.options.mslOptions.dispatch_base = _allowsDispatchBase;
MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout;
layout->populateShaderConverterContext(shaderContext);
@@ -1346,12 +1352,14 @@
_bufferSizeBufferIndex = layout->getBufferSizeBufferIndex();
shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageCompute];
shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageCompute];
+ shaderContext.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageCompute];
MVKMTLFunction func = ((MVKShaderModule*)pSS->module)->getMTLFunction(&shaderContext, pSS->pSpecializationInfo, _pipelineCache);
auto& funcRslts = func.shaderConversionResults;
_needsSwizzleBuffer = funcRslts.needsSwizzleBuffer;
_needsBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+ _needsDispatchBaseBuffer = funcRslts.needsDispatchBaseBuffer;
return func;
}
@@ -1710,7 +1718,8 @@
scr.needsOutputBuffer,
scr.needsPatchOutputBuffer,
scr.needsBufferSizeBuffer,
- scr.needsInputThreadgroupMem);
+ scr.needsInputThreadgroupMem,
+ scr.needsDispatchBaseBuffer);
}
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
index 802f2ad..24421f4 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
@@ -19,9 +19,9 @@
#pragma once
#include "MVKDevice.h"
+#include "MVKImage.h"
#include "MVKVector.h"
-class MVKSwapchainImage;
class MVKWatermark;
@class MVKBlockObserver;
@@ -29,6 +29,9 @@
#pragma mark MVKSwapchain
+/** Tracks a semaphore and fence for later signaling. */
+typedef std::pair<MVKSemaphore*, MVKFence*> MVKSwapchainSignaler;
+
/** Represents a Vulkan swapchain. */
class MVKSwapchain : public MVKVulkanAPIDeviceObject {
@@ -77,17 +80,27 @@
/** Returns the specified performance stats structure. */
const MVKSwapchainPerformance* getPerformanceStatistics() { return &_performanceStatistics; }
+ /**
+ * Registers a semaphore and/or fence that will be signaled when the image at the given index becomes available.
+ * This function accepts both a semaphore and a fence, and either none, one, or both may be provided.
+ * If the image is already available, the semaphore and fence are immediately signaled.
+ */
+ void signalWhenAvailable(uint32_t imageIndex, MVKSemaphore* semaphore, MVKFence* fence);
+
+ /** Returns the availability status of the image at the given index, relative to other images in the swapchain. */
+ const MVKSwapchainImageAvailability* getAvailability(uint32_t imageIndex);
#pragma mark Metal
/**
- * Returns the next Metal drawable available to provide backing for
- * an image in this swapchain. The returned object is autoreleased.
+ * Returns the Metal drawable providing backing for the image at the given
+ * index in this swapchain. If none is established, the next available
+ * drawable is acquired and returned.
*
* This function may block until the next drawable is available,
* and may return nil if no drawable is available at all.
*/
- id<CAMetalDrawable> getNextCAMetalDrawable();
+ id<CAMetalDrawable> getCAMetalDrawable(uint32_t imgIdx);
#pragma mark Construction
@@ -99,6 +112,12 @@
protected:
friend class MVKSwapchainImage;
+ struct Availability {
+ MVKSwapchainImageAvailability status;
+ MVKVectorInline<MVKSwapchainSignaler, 4> signalers;
+ MVKSwapchainSignaler preSignaled;
+ };
+
void propogateDebugName() override;
void initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo, uint32_t imgCnt);
void initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo, uint32_t imgCnt);
@@ -108,10 +127,19 @@
void willPresentSurface(id<MTLTexture> mtlTexture, id<MTLCommandBuffer> mtlCmdBuff);
void renderWatermark(id<MTLTexture> mtlTexture, id<MTLCommandBuffer> mtlCmdBuff);
void markFrameInterval();
+ void resetCAMetalDrawable(uint32_t imgIdx);
+ void signal(MVKSwapchainSignaler& signaler);
+ void signalOnDevice(uint32_t imgIdx, id<MTLCommandBuffer> mtlCmdBuff);
+ static void markAsTracked(MVKSwapchainSignaler& signaler);
+ static void unmarkAsTracked(MVKSwapchainSignaler& signaler);
+ void makeAvailable(uint32_t imgIdx);
CAMetalLayer* _mtlLayer;
MVKWatermark* _licenseWatermark;
MVKVectorInline<MVKSwapchainImage*, kMVKMaxSwapchainImageCount> _surfaceImages;
+ MVKVectorInline<id<CAMetalDrawable>, kMVKMaxSwapchainImageCount> _mtlDrawables;
+ MVKVectorInline<Availability, kMVKMaxSwapchainImageCount> _imageAvailability;
+ std::mutex _availabilityLock;
std::atomic<uint64_t> _currentAcquisitionID;
CGSize _mtlLayerOrigDrawSize;
MVKSwapchainPerformance _performanceStatistics;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
index cae47c8..e1e6467 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
@@ -19,6 +19,7 @@
#include "MVKSurface.h"
#include "MVKSwapchain.h"
#include "MVKImage.h"
+#include "MVKQueue.h"
#include "MVKFoundation.h"
#include "MVKOSExtensions.h"
#include "MVKWatermark.h"
@@ -34,6 +35,16 @@
#pragma mark MVKSwapchain
+bool MVKSwapchainImageAvailability_t::operator< (const MVKSwapchainImageAvailability_t& rhs) const {
+ if ( isAvailable && !rhs.isAvailable) { return true; }
+ if ( !isAvailable && rhs.isAvailable) { return false; }
+
+ if (waitCount < rhs.waitCount) { return true; }
+ if (waitCount > rhs.waitCount) { return false; }
+
+ return acquisitionID < rhs.acquisitionID;
+}
+
void MVKSwapchain::propogateDebugName() {
if (_debugName) {
size_t imgCnt = _surfaceImages.size();
@@ -45,7 +56,7 @@
}
}
-uint32_t MVKSwapchain::getImageCount() { return (uint32_t)_surfaceImages.size(); }
+uint32_t MVKSwapchain::getImageCount() { return (uint32_t)_imageAvailability.size(); }
MVKSwapchainImage* MVKSwapchain::getImage(uint32_t index) { return _surfaceImages[index]; }
@@ -85,16 +96,16 @@
MVKSwapchainImageAvailability minAvailability = { .acquisitionID = kMVKUndefinedLargeUInt64,
.waitCount = kMVKUndefinedLargeUInt32,
.isAvailable = false };
- for (MVKSwapchainImage* mvkSCImg : _surfaceImages) {
- const MVKSwapchainImageAvailability* currAvailability = mvkSCImg->getAvailability();
- if (*currAvailability < minAvailability) {
- minAvailability = *currAvailability;
- minWaitIndex = mvkSCImg->getSwapchainIndex();
+ for (uint32_t imgIdx = 0; imgIdx < _imageAvailability.size(); imgIdx++) {
+ const Availability& avail = _imageAvailability[imgIdx];
+ if (avail.status < minAvailability) {
+ minAvailability = avail.status;
+ minWaitIndex = imgIdx;
}
}
*pImageIndex = minWaitIndex; // Return the index of the image with the shortest wait
- _surfaceImages[minWaitIndex]->signalWhenAvailable((MVKSemaphore*)semaphore, (MVKFence*)fence);
+ signalWhenAvailable(minWaitIndex, (MVKSemaphore*)semaphore, (MVKFence*)fence);
return getHasSurfaceSizeChanged() ? VK_ERROR_OUT_OF_DATE_KHR : VK_SUCCESS;
}
@@ -110,6 +121,103 @@
*/
void MVKSwapchain::releaseUndisplayedSurfaces() {}
+// Makes an image available for acquisition by the app.
+// If any semaphores are waiting to be signaled when this image becomes available, the
+// earliest semaphore is signaled, and this image remains unavailable for other uses.
+void MVKSwapchain::makeAvailable(uint32_t imgIdx) {
+ lock_guard<mutex> lock(_availabilityLock);
+ auto& availability = _imageAvailability[imgIdx].status;
+
+ // Mark when this event happened, relative to that of other images
+ availability.acquisitionID = getNextAcquisitionID();
+
+ // Mark this image as available if no semaphores or fences are waiting to be signaled.
+ availability.isAvailable = _imageAvailability[imgIdx].signalers.empty();
+
+ MVKSwapchainSignaler signaler;
+ if (availability.isAvailable) {
+ // If this image is now available, signal the semaphore and fence that were associated
+ // with the last time this image was acquired while available. This is a workaround for
+ // when an app uses a single semaphore or fence for more than one swapchain image.
+ // Because the semaphore or fence will be signaled by more than one image, it will
+ // get out of sync, and the final use of the image would not be signaled as a result.
+
+ signaler = _imageAvailability[imgIdx].preSignaled;
+ } else {
+ // If this image is not yet available, extract and signal the first semaphore and fence.
+
+ signaler = _imageAvailability[imgIdx].signalers.front();
+ _imageAvailability[imgIdx].signalers.erase( _imageAvailability[imgIdx].signalers.begin() );
+ }
+
+ // Signal the semaphore and fence, and let them know they are no longer being tracked.
+ signal(signaler);
+ unmarkAsTracked(signaler);
+
+// MVKLogDebug("Signaling%s swapchain image %p semaphore %p from present, with %lu remaining semaphores.", (availability.isAvailable ? " pre-signaled" : ""), this, signaler.first, _imageAvailability[imgIdx].signalers.size());
+}
+
+void MVKSwapchain::signalWhenAvailable(uint32_t imageIndex, MVKSemaphore* semaphore, MVKFence* fence) {
+ lock_guard<mutex> lock(_availabilityLock);
+ auto signaler = make_pair(semaphore, fence);
+ auto& availability = _imageAvailability[imageIndex].status;
+ if (availability.isAvailable) {
+ availability.isAvailable = false;
+ signal(signaler);
+ if (_device->_useMTLEventsForSemaphores) {
+ // Unfortunately, we can't assume we have an MTLSharedEvent here.
+ // This means we need to execute a command on the device to signal
+ // the semaphore. Alternatively, we could always use an MTLSharedEvent,
+ // but that might impose unacceptable performance costs just to handle
+ // this one case.
+ MVKQueue* queue = _device->getQueue(0, 0);
+ id<MTLCommandQueue> mtlQ = queue->getMTLCommandQueue();
+ id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
+ [mtlCmdBuff enqueue];
+ signaler.first->encodeSignal(mtlCmdBuff);
+ [mtlCmdBuff commit];
+ }
+ _imageAvailability[imageIndex].preSignaled = signaler;
+ } else {
+ _imageAvailability[imageIndex].signalers.push_back(signaler);
+ }
+ markAsTracked(signaler);
+
+// MVKLogDebug("%s swapchain image %p semaphore %p in acquire with %lu other semaphores.", (availability.isAvailable ? "Signaling" : "Tracking"), this, semaphore, _imageAvailability[imageIndex].signalers.size());
+}
+
+// Signal either or both of the semaphore and fence in the specified tracker pair.
+void MVKSwapchain::signal(MVKSwapchainSignaler& signaler) {
+ if (signaler.first && !_device->_useMTLEventsForSemaphores) { signaler.first->signal(); }
+ if (signaler.second) { signaler.second->signal(); }
+}
+
+// If present, signal the semaphore for the first waiter for the given image.
+void MVKSwapchain::signalOnDevice(uint32_t imgIdx, id<MTLCommandBuffer> mtlCmdBuff) {
+ lock_guard<mutex> lock(_availabilityLock);
+ MVKSemaphore* mvkSem = _imageAvailability[imgIdx].signalers.front().first;
+ if (mvkSem) { mvkSem->encodeSignal(mtlCmdBuff); }
+}
+
+// Tell the semaphore and fence that they are being tracked for future signaling.
+void MVKSwapchain::markAsTracked(MVKSwapchainSignaler& signaler) {
+ if (signaler.first) { signaler.first->retain(); }
+ if (signaler.second) { signaler.second->retain(); }
+}
+
+// Tell the semaphore and fence that they are no longer being tracked for future signaling.
+void MVKSwapchain::unmarkAsTracked(MVKSwapchainSignaler& signaler) {
+ if (signaler.first) { signaler.first->release(); }
+ if (signaler.second) { signaler.second->release(); }
+}
+
+const MVKSwapchainImageAvailability* MVKSwapchain::getAvailability(uint32_t imageIndex) {
+ lock_guard<mutex> lock(_availabilityLock);
+ auto& availability = _imageAvailability[imageIndex].status;
+ availability.waitCount = (uint32_t)_imageAvailability[imageIndex].signalers.size();
+ return &availability;
+}
+
#pragma mark Rendering
@@ -176,12 +284,22 @@
#pragma mark Metal
-id<CAMetalDrawable> MVKSwapchain::getNextCAMetalDrawable() {
- id<CAMetalDrawable> nextDrwbl = nil;
- while ( !(nextDrwbl = [_mtlLayer nextDrawable]) ) {
- MVKLogError("Drawable could not be retrieved! Elapsed time: %.6f ms.", mvkGetElapsedMilliseconds());
+id<CAMetalDrawable> MVKSwapchain::getCAMetalDrawable(uint32_t imageIndex) {
+ if ( _mtlDrawables[imageIndex] ) { return _mtlDrawables[imageIndex]; }
+ @autoreleasepool { // Allow auto-released drawable object to be reclaimed before end of loop
+ id<CAMetalDrawable> nextDrwbl = nil;
+ while ( !(nextDrwbl = [_mtlLayer nextDrawable]) ) {
+ MVKLogError("Drawable could not be retrieved! Elapsed time: %.6f ms.", mvkGetElapsedMilliseconds());
+ }
+ _mtlDrawables[imageIndex] = [nextDrwbl retain];
}
- return nextDrwbl;
+ return _mtlDrawables[imageIndex];
+}
+
+// Removes and releases a Metal drawable object, so that it can be lazily created by getCAMetalDrawable().
+void MVKSwapchain::resetCAMetalDrawable(uint32_t imgIdx) {
+ [_mtlDrawables[imgIdx] release];
+ _mtlDrawables[imgIdx] = nil;
}
@@ -304,10 +422,20 @@
if (mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR)) {
mvkEnableFlag(imgInfo.flags, VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT);
}
+ if (mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_SWAPCHAIN_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT_KHR)) {
+ // We don't really support this, but set the flag anyway.
+ mvkEnableFlag(imgInfo.flags, VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT);
+ }
_surfaceImages.reserve(imgCnt);
+ _mtlDrawables.resize(imgCnt);
+ _imageAvailability.resize(imgCnt);
for (uint32_t imgIdx = 0; imgIdx < imgCnt; imgIdx++) {
_surfaceImages.push_back(_device->createSwapchainImage(&imgInfo, this, imgIdx, NULL));
+ _imageAvailability[imgIdx].status.acquisitionID = getNextAcquisitionID();
+ _imageAvailability[imgIdx].status.isAvailable = true;
+ _imageAvailability[imgIdx].preSignaled = make_pair(nullptr, nullptr);
+ _mtlDrawables[imgIdx] = nil;
}
MVKLogInfo("Created %d swapchain images with initial size (%d, %d).", imgCnt, imgExtent.width, imgExtent.height);
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
index 44589cb..a7edcb7 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
@@ -35,6 +35,7 @@
MVK_EXTENSION(KHR_bind_memory2, KHR_BIND_MEMORY_2)
MVK_EXTENSION(KHR_dedicated_allocation, KHR_DEDICATED_ALLOCATION)
MVK_EXTENSION(KHR_descriptor_update_template, KHR_DESCRIPTOR_UPDATE_TEMPLATE)
+MVK_EXTENSION(KHR_device_group, KHR_DEVICE_GROUP)
MVK_EXTENSION(KHR_device_group_creation, KHR_DEVICE_GROUP_CREATION)
MVK_EXTENSION(KHR_get_memory_requirements2, KHR_GET_MEMORY_REQUIREMENTS_2)
MVK_EXTENSION(KHR_get_physical_device_properties2, KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2)
diff --git a/MoltenVK/MoltenVK/Utility/MVKVector.h b/MoltenVK/MoltenVK/Utility/MVKVector.h
index e7843a8..4d4adee 100755
--- a/MoltenVK/MoltenVK/Utility/MVKVector.h
+++ b/MoltenVK/MoltenVK/Utility/MVKVector.h
@@ -191,7 +191,7 @@
virtual void reset() = 0;
virtual void reserve( const size_t new_size ) = 0;
virtual void assign( const size_t new_size, const Type *t ) = 0;
- virtual void resize( const size_t new_size, const Type *t ) = 0;
+ virtual void resize( const size_t new_size, const Type *t = nullptr ) = 0;
virtual void shrink_to_fit() = 0;
virtual void push_back( const Type *t ) = 0;
};
@@ -880,7 +880,7 @@
alc.num_elements_used = new_size;
}
- void resize( const size_t new_size, const Type *t ) override
+ void resize( const size_t new_size, const Type *t = nullptr ) override
{
if ( new_size == alc.num_elements_used )
{
diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
index 1604d24..e44f368 100644
--- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm
+++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
@@ -1969,6 +1969,48 @@
#pragma mark -
+#pragma mark VK_KHR_device_group extension
+
+MVK_PUBLIC_SYMBOL void vkGetDeviceGroupPeerMemoryFeaturesKHR(
+ VkDevice device,
+ uint32_t heapIndex,
+ uint32_t localDeviceIndex,
+ uint32_t remoteDeviceIndex,
+ VkPeerMemoryFeatureFlagsKHR* pPeerMemoryFeatures) {
+
+ MVKTraceVulkanCallStart();
+ MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
+ mvkDev->getPeerMemoryFeatures(heapIndex, localDeviceIndex, remoteDeviceIndex, pPeerMemoryFeatures);
+ MVKTraceVulkanCallEnd();
+}
+
+MVK_PUBLIC_SYMBOL void vkCmdSetDeviceMaskKHR(
+ VkCommandBuffer commandBuffer,
+ uint32_t deviceMask) {
+
+ MVKTraceVulkanCallStart();
+ MVKCommandBuffer* cmdBuff = MVKCommandBuffer::getMVKCommandBuffer(commandBuffer);
+ mvkCmdSetDeviceMask(cmdBuff, deviceMask);
+ MVKTraceVulkanCallEnd();
+}
+
+MVK_PUBLIC_SYMBOL void vkCmdDispatchBaseKHR(
+ VkCommandBuffer commandBuffer,
+ uint32_t baseGroupX,
+ uint32_t baseGroupY,
+ uint32_t baseGroupZ,
+ uint32_t groupCountX,
+ uint32_t groupCountY,
+ uint32_t groupCountZ) {
+
+ MVKTraceVulkanCallStart();
+ MVKCommandBuffer* cmdBuff = MVKCommandBuffer::getMVKCommandBuffer(commandBuffer);
+ mvkCmdDispatchBase(cmdBuff, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
+ MVKTraceVulkanCallEnd();
+}
+
+
+#pragma mark -
#pragma mark VK_KHR_device_group_creation extension
MVK_PUBLIC_SYMBOL VkResult vkEnumeratePhysicalDeviceGroupsKHR(
diff --git a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp
index 885bb79..e331486 100644
--- a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp
+++ b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp
@@ -308,6 +308,7 @@
_shaderConversionResults.needsPatchOutputBuffer = pMSLCompiler && pMSLCompiler->needs_patch_output_buffer();
_shaderConversionResults.needsBufferSizeBuffer = pMSLCompiler && pMSLCompiler->needs_buffer_size_buffer();
_shaderConversionResults.needsInputThreadgroupMem = pMSLCompiler && pMSLCompiler->needs_input_threadgroup_mem();
+ _shaderConversionResults.needsDispatchBaseBuffer = pMSLCompiler && pMSLCompiler->needs_dispatch_base_buffer();
if (context.stageSupportsVertexAttributes()) {
for (auto& ctxVA : context.vertexAttributes) {
diff --git a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h
index 82024cd..0b7f463 100644
--- a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h
+++ b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h
@@ -204,6 +204,7 @@
bool needsPatchOutputBuffer = false;
bool needsBufferSizeBuffer = false;
bool needsInputThreadgroupMem = false;
+ bool needsDispatchBaseBuffer = false;
void reset() { *this = SPIRVToMSLConversionResults(); }