Support the VK_KHR_device_group extension.

Largely minimal for now. Much of it, particularly most of the
interactions with `VK_KHR_swapchain`, was already implemented
previously. The only interesting bits are the `vkCmdDispatchBase()`
command, and the ability to create arbitrary swapchain images and bind
them to swapchain memory, which requires the use of the previously
implemented `VK_KHR_bind_memory2` extension. Almost everything else can
be safely ignored for now.

Non-zero dispatch bases use the compute stage-input region to pass the
dispatch base group to the shader, which must manually adjust the
`WorkgroupId` and `GlobalInvocationId` builtins, since Metal does not do
this for us. I have tested that this approach works well — at least,
well enough to pass the CTS.

Because of the ability to bind arbitrary images to swapchain memory,
I've moved the internals of `MVKSwapchainImage` into `MVKSwapchain`
itself. Availability and drawable management are now performed by the
swapchain object. `MVKSwapchainImage` is now just a specialized kind of
image, created when requested with a `VkImageCreateSwapchainInfoKHR`
structure.

Update SPIRV-Cross so we can support the `vkCmdDispatchBase()` command.

One more step towards Vulkan 1.1.
diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md
index e4cf172..e735323 100644
--- a/Docs/MoltenVK_Runtime_UserGuide.md
+++ b/Docs/MoltenVK_Runtime_UserGuide.md
@@ -231,6 +231,8 @@
 - `VK_KHR_bind_memory2`
 - `VK_KHR_dedicated_allocation`
 - `VK_KHR_descriptor_update_template`
+- `VK_KHR_device_group`
+- `VK_KHR_device_group_creation`
 - `VK_KHR_get_memory_requirements2`
 - `VK_KHR_get_physical_device_properties2`
 - `VK_KHR_get_surface_capabilities2`
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 9a5e72f..c3fe0f6 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -18,12 +18,16 @@
 
 Released TBD
 
+- Add support for extensions:
+	- `VK_KHR_device_group`
 - Add support for `VkEvent`, using either native `MTLEvent` or emulation when `MTLEvent` not available.
 - Revert to supporting host-coherent memory for linear images on macOS.
 - Ensure Vulkan loader magic number is set every time before returning any dispatchable Vulkan handle.
 - Fix crash when `VkDeviceCreateInfo` specifies queue families out of numerical order.
 - Consolidate the various linkable objects into a `MVKLinkableMixin` template base class.
 - Use `MVKVector` whenever possible in MoltenVK, especially within render loop.
+- No longer prefer dedicated allocations for buffer memory, including buffer-backed images.
+- Handle the `compositeAlpha` member of `VkSwapchainCreateInfoKHR`.
 
 
 MoltenVK 1.0.36
diff --git a/ExternalRevisions/SPIRV-Cross_repo_revision b/ExternalRevisions/SPIRV-Cross_repo_revision
index 337aa79..fc4d9eb 100644
--- a/ExternalRevisions/SPIRV-Cross_repo_revision
+++ b/ExternalRevisions/SPIRV-Cross_repo_revision
@@ -1 +1 @@
-07bb1a53e0cb86cfb9b116623493df974ad9ccee
+4ce04480ec5469fe7ebbdd66c3016090a704d81b
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.h b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.h
index 036605c..d6b45fa 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.h
@@ -31,14 +31,15 @@
 class MVKCmdDispatch : public MVKCommand {
 
 public:
-	void setContent(uint32_t x, uint32_t y, uint32_t z);
+    void setContent(uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
+                    uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
 
     void encode(MVKCommandEncoder* cmdEncoder) override;
 
     MVKCmdDispatch(MVKCommandTypePool<MVKCmdDispatch>* pool);
 
 protected:
-    MTLSize  _mtlThreadgroupCount;
+    MTLRegion  _mtlThreadgroupCount;
 };
 
 
@@ -70,3 +71,6 @@
 /** Adds an indirect compute threadgroup dispatch command to the specified command buffer. */
 void mvkCmdDispatchIndirect(MVKCommandBuffer* cmdBuff, VkBuffer buffer, VkDeviceSize offset);
 
+/** Adds a compute threadgroup dispatch command to the specified command buffer, with thread IDs starting from the given base. */
+void mvkCmdDispatchBase(MVKCommandBuffer* cmdBuff, uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
+
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm
index d52d36a..f1f4e55 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm
@@ -20,6 +20,7 @@
 #include "MVKCommandBuffer.h"
 #include "MVKCommandPool.h"
 #include "MVKBuffer.h"
+#include "MVKPipeline.h"
 #include "MVKFoundation.h"
 #include "mvk_datatypes.hpp"
 
@@ -27,18 +28,30 @@
 #pragma mark -
 #pragma mark MVKCmdDispatch
 
-void MVKCmdDispatch::setContent(uint32_t x, uint32_t y, uint32_t z) {
-    _mtlThreadgroupCount.width = x;
-    _mtlThreadgroupCount.height = y;
-    _mtlThreadgroupCount.depth = z;
+void MVKCmdDispatch::setContent(uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
+                                uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) {
+    _mtlThreadgroupCount = MTLRegionMake3D(baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
 }
 
 void MVKCmdDispatch::encode(MVKCommandEncoder* cmdEncoder) {
 //    MVKLogDebug("vkCmdDispatch() dispatching (%d, %d, %d) threadgroups.", _x, _y, _z);
 
 	cmdEncoder->finalizeDispatchState();	// Ensure all updated state has been submitted to Metal
-    [cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch) dispatchThreadgroups: _mtlThreadgroupCount
-															 threadsPerThreadgroup: cmdEncoder->_mtlThreadgroupSize];
+	id<MTLComputeCommandEncoder> mtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch);
+	auto* pipeline = (MVKComputePipeline*)cmdEncoder->_computePipelineState.getPipeline();
+	if (pipeline->allowsDispatchBase()) {
+		if ([mtlEncoder respondsToSelector: @selector(setStageInRegion:)]) {
+			// We'll use the stage-input region to pass the base along to the shader.
+			// Hopefully Metal won't complain that we didn't set up a stage-input descriptor.
+			[mtlEncoder setStageInRegion: _mtlThreadgroupCount];
+		} else {
+			// We have to pass the base group in a buffer.
+			unsigned int base[3] = {(uint32_t)_mtlThreadgroupCount.origin.x, (uint32_t)_mtlThreadgroupCount.origin.y, (uint32_t)_mtlThreadgroupCount.origin.z};
+			cmdEncoder->setComputeBytes(mtlEncoder, base, sizeof(base), pipeline->getIndirectParamsIndex().stages[kMVKShaderStageCompute]);
+		}
+	}
+	[mtlEncoder dispatchThreadgroups: _mtlThreadgroupCount.size
+			   threadsPerThreadgroup: cmdEncoder->_mtlThreadgroupSize];
 }
 
 MVKCmdDispatch::MVKCmdDispatch(MVKCommandTypePool<MVKCmdDispatch>* pool)
@@ -75,7 +88,7 @@
 
 void mvkCmdDispatch(MVKCommandBuffer* cmdBuff, uint32_t x, uint32_t y, uint32_t z) {
 	MVKCmdDispatch* cmd = cmdBuff->_commandPool->_cmdDispatchPool.acquireObject();
-	cmd->setContent(x, y, z);
+	cmd->setContent(0, 0, 0, x, y, z);
 	cmdBuff->addCommand(cmd);
 }
 
@@ -85,4 +98,11 @@
 	cmdBuff->addCommand(cmd);
 }
 
+void mvkCmdDispatchBase(MVKCommandBuffer* cmdBuff, uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
+						uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) {
+	MVKCmdDispatch* cmd = cmdBuff->_commandPool->_cmdDispatchPool.acquireObject();
+	cmd->setContent(baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
+	cmdBuff->addCommand(cmd);
+}
+
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
index 1c543aa..3812739 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h
@@ -313,3 +313,6 @@
 					  const VkBufferMemoryBarrier* pBufferMemoryBarriers,
 					  uint32_t imageMemoryBarrierCount,
 					  const VkImageMemoryBarrier* pImageMemoryBarriers);
+
+/** Indicates that following commands are to be recorded only for the devices in the given device mask. */
+void mvkCmdSetDeviceMask(MVKCommandBuffer* cmdBuff, uint32_t deviceMask);
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
index 7cd6e09..afab76a 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
@@ -503,5 +503,8 @@
 					bufferMemoryBarrierCount, pBufferMemoryBarriers,
 					imageMemoryBarrierCount, pImageMemoryBarriers);
 	cmdBuff->addCommand(cmd);
+}
 
+void mvkCmdSetDeviceMask(MVKCommandBuffer* cmdBuff, uint32_t deviceMask) {
+	// No-op for now...
 }
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 17bdb02..5f267df 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -402,6 +402,9 @@
 	/** Populates the device group surface presentation modes. */
 	VkResult getDeviceGroupSurfacePresentModes(MVKSurface* surface, VkDeviceGroupPresentModeFlagsKHR* pModes);
 
+	/** Populates the device group peer memory features. */
+	void getPeerMemoryFeatures(uint32_t heapIndex, uint32_t localDevice, uint32_t remoteDevice, VkPeerMemoryFeatureFlags* pPeerMemoryFeatures);
+
 
 #pragma mark Object lifecycle
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index d3d66a8..2f73064 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -242,7 +242,8 @@
 	if ( !pImageFormatProperties ) { return VK_SUCCESS; }
 
 	// Metal does not support creating uncompressed views of compressed formats.
-	if (mvkIsAnyFlagEnabled(flags, VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT)) {
+	// Metal does not support split-instance images.
+	if (mvkIsAnyFlagEnabled(flags, VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT | VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT)) {
 		return VK_ERROR_FORMAT_NOT_SUPPORTED;
 	}
 
@@ -1780,6 +1781,10 @@
 	return VK_SUCCESS;
 }
 
+void MVKDevice::getPeerMemoryFeatures(uint32_t heapIndex, uint32_t localDevice, uint32_t remoteDevice, VkPeerMemoryFeatureFlags* pPeerMemoryFeatures) {
+	*pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | VK_PEER_MEMORY_FEATURE_COPY_DST_BIT;
+}
+
 
 #pragma mark Object lifecycle
 
@@ -1837,6 +1842,20 @@
 
 MVKImage* MVKDevice::createImage(const VkImageCreateInfo* pCreateInfo,
 								 const VkAllocationCallbacks* pAllocator) {
+	// If there's a VkImageSwapchainCreateInfoKHR, then we need to create a swapchain image.
+	const VkImageSwapchainCreateInfoKHR* swapchainInfo = nullptr;
+	for (const auto* next = (const VkBaseInStructure*)pCreateInfo->pNext; next; next = next->pNext) {
+		switch (next->sType) {
+		case VK_STRUCTURE_TYPE_IMAGE_SWAPCHAIN_CREATE_INFO_KHR:
+			swapchainInfo = (const VkImageSwapchainCreateInfoKHR*)next;
+			break;
+		default:
+			break;
+		}
+	}
+	if (swapchainInfo) {
+		return (MVKImage*)addResource(new MVKSwapchainImage(this, pCreateInfo, (MVKSwapchain*)swapchainInfo->swapchain));
+	}
 	return (MVKImage*)addResource(new MVKImage(this, pCreateInfo));
 }
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
index 68fdd23..0358457 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h
@@ -393,9 +393,6 @@
 #pragma mark -
 #pragma mark MVKSwapchainImage
 
-/** Tracks a semaphore and fence for later signaling. */
-typedef std::pair<MVKSemaphore*, MVKFence*> MVKSwapchainSignaler;
-
 /** Indicates the relative availability of each image in the swapchain. */
 typedef struct MVKSwapchainImageAvailability_t {
 	uint64_t acquisitionID;			/**< When this image was last made available, relative to the other images in the swapchain. Smaller value is earlier. */
@@ -405,11 +402,18 @@
 	bool operator< (const MVKSwapchainImageAvailability_t& rhs) const;
 } MVKSwapchainImageAvailability;
 
+
 /** Represents a Vulkan image used as a rendering destination within a swapchain. */
 class MVKSwapchainImage : public MVKImage {
 
 public:
 
+	/** Binds this resource to the specified offset within the specified memory allocation. */
+	VkResult bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOffset) override;
+
+	/** Binds this resource according to the specified bind information. */
+	VkResult bindDeviceMemory2(const void* pBindInfo) override;
+
 	/** Returns the encompassing swapchain. */
 	inline MVKSwapchain* getSwapchain() { return _swapchain; }
 
@@ -431,7 +435,7 @@
 
 	/**
 	 * Presents the contained drawable to the OS, releases the Metal drawable and its 
-	 * texture back to the Metal layer's pool, and makes this image available for new use.
+	 * texture back to the Metal layer's pool, and makes the image memory available for new use.
 	 *
 	 * If mtlCmdBuff is not nil, the contained drawable is scheduled for presentation using
 	 * the presentDrawable: method of the command buffer. If mtlCmdBuff is nil, the contained
@@ -448,25 +452,18 @@
 					  MVKSwapchain* swapchain,
 					  uint32_t swapchainIndex);
 
-	~MVKSwapchainImage() override;
+	/** Constructs an instance for the specified device and swapchain, without binding to a particular swapchain image index. */
+	MVKSwapchainImage(MVKDevice* device,
+					  const VkImageCreateInfo* pCreateInfo,
+					  MVKSwapchain* swapchain);
 
 protected:
 	id<MTLTexture> newMTLTexture() override;
 	id<CAMetalDrawable> getCAMetalDrawable();
-	void resetCAMetalDrawable();
     void resetMetalSurface();
-	void signal(MVKSwapchainSignaler& signaler);
-	void markAsTracked(MVKSwapchainSignaler& signaler);
-	void unmarkAsTracked(MVKSwapchainSignaler& signaler);
-	void makeAvailable();
     void renderWatermark(id<MTLCommandBuffer> mtlCmdBuff);
 
 	MVKSwapchain* _swapchain;
 	uint32_t _swapchainIndex;
-	id<CAMetalDrawable> _mtlDrawable;
-	std::mutex _availabilityLock;
-	MVKVectorInline<MVKSwapchainSignaler, 4> _availabilitySignalers;
-	MVKSwapchainSignaler _preSignaled;
-	MVKSwapchainImageAvailability _availability;
 };
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
index 9cf0cce..2a20417 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm
@@ -662,6 +662,9 @@
 	if (mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT)) {
 		setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Metal does not allow uncompressed views of compressed images."));
 	}
+	if (mvkIsAnyFlagEnabled(pCreateInfo->flags, VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT)) {
+		setConfigurationResult(reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCreateImage() : Metal does not support split-instance memory binding."));
+	}
 }
 
 VkSampleCountFlagBits MVKImage::validateSamples(const VkImageCreateInfo* pCreateInfo, bool isAttachment) {
@@ -1232,101 +1235,36 @@
 #pragma mark -
 #pragma mark MVKSwapchainImage
 
-bool MVKSwapchainImageAvailability_t::operator< (const MVKSwapchainImageAvailability_t& rhs) const {
-	if (  isAvailable && !rhs.isAvailable) { return true; }
-	if ( !isAvailable &&  rhs.isAvailable) { return false; }
-
-	if (waitCount < rhs.waitCount) { return true; }
-	if (waitCount > rhs.waitCount) { return false; }
-
-	return acquisitionID < rhs.acquisitionID;
+VkResult MVKSwapchainImage::bindDeviceMemory(MVKDeviceMemory*, VkDeviceSize) {
+	return VK_ERROR_OUT_OF_DEVICE_MEMORY;
 }
 
-// Makes this image available for acquisition by the app.
-// If any semaphores are waiting to be signaled when this image becomes available, the
-// earliest semaphore is signaled, and this image remains unavailable for other uses.
-void MVKSwapchainImage::makeAvailable() {
-	lock_guard<mutex> lock(_availabilityLock);
-
-	// Mark when this event happened, relative to that of other images
-	_availability.acquisitionID = _swapchain->getNextAcquisitionID();
-
-	// Mark this image as available if no semaphores or fences are waiting to be signaled.
-	_availability.isAvailable = _availabilitySignalers.empty();
-
-	MVKSwapchainSignaler signaler;
-	if (_availability.isAvailable) {
-		// If this image is now available, signal the semaphore and fence that were associated
-		// with the last time this image was acquired while available. This is a workaround for
-		// when an app uses a single semaphore or fence for more than one swapchain image.
-		// Becuase the semaphore or fence will be signaled by more than one image, it will
-		// get out of sync, and the final use of the image would not be signaled as a result.
-
-		signaler = _preSignaled;
-	} else {
-		// If this image is not yet available, extract and signal the first semaphore and fence.
-
-		signaler = _availabilitySignalers.front();
-		_availabilitySignalers.erase( _availabilitySignalers.begin() );
+VkResult MVKSwapchainImage::bindDeviceMemory2(const void* pBindInfo) {
+	const auto* imageInfo = (const VkBindImageMemoryInfo*)pBindInfo;
+	const VkBindImageMemorySwapchainInfoKHR* swapchainInfo = nullptr;
+	for (const auto* next = (const VkBaseInStructure*)imageInfo->pNext; next; next = next->pNext) {
+		switch (next->sType) {
+		case VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR:
+			swapchainInfo = (const VkBindImageMemorySwapchainInfoKHR*)next;
+			break;
+		default:
+			break;
+		}
+		if (swapchainInfo) { break; }
 	}
-
-	// Signal the semaphore and fence, and let them know they are no longer being tracked.
-	signal(signaler);
-	unmarkAsTracked(signaler);
-
-//	MVKLogDebug("Signaling%s swapchain image %p semaphore %p from present, with %lu remaining semaphores.", (_availability.isAvailable ? " pre-signaled" : ""), this, signaler.first, _availabilitySignalers.size());
+	if (!swapchainInfo) {
+		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+	}
+	_swapchainIndex = swapchainInfo->imageIndex;
+	return VK_SUCCESS;
 }
 
 void MVKSwapchainImage::signalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) {
-	lock_guard<mutex> lock(_availabilityLock);
-	auto signaler = make_pair(semaphore, fence);
-	if (_availability.isAvailable) {
-		_availability.isAvailable = false;
-		signal(signaler);
-		if (_device->_useMTLEventsForSemaphores) {
-			// Unfortunately, we can't assume we have an MTLSharedEvent here.
-			// This means we need to execute a command on the device to signal
-			// the semaphore. Alternatively, we could always use an MTLSharedEvent,
-			// but that might impose unacceptable performance costs just to handle
-			// this one case.
-			MVKQueue* queue = _device->getQueue(0, 0);	
-			id<MTLCommandQueue> mtlQ = queue->getMTLCommandQueue();
-			id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
-			[mtlCmdBuff enqueue];
-			signaler.first->encodeSignal(mtlCmdBuff);
-			[mtlCmdBuff commit];
-		}
-		_preSignaled = signaler;
-	} else {
-		_availabilitySignalers.push_back(signaler);
-	}
-	markAsTracked(signaler);
-
-//	MVKLogDebug("%s swapchain image %p semaphore %p in acquire with %lu other semaphores.", (_availability.isAvailable ? "Signaling" : "Tracking"), this, semaphore, _availabilitySignalers.size());
-}
-
-// Signal either or both of the semaphore and fence in the specified tracker pair.
-void MVKSwapchainImage::signal(MVKSwapchainSignaler& signaler) {
-	if (signaler.first && !_device->_useMTLEventsForSemaphores) { signaler.first->signal(); }
-	if (signaler.second) { signaler.second->signal(); }
-}
-
-// Tell the semaphore and fence that they are being tracked for future signaling.
-void MVKSwapchainImage::markAsTracked(MVKSwapchainSignaler& signaler) {
-	if (signaler.first) { signaler.first->retain(); }
-	if (signaler.second) { signaler.second->retain(); }
-}
-
-// Tell the semaphore and fence that they are no longer being tracked for future signaling.
-void MVKSwapchainImage::unmarkAsTracked(MVKSwapchainSignaler& signaler) {
-	if (signaler.first) { signaler.first->release(); }
-	if (signaler.second) { signaler.second->release(); }
+	_swapchain->signalWhenAvailable( _swapchainIndex, semaphore, fence );
 }
 
 const MVKSwapchainImageAvailability* MVKSwapchainImage::getAvailability() {
-	lock_guard<mutex> lock(_availabilityLock);
-	_availability.waitCount = (uint32_t)_availabilitySignalers.size();
-	return &_availability;
+	return _swapchain->getAvailability( _swapchainIndex );
 }
 
 
@@ -1339,13 +1277,9 @@
 }
 
 id<CAMetalDrawable> MVKSwapchainImage::getCAMetalDrawable() {
-	if ( !_mtlDrawable ) {
-		@autoreleasepool {		// Allow auto-released drawable object to be reclaimed before end of loop
-			_mtlDrawable = [_swapchain->getNextCAMetalDrawable() retain];	// retained
-		}
-		MVKAssert(_mtlDrawable, "Could not aquire an available CAMetalDrawable from the CAMetalLayer in MVKSwapchain image: %p.", this);
-	}
-	return _mtlDrawable;
+	id<CAMetalDrawable> mtlDrawable = _swapchain->getCAMetalDrawable(_swapchainIndex);
+	MVKAssert(mtlDrawable, "Could not acquire an available CAMetalDrawable from the CAMetalLayer in MVKSwapchain image: %p.", this);
+	return mtlDrawable;
 }
 
 void MVKSwapchainImage::presentCAMetalDrawable(id<MTLCommandBuffer> mtlCmdBuff) {
@@ -1364,33 +1298,26 @@
 		if (scName) { [mtlCmdBuff popDebugGroup]; }
 
 		resetMetalSurface();
-        if (_device->_useMTLEventsForSemaphores && !_availabilitySignalers.empty()) {
-            // Signal the semaphore device-side.
-            _availabilitySignalers.front().first->encodeSignal(mtlCmdBuff);
+        if (_device->_useMTLEventsForSemaphores) {
+            _swapchain->signalOnDevice(_swapchainIndex, mtlCmdBuff);
         }
 
 		retain();	// Ensure this image is not destroyed while awaiting MTLCommandBuffer completion
         [mtlCmdBuff addCompletedHandler: ^(id<MTLCommandBuffer> mcb) {
-			makeAvailable();
+			_swapchain->makeAvailable(_swapchainIndex);
 			release();
 		}];
     } else {
         [mtlDrawable present];
         resetMetalSurface();
-        makeAvailable();
+        _swapchain->makeAvailable(_swapchainIndex);
     }
 }
 
-// Removes and releases the Metal drawable object, so that it can be lazily created by getCAMetalDrawable().
-void MVKSwapchainImage::resetCAMetalDrawable() {
-	[_mtlDrawable release];
-	_mtlDrawable = nil;
-}
-
 // Resets the MTLTexture and CAMetalDrawable underlying this image.
 void MVKSwapchainImage::resetMetalSurface() {
     resetMTLTexture();			// Release texture first so drawable will be last to release it
-    resetCAMetalDrawable();
+    _swapchain->resetCAMetalDrawable(_swapchainIndex);
 }
 
 
@@ -1402,14 +1329,13 @@
 									 uint32_t swapchainIndex) : MVKImage(device, pCreateInfo) {
 	_swapchain = swapchain;
 	_swapchainIndex = swapchainIndex;
-	_availability.acquisitionID = _swapchain->getNextAcquisitionID();
-	_availability.isAvailable = true;
-	_preSignaled = make_pair(nullptr, nullptr);
-	_mtlDrawable = nil;
 }
 
-MVKSwapchainImage::~MVKSwapchainImage() {
-	resetCAMetalDrawable();
+MVKSwapchainImage::MVKSwapchainImage(MVKDevice* device,
+									 const VkImageCreateInfo* pCreateInfo,
+									 MVKSwapchain* swapchain) : MVKImage(device, pCreateInfo) {
+	_swapchain = swapchain;
+	_swapchainIndex = uint32_t(-1);
 }
 
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
index 31558bb..7a32e7a 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm
@@ -609,6 +609,9 @@
 	ADD_DVC_EXT_ENTRY_POINT(vkCreateDescriptorUpdateTemplateKHR, KHR_DESCRIPTOR_UPDATE_TEMPLATE);
 	ADD_DVC_EXT_ENTRY_POINT(vkDestroyDescriptorUpdateTemplateKHR, KHR_DESCRIPTOR_UPDATE_TEMPLATE);
 	ADD_DVC_EXT_ENTRY_POINT(vkUpdateDescriptorSetWithTemplateKHR, KHR_DESCRIPTOR_UPDATE_TEMPLATE);
+	ADD_DVC_EXT_ENTRY_POINT(vkGetDeviceGroupPeerMemoryFeaturesKHR, KHR_DEVICE_GROUP);
+	ADD_DVC_EXT_ENTRY_POINT(vkCmdSetDeviceMaskKHR, KHR_DEVICE_GROUP);
+	ADD_DVC_EXT_ENTRY_POINT(vkCmdDispatchBaseKHR, KHR_DEVICE_GROUP);
 	ADD_DVC_EXT_ENTRY_POINT(vkGetBufferMemoryRequirements2KHR, KHR_GET_MEMORY_REQUIREMENTS_2);
 	ADD_DVC_EXT_ENTRY_POINT(vkGetImageMemoryRequirements2KHR, KHR_GET_MEMORY_REQUIREMENTS_2);
 	ADD_DVC_EXT_ENTRY_POINT(vkGetImageSparseMemoryRequirements2KHR, KHR_GET_MEMORY_REQUIREMENTS_2);
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
index 99bb888..30d48c2 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h
@@ -156,6 +156,9 @@
 	/** Returns the current buffer size buffer bindings. */
 	const MVKShaderImplicitRezBinding& getBufferSizeBufferIndex() { return _bufferSizeBufferIndex; }
 
+	/** Returns the current indirect parameter buffer bindings. */
+	const MVKShaderImplicitRezBinding& getIndirectParamsIndex() { return _indirectParamsIndex; }
+
 	/** Returns whether or not full image view swizzling is enabled for this pipeline. */
 	bool fullImageViewSwizzle() const { return _fullImageViewSwizzle; }
 
@@ -171,6 +174,7 @@
 	MVKPipelineCache* _pipelineCache;
 	MVKShaderImplicitRezBinding _swizzleBufferIndex;
 	MVKShaderImplicitRezBinding _bufferSizeBufferIndex;
+	MVKShaderImplicitRezBinding _indirectParamsIndex;
 	MVKShaderResourceBinding _pushConstantsMTLResourceIndexes;
 	bool _fullImageViewSwizzle;
 	bool _hasValidMTLPipelineStates = true;
@@ -204,9 +208,6 @@
     /** Returns the number of output tessellation patch control points. */
     uint32_t getOutputControlPointCount() { return _outputControlPointCount; }
 
-	/** Returns the current indirect parameter buffer bindings. */
-	const MVKShaderImplicitRezBinding& getIndirectParamsIndex() { return _indirectParamsIndex; }
-
 	/** Returns the current captured output buffer bindings. */
 	const MVKShaderImplicitRezBinding& getOutputBufferIndex() { return _outputBufferIndex; }
 
@@ -281,7 +282,6 @@
 
     float _blendConstants[4] = { 0.0, 0.0, 0.0, 1.0 };
     uint32_t _outputControlPointCount;
-	MVKShaderImplicitRezBinding _indirectParamsIndex;
 	MVKShaderImplicitRezBinding _outputBufferIndex;
 	uint32_t _tessCtlPatchOutputBufferIndex = 0;
 	uint32_t _tessCtlLevelBufferIndex = 0;
@@ -317,6 +317,9 @@
 	/** Binds this pipeline to the specified command encoder. */
 	void encode(MVKCommandEncoder* cmdEncoder, uint32_t = 0) override;
 
+	/** Returns if this pipeline allows non-zero dispatch bases in vkCmdDispatchBase(). */
+	bool allowsDispatchBase() { return _allowsDispatchBase; }
+
 	/** Constructs an instance for the device and parent (which may be NULL). */
 	MVKComputePipeline(MVKDevice* device,
 					   MVKPipelineCache* pipelineCache,
@@ -332,6 +335,8 @@
     MTLSize _mtlThreadgroupSize;
     bool _needsSwizzleBuffer = false;
     bool _needsBufferSizeBuffer = false;
+    bool _needsDispatchBaseBuffer = false;
+    bool _allowsDispatchBase = false;
 };
 
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index c21340d..265d922 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -1294,6 +1294,8 @@
 									   const VkComputePipelineCreateInfo* pCreateInfo) :
 	MVKPipeline(device, pipelineCache, (MVKPipelineLayout*)pCreateInfo->layout, parent) {
 
+	_allowsDispatchBase = mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_PIPELINE_CREATE_DISPATCH_BASE);	// sic; drafters forgot the 'BIT' suffix
+
 	MVKMTLFunction func = getMTLFunction(pCreateInfo);
 	_mtlThreadgroupSize = func.threadGroupSize;
 	_mtlPipelineState = nil;
@@ -1324,6 +1326,9 @@
 	if (_needsBufferSizeBuffer && _bufferSizeBufferIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
 		setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires buffer size buffer, but there is no free slot to pass it."));
 	}
+	if (_needsDispatchBaseBuffer && _indirectParamsIndex.stages[kMVKShaderStageCompute] > _device->_pMetalFeatures->maxPerStageBufferCount) {
+		setConfigurationResult(reportError(VK_ERROR_INVALID_SHADER_NV, "Compute shader requires dispatch base buffer, but there is no free slot to pass it."));
+	}
 }
 
 // Returns a MTLFunction to use when creating the MTLComputePipelineState.
@@ -1339,6 +1344,7 @@
     shaderContext.options.mslOptions.texel_buffer_texture_width = _device->_pMetalFeatures->maxTextureDimension;
 	shaderContext.options.mslOptions.swizzle_texture_samples = _fullImageViewSwizzle;
 	shaderContext.options.mslOptions.texture_buffer_native = _device->_pMetalFeatures->textureBuffers;
+	shaderContext.options.mslOptions.dispatch_base = _allowsDispatchBase;
 
     MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout;
     layout->populateShaderConverterContext(shaderContext);
@@ -1346,12 +1352,14 @@
     _bufferSizeBufferIndex = layout->getBufferSizeBufferIndex();
     shaderContext.options.mslOptions.swizzle_buffer_index = _swizzleBufferIndex.stages[kMVKShaderStageCompute];
     shaderContext.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageCompute];
+    shaderContext.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageCompute];
 
     MVKMTLFunction func = ((MVKShaderModule*)pSS->module)->getMTLFunction(&shaderContext, pSS->pSpecializationInfo, _pipelineCache);
 
 	auto& funcRslts = func.shaderConversionResults;
 	_needsSwizzleBuffer = funcRslts.needsSwizzleBuffer;
     _needsBufferSizeBuffer = funcRslts.needsBufferSizeBuffer;
+    _needsDispatchBaseBuffer = funcRslts.needsDispatchBaseBuffer;
 
 	return func;
 }
@@ -1710,7 +1718,8 @@
 				scr.needsOutputBuffer,
 				scr.needsPatchOutputBuffer,
 				scr.needsBufferSizeBuffer,
-				scr.needsInputThreadgroupMem);
+				scr.needsInputThreadgroupMem,
+				scr.needsDispatchBaseBuffer);
 	}
 
 }
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
index 802f2ad..24421f4 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
@@ -19,9 +19,9 @@
 #pragma once
 
 #include "MVKDevice.h"
+#include "MVKImage.h"
 #include "MVKVector.h"
 
-class MVKSwapchainImage;
 class MVKWatermark;
 
 @class MVKBlockObserver;
@@ -29,6 +29,9 @@
 
 #pragma mark MVKSwapchain
 
+/** Tracks a semaphore and fence for later signaling. */
+typedef std::pair<MVKSemaphore*, MVKFence*> MVKSwapchainSignaler;
+
 /** Represents a Vulkan swapchain. */
 class MVKSwapchain : public MVKVulkanAPIDeviceObject {
 
@@ -77,17 +80,27 @@
 	/** Returns the specified performance stats structure. */
 	const MVKSwapchainPerformance* getPerformanceStatistics() { return &_performanceStatistics; }
 
+	/**
+	 * Registers a semaphore and/or fence that will be signaled when the image at the given index becomes available.
+	 * This function accepts both a semaphore and a fence, and either none, one, or both may be provided.
+	 * If this image is available already, the semaphore and fence are immediately signaled.
+	 */
+	void signalWhenAvailable(uint32_t imageIndex, MVKSemaphore* semaphore, MVKFence* fence);
+
+	/** Returns the availability status of the image at the given index, relative to other images in the swapchain. */
+	const MVKSwapchainImageAvailability* getAvailability(uint32_t imageIndex);
 
 #pragma mark Metal
 
 	/** 
-	 * Returns the next Metal drawable available to provide backing for 
-	 * an image in this swapchain. The returned object is autoreleased.
+	 * Returns the Metal drawable providing backing for the image at the given
+	 * index in this swapchain. If none is established, the next available
+	 * drawable is acquired and returned.
 	 *
 	 * This function may block until the next drawable is available, 
 	 * and may return nil if no drawable is available at all.
 	 */
-	id<CAMetalDrawable> getNextCAMetalDrawable();
+	id<CAMetalDrawable> getCAMetalDrawable(uint32_t imgIdx);
 
 
 #pragma mark Construction
@@ -99,6 +112,12 @@
 protected:
 	friend class MVKSwapchainImage;
 
+	struct Availability {
+		MVKSwapchainImageAvailability status;
+		MVKVectorInline<MVKSwapchainSignaler, 4> signalers;
+		MVKSwapchainSignaler preSignaled;
+	};
+
 	void propogateDebugName() override;
 	void initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo, uint32_t imgCnt);
 	void initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo, uint32_t imgCnt);
@@ -108,10 +127,19 @@
     void willPresentSurface(id<MTLTexture> mtlTexture, id<MTLCommandBuffer> mtlCmdBuff);
     void renderWatermark(id<MTLTexture> mtlTexture, id<MTLCommandBuffer> mtlCmdBuff);
     void markFrameInterval();
+	void resetCAMetalDrawable(uint32_t imgIdx);
+	void signal(MVKSwapchainSignaler& signaler);
+	void signalOnDevice(uint32_t imgIdx, id<MTLCommandBuffer> mtlCmdBuff);
+	static void markAsTracked(MVKSwapchainSignaler& signaler);
+	static void unmarkAsTracked(MVKSwapchainSignaler& signaler);
+	void makeAvailable(uint32_t imgIdx);
 
 	CAMetalLayer* _mtlLayer;
     MVKWatermark* _licenseWatermark;
 	MVKVectorInline<MVKSwapchainImage*, kMVKMaxSwapchainImageCount> _surfaceImages;
+	MVKVectorInline<id<CAMetalDrawable>, kMVKMaxSwapchainImageCount> _mtlDrawables;
+	MVKVectorInline<Availability, kMVKMaxSwapchainImageCount> _imageAvailability;
+	std::mutex _availabilityLock;
 	std::atomic<uint64_t> _currentAcquisitionID;
     CGSize _mtlLayerOrigDrawSize;
     MVKSwapchainPerformance _performanceStatistics;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
index cae47c8..e1e6467 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
@@ -19,6 +19,7 @@
 #include "MVKSurface.h"
 #include "MVKSwapchain.h"
 #include "MVKImage.h"
+#include "MVKQueue.h"
 #include "MVKFoundation.h"
 #include "MVKOSExtensions.h"
 #include "MVKWatermark.h"
@@ -34,6 +35,16 @@
 
 #pragma mark MVKSwapchain
 
+bool MVKSwapchainImageAvailability_t::operator< (const MVKSwapchainImageAvailability_t& rhs) const {
+	if (  isAvailable && !rhs.isAvailable) { return true; }
+	if ( !isAvailable &&  rhs.isAvailable) { return false; }
+
+	if (waitCount < rhs.waitCount) { return true; }
+	if (waitCount > rhs.waitCount) { return false; }
+
+	return acquisitionID < rhs.acquisitionID;
+}
+
 void MVKSwapchain::propogateDebugName() {
 	if (_debugName) {
 		size_t imgCnt = _surfaceImages.size();
@@ -45,7 +56,7 @@
 	}
 }
 
-uint32_t MVKSwapchain::getImageCount() { return (uint32_t)_surfaceImages.size(); }
+uint32_t MVKSwapchain::getImageCount() { return (uint32_t)_imageAvailability.size(); }
 
 MVKSwapchainImage* MVKSwapchain::getImage(uint32_t index) { return _surfaceImages[index]; }
 
@@ -85,16 +96,16 @@
     MVKSwapchainImageAvailability minAvailability = { .acquisitionID = kMVKUndefinedLargeUInt64,
 													  .waitCount = kMVKUndefinedLargeUInt32,
 													  .isAvailable = false };
-    for (MVKSwapchainImage* mvkSCImg : _surfaceImages) {
-        const MVKSwapchainImageAvailability* currAvailability = mvkSCImg->getAvailability();
-        if (*currAvailability < minAvailability) {
-            minAvailability = *currAvailability;
-            minWaitIndex = mvkSCImg->getSwapchainIndex();
+    for (uint32_t imgIdx = 0; imgIdx < _imageAvailability.size(); imgIdx++) {
+        const Availability& avail = _imageAvailability[imgIdx];
+        if (avail.status < minAvailability) {
+            minAvailability = avail.status;
+            minWaitIndex = imgIdx;
         }
     }
 
     *pImageIndex = minWaitIndex;	// Return the index of the image with the shortest wait
-    _surfaceImages[minWaitIndex]->signalWhenAvailable((MVKSemaphore*)semaphore, (MVKFence*)fence);
+    signalWhenAvailable(minWaitIndex, (MVKSemaphore*)semaphore, (MVKFence*)fence);
     return getHasSurfaceSizeChanged() ? VK_ERROR_OUT_OF_DATE_KHR : VK_SUCCESS;
 }
 
@@ -110,6 +121,103 @@
  */
 void MVKSwapchain::releaseUndisplayedSurfaces() {}
 
+// Makes an image available for acquisition by the app.
+// If any semaphores are waiting to be signaled when this image becomes available, the
+// earliest semaphore is signaled, and this image remains unavailable for other uses.
+void MVKSwapchain::makeAvailable(uint32_t imgIdx) {
+	lock_guard<mutex> lock(_availabilityLock);
+	auto& availability = _imageAvailability[imgIdx].status;
+
+	// Mark when this event happened, relative to that of other images
+	availability.acquisitionID = getNextAcquisitionID();
+
+	// Mark this image as available if no semaphores or fences are waiting to be signaled.
+	availability.isAvailable = _imageAvailability[imgIdx].signalers.empty();
+
+	MVKSwapchainSignaler signaler;
+	if (availability.isAvailable) {
+		// If this image is now available, signal the semaphore and fence that were associated
+		// with the last time this image was acquired while available. This is a workaround for
+		// when an app uses a single semaphore or fence for more than one swapchain image.
+	// Because the semaphore or fence will be signaled by more than one image, it will
+		// get out of sync, and the final use of the image would not be signaled as a result.
+
+		signaler = _imageAvailability[imgIdx].preSignaled;
+	} else {
+		// If this image is not yet available, extract and signal the first semaphore and fence.
+
+		signaler = _imageAvailability[imgIdx].signalers.front();
+		_imageAvailability[imgIdx].signalers.erase( _imageAvailability[imgIdx].signalers.begin() );
+	}
+
+	// Signal the semaphore and fence, and let them know they are no longer being tracked.
+	signal(signaler);
+	unmarkAsTracked(signaler);
+
+//	MVKLogDebug("Signaling%s swapchain image %p semaphore %p from present, with %lu remaining semaphores.", (_availability.isAvailable ? " pre-signaled" : ""), this, signaler.first, _availabilitySignalers.size());
+}
+
+void MVKSwapchain::signalWhenAvailable(uint32_t imageIndex, MVKSemaphore* semaphore, MVKFence* fence) {
+	lock_guard<mutex> lock(_availabilityLock);
+	auto signaler = make_pair(semaphore, fence);
+	auto& availability = _imageAvailability[imageIndex].status; 
+	if (availability.isAvailable) {
+		availability.isAvailable = false;
+		signal(signaler);
+		if (_device->_useMTLEventsForSemaphores) {
+			// Unfortunately, we can't assume we have an MTLSharedEvent here.
+			// This means we need to execute a command on the device to signal
+			// the semaphore. Alternatively, we could always use an MTLSharedEvent,
+			// but that might impose unacceptable performance costs just to handle
+			// this one case.
+			MVKQueue* queue = _device->getQueue(0, 0);	
+			id<MTLCommandQueue> mtlQ = queue->getMTLCommandQueue();
+			id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
+			[mtlCmdBuff enqueue];
+			signaler.first->encodeSignal(mtlCmdBuff);
+			[mtlCmdBuff commit];
+		}
+		_imageAvailability[imageIndex].preSignaled = signaler;
+	} else {
+		_imageAvailability[imageIndex].signalers.push_back(signaler);
+	}
+	markAsTracked(signaler);
+
+//	MVKLogDebug("%s swapchain image %p semaphore %p in acquire with %lu other semaphores.", (_availability.isAvailable ? "Signaling" : "Tracking"), this, semaphore, _availabilitySignalers.size());
+}
+
+// Signal either or both of the semaphore and fence in the specified tracker pair.
+void MVKSwapchain::signal(MVKSwapchainSignaler& signaler) {
+	if (signaler.first && !_device->_useMTLEventsForSemaphores) { signaler.first->signal(); }
+	if (signaler.second) { signaler.second->signal(); }
+}
+
+// If present, signal the semaphore for the first waiter for the given image.
+void MVKSwapchain::signalOnDevice(uint32_t imgIdx, id<MTLCommandBuffer> mtlCmdBuff) {
+	lock_guard<mutex> lock(_availabilityLock);
+	MVKSemaphore* mvkSem = _imageAvailability[imgIdx].signalers.front().first;
+	if (mvkSem) { mvkSem->encodeSignal(mtlCmdBuff); }
+}
+
+// Tell the semaphore and fence that they are being tracked for future signaling.
+void MVKSwapchain::markAsTracked(MVKSwapchainSignaler& signaler) {
+	if (signaler.first) { signaler.first->retain(); }
+	if (signaler.second) { signaler.second->retain(); }
+}
+
+// Tell the semaphore and fence that they are no longer being tracked for future signaling.
+void MVKSwapchain::unmarkAsTracked(MVKSwapchainSignaler& signaler) {
+	if (signaler.first) { signaler.first->release(); }
+	if (signaler.second) { signaler.second->release(); }
+}
+
+const MVKSwapchainImageAvailability* MVKSwapchain::getAvailability(uint32_t imageIndex) {
+	lock_guard<mutex> lock(_availabilityLock);
+	auto& availability = _imageAvailability[imageIndex].status;
+	availability.waitCount = (uint32_t)_imageAvailability[imageIndex].signalers.size();
+	return &availability;
+}
+
 
 #pragma mark Rendering
 
@@ -176,12 +284,22 @@
 
 #pragma mark Metal
 
-id<CAMetalDrawable> MVKSwapchain::getNextCAMetalDrawable() {
-    id<CAMetalDrawable> nextDrwbl = nil;
-    while ( !(nextDrwbl = [_mtlLayer nextDrawable]) ) {
-        MVKLogError("Drawable could not be retrieved! Elapsed time: %.6f ms.", mvkGetElapsedMilliseconds());
+id<CAMetalDrawable> MVKSwapchain::getCAMetalDrawable(uint32_t imageIndex) {
+    if ( _mtlDrawables[imageIndex] ) { return _mtlDrawables[imageIndex]; }
+    @autoreleasepool {      // Allow auto-released drawable object to be reclaimed before end of loop
+        id<CAMetalDrawable> nextDrwbl = nil;
+        while ( !(nextDrwbl = [_mtlLayer nextDrawable]) ) {
+            MVKLogError("Drawable could not be retrieved! Elapsed time: %.6f ms.", mvkGetElapsedMilliseconds());
+        }
+        _mtlDrawables[imageIndex] = [nextDrwbl retain];
     }
-    return nextDrwbl;
+    return _mtlDrawables[imageIndex];
+}
+
+// Removes and releases a Metal drawable object, so that it can be lazily created by getCAMetalDrawable().
+void MVKSwapchain::resetCAMetalDrawable(uint32_t imgIdx) {
+	[_mtlDrawables[imgIdx] release];
+	_mtlDrawables[imgIdx] = nil;
 }
 
 
@@ -304,10 +422,20 @@
 	if (mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR)) {
 		mvkEnableFlag(imgInfo.flags, VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT);
 	}
+	if (mvkAreAllFlagsEnabled(pCreateInfo->flags, VK_SWAPCHAIN_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT_KHR)) {
+		// We don't really support this, but set the flag anyway.
+		mvkEnableFlag(imgInfo.flags, VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT);
+	}
 
 	_surfaceImages.reserve(imgCnt);
+	_mtlDrawables.resize(imgCnt);
+	_imageAvailability.resize(imgCnt);
     for (uint32_t imgIdx = 0; imgIdx < imgCnt; imgIdx++) {
         _surfaceImages.push_back(_device->createSwapchainImage(&imgInfo, this, imgIdx, NULL));
+        _imageAvailability[imgIdx].status.acquisitionID = getNextAcquisitionID();
+        _imageAvailability[imgIdx].status.isAvailable = true;
+        _imageAvailability[imgIdx].preSignaled = make_pair(nullptr, nullptr);
+        _mtlDrawables[imgIdx] = nil;
     }
 
     MVKLogInfo("Created %d swapchain images with initial size (%d, %d).", imgCnt, imgExtent.width, imgExtent.height);
diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
index 44589cb..a7edcb7 100644
--- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def
+++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def
@@ -35,6 +35,7 @@
 MVK_EXTENSION(KHR_bind_memory2, KHR_BIND_MEMORY_2)
 MVK_EXTENSION(KHR_dedicated_allocation, KHR_DEDICATED_ALLOCATION)
 MVK_EXTENSION(KHR_descriptor_update_template, KHR_DESCRIPTOR_UPDATE_TEMPLATE)
+MVK_EXTENSION(KHR_device_group, KHR_DEVICE_GROUP)
 MVK_EXTENSION(KHR_device_group_creation, KHR_DEVICE_GROUP_CREATION)
 MVK_EXTENSION(KHR_get_memory_requirements2, KHR_GET_MEMORY_REQUIREMENTS_2)
 MVK_EXTENSION(KHR_get_physical_device_properties2, KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2)
diff --git a/MoltenVK/MoltenVK/Utility/MVKVector.h b/MoltenVK/MoltenVK/Utility/MVKVector.h
index e7843a8..4d4adee 100755
--- a/MoltenVK/MoltenVK/Utility/MVKVector.h
+++ b/MoltenVK/MoltenVK/Utility/MVKVector.h
@@ -191,7 +191,7 @@
   virtual void                reset()                                        = 0;

   virtual void                reserve( const size_t new_size )               = 0;

   virtual void                assign( const size_t new_size, const Type *t ) = 0;

-  virtual void                resize( const size_t new_size, const Type *t ) = 0;

+  virtual void                resize( const size_t new_size, const Type *t = nullptr ) = 0;

   virtual void                shrink_to_fit()                                = 0;

   virtual void                push_back( const Type *t )                     = 0;

 };

@@ -880,7 +880,7 @@
     alc.num_elements_used = new_size;

   }

 

-  void resize( const size_t new_size, const Type *t ) override

+  void resize( const size_t new_size, const Type *t = nullptr ) override

   {

     if ( new_size == alc.num_elements_used )

     {

diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
index 1604d24..e44f368 100644
--- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm
+++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
@@ -1969,6 +1969,48 @@
 
 
 #pragma mark -
+#pragma mark VK_KHR_device_group extension
+
+MVK_PUBLIC_SYMBOL void vkGetDeviceGroupPeerMemoryFeaturesKHR(
+    VkDevice                                    device,
+    uint32_t                                    heapIndex,
+    uint32_t                                    localDeviceIndex,
+    uint32_t                                    remoteDeviceIndex,
+    VkPeerMemoryFeatureFlagsKHR*                pPeerMemoryFeatures) {
+
+    MVKTraceVulkanCallStart();
+    MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
+    mvkDev->getPeerMemoryFeatures(heapIndex, localDeviceIndex, remoteDeviceIndex, pPeerMemoryFeatures);
+    MVKTraceVulkanCallEnd();
+}
+
+MVK_PUBLIC_SYMBOL void vkCmdSetDeviceMaskKHR(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    deviceMask) {
+
+    MVKTraceVulkanCallStart();
+    MVKCommandBuffer* cmdBuff = MVKCommandBuffer::getMVKCommandBuffer(commandBuffer);
+    mvkCmdSetDeviceMask(cmdBuff, deviceMask);
+    MVKTraceVulkanCallEnd();
+}
+
+MVK_PUBLIC_SYMBOL void vkCmdDispatchBaseKHR(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    baseGroupX,
+    uint32_t                                    baseGroupY,
+    uint32_t                                    baseGroupZ,
+    uint32_t                                    groupCountX,
+    uint32_t                                    groupCountY,
+    uint32_t                                    groupCountZ) {
+	
+	MVKTraceVulkanCallStart();
+    MVKCommandBuffer* cmdBuff = MVKCommandBuffer::getMVKCommandBuffer(commandBuffer);
+	mvkCmdDispatchBase(cmdBuff, baseGroupX, baseGroupY, baseGroupZ, groupCountX, groupCountY, groupCountZ);
+	MVKTraceVulkanCallEnd();
+}
+
+
+#pragma mark -
 #pragma mark VK_KHR_device_group_creation extension
 
 MVK_PUBLIC_SYMBOL VkResult vkEnumeratePhysicalDeviceGroupsKHR(
diff --git a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp
index 885bb79..e331486 100644
--- a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp
+++ b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.cpp
@@ -308,6 +308,7 @@
 	_shaderConversionResults.needsPatchOutputBuffer = pMSLCompiler && pMSLCompiler->needs_patch_output_buffer();
 	_shaderConversionResults.needsBufferSizeBuffer = pMSLCompiler && pMSLCompiler->needs_buffer_size_buffer();
 	_shaderConversionResults.needsInputThreadgroupMem = pMSLCompiler && pMSLCompiler->needs_input_threadgroup_mem();
+	_shaderConversionResults.needsDispatchBaseBuffer = pMSLCompiler && pMSLCompiler->needs_dispatch_base_buffer();
 
 	if (context.stageSupportsVertexAttributes()) {
 		for (auto& ctxVA : context.vertexAttributes) {
diff --git a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h
index 82024cd..0b7f463 100644
--- a/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h
+++ b/MoltenVKShaderConverter/MoltenVKSPIRVToMSLConverter/SPIRVToMSLConverter.h
@@ -204,6 +204,7 @@
 		bool needsPatchOutputBuffer = false;
 		bool needsBufferSizeBuffer = false;
 		bool needsInputThreadgroupMem = false;
+		bool needsDispatchBaseBuffer = false;
 
 		void reset() { *this = SPIRVToMSLConversionResults(); }