vkInvalidateMappedMemoryRanges() synchronizes managed device memory to CPU.

Add MVKDevice::invalidateMappedMemoryRanges().
Add MVKMTLBlitEncoder struct.
MVKDevice::getQueue() defaults to queue family zero and queue zero.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index fd79051..3239b8d 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -21,6 +21,7 @@
 - Add support for extensions:
 	- `VK_KHR_device_group`
 - Add support for `VkEvent`, using either native `MTLEvent` or emulation when `MTLEvent` not available.
+- `vkInvalidateMappedMemoryRanges()` synchronizes managed device memory to CPU.
 - Revert to supporting host-coherent memory for linear images on macOS.
 - Ensure Vulkan loader magic number is set every time before returning any dispatchable Vulkan handle.
 - Fix crash when `VkDeviceCreateInfo` specifies queue families out of numerical order.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 5f267df..b4a7461 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -360,6 +360,11 @@
 #pragma mark -
 #pragma mark MVKDevice
 
+typedef struct MVKMTLBlitEncoder {
+	id<MTLBlitCommandEncoder> mtlBlitEncoder = nil;	/**< Blit encoder shared across calls; nil until one is created. */
+	id<MTLCommandBuffer> mtlCmdBuffer = nil;		/**< Command buffer the blit encoder belongs to; nil until one is created. */
+} MVKMTLBlitEncoder;
+
 /** Represents a Vulkan logical GPU device, associated with a physical device. */
 class MVKDevice : public MVKDispatchableVulkanAPIObject {
 
@@ -387,7 +392,7 @@
 	PFN_vkVoidFunction getProcAddr(const char* pName);
 
 	/** Retrieves a queue at the specified index within the specified family. */
-	MVKQueue* getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex);
+	MVKQueue* getQueue(uint32_t queueFamilyIndex = 0, uint32_t queueIndex = 0);
 
 	/** Block the current thread until all queues in this device are idle. */
 	VkResult waitIdle();
@@ -528,6 +533,9 @@
 	void freeMemory(MVKDeviceMemory* mvkDevMem,
 					const VkAllocationCallbacks* pAllocator);
 
+
+#pragma mark Operations
+
 	/** Applies the specified global memory barrier to all resource issued by this device. */
 	void applyMemoryBarrier(VkPipelineStageFlags srcStageMask,
 							VkPipelineStageFlags dstStageMask,
@@ -565,6 +573,9 @@
     /** Populates the specified statistics structure from the current activity performance statistics. */
     void getPerformanceStatistics(MVKPerformanceStatistics* pPerf);
 
+	/** Pulls each of the specified mapped memory ranges from the device, blocking until any command buffer this creates has completed, and returns the first non-success result, if any. */
+	VkResult invalidateMappedMemoryRanges(uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges);
+
 
 #pragma mark Metal
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index ff3bf4c..170a6c4 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -2122,6 +2122,9 @@
 	mvkDevMem->destroy();
 }
 
+
+#pragma mark Operations
+
 // Adds the specified resource for tracking, and returns the added resource.
 MVKResource* MVKDevice::addResource(MVKResource* rez) {
 	lock_guard<mutex> lock(_rezLock);
@@ -2197,6 +2200,25 @@
     if (pPerf) { *pPerf = _performanceStatistics; }
 }
 
+VkResult MVKDevice::invalidateMappedMemoryRanges(uint32_t memRangeCount, const VkMappedMemoryRange* pMemRanges) {
+	@autoreleasepool {
+		MVKMTLBlitEncoder blitEnc;			// Shared by all ranges; filled in by pullFromDevice() if it needs one
+		VkResult firstErr = VK_SUCCESS;		// Only the first non-success result is reported
+		for (uint32_t rangeIdx = 0; rangeIdx < memRangeCount; rangeIdx++) {
+			const VkMappedMemoryRange& memRange = pMemRanges[rangeIdx];
+			auto* devMem = (MVKDeviceMemory*)memRange.memory;
+			VkResult pullRslt = devMem->pullFromDevice(memRange.offset, memRange.size, false, &blitEnc);
+			if (firstErr == VK_SUCCESS) { firstErr = pullRslt; }
+		}
+		// Both objects stay nil if no range required a blit; messaging nil is then a no-op.
+		id<MTLCommandBuffer> mtlCmdBuff = blitEnc.mtlCmdBuffer;
+		[blitEnc.mtlBlitEncoder endEncoding];
+		[mtlCmdBuff commit];
+		[mtlCmdBuff waitUntilCompleted];		// Block so the CPU sees the synchronized contents on return
+		return firstErr;
+	}
+}
+
 
 #pragma mark Metal
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
index 9d468ef..557c51c 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.h
@@ -85,8 +85,18 @@
 	 * If this memory is host-visible, pulls the specified memory range from the device.
 	 * Normally, pulling will only occur if the device memory is non-coherent, but pulling
 	 * to coherent memory can be forced by setting evenIfCoherent to true.
+	 *
+	 * If pBlitEnc is not null, it points to a holder for a MTLBlitCommandEncoder and its
+	 * associated MTLCommandBuffer. If this instance has a MTLBuffer using managed memory,
+	 * this function may call synchronizeResource: on the MTLBlitCommandEncoder to
+	 * synchronize the GPU contents to the CPU. If the contents of the pBlitEnc do not
+	 * include a MTLBlitCommandEncoder and MTLCommandBuffer, this function will create
+	 * them and populate the contents into the MVKMTLBlitEncoder struct.
 	 */
-	VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size, bool evenIfCoherent = false);
+	VkResult pullFromDevice(VkDeviceSize offset,
+							VkDeviceSize size,
+							bool evenIfCoherent = false,
+							MVKMTLBlitEncoder* pBlitEnc = nullptr);
 
 
 #pragma mark Metal
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
index ee4aedf..5d5dd09 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceMemory.mm
@@ -19,6 +19,7 @@
 #include "MVKDeviceMemory.h"
 #include "MVKBuffer.h"
 #include "MVKImage.h"
+#include "MVKQueue.h"
 #include "MVKEnvironment.h"
 #include "mvk_datatypes.hpp"
 #include "MVKFoundation.h"
@@ -91,12 +92,24 @@
 	return VK_SUCCESS;
 }
 
-VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset, VkDeviceSize size, bool evenIfCoherent) {
+VkResult MVKDeviceMemory::pullFromDevice(VkDeviceSize offset,
+										 VkDeviceSize size,
+										 bool evenIfCoherent,
+										 MVKMTLBlitEncoder* pBlitEnc) {
 	// Coherent memory is flushed on unmap(), so it is only flushed if forced
     VkDeviceSize memSize = adjustMemorySize(size, offset);
 	if (memSize > 0 && isMemoryHostAccessible() && (evenIfCoherent || !isMemoryHostCoherent()) ) {
 		lock_guard<mutex> lock(_rezLock);
         for (auto& img : _images) { img->pullFromDevice(offset, memSize); }
+		// With MVK_MACOS, a managed-storage MTLBuffer is synchronized via the caller's blit encoder, creating the command buffer and encoder on first use.
+#if MVK_MACOS
+		if (pBlitEnc && _mtlBuffer && _mtlStorageMode == MTLStorageModeManaged) {
+			if ( !pBlitEnc->mtlCmdBuffer) { pBlitEnc->mtlCmdBuffer = [_device->getQueue()->getMTLCommandQueue() commandBufferWithUnretainedReferences]; }
+			if ( !pBlitEnc->mtlBlitEncoder) { pBlitEnc->mtlBlitEncoder = [pBlitEnc->mtlCmdBuffer blitCommandEncoder]; }
+			[pBlitEnc->mtlBlitEncoder synchronizeResource: _mtlBuffer];	// Encoded only: encoding is not ended and nothing is committed here
+		}
+#endif
+
 	}
 	return VK_SUCCESS;
 }
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
index e1e6467..b10aac6 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
@@ -166,14 +166,10 @@
 		signal(signaler);
 		if (_device->_useMTLEventsForSemaphores) {
 			// Unfortunately, we can't assume we have an MTLSharedEvent here.
-			// This means we need to execute a command on the device to signal
-			// the semaphore. Alternatively, we could always use an MTLSharedEvent,
-			// but that might impose unacceptable performance costs just to handle
-			// this one case.
-			MVKQueue* queue = _device->getQueue(0, 0);	
-			id<MTLCommandQueue> mtlQ = queue->getMTLCommandQueue();
-			id<MTLCommandBuffer> mtlCmdBuff = [mtlQ commandBufferWithUnretainedReferences];
-			[mtlCmdBuff enqueue];
+			// This means we need to execute a command on the device to signal the semaphore.
+			// Alternatively, we could always use an MTLSharedEvent, but that might impose
+			// unacceptable performance costs just to handle this one case.
+			id<MTLCommandBuffer> mtlCmdBuff = [_device->getQueue()->getMTLCommandQueue() commandBufferWithUnretainedReferences];
 			signaler.first->encodeSignal(mtlCmdBuff);
 			[mtlCmdBuff commit];
 		}
diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
index e44f368..a1e2fc9 100644
--- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm
+++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm
@@ -429,15 +429,10 @@
     VkDevice                                    device,
     uint32_t                                    memRangeCount,
     const VkMappedMemoryRange*                  pMemRanges) {
-	
+
 	MVKTraceVulkanCallStart();
-	VkResult rslt = VK_SUCCESS;
-	for (uint32_t i = 0; i < memRangeCount; i++) {
-		const VkMappedMemoryRange* pMem = &pMemRanges[i];
-		MVKDeviceMemory* mvkMem = (MVKDeviceMemory*)pMem->memory;
-		VkResult r = mvkMem->pullFromDevice(pMem->offset, pMem->size);
-		if (rslt == VK_SUCCESS) { rslt = r; }
-	}
+	MVKDevice* mvkDev = MVKDevice::getMVKDevice(device);
+	VkResult rslt = mvkDev->invalidateMappedMemoryRanges(memRangeCount, pMemRanges);
 	MVKTraceVulkanCallEnd();
 	return rslt;
 }