MVKMTLBufferAllocator: Make sure temporary buffer data is not discarded while in use.

e1ac50c0 marks temporary buffers as volatile, but it only mlock()s them for the duration
of the memcpy() of initial data, leaving the Metal runtime a window to discard the data
while the buffer is still in use. Simply putting the munlock() in a completion handler raises questions
about possible edge cases where multiple, non page-aligned allocations are live.
This patch fixes it by keeping the whole buffer non-volatile while it backs any active
allocations.

Fixes rendering issues in Sekiro: Shadows Die Twice menus.
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
index 1aa38f2..7cf349d 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
@@ -1612,11 +1612,9 @@
     NSUInteger dstMTLBuffOffset = _dstBuffer->getMTLBufferOffset() + _dstOffset;
 
     // Copy data to the source MTLBuffer
-    MVKMTLBufferAllocation* srcMTLBufferAlloc = (MVKMTLBufferAllocation*)cmdEncoder->getCommandEncodingPool()->acquireMTLBufferAllocation(_dataSize);
+    MVKMTLBufferAllocation* srcMTLBufferAlloc = cmdEncoder->getCommandEncodingPool()->acquireMTLBufferAllocation(_dataSize);
     void* pBuffData = srcMTLBufferAlloc->getContents();
-    mlock(pBuffData, _dataSize);
     memcpy(pBuffData, _srcDataCache.data(), _dataSize);
-    munlock(pBuffData, _dataSize);
 
     [mtlBlitEnc copyFromBuffer: srcMTLBufferAlloc->_mtlBuffer
                   sourceOffset: srcMTLBufferAlloc->_offset
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index 801b3f7..85f312b 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -695,7 +695,7 @@
 
 // Return the MTLBuffer allocation to the pool once the command buffer is done with it
 const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length, bool isPrivate, bool isDedicated) {
-    const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isPrivate, isDedicated);
+    MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isPrivate, isDedicated);
     [_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer> mcb) { mtlBuffAlloc->returnToPool(); }];
     return mtlBuffAlloc;
 }
@@ -708,9 +708,7 @@
 const MVKMTLBufferAllocation* MVKCommandEncoder::copyToTempMTLBufferAllocation(const void* bytes, NSUInteger length, bool isDedicated) {
 	const MVKMTLBufferAllocation* mtlBuffAlloc = getTempMTLBuffer(length, false, isDedicated);
     void* pBuffData = mtlBuffAlloc->getContents();
-    mlock(pBuffData, length);
     memcpy(pBuffData, bytes, length);
-    munlock(pBuffData, length);
 
     return mtlBuffAlloc;
 }
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
index 2b3e8ae..6318f64 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
@@ -66,7 +66,7 @@
      * To return the returned allocation back to the pool to be reused,
      * call the returnToPool() function on the returned allocation.
      */
-    const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isPrivate = false, bool isDedicated = false);
+    MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isPrivate = false, bool isDedicated = false);
 
 	/**
 	 * Returns a MTLRenderPipelineState dedicated to rendering to several attachments
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
index 16f702a..78f8bbc 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
@@ -77,7 +77,7 @@
 	MVK_ENC_REZ_ACCESS(_cmdClearDefaultDepthStencilState, newMTLDepthStencilState(useDepth, useStencil));
 }
 
-const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isPrivate, bool isDedicated) {
+MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isPrivate, bool isDedicated) {
     MVKAssert(isPrivate || !isDedicated, "Dedicated, host-shared temporary buffers are not supported."); 
     if (isDedicated) {
         return _dedicatedMtlBufferAllocator.acquireMTLBufferRegion(length);
diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h
index 7488305..9cc7ccd 100644
--- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h
+++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h
@@ -51,17 +51,20 @@
     MVKMTLBufferAllocationPool* getPool() const { return _pool; }
 
 	/** Returns this object back to the pool that created it. */
-    void returnToPool() const;
+    void returnToPool();
 
 	/** Constructs this instance with the specified pool as its origin. */
     MVKMTLBufferAllocation(MVKMTLBufferAllocationPool* pool,
                            id<MTLBuffer> mtlBuffer,
                            NSUInteger offset,
-                           NSUInteger length) : _pool(pool), _mtlBuffer(mtlBuffer), _offset(offset), _length(length) {}
+                           NSUInteger length,
+                           uint64_t poolIndex) : _pool(pool), _mtlBuffer(mtlBuffer), _offset(offset), _length(length), _poolIndex(poolIndex) {}
 
 protected:
-	MVKMTLBufferAllocationPool* _pool;
+    friend class MVKMTLBufferAllocationPool;
 
+    MVKMTLBufferAllocationPool* _pool;
+    uint64_t _poolIndex;
 };
 
 
@@ -78,6 +81,11 @@
 class MVKMTLBufferAllocationPool : public MVKObjectPool<MVKMTLBufferAllocation>, public MVKDeviceTrackingMixin {
 
 public:
+    /** Returns a new allocation. */
+    MVKMTLBufferAllocation* acquireAllocation();
+
+    /** Returns a new allocation (without mutual exclusion). */
+    MVKMTLBufferAllocation* acquireAllocationUnlocked();
 
 	/** Returns the Vulkan API opaque object controlling this object. */
 	MVKVulkanAPIObject* getVulkanAPIObject() override { return _device->getVulkanAPIObject(); };
@@ -93,7 +101,8 @@
 	
 	MVKBaseObject* getBaseObject() override { return this; };
 	MVKMTLBufferAllocation* newObject() override;
-	void returnAllocation(MVKMTLBufferAllocation* ba) { _isThreadSafe ? returnObjectSafely(ba) : returnObject(ba); }
+    void returnAllocationUnlocked(MVKMTLBufferAllocation* ba);
+    void returnAllocation(MVKMTLBufferAllocation* ba);
 	uint32_t calcMTLBufferAllocationCount();
     void addMTLBuffer();
 
@@ -101,8 +110,9 @@
     NSUInteger _allocationLength;
     NSUInteger _mtlBufferLength;
     MTLStorageMode _mtlStorageMode;
-	MVKSmallVector<id<MTLBuffer>, 64> _mtlBuffers;
-	bool _isThreadSafe;
+    struct MTLBufferTracker { id<MTLBuffer> mtlBuffer; uint64_t allocationCount; };
+    MVKSmallVector<MTLBufferTracker, 64> _mtlBuffers;
+    bool _isThreadSafe;
 };
 
 
@@ -132,7 +142,7 @@
      * To return the MVKMTLBufferAllocation back to the pool, call 
      * the returnToPool() function on the returned instance.
      */
-    const MVKMTLBufferAllocation* acquireMTLBufferRegion(NSUInteger length);
+    MVKMTLBufferAllocation* acquireMTLBufferRegion(NSUInteger length);
 
     /**
      * Configures this instance to dispense MVKMTLBufferAllocation up to the specified
diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
index 2aabbf9..ee7b46f 100644
--- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
@@ -24,7 +24,7 @@
 
 MVKVulkanAPIObject* MVKMTLBufferAllocation::getVulkanAPIObject() { return _pool->getVulkanAPIObject(); };
 
-void MVKMTLBufferAllocation::returnToPool() const { _pool->returnAllocation((MVKMTLBufferAllocation*)this); }
+void MVKMTLBufferAllocation::returnToPool() { _pool->returnAllocation(this); }
 
 
 #pragma mark -
@@ -39,16 +39,49 @@
     // of future allocation to beyond this allocation.
     NSUInteger offset = _nextOffset;
     _nextOffset += _allocationLength;
-    return new MVKMTLBufferAllocation(this, _mtlBuffers.back(), offset, _allocationLength);
+    return new MVKMTLBufferAllocation(this, _mtlBuffers.back().mtlBuffer, offset, _allocationLength, _mtlBuffers.size() - 1);
 }
 
 // Adds a new MTLBuffer to the buffer pool and resets the next offset to the start of it
 void MVKMTLBufferAllocationPool::addMTLBuffer() {
     MTLResourceOptions mbOpts = (_mtlStorageMode << MTLResourceStorageModeShift) | MTLResourceCPUCacheModeDefaultCache;
-    _mtlBuffers.push_back([_device->getMTLDevice() newBufferWithLength: _mtlBufferLength options: mbOpts]);
+    _mtlBuffers.push_back({ [_device->getMTLDevice() newBufferWithLength: _mtlBufferLength options: mbOpts], 0 });
     _nextOffset = 0;
 }
 
+MVKMTLBufferAllocation* MVKMTLBufferAllocationPool::acquireAllocationUnlocked() {
+    MVKMTLBufferAllocation* ba = acquireObject();
+    if (!_mtlBuffers[ba->_poolIndex].allocationCount++) {
+        [ba->_mtlBuffer setPurgeableState: MTLPurgeableStateNonVolatile];
+    }
+    return ba;
+}
+
+MVKMTLBufferAllocation* MVKMTLBufferAllocationPool::acquireAllocation() {
+    if (_isThreadSafe) {
+        std::lock_guard<std::mutex> lock(_lock);
+        return acquireAllocationUnlocked();
+    } else {
+        return acquireAllocationUnlocked();
+    }
+}
+
+void MVKMTLBufferAllocationPool::returnAllocationUnlocked(MVKMTLBufferAllocation* ba) {
+    if (!--_mtlBuffers[ba->_poolIndex].allocationCount) {
+        [ba->_mtlBuffer setPurgeableState: MTLPurgeableStateVolatile];
+    }
+    returnObject(ba);
+}
+
+void MVKMTLBufferAllocationPool::returnAllocation(MVKMTLBufferAllocation* ba) {
+    if (_isThreadSafe) {
+        std::lock_guard<std::mutex> lock(_lock);
+        returnAllocationUnlocked(ba);
+    } else {
+        returnAllocationUnlocked(ba);
+    }
+}
+
 
 MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool makeThreadSafe,
 													   bool isDedicated, MTLStorageMode mtlStorageMode) :
@@ -73,14 +106,17 @@
 }
 
 MVKMTLBufferAllocationPool::~MVKMTLBufferAllocationPool() {
-    mvkReleaseContainerContents(_mtlBuffers);
+    for (uint32_t bufferIndex = 0; bufferIndex < _mtlBuffers.size(); ++bufferIndex) {
+        [_mtlBuffers[bufferIndex].mtlBuffer release];
+    }
+    _mtlBuffers.clear();
 }
 
 
 #pragma mark -
 #pragma mark MVKMTLBufferAllocator
 
-const MVKMTLBufferAllocation* MVKMTLBufferAllocator::acquireMTLBufferRegion(NSUInteger length) {
+MVKMTLBufferAllocation* MVKMTLBufferAllocator::acquireMTLBufferRegion(NSUInteger length) {
 	MVKAssert(length <= _maxAllocationLength, "This MVKMTLBufferAllocator has been configured to dispense MVKMTLBufferRegions no larger than %lu bytes.", (unsigned long)_maxAllocationLength);
 
 	// Can't allocate a segment smaller than the minimum MTLBuffer alignment.
@@ -88,12 +124,7 @@
 
     // Convert max length to the next power-of-two exponent to use as a lookup
     NSUInteger p2Exp = mvkPowerOfTwoExponent(length);
-	MVKMTLBufferAllocationPool* pRP = _regionPools[p2Exp];
-	const MVKMTLBufferAllocation* region = _isThreadSafe ? pRP->acquireObjectSafely() : pRP->acquireObject();
-	if (region) {
-		[region->_mtlBuffer setPurgeableState: MTLPurgeableStateVolatile];
-	}
-	return region;
+    return _regionPools[p2Exp]->acquireAllocation();
 }
 
 MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated, MTLStorageMode mtlStorageMode) : MVKBaseDeviceObject(device) {
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
index 64128ea..c1ca6ba 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.h
@@ -397,7 +397,7 @@
 protected:
 	inline uint8_t* getData() { return _mvkMTLBufferAllocation ? (uint8_t*)_mvkMTLBufferAllocation->getContents() : nullptr; }
 
-	const MVKMTLBufferAllocation* _mvkMTLBufferAllocation = nullptr;
+	MVKMTLBufferAllocation* _mvkMTLBufferAllocation = nullptr;
 };
 
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
index 1e9dddc..289bc44 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h
@@ -180,7 +180,7 @@
 			  VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniformBlock);
 
 	/** Returns an MTLBuffer region allocation. */
-	const MVKMTLBufferAllocation* acquireMTLBufferRegion(NSUInteger length);
+    MVKMTLBufferAllocation* acquireMTLBufferRegion(NSUInteger length);
 	/**
 	 * Returns the Metal argument buffer to which resources are written,
 	 * or return nil if Metal argument buffers are not being used.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
index ff9fcd5..72a95cc 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm
@@ -339,7 +339,7 @@
     }
 }
 
-const MVKMTLBufferAllocation* MVKDescriptorSet::acquireMTLBufferRegion(NSUInteger length) {
+MVKMTLBufferAllocation* MVKDescriptorSet::acquireMTLBufferRegion(NSUInteger length) {
 	return _pool->_inlineBlockMTLBufferAllocator.acquireMTLBufferRegion(length);
 }
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
index f3006f8..cf3853f 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
@@ -223,9 +223,7 @@
 	const MVKMTLBufferAllocation* tempBuff = cmdEncoder->getTempMTLBuffer(queryCount * _queryElementCount * sizeof(uint64_t));
 	void* pBuffData = tempBuff->getContents();
 	size_t size = queryCount * _queryElementCount * sizeof(uint64_t);
-	mlock(pBuffData, size);
 	memcpy(pBuffData, &_timestamps[firstQuery], size);
-	munlock(pBuffData, size);
 	offset = tempBuff->_offset;
 	return tempBuff->_mtlBuffer;
 }