Merge pull request #1229 from cdavis5e/occlusion-query-temp-buffers

MVKCommandEncoder: Use the temp buffer mechanism for visibility buffers.
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index dd7df60..8fef07a 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -370,7 +370,7 @@
     void setComputeBytes(id<MTLComputeCommandEncoder> mtlEncoder, const void* bytes, NSUInteger length, uint32_t mtlBuffIndex);
 
     /** Get a temporary MTLBuffer that will be returned to a pool after the command buffer is finished. */
-    const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length);
+    const MVKMTLBufferAllocation* getTempMTLBuffer(NSUInteger length, bool dedicated = false);
 
     /** Returns the command encoding pool. */
     MVKCommandEncodingPool* getCommandEncodingPool();
@@ -415,8 +415,8 @@
 	/** The current Metal render encoder. */
 	id<MTLRenderCommandEncoder> _mtlRenderEncoder;
 
-	/** The buffer used to hold occlusion query results in this render pass. */
-	id<MTLBuffer> _visibilityResultMTLBuffer;
+	/** The buffer used to hold occlusion query results in a render pass. */
+	const MVKMTLBufferAllocation* _visibilityResultMTLBuffer;
 
     /** Tracks the current graphics pipeline bound to the encoder. */
     MVKPipelineCommandEncoderState _graphicsPipelineState;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index 6b41c3a..392dd74 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -336,18 +336,9 @@
     getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _multiviewPassIndex, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride);
     if (_occlusionQueryState.getNeedsVisibilityResultMTLBuffer()) {
         if (!_visibilityResultMTLBuffer) {
-            // Unfortunately, the temp buffer mechanism tends to allocate large buffers and return offsets into them.
-            // This won't work with visibility buffers, particularly if the offset is greater than the maximum supported
-            // by the device. So we can't use that.
-            // Use a local variable to make sure it gets copied.
-            id<MTLBuffer> visibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: _pDeviceMetalFeatures->maxQueryBufferSize options: MTLResourceStorageModePrivate];    // not retained
-            [visibilityResultMTLBuffer setPurgeableState: MTLPurgeableStateVolatile];
-            [_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) {
-                [visibilityResultMTLBuffer release];
-            }];
-            _visibilityResultMTLBuffer = visibilityResultMTLBuffer;
+            _visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true);
         }
-        mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer;
+        mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer->_mtlBuffer;
     }
 
     VkExtent2D fbExtent = _framebuffer->getExtent2D();
@@ -655,8 +646,8 @@
     }
 }
 
-const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length) {
-    const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length);
+const MVKMTLBufferAllocation* MVKCommandEncoder::getTempMTLBuffer(NSUInteger length, bool isDedicated) {
+    const MVKMTLBufferAllocation* mtlBuffAlloc = getCommandEncodingPool()->acquireMTLBufferAllocation(length, isDedicated);
 	MVKMTLBufferAllocationPool* pool = mtlBuffAlloc->getPool();
 
     // Return the MTLBuffer allocation to the pool once the command buffer is done with it
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index df213b7..f69b03f 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -929,7 +929,7 @@
         [mtlAccumEncoder setBuffer: pQueryPool->getVisibilityResultMTLBuffer()
                             offset: pQueryPool->getVisibilityResultOffset(query.first.query)
                            atIndex: 0];
-        [mtlAccumEncoder setBuffer: _cmdEncoder->_visibilityResultMTLBuffer
+        [mtlAccumEncoder setBuffer: _cmdEncoder->_visibilityResultMTLBuffer->_mtlBuffer
                             offset: query.second
                            atIndex: 1];
         [mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1)
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
index 3a255f0..adaa9e0 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
@@ -66,7 +66,7 @@
      * To return the returned allocation back to the pool to be reused,
      * call the returnToPool() function on the returned allocation.
      */
-    const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length);
+    const MVKMTLBufferAllocation* acquireMTLBufferAllocation(NSUInteger length, bool isDedicated = false);
 
 	/**
 	 * Returns a MTLRenderPipelineState dedicated to rendering to several attachments
@@ -153,6 +153,7 @@
     std::unordered_map<MVKBufferDescriptorData, MVKBuffer*> _transferBuffers;
     std::unordered_map<MVKBufferDescriptorData, MVKDeviceMemory*> _transferBufferMemory;
     MVKMTLBufferAllocator _mtlBufferAllocator;
+    MVKMTLBufferAllocator _dedicatedMtlBufferAllocator;
     id<MTLDepthStencilState> _cmdClearDepthOnlyDepthStencilState = nil;
     id<MTLDepthStencilState> _cmdClearStencilOnlyDepthStencilState = nil;
     id<MTLDepthStencilState> _cmdClearDepthAndStencilDepthStencilState = nil;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
index 03c2595..39efae6 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
@@ -77,7 +77,10 @@
 	MVK_ENC_REZ_ACCESS(_cmdClearDefaultDepthStencilState, newMTLDepthStencilState(useDepth, useStencil));
 }
 
-const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length) {
+const MVKMTLBufferAllocation* MVKCommandEncodingPool::acquireMTLBufferAllocation(NSUInteger length, bool isDedicated) {
+    if (isDedicated) {
+        return _dedicatedMtlBufferAllocator.acquireMTLBufferRegion(length);
+    }
     return _mtlBufferAllocator.acquireMTLBufferRegion(length);
 }
 
@@ -159,7 +162,8 @@
 #pragma mark Construction
 
 MVKCommandEncodingPool::MVKCommandEncodingPool(MVKCommandPool* commandPool) : _commandPool(commandPool),
-    _mtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true) {
+    _mtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxMTLBufferSize, true),
+    _dedicatedMtlBufferAllocator(commandPool->getDevice(), commandPool->getDevice()->_pMetalFeatures->maxQueryBufferSize, true, true) {
 }
 
 MVKCommandEncodingPool::~MVKCommandEncodingPool() {
diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h
index 310db4a..84ca121 100644
--- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h
+++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h
@@ -86,7 +86,7 @@
     MVKMTLBufferAllocation* newObject() override;
 
     /** Configures this instance to dispense MVKMTLBufferAllocation instances of the specified size. */
-    MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength);
+    MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated);
 
     ~MVKMTLBufferAllocationPool() override;
 
@@ -137,7 +137,7 @@
      * next power-of-two value that is at least as big as the specified maximum size.
 	 * If makeThreadSafe is true, a lock will be applied when an allocation is acquired.
      */
-    MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false);
+    MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe = false, bool isDedicated = false);
 
     ~MVKMTLBufferAllocator() override;
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
index e1e9f7c..b60b640 100644
--- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.mm
@@ -50,11 +50,11 @@
 }
 
 
-MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength)
+MVKMTLBufferAllocationPool::MVKMTLBufferAllocationPool(MVKDevice* device, NSUInteger allocationLength, bool isDedicated)
         : MVKObjectPool<MVKMTLBufferAllocation>(true) {
     _device = device;
     _allocationLength = allocationLength;
-    _mtlBufferLength = _allocationLength * calcMTLBufferAllocationCount();
+    _mtlBufferLength = _allocationLength * (isDedicated ? 1 : calcMTLBufferAllocationCount());
     _nextOffset = _mtlBufferLength;     // Force a MTLBuffer to be added on first access
 }
 
@@ -85,7 +85,7 @@
 	return _makeThreadSafe ? pRP->acquireObjectSafely() : pRP->acquireObject();
 }
 
-MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe) : MVKBaseDeviceObject(device) {
+MVKMTLBufferAllocator::MVKMTLBufferAllocator(MVKDevice* device, NSUInteger maxRegionLength, bool makeThreadSafe, bool isDedicated) : MVKBaseDeviceObject(device) {
     _maxAllocationLength = maxRegionLength;
 	_makeThreadSafe = makeThreadSafe;
 
@@ -96,7 +96,7 @@
     _regionPools.reserve(maxP2Exp + 1);
     NSUInteger allocLen = 1;
     for (uint32_t p2Exp = 0; p2Exp <= maxP2Exp; p2Exp++) {
-        _regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen));
+        _regionPools.push_back(new MVKMTLBufferAllocationPool(device, allocLen, isDedicated));
         allocLen <<= 1;
     }
 }