Merge pull request #1223 from cdavis5e/occlusion-query-rewrite

MVKQueryPool: Totally rework the way occlusion queries work.
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm
index 4b520b5..8df18a4 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm
@@ -92,7 +92,7 @@
     uint32_t query = _query;
     if (cmdEncoder->getMultiviewPassIndex() > 0)
         query += cmdEncoder->getSubpass()->getViewCountUpToMetalPass(cmdEncoder->getMultiviewPassIndex() - 1);
-    cmdEncoder->markTimestamp(_queryPool, query);
+    _queryPool->endQuery(query, cmdEncoder);
 }
 
 
@@ -112,6 +112,7 @@
 }
 
 void MVKCmdResetQueryPool::encode(MVKCommandEncoder* cmdEncoder) {
+    cmdEncoder->resetQueries(_queryPool, _query, _queryCount);
     _queryPool->resetResults(_query, _queryCount, cmdEncoder);
 }
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index b78b165..dd7df60 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -92,10 +92,9 @@
      * Metal requires that a visibility buffer is established when a render pass is created, 
      * but Vulkan permits it to be set during a render pass. When the first occlusion query
      * command is added, it sets this value so that it can be applied when the first renderpass
-     * is begun. The execution of subsequent occlusion query commands may change the visibility
-     * buffer during command execution, and begin a new Metal renderpass.
+     * is begun.
      */
-    id<MTLBuffer> _initialVisibilityResultMTLBuffer;
+    bool _needsVisibilityResultMTLBuffer;
 
 	/** Called when a MVKCmdExecuteCommands is added to this command buffer. */
 	void recordExecuteCommands(const MVKArrayRef<MVKCommandBuffer*> secondaryCommandBuffers);
@@ -387,6 +386,9 @@
     /** Marks a timestamp for the specified query. */
     void markTimestamp(MVKQueryPool* pQueryPool, uint32_t query);
 
+    /** Reset a range of queries. */
+    void resetQueries(MVKQueryPool* pQueryPool, uint32_t firstQuery, uint32_t queryCount);
+
 #pragma mark Dynamic encoding state accessed directly
 
     /** A reference to the Metal features supported by the device. */
@@ -413,6 +415,9 @@
 	/** The current Metal render encoder. */
 	id<MTLRenderCommandEncoder> _mtlRenderEncoder;
 
+	/** The buffer used to hold occlusion query results in this render pass. */
+	id<MTLBuffer> _visibilityResultMTLBuffer;
+
     /** Tracks the current graphics pipeline bound to the encoder. */
     MVKPipelineCommandEncoderState _graphicsPipelineState;
 
@@ -461,7 +466,7 @@
 	MVKCommandEncoder(MVKCommandBuffer* cmdBuffer);
 
 protected:
-    void addActivatedQuery(MVKQueryPool* pQueryPool, uint32_t query);
+    void addActivatedQueries(MVKQueryPool* pQueryPool, uint32_t query, uint32_t queryCount);
     void finishQueries();
 	void setSubpass(MVKCommand* passCmd, VkSubpassContents subpassContents, uint32_t subpassIndex);
 	void clearRenderArea();
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index e0f6c2f..6b41c3a 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -74,7 +74,7 @@
 	_wasExecuted = false;
 	_isExecutingNonConcurrently.clear();
 	_commandCount = 0;
-	_initialVisibilityResultMTLBuffer = nil;		// not retained
+	_needsVisibilityResultMTLBuffer = false;
 	_lastTessellationPipeline = nullptr;
 	_lastMultiviewSubpass = nullptr;
 	setConfigurationResult(VK_NOT_READY);
@@ -198,10 +198,10 @@
 // found among any of the secondary command buffers, to support the case where a render pass is started in
 // the primary command buffer but the visibility query is started inside one of the secondary command buffers.
 void MVKCommandBuffer::recordExecuteCommands(const MVKArrayRef<MVKCommandBuffer*> secondaryCommandBuffers) {
-	if (_initialVisibilityResultMTLBuffer == nil) {
+	if (!_needsVisibilityResultMTLBuffer) {
 		for (MVKCommandBuffer* cmdBuff : secondaryCommandBuffers) {
-			if (cmdBuff->_initialVisibilityResultMTLBuffer) {
-				_initialVisibilityResultMTLBuffer = cmdBuff->_initialVisibilityResultMTLBuffer;
+			if (cmdBuff->_needsVisibilityResultMTLBuffer) {
+				_needsVisibilityResultMTLBuffer = true;
 				break;
 			}
 		}
@@ -334,7 +334,21 @@
 
     MTLRenderPassDescriptor* mtlRPDesc = [MTLRenderPassDescriptor renderPassDescriptor];
     getSubpass()->populateMTLRenderPassDescriptor(mtlRPDesc, _multiviewPassIndex, _framebuffer, _clearValues.contents(), _isRenderingEntireAttachment, loadOverride);
-    mtlRPDesc.visibilityResultBuffer = _occlusionQueryState.getVisibilityResultMTLBuffer();
+    if (_occlusionQueryState.getNeedsVisibilityResultMTLBuffer()) {
+        if (!_visibilityResultMTLBuffer) {
+            // Unfortunately, the temp buffer mechanism tends to allocate large buffers and return offsets into them.
+            // This won't work with visibility buffers, particularly if the offset is greater than the maximum supported
+            // by the device. So we can't use that.
+            // Use a local variable to make sure it gets copied.
+            id<MTLBuffer> visibilityResultMTLBuffer = [getMTLDevice() newBufferWithLength: _pDeviceMetalFeatures->maxQueryBufferSize options: MTLResourceStorageModePrivate];    // not retained
+            [visibilityResultMTLBuffer setPurgeableState: MTLPurgeableStateVolatile];
+            [_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) {
+                [visibilityResultMTLBuffer release];
+            }];
+            _visibilityResultMTLBuffer = visibilityResultMTLBuffer;
+        }
+        mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer;
+    }
 
     VkExtent2D fbExtent = _framebuffer->getExtent2D();
     mtlRPDesc.renderTargetWidthMVK = max(min(_renderArea.offset.x + _renderArea.extent.width, fbExtent.width), 1u);
@@ -525,8 +539,24 @@
 
 void MVKCommandEncoder::endMetalRenderEncoding() {
 //    MVKLogDebugIf(_mtlRenderEncoder, "Render subpass end MTLRenderCommandEncoder.");
+    if (_mtlRenderEncoder == nil) { return; }
+
     [_mtlRenderEncoder endEncoding];
 	_mtlRenderEncoder = nil;    // not retained
+
+    _graphicsPipelineState.endMetalRenderPass();
+    _graphicsResourcesState.endMetalRenderPass();
+    _viewportState.endMetalRenderPass();
+    _scissorState.endMetalRenderPass();
+    _depthBiasState.endMetalRenderPass();
+    _blendColorState.endMetalRenderPass();
+    _vertexPushConstants.endMetalRenderPass();
+    _tessCtlPushConstants.endMetalRenderPass();
+    _tessEvalPushConstants.endMetalRenderPass();
+    _fragmentPushConstants.endMetalRenderPass();
+    _depthStencilState.endMetalRenderPass();
+    _stencilReferenceValueState.endMetalRenderPass();
+    _occlusionQueryState.endMetalRenderPass();
 }
 
 void MVKCommandEncoder::endCurrentMetalEncoding() {
@@ -655,7 +685,11 @@
 
 void MVKCommandEncoder::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {
     _occlusionQueryState.beginOcclusionQuery(pQueryPool, query, flags);
-    addActivatedQuery(pQueryPool, query);
+    uint32_t queryCount = 1;
+    if (_renderPass && getSubpass()->isMultiview()) {
+        queryCount = getSubpass()->getViewCountInMetalPass(_multiviewPassIndex);
+    }
+    addActivatedQueries(pQueryPool, query, queryCount);
 }
 
 void MVKCommandEncoder::endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query) {
@@ -663,16 +697,21 @@
 }
 
 void MVKCommandEncoder::markTimestamp(MVKQueryPool* pQueryPool, uint32_t query) {
-    addActivatedQuery(pQueryPool, query);
+    uint32_t queryCount = 1;
+    if (_renderPass && getSubpass()->isMultiview()) {
+        queryCount = getSubpass()->getViewCountInMetalPass(_multiviewPassIndex);
+    }
+    addActivatedQueries(pQueryPool, query, queryCount);
 }
 
-// Marks the specified query as activated
-void MVKCommandEncoder::addActivatedQuery(MVKQueryPool* pQueryPool, uint32_t query) {
+void MVKCommandEncoder::resetQueries(MVKQueryPool* pQueryPool, uint32_t firstQuery, uint32_t queryCount) {
+    addActivatedQueries(pQueryPool, firstQuery, queryCount);
+}
+
+// Marks the specified queries as activated
+void MVKCommandEncoder::addActivatedQueries(MVKQueryPool* pQueryPool, uint32_t query, uint32_t queryCount) {
     if ( !_pActivatedQueries ) { _pActivatedQueries = new MVKActivatedQueries(); }
-    uint32_t endQuery = query + 1;
-    if (_renderPass && getSubpass()->isMultiview()) {
-        endQuery = query + getSubpass()->getViewCountInMetalPass(_multiviewPassIndex);
-    }
+    uint32_t endQuery = query + queryCount;
     while (query < endQuery) {
         (*_pActivatedQueries)[pQueryPool].push_back(query++);
     }
@@ -698,6 +737,7 @@
 
 MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer) : MVKBaseDeviceObject(cmdBuffer->getDevice()),
         _cmdBuffer(cmdBuffer),
+        _visibilityResultMTLBuffer(nil),
         _graphicsPipelineState(this),
         _computePipelineState(this),
         _viewportState(this),
@@ -772,6 +812,7 @@
         case kMVKCommandUseTessellationVertexTessCtl: return @"vkCmdDraw (vertex and tess control stages) ComputeEncoder";
         case kMVKCommandUseMultiviewInstanceCountAdjust: return @"vkCmdDraw (multiview instance count adjustment) ComputeEncoder";
         case kMVKCommandUseCopyQueryPoolResults:return @"vkCmdCopyQueryPoolResults ComputeEncoder";
+        case kMVKCommandUseAccumOcclusionQuery: return @"Post-render-pass occlusion query accumulation ComputeEncoder";
         default:                                return @"Unknown Use ComputeEncoder";
     }
 }
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
index fb42d47..db377c5 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
@@ -66,6 +66,11 @@
      */
 	virtual void beginMetalRenderPass() { if (_isModified) { markDirty(); } }
 
+	/**
+	 * Called automatically when a Metal render pass ends.
+	 */
+	virtual void endMetalRenderPass() { }
+
     /**
      * If the content of this instance is dirty, marks this instance as no longer dirty
      * and calls the encodeImpl() function to encode the content onto the Metal encoder.
@@ -572,14 +577,16 @@
 
 public:
 
+	void endMetalRenderPass() override;
+
     /** Begins an occlusion query. */
     void beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags);
 
     /** Ends an occlusion query. */
     void endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query);
 
-    /** Returns the MTLBuffer used to hold occlusion query results. */
-    id<MTLBuffer> getVisibilityResultMTLBuffer();
+    /** Returns whether an MTLBuffer is needed to hold occlusion query results. */
+    bool getNeedsVisibilityResultMTLBuffer();
 
     /** Constructs this instance for the specified command encoder. */
     MVKOcclusionQueryCommandEncoderState(MVKCommandEncoder* cmdEncoder);
@@ -588,11 +595,10 @@
     void encodeImpl(uint32_t) override;
     void resetImpl() override;
 
-    id<MTLBuffer> _visibilityResultMTLBuffer = nil;
+    bool _needsVisibilityResultMTLBuffer = false;
     MTLVisibilityResultMode _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
     NSUInteger _mtlVisibilityResultOffset = 0;
-	std::unordered_map<MVKQuerySpec, id<MTLRenderCommandEncoder>> _mtlEncodersUsed;
-	MVKQuerySpec _currentQuery;
+	MVKSmallVector<std::pair<MVKQuerySpec, NSUInteger>> _mtlRenderPassQueries;
 };
 
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index edc814b..df213b7 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -916,18 +916,43 @@
 #pragma mark -
 #pragma mark MVKOcclusionQueryCommandEncoderState
 
+void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
+
+    if (_mtlRenderPassQueries.empty()) { return; }  // Nothing to do.
+
+    id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
+    id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery);
+    [mtlAccumEncoder setComputePipelineState: mtlAccumState];
+    for (auto& query : _mtlRenderPassQueries) {
+        // Accumulate the current results to the query pool's buffer.
+        auto* pQueryPool = (MVKOcclusionQueryPool*)query.first.queryPool;
+        [mtlAccumEncoder setBuffer: pQueryPool->getVisibilityResultMTLBuffer()
+                            offset: pQueryPool->getVisibilityResultOffset(query.first.query)
+                           atIndex: 0];
+        [mtlAccumEncoder setBuffer: _cmdEncoder->_visibilityResultMTLBuffer
+                            offset: query.second
+                           atIndex: 1];
+        [mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1)
+                        threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
+    }
+    _cmdEncoder->endCurrentMetalEncoding();
+    _mtlRenderPassQueries.clear();
+}
+
 void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {
 
-	_currentQuery.set(pQueryPool, query);
-
-    NSUInteger offset = pQueryPool->getVisibilityResultOffset(query);
+    MVKQuerySpec querySpec;
+    querySpec.set(pQueryPool, query);
+    NSUInteger offset = _mtlRenderPassQueries.empty() ? 0 : _mtlVisibilityResultOffset + 8;
     NSUInteger maxOffset = _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes;
+    offset = min(offset, maxOffset);
+    _mtlRenderPassQueries.push_back(make_pair(querySpec, offset));
 
     bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
     _mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
-    _mtlVisibilityResultOffset = min(offset, maxOffset);
+    _mtlVisibilityResultOffset = offset;
 
-    _visibilityResultMTLBuffer = pQueryPool->getVisibilityResultMTLBuffer();    // not retained
+    _needsVisibilityResultMTLBuffer = true;
 
     markDirty();
 }
@@ -936,31 +961,17 @@
 	reset();
 }
 
-id<MTLBuffer> MVKOcclusionQueryCommandEncoderState::getVisibilityResultMTLBuffer() { return _visibilityResultMTLBuffer; }
+bool MVKOcclusionQueryCommandEncoderState::getNeedsVisibilityResultMTLBuffer() { return _needsVisibilityResultMTLBuffer; }
 
 void MVKOcclusionQueryCommandEncoderState::encodeImpl(uint32_t stage) {
 	if (stage != kMVKGraphicsStageRasterization) { return; }
 
-	// Metal does not allow a query to be run twice on a single render encoder.
-	// If the query is active and was already used for the current Metal render encoder,
-	// log an error and terminate the current query. Remember which MTLRenderEncoder
-	// was used for this query to test for this situation on future queries.
-	if (_mtlVisibilityResultMode != MTLVisibilityResultModeDisabled) {
-		id<MTLRenderCommandEncoder> currMTLRendEnc = _cmdEncoder->_mtlRenderEncoder;
-		if (currMTLRendEnc == _mtlEncodersUsed[_currentQuery]) {
-			MVKLogError("vkCmdBeginQuery(): Metal does not support using the same occlusion query more than once within a single Vulkan render subpass.");
-			resetImpl();
-		}
-		_mtlEncodersUsed[_currentQuery] = currMTLRendEnc;
-	}
-
 	[_cmdEncoder->_mtlRenderEncoder setVisibilityResultMode: _mtlVisibilityResultMode
 													 offset: _mtlVisibilityResultOffset];
 }
 
 void MVKOcclusionQueryCommandEncoderState::resetImpl() {
-	_currentQuery.reset();
-    _visibilityResultMTLBuffer = _cmdEncoder->_cmdBuffer->_initialVisibilityResultMTLBuffer;
+    _needsVisibilityResultMTLBuffer = _cmdEncoder->_cmdBuffer->_needsVisibilityResultMTLBuffer;
     _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
     _mtlVisibilityResultOffset = 0;
 }
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
index b85f02e..3a255f0 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.h
@@ -129,6 +129,9 @@
 	/** Returns a MTLComputePipelineState for copying query results to a buffer. */
 	id<MTLComputePipelineState> getCmdCopyQueryPoolResultsMTLComputePipelineState();
 
+	/** Returns a MTLComputePipelineState for accumulating occlusion query results over multiple render passes. */
+	id<MTLComputePipelineState> getAccumulateOcclusionQueryResultsMTLComputePipelineState();
+
 	/** Deletes all the internal resources. */
 	void clear();
 
@@ -164,5 +167,6 @@
 	id<MTLComputePipelineState> _mtlDrawIndirectTessConvertBuffersComputePipelineState[2] = {nil, nil};
 	id<MTLComputePipelineState> _mtlDrawIndexedCopyIndexBufferComputePipelineState[2] = {nil, nil};
 	id<MTLComputePipelineState> _mtlCopyQueryPoolResultsComputePipelineState = nil;
+	id<MTLComputePipelineState> _mtlAccumOcclusionQueryResultsComputePipelineState = nil;
 };
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
index 1861367..03c2595 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncodingPool.mm
@@ -146,6 +146,10 @@
 	MVK_ENC_REZ_ACCESS(_mtlCopyQueryPoolResultsComputePipelineState, newCmdCopyQueryPoolResultsMTLComputePipelineState(_commandPool));
 }
 
+id<MTLComputePipelineState> MVKCommandEncodingPool::getAccumulateOcclusionQueryResultsMTLComputePipelineState() {
+	MVK_ENC_REZ_ACCESS(_mtlAccumOcclusionQueryResultsComputePipelineState, newAccumulateOcclusionQueryResultsMTLComputePipelineState(_commandPool));
+}
+
 void MVKCommandEncodingPool::clear() {
 	lock_guard<mutex> lock(_lock);
 	destroyMetalResources();
@@ -233,5 +237,8 @@
 
     [_mtlCopyQueryPoolResultsComputePipelineState release];
     _mtlCopyQueryPoolResultsComputePipelineState = nil;
+
+    [_mtlAccumOcclusionQueryResultsComputePipelineState release];
+    _mtlAccumOcclusionQueryResultsComputePipelineState = nil;
 }
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h b/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
index 50773f4..7b715f3 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandPipelineStateFactoryShaderSource.h
@@ -376,5 +376,13 @@
     }                                                                                                           \n\
 }                                                                                                               \n\
                                                                                                                 \n\
+kernel void accumulateOcclusionQueryResults(device VisibilityBuffer& dest [[buffer(0)]],                        \n\
+                                            const device VisibilityBuffer& src [[buffer(1)]]) {                 \n\
+    uint32_t oldDestCount = dest.count;                                                                         \n\
+    dest.count += src.count;                                                                                    \n\
+    dest.countHigh += src.countHigh;                                                                            \n\
+    if (dest.count < max(oldDestCount, src.count)) { dest.countHigh++; }                                        \n\
+}                                                                                                               \n\
+                                                                                                                \n\
 ";
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
index 9d2dc96..e036c45 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.h
@@ -449,6 +449,9 @@
 	/** Returns a new MTLComputePipelineState for copying query results to a buffer. */
 	id<MTLComputePipelineState> newCmdCopyQueryPoolResultsMTLComputePipelineState(MVKVulkanAPIDeviceObject* owner);
 
+	/** Returns a new MTLComputePipelineState for accumulating occlusion query results to a buffer. */
+	id<MTLComputePipelineState> newAccumulateOcclusionQueryResultsMTLComputePipelineState(MVKVulkanAPIDeviceObject* owner);
+
 
 #pragma mark Construction
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
index 7afc215..d5c3421 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm
@@ -554,6 +554,10 @@
 	return newMTLComputePipelineState("cmdCopyQueryPoolResultsToBuffer", owner);
 }
 
+id<MTLComputePipelineState> MVKCommandResourceFactory::newAccumulateOcclusionQueryResultsMTLComputePipelineState(MVKVulkanAPIDeviceObject* owner) {
+	return newMTLComputePipelineState("accumulateOcclusionQueryResults", owner);
+}
+
 
 #pragma mark Support methods
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h
index 12df04e..91dba09 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h
@@ -143,6 +143,7 @@
 class MVKTimestampQueryPool : public MVKQueryPool {
 
 public:
+    void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) override;
     void finishQueries(const MVKArrayRef<uint32_t>& queries) override;
 
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
index 05073cb..86fb43f 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm
@@ -31,6 +31,7 @@
 
 void MVKQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
     uint32_t queryCount = cmdEncoder->isInRenderPass() ? cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1;
+    queryCount = max(queryCount, 1u);
     lock_guard<mutex> lock(_availabilityLock);
     for (uint32_t i = query; i < query + queryCount; ++i) {
         _availability[i] = DeviceAvailable;
@@ -52,7 +53,11 @@
 // Mark queries as available
 void MVKQueryPool::finishQueries(const MVKArrayRef<uint32_t>& queries) {
     lock_guard<mutex> lock(_availabilityLock);
-    for (uint32_t qry : queries) { _availability[qry] = Available; }
+    for (uint32_t qry : queries) {
+        if (_availability[qry] == DeviceAvailable) {
+            _availability[qry] = Available;
+        }
+    }
     _availabilityBlocker.notify_all();      // Predicate of each wait() call will check whether all required queries are available
 }
 
@@ -192,6 +197,11 @@
 #pragma mark -
 #pragma mark MVKTimestampQueryPool
 
+void MVKTimestampQueryPool::endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) {
+    cmdEncoder->markTimestamp(this, query);
+    MVKQueryPool::endQuery(query, cmdEncoder);
+}
+
 // Update timestamp values, then mark queries as available
 void MVKTimestampQueryPool::finishQueries(const MVKArrayRef<uint32_t>& queries) {
     uint64_t ts = mvkGetTimestamp();
@@ -306,9 +316,7 @@
         cmdBuffer->setConfigurationResult(reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The query offset value %lu is larger than the maximum offset value %lu available on this device.", offset, maxOffset));
     }
 
-    if (cmdBuffer->_initialVisibilityResultMTLBuffer == nil) {
-        cmdBuffer->_initialVisibilityResultMTLBuffer = getVisibilityResultMTLBuffer();
-    }
+    cmdBuffer->_needsVisibilityResultMTLBuffer = true;
 }
 
 
diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
index 0dba5d5..afbc1c6 100644
--- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h
+++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
@@ -87,7 +87,8 @@
     kMVKCommandUseDispatch,                 /**< vkCmdDispatch. */
     kMVKCommandUseTessellationVertexTessCtl,/**< vkCmdDraw* - vertex and tessellation control stages. */
 	kMVKCommandUseMultiviewInstanceCountAdjust,/**< vkCmdDrawIndirect* - adjust instance count for multiview. */
-    kMVKCommandUseCopyQueryPoolResults      /**< vkCmdCopyQueryPoolResults. */
+    kMVKCommandUseCopyQueryPoolResults,     /**< vkCmdCopyQueryPoolResults. */
+    kMVKCommandUseAccumOcclusionQuery       /**< Any command terminating a Metal render pass with active visibility buffer. */
 } MVKCommandUse;
 
 /** Represents a given stage of a graphics pipeline. */