Merge pull request #1387 from billhollings/occlusion-query-fixes-for-M1
Occlusion query fixes for M1
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 25c119f..64df3cc 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -33,6 +33,8 @@
- Fix synchronization issue with locking `MTLArgumentEncoder` for Metal Argument Buffers.
- Fix race condition on submission fence during device loss.
- Fix crash using memoryless storage for input attachments on Apple Silicon.
+- Fix issue where M1 GPU does not support reusing Metal visibility buffer offsets
+ across separate render encoders within a single Metal command buffer (Vulkan submit).
- On command buffer submission failure, if `MVKConfiguration::resumeLostDevice` enabled, do not release
waits on `VkDevice`, and do not return `VK_ERROR_DEVICE_LOST`, unless `VkPhysicalDevice` is also lost.
- Fix inconsistent handling of linear attachment decisions on Apple Silicon.
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index 952c7ec..7d9c92f 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -47,6 +47,16 @@
#pragma mark -
+#pragma mark MVKCommandEncodingContext
+
+/** Context for tracking information across multiple encodings. */
+typedef struct MVKCommandEncodingContext {
+ NSUInteger mtlVisibilityResultOffset = 0;
+ const MVKMTLBufferAllocation* visibilityResultBuffer = nullptr;
+} MVKCommandEncodingContext;
+
+
+#pragma mark -
#pragma mark MVKCommandBuffer
/** Represents a Vulkan command pool. */
@@ -83,7 +93,7 @@
inline MVKCommandPool* getCommandPool() { return _commandPool; }
/** Submit the commands in this buffer as part of the queue submission. */
- void submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit);
+ void submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit, MVKCommandEncodingContext* pEncodingContext);
/** Returns whether this command buffer can be submitted to a queue more than once. */
inline bool getIsReusable() { return _isReusable; }
@@ -264,7 +274,7 @@
MVKVulkanAPIObject* getVulkanAPIObject() override { return _cmdBuffer->getVulkanAPIObject(); };
/** Encode commands from the command buffer onto the Metal command buffer. */
- void encode(id<MTLCommandBuffer> mtlCmdBuff);
+ void encode(id<MTLCommandBuffer> mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext);
/** Encode commands from the specified secondary command buffer onto the Metal command buffer. */
void encodeSecondary(MVKCommandBuffer* secondaryCmdBuffer);
@@ -407,6 +417,9 @@
#pragma mark Dynamic encoding state accessed directly
+ /** Context for tracking information across multiple encodings. */
+ MVKCommandEncodingContext* _pEncodingContext;
+
/** A reference to the Metal features supported by the device. */
const MVKPhysicalDeviceMetalFeatures* _pDeviceMetalFeatures;
@@ -428,9 +441,6 @@
/** The current Metal render encoder. */
id<MTLRenderCommandEncoder> _mtlRenderEncoder;
- /** The buffer used to hold occlusion query results in a render pass. */
- const MVKMTLBufferAllocation* _visibilityResultMTLBuffer;
-
/** Tracks the current graphics pipeline bound to the encoder. */
MVKPipelineCommandEncoderState _graphicsPipelineState;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index 85f312b..18d2441 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -105,7 +105,8 @@
_commandCount++;
}
-void MVKCommandBuffer::submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit) {
+void MVKCommandBuffer::submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit,
+ MVKCommandEncodingContext* pEncodingContext) {
if ( !canExecute() ) { return; }
if (_prefilledMTLCmdBuffer) {
@@ -113,7 +114,7 @@
clearPrefilledMTLCommandBuffer();
} else {
MVKCommandEncoder encoder(this);
- encoder.encode(cmdBuffSubmit->getActiveMTLCommandBuffer());
+ encoder.encode(cmdBuffSubmit->getActiveMTLCommandBuffer(), pEncodingContext);
}
if ( !_supportsConcurrentExecution ) { _isExecutingNonConcurrently.clear(); }
@@ -150,8 +151,9 @@
uint32_t qIdx = 0;
_prefilledMTLCmdBuffer = _commandPool->newMTLCommandBuffer(qIdx); // retain
+ MVKCommandEncodingContext encodingContext;
MVKCommandEncoder encoder(this);
- encoder.encode(_prefilledMTLCmdBuffer);
+ encoder.encode(_prefilledMTLCmdBuffer, &encodingContext);
// Once encoded onto Metal, if this command buffer is not reusable, we don't need the
// MVKCommand instances anymore, so release them in order to reduce memory pressure.
@@ -246,13 +248,15 @@
#pragma mark -
#pragma mark MVKCommandEncoder
-void MVKCommandEncoder::encode(id<MTLCommandBuffer> mtlCmdBuff) {
+void MVKCommandEncoder::encode(id<MTLCommandBuffer> mtlCmdBuff,
+ MVKCommandEncodingContext* pEncodingContext) {
_renderPass = nullptr;
_subpassContents = VK_SUBPASS_CONTENTS_INLINE;
_renderSubpassIndex = 0;
_multiviewPassIndex = 0;
_canUseLayeredRendering = false;
+ _pEncodingContext = pEncodingContext;
_mtlCmdBuffer = mtlCmdBuff; // not retained
setLabelIfNotNil(_mtlCmdBuffer, _cmdBuffer->_debugName);
@@ -345,12 +349,12 @@
_clearValues.contents(),
_isRenderingEntireAttachment,
loadOverride);
- if (_cmdBuffer->_needsVisibilityResultMTLBuffer) {
- if (!_visibilityResultMTLBuffer) {
- _visibilityResultMTLBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
- }
- mtlRPDesc.visibilityResultBuffer = _visibilityResultMTLBuffer->_mtlBuffer;
- }
+ if (_cmdBuffer->_needsVisibilityResultMTLBuffer) {
+ if ( !_pEncodingContext->visibilityResultBuffer ) {
+ _pEncodingContext->visibilityResultBuffer = getTempMTLBuffer(_pDeviceMetalFeatures->maxQueryBufferSize, true, true);
+ }
+ mtlRPDesc.visibilityResultBuffer = _pEncodingContext->visibilityResultBuffer->_mtlBuffer;
+ }
VkExtent2D fbExtent = _framebufferExtent;
mtlRPDesc.renderTargetWidthMVK = max(min(_renderArea.offset.x + _renderArea.extent.width, fbExtent.width), 1u);
@@ -770,7 +774,6 @@
MVKCommandEncoder::MVKCommandEncoder(MVKCommandBuffer* cmdBuffer) : MVKBaseDeviceObject(cmdBuffer->getDevice()),
_cmdBuffer(cmdBuffer),
- _visibilityResultMTLBuffer(nil),
_graphicsPipelineState(this),
_computePipelineState(this),
_viewportState(this),
@@ -799,6 +802,7 @@
_mtlComputeEncoderUse = kMVKCommandUseNone;
_mtlBlitEncoder = nil;
_mtlBlitEncoderUse = kMVKCommandUseNone;
+ _pEncodingContext = nullptr;
}
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
index d18fa87..572a732 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h
@@ -614,9 +614,18 @@
protected:
void encodeImpl(uint32_t) override;
+ typedef struct OcclusionQueryLocation {
+ MVKOcclusionQueryPool* queryPool = nullptr;
+ uint32_t query = 0;
+ NSUInteger visibilityBufferOffset = 0;
+
+ OcclusionQueryLocation(MVKOcclusionQueryPool* qPool, uint32_t qIdx, NSUInteger vbOfst)
+ : queryPool(qPool), query(qIdx), visibilityBufferOffset(vbOfst) {}
+
+ } OcclusionQueryLocation;
+
+ MVKSmallVector<OcclusionQueryLocation> _mtlRenderPassQueries;
MTLVisibilityResultMode _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
- NSUInteger _mtlVisibilityResultOffset = 0;
- MVKSmallVector<std::pair<MVKQuerySpec, NSUInteger>> _mtlRenderPassQueries;
};
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index 96ea826..8093cab 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -1073,20 +1073,19 @@
#pragma mark MVKOcclusionQueryCommandEncoderState
void MVKOcclusionQueryCommandEncoderState::endMetalRenderPass() {
+ const MVKMTLBufferAllocation* vizBuff = _cmdEncoder->_pEncodingContext->visibilityResultBuffer;
+ if ( !vizBuff || _mtlRenderPassQueries.empty() ) { return; } // Nothing to do.
- if (_mtlRenderPassQueries.empty()) { return; } // Nothing to do.
-
- id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
+ id<MTLComputePipelineState> mtlAccumState = _cmdEncoder->getCommandEncodingPool()->getAccumulateOcclusionQueryResultsMTLComputePipelineState();
id<MTLComputeCommandEncoder> mtlAccumEncoder = _cmdEncoder->getMTLComputeEncoder(kMVKCommandUseAccumOcclusionQuery);
[mtlAccumEncoder setComputePipelineState: mtlAccumState];
- for (auto& query : _mtlRenderPassQueries) {
+ for (auto& qryLoc : _mtlRenderPassQueries) {
// Accumulate the current results to the query pool's buffer.
- auto* pQueryPool = (MVKOcclusionQueryPool*)query.first.queryPool;
- [mtlAccumEncoder setBuffer: pQueryPool->getVisibilityResultMTLBuffer()
- offset: pQueryPool->getVisibilityResultOffset(query.first.query)
+ [mtlAccumEncoder setBuffer: qryLoc.queryPool->getVisibilityResultMTLBuffer()
+ offset: qryLoc.queryPool->getVisibilityResultOffset(qryLoc.query)
atIndex: 0];
- [mtlAccumEncoder setBuffer: _cmdEncoder->_visibilityResultMTLBuffer->_mtlBuffer
- offset: query.second
+ [mtlAccumEncoder setBuffer: vizBuff->_mtlBuffer
+ offset: vizBuff->_offset + qryLoc.visibilityBufferOffset
atIndex: 1];
[mtlAccumEncoder dispatchThreadgroups: MTLSizeMake(1, 1, 1)
threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
@@ -1095,24 +1094,28 @@
_mtlRenderPassQueries.clear();
}
+// The Metal visibility buffer has a finite size, and on some Metal platforms (looking at you M1),
+// query offsets cannnot be reused with the same MTLCommandBuffer. If enough occlusion queries are
+// begun within a single MTLCommandBuffer, it may exhaust the visibility buffer. If that occurs,
+// report an error and disable further visibility tracking for the remainder of the MTLCommandBuffer.
+// In most cases, a MTLCommandBuffer corresponds to a Vulkan command submit (VkSubmitInfo),
+// and so the error text is framed in terms of the Vulkan submit.
void MVKOcclusionQueryCommandEncoderState::beginOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query, VkQueryControlFlags flags) {
-
- MVKQuerySpec querySpec;
- querySpec.set(pQueryPool, query);
- NSUInteger offset = _mtlRenderPassQueries.empty() ? 0 : _mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes;
- NSUInteger maxOffset = _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize - kMVKQuerySlotSizeInBytes;
- offset = min(offset, maxOffset);
- _mtlRenderPassQueries.push_back(make_pair(querySpec, offset));
-
- bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
- _mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
- _mtlVisibilityResultOffset = offset;
-
+ if (_cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset + kMVKQuerySlotSizeInBytes <= _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize) {
+ bool shouldCount = _cmdEncoder->_pDeviceFeatures->occlusionQueryPrecise && mvkAreAllFlagsEnabled(flags, VK_QUERY_CONTROL_PRECISE_BIT);
+ _mtlVisibilityResultMode = shouldCount ? MTLVisibilityResultModeCounting : MTLVisibilityResultModeBoolean;
+ _mtlRenderPassQueries.emplace_back(pQueryPool, query, _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset);
+ } else {
+ reportError(VK_ERROR_OUT_OF_DEVICE_MEMORY, "vkCmdBeginQuery(): The maximum number of queries in a single Vulkan command submission is %llu.", _cmdEncoder->_pDeviceMetalFeatures->maxQueryBufferSize / kMVKQuerySlotSizeInBytes);
+ _mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
+ _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset -= kMVKQuerySlotSizeInBytes;
+ }
markDirty();
}
void MVKOcclusionQueryCommandEncoderState::endOcclusionQuery(MVKOcclusionQueryPool* pQueryPool, uint32_t query) {
_mtlVisibilityResultMode = MTLVisibilityResultModeDisabled;
+ _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset += kMVKQuerySlotSizeInBytes;
markDirty();
}
@@ -1120,5 +1123,5 @@
if (stage != kMVKGraphicsStageRasterization) { return; }
[_cmdEncoder->_mtlRenderEncoder setVisibilityResultMode: _mtlVisibilityResultMode
- offset: _mtlVisibilityResultOffset];
+ offset: _cmdEncoder->_pEncodingContext->mtlVisibilityResultOffset];
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index e9c92ee..1379687 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -237,7 +237,7 @@
}
protected:
- void submitCommandBuffers() override { for (auto& cb : _cmdBuffers) { cb->submit(this); } }
+ void submitCommandBuffers() override;
MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index 22bd1b2..286946a 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -463,6 +463,13 @@
}
+template <size_t N>
+void MVKQueueFullCommandBufferSubmission<N>::submitCommandBuffers() {
+ MVKCommandEncodingContext encodingContext;
+ for (auto& cb : _cmdBuffers) { cb->submit(this, &encodingContext); }
+}
+
+
#pragma mark -
#pragma mark MVKQueuePresentSurfaceSubmission