Merge pull request #1500 from billhollings/normalize-gpu-counters
Improve accuracy of VkPhysicalDeviceLimits::timestampPeriod.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 3fabb28..61e3e02 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -24,6 +24,7 @@
- Fix issue where *MSL 2.3* only available on *Apple Silicon*, even on *macOS*.
- Fix memory leak of dummy `MTLTexture` in render subpasses that use no attachments.
- Fix Metal object retain-release errors in assignment operators.
+- Improve accuracy of `VkPhysicalDeviceLimits::timestampPeriod`.
- Update to latest SPIRV-Cross:
- MSL: Add 64 bit support for `OpSwitch`.
- MSL: Don't output depth and stencil values with explicit early fragment tests.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 11baf05..c4be1d2 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -83,6 +83,9 @@
const static uint32_t kMVKCachedColorAttachmentCount = 8;
const static uint32_t kMVKMaxDescriptorSetCount = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers;
+#if !MVK_XCODE_12
+typedef NSUInteger MTLTimestamp;	// Fallback declaration when not building with Xcode 12; presumably older SDKs do not declare MTLTimestamp - TODO confirm
+#endif
#pragma mark -
#pragma mark MVKPhysicalDevice
@@ -329,6 +332,20 @@
/** Returns whether this device is using Metal argument buffers. */
bool isUsingMetalArgumentBuffers() const { return _metalFeatures.argumentBuffers && mvkConfig().useMetalArgumentBuffers; };
+ /**
+ * Returns the start timestamps of a timestamp correlation.
+ * The returned values should be later passed back to updateTimestampPeriod().
+ */
+ void startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart);
+
+ /**
+ * Updates the current value of VkPhysicalDeviceLimits::timestampPeriod, based on the
+ * correlation between the CPU time ticks and GPU time ticks, from the specified start
+ * values, to the current values. The cpuStart and gpuStart values should have been
+ * retrieved from a prior call to startTimestampCorrelation().
+ */
+ void updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart);
+
#pragma mark Construction
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 49aea68..a1e09a1 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -1101,6 +1101,25 @@
return rslt;
}
+// Samples the current CPU and GPU timestamps into cpuStart and gpuStart. Skipped (leaving both
+// arguments untouched) on Apple GPUs, where the GPU and CPU timestamps are the same, and when no GPU timestamp counter set is in use.
+void MVKPhysicalDevice::startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart) {
+    bool isCorrelationNeeded = (_properties.vendorID != kAppleVendorId) && _timestampMTLCounterSet;
+    if (isCorrelationNeeded) { [_mtlDevice sampleTimestamps: &cpuStart gpuTimestamp: &gpuStart]; }
+}
+
+// Recomputes VkPhysicalDeviceLimits::timestampPeriod as the ratio of elapsed CPU ticks to elapsed
+// GPU ticks since the specified start values were sampled. Don't need to do this for Apple GPUs,
+// where the GPU and CPU timestamps are the same, or if we're not using GPU timestamp counters.
+void MVKPhysicalDevice::updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart) {
+    if (_properties.vendorID == kAppleVendorId || !_timestampMTLCounterSet) { return; }
+
+    // Zero-initialize so the values are well-defined even if the sampling call writes nothing.
+    MTLTimestamp cpuEnd = 0;
+    MTLTimestamp gpuEnd = 0;
+    [_mtlDevice sampleTimestamps: &cpuEnd gpuTimestamp: &gpuEnd];
+
+    // Only update when both clocks have advanced. The timestamps are unsigned, so a zero GPU
+    // interval would divide by zero, and an end value earlier than its start would wrap around
+    // to a huge delta. In those cases, keep the previous period instead of publishing inf, NaN,
+    // or a meaningless ratio.
+    if (cpuEnd <= cpuStart || gpuEnd <= gpuStart) { return; }
+
+    _properties.limits.timestampPeriod = (double)(cpuEnd - cpuStart) / (double)(gpuEnd - gpuStart);
+}
+
#pragma mark Memory models
@@ -2077,7 +2096,7 @@
_properties.limits.optimalBufferCopyRowPitchAlignment = 1;
_properties.limits.timestampComputeAndGraphics = VK_TRUE;
- _properties.limits.timestampPeriod = mvkGetTimestampPeriod();
+ _properties.limits.timestampPeriod = _metalFeatures.counterSamplingPoints ? 1.0 : mvkGetTimestampPeriod();
_properties.limits.pointSizeRange[0] = 1;
switch (_properties.vendorID) {
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index bc00cc4..f9e1295 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -213,7 +213,7 @@
id<MTLCommandBuffer> getActiveMTLCommandBuffer();
void setActiveMTLCommandBuffer(id<MTLCommandBuffer> mtlCmdBuff);
void commitActiveMTLCommandBuffer(bool signalCompletion = false);
- void finish();
+ virtual void finish();
virtual void submitCommandBuffers() {}
MVKSmallVector<std::pair<MVKSemaphore*, uint64_t>> _signalSemaphores;
@@ -248,8 +248,11 @@
protected:
void submitCommandBuffers() override;
+ void finish() override;
MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers;
+ MTLTimestamp _cpuStart = 0;
+ MTLTimestamp _gpuStart = 0;
};
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index 24c415b..30f1d1b 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -511,10 +511,17 @@
template <size_t N>
void MVKQueueFullCommandBufferSubmission<N>::submitCommandBuffers() {	// Submits each command buffer held by this submission.
+ _queue->getPhysicalDevice()->startTimestampCorrelation(_cpuStart, _gpuStart);	// Record the starting CPU & GPU timestamps; finish() later passes them to updateTimestampPeriod().
MVKCommandEncodingContext encodingContext;	// A single encoding context is passed to every command buffer in this submission.
for (auto& cb : _cmdBuffers) { cb->submit(this, &encodingContext); }
}
+// Passes the start timestamps recorded by submitCommandBuffers() to the physical device so it
+// can update its timestamp period, and only then runs the base class finish().
+template <size_t N>
+void MVKQueueFullCommandBufferSubmission<N>::finish() {
+    auto physicalDevice = _queue->getPhysicalDevice();
+    physicalDevice->updateTimestampPeriod(_cpuStart, _gpuStart);
+    MVKQueueCommandBufferSubmission::finish();
+}
+
#pragma mark -
#pragma mark MVKQueuePresentSurfaceSubmission