Merge pull request #1500 from billhollings/normalize-gpu-counters

Improve accuracy of VkPhysicalDeviceLimits::timestampPeriod.
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 3fabb28..61e3e02 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -24,6 +24,7 @@
 - Fix issue where *MSL 2.3* only available on *Apple Silicon*, even on *macOS*.
 - Fix memory leak of dummy `MTLTexture` in render subpasses that use no attachments.
 - Fix Metal object retain-release errors in assignment operators.
+- Improve accuracy of `VkPhysicalDeviceLimits::timestampPeriod`.
 - Update to latest SPIRV-Cross:
 	- MSL: Add 64 bit support for `OpSwitch`.
 	- MSL: Don't output depth and stencil values with explicit early fragment tests.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
index 11baf05..c4be1d2 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h
@@ -83,6 +83,9 @@
 const static uint32_t kMVKCachedColorAttachmentCount = 8;
 const static uint32_t kMVKMaxDescriptorSetCount = SPIRV_CROSS_NAMESPACE::kMaxArgumentBuffers;
 
+#if !MVK_XCODE_12
+typedef NSUInteger MTLTimestamp;	// Local alias, declared only when MVK_XCODE_12 is false. NOTE(review): presumably older SDKs lack this Metal type - confirm.
+#endif
 
 #pragma mark -
 #pragma mark MVKPhysicalDevice
@@ -329,6 +332,20 @@
 	/** Returns whether this device is using Metal argument buffers. */
 	bool isUsingMetalArgumentBuffers() const  { return _metalFeatures.argumentBuffers && mvkConfig().useMetalArgumentBuffers; };
 
+	/**
+	 * Returns, in cpuStart and gpuStart, the start timestamps of a timestamp correlation. The returned values should
+	 * later be passed back to updateTimestampPeriod(). Both are left unmodified on Apple GPUs, or if GPU timestamp counters are not in use.
+	 */
+	void startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart);
+
+	/**
+	 * Updates the current value of VkPhysicalDeviceLimits::timestampPeriod, based on the
+	 * correlation between the CPU time ticks and GPU time ticks, from the specified start
+	 * values to the current values. The cpuStart and gpuStart values should have been retrieved
+	 * from a prior call to startTimestampCorrelation(). Does nothing on Apple GPUs, or if GPU timestamp counters are not in use.
+	 */
+	void updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart);
+
 
 #pragma mark Construction
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 49aea68..a1e09a1 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -1101,6 +1101,25 @@
 	return rslt;
 }
 
+// Timestamps are sampled only when GPU timestamp counters are in use on a non-Apple GPU. On Apple GPUs,
+// the GPU and CPU timestamps are the same. When no sample is taken, cpuStart and gpuStart are left untouched.
+void MVKPhysicalDevice::startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart) {
+	bool needsCorrelation = (_properties.vendorID != kAppleVendorId) && (_timestampMTLCounterSet != nil);
+	if (needsCorrelation) { [_mtlDevice sampleTimestamps: &cpuStart gpuTimestamp: &gpuStart]; }
+}
+
+// Not needed for Apple GPUs, where the GPU and CPU timestamps are the same, or if we're not using GPU
+// timestamp counters. The existing period is retained unless both clocks have advanced past their start
+// values, so a zero or wrapped tick delta can never produce an infinite, NaN, zero, or bogus period.
+void MVKPhysicalDevice::updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart) {
+	if (_properties.vendorID == kAppleVendorId || !_timestampMTLCounterSet) { return; }
+
+	MTLTimestamp cpuEnd = 0, gpuEnd = 0;
+	[_mtlDevice sampleTimestamps: &cpuEnd gpuTimestamp: &gpuEnd];
+	if (cpuEnd <= cpuStart || gpuEnd <= gpuStart) { return; }	// No usable interval to correlate; keep the prior period.
+	_properties.limits.timestampPeriod = (double)(cpuEnd - cpuStart) / (double)(gpuEnd - gpuStart);
+}
+
 
 #pragma mark Memory models
 
@@ -2077,7 +2096,7 @@
     _properties.limits.optimalBufferCopyRowPitchAlignment = 1;
 
 	_properties.limits.timestampComputeAndGraphics = VK_TRUE;
-	_properties.limits.timestampPeriod = mvkGetTimestampPeriod();
+	_properties.limits.timestampPeriod = _metalFeatures.counterSamplingPoints ? 1.0 : mvkGetTimestampPeriod();
 
     _properties.limits.pointSizeRange[0] = 1;
 	switch (_properties.vendorID) {
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
index bc00cc4..f9e1295 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h
@@ -213,7 +213,7 @@
 	id<MTLCommandBuffer> getActiveMTLCommandBuffer();
 	void setActiveMTLCommandBuffer(id<MTLCommandBuffer> mtlCmdBuff);
 	void commitActiveMTLCommandBuffer(bool signalCompletion = false);
-	void finish();
+	virtual void finish();	// Virtual so a subclass can add work before the base behaviour (MVKQueueFullCommandBufferSubmission overrides it).
 	virtual void submitCommandBuffers() {}
 
 	MVKSmallVector<std::pair<MVKSemaphore*, uint64_t>> _signalSemaphores;
@@ -248,8 +248,11 @@
 
 protected:
 	void submitCommandBuffers() override;
+	void finish() override;	// Updates the physical device's timestamp period, then calls the base finish().
 
 	MVKSmallVector<MVKCommandBuffer*, N> _cmdBuffers;
+	MTLTimestamp _cpuStart = 0;	// CPU start timestamp, filled by startTimestampCorrelation() in submitCommandBuffers(), consumed in finish().
+	MTLTimestamp _gpuStart = 0;	// GPU start timestamp, filled by startTimestampCorrelation() in submitCommandBuffers(), consumed in finish().
 };
 
 
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
index 24c415b..30f1d1b 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm
@@ -511,10 +511,17 @@
 
 template <size_t N>
 void MVKQueueFullCommandBufferSubmission<N>::submitCommandBuffers() {
+	_queue->getPhysicalDevice()->startTimestampCorrelation(_cpuStart, _gpuStart);	// Record start timestamps before any command buffer is submitted; finish() passes them to updateTimestampPeriod().
 	MVKCommandEncodingContext encodingContext;
 	for (auto& cb : _cmdBuffers) { cb->submit(this, &encodingContext); }
 }
 
+template <size_t N>
+void MVKQueueFullCommandBufferSubmission<N>::finish() {
+	_queue->getPhysicalDevice()->updateTimestampPeriod(_cpuStart, _gpuStart);	// Uses the start timestamps recorded in submitCommandBuffers().
+	MVKQueueCommandBufferSubmission::finish();	// NOTE(review): keep this call last - presumably the base finish() may release this submission; confirm.
+}
+
 
 #pragma mark -
 #pragma mark MVKQueuePresentSurfaceSubmission