Fix error on some Apple GPU's where a vkCmdTimestampQuery() after a
renderpass was writing timestamp before renderpass activity was complete.

MVKCommandBuffer tracks whether it contains a stage-based timestamp command,
and MVKCommandEncoder updates the timestamp command fence when ending any
Metal command encoder on such a MVKCommandBuffer.

MVKCommandEncoder reorder member variables to avoid layout gaps (unrelated).
MVKCommandBuffer update _commandCount even for single-use immediate command
encoding (unrelated).
diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md
index 3e11894..0519187 100644
--- a/Docs/Whats_New.md
+++ b/Docs/Whats_New.md
@@ -25,9 +25,11 @@
 	- `VK_KHR_dynamic_rendering`
 	- `VK_KHR_separate_depth_stencil_layouts`
 	- `VK_EXT_separate_stencil_usage`
+- Support attachment clearing when some clearing formats are not specified.
 - Fix error where previously bound push constants can override a descriptor buffer binding 
   used by a subsequent pipeline that does not use push constants.
-- Support attachment clearing when some clearing formats are not specified.
+- Fix error on some Apple GPU's where a `vkCmdTimestampQuery()` after a renderpass was 
+  writing timestamp before renderpass activity was complete.
 - Update to latest SPIRV-Cross:
 	- MSL: Emit interface block members of array length 1 as arrays instead of scalars.
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm
index b19335f..e64493a 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm
@@ -85,6 +85,8 @@
 
 	_pipelineStage = pipelineStage;
 
+	cmdBuff->recordTimestampCommand();
+
 	return rslt;
 }
 
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index 3b8b3e1..5502dc2 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -118,6 +118,10 @@
 	/** Called when a MVKCmdExecuteCommands is added to this command buffer. */
 	void recordExecuteCommands(const MVKArrayRef<MVKCommandBuffer*> secondaryCommandBuffers);
 
+	/** Called when a timestamp command is added. */
+	void recordTimestampCommand();
+
+
 #pragma mark Tessellation constituent command management
 
 	/** Update the last recorded pipeline with tessellation shaders */
@@ -197,6 +201,7 @@
 	bool _isReusable;
 	bool _supportsConcurrentExecution;
 	bool _wasExecuted;
+	bool _hasStageCounterTimestampCommand;
 };
 
 
@@ -461,7 +466,6 @@
     NSString* getMTLRenderCommandEncoderName(MVKCommandUse cmdUse);
 	void encodeGPUCounterSample(MVKGPUCounterQueryPool* mvkQryPool, uint32_t sampleIndex, MVKCounterSamplingFlags samplingPoints);
 	void encodeTimestampStageCounterSamples();
-	bool hasTimestampStageCounterQueries() { return !_timestampStageCounterQueries.empty(); }
 	id<MTLFence> getStageCountersMTLFence();
 	MVKArrayRef<MTLSamplePosition> getCustomSamplePositions();
 
@@ -470,11 +474,8 @@
 		uint32_t query = 0;
 	} GPUCounterQuery;
 
-	VkSubpassContents _subpassContents;
-	MVKCommand* _lastMultiviewPassCmd;
-	uint32_t _renderSubpassIndex;
-	uint32_t _multiviewPassIndex;
 	VkRect2D _renderArea;
+	MVKCommand* _lastMultiviewPassCmd;
     MVKActivatedQueries* _pActivatedQueries;
 	MVKSmallVector<GPUCounterQuery, 16> _timestampStageCounterQueries;
 	MVKSmallVector<VkClearValue, kMVKDefaultAttachmentCount> _clearValues;
@@ -482,16 +483,19 @@
 	MVKSmallVector<MTLSamplePosition> _dynamicSamplePositions;
 	MVKSmallVector<MVKSmallVector<MTLSamplePosition>> _subpassSamplePositions;
 	id<MTLComputeCommandEncoder> _mtlComputeEncoder;
-	MVKCommandUse _mtlComputeEncoderUse;
 	id<MTLBlitCommandEncoder> _mtlBlitEncoder;
 	id<MTLFence> _stageCountersMTLFence;
-    MVKCommandUse _mtlBlitEncoderUse;
 	MVKPushConstantsCommandEncoderState _vertexPushConstants;
 	MVKPushConstantsCommandEncoderState _tessCtlPushConstants;
 	MVKPushConstantsCommandEncoderState _tessEvalPushConstants;
 	MVKPushConstantsCommandEncoderState _fragmentPushConstants;
 	MVKPushConstantsCommandEncoderState _computePushConstants;
     MVKOcclusionQueryCommandEncoderState _occlusionQueryState;
+	VkSubpassContents _subpassContents;
+	MVKCommandUse _mtlComputeEncoderUse;
+	MVKCommandUse _mtlBlitEncoderUse;
+	uint32_t _renderSubpassIndex;
+	uint32_t _multiviewPassIndex;
     uint32_t _flushCount = 0;
 	bool _isRenderingEntireAttachment;
 };
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index f0995c2..2c84d4a 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -147,6 +147,7 @@
 	_isExecutingNonConcurrently.clear();
 	_commandCount = 0;
 	_needsVisibilityResultMTLBuffer = false;
+	_hasStageCounterTimestampCommand = false;
 	_lastTessellationPipeline = nullptr;
 	_lastMultiviewSubpass = nullptr;
 	setConfigurationResult(VK_NOT_READY);
@@ -171,7 +172,9 @@
         setConfigurationResult(reportError(VK_NOT_READY, "Command buffer cannot accept commands before vkBeginCommandBuffer() is called."));
         return;
     }
-    
+
+	_commandCount++;
+
     if(_immediateCmdEncoder) {
         _immediateCmdEncoder->encodeCommands(command);
         
@@ -185,7 +188,6 @@
     command->_next = nullptr;
     _tail = command;
     if ( !_head ) { _head = command; }
-    _commandCount++;
 }
 
 void MVKCommandBuffer::submit(MVKQueueCommandBufferSubmission* cmdBuffSubmit,
@@ -258,20 +260,21 @@
 	reset(0);
 }
 
-// If the initial visibility result buffer has not been set, promote the first visibility result buffer
-// found among any of the secondary command buffers, to support the case where a render pass is started in
-// the primary command buffer but the visibility query is started inside one of the secondary command buffers.
+// Promote the initial visibility buffer and indication of timestamp use from the secondary buffers.
 void MVKCommandBuffer::recordExecuteCommands(const MVKArrayRef<MVKCommandBuffer*> secondaryCommandBuffers) {
-	if (!_needsVisibilityResultMTLBuffer) {
-		for (MVKCommandBuffer* cmdBuff : secondaryCommandBuffers) {
-			if (cmdBuff->_needsVisibilityResultMTLBuffer) {
-				_needsVisibilityResultMTLBuffer = true;
-				break;
-			}
-		}
+	for (MVKCommandBuffer* cmdBuff : secondaryCommandBuffers) {
+		if (cmdBuff->_needsVisibilityResultMTLBuffer) { _needsVisibilityResultMTLBuffer = true; }
+		if (cmdBuff->_hasStageCounterTimestampCommand) { _hasStageCounterTimestampCommand = true; }
 	}
 }
 
+// Track whether a stage-based timestamp command has been added, so we know
+// to update the timestamp command fence when ending a Metal command encoder.
+void MVKCommandBuffer::recordTimestampCommand() {
+	_hasStageCounterTimestampCommand = mvkIsAnyFlagEnabled(_device->_pMetalFeatures->counterSamplingPoints, MVK_COUNTER_SAMPLING_AT_PIPELINE_STAGE);
+}
+
+
 #pragma mark -
 #pragma mark Tessellation constituent command management
 
@@ -334,7 +337,7 @@
     while(command) {
         uint32_t prevMVPassIdx = _multiviewPassIndex;
         command->encode(this);
-        
+
         if(_multiviewPassIndex > prevMVPassIdx) {
             // This means we're in a multiview render pass, and we moved on to the
             // next view group. Re-encode all commands in the subpass again for this group.
@@ -744,7 +747,7 @@
 void MVKCommandEncoder::endMetalRenderEncoding() {
     if (_mtlRenderEncoder == nil) { return; }
 
-	if (hasTimestampStageCounterQueries() ) { [_mtlRenderEncoder updateFence: getStageCountersMTLFence() afterStages: MTLRenderStageFragment]; }
+	if (_cmdBuffer->_hasStageCounterTimestampCommand) { [_mtlRenderEncoder updateFence: getStageCountersMTLFence() afterStages: MTLRenderStageFragment]; }
     [_mtlRenderEncoder endEncoding];
 	_mtlRenderEncoder = nil;    // not retained
 
@@ -772,12 +775,12 @@
 	_computeResourcesState.markDirty();
 	_computePushConstants.markDirty();
 
-	if (_mtlComputeEncoder && hasTimestampStageCounterQueries() ) { [_mtlComputeEncoder updateFence: getStageCountersMTLFence()]; }
+	if (_mtlComputeEncoder && _cmdBuffer->_hasStageCounterTimestampCommand) { [_mtlComputeEncoder updateFence: getStageCountersMTLFence()]; }
 	[_mtlComputeEncoder endEncoding];
 	_mtlComputeEncoder = nil;       // not retained
 	_mtlComputeEncoderUse = kMVKCommandUseNone;
 
-	if (_mtlBlitEncoder && hasTimestampStageCounterQueries() ) { [_mtlBlitEncoder updateFence: getStageCountersMTLFence()]; }
+	if (_mtlBlitEncoder && _cmdBuffer->_hasStageCounterTimestampCommand) { [_mtlBlitEncoder updateFence: getStageCountersMTLFence()]; }
 	[_mtlBlitEncoder endEncoding];
 	_mtlBlitEncoder = nil;          // not retained
     _mtlBlitEncoderUse = kMVKCommandUseNone;