blob: 3f9a52ebad3c9a796d841ff6d6c97f77a8bd94bf [file] [log] [blame]
/*
* MVKCmdDraw.mm
*
* Copyright (c) 2015-2022 The Brenwill Workshop Ltd. (http://www.brenwill.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "MVKCmdDraw.h"
#include "MVKCommandBuffer.h"
#include "MVKCommandPool.h"
#include "MVKBuffer.h"
#include "MVKPipeline.h"
#include "MVKFoundation.h"
#include "mvk_datatypes.hpp"
#pragma mark -
#pragma mark MVKCmdBindVertexBuffers
template <size_t N>
VkResult MVKCmdBindVertexBuffers<N>::setContent(MVKCommandBuffer* cmdBuff,
uint32_t startBinding,
uint32_t bindingCount,
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets) {
MVKDevice* mvkDvc = cmdBuff->getDevice();
_bindings.clear(); // Clear for reuse
_bindings.reserve(bindingCount);
MVKMTLBufferBinding b;
for (uint32_t bindIdx = 0; bindIdx < bindingCount; bindIdx++) {
MVKBuffer* mvkBuffer = (MVKBuffer*)pBuffers[bindIdx];
b.index = mvkDvc->getMetalBufferIndexForVertexAttributeBinding(startBinding + bindIdx);
b.mtlBuffer = mvkBuffer->getMTLBuffer();
b.offset = mvkBuffer->getMTLBufferOffset() + pOffsets[bindIdx];
_bindings.push_back(b);
}
return VK_SUCCESS;
}
template <size_t N>
void MVKCmdBindVertexBuffers<N>::encode(MVKCommandEncoder* cmdEncoder) {
for (auto& b : _bindings) { cmdEncoder->_graphicsResourcesState.bindBuffer(kMVKShaderStageVertex, b); }
}
template class MVKCmdBindVertexBuffers<1>;
template class MVKCmdBindVertexBuffers<2>;
template class MVKCmdBindVertexBuffers<8>;
#pragma mark -
#pragma mark MVKCmdBindIndexBuffer
VkResult MVKCmdBindIndexBuffer::setContent(MVKCommandBuffer* cmdBuff,
VkBuffer buffer,
VkDeviceSize offset,
VkIndexType indexType) {
MVKBuffer* mvkBuffer = (MVKBuffer*)buffer;
_binding.mtlBuffer = mvkBuffer->getMTLBuffer();
_binding.offset = mvkBuffer->getMTLBufferOffset() + offset;
_binding.mtlIndexType = mvkMTLIndexTypeFromVkIndexType(indexType);
return VK_SUCCESS;
}
void MVKCmdBindIndexBuffer::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_graphicsResourcesState.bindIndexBuffer(_binding);
}
#pragma mark -
#pragma mark MVKCmdDraw
VkResult MVKCmdDraw::setContent(MVKCommandBuffer* cmdBuff,
uint32_t vertexCount,
uint32_t instanceCount,
uint32_t firstVertex,
uint32_t firstInstance) {
_vertexCount = vertexCount;
_instanceCount = instanceCount;
_firstVertex = firstVertex;
_firstInstance = firstInstance;
// Validate
if ((_firstInstance != 0) && !(cmdBuff->getDevice()->_pMetalFeatures->baseVertexInstanceDrawing)) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDraw(): The current device does not support drawing with a non-zero base instance.");
}
return VK_SUCCESS;
}
void MVKCmdDraw::encode(MVKCommandEncoder* cmdEncoder) {
if (_vertexCount == 0 || _instanceCount == 0) {
// Nothing to do.
return;
}
cmdEncoder->_isIndexedDraw = false;
auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline();
MVKPiplineStages stages;
pipeline->getStages(stages);
const MVKMTLBufferAllocation* vtxOutBuff = nullptr;
const MVKMTLBufferAllocation* tcOutBuff = nullptr;
const MVKMTLBufferAllocation* tcPatchOutBuff = nullptr;
const MVKMTLBufferAllocation* tcLevelBuff = nullptr;
struct {
uint32_t inControlPointCount = 0;
uint32_t patchCount = 0;
} tessParams;
uint32_t outControlPointCount = 0;
if (pipeline->isTessellationPipeline()) {
tessParams.inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
tessParams.patchCount = mvkCeilingDivide(_vertexCount, tessParams.inControlPointCount) * _instanceCount;
}
for (uint32_t s : stages) {
auto stage = MVKGraphicsStage(s);
cmdEncoder->finalizeDrawState(stage); // Ensure all updated state has been submitted to Metal
if ( !pipeline->hasValidMTLPipelineStates() ) { return; } // Abort if this pipeline stage could not be compiled.
id<MTLComputeCommandEncoder> mtlTessCtlEncoder = nil;
switch (stage) {
case kMVKGraphicsStageVertex: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
}
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake2D(_firstVertex, _firstInstance, _vertexCount, _instanceCount)];
// If there are vertex bindings with a zero vertex divisor, I need to offset them by
// _firstInstance * stride, since that is the expected behaviour for a divisor of 0.
cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance);
id<MTLComputePipelineState> vtxState = pipeline->getTessVertexStageState();
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
#if MVK_MACOS_OR_IOS
[mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_vertexCount, _instanceCount, 1)
threadsPerThreadgroup: MTLSizeMake(vtxState.threadExecutionWidth, 1, 1)];
#endif
} else {
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_vertexCount, vtxState.threadExecutionWidth), _instanceCount, 1)
threadsPerThreadgroup: MTLSizeMake(vtxState.threadExecutionWidth, 1, 1)];
}
// Mark pipeline, resources, and tess control push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
cmdEncoder->_depthStencilState.markDirty();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
break;
}
case kMVKGraphicsStageTessControl: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
[mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
[mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
[mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: pipeline->getTessCtlLevelBufferIndex()];
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&tessParams,
sizeof(tessParams),
pipeline->getIndirectParamsIndex().stages[kMVKShaderStageTessCtl]);
if (pipeline->needsVertexOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessCtlInputBufferBinding)];
}
NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
NSUInteger wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
while (wgSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) {
sgSize >>= 1;
wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
}
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
#if MVK_MACOS_OR_IOS
[mtlTessCtlEncoder dispatchThreads: MTLSizeMake(tessParams.patchCount * outControlPointCount, 1, 1)
threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
#endif
} else {
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(tessParams.patchCount * outControlPointCount, wgSize), 1, 1)
threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
}
// Running this stage prematurely ended the render pass, so we have to start it up again.
// TODO: On iOS, maybe we could use a tile shader to avoid this.
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
break;
}
case kMVKGraphicsStageRasterization:
if (pipeline->isTessellationPipeline()) {
if (pipeline->needsTessCtlOutputBuffer()) {
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessEvalInputBufferBinding)];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessEvalPatchInputBufferBinding)];
}
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessEvalLevelBufferBinding)];
[cmdEncoder->_mtlRenderEncoder setTessellationFactorBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
instanceStride: 0];
[cmdEncoder->_mtlRenderEncoder drawPatches: outControlPointCount
patchStart: 0
patchCount: tessParams.patchCount
patchIndexBuffer: nil
patchIndexBufferOffset: 0
instanceCount: 1
baseInstance: 0];
// Mark pipeline, resources, and tess control push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
} else {
MVKRenderSubpass* subpass = cmdEncoder->getSubpass();
uint32_t viewCount = subpass->isMultiview() ? subpass->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1;
uint32_t instanceCount = _instanceCount * viewCount;
cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance);
if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) {
[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType
vertexStart: _firstVertex
vertexCount: _vertexCount
instanceCount: instanceCount
baseInstance: _firstInstance];
} else {
[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType
vertexStart: _firstVertex
vertexCount: _vertexCount
instanceCount: instanceCount];
}
}
break;
}
}
}
#pragma mark -
#pragma mark MVKCmdDrawIndexed
VkResult MVKCmdDrawIndexed::setContent(MVKCommandBuffer* cmdBuff,
uint32_t indexCount,
uint32_t instanceCount,
uint32_t firstIndex,
int32_t vertexOffset,
uint32_t firstInstance) {
_indexCount = indexCount;
_instanceCount = instanceCount;
_firstIndex = firstIndex;
_vertexOffset = vertexOffset;
_firstInstance = firstInstance;
// Validate
MVKDevice* mvkDvc = cmdBuff->getDevice();
if ((_firstInstance != 0) && !(mvkDvc->_pMetalFeatures->baseVertexInstanceDrawing)) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndexed(): The current device does not support drawing with a non-zero base instance.");
}
if ((_vertexOffset != 0) && !(mvkDvc->_pMetalFeatures->baseVertexInstanceDrawing)) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndexed(): The current device does not support drawing with a non-zero base vertex.");
}
return VK_SUCCESS;
}
void MVKCmdDrawIndexed::encode(MVKCommandEncoder* cmdEncoder) {
if (_indexCount == 0 || _instanceCount == 0) {
// Nothing to do.
return;
}
cmdEncoder->_isIndexedDraw = true;
auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline();
MVKPiplineStages stages;
pipeline->getStages(stages);
MVKIndexMTLBufferBinding& ibb = cmdEncoder->_graphicsResourcesState._mtlIndexBufferBinding;
size_t idxSize = mvkMTLIndexTypeSizeInBytes((MTLIndexType)ibb.mtlIndexType);
VkDeviceSize idxBuffOffset = ibb.offset + (_firstIndex * idxSize);
const MVKMTLBufferAllocation* vtxOutBuff = nullptr;
const MVKMTLBufferAllocation* tcOutBuff = nullptr;
const MVKMTLBufferAllocation* tcPatchOutBuff = nullptr;
const MVKMTLBufferAllocation* tcLevelBuff = nullptr;
struct {
uint32_t inControlPointCount = 0;
uint32_t patchCount = 0;
} tessParams;
uint32_t outControlPointCount = 0;
if (pipeline->isTessellationPipeline()) {
tessParams.inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
tessParams.patchCount = mvkCeilingDivide(_indexCount, tessParams.inControlPointCount) * _instanceCount;
}
for (uint32_t s : stages) {
auto stage = MVKGraphicsStage(s);
id<MTLComputeCommandEncoder> mtlTessCtlEncoder = nil;
cmdEncoder->finalizeDrawState(stage); // Ensure all updated state has been submitted to Metal
if ( !pipeline->hasValidMTLPipelineStates() ) { return; } // Abort if this pipeline stage could not be compiled.
switch (stage) {
case kMVKGraphicsStageVertex: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(_indexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
}
[mtlTessCtlEncoder setBuffer: ibb.mtlBuffer
offset: idxBuffOffset
atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageVertex]];
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake2D(_vertexOffset, _firstInstance, _indexCount, _instanceCount)];
// If there are vertex bindings with a zero vertex divisor, I need to offset them by
// _firstInstance * stride, since that is the expected behaviour for a divisor of 0.
cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance);
id<MTLComputePipelineState> vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? pipeline->getTessVertexStageIndex16State() : pipeline->getTessVertexStageIndex32State();
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
#if MVK_MACOS_OR_IOS
[mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_indexCount, _instanceCount, 1)
threadsPerThreadgroup: MTLSizeMake(vtxState.threadExecutionWidth, 1, 1)];
#endif
} else {
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(_indexCount, vtxState.threadExecutionWidth), _instanceCount, 1)
threadsPerThreadgroup: MTLSizeMake(vtxState.threadExecutionWidth, 1, 1)];
}
// Mark pipeline, resources, and tess control push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
cmdEncoder->_depthStencilState.markDirty();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
break;
}
case kMVKGraphicsStageTessControl: {
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
[mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
[mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(tessParams.patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
[mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: pipeline->getTessCtlLevelBufferIndex()];
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&tessParams,
sizeof(tessParams),
pipeline->getIndirectParamsIndex().stages[kMVKShaderStageTessCtl]);
if (pipeline->needsVertexOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessCtlInputBufferBinding)];
}
// The vertex shader produced output in the correct order, so there's no need to use
// an index buffer here.
NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
NSUInteger wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
while (wgSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) {
sgSize >>= 1;
wgSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
}
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
#if MVK_MACOS_OR_IOS
[mtlTessCtlEncoder dispatchThreads: MTLSizeMake(tessParams.patchCount * outControlPointCount, 1, 1)
threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
#endif
} else {
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide(tessParams.patchCount * outControlPointCount, wgSize), 1, 1)
threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
}
// Running this stage prematurely ended the render pass, so we have to start it up again.
// TODO: On iOS, maybe we could use a tile shader to avoid this.
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
break;
}
case kMVKGraphicsStageRasterization:
if (pipeline->isTessellationPipeline()) {
if (pipeline->needsTessCtlOutputBuffer()) {
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessEvalInputBufferBinding)];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessEvalPatchInputBufferBinding)];
}
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessEvalLevelBufferBinding)];
[cmdEncoder->_mtlRenderEncoder setTessellationFactorBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
instanceStride: 0];
// The tessellation control shader produced output in the correct order, so there's no need to use
// an index buffer here.
[cmdEncoder->_mtlRenderEncoder drawPatches: outControlPointCount
patchStart: 0
patchCount: tessParams.patchCount
patchIndexBuffer: nil
patchIndexBufferOffset: 0
instanceCount: 1
baseInstance: 0];
// Mark pipeline, resources, and tess control push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
} else {
MVKRenderSubpass* subpass = cmdEncoder->getSubpass();
uint32_t viewCount = subpass->isMultiview() ? subpass->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) : 1;
uint32_t instanceCount = _instanceCount * viewCount;
cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance);
if (cmdEncoder->_pDeviceMetalFeatures->baseVertexInstanceDrawing) {
[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType
indexCount: _indexCount
indexType: (MTLIndexType)ibb.mtlIndexType
indexBuffer: ibb.mtlBuffer
indexBufferOffset: idxBuffOffset
instanceCount: instanceCount
baseVertex: _vertexOffset
baseInstance: _firstInstance];
} else {
[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType
indexCount: _indexCount
indexType: (MTLIndexType)ibb.mtlIndexType
indexBuffer: ibb.mtlBuffer
indexBufferOffset: idxBuffOffset
instanceCount: instanceCount];
}
}
break;
}
}
}
#pragma mark -
#pragma mark MVKCmdDrawIndirect
VkResult MVKCmdDrawIndirect::setContent(MVKCommandBuffer* cmdBuff,
VkBuffer buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride) {
MVKBuffer* mvkBuffer = (MVKBuffer*)buffer;
_mtlIndirectBuffer = mvkBuffer->getMTLBuffer();
_mtlIndirectBufferOffset = mvkBuffer->getMTLBufferOffset() + offset;
_mtlIndirectBufferStride = stride;
_drawCount = drawCount;
// Validate
MVKDevice* mvkDvc = cmdBuff->getDevice();
if ( !mvkDvc->_pMetalFeatures->indirectDrawing ) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndirect(): The current device does not support indirect drawing.");
}
if (cmdBuff->_lastTessellationPipeline && !mvkDvc->_pMetalFeatures->indirectTessellationDrawing) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndirect(): The current device does not support indirect tessellated drawing.");
}
return VK_SUCCESS;
}
// This is totally arbitrary, but we're forced to do this because we don't know how many vertices
// there are at encoding time. And this will probably be inadequate for large instanced draws.
// TODO: Consider breaking up such draws using different base instance values. But this will
// require yet more munging of the indirect buffers...
static const uint32_t kMVKDrawIndirectVertexCountUpperBound = 131072;
void MVKCmdDrawIndirect::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_isIndexedDraw = false;
auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline();
bool needsInstanceAdjustment = cmdEncoder->getSubpass()->isMultiview() &&
cmdEncoder->getPhysicalDevice()->canUseInstancingForMultiview();
// The indirect calls for dispatchThreadgroups:... and drawPatches:... have different formats.
// We have to convert from the drawPrimitives:... format to them.
// While we're at it, we can create the temporary output buffers once and reuse them
// for each draw.
const MVKMTLBufferAllocation* tempIndirectBuff = nullptr;
const MVKMTLBufferAllocation* tcParamsBuff = nullptr;
const MVKMTLBufferAllocation* vtxOutBuff = nullptr;
const MVKMTLBufferAllocation* tcOutBuff = nullptr;
const MVKMTLBufferAllocation* tcPatchOutBuff = nullptr;
const MVKMTLBufferAllocation* tcLevelBuff = nullptr;
uint32_t patchCount = 0, vertexCount = 0;
uint32_t inControlPointCount = 0, outControlPointCount = 0;
VkDeviceSize paramsIncr = 0;
id<MTLBuffer> mtlIndBuff = _mtlIndirectBuffer;
VkDeviceSize mtlIndBuffOfst = _mtlIndirectBufferOffset;
VkDeviceSize mtlParmBuffOfst = 0;
NSUInteger vtxThreadExecWidth = 0;
NSUInteger tcWorkgroupSize = 0;
if (pipeline->isTessellationPipeline()) {
// We can't read the indirect buffer CPU-side, since it may change between
// encoding and execution. So we don't know how big to make the buffers.
// We must assume an arbitrarily large number of vertices may be submitted.
// But not too many, or we'll exhaust available VRAM.
inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
vertexCount = kMVKDrawIndirectVertexCountUpperBound;
patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
VkDeviceSize indirectSize = (2 * sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
}
paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
VkDeviceSize paramsSize = paramsIncr * _drawCount;
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
mtlIndBuff = tempIndirectBuff->_mtlBuffer;
mtlIndBuffOfst = tempIndirectBuff->_offset;
tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true);
mtlParmBuffOfst = tcParamsBuff->_offset;
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
}
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
vtxThreadExecWidth = pipeline->getTessVertexStageState().threadExecutionWidth;
NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
while (tcWorkgroupSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) {
sgSize >>= 1;
tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
}
} else if (needsInstanceAdjustment) {
// In this case, we need to adjust the instance count for the views being drawn.
VkDeviceSize indirectSize = sizeof(MTLDrawPrimitivesIndirectArguments) * _drawCount;
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
mtlIndBuff = tempIndirectBuff->_mtlBuffer;
mtlIndBuffOfst = tempIndirectBuff->_offset;
}
MVKPiplineStages stages;
pipeline->getStages(stages);
for (uint32_t drawIdx = 0; drawIdx < _drawCount; drawIdx++) {
for (uint32_t s : stages) {
auto stage = MVKGraphicsStage(s);
id<MTLComputeCommandEncoder> mtlTessCtlEncoder = nil;
if (drawIdx == 0 && stage == kMVKGraphicsStageVertex && pipeline->isTessellationPipeline()) {
// We need the indirect buffers now. This must be done before finalizing
// draw state, or the pipeline will get overridden. This is a good time
// to do it, since it will require switching to compute anyway. Do it all
// at once to get it over with.
cmdEncoder->encodeStoreActions(true);
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
id<MTLComputePipelineState> mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(false);
[mtlTessCtlEncoder setComputePipelineState: mtlConvertState];
[mtlTessCtlEncoder setBuffer: _mtlIndirectBuffer
offset: _mtlIndirectBufferOffset
atIndex: 0];
[mtlTessCtlEncoder setBuffer: tempIndirectBuff->_mtlBuffer
offset: tempIndirectBuff->_offset
atIndex: 1];
[mtlTessCtlEncoder setBuffer: tcParamsBuff->_mtlBuffer
offset: tcParamsBuff->_offset
atIndex: 2];
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&_mtlIndirectBufferStride,
sizeof(_mtlIndirectBufferStride),
3);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&inControlPointCount,
sizeof(inControlPointCount),
4);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&outControlPointCount,
sizeof(outControlPointCount),
5);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&_drawCount,
sizeof(_drawCount),
6);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&vtxThreadExecWidth,
sizeof(vtxThreadExecWidth),
7);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&tcWorkgroupSize,
sizeof(tcWorkgroupSize),
8);
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
#if MVK_MACOS_OR_IOS
[mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
#endif
} else {
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
}
// Mark pipelines, resources, and vertex push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass();
} else if (drawIdx == 0 && needsInstanceAdjustment) {
// Similarly, for multiview, we need to adjust the instance count now.
// Unfortunately, this requires switching to compute.
// TODO: Consider using tile shaders to avoid this cost.
cmdEncoder->encodeStoreActions(true);
id<MTLComputeCommandEncoder> mtlConvertEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseMultiviewInstanceCountAdjust);
id<MTLComputePipelineState> mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(false);
uint32_t viewCount;
[mtlConvertEncoder setComputePipelineState: mtlConvertState];
[mtlConvertEncoder setBuffer: _mtlIndirectBuffer
offset: _mtlIndirectBufferOffset
atIndex: 0];
[mtlConvertEncoder setBuffer: tempIndirectBuff->_mtlBuffer
offset: tempIndirectBuff->_offset
atIndex: 1];
cmdEncoder->setComputeBytes(mtlConvertEncoder,
&_mtlIndirectBufferStride,
sizeof(_mtlIndirectBufferStride),
2);
cmdEncoder->setComputeBytes(mtlConvertEncoder,
&_drawCount,
sizeof(_drawCount),
3);
viewCount = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex());
cmdEncoder->setComputeBytes(mtlConvertEncoder,
&viewCount,
sizeof(viewCount),
4);
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
#if MVK_MACOS_OR_IOS
[mtlConvertEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
#endif
} else {
[mtlConvertEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
}
// Switch back to rendering now, since we don't have compute stages to run anyway.
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
}
cmdEncoder->finalizeDrawState(stage); // Ensure all updated state has been submitted to Metal
if ( !pipeline->hasValidMTLPipelineStates() ) { return; } // Abort if this pipeline stage could not be compiled.
switch (stage) {
case kMVKGraphicsStageVertex:
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsVertexOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
}
// We must assume we can read up to the maximum number of vertices.
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake2D(0, 0, vertexCount, vertexCount)];
if ([mtlTessCtlEncoder respondsToSelector: @selector(setStageInRegionWithIndirectBuffer:indirectBufferOffset:)]) {
[mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: mtlIndBuff
indirectBufferOffset: mtlIndBuffOfst];
mtlIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments);
}
[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff
indirectBufferOffset: mtlIndBuffOfst
threadsPerThreadgroup: MTLSizeMake(vtxThreadExecWidth, 1, 1)];
mtlIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
// Mark pipeline, resources, and tess control push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
cmdEncoder->_depthStencilState.markDirty();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
break;
case kMVKGraphicsStageTessControl:
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsTessCtlOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
}
[mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: pipeline->getTessCtlLevelBufferIndex()];
[mtlTessCtlEncoder setBuffer: tcParamsBuff->_mtlBuffer
offset: mtlParmBuffOfst
atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageTessCtl]];
mtlParmBuffOfst += paramsIncr;
if (pipeline->needsVertexOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessCtlInputBufferBinding)];
}
[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff
indirectBufferOffset: mtlIndBuffOfst
threadsPerThreadgroup: MTLSizeMake(tcWorkgroupSize, 1, 1)];
mtlIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
// Running this stage prematurely ended the render pass, so we have to start it up again.
// TODO: On iOS, maybe we could use a tile shader to avoid this.
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
break;
case kMVKGraphicsStageRasterization:
if (pipeline->isTessellationPipeline()) {
if (cmdEncoder->getDevice()->_pMetalFeatures->indirectTessellationDrawing) {
if (pipeline->needsTessCtlOutputBuffer()) {
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessEvalInputBufferBinding)];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessEvalPatchInputBufferBinding)];
}
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessEvalLevelBufferBinding)];
[cmdEncoder->_mtlRenderEncoder setTessellationFactorBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
instanceStride: 0];
#if MVK_MACOS_OR_IOS
[cmdEncoder->_mtlRenderEncoder drawPatches: outControlPointCount
patchIndexBuffer: nil
patchIndexBufferOffset: 0
indirectBuffer: mtlIndBuff
indirectBufferOffset: mtlIndBuffOfst];
#endif
}
mtlIndBuffOfst += sizeof(MTLDrawPatchIndirectArguments);
// Mark pipeline, resources, and vertex push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass();
} else {
[cmdEncoder->_mtlRenderEncoder drawPrimitives: cmdEncoder->_mtlPrimitiveType
indirectBuffer: mtlIndBuff
indirectBufferOffset: mtlIndBuffOfst];
mtlIndBuffOfst += needsInstanceAdjustment ? sizeof(MTLDrawPrimitivesIndirectArguments) : _mtlIndirectBufferStride;
}
break;
}
}
}
}
#pragma mark -
#pragma mark MVKCmdDrawIndexedIndirect
VkResult MVKCmdDrawIndexedIndirect::setContent(MVKCommandBuffer* cmdBuff,
VkBuffer buffer,
VkDeviceSize offset,
uint32_t drawCount,
uint32_t stride) {
MVKBuffer* mvkBuffer = (MVKBuffer*)buffer;
_mtlIndirectBuffer = mvkBuffer->getMTLBuffer();
_mtlIndirectBufferOffset = mvkBuffer->getMTLBufferOffset() + offset;
_mtlIndirectBufferStride = stride;
_drawCount = drawCount;
// Validate
MVKDevice* mvkDvc = cmdBuff->getDevice();
if ( !mvkDvc->_pMetalFeatures->indirectDrawing ) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndexedIndirect(): The current device does not support indirect drawing.");
}
if (cmdBuff->_lastTessellationPipeline && !mvkDvc->_pMetalFeatures->indirectTessellationDrawing) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdDrawIndexedIndirect(): The current device does not support indirect tessellated drawing.");
}
return VK_SUCCESS;
}
void MVKCmdDrawIndexedIndirect::encode(MVKCommandEncoder* cmdEncoder) {
cmdEncoder->_isIndexedDraw = true;
MVKIndexMTLBufferBinding& ibb = cmdEncoder->_graphicsResourcesState._mtlIndexBufferBinding;
auto* pipeline = (MVKGraphicsPipeline*)cmdEncoder->_graphicsPipelineState.getPipeline();
bool needsInstanceAdjustment = cmdEncoder->getSubpass()->isMultiview() &&
cmdEncoder->getPhysicalDevice()->canUseInstancingForMultiview();
// The indirect calls for dispatchThreadgroups:... and drawPatches:... have different formats.
// We have to convert from the drawIndexedPrimitives:... format to them.
// While we're at it, we can create the temporary output buffers once and reuse them
// for each draw.
const MVKMTLBufferAllocation* tempIndirectBuff = nullptr;
const MVKMTLBufferAllocation* tcParamsBuff = nullptr;
const MVKMTLBufferAllocation* vtxOutBuff = nullptr;
const MVKMTLBufferAllocation* tcOutBuff = nullptr;
const MVKMTLBufferAllocation* tcPatchOutBuff = nullptr;
const MVKMTLBufferAllocation* tcLevelBuff = nullptr;
const MVKMTLBufferAllocation* vtxIndexBuff = nullptr;
uint32_t patchCount = 0, vertexCount = 0;
uint32_t inControlPointCount = 0, outControlPointCount = 0;
VkDeviceSize paramsIncr = 0;
id<MTLBuffer> mtlIndBuff = _mtlIndirectBuffer;
VkDeviceSize mtlIndBuffOfst = _mtlIndirectBufferOffset;
VkDeviceSize mtlTempIndBuffOfst = _mtlIndirectBufferOffset;
VkDeviceSize mtlParmBuffOfst = 0;
NSUInteger vtxThreadExecWidth = 0;
NSUInteger tcWorkgroupSize = 0;
if (pipeline->isTessellationPipeline()) {
// We can't read the indirect buffer CPU-side, since it may change between
// encoding and execution. So we don't know how big to make the buffers.
// We must assume an arbitrarily large number of vertices may be submitted.
// But not too many, or we'll exhaust available VRAM.
inControlPointCount = pipeline->getInputControlPointCount();
outControlPointCount = pipeline->getOutputControlPointCount();
vertexCount = kMVKDrawIndirectVertexCountUpperBound;
patchCount = mvkCeilingDivide(vertexCount, inControlPointCount);
VkDeviceSize indirectSize = (sizeof(MTLDispatchThreadgroupsIndirectArguments) + sizeof(MTLDrawPatchIndirectArguments)) * _drawCount;
if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) {
indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount;
}
paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2);
VkDeviceSize paramsSize = paramsIncr * _drawCount;
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
mtlIndBuff = tempIndirectBuff->_mtlBuffer;
mtlTempIndBuffOfst = tempIndirectBuff->_offset;
tcParamsBuff = cmdEncoder->getTempMTLBuffer(paramsSize, true);
mtlParmBuffOfst = tcParamsBuff->_offset;
if (pipeline->needsVertexOutputBuffer()) {
vtxOutBuff = cmdEncoder->getTempMTLBuffer(vertexCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true);
}
if (pipeline->needsTessCtlOutputBuffer()) {
tcOutBuff = cmdEncoder->getTempMTLBuffer(outControlPointCount * patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerVertexOutputComponents, true);
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
tcPatchOutBuff = cmdEncoder->getTempMTLBuffer(patchCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxTessellationControlPerPatchOutputComponents, true);
}
tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true);
vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length, true);
id<MTLComputePipelineState> vtxState;
vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? pipeline->getTessVertexStageIndex16State() : pipeline->getTessVertexStageIndex32State();
vtxThreadExecWidth = vtxState.threadExecutionWidth;
NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth;
tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
while (tcWorkgroupSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) {
sgSize >>= 1;
tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize);
}
} else if (needsInstanceAdjustment) {
// In this case, we need to adjust the instance count for the views being drawn.
VkDeviceSize indirectSize = sizeof(MTLDrawIndexedPrimitivesIndirectArguments) * _drawCount;
tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true);
mtlIndBuff = tempIndirectBuff->_mtlBuffer;
mtlTempIndBuffOfst = tempIndirectBuff->_offset;
}
MVKPiplineStages stages;
pipeline->getStages(stages);
for (uint32_t drawIdx = 0; drawIdx < _drawCount; drawIdx++) {
for (uint32_t s : stages) {
auto stage = MVKGraphicsStage(s);
id<MTLComputeCommandEncoder> mtlTessCtlEncoder = nil;
if (stage == kMVKGraphicsStageVertex && pipeline->isTessellationPipeline()) {
cmdEncoder->encodeStoreActions(true);
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
// We need the indirect buffers now. This must be done before finalizing
// draw state, or the pipeline will get overridden. This is a good time
// to do it, since it will require switching to compute anyway. Do it all
// at once to get it over with.
if (drawIdx == 0) {
id<MTLComputePipelineState> mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectTessConvertBuffersMTLComputePipelineState(true);
[mtlTessCtlEncoder setComputePipelineState: mtlConvertState];
[mtlTessCtlEncoder setBuffer: _mtlIndirectBuffer
offset: _mtlIndirectBufferOffset
atIndex: 0];
[mtlTessCtlEncoder setBuffer: tempIndirectBuff->_mtlBuffer
offset: tempIndirectBuff->_offset
atIndex: 1];
[mtlTessCtlEncoder setBuffer: tcParamsBuff->_mtlBuffer
offset: tcParamsBuff->_offset
atIndex: 2];
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&_mtlIndirectBufferStride,
sizeof(_mtlIndirectBufferStride),
3);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&inControlPointCount,
sizeof(inControlPointCount),
4);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&outControlPointCount,
sizeof(inControlPointCount),
5);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&_drawCount,
sizeof(_drawCount),
6);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&vtxThreadExecWidth,
sizeof(vtxThreadExecWidth),
7);
cmdEncoder->setComputeBytes(mtlTessCtlEncoder,
&tcWorkgroupSize,
sizeof(tcWorkgroupSize),
8);
[mtlTessCtlEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
}
// We actually need to make a copy of the index buffer, because there's no way to tell Metal to
// offset an index buffer from a value in an indirect buffer. This also
// means that, to make a copy, we have to use a compute shader.
id<MTLComputePipelineState> mtlCopyIndexState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndexedCopyIndexBufferMTLComputePipelineState((MTLIndexType)ibb.mtlIndexType);
[mtlTessCtlEncoder setComputePipelineState: mtlCopyIndexState];
[mtlTessCtlEncoder setBuffer: ibb.mtlBuffer
offset: ibb.offset
atIndex: 0];
[mtlTessCtlEncoder setBuffer: vtxIndexBuff->_mtlBuffer
offset: vtxIndexBuff->_offset
atIndex: 1];
[mtlTessCtlEncoder setBuffer: _mtlIndirectBuffer
offset: mtlIndBuffOfst
atIndex: 2];
[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff
indirectBufferOffset: mtlTempIndBuffOfst
threadsPerThreadgroup: MTLSizeMake(vtxThreadExecWidth, 1, 1)];
mtlIndBuffOfst += sizeof(MTLDrawIndexedPrimitivesIndirectArguments);
// Mark pipeline, resources, and vertex push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass();
} else if (drawIdx == 0 && needsInstanceAdjustment) {
// Similarly, for multiview, we need to adjust the instance count now.
// Unfortunately, this requires switching to compute. Luckily, we don't also
// have to copy the index buffer.
// TODO: Consider using tile shaders to avoid this cost.
cmdEncoder->encodeStoreActions(true);
id<MTLComputeCommandEncoder> mtlConvertEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseMultiviewInstanceCountAdjust);
id<MTLComputePipelineState> mtlConvertState = cmdEncoder->getCommandEncodingPool()->getCmdDrawIndirectMultiviewConvertBuffersMTLComputePipelineState(true);
uint32_t viewCount;
[mtlConvertEncoder setComputePipelineState: mtlConvertState];
[mtlConvertEncoder setBuffer: _mtlIndirectBuffer
offset: _mtlIndirectBufferOffset
atIndex: 0];
[mtlConvertEncoder setBuffer: tempIndirectBuff->_mtlBuffer
offset: tempIndirectBuff->_offset
atIndex: 1];
cmdEncoder->setComputeBytes(mtlConvertEncoder,
&_mtlIndirectBufferStride,
sizeof(_mtlIndirectBufferStride),
2);
cmdEncoder->setComputeBytes(mtlConvertEncoder,
&_drawCount,
sizeof(_drawCount),
3);
viewCount = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex());
cmdEncoder->setComputeBytes(mtlConvertEncoder,
&viewCount,
sizeof(viewCount),
4);
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
#if MVK_MACOS_OR_IOS
[mtlConvertEncoder dispatchThreads: MTLSizeMake(_drawCount, 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
#endif
} else {
[mtlConvertEncoder dispatchThreadgroups: MTLSizeMake(mvkCeilingDivide<NSUInteger>(_drawCount, mtlConvertState.threadExecutionWidth), 1, 1)
threadsPerThreadgroup: MTLSizeMake(mtlConvertState.threadExecutionWidth, 1, 1)];
}
// Switch back to rendering now, since we don't have compute stages to run anyway.
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
}
cmdEncoder->finalizeDrawState(stage); // Ensure all updated state has been submitted to Metal
if ( !pipeline->hasValidMTLPipelineStates() ) { return; } // Abort if this pipeline stage could not be compiled.
switch (stage) {
case kMVKGraphicsStageVertex:
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsVertexOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]];
}
[mtlTessCtlEncoder setBuffer: vtxIndexBuff->_mtlBuffer
offset: vtxIndexBuff->_offset
atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageVertex]];
[mtlTessCtlEncoder setStageInRegion: MTLRegionMake2D(0, 0, vertexCount, vertexCount)];
if ([mtlTessCtlEncoder respondsToSelector: @selector(setStageInRegionWithIndirectBuffer:indirectBufferOffset:)]) {
[mtlTessCtlEncoder setStageInRegionWithIndirectBuffer: mtlIndBuff
indirectBufferOffset: mtlTempIndBuffOfst];
mtlTempIndBuffOfst += sizeof(MTLStageInRegionIndirectArguments);
}
[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff
indirectBufferOffset: mtlTempIndBuffOfst
threadsPerThreadgroup: MTLSizeMake(vtxThreadExecWidth, 1, 1)];
mtlTempIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
// Mark pipeline, resources, and tess control push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
cmdEncoder->_depthStencilState.markDirty();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)->beginMetalRenderPass();
break;
case kMVKGraphicsStageTessControl:
mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl);
if (pipeline->needsTessCtlOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageTessCtl]];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: pipeline->getTessCtlPatchOutputBufferIndex()];
}
[mtlTessCtlEncoder setBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: pipeline->getTessCtlLevelBufferIndex()];
[mtlTessCtlEncoder setBuffer: tcParamsBuff->_mtlBuffer
offset: mtlParmBuffOfst
atIndex: pipeline->getIndirectParamsIndex().stages[kMVKShaderStageTessCtl]];
mtlParmBuffOfst += paramsIncr;
if (pipeline->needsVertexOutputBuffer()) {
[mtlTessCtlEncoder setBuffer: vtxOutBuff->_mtlBuffer
offset: vtxOutBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessCtlInputBufferBinding)];
}
[mtlTessCtlEncoder dispatchThreadgroupsWithIndirectBuffer: mtlIndBuff
indirectBufferOffset: mtlTempIndBuffOfst
threadsPerThreadgroup: MTLSizeMake(tcWorkgroupSize, 1, 1)];
mtlTempIndBuffOfst += sizeof(MTLDispatchThreadgroupsIndirectArguments);
// Running this stage prematurely ended the render pass, so we have to start it up again.
// TODO: On iOS, maybe we could use a tile shader to avoid this.
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
break;
case kMVKGraphicsStageRasterization:
if (pipeline->isTessellationPipeline()) {
if (cmdEncoder->getDevice()->_pMetalFeatures->indirectTessellationDrawing) {
if (pipeline->needsTessCtlOutputBuffer()) {
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcOutBuff->_mtlBuffer
offset: tcOutBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessEvalInputBufferBinding)];
}
if (pipeline->needsTessCtlPatchOutputBuffer()) {
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcPatchOutBuff->_mtlBuffer
offset: tcPatchOutBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessEvalPatchInputBufferBinding)];
}
[cmdEncoder->_mtlRenderEncoder setVertexBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
atIndex: cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKTessEvalLevelBufferBinding)];
[cmdEncoder->_mtlRenderEncoder setTessellationFactorBuffer: tcLevelBuff->_mtlBuffer
offset: tcLevelBuff->_offset
instanceStride: 0];
#if MVK_MACOS_OR_IOS
[cmdEncoder->_mtlRenderEncoder drawPatches: outControlPointCount
patchIndexBuffer: nil
patchIndexBufferOffset: 0
indirectBuffer: mtlIndBuff
indirectBufferOffset: mtlTempIndBuffOfst];
#endif
}
mtlTempIndBuffOfst += sizeof(MTLDrawPatchIndirectArguments);
// Mark pipeline, resources, and vertex push constants as dirty
// so I apply them during the next stage.
cmdEncoder->_graphicsPipelineState.beginMetalRenderPass();
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
cmdEncoder->getPushConstants(VK_SHADER_STAGE_VERTEX_BIT)->beginMetalRenderPass();
} else {
[cmdEncoder->_mtlRenderEncoder drawIndexedPrimitives: cmdEncoder->_mtlPrimitiveType
indexType: (MTLIndexType)ibb.mtlIndexType
indexBuffer: ibb.mtlBuffer
indexBufferOffset: ibb.offset
indirectBuffer: mtlIndBuff
indirectBufferOffset: mtlTempIndBuffOfst];
mtlTempIndBuffOfst += needsInstanceAdjustment ? sizeof(MTLDrawIndexedPrimitivesIndirectArguments) : _mtlIndirectBufferStride;
}
break;
}
}
}
}