Merge pull request #104 from billhollings/master
Derive vkCmdCopyBuffer() alignment requirement at runtime.
diff --git a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
index 234a51d..c90f657 100644
--- a/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
+++ b/MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
@@ -84,6 +84,7 @@
VkDeviceSize maxMTLBufferSize; /**< The max size of a MTLBuffer (in bytes). */
VkDeviceSize mtlBufferAlignment; /**< The alignment used when allocating memory for MTLBuffers. Must be PoT. */
VkDeviceSize maxQueryBufferSize; /**< The maximum size of an occlusion query buffer (in bytes). */
+ VkDeviceSize mtlCopyBufferAlignment; /**< The alignment required during buffer copy operations (in bytes). */
VkSampleCountFlags supportedSampleCounts; /**< A bitmask identifying the sample counts supported by the device. */
} MVKPhysicalDeviceMetalFeatures;
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm
index 68d19a8..0ab0d68 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdDispatch.mm
@@ -37,8 +37,8 @@
// MVKLogDebug("vkCmdDispatch() dispatching (%d, %d, %d) threadgroups.", _x, _y, _z);
cmdEncoder->finalizeDispatchState(); // Ensure all updated state has been submitted to Metal
- [cmdEncoder->getMTLComputeEncoder() dispatchThreadgroups: _mtlThreadgroupCount
- threadsPerThreadgroup: cmdEncoder->_mtlThreadgroupSize];
+ [cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch) dispatchThreadgroups: _mtlThreadgroupCount
+ threadsPerThreadgroup: cmdEncoder->_mtlThreadgroupSize];
}
MVKCmdDispatch::MVKCmdDispatch(MVKCommandTypePool<MVKCmdDispatch>* pool)
@@ -59,9 +59,9 @@
// MVKLogDebug("vkCmdDispatchIndirect() dispatching indirectly.");
cmdEncoder->finalizeDispatchState(); // Ensure all updated state has been submitted to Metal
- [cmdEncoder->getMTLComputeEncoder() dispatchThreadgroupsWithIndirectBuffer: _mtlIndirectBuffer
- indirectBufferOffset: _mtlIndirectBufferOffset
- threadsPerThreadgroup: cmdEncoder->_mtlThreadgroupSize];
+ [cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch) dispatchThreadgroupsWithIndirectBuffer: _mtlIndirectBuffer
+ indirectBufferOffset: _mtlIndirectBufferOffset
+ threadsPerThreadgroup: cmdEncoder->_mtlThreadgroupSize];
}
MVKCmdDispatchIndirect::MVKCmdDispatchIndirect(MVKCommandTypePool<MVKCmdDispatchIndirect>* pool)
diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
index 9b3e81c..ec628d1 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
@@ -555,45 +555,41 @@
}
void MVKCmdCopyBuffer::encode(MVKCommandEncoder* cmdEncoder) {
- id<MTLBuffer> srcMTLBuff = _srcBuffer->getMTLBuffer();
- NSUInteger srcMTLBuffOffset = _srcBuffer->getMTLBufferOffset();
+ id<MTLBuffer> srcMTLBuff = _srcBuffer->getMTLBuffer();
+ NSUInteger srcMTLBuffOffset = _srcBuffer->getMTLBufferOffset();
- id<MTLBuffer> dstMTLBuff = _dstBuffer->getMTLBuffer();
- NSUInteger dstMTLBuffOffset = _dstBuffer->getMTLBufferOffset();
+ id<MTLBuffer> dstMTLBuff = _dstBuffer->getMTLBuffer();
+ NSUInteger dstMTLBuffOffset = _dstBuffer->getMTLBufferOffset();
- for (auto& cpyRgn : _mtlBuffCopyRegions) {
-#if MVK_MACOS
- const bool useComputeCopy = cpyRgn.srcOffset % 4 != 0
- || cpyRgn.dstOffset % 4 != 0
- || cpyRgn.size % 4 != 0;
-#else
- const bool useComputeCopy = false;
-#endif
- if (useComputeCopy)
- {
- MVKAssert(
- cpyRgn.srcOffset <= UINT32_MAX || cpyRgn.dstOffset <= UINT32_MAX || cpyRgn.size <= UINT32_MAX,
- "Compute buffer copy region offsets and size must fit into a 32-bit unsigned integer.");
+ VkDeviceSize buffAlign = getDevice()->_pMetalFeatures->mtlCopyBufferAlignment;
- id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder();
- id<MTLComputePipelineState> pipelineState = cmdEncoder->getCommandEncodingPool()->getCopyBufferBytesComputePipelineState();
- [mtlComputeEnc setComputePipelineState:pipelineState];
- [mtlComputeEnc setBuffer:srcMTLBuff offset:srcMTLBuffOffset atIndex:0];
- [mtlComputeEnc setBuffer:dstMTLBuff offset:dstMTLBuffOffset atIndex:1];
- uint32_t copyInfo[3] = { (uint32_t)cpyRgn.srcOffset, (uint32_t)cpyRgn.dstOffset, (uint32_t)cpyRgn.size };
- [mtlComputeEnc setBytes:copyInfo length:sizeof(copyInfo) atIndex:2];
- [mtlComputeEnc dispatchThreads:MTLSizeMake(1, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
- }
- else
- {
- id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseCopyBuffer);
- [mtlBlitEnc copyFromBuffer: srcMTLBuff
- sourceOffset: (srcMTLBuffOffset + cpyRgn.srcOffset)
- toBuffer: dstMTLBuff
- destinationOffset: (dstMTLBuffOffset + cpyRgn.dstOffset)
- size: cpyRgn.size];
- }
- }
+ for (auto& cpyRgn : _mtlBuffCopyRegions) {
+ const bool useComputeCopy = buffAlign > 1 && (cpyRgn.srcOffset % buffAlign != 0 ||
+ cpyRgn.dstOffset % buffAlign != 0 ||
+ cpyRgn.size % buffAlign != 0);
+ if (useComputeCopy) {
+ MVKAssert(cpyRgn.srcOffset <= UINT32_MAX || cpyRgn.dstOffset <= UINT32_MAX || cpyRgn.size <= UINT32_MAX,
+ "Compute buffer copy region offsets and size must fit into a 32-bit unsigned integer.");
+
+ id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyBuffer);
+ [mtlComputeEnc pushDebugGroup: @"vkCmdCopyBuffer"];
+ id<MTLComputePipelineState> pipelineState = cmdEncoder->getCommandEncodingPool()->getCopyBufferBytesComputePipelineState();
+ [mtlComputeEnc setComputePipelineState:pipelineState];
+ [mtlComputeEnc setBuffer:srcMTLBuff offset:srcMTLBuffOffset atIndex:0];
+ [mtlComputeEnc setBuffer:dstMTLBuff offset:dstMTLBuffOffset atIndex:1];
+ uint32_t copyInfo[3] = { (uint32_t)cpyRgn.srcOffset, (uint32_t)cpyRgn.dstOffset, (uint32_t)cpyRgn.size };
+ [mtlComputeEnc setBytes:copyInfo length:sizeof(copyInfo) atIndex:2];
+ [mtlComputeEnc dispatchThreads: MTLSizeMake(1, 1, 1) threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
+ [mtlComputeEnc popDebugGroup];
+ } else {
+ id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseCopyBuffer);
+ [mtlBlitEnc copyFromBuffer: srcMTLBuff
+ sourceOffset: (srcMTLBuffOffset + cpyRgn.srcOffset)
+ toBuffer: dstMTLBuff
+ destinationOffset: (dstMTLBuffOffset + cpyRgn.dstOffset)
+ size: cpyRgn.size];
+ }
+ }
}
MVKCmdCopyBuffer::MVKCmdCopyBuffer(MVKCommandTypePool<MVKCmdCopyBuffer>* pool)
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
index 86d8841..e002162 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
@@ -294,12 +294,13 @@
void flush();
/**
- * The current Metal compute encoder.
+ * The current Metal compute encoder for the specified use,
+ * which determines the label assigned to the returned encoder.
*
* If the current encoder is not a compute encoder, this function ends current before
* beginning compute encoding.
*/
- id<MTLComputeCommandEncoder> getMTLComputeEncoder();
+ id<MTLComputeCommandEncoder> getMTLComputeEncoder(MVKCommandUse cmdUse);
/**
* The current Metal BLIT encoder for the specified use,
@@ -308,7 +309,7 @@
* If the current encoder is not a BLIT encoder, this function ends
* the current encoder before beginning BLIT encoding.
*/
- id<MTLBlitCommandEncoder> getMTLBlitEncoder(MVKCommandUse cmdBlitEncUse);
+ id<MTLBlitCommandEncoder> getMTLBlitEncoder(MVKCommandUse cmdUse);
/** Returns the push constants associated with the specified shader stage. */
MVKPushConstantsCommandEncoderState* getPushConstants(VkShaderStageFlagBits shaderStage);
@@ -424,6 +425,7 @@
MVKActivatedQueries* _pActivatedQueries;
std::vector<VkClearValue> _clearValues;
id<MTLComputeCommandEncoder> _mtlComputeEncoder;
+ MVKCommandUse _mtlComputeEncoderUse;
id<MTLBlitCommandEncoder> _mtlBlitEncoder;
MVKCommandUse _mtlBlitEncoderUse;
MVKPushConstantsCommandEncoderState _vertexPushConstants;
@@ -499,11 +501,14 @@
#pragma mark Support functions
/** Returns a name, suitable for use as a MTLCommandBuffer label, based on the MVKCommandUse. */
-NSString* mvkMTLCommandBufferLabel(MVKCommandUse cmdBuffUse);
+NSString* mvkMTLCommandBufferLabel(MVKCommandUse cmdUse);
/** Returns a name, suitable for use as a MTLRenderCommandEncoder label, based on the MVKCommandUse. */
-NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdBlitEncUse);
+NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdUse);
/** Returns a name, suitable for use as a MTLBlitCommandEncoder label, based on the MVKCommandUse. */
-NSString* mvkMTLBlitCommandEncoderLabel(MVKCommandUse cmdBlitEncUse);
+NSString* mvkMTLBlitCommandEncoderLabel(MVKCommandUse cmdUse);
+
+/** Returns a name, suitable for use as a MTLComputeCommandEncoder label, based on the MVKCommandUse. */
+NSString* mvkMTLComputeCommandEncoderLabel(MVKCommandUse cmdUse);
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
index d56b5f6..19e1187 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm
@@ -342,6 +342,7 @@
[_mtlComputeEncoder endEncoding];
_mtlComputeEncoder = nil; // not retained
+ _mtlComputeEncoderUse = kMVKCommandUseNone;
[_mtlBlitEncoder endEncoding];
_mtlBlitEncoder = nil; // not retained
@@ -362,22 +363,26 @@
}
}
-id<MTLComputeCommandEncoder> MVKCommandEncoder::getMTLComputeEncoder() {
+id<MTLComputeCommandEncoder> MVKCommandEncoder::getMTLComputeEncoder(MVKCommandUse cmdUse) {
if ( !_mtlComputeEncoder ) {
endCurrentMetalEncoding();
_mtlComputeEncoder = [_mtlCmdBuffer computeCommandEncoder]; // not retained
}
+ if (_mtlComputeEncoderUse != cmdUse) {
+ _mtlComputeEncoderUse = cmdUse;
+ _mtlComputeEncoder.label = mvkMTLComputeCommandEncoderLabel(cmdUse);
+ }
return _mtlComputeEncoder;
}
-id<MTLBlitCommandEncoder> MVKCommandEncoder::getMTLBlitEncoder(MVKCommandUse cmdBlitEncUse) {
+id<MTLBlitCommandEncoder> MVKCommandEncoder::getMTLBlitEncoder(MVKCommandUse cmdUse) {
if ( !_mtlBlitEncoder ) {
endCurrentMetalEncoding();
_mtlBlitEncoder = [_mtlCmdBuffer blitCommandEncoder]; // not retained
}
- if (_mtlBlitEncoderUse != cmdBlitEncUse) {
- _mtlBlitEncoderUse = cmdBlitEncUse;
- _mtlBlitEncoder.label = mvkMTLBlitCommandEncoderLabel(cmdBlitEncUse);
+ if (_mtlBlitEncoderUse != cmdUse) {
+ _mtlBlitEncoderUse = cmdUse;
+ _mtlBlitEncoder.label = mvkMTLBlitCommandEncoderLabel(cmdUse);
}
return _mtlBlitEncoder;
}
@@ -513,6 +518,7 @@
_mtlCmdBuffer = nil;
_mtlRenderEncoder = nil;
_mtlComputeEncoder = nil;
+ _mtlComputeEncoderUse = kMVKCommandUseNone;
_mtlBlitEncoder = nil;
_mtlBlitEncoderUse = kMVKCommandUseNone;
}
@@ -550,8 +556,8 @@
#pragma mark -
#pragma mark Support functions
-NSString* mvkMTLCommandBufferLabel(MVKCommandUse cmdBuffUse) {
- switch (cmdBuffUse) {
+NSString* mvkMTLCommandBufferLabel(MVKCommandUse cmdUse) {
+ switch (cmdUse) {
case kMVKCommandUseQueueSubmit: return @"vkQueueSubmit CommandBuffer";
case kMVKCommandUseQueuePresent: return @"vkQueuePresentKHR CommandBuffer";
case kMVKCommandUseQueueWaitIdle: return @"vkQueueWaitIdle CommandBuffer";
@@ -560,8 +566,8 @@
}
}
-NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdRendEncUse) {
- switch (cmdRendEncUse) {
+NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdUse) {
+ switch (cmdUse) {
case kMVKCommandUseBeginRenderPass: return @"vkCmdBeginRenderPass RenderEncoder";
case kMVKCommandUseNextSubpass: return @"vkCmdNextSubpass RenderEncoder";
case kMVKCommandUseBlitImage: return @"vkCmdBlitImage RenderEncoder";
@@ -573,8 +579,8 @@
}
}
-NSString* mvkMTLBlitCommandEncoderLabel(MVKCommandUse cmdBlitEncUse) {
- switch (cmdBlitEncUse) {
+NSString* mvkMTLBlitCommandEncoderLabel(MVKCommandUse cmdUse) {
+ switch (cmdUse) {
case kMVKCommandUsePipelineBarrier: return @"vkCmdPipelineBarrier BlitEncoder";
case kMVKCommandUseCopyImage: return @"vkCmdCopyImage BlitEncoder";
case kMVKCommandUseResolveCopyImage: return @"vkCmdResolveImage (copy stage) RenderEncoder";
@@ -588,3 +594,12 @@
}
}
+NSString* mvkMTLComputeCommandEncoderLabel(MVKCommandUse cmdUse) {
+ switch (cmdUse) {
+ case kMVKCommandUseDispatch: return @"vkCmdDispatch ComputeEncoder";
+ case kMVKCommandUseCopyBuffer: return @"vkCmdCopyBuffer ComputeEncoder";
+ case kMVKCommandUseFillBuffer: return @"vkCmdFillBuffer ComputeEncoder";
+ default: return @"Unknown Use ComputeEncoder";
+ }
+}
+
diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
index 65b3d51..3fa8ca4 100644
--- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
+++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm
@@ -140,7 +140,7 @@
_mtlBufferIndex);
break;
case VK_SHADER_STAGE_COMPUTE_BIT:
- _cmdEncoder->setComputeBytes(_cmdEncoder->getMTLComputeEncoder(),
+ _cmdEncoder->setComputeBytes(_cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch),
_pushConstants.data(),
_pushConstants.size(),
_mtlBufferIndex);
@@ -496,21 +496,21 @@
encodeBinding<MVKMTLBufferBinding>(_bufferBindings, _areBufferBindingsDirty,
[](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b)->void {
- [cmdEncoder->getMTLComputeEncoder() setBuffer: b.mtlBuffer
- offset: b.offset
- atIndex: b.index];
+ [cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch) setBuffer: b.mtlBuffer
+ offset: b.offset
+ atIndex: b.index];
});
encodeBinding<MVKMTLTextureBinding>(_textureBindings, _areTextureBindingsDirty,
[](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void {
- [cmdEncoder->getMTLComputeEncoder() setTexture: b.mtlTexture
- atIndex: b.index];
+ [cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch) setTexture: b.mtlTexture
+ atIndex: b.index];
});
encodeBinding<MVKMTLSamplerStateBinding>(_samplerStateBindings, _areSamplerStateBindingsDirty,
[](MVKCommandEncoder* cmdEncoder, MVKMTLSamplerStateBinding& b)->void {
- [cmdEncoder->getMTLComputeEncoder() setSamplerState: b.mtlSamplerState
- atIndex: b.index];
+ [cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch) setSamplerState: b.mtlSamplerState
+ atIndex: b.index];
});
}
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
index 77dd83d..2209fcc 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
@@ -306,6 +306,7 @@
_metalFeatures.mslVersion = SPIRVToMSLConverterOptions::makeMSLVersion(1);
_metalFeatures.maxPerStageTextureCount = 31;
_metalFeatures.mtlBufferAlignment = 64;
+ _metalFeatures.mtlCopyBufferAlignment = 1;
_metalFeatures.texelBuffers = true;
if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily3_v1] ) {
@@ -333,6 +334,7 @@
_metalFeatures.mslVersion = SPIRVToMSLConverterOptions::makeMSLVersion(1, 1);
_metalFeatures.maxPerStageTextureCount = 128;
_metalFeatures.mtlBufferAlignment = 256;
+ _metalFeatures.mtlCopyBufferAlignment = 4;
_metalFeatures.indirectDrawing = true;
_metalFeatures.baseVertexInstanceDrawing = true;
_metalFeatures.ioSurfaces = true;
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
index dc3740d..84af75e 100644
--- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
+++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
@@ -395,9 +395,7 @@
#pragma mark MVKComputePipeline
void MVKComputePipeline::encode(MVKCommandEncoder* cmdEncoder) {
- id<MTLComputeCommandEncoder> mtlCmdEnc = cmdEncoder->getMTLComputeEncoder();
- [mtlCmdEnc setComputePipelineState: _mtlPipelineState];
-
+ [cmdEncoder->getMTLComputeEncoder(kMVKCommandUseDispatch) setComputePipelineState: _mtlPipelineState];
cmdEncoder->_mtlThreadgroupSize = _mtlThreadgroupSize;
}
diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
index 9f1a4df..5066df8 100644
--- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h
+++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h
@@ -88,6 +88,7 @@
kMVKCommandUseClearColorImage, /**< vkCmdClearColorImage. */
kMVKCommandUseClearDepthStencilImage, /**< vkCmdClearDepthStencilImage. */
kMVKCommandUseResetQueryPool, /**< vkCmdResetQueryPool. */
+ kMVKCommandUseDispatch, /**< vkCmdDispatch. */
} MVKCommandUse;
/**
diff --git a/README.md b/README.md
index 28f8a97..3e08899 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,9 @@
To learn how to integrate the **MoltenVK** runtime into a game or application, see the
[`Docs/MoltenVK_Runtime_UserGuide.md `](Docs/MoltenVK_Runtime_UserGuide.md) document in the `Docs` directory.
+If you are just looking for a pre-built **MoltenVK** runtime binary, you can download it as part of the
+[*LunarG SDK*](https://vulkan.lunarg.com).
+
<a name="intro"></a>