blob: e4321159b6db1dc1f89c2acf1532767a774b30cb [file] [log] [blame]
/*
* MVKCmdTransfer.mm
*
* Copyright (c) 2015-2022 The Brenwill Workshop Ltd. (http://www.brenwill.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "MVKCmdTransfer.h"
#include "MVKCommandBuffer.h"
#include "MVKCommandPool.h"
#include "MVKCommandEncodingPool.h"
#include "MVKImage.h"
#include "MVKBuffer.h"
#include "MVKFramebuffer.h"
#include "MVKRenderPass.h"
#include "MTLRenderPassDescriptor+MoltenVK.h"
#include "MVKEnvironment.h"
#include "mvk_datatypes.hpp"
#include <algorithm>
#include <sys/mman.h>
#pragma mark -
#pragma mark Support functions
// Clamps the size so that the sum of the origin and size do not exceed the maximum size.
static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize maxSize) {
MTLSize clamped;
clamped.width = std::min(size.width, maxSize.width - origin.x);
clamped.height = std::min(size.height, maxSize.height - origin.y);
clamped.depth = std::min(size.depth, maxSize.depth - origin.z);
return clamped;
}
#pragma mark -
#pragma mark MVKCmdCopyImage
template <size_t N>
VkResult MVKCmdCopyImage<N>::setContent(MVKCommandBuffer* cmdBuff,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkImageCopy* pRegions) {
_srcImage = (MVKImage*)srcImage;
_srcLayout = srcImageLayout;
_dstImage = (MVKImage*)dstImage;
_dstLayout = dstImageLayout;
_vkImageCopies.clear(); // Clear for reuse
for (uint32_t regionIdx = 0; regionIdx < regionCount; regionIdx++) {
auto& vkIR = pRegions[regionIdx];
uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIR.srcSubresource.aspectMask);
uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIR.dstSubresource.aspectMask);
// Validate
MVKPixelFormats* pixFmts = cmdBuff->getPixelFormats();
if ((_dstImage->getSampleCount() != _srcImage->getSampleCount()) ||
(pixFmts->getBytesPerBlock(_dstImage->getMTLPixelFormat(dstPlaneIndex)) != pixFmts->getBytesPerBlock(_srcImage->getMTLPixelFormat(srcPlaneIndex)))) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdCopyImage(): Cannot copy between incompatible formats, such as formats of different pixel sizes, or between images with different sample counts.");
}
_vkImageCopies.push_back(vkIR);
}
return VK_SUCCESS;
}
template <size_t N>
void MVKCmdCopyImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse commandUse) {
MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats();
uint32_t copyCnt = (uint32_t)_vkImageCopies.size();
VkBufferImageCopy vkSrcCopies[copyCnt];
VkBufferImageCopy vkDstCopies[copyCnt];
size_t tmpBuffSize = 0;
for (uint32_t copyIdx = 0; copyIdx < copyCnt; copyIdx++) {
auto& vkIC = _vkImageCopies[copyIdx];
uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIC.srcSubresource.aspectMask);
uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIC.dstSubresource.aspectMask);
MTLPixelFormat srcMTLPixFmt = _srcImage->getMTLPixelFormat(srcPlaneIndex);
bool isSrcCompressed = _srcImage->getIsCompressed();
MTLPixelFormat dstMTLPixFmt = _dstImage->getMTLPixelFormat(dstPlaneIndex);
bool isDstCompressed = _dstImage->getIsCompressed();
// If source and destination have different formats and at least one is compressed, use a temporary intermediary buffer
bool useTempBuffer = (srcMTLPixFmt != dstMTLPixFmt) && (isSrcCompressed || isDstCompressed);
if (useTempBuffer) {
// Add copy from source image to temp buffer.
auto& srcCpy = vkSrcCopies[copyIdx];
srcCpy.bufferOffset = tmpBuffSize;
srcCpy.bufferRowLength = 0;
srcCpy.bufferImageHeight = 0;
srcCpy.imageSubresource = vkIC.srcSubresource;
srcCpy.imageOffset = vkIC.srcOffset;
srcCpy.imageExtent = vkIC.extent;
// Add copy from temp buffer to destination image.
// Extent is provided in source texels. If the source is compressed but the
// destination is not, each destination pixel will consume an entire source block,
// so we must downscale the destination extent by the size of the source block.
// Likewise if the destination is compressed and source is not, each source pixel
// will map to a block of pixels in the destination texture, and we need to
// adjust destination's extent accordingly.
VkExtent3D dstExtent = vkIC.extent;
if (isSrcCompressed && !isDstCompressed) {
VkExtent2D srcBlockExtent = pixFmts->getBlockTexelSize(srcMTLPixFmt);
dstExtent.width /= srcBlockExtent.width;
dstExtent.height /= srcBlockExtent.height;
} else if (!isSrcCompressed && isDstCompressed) {
VkExtent2D dstBlockExtent = pixFmts->getBlockTexelSize(dstMTLPixFmt);
dstExtent.width *= dstBlockExtent.width;
dstExtent.height *= dstBlockExtent.height;
}
auto& dstCpy = vkDstCopies[copyIdx];
dstCpy.bufferOffset = tmpBuffSize;
dstCpy.bufferRowLength = 0;
dstCpy.bufferImageHeight = 0;
dstCpy.imageSubresource = vkIC.dstSubresource;
dstCpy.imageOffset = vkIC.dstOffset;
dstCpy.imageExtent = dstExtent;
size_t bytesPerRow = pixFmts->getBytesPerRow(srcMTLPixFmt, vkIC.extent.width);
size_t bytesPerRegion = pixFmts->getBytesPerLayer(srcMTLPixFmt, bytesPerRow, vkIC.extent.height);
tmpBuffSize += bytesPerRegion;
} else {
// Map the source pixel format to the dest pixel format through a texture view on the source texture.
// If the source and dest pixel formats are the same, this will simply degenerate to the source texture itself.
id<MTLTexture> srcMTLTex = _srcImage->getMTLTexture(srcPlaneIndex, _dstImage->getMTLPixelFormat(dstPlaneIndex));
id<MTLTexture> dstMTLTex = _dstImage->getMTLTexture(dstPlaneIndex);
if ( !srcMTLTex || !dstMTLTex ) { return; }
id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(commandUse);
// If copies can be performed using direct texture-texture copying, do so
uint32_t srcLevel = vkIC.srcSubresource.mipLevel;
uint32_t srcBaseLayer = vkIC.srcSubresource.baseArrayLayer;
VkExtent3D srcExtent = _srcImage->getExtent3D(srcPlaneIndex, srcLevel);
uint32_t dstLevel = vkIC.dstSubresource.mipLevel;
uint32_t dstBaseLayer = vkIC.dstSubresource.baseArrayLayer;
VkExtent3D dstExtent = _dstImage->getExtent3D(dstPlaneIndex, dstLevel);
// If the extent completely covers both images, I can copy all layers at once.
// This will obviously not apply to copies between a 3D and 2D image.
if (mvkVkExtent3DsAreEqual(srcExtent, vkIC.extent) && mvkVkExtent3DsAreEqual(dstExtent, vkIC.extent) &&
[mtlBlitEnc respondsToSelector: @selector(copyFromTexture:sourceSlice:sourceLevel:toTexture:destinationSlice:destinationLevel:sliceCount:levelCount:)]) {
assert((_srcImage->getMTLTextureType() == MTLTextureType3D) == (_dstImage->getMTLTextureType() == MTLTextureType3D));
[mtlBlitEnc copyFromTexture: srcMTLTex
sourceSlice: srcBaseLayer
sourceLevel: srcLevel
toTexture: dstMTLTex
destinationSlice: dstBaseLayer
destinationLevel: dstLevel
sliceCount: vkIC.srcSubresource.layerCount
levelCount: 1];
} else {
MTLOrigin srcOrigin = mvkMTLOriginFromVkOffset3D(vkIC.srcOffset);
MTLSize srcSize;
uint32_t layCnt;
if ((_srcImage->getMTLTextureType() == MTLTextureType3D) != (_dstImage->getMTLTextureType() == MTLTextureType3D)) {
// In the case, the number of layers to copy is in extent.depth. Use that value,
// then clamp the depth so we don't try to copy more than Metal will allow.
layCnt = vkIC.extent.depth;
srcSize = mvkClampMTLSize(mvkMTLSizeFromVkExtent3D(vkIC.extent),
srcOrigin,
mvkMTLSizeFromVkExtent3D(srcExtent));
srcSize.depth = 1;
} else {
layCnt = vkIC.srcSubresource.layerCount;
srcSize = mvkClampMTLSize(mvkMTLSizeFromVkExtent3D(vkIC.extent),
srcOrigin,
mvkMTLSizeFromVkExtent3D(srcExtent));
}
MTLOrigin dstOrigin = mvkMTLOriginFromVkOffset3D(vkIC.dstOffset);
for (uint32_t layIdx = 0; layIdx < layCnt; layIdx++) {
// We can copy between a 3D and a 2D image easily. Just copy between
// one slice of the 2D image and one plane of the 3D image at a time.
if ((_srcImage->getMTLTextureType() == MTLTextureType3D) == (_dstImage->getMTLTextureType() == MTLTextureType3D)) {
[mtlBlitEnc copyFromTexture: srcMTLTex
sourceSlice: srcBaseLayer + layIdx
sourceLevel: srcLevel
sourceOrigin: srcOrigin
sourceSize: srcSize
toTexture: dstMTLTex
destinationSlice: dstBaseLayer + layIdx
destinationLevel: dstLevel
destinationOrigin: dstOrigin];
} else if (_srcImage->getMTLTextureType() == MTLTextureType3D) {
[mtlBlitEnc copyFromTexture: srcMTLTex
sourceSlice: srcBaseLayer
sourceLevel: srcLevel
sourceOrigin: MTLOriginMake(srcOrigin.x, srcOrigin.y, srcOrigin.z + layIdx)
sourceSize: srcSize
toTexture: dstMTLTex
destinationSlice: dstBaseLayer + layIdx
destinationLevel: dstLevel
destinationOrigin: dstOrigin];
} else {
assert(_dstImage->getMTLTextureType() == MTLTextureType3D);
[mtlBlitEnc copyFromTexture: srcMTLTex
sourceSlice: srcBaseLayer + layIdx
sourceLevel: srcLevel
sourceOrigin: srcOrigin
sourceSize: srcSize
toTexture: dstMTLTex
destinationSlice: dstBaseLayer
destinationLevel: dstLevel
destinationOrigin: MTLOriginMake(dstOrigin.x, dstOrigin.y, dstOrigin.z + layIdx)];
}
}
}
}
}
if (tmpBuffSize > 0) {
MVKBufferDescriptorData tempBuffData;
tempBuffData.size = tmpBuffSize;
tempBuffData.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VkBuffer tempBuff = (VkBuffer)cmdEncoder->getCommandEncodingPool()->getTransferMVKBuffer(tempBuffData);
MVKCmdBufferImageCopy<N> cpyCmd;
// Copy from source image to buffer
// Create and execute a temporary buffer image command.
// To be threadsafe...do NOT acquire and return the command from the pool.
cpyCmd.setContent(cmdEncoder->_cmdBuffer, tempBuff, (VkImage)_srcImage, _srcLayout, copyCnt, vkSrcCopies, false);
cpyCmd.encode(cmdEncoder);
// Copy from buffer to destination image
// Create and execute a temporary buffer image command.
// To be threadsafe...do NOT acquire and return the command from the pool.
cpyCmd.setContent(cmdEncoder->_cmdBuffer, tempBuff, (VkImage)_dstImage, _dstLayout, copyCnt, vkDstCopies, true);
cpyCmd.encode(cmdEncoder);
}
}
template class MVKCmdCopyImage<1>;
template class MVKCmdCopyImage<4>;
#pragma mark -
#pragma mark MVKCmdBlitImage
template <size_t N>
VkResult MVKCmdBlitImage<N>::setContent(MVKCommandBuffer* cmdBuff,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkImageBlit* pRegions,
VkFilter filter) {
_srcImage = (MVKImage*)srcImage;
_srcLayout = srcImageLayout;
_dstImage = (MVKImage*)dstImage;
_dstLayout = dstImageLayout;
_filter = filter;
bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getDevice()->_pMetalFeatures->renderLinearTextures && _dstImage->getIsLinear();
_vkImageBlits.clear(); // Clear for reuse
for (uint32_t rIdx = 0; rIdx < regionCount; rIdx++) {
auto& vkIB = pRegions[rIdx];
// Validate - macOS linear images cannot be a scaling or inversion destination
if (isDestUnwritableLinear && !(canCopyFormats(vkIB) && canCopy(vkIB)) ) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdBlitImage(): Scaling or inverting to a linear destination image is not supported.");
}
_vkImageBlits.push_back(vkIB);
}
return VK_SUCCESS;
}
template <size_t N>
bool MVKCmdBlitImage<N>::canCopyFormats(const VkImageBlit& region) {
uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(region.srcSubresource.aspectMask);
uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(region.dstSubresource.aspectMask);
return ((_srcImage->getMTLPixelFormat(srcPlaneIndex) == _dstImage->getMTLPixelFormat(dstPlaneIndex)) &&
(_dstImage->getSampleCount() == _srcImage->getSampleCount()));
}
// The source and destination sizes must be equal and not be negative in any direction
template <size_t N>
bool MVKCmdBlitImage<N>::canCopy(const VkImageBlit& region) {
VkOffset3D srcSize = mvkVkOffset3DDifference(region.srcOffsets[1], region.srcOffsets[0]);
VkOffset3D dstSize = mvkVkOffset3DDifference(region.dstOffsets[1], region.dstOffsets[0]);
return (mvkVkOffset3DsAreEqual(srcSize, dstSize) &&
(srcSize.x >= 0) && (srcSize.y >= 0) && (srcSize.z >= 0));
}
template <size_t N>
void MVKCmdBlitImage<N>::populateVertices(MVKVertexPosTex* vertices, const VkImageBlit& region) {
const VkOffset3D& so0 = region.srcOffsets[0];
const VkOffset3D& so1 = region.srcOffsets[1];
const VkOffset3D& do0 = region.dstOffsets[0];
const VkOffset3D& do1 = region.dstOffsets[1];
// Get the extents of the source and destination textures.
uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(region.srcSubresource.aspectMask);
uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(region.dstSubresource.aspectMask);
VkExtent3D srcExtent = _srcImage->getExtent3D(srcPlaneIndex, region.srcSubresource.mipLevel);
VkExtent3D dstExtent = _dstImage->getExtent3D(dstPlaneIndex, region.dstSubresource.mipLevel);
// Determine the bottom-left and top-right corners of the source and destination
// texture regions, each as a fraction of the corresponding texture size.
CGPoint srcBL = CGPointMake((CGFloat)(so0.x) / (CGFloat)srcExtent.width,
(CGFloat)(srcExtent.height - so1.y) / (CGFloat)srcExtent.height);
CGPoint srcTR = CGPointMake((CGFloat)(so1.x) / (CGFloat)srcExtent.width,
(CGFloat)(srcExtent.height - so0.y) / (CGFloat)srcExtent.height);
CGPoint dstBL = CGPointMake((CGFloat)(do0.x) / (CGFloat)dstExtent.width,
(CGFloat)(dstExtent.height - do1.y) / (CGFloat)dstExtent.height);
CGPoint dstTR = CGPointMake((CGFloat)(do1.x) / (CGFloat)dstExtent.width,
(CGFloat)(dstExtent.height - do0.y) / (CGFloat)dstExtent.height);
// The destination region is used for vertex positions,
// which are bounded by (-1.0 < p < 1.0) in clip-space.
// Map texture coordinates (0.0 < p < 1.0) to vertex coordinates (-1.0 < p < 1.0).
dstBL = CGPointMake((dstBL.x * 2.0) - 1.0, (dstBL.y * 2.0) - 1.0);
dstTR = CGPointMake((dstTR.x * 2.0) - 1.0, (dstTR.y * 2.0) - 1.0);
MVKVertexPosTex* pVtx;
// Bottom left vertex
pVtx = &vertices[0];
pVtx->position.x = dstBL.x;
pVtx->position.y = dstBL.y;
pVtx->texCoord.x = srcBL.x;
pVtx->texCoord.y = (1.0 - srcBL.y);
// Bottom right vertex
pVtx = &vertices[1];
pVtx->position.x = dstTR.x;
pVtx->position.y = dstBL.y;
pVtx->texCoord.x = srcTR.x;
pVtx->texCoord.y = (1.0 - srcBL.y);
// Top left vertex
pVtx = &vertices[2];
pVtx->position.x = dstBL.x;
pVtx->position.y = dstTR.y;
pVtx->texCoord.x = srcBL.x;
pVtx->texCoord.y = (1.0 - srcTR.y);
// Top right vertex
pVtx = &vertices[3];
pVtx->position.x = dstTR.x;
pVtx->position.y = dstTR.y;
pVtx->texCoord.x = srcTR.x;
pVtx->texCoord.y = (1.0 - srcTR.y);
}
template <size_t N>
void MVKCmdBlitImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse commandUse) {
size_t vkIBCnt = _vkImageBlits.size();
VkImageCopy vkImageCopies[vkIBCnt];
MVKImageBlitRender mvkBlitRenders[vkIBCnt];
uint32_t copyCnt = 0;
uint32_t blitCnt = 0;
// Separate BLITs into those that are really just simple texure region copies,
// and those that require rendering
for (auto& vkIB : _vkImageBlits) {
if (canCopyFormats(vkIB) && canCopy(vkIB)) {
const VkOffset3D& so0 = vkIB.srcOffsets[0];
const VkOffset3D& so1 = vkIB.srcOffsets[1];
auto& vkIC = vkImageCopies[copyCnt++];
vkIC.srcSubresource = vkIB.srcSubresource;
vkIC.srcOffset = vkIB.srcOffsets[0];
vkIC.dstSubresource = vkIB.dstSubresource;
vkIC.dstOffset = vkIB.dstOffsets[0];
vkIC.extent.width = so1.x - so0.x;
vkIC.extent.height = so1.y - so0.y;
vkIC.extent.depth = so1.z - so0.z;
} else {
auto& mvkIBR = mvkBlitRenders[blitCnt++];
mvkIBR.region = vkIB;
populateVertices(mvkIBR.vertices, vkIB);
}
}
// Perform those BLITs that can be covered by simple texture copying.
if (copyCnt) {
MVKCmdCopyImage<N> copyCmd;
copyCmd.setContent(cmdEncoder->_cmdBuffer,
(VkImage)_srcImage, _srcLayout,
(VkImage)_dstImage, _dstLayout,
copyCnt, vkImageCopies);
copyCmd.encode(cmdEncoder, kMVKCommandUseBlitImage);
}
// Perform those BLITs that require rendering to destination texture.
for (uint32_t blitIdx = 0; blitIdx < blitCnt; blitIdx++) {
auto& mvkIBR = mvkBlitRenders[blitIdx];
uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(mvkIBR.region.srcSubresource.aspectMask);
uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(mvkIBR.region.dstSubresource.aspectMask);
id<MTLTexture> srcMTLTex = _srcImage->getMTLTexture(srcPlaneIndex);
id<MTLTexture> dstMTLTex = _dstImage->getMTLTexture(dstPlaneIndex);
if (blitCnt && srcMTLTex && dstMTLTex) {
cmdEncoder->endCurrentMetalEncoding();
MTLRenderPassDescriptor* mtlRPD = [MTLRenderPassDescriptor renderPassDescriptor];
MTLRenderPassColorAttachmentDescriptor* mtlColorAttDesc = mtlRPD.colorAttachments[0];
MTLRenderPassDepthAttachmentDescriptor* mtlDepthAttDesc = mtlRPD.depthAttachment;
MTLRenderPassStencilAttachmentDescriptor* mtlStencilAttDesc = mtlRPD.stencilAttachment;
if (mvkIsAnyFlagEnabled(mvkIBR.region.dstSubresource.aspectMask, (VK_IMAGE_ASPECT_DEPTH_BIT))) {
mtlDepthAttDesc.loadAction = MTLLoadActionLoad;
mtlDepthAttDesc.storeAction = MTLStoreActionStore;
mtlDepthAttDesc.texture = dstMTLTex;
} else {
mtlDepthAttDesc.loadAction = MTLLoadActionDontCare;
mtlDepthAttDesc.storeAction = MTLStoreActionDontCare;
mtlDepthAttDesc.texture = nil;
}
if (mvkIsAnyFlagEnabled(mvkIBR.region.dstSubresource.aspectMask, (VK_IMAGE_ASPECT_STENCIL_BIT))) {
mtlStencilAttDesc.loadAction = MTLLoadActionLoad;
mtlStencilAttDesc.storeAction = MTLStoreActionStore;
mtlStencilAttDesc.texture = dstMTLTex;
} else {
mtlStencilAttDesc.loadAction = MTLLoadActionDontCare;
mtlStencilAttDesc.storeAction = MTLStoreActionDontCare;
mtlStencilAttDesc.texture = nil;
}
if (!mvkIsAnyFlagEnabled(mvkIBR.region.dstSubresource.aspectMask, (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
mtlColorAttDesc.loadAction = MTLLoadActionLoad;
mtlColorAttDesc.storeAction = MTLStoreActionStore;
mtlColorAttDesc.texture = dstMTLTex;
}
MVKRPSKeyBlitImg blitKey;
blitKey.srcMTLPixelFormat = _srcImage->getMTLPixelFormat(srcPlaneIndex);
blitKey.srcMTLTextureType = _srcImage->getMTLTextureType();
if (blitKey.srcMTLTextureType == MTLTextureTypeCube || blitKey.srcMTLTextureType == MTLTextureTypeCubeArray) {
// In this case, I'll use a temp 2D array view. That way, I don't have to
// deal with mapping the blit coordinates to a cube direction vector.
blitKey.srcMTLTextureType = MTLTextureType2DArray;
srcMTLTex = [srcMTLTex newTextureViewWithPixelFormat: blitKey.getSrcMTLPixelFormat()
textureType: MTLTextureType2DArray
levels: NSMakeRange(0, srcMTLTex.mipmapLevelCount)
slices: NSMakeRange(0, srcMTLTex.arrayLength)];
[cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) {
[srcMTLTex release];
}];
}
blitKey.dstMTLPixelFormat = _dstImage->getMTLPixelFormat(dstPlaneIndex);
blitKey.srcFilter = mvkMTLSamplerMinMagFilterFromVkFilter(_filter);
blitKey.srcAspect = mvkIBR.region.srcSubresource.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
blitKey.dstSampleCount = mvkSampleCountFromVkSampleCountFlagBits(_dstImage->getSampleCount());
id<MTLRenderPipelineState> mtlRPS = cmdEncoder->getCommandEncodingPool()->getCmdBlitImageMTLRenderPipelineState(blitKey);
bool isBlittingDepth = mvkIsAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_DEPTH_BIT));
bool isBlittingStencil = mvkIsAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_STENCIL_BIT));
id<MTLDepthStencilState> mtlDSS = cmdEncoder->getCommandEncodingPool()->getMTLDepthStencilState(isBlittingDepth, isBlittingStencil);
uint32_t vtxBuffIdx = cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKVertexContentBufferIndex);
mtlColorAttDesc.level = mvkIBR.region.dstSubresource.mipLevel;
mtlDepthAttDesc.level = mvkIBR.region.dstSubresource.mipLevel;
mtlStencilAttDesc.level = mvkIBR.region.dstSubresource.mipLevel;
bool isLayeredBlit = blitKey.dstSampleCount > 1 ? cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering : cmdEncoder->getDevice()->_pMetalFeatures->layeredRendering;
uint32_t layCnt = mvkIBR.region.srcSubresource.layerCount;
if (_dstImage->getMTLTextureType() == MTLTextureType3D) {
layCnt = mvkAbsDiff(mvkIBR.region.dstOffsets[1].z, mvkIBR.region.dstOffsets[0].z);
}
if (isLayeredBlit) {
// In this case, I can blit all layers at once with a layered draw.
mtlRPD.renderTargetArrayLengthMVK = layCnt;
layCnt = 1; // Only need to run the loop once.
}
for (uint32_t layIdx = 0; layIdx < layCnt; layIdx++) {
// Update the render pass descriptor for the texture level and slice, and create a render encoder.
if (_dstImage->getMTLTextureType() == MTLTextureType3D) {
if (isLayeredBlit) {
// For layered blits, the layers are always in ascending order. I'll reverse the order
// of the 'r' coordinates if the destination is mirrored.
uint32_t depthPlane = std::min(mvkIBR.region.dstOffsets[0].z, mvkIBR.region.dstOffsets[1].z);
mtlColorAttDesc.depthPlane = depthPlane;
mtlDepthAttDesc.depthPlane = depthPlane;
mtlStencilAttDesc.depthPlane = depthPlane;
} else {
uint32_t depthPlane = mvkIBR.region.dstOffsets[0].z + (mvkIBR.region.dstOffsets[1].z > mvkIBR.region.dstOffsets[0].z ? layIdx : -(layIdx + 1));
mtlColorAttDesc.depthPlane = depthPlane;
mtlDepthAttDesc.depthPlane = depthPlane;
mtlStencilAttDesc.depthPlane = depthPlane;
}
} else {
mtlColorAttDesc.slice = mvkIBR.region.dstSubresource.baseArrayLayer + layIdx;
mtlDepthAttDesc.slice = mvkIBR.region.dstSubresource.baseArrayLayer + layIdx;
mtlStencilAttDesc.slice = mvkIBR.region.dstSubresource.baseArrayLayer + layIdx;
}
id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPD];
setLabelIfNotNil(mtlRendEnc, mvkMTLRenderCommandEncoderLabel(commandUse));
float zIncr;
if (blitKey.srcMTLTextureType == MTLTextureType3D) {
// In this case, I need to interpolate along the third dimension manually.
VkExtent3D srcExtent = _srcImage->getExtent3D(srcPlaneIndex, mvkIBR.region.dstSubresource.mipLevel);
VkOffset3D so0 = mvkIBR.region.srcOffsets[0], so1 = mvkIBR.region.srcOffsets[1];
VkOffset3D do0 = mvkIBR.region.dstOffsets[0], do1 = mvkIBR.region.dstOffsets[1];
float startZ = (float)so0.z / (float)srcExtent.depth;
float endZ = (float)so1.z / (float)srcExtent.depth;
if (isLayeredBlit && do0.z > do1.z) {
// Swap start and end points so interpolation moves in the right direction.
std::swap(startZ, endZ);
}
zIncr = (endZ - startZ) / mvkAbsDiff(do1.z, do0.z);
float z = startZ + (isLayeredBlit ? 0.0 : (layIdx + 0.5)) * zIncr;
for (uint32_t i = 0; i < kMVKBlitVertexCount; ++i) {
mvkIBR.vertices[i].texCoord.z = z;
}
}
[mtlRendEnc pushDebugGroup: @"vkCmdBlitImage"];
[mtlRendEnc setRenderPipelineState: mtlRPS];
[mtlRendEnc setDepthStencilState: mtlDSS];
cmdEncoder->setVertexBytes(mtlRendEnc, mvkIBR.vertices, sizeof(mvkIBR.vertices), vtxBuffIdx);
if (isLayeredBlit) {
cmdEncoder->setVertexBytes(mtlRendEnc, &zIncr, sizeof(zIncr), 0);
}
if (!mvkIsOnlyAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_STENCIL_BIT))) {
[mtlRendEnc setFragmentTexture: srcMTLTex atIndex: 0];
}
if (isBlittingStencil) {
// For stencil blits of packed depth/stencil images, I need to use a stencil view.
MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats();
if (pixFmts->isDepthFormat(blitKey.getSrcMTLPixelFormat()) &&
pixFmts->isStencilFormat(blitKey.getSrcMTLPixelFormat())) {
MTLPixelFormat stencilFmt = blitKey.getSrcMTLPixelFormat();
if (stencilFmt == MTLPixelFormatDepth32Float_Stencil8) {
stencilFmt = MTLPixelFormatX32_Stencil8;
#if MVK_MACOS
} else if (stencilFmt == MTLPixelFormatDepth24Unorm_Stencil8) {
stencilFmt = MTLPixelFormatX24_Stencil8;
#endif
}
id<MTLTexture> stencilMTLTex = [srcMTLTex newTextureViewWithPixelFormat: stencilFmt];
[cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) {
[stencilMTLTex release];
}];
[mtlRendEnc setFragmentTexture: stencilMTLTex atIndex: 1];
} else {
[mtlRendEnc setFragmentTexture: srcMTLTex atIndex: 1];
}
}
struct {
uint slice;
float lod;
} texSubRez;
texSubRez.slice = mvkIBR.region.srcSubresource.baseArrayLayer + layIdx;
texSubRez.lod = mvkIBR.region.srcSubresource.mipLevel;
cmdEncoder->setFragmentBytes(mtlRendEnc, &texSubRez, sizeof(texSubRez), 0);
NSUInteger instanceCount = isLayeredBlit ? mtlRPD.renderTargetArrayLengthMVK : 1;
[mtlRendEnc drawPrimitives: MTLPrimitiveTypeTriangleStrip vertexStart: 0 vertexCount: kMVKBlitVertexCount instanceCount: instanceCount];
[mtlRendEnc popDebugGroup];
[mtlRendEnc endEncoding];
}
}
}
}
template class MVKCmdBlitImage<1>;
template class MVKCmdBlitImage<4>;
#pragma mark -
#pragma mark MVKCmdResolveImage
template <size_t N>
VkResult MVKCmdResolveImage<N>::setContent(MVKCommandBuffer* cmdBuff,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage dstImage,
VkImageLayout dstImageLayout,
uint32_t regionCount,
const VkImageResolve* pRegions) {
_srcImage = (MVKImage*)srcImage;
_srcLayout = srcImageLayout;
_dstImage = (MVKImage*)dstImage;
_dstLayout = dstImageLayout;
_vkImageResolves.clear(); // Clear for reuse
_vkImageResolves.reserve(regionCount);
for (uint32_t regionIdx = 0; regionIdx < regionCount; regionIdx++) {
auto& vkIR = pRegions[regionIdx];
uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIR.dstSubresource.aspectMask);
// Validate
MVKPixelFormats* pixFmts = cmdBuff->getPixelFormats();
if ( !mvkAreAllFlagsEnabled(pixFmts->getCapabilities(_dstImage->getMTLPixelFormat(dstPlaneIndex)), kMVKMTLFmtCapsResolve) ) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdResolveImage(): %s cannot be used as a resolve destination on this device.", pixFmts->getName(_dstImage->getVkFormat()));
}
_vkImageResolves.push_back(vkIR);
}
return VK_SUCCESS;
}
template <size_t N>
void MVKCmdResolveImage<N>::encode(MVKCommandEncoder* cmdEncoder) {
size_t vkIRCnt = _vkImageResolves.size();
VkImageBlit expansionRegions[vkIRCnt];
VkImageCopy copyRegions[vkIRCnt];
// If we can do layered rendering to a multisample texture, I can resolve all the layers at once.
uint32_t layerCnt = 0;
if (cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering) {
layerCnt = (uint32_t)_vkImageResolves.size();
} else {
for (VkImageResolve& vkIR : _vkImageResolves) { layerCnt += vkIR.dstSubresource.layerCount; }
}
MVKMetalResolveSlice mtlResolveSlices[layerCnt];
uint32_t expCnt = 0;
uint32_t copyCnt = 0;
uint32_t sliceCnt = 0;
for (VkImageResolve& vkIR : _vkImageResolves) {
uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIR.srcSubresource.aspectMask);
uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIR.dstSubresource.aspectMask);
VkExtent3D srcImgExt = _srcImage->getExtent3D(srcPlaneIndex, vkIR.srcSubresource.mipLevel);
VkExtent3D dstImgExt = _dstImage->getExtent3D(dstPlaneIndex, vkIR.dstSubresource.mipLevel);
// If the region does not cover the entire content of the destination level, expand
// the destination content in the region to the temporary image. The purpose of this
// expansion is to render the existing content of the destination image to the
// temporary transfer multisample image, so that regions of that temporary transfer
// image can then be overwritten with content from the source image, prior to
// resolving it back to the destination image.
if ( !mvkVkExtent3DsAreEqual(dstImgExt, vkIR.extent) ) {
VkImageBlit& expRgn = expansionRegions[expCnt++];
expRgn.srcSubresource = vkIR.dstSubresource;
expRgn.srcOffsets[0] = { 0, 0, 0 };
expRgn.srcOffsets[1] = { int32_t(dstImgExt.width), int32_t(dstImgExt.height), int32_t(dstImgExt.depth) };
expRgn.dstSubresource = vkIR.dstSubresource;
expRgn.dstOffsets[0] = { 0, 0, 0 };
expRgn.dstOffsets[1] = { int32_t(dstImgExt.width), int32_t(dstImgExt.height), int32_t(dstImgExt.depth) };
}
// Copy the region from the source image to the temporary multisample image,
// prior to the temporary image being resolved back to the destination image.
// The source of this copy stage is the source image, and the destination of
// this copy stage is the temporary transfer image.
bool needXfrImage = !mvkVkExtent3DsAreEqual(srcImgExt, vkIR.extent) || !mvkVkExtent3DsAreEqual(dstImgExt, vkIR.extent);
if ( needXfrImage ) {
VkImageCopy& cpyRgn = copyRegions[copyCnt++];
cpyRgn.srcSubresource = vkIR.srcSubresource;
cpyRgn.srcOffset = vkIR.srcOffset;
cpyRgn.dstSubresource = vkIR.dstSubresource;
cpyRgn.dstOffset = vkIR.dstOffset;
cpyRgn.extent = vkIR.extent;
}
// Adds a resolve slice struct for each destination layer in the resolve region.
// Note that the source subresource for this is that of the SOURCE image if we're doing a
// direct resolve, but that of the DESTINATION if we need a temporary transfer image.
mtlResolveSlices[sliceCnt].dstSubresource = vkIR.dstSubresource;
mtlResolveSlices[sliceCnt].srcSubresource = needXfrImage ? vkIR.dstSubresource : vkIR.srcSubresource;
if (cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering) {
sliceCnt++;
} else {
uint32_t layCnt = vkIR.dstSubresource.layerCount;
mtlResolveSlices[sliceCnt].dstSubresource.layerCount = 1;
mtlResolveSlices[sliceCnt].srcSubresource.layerCount = 1;
sliceCnt++;
for (uint32_t layIdx = 1; layIdx < layCnt; layIdx++) {
MVKMetalResolveSlice& rslvSlice = mtlResolveSlices[sliceCnt];
rslvSlice = mtlResolveSlices[sliceCnt - 1];
rslvSlice.dstSubresource.baseArrayLayer++;
rslvSlice.srcSubresource.baseArrayLayer++;
sliceCnt++;
}
}
}
// Expansion and copying is not required. Each mip level of the source image
// is being resolved entirely. Resolve directly from the source image.
MVKImage* xfrImage = _srcImage;
if (copyCnt) {
// Expansion and/or copying is required. Acquire a temporary transfer image, expand
// the destination image into it if necessary, copy from the source image to the
// temporary image, and then resolve from the temporary image to the destination image.
MVKImageDescriptorData xferImageData;
_dstImage->getTransferDescriptorData(xferImageData);
xferImageData.samples = _srcImage->getSampleCount();
xfrImage = cmdEncoder->getCommandEncodingPool()->getTransferMVKImage(xferImageData);
if (expCnt) {
// Expand the current content of the destination image to the temporary transfer image.
MVKCmdBlitImage<N> expCmd;
expCmd.setContent(cmdEncoder->_cmdBuffer,
(VkImage)_dstImage, _dstLayout, (VkImage)xfrImage, _dstLayout,
expCnt, expansionRegions, VK_FILTER_LINEAR);
expCmd.encode(cmdEncoder, kMVKCommandUseResolveExpandImage);
}
// Copy the resolve regions of the source image to the temporary transfer image.
MVKCmdCopyImage<N> copyCmd;
copyCmd.setContent(cmdEncoder->_cmdBuffer,
(VkImage)_srcImage, _srcLayout,
(VkImage)xfrImage, _dstLayout,
copyCnt, copyRegions);
copyCmd.encode(cmdEncoder, kMVKCommandUseResolveCopyImage);
}
cmdEncoder->endCurrentMetalEncoding();
MTLRenderPassDescriptor* mtlRPD = [MTLRenderPassDescriptor renderPassDescriptor];
MTLRenderPassColorAttachmentDescriptor* mtlColorAttDesc = mtlRPD.colorAttachments[0];
mtlColorAttDesc.loadAction = MTLLoadActionLoad;
mtlColorAttDesc.storeAction = MTLStoreActionMultisampleResolve;
// For each resolve slice, update the render pass descriptor for
// the texture level and slice and create a render encoder.
for (uint32_t sIdx = 0; sIdx < sliceCnt; sIdx++) {
MVKMetalResolveSlice& rslvSlice = mtlResolveSlices[sIdx];
uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(rslvSlice.srcSubresource.aspectMask);
uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(rslvSlice.dstSubresource.aspectMask);
mtlColorAttDesc.texture = xfrImage->getMTLTexture(srcPlaneIndex);
mtlColorAttDesc.resolveTexture = _dstImage->getMTLTexture(dstPlaneIndex);
mtlColorAttDesc.level = rslvSlice.srcSubresource.mipLevel;
mtlColorAttDesc.slice = rslvSlice.srcSubresource.baseArrayLayer;
mtlColorAttDesc.resolveLevel = rslvSlice.dstSubresource.mipLevel;
mtlColorAttDesc.resolveSlice = rslvSlice.dstSubresource.baseArrayLayer;
if (rslvSlice.dstSubresource.layerCount > 1) {
mtlRPD.renderTargetArrayLengthMVK = rslvSlice.dstSubresource.layerCount;
}
id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPD];
setLabelIfNotNil(mtlRendEnc, mvkMTLRenderCommandEncoderLabel(kMVKCommandUseResolveImage));
[mtlRendEnc pushDebugGroup: @"vkCmdResolveImage"];
[mtlRendEnc popDebugGroup];
[mtlRendEnc endEncoding];
}
}
template class MVKCmdResolveImage<1>;
template class MVKCmdResolveImage<4>;
#pragma mark -
#pragma mark MVKCmdCopyBuffer
// Matches shader struct.
typedef struct {
uint32_t srcOffset;
uint32_t dstOffset;
uint32_t size;
} MVKCmdCopyBufferInfo;
template <size_t N>
VkResult MVKCmdCopyBuffer<N>::setContent(MVKCommandBuffer* cmdBuff,
VkBuffer srcBuffer,
VkBuffer destBuffer,
uint32_t regionCount,
const VkBufferCopy* pRegions) {
_srcBuffer = (MVKBuffer*)srcBuffer;
_dstBuffer = (MVKBuffer*)destBuffer;
// Add buffer regions
_bufferCopyRegions.clear(); // Clear for reuse
_bufferCopyRegions.reserve(regionCount);
for (uint32_t i = 0; i < regionCount; i++) {
_bufferCopyRegions.push_back(pRegions[i]);
}
return VK_SUCCESS;
}
template <size_t N>
void MVKCmdCopyBuffer<N>::encode(MVKCommandEncoder* cmdEncoder) {
id<MTLBuffer> srcMTLBuff = _srcBuffer->getMTLBuffer();
NSUInteger srcMTLBuffOffset = _srcBuffer->getMTLBufferOffset();
id<MTLBuffer> dstMTLBuff = _dstBuffer->getMTLBuffer();
NSUInteger dstMTLBuffOffset = _dstBuffer->getMTLBufferOffset();
VkDeviceSize buffAlign = cmdEncoder->getDevice()->_pMetalFeatures->mtlCopyBufferAlignment;
for (auto& cpyRgn : _bufferCopyRegions) {
const bool useComputeCopy = buffAlign > 1 && (cpyRgn.srcOffset % buffAlign != 0 ||
cpyRgn.dstOffset % buffAlign != 0 ||
cpyRgn.size % buffAlign != 0);
if (useComputeCopy) {
MVKAssert(mvkFits<uint32_t>(cpyRgn.srcOffset) && mvkFits<uint32_t>(cpyRgn.dstOffset) && mvkFits<uint32_t>(cpyRgn.size),
"Byte-aligned buffer copy region offsets and size must each fit into a 32-bit unsigned integer.");
MVKCmdCopyBufferInfo copyInfo;
copyInfo.srcOffset = (uint32_t)cpyRgn.srcOffset;
copyInfo.dstOffset = (uint32_t)cpyRgn.dstOffset;
copyInfo.size = (uint32_t)cpyRgn.size;
id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyBuffer);
[mtlComputeEnc pushDebugGroup: @"vkCmdCopyBuffer"];
[mtlComputeEnc setComputePipelineState: cmdEncoder->getCommandEncodingPool()->getCmdCopyBufferBytesMTLComputePipelineState()];
[mtlComputeEnc setBuffer:srcMTLBuff offset: srcMTLBuffOffset atIndex: 0];
[mtlComputeEnc setBuffer:dstMTLBuff offset: dstMTLBuffOffset atIndex: 1];
[mtlComputeEnc setBytes: &copyInfo length: sizeof(copyInfo) atIndex: 2];
[mtlComputeEnc dispatchThreadgroups: MTLSizeMake(1, 1, 1) threadsPerThreadgroup: MTLSizeMake(1, 1, 1)];
[mtlComputeEnc popDebugGroup];
} else {
id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseCopyBuffer);
[mtlBlitEnc copyFromBuffer: srcMTLBuff
sourceOffset: (srcMTLBuffOffset + cpyRgn.srcOffset)
toBuffer: dstMTLBuff
destinationOffset: (dstMTLBuffOffset + cpyRgn.dstOffset)
size: cpyRgn.size];
}
}
}
template class MVKCmdCopyBuffer<1>;
template class MVKCmdCopyBuffer<4>;
#pragma mark -
#pragma mark MVKCmdBufferImageCopy
// Matches shader struct.
typedef struct {
uint32_t srcRowStride;
uint32_t srcRowStrideHigh;
uint32_t srcDepthStride;
uint32_t srcDepthStrideHigh;
uint32_t destRowStride;
uint32_t destRowStrideHigh;
uint32_t destDepthStride;
uint32_t destDepthStrideHigh;
VkFormat format;
VkOffset3D offset;
VkExtent3D extent;
} MVKCmdCopyBufferToImageInfo;
template <size_t N>
VkResult MVKCmdBufferImageCopy<N>::setContent(MVKCommandBuffer* cmdBuff,
VkBuffer buffer,
VkImage image,
VkImageLayout imageLayout,
uint32_t regionCount,
const VkBufferImageCopy* pRegions,
bool toImage) {
_buffer = (MVKBuffer*)buffer;
_image = (MVKImage*)image;
_toImage = toImage;
// Add buffer regions
_bufferImageCopyRegions.clear(); // Clear for reuse
_bufferImageCopyRegions.reserve(regionCount);
for (uint32_t i = 0; i < regionCount; i++) {
_bufferImageCopyRegions.push_back(pRegions[i]);
// Validate
if ( !_image->hasExpectedTexelSize() ) {
MTLPixelFormat mtlPixFmt = _image->getMTLPixelFormat(MVKImage::getPlaneFromVkImageAspectFlags(pRegions[i].imageSubresource.aspectMask));
const char* cmdName = _toImage ? "vkCmdCopyBufferToImage" : "vkCmdCopyImageToBuffer";
return cmdBuff->reportError(VK_ERROR_FORMAT_NOT_SUPPORTED, "%s(): The image is using Metal format %s as a substitute for Vulkan format %s. Since the pixel size is different, content for the image cannot be copied to or from a buffer.", cmdName, cmdBuff->getPixelFormats()->getName(mtlPixFmt), cmdBuff->getPixelFormats()->getName(_image->getVkFormat()));
}
}
return VK_SUCCESS;
}
template <size_t N>
void MVKCmdBufferImageCopy<N>::encode(MVKCommandEncoder* cmdEncoder) {
id<MTLBuffer> mtlBuffer = _buffer->getMTLBuffer();
if ( !mtlBuffer ) { return; }
NSUInteger mtlBuffOffsetBase = _buffer->getMTLBufferOffset();
MVKCommandUse cmdUse = _toImage ? kMVKCommandUseCopyBufferToImage : kMVKCommandUseCopyImageToBuffer;
MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats();
for (auto& cpyRgn : _bufferImageCopyRegions) {
uint8_t planeIndex = MVKImage::getPlaneFromVkImageAspectFlags(cpyRgn.imageSubresource.aspectMask);
MTLPixelFormat mtlPixFmt = _image->getMTLPixelFormat(planeIndex);
id<MTLTexture> mtlTexture = _image->getMTLTexture(planeIndex);
if ( !mtlTexture ) { continue; }
uint32_t mipLevel = cpyRgn.imageSubresource.mipLevel;
MTLOrigin mtlTxtOrigin = mvkMTLOriginFromVkOffset3D(cpyRgn.imageOffset);
MTLSize mtlTxtSize = mvkClampMTLSize(mvkMTLSizeFromVkExtent3D(cpyRgn.imageExtent),
mtlTxtOrigin,
mvkMTLSizeFromVkExtent3D(_image->getExtent3D(planeIndex, mipLevel)));
NSUInteger mtlBuffOffset = mtlBuffOffsetBase + cpyRgn.bufferOffset;
uint32_t buffImgWd = cpyRgn.bufferRowLength;
if (buffImgWd == 0) { buffImgWd = cpyRgn.imageExtent.width; }
uint32_t buffImgHt = cpyRgn.bufferImageHeight;
if (buffImgHt == 0) { buffImgHt = cpyRgn.imageExtent.height; }
NSUInteger bytesPerRow = pixFmts->getBytesPerRow(mtlPixFmt, buffImgWd);
NSUInteger bytesPerImg = pixFmts->getBytesPerLayer(mtlPixFmt, bytesPerRow, buffImgHt);
// If the format combines BOTH depth and stencil, determine whether one or both
// components are to be copied, and adjust the byte counts and copy options accordingly.
MTLBlitOption blitOptions = MTLBlitOptionNone;
if (pixFmts->isDepthFormat(mtlPixFmt) && pixFmts->isStencilFormat(mtlPixFmt)) {
VkImageAspectFlags imgFlags = cpyRgn.imageSubresource.aspectMask;
bool wantDepth = mvkAreAllFlagsEnabled(imgFlags, VK_IMAGE_ASPECT_DEPTH_BIT);
bool wantStencil = mvkAreAllFlagsEnabled(imgFlags, VK_IMAGE_ASPECT_STENCIL_BIT);
// The stencil component is always 1 byte per pixel.
// Don't reduce depths of 32-bit depth/stencil formats.
if (wantDepth && !wantStencil) {
if (pixFmts->getBytesPerTexel(mtlPixFmt) != 4) {
bytesPerRow -= buffImgWd;
bytesPerImg -= buffImgWd * buffImgHt;
}
blitOptions |= MTLBlitOptionDepthFromDepthStencil;
} else if (wantStencil && !wantDepth) {
bytesPerRow = buffImgWd;
bytesPerImg = buffImgWd * buffImgHt;
blitOptions |= MTLBlitOptionStencilFromDepthStencil;
}
}
#if MVK_APPLE_SILICON
if (pixFmts->isPVRTCFormat(mtlPixFmt)) {
blitOptions |= MTLBlitOptionRowLinearPVRTC;
}
#endif
#if MVK_MACOS
// If we're copying to a compressed 3D image, the image data need to be decompressed.
// If we're copying to mip level 0, we can skip the copy and just decode
// directly into the image. Otherwise, we need to use an intermediate buffer.
if (_toImage && _image->getIsCompressed() && mtlTexture.textureType == MTLTextureType3D &&
!cmdEncoder->getDevice()->_pMetalFeatures->native3DCompressedTextures) {
MVKCmdCopyBufferToImageInfo info;
info.srcRowStride = bytesPerRow & 0xffffffff;
info.srcRowStrideHigh = bytesPerRow >> 32;
info.srcDepthStride = bytesPerImg & 0xffffffff;
info.srcDepthStrideHigh = bytesPerImg >> 32;
info.destRowStride = info.destRowStrideHigh = 0;
info.destDepthStride = info.destDepthStrideHigh = 0;
info.format = _image->getVkFormat();
info.offset = cpyRgn.imageOffset;
info.extent = cpyRgn.imageExtent;
bool needsTempBuff = mipLevel != 0;
id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(cmdUse);
id<MTLComputePipelineState> mtlComputeState = cmdEncoder->getCommandEncodingPool()->getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(needsTempBuff);
[mtlComputeEnc pushDebugGroup: @"vkCmdCopyBufferToImage"];
[mtlComputeEnc setComputePipelineState: mtlComputeState];
[mtlComputeEnc setBuffer: mtlBuffer offset: mtlBuffOffset atIndex: 0];
MVKBuffer* tempBuff;
if (needsTempBuff) {
NSUInteger bytesPerDestRow = pixFmts->getBytesPerRow(mtlTexture.pixelFormat, info.extent.width);
NSUInteger bytesPerDestImg = pixFmts->getBytesPerLayer(mtlTexture.pixelFormat, bytesPerDestRow, info.extent.height);
// We're going to copy from the temporary buffer now, so use the
// temp buffer parameters in the copy below.
bytesPerRow = bytesPerDestRow;
bytesPerImg = bytesPerDestImg;
MVKBufferDescriptorData tempBuffData;
tempBuffData.size = bytesPerDestImg * mtlTxtSize.depth;
tempBuffData.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
tempBuff = cmdEncoder->getCommandEncodingPool()->getTransferMVKBuffer(tempBuffData);
mtlBuffer = tempBuff->getMTLBuffer();
mtlBuffOffset = tempBuff->getMTLBufferOffset();
info.destRowStride = bytesPerDestRow & 0xffffffff;
info.destRowStrideHigh = bytesPerDestRow >> 32;
info.destDepthStride = bytesPerDestImg & 0xffffffff;
info.destDepthStrideHigh = bytesPerDestImg >> 32;
[mtlComputeEnc setBuffer: mtlBuffer offset: mtlBuffOffset atIndex: 1];
} else {
[mtlComputeEnc setTexture: mtlTexture atIndex: 0];
}
cmdEncoder->setComputeBytes(mtlComputeEnc, &info, sizeof(info), 2);
// Now work out how big to make the grid, and from there, the size and number of threadgroups.
// One thread is run per block. Each block decompresses to an m x n array of texels.
// So the size of the grid is (ceil(width/m), ceil(height/n), depth).
VkExtent2D blockExtent = pixFmts->getBlockTexelSize(mtlPixFmt);
MTLSize mtlGridSize = MTLSizeMake(mvkCeilingDivide<NSUInteger>(mtlTxtSize.width, blockExtent.width),
mvkCeilingDivide<NSUInteger>(mtlTxtSize.height, blockExtent.height),
mtlTxtSize.depth);
// Use four times the thread execution width as the threadgroup size.
MTLSize mtlTgrpSize = MTLSizeMake(2, 2, mtlComputeState.threadExecutionWidth);
// Then the number of threadgroups is (ceil(x/2), ceil(y/2), ceil(z/t)),
// where 't' is the thread execution width.
mtlGridSize.width = mvkCeilingDivide(mtlGridSize.width, mtlTgrpSize.width);
mtlGridSize.height = mvkCeilingDivide(mtlGridSize.height, mtlTgrpSize.height);
mtlGridSize.depth = mvkCeilingDivide(mtlGridSize.depth, mtlTgrpSize.depth);
// There may be extra threads, but that's OK; the shader does bounds checking to
// ensure it doesn't try to write out of bounds.
// Alternatively, we could use the newer -[MTLComputeCommandEncoder dispatchThreads:threadsPerThreadgroup:] method,
// but that needs Metal 2.0.
[mtlComputeEnc dispatchThreadgroups: mtlGridSize threadsPerThreadgroup: mtlTgrpSize];
[mtlComputeEnc popDebugGroup];
if (!needsTempBuff) { continue; }
}
#endif
// Don't supply bytes per image if not an arrayed texture
if ( !isArrayTexture() ) { bytesPerImg = 0; }
id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(cmdUse);
for (uint32_t lyrIdx = 0; lyrIdx < cpyRgn.imageSubresource.layerCount; lyrIdx++) {
if (_toImage) {
[mtlBlitEnc copyFromBuffer: mtlBuffer
sourceOffset: (mtlBuffOffset + (bytesPerImg * lyrIdx))
sourceBytesPerRow: bytesPerRow
sourceBytesPerImage: bytesPerImg
sourceSize: mtlTxtSize
toTexture: mtlTexture
destinationSlice: (cpyRgn.imageSubresource.baseArrayLayer + lyrIdx)
destinationLevel: mipLevel
destinationOrigin: mtlTxtOrigin
options: blitOptions];
} else {
[mtlBlitEnc copyFromTexture: mtlTexture
sourceSlice: (cpyRgn.imageSubresource.baseArrayLayer + lyrIdx)
sourceLevel: mipLevel
sourceOrigin: mtlTxtOrigin
sourceSize: mtlTxtSize
toBuffer: mtlBuffer
destinationOffset: (mtlBuffOffset + (bytesPerImg * lyrIdx))
destinationBytesPerRow: bytesPerRow
destinationBytesPerImage: bytesPerImg
options: blitOptions];
}
}
}
}
template <size_t N>
bool MVKCmdBufferImageCopy<N>::isArrayTexture() {
MTLTextureType mtlTexType = _image->getMTLTextureType();
return (mtlTexType == MTLTextureType3D ||
mtlTexType == MTLTextureType2DArray ||
#if MVK_MACOS_OR_IOS
mtlTexType == MTLTextureType2DMultisampleArray ||
#endif
mtlTexType == MTLTextureType1DArray);
}
template class MVKCmdBufferImageCopy<1>;
template class MVKCmdBufferImageCopy<4>; // To support MVKCmdCopyImage
template class MVKCmdBufferImageCopy<8>;
template class MVKCmdBufferImageCopy<16>;
#pragma mark -
#pragma mark MVKCmdClearAttachments
template <size_t N>
VkResult MVKCmdClearAttachments<N>::setContent(MVKCommandBuffer* cmdBuff,
uint32_t attachmentCount,
const VkClearAttachment* pAttachments,
uint32_t rectCount,
const VkClearRect* pRects) {
_rpsKey.reset();
_mtlDepthVal = 0.0;
_mtlStencilValue = 0;
_isClearingDepth = false;
_isClearingStencil = false;
MVKPixelFormats* pixFmts = cmdBuff->getPixelFormats();
// For each attachment to be cleared, mark it so in the render pipeline state
// attachment key, and populate the clear color value into a uniform array.
// Also set the depth and stencil clear value to the last clear attachment that specifies them.
for (uint32_t i = 0; i < attachmentCount; i++) {
auto& clrAtt = pAttachments[i];
if (mvkIsAnyFlagEnabled(clrAtt.aspectMask, VK_IMAGE_ASPECT_COLOR_BIT)) {
uint32_t caIdx = clrAtt.colorAttachment; // Might be VK_ATTACHMENT_UNUSED
if (caIdx != VK_ATTACHMENT_UNUSED) {
_rpsKey.enableAttachment(caIdx);
setClearValue(caIdx, clrAtt.clearValue);
}
}
if (mvkIsAnyFlagEnabled(clrAtt.aspectMask, VK_IMAGE_ASPECT_DEPTH_BIT)) {
_isClearingDepth = true;
_rpsKey.enableAttachment(kMVKClearAttachmentDepthStencilIndex);
_mtlDepthVal = pixFmts->getMTLClearDepthValue(clrAtt.clearValue);
}
if (mvkIsAnyFlagEnabled(clrAtt.aspectMask, VK_IMAGE_ASPECT_STENCIL_BIT)) {
_isClearingStencil = true;
_rpsKey.enableAttachment(kMVKClearAttachmentDepthStencilIndex);
_mtlStencilValue = pixFmts->getMTLClearStencilValue(clrAtt.clearValue);
}
}
_clearRects.clear(); // Clear for reuse
_clearRects.reserve(rectCount);
for (uint32_t i = 0; i < rectCount; i++) {
_clearRects.push_back(pRects[i]);
}
return VK_SUCCESS;
}
// Returns the total number of vertices needed to clear all layers of all rectangles.
template <size_t N>
uint32_t MVKCmdClearAttachments<N>::getVertexCount(MVKCommandEncoder* cmdEncoder) {
uint32_t vtxCnt = 0;
if (cmdEncoder->getSubpass()->isMultiview()) {
// In this case, all the layer counts will be one. We want to use the number of views in the current multiview pass.
vtxCnt = (uint32_t)_clearRects.size() * cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) * 6;
} else {
for (auto& rect : _clearRects) {
vtxCnt += 6 * rect.layerCount;
}
}
return vtxCnt;
}
// Populates the vertices for all clear rectangles within an attachment of the specified size.
template <size_t N>
void MVKCmdClearAttachments<N>::populateVertices(MVKCommandEncoder* cmdEncoder, simd::float4* vertices,
float attWidth, float attHeight) {
uint32_t vtxIdx = 0;
for (auto& rect : _clearRects) {
vtxIdx = populateVertices(cmdEncoder, vertices, vtxIdx, rect, attWidth, attHeight);
}
}
// Populates the vertices, starting at the vertex, from the specified rectangle within
// an attachment of the specified size. Returns the next vertex that needs to be populated.
template <size_t N>
uint32_t MVKCmdClearAttachments<N>::populateVertices(MVKCommandEncoder* cmdEncoder,
simd::float4* vertices,
uint32_t startVertex,
VkClearRect& clearRect,
float attWidth,
float attHeight) {
// Determine the positions of the four edges of the
// clear rectangle as a fraction of the attachment size.
float leftPos = (float)(clearRect.rect.offset.x) / attWidth;
float rightPos = (float)(clearRect.rect.extent.width) / attWidth + leftPos;
float bottomPos = (float)(clearRect.rect.offset.y) / attHeight;
float topPos = (float)(clearRect.rect.extent.height) / attHeight + bottomPos;
// Now transform to clip-space coordinates,
// which are bounded by (-1.0 < p < 1.0) in clip-space.
leftPos = (leftPos * 2.0) - 1.0;
rightPos = (rightPos * 2.0) - 1.0;
bottomPos = (bottomPos * 2.0) - 1.0;
topPos = (topPos * 2.0) - 1.0;
simd::float4 vtx;
uint32_t vtxIdx = startVertex;
uint32_t startLayer, endLayer;
if (cmdEncoder->getSubpass()->isMultiview()) {
// In a multiview pass, the baseArrayLayer will be 0 and the layerCount will be 1.
// Use the view count instead. We already set the base slice properly in the
// MTLRenderPassDescriptor, so we don't need to offset the starting layer.
startLayer = 0;
endLayer = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex());
} else {
startLayer = clearRect.baseArrayLayer;
endLayer = startLayer + clearRect.layerCount;
}
for (uint32_t layer = startLayer; layer < endLayer; layer++) {
vtx.z = 0.0;
vtx.w = layer;
// Top left vertex - First triangle
vtx.y = topPos;
vtx.x = leftPos;
vertices[vtxIdx++] = vtx;
// Bottom left vertex
vtx.y = bottomPos;
vtx.x = leftPos;
vertices[vtxIdx++] = vtx;
// Bottom right vertex
vtx.y = bottomPos;
vtx.x = rightPos;
vertices[vtxIdx++] = vtx;
// Bottom right vertex - Second triangle
vertices[vtxIdx++] = vtx;
// Top right vertex
vtx.y = topPos;
vtx.x = rightPos;
vertices[vtxIdx++] = vtx;
// Top left vertex
vtx.y = topPos;
vtx.x = leftPos;
vertices[vtxIdx++] = vtx;
}
return vtxIdx;
}
template <size_t N>
void MVKCmdClearAttachments<N>::encode(MVKCommandEncoder* cmdEncoder) {
uint32_t vtxCnt = getVertexCount(cmdEncoder);
simd::float4 vertices[vtxCnt];
simd::float4 clearColors[kMVKClearAttachmentCount];
VkExtent2D fbExtent = cmdEncoder->getFramebufferExtent();
#if MVK_MACOS_OR_IOS
// I need to know if the 'renderTargetWidth' and 'renderTargetHeight' properties
// actually do something, but [MTLRenderPassDescriptor instancesRespondToSelector: @selector(renderTargetWidth)]
// returns NO even on systems that do support it. So we have to check an actual instance.
MTLRenderPassDescriptor* tempRPDesc = [MTLRenderPassDescriptor new]; // temp retain
if ([tempRPDesc respondsToSelector: @selector(renderTargetWidth)]) {
VkRect2D renderArea = cmdEncoder->clipToRenderArea({{0, 0}, fbExtent});
fbExtent = {renderArea.offset.x + renderArea.extent.width, renderArea.offset.y + renderArea.extent.height};
}
[tempRPDesc release]; // temp release
#endif
populateVertices(cmdEncoder, vertices, fbExtent.width, fbExtent.height);
MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats();
MVKRenderSubpass* subpass = cmdEncoder->getSubpass();
uint32_t vtxBuffIdx = cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKVertexContentBufferIndex);
// Populate the render pipeline state attachment key with info from the subpass and framebuffer.
_rpsKey.mtlSampleCount = mvkSampleCountFromVkSampleCountFlagBits(subpass->getSampleCount());
if (cmdEncoder->_canUseLayeredRendering &&
(cmdEncoder->getFramebufferLayerCount() > 1 || cmdEncoder->getSubpass()->isMultiview())) {
_rpsKey.enableLayeredRendering();
}
uint32_t caCnt = subpass->getColorAttachmentCount();
for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) {
if (!subpass->isColorAttachmentUsed(caIdx)) {
// If the subpass attachment isn't actually used, don't try to clear it.
_rpsKey.disableAttachment(caIdx);
continue;
}
VkFormat vkAttFmt = subpass->getColorAttachmentFormat(caIdx);
_rpsKey.attachmentMTLPixelFormats[caIdx] = pixFmts->getMTLPixelFormat(vkAttFmt);
MTLClearColor mtlCC = pixFmts->getMTLClearColor(getClearValue(caIdx), vkAttFmt);
clearColors[caIdx] = { (float)mtlCC.red, (float)mtlCC.green, (float)mtlCC.blue, (float)mtlCC.alpha};
}
// The depth value (including vertex position Z value) is held in the last index.
clearColors[kMVKClearAttachmentDepthStencilIndex] = { _mtlDepthVal, _mtlDepthVal, _mtlDepthVal, _mtlDepthVal };
VkFormat vkAttFmt = subpass->getDepthStencilFormat();
MTLPixelFormat mtlAttFmt = pixFmts->getMTLPixelFormat(vkAttFmt);
_rpsKey.attachmentMTLPixelFormats[kMVKClearAttachmentDepthStencilIndex] = mtlAttFmt;
bool isClearingDepth = _isClearingDepth && pixFmts->isDepthFormat(mtlAttFmt);
bool isClearingStencil = _isClearingStencil && pixFmts->isStencilFormat(mtlAttFmt);
if (!isClearingDepth && !isClearingStencil) {
// If the subpass attachment isn't actually used, don't try to clear it.
_rpsKey.disableAttachment(kMVKClearAttachmentDepthStencilIndex);
}
if (!_rpsKey.isAnyAttachmentEnabled()) {
// Nothing to do.
return;
}
// Render the clear colors to the attachments
MVKCommandEncodingPool* cmdEncPool = cmdEncoder->getCommandEncodingPool();
id<MTLRenderCommandEncoder> mtlRendEnc = cmdEncoder->_mtlRenderEncoder;
[mtlRendEnc pushDebugGroup: @"vkCmdClearAttachments"];
[mtlRendEnc setRenderPipelineState: cmdEncPool->getCmdClearMTLRenderPipelineState(_rpsKey)];
[mtlRendEnc setDepthStencilState: cmdEncPool->getMTLDepthStencilState(isClearingDepth, isClearingStencil)];
[mtlRendEnc setStencilReferenceValue: _mtlStencilValue];
[mtlRendEnc setCullMode: MTLCullModeNone];
[mtlRendEnc setTriangleFillMode: MTLTriangleFillModeFill];
[mtlRendEnc setDepthBias: 0 slopeScale: 0 clamp: 0];
[mtlRendEnc setViewport: {0, 0, (double) fbExtent.width, (double) fbExtent.height, 0.0, 1.0}];
[mtlRendEnc setScissorRect: {0, 0, fbExtent.width, fbExtent.height}];
cmdEncoder->setVertexBytes(mtlRendEnc, clearColors, sizeof(clearColors), 0);
cmdEncoder->setFragmentBytes(mtlRendEnc, clearColors, sizeof(clearColors), 0);
cmdEncoder->setVertexBytes(mtlRendEnc, vertices, vtxCnt * sizeof(vertices[0]), vtxBuffIdx);
[mtlRendEnc drawPrimitives: MTLPrimitiveTypeTriangle vertexStart: 0 vertexCount: vtxCnt];
[mtlRendEnc popDebugGroup];
// Apple GPUs do not support rendering/writing to an attachment and then reading from
// that attachment within a single Metal renderpass. So, if any of the attachments just
// cleared is an input attachment, we need to restart into separate Metal renderpasses.
if (cmdEncoder->getDevice()->_pMetalFeatures->tileBasedDeferredRendering) {
bool needsRenderpassRestart = false;
for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) {
if (_rpsKey.isAttachmentEnabled(caIdx) && subpass->isColorAttachmentAlsoInputAttachment(caIdx)) {
needsRenderpassRestart = true;
break;
}
}
if (needsRenderpassRestart) {
cmdEncoder->encodeStoreActions(true);
cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass);
}
}
// Return to the previous rendering state on the next render activity
cmdEncoder->_graphicsPipelineState.markDirty();
cmdEncoder->_depthStencilState.markDirty();
cmdEncoder->_stencilReferenceValueState.markDirty();
cmdEncoder->_depthBiasState.markDirty();
cmdEncoder->_viewportState.markDirty();
cmdEncoder->_scissorState.markDirty();
cmdEncoder->_graphicsResourcesState.beginMetalRenderPass();
}
template class MVKCmdClearAttachments<1>;
template class MVKCmdClearAttachments<4>;
template class MVKCmdClearSingleAttachment<1>;
template class MVKCmdClearSingleAttachment<4>;
template class MVKCmdClearMultiAttachments<1>;
template class MVKCmdClearMultiAttachments<4>;
#pragma mark -
#pragma mark MVKCmdClearImage
template <size_t N>
VkResult MVKCmdClearImage<N>::setContent(MVKCommandBuffer* cmdBuff,
VkImage image,
VkImageLayout imageLayout,
const VkClearValue& clearValue,
uint32_t rangeCount,
const VkImageSubresourceRange* pRanges) {
_image = (MVKImage*)image;
_clearValue = clearValue;
// Add subresource ranges
_subresourceRanges.clear(); // Clear for reuse
_subresourceRanges.reserve(rangeCount);
bool isDS = isDepthStencilClear();
for (uint32_t rangeIdx = 0; rangeIdx < rangeCount; rangeIdx++) {
auto& vkIR = pRanges[rangeIdx];
uint8_t planeIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIR.aspectMask);
// Validate
MVKMTLFmtCaps mtlFmtCaps = cmdBuff->getPixelFormats()->getCapabilities(_image->getMTLPixelFormat(planeIndex));
bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getDevice()->_pMetalFeatures->renderLinearTextures && _image->getIsLinear();
uint32_t reqCap = isDS ? kMVKMTLFmtCapsDSAtt : (isDestUnwritableLinear ? kMVKMTLFmtCapsWrite : kMVKMTLFmtCapsColorAtt);
if (!mvkAreAllFlagsEnabled(mtlFmtCaps, reqCap)) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdClear%sImage(): Format %s cannot be cleared on this device.", (isDS ? "DepthStencil" : "Color"), cmdBuff->getPixelFormats()->getName(_image->getVkFormat()));
}
_subresourceRanges.push_back(vkIR);
}
// Validate
if (_image->getImageType() == VK_IMAGE_TYPE_1D) {
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdClear%sImage(): Native 1D images cannot be cleared on this device. Consider enabling MVK_CONFIG_TEXTURE_1D_AS_2D.", (isDS ? "DepthStencil" : "Color"));
}
return VK_SUCCESS;
}
template <size_t N>
void MVKCmdClearImage<N>::encode(MVKCommandEncoder* cmdEncoder) {
bool isDS = isDepthStencilClear();
NSString* mtlRendEncName = (isDS
? mvkMTLRenderCommandEncoderLabel(kMVKCommandUseClearDepthStencilImage)
: mvkMTLRenderCommandEncoderLabel(kMVKCommandUseClearColorImage));
cmdEncoder->endCurrentMetalEncoding();
MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats();
for (auto& srRange : _subresourceRanges) {
uint8_t planeIndex = MVKImage::getPlaneFromVkImageAspectFlags(srRange.aspectMask);
id<MTLTexture> imgMTLTex = _image->getMTLTexture(planeIndex);
if ( !imgMTLTex ) { continue; }
#if MVK_MACOS
if ( _image->getIsLinear() && !cmdEncoder->getDevice()->_pMetalFeatures->renderLinearTextures ) {
// These images cannot be rendered. Instead, use a compute shader.
// Luckily for us, linear images only have one mip and one array layer under Metal.
assert( !isDS );
id<MTLComputePipelineState> mtlClearState = cmdEncoder->getCommandEncodingPool()->getCmdClearColorImageMTLComputePipelineState(pixFmts->getFormatType(_image->getVkFormat()));
id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseClearColorImage);
[mtlComputeEnc pushDebugGroup: @"vkCmdClearColorImage"];
[mtlComputeEnc setComputePipelineState: mtlClearState];
[mtlComputeEnc setTexture: imgMTLTex atIndex: 0];
cmdEncoder->setComputeBytes(mtlComputeEnc, &_clearValue, sizeof(_clearValue), 0);
MTLSize gridSize = mvkMTLSizeFromVkExtent3D(_image->getExtent3D());
MTLSize tgSize = MTLSizeMake(mtlClearState.threadExecutionWidth, 1, 1);
if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) {
[mtlComputeEnc dispatchThreads: gridSize threadsPerThreadgroup: tgSize];
} else {
MTLSize tgCount = MTLSizeMake(gridSize.width / tgSize.width, gridSize.height, gridSize.depth);
if (gridSize.width % tgSize.width) { tgCount.width += 1; }
[mtlComputeEnc dispatchThreadgroups: tgCount threadsPerThreadgroup: tgSize];
}
[mtlComputeEnc popDebugGroup];
continue;
}
#endif
MTLRenderPassDescriptor* mtlRPDesc = [MTLRenderPassDescriptor renderPassDescriptor];
MTLRenderPassColorAttachmentDescriptor* mtlRPCADesc = nil;
MTLRenderPassDepthAttachmentDescriptor* mtlRPDADesc = nil;
MTLRenderPassStencilAttachmentDescriptor* mtlRPSADesc = nil;
bool isClearingColor = !isDS && mvkIsAnyFlagEnabled(srRange.aspectMask, VK_IMAGE_ASPECT_COLOR_BIT);
bool isClearingDepth = isDS && mvkIsAnyFlagEnabled(srRange.aspectMask, VK_IMAGE_ASPECT_DEPTH_BIT);
bool isClearingStencil = isDS && mvkIsAnyFlagEnabled(srRange.aspectMask, VK_IMAGE_ASPECT_STENCIL_BIT);
if (isClearingColor) {
mtlRPCADesc = mtlRPDesc.colorAttachments[0];
mtlRPCADesc.texture = imgMTLTex;
mtlRPCADesc.loadAction = MTLLoadActionClear;
mtlRPCADesc.storeAction = MTLStoreActionStore;
mtlRPCADesc.clearColor = pixFmts->getMTLClearColor(_clearValue, _image->getVkFormat());
}
if (isClearingDepth) {
mtlRPDADesc = mtlRPDesc.depthAttachment;
mtlRPDADesc.texture = imgMTLTex;
mtlRPDADesc.loadAction = MTLLoadActionClear;
mtlRPDADesc.storeAction = MTLStoreActionStore;
mtlRPDADesc.clearDepth = pixFmts->getMTLClearDepthValue(_clearValue);
}
if (isClearingStencil) {
mtlRPSADesc = mtlRPDesc.stencilAttachment;
mtlRPSADesc.texture = imgMTLTex;
mtlRPSADesc.loadAction = MTLLoadActionClear;
mtlRPSADesc.storeAction = MTLStoreActionStore;
mtlRPSADesc.clearStencil = pixFmts->getMTLClearStencilValue(_clearValue);
}
// Extract the mipmap levels that are to be updated
uint32_t mipLvlStart = srRange.baseMipLevel;
uint32_t mipLvlCnt = srRange.levelCount;
uint32_t mipLvlEnd = (mipLvlCnt == VK_REMAINING_MIP_LEVELS
? _image->getMipLevelCount()
: (mipLvlStart + mipLvlCnt));
// Extract the cube or array layers (slices) that are to be updated
bool is3D = _image->getMTLTextureType() == MTLTextureType3D;
uint32_t layerStart = is3D ? 0 : srRange.baseArrayLayer;
uint32_t layerCnt = srRange.layerCount;
uint32_t layerEnd = (layerCnt == VK_REMAINING_ARRAY_LAYERS
? _image->getLayerCount()
: (layerStart + layerCnt));
// Iterate across mipmap levels and layers, and perform and empty render to clear each
for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) {
mtlRPCADesc.level = mipLvl;
mtlRPDADesc.level = mipLvl;
mtlRPSADesc.level = mipLvl;
// If a 3D image, we need to get the depth for each level.
if (is3D) {
layerCnt = _image->getExtent3D(planeIndex, mipLvl).depth;
layerEnd = layerStart + layerCnt;
}
// If we can do layered rendering, I can clear all the layers at once.
if (cmdEncoder->getDevice()->_pMetalFeatures->layeredRendering &&
(_image->getSampleCount() == VK_SAMPLE_COUNT_1_BIT || cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering)) {
if (is3D) {
mtlRPCADesc.depthPlane = layerStart;
mtlRPDADesc.depthPlane = layerStart;
mtlRPSADesc.depthPlane = layerStart;
} else {
mtlRPCADesc.slice = layerStart;
mtlRPDADesc.slice = layerStart;
mtlRPSADesc.slice = layerStart;
}
mtlRPDesc.renderTargetArrayLengthMVK = (layerCnt == VK_REMAINING_ARRAY_LAYERS
? (_image->getLayerCount() - layerStart)
: layerCnt);
id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc];
setLabelIfNotNil(mtlRendEnc, mtlRendEncName);
[mtlRendEnc endEncoding];
} else {
for (uint32_t layer = layerStart; layer < layerEnd; layer++) {
if (is3D) {
mtlRPCADesc.depthPlane = layer;
mtlRPDADesc.depthPlane = layer;
mtlRPSADesc.depthPlane = layer;
} else {
mtlRPCADesc.slice = layer;
mtlRPDADesc.slice = layer;
mtlRPSADesc.slice = layer;
}
id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc];
setLabelIfNotNil(mtlRendEnc, mtlRendEncName);
[mtlRendEnc endEncoding];
}
}
}
}
}
template class MVKCmdClearImage<1>;
template class MVKCmdClearImage<4>;
template class MVKCmdClearColorImage<1>;
template class MVKCmdClearColorImage<4>;
template class MVKCmdClearDepthStencilImage<1>;
template class MVKCmdClearDepthStencilImage<4>;
#pragma mark -
#pragma mark MVKCmdFillBuffer
VkResult MVKCmdFillBuffer::setContent(MVKCommandBuffer* cmdBuff,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize size,
uint32_t data) {
_dstBuffer = (MVKBuffer*)dstBuffer;
_dstOffset = dstOffset;
_dataValue = data;
// Round down in case of VK_WHOLE_SIZE on a buffer size which is not aligned to 4 bytes.
VkDeviceSize byteCnt = (size == VK_WHOLE_SIZE) ? (_dstBuffer->getByteCount() - _dstOffset) : size;
VkDeviceSize wdCnt = byteCnt >> 2;
if (mvkFits<uint32_t>(wdCnt)) {
_wordCount = (uint32_t)wdCnt;
} else {
_wordCount = std::numeric_limits<uint32_t>::max();
return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdFillBuffer(): Buffer fill size must fit into a 32-bit unsigned integer. Fill size %llu is too large.", wdCnt);
}
return VK_SUCCESS;
}
void MVKCmdFillBuffer::encode(MVKCommandEncoder* cmdEncoder) {
if (_wordCount == 0) { return; }
id<MTLBuffer> dstMTLBuff = _dstBuffer->getMTLBuffer();
NSUInteger dstMTLBuffOffset = _dstBuffer->getMTLBufferOffset() + _dstOffset;
// Determine the number of full threadgroups we can dispatch to cover the buffer content efficiently.
// Some GPU's report different values for max threadgroup width between the pipeline state and device,
// so conservatively use the minimum of these two reported values.
id<MTLComputePipelineState> cps = cmdEncoder->getCommandEncodingPool()->getCmdFillBufferMTLComputePipelineState();
NSUInteger tgWidth = std::min(cps.maxTotalThreadsPerThreadgroup, cmdEncoder->getMTLDevice().maxThreadsPerThreadgroup.width);
NSUInteger tgCount = _wordCount / tgWidth;
id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseFillBuffer);
[mtlComputeEnc pushDebugGroup: @"vkCmdFillBuffer"];
[mtlComputeEnc setComputePipelineState: cps];
[mtlComputeEnc setBytes: &_dataValue length: sizeof(_dataValue) atIndex: 1];
[mtlComputeEnc setBuffer: dstMTLBuff offset: dstMTLBuffOffset atIndex: 0];
// Run as many full threadgroups as will fit into the buffer content.
if (tgCount > 0) {
[mtlComputeEnc dispatchThreadgroups: MTLSizeMake(tgCount, 1, 1)
threadsPerThreadgroup: MTLSizeMake(tgWidth, 1, 1)];
}
// If there is left-over buffer content after running full threadgroups, or if the buffer content
// fits within a single threadgroup, run a single partial threadgroup of the appropriate size.
uint32_t remainderWordCount = _wordCount % tgWidth;
if (remainderWordCount > 0) {
if (tgCount > 0) { // If we've already written full threadgroups, skip ahead to unwritten content
dstMTLBuffOffset += tgCount * tgWidth * sizeof(_dataValue);
[mtlComputeEnc setBufferOffset: dstMTLBuffOffset atIndex: 0];
}
[mtlComputeEnc dispatchThreadgroups: MTLSizeMake(1, 1, 1)
threadsPerThreadgroup: MTLSizeMake(remainderWordCount, 1, 1)];
}
[mtlComputeEnc popDebugGroup];
}
#pragma mark -
#pragma mark MVKCmdUpdateBuffer
VkResult MVKCmdUpdateBuffer::setContent(MVKCommandBuffer* cmdBuff,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize dataSize,
const void* pData) {
_dstBuffer = (MVKBuffer*)dstBuffer;
_dstOffset = dstOffset;
_dataSize = dataSize;
_srcDataCache.reserve(_dataSize);
memcpy(_srcDataCache.data(), pData, _dataSize);
return VK_SUCCESS;
}
void MVKCmdUpdateBuffer::encode(MVKCommandEncoder* cmdEncoder) {
id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseUpdateBuffer);
id<MTLBuffer> dstMTLBuff = _dstBuffer->getMTLBuffer();
NSUInteger dstMTLBuffOffset = _dstBuffer->getMTLBufferOffset() + _dstOffset;
// Copy data to the source MTLBuffer
MVKMTLBufferAllocation* srcMTLBufferAlloc = cmdEncoder->getCommandEncodingPool()->acquireMTLBufferAllocation(_dataSize);
void* pBuffData = srcMTLBufferAlloc->getContents();
memcpy(pBuffData, _srcDataCache.data(), _dataSize);
[mtlBlitEnc copyFromBuffer: srcMTLBufferAlloc->_mtlBuffer
sourceOffset: srcMTLBufferAlloc->_offset
toBuffer: dstMTLBuff
destinationOffset: dstMTLBuffOffset
size: _dataSize];
// Return the MTLBuffer allocation to the pool once the command buffer is done with it
[cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer> mcb) {
srcMTLBufferAlloc->returnToPool();
}];
}