| /* |
| * MVKCmdTransfer.mm |
| * |
| * Copyright (c) 2015-2022 The Brenwill Workshop Ltd. (http://www.brenwill.com) |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "MVKCmdTransfer.h" |
| #include "MVKCommandBuffer.h" |
| #include "MVKCommandPool.h" |
| #include "MVKCommandEncodingPool.h" |
| #include "MVKImage.h" |
| #include "MVKBuffer.h" |
| #include "MVKFramebuffer.h" |
| #include "MVKRenderPass.h" |
| #include "MTLRenderPassDescriptor+MoltenVK.h" |
| #include "MVKEnvironment.h" |
| #include "mvk_datatypes.hpp" |
| #include <algorithm> |
| #include <sys/mman.h> |
| |
| |
| #pragma mark - |
| #pragma mark Support functions |
| |
// Clamps the size so that the sum of the origin and size does not exceed the maximum size.
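// Assumes the origin lies within maxSize; the MTLSize and MTLOrigin components are unsigned,
// so the subtractions below would wrap if it did not.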
| static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize maxSize) { |
| MTLSize clamped; |
| clamped.width = std::min(size.width, maxSize.width - origin.x); |
| clamped.height = std::min(size.height, maxSize.height - origin.y); |
| clamped.depth = std::min(size.depth, maxSize.depth - origin.z); |
| return clamped; |
| } |
| |
| |
| #pragma mark - |
| #pragma mark MVKCmdCopyImage |
| |
| template <size_t N> |
| VkResult MVKCmdCopyImage<N>::setContent(MVKCommandBuffer* cmdBuff, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkImage dstImage, |
| VkImageLayout dstImageLayout, |
| uint32_t regionCount, |
| const VkImageCopy* pRegions) { |
| _srcImage = (MVKImage*)srcImage; |
| _srcLayout = srcImageLayout; |
| |
| _dstImage = (MVKImage*)dstImage; |
| _dstLayout = dstImageLayout; |
| |
| _vkImageCopies.clear(); // Clear for reuse |
| for (uint32_t regionIdx = 0; regionIdx < regionCount; regionIdx++) { |
| auto& vkIR = pRegions[regionIdx]; |
| uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIR.srcSubresource.aspectMask); |
| uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIR.dstSubresource.aspectMask); |
| |
| // Validate |
| MVKPixelFormats* pixFmts = cmdBuff->getPixelFormats(); |
| if ((_dstImage->getSampleCount() != _srcImage->getSampleCount()) || |
| (pixFmts->getBytesPerBlock(_dstImage->getMTLPixelFormat(dstPlaneIndex)) != pixFmts->getBytesPerBlock(_srcImage->getMTLPixelFormat(srcPlaneIndex)))) { |
| return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdCopyImage(): Cannot copy between incompatible formats, such as formats of different pixel sizes, or between images with different sample counts."); |
| } |
| |
| _vkImageCopies.push_back(vkIR); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| template <size_t N> |
| void MVKCmdCopyImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse commandUse) { |
| MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats(); |
| uint32_t copyCnt = (uint32_t)_vkImageCopies.size(); |
| VkBufferImageCopy vkSrcCopies[copyCnt]; |
| VkBufferImageCopy vkDstCopies[copyCnt]; |
| size_t tmpBuffSize = 0; |
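// For regions that must be staged through a temporary buffer, collect the source and
// destination buffer-image copies here; tmpBuffSize tracks each region's offset and the total buffer size.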
| |
| for (uint32_t copyIdx = 0; copyIdx < copyCnt; copyIdx++) { |
| auto& vkIC = _vkImageCopies[copyIdx]; |
| |
| uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIC.srcSubresource.aspectMask); |
| uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIC.dstSubresource.aspectMask); |
| |
| MTLPixelFormat srcMTLPixFmt = _srcImage->getMTLPixelFormat(srcPlaneIndex); |
| bool isSrcCompressed = _srcImage->getIsCompressed(); |
| |
| MTLPixelFormat dstMTLPixFmt = _dstImage->getMTLPixelFormat(dstPlaneIndex); |
| bool isDstCompressed = _dstImage->getIsCompressed(); |
| |
| // If source and destination have different formats and at least one is compressed, use a temporary intermediary buffer |
| bool useTempBuffer = (srcMTLPixFmt != dstMTLPixFmt) && (isSrcCompressed || isDstCompressed); |
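// Metal cannot copy directly between textures of mismatched compressed and uncompressed
// formats, so the content is staged through a temporary buffer as raw block data.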
| |
| if (useTempBuffer) { |
| // Add copy from source image to temp buffer. |
| auto& srcCpy = vkSrcCopies[copyIdx]; |
| srcCpy.bufferOffset = tmpBuffSize; |
| srcCpy.bufferRowLength = 0; |
| srcCpy.bufferImageHeight = 0; |
| srcCpy.imageSubresource = vkIC.srcSubresource; |
| srcCpy.imageOffset = vkIC.srcOffset; |
| srcCpy.imageExtent = vkIC.extent; |
| |
| // Add copy from temp buffer to destination image. |
| // Extent is provided in source texels. If the source is compressed but the |
| // destination is not, each destination pixel will consume an entire source block, |
| // so we must downscale the destination extent by the size of the source block. |
// Likewise, if the destination is compressed and the source is not, each source pixel
// will map to a block of pixels in the destination texture, and we need to
// adjust the destination's extent accordingly.
| VkExtent3D dstExtent = vkIC.extent; |
| if (isSrcCompressed && !isDstCompressed) { |
| VkExtent2D srcBlockExtent = pixFmts->getBlockTexelSize(srcMTLPixFmt); |
| dstExtent.width /= srcBlockExtent.width; |
| dstExtent.height /= srcBlockExtent.height; |
| } else if (!isSrcCompressed && isDstCompressed) { |
| VkExtent2D dstBlockExtent = pixFmts->getBlockTexelSize(dstMTLPixFmt); |
| dstExtent.width *= dstBlockExtent.width; |
| dstExtent.height *= dstBlockExtent.height; |
| } |
| auto& dstCpy = vkDstCopies[copyIdx]; |
| dstCpy.bufferOffset = tmpBuffSize; |
| dstCpy.bufferRowLength = 0; |
| dstCpy.bufferImageHeight = 0; |
| dstCpy.imageSubresource = vkIC.dstSubresource; |
| dstCpy.imageOffset = vkIC.dstOffset; |
| dstCpy.imageExtent = dstExtent; |
| |
| size_t bytesPerRow = pixFmts->getBytesPerRow(srcMTLPixFmt, vkIC.extent.width); |
| size_t bytesPerRegion = pixFmts->getBytesPerLayer(srcMTLPixFmt, bytesPerRow, vkIC.extent.height); |
| tmpBuffSize += bytesPerRegion; |
| } else { |
| // Map the source pixel format to the dest pixel format through a texture view on the source texture. |
| // If the source and dest pixel formats are the same, this will simply degenerate to the source texture itself. |
| id<MTLTexture> srcMTLTex = _srcImage->getMTLTexture(srcPlaneIndex, _dstImage->getMTLPixelFormat(dstPlaneIndex)); |
| id<MTLTexture> dstMTLTex = _dstImage->getMTLTexture(dstPlaneIndex); |
| if ( !srcMTLTex || !dstMTLTex ) { return; } |
| |
| id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(commandUse); |
| |
| // If copies can be performed using direct texture-texture copying, do so |
| uint32_t srcLevel = vkIC.srcSubresource.mipLevel; |
| uint32_t srcBaseLayer = vkIC.srcSubresource.baseArrayLayer; |
| VkExtent3D srcExtent = _srcImage->getExtent3D(srcPlaneIndex, srcLevel); |
| uint32_t dstLevel = vkIC.dstSubresource.mipLevel; |
| uint32_t dstBaseLayer = vkIC.dstSubresource.baseArrayLayer; |
| VkExtent3D dstExtent = _dstImage->getExtent3D(dstPlaneIndex, dstLevel); |
| // If the extent completely covers both images, I can copy all layers at once. |
| // This will obviously not apply to copies between a 3D and 2D image. |
| if (mvkVkExtent3DsAreEqual(srcExtent, vkIC.extent) && mvkVkExtent3DsAreEqual(dstExtent, vkIC.extent) && |
| [mtlBlitEnc respondsToSelector: @selector(copyFromTexture:sourceSlice:sourceLevel:toTexture:destinationSlice:destinationLevel:sliceCount:levelCount:)]) { |
| assert((_srcImage->getMTLTextureType() == MTLTextureType3D) == (_dstImage->getMTLTextureType() == MTLTextureType3D)); |
| [mtlBlitEnc copyFromTexture: srcMTLTex |
| sourceSlice: srcBaseLayer |
| sourceLevel: srcLevel |
| toTexture: dstMTLTex |
| destinationSlice: dstBaseLayer |
| destinationLevel: dstLevel |
| sliceCount: vkIC.srcSubresource.layerCount |
| levelCount: 1]; |
| } else { |
| MTLOrigin srcOrigin = mvkMTLOriginFromVkOffset3D(vkIC.srcOffset); |
| MTLSize srcSize; |
| uint32_t layCnt; |
| if ((_srcImage->getMTLTextureType() == MTLTextureType3D) != (_dstImage->getMTLTextureType() == MTLTextureType3D)) { |
// In this case, the number of layers to copy is in extent.depth. Use that value,
| // then clamp the depth so we don't try to copy more than Metal will allow. |
| layCnt = vkIC.extent.depth; |
| srcSize = mvkClampMTLSize(mvkMTLSizeFromVkExtent3D(vkIC.extent), |
| srcOrigin, |
| mvkMTLSizeFromVkExtent3D(srcExtent)); |
| srcSize.depth = 1; |
| } else { |
| layCnt = vkIC.srcSubresource.layerCount; |
| srcSize = mvkClampMTLSize(mvkMTLSizeFromVkExtent3D(vkIC.extent), |
| srcOrigin, |
| mvkMTLSizeFromVkExtent3D(srcExtent)); |
| } |
| MTLOrigin dstOrigin = mvkMTLOriginFromVkOffset3D(vkIC.dstOffset); |
| |
| for (uint32_t layIdx = 0; layIdx < layCnt; layIdx++) { |
| // We can copy between a 3D and a 2D image easily. Just copy between |
| // one slice of the 2D image and one plane of the 3D image at a time. |
| if ((_srcImage->getMTLTextureType() == MTLTextureType3D) == (_dstImage->getMTLTextureType() == MTLTextureType3D)) { |
| [mtlBlitEnc copyFromTexture: srcMTLTex |
| sourceSlice: srcBaseLayer + layIdx |
| sourceLevel: srcLevel |
| sourceOrigin: srcOrigin |
| sourceSize: srcSize |
| toTexture: dstMTLTex |
| destinationSlice: dstBaseLayer + layIdx |
| destinationLevel: dstLevel |
| destinationOrigin: dstOrigin]; |
| } else if (_srcImage->getMTLTextureType() == MTLTextureType3D) { |
| [mtlBlitEnc copyFromTexture: srcMTLTex |
| sourceSlice: srcBaseLayer |
| sourceLevel: srcLevel |
| sourceOrigin: MTLOriginMake(srcOrigin.x, srcOrigin.y, srcOrigin.z + layIdx) |
| sourceSize: srcSize |
| toTexture: dstMTLTex |
| destinationSlice: dstBaseLayer + layIdx |
| destinationLevel: dstLevel |
| destinationOrigin: dstOrigin]; |
| } else { |
| assert(_dstImage->getMTLTextureType() == MTLTextureType3D); |
| [mtlBlitEnc copyFromTexture: srcMTLTex |
| sourceSlice: srcBaseLayer + layIdx |
| sourceLevel: srcLevel |
| sourceOrigin: srcOrigin |
| sourceSize: srcSize |
| toTexture: dstMTLTex |
| destinationSlice: dstBaseLayer |
| destinationLevel: dstLevel |
| destinationOrigin: MTLOriginMake(dstOrigin.x, dstOrigin.y, dstOrigin.z + layIdx)]; |
| } |
| } |
| } |
| } |
| } |
| |
| if (tmpBuffSize > 0) { |
| MVKBufferDescriptorData tempBuffData; |
| tempBuffData.size = tmpBuffSize; |
| tempBuffData.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; |
| VkBuffer tempBuff = (VkBuffer)cmdEncoder->getCommandEncodingPool()->getTransferMVKBuffer(tempBuffData); |
| |
| MVKCmdBufferImageCopy<N> cpyCmd; |
| |
| // Copy from source image to buffer |
| // Create and execute a temporary buffer image command. |
| // To be threadsafe...do NOT acquire and return the command from the pool. |
| cpyCmd.setContent(cmdEncoder->_cmdBuffer, tempBuff, (VkImage)_srcImage, _srcLayout, copyCnt, vkSrcCopies, false); |
| cpyCmd.encode(cmdEncoder); |
| |
| // Copy from buffer to destination image |
| // Create and execute a temporary buffer image command. |
| // To be threadsafe...do NOT acquire and return the command from the pool. |
| cpyCmd.setContent(cmdEncoder->_cmdBuffer, tempBuff, (VkImage)_dstImage, _dstLayout, copyCnt, vkDstCopies, true); |
| cpyCmd.encode(cmdEncoder); |
| } |
| } |
| |
| template class MVKCmdCopyImage<1>; |
| template class MVKCmdCopyImage<4>; |
| |
| |
| #pragma mark - |
| #pragma mark MVKCmdBlitImage |
| |
| template <size_t N> |
| VkResult MVKCmdBlitImage<N>::setContent(MVKCommandBuffer* cmdBuff, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkImage dstImage, |
| VkImageLayout dstImageLayout, |
| uint32_t regionCount, |
| const VkImageBlit* pRegions, |
| VkFilter filter) { |
| |
| _srcImage = (MVKImage*)srcImage; |
| _srcLayout = srcImageLayout; |
| _dstImage = (MVKImage*)dstImage; |
| _dstLayout = dstImageLayout; |
| |
| _filter = filter; |
| |
| bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getDevice()->_pMetalFeatures->renderLinearTextures && _dstImage->getIsLinear(); |
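// A linear destination that cannot be rendered to can only be written by the
// direct texture-copy path, never by the render-based blit path.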
| |
| _vkImageBlits.clear(); // Clear for reuse |
| for (uint32_t rIdx = 0; rIdx < regionCount; rIdx++) { |
| auto& vkIB = pRegions[rIdx]; |
| |
| // Validate - macOS linear images cannot be a scaling or inversion destination |
| if (isDestUnwritableLinear && !(canCopyFormats(vkIB) && canCopy(vkIB)) ) { |
| return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdBlitImage(): Scaling or inverting to a linear destination image is not supported."); |
| } |
| |
| _vkImageBlits.push_back(vkIB); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| template <size_t N> |
| bool MVKCmdBlitImage<N>::canCopyFormats(const VkImageBlit& region) { |
| uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(region.srcSubresource.aspectMask); |
| uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(region.dstSubresource.aspectMask); |
| return ((_srcImage->getMTLPixelFormat(srcPlaneIndex) == _dstImage->getMTLPixelFormat(dstPlaneIndex)) && |
| (_dstImage->getSampleCount() == _srcImage->getSampleCount())); |
| } |
| |
// The source and destination sizes must be equal and must not be negative in any direction
| template <size_t N> |
| bool MVKCmdBlitImage<N>::canCopy(const VkImageBlit& region) { |
| VkOffset3D srcSize = mvkVkOffset3DDifference(region.srcOffsets[1], region.srcOffsets[0]); |
| VkOffset3D dstSize = mvkVkOffset3DDifference(region.dstOffsets[1], region.dstOffsets[0]); |
| return (mvkVkOffset3DsAreEqual(srcSize, dstSize) && |
| (srcSize.x >= 0) && (srcSize.y >= 0) && (srcSize.z >= 0)); |
| } |
| |
| template <size_t N> |
| void MVKCmdBlitImage<N>::populateVertices(MVKVertexPosTex* vertices, const VkImageBlit& region) { |
| const VkOffset3D& so0 = region.srcOffsets[0]; |
| const VkOffset3D& so1 = region.srcOffsets[1]; |
| const VkOffset3D& do0 = region.dstOffsets[0]; |
| const VkOffset3D& do1 = region.dstOffsets[1]; |
| |
| // Get the extents of the source and destination textures. |
| uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(region.srcSubresource.aspectMask); |
| uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(region.dstSubresource.aspectMask); |
| VkExtent3D srcExtent = _srcImage->getExtent3D(srcPlaneIndex, region.srcSubresource.mipLevel); |
| VkExtent3D dstExtent = _dstImage->getExtent3D(dstPlaneIndex, region.dstSubresource.mipLevel); |
| |
| // Determine the bottom-left and top-right corners of the source and destination |
| // texture regions, each as a fraction of the corresponding texture size. |
| CGPoint srcBL = CGPointMake((CGFloat)(so0.x) / (CGFloat)srcExtent.width, |
| (CGFloat)(srcExtent.height - so1.y) / (CGFloat)srcExtent.height); |
| CGPoint srcTR = CGPointMake((CGFloat)(so1.x) / (CGFloat)srcExtent.width, |
| (CGFloat)(srcExtent.height - so0.y) / (CGFloat)srcExtent.height); |
| CGPoint dstBL = CGPointMake((CGFloat)(do0.x) / (CGFloat)dstExtent.width, |
| (CGFloat)(dstExtent.height - do1.y) / (CGFloat)dstExtent.height); |
| CGPoint dstTR = CGPointMake((CGFloat)(do1.x) / (CGFloat)dstExtent.width, |
| (CGFloat)(dstExtent.height - do0.y) / (CGFloat)dstExtent.height); |
| |
| // The destination region is used for vertex positions, |
| // which are bounded by (-1.0 < p < 1.0) in clip-space. |
| // Map texture coordinates (0.0 < p < 1.0) to vertex coordinates (-1.0 < p < 1.0). |
| dstBL = CGPointMake((dstBL.x * 2.0) - 1.0, (dstBL.y * 2.0) - 1.0); |
| dstTR = CGPointMake((dstTR.x * 2.0) - 1.0, (dstTR.y * 2.0) - 1.0); |
| |
| MVKVertexPosTex* pVtx; |
| |
| // Bottom left vertex |
| pVtx = &vertices[0]; |
| pVtx->position.x = dstBL.x; |
| pVtx->position.y = dstBL.y; |
| pVtx->texCoord.x = srcBL.x; |
| pVtx->texCoord.y = (1.0 - srcBL.y); |
| |
| // Bottom right vertex |
| pVtx = &vertices[1]; |
| pVtx->position.x = dstTR.x; |
| pVtx->position.y = dstBL.y; |
| pVtx->texCoord.x = srcTR.x; |
| pVtx->texCoord.y = (1.0 - srcBL.y); |
| |
| // Top left vertex |
| pVtx = &vertices[2]; |
| pVtx->position.x = dstBL.x; |
| pVtx->position.y = dstTR.y; |
| pVtx->texCoord.x = srcBL.x; |
| pVtx->texCoord.y = (1.0 - srcTR.y); |
| |
| // Top right vertex |
| pVtx = &vertices[3]; |
| pVtx->position.x = dstTR.x; |
| pVtx->position.y = dstTR.y; |
| pVtx->texCoord.x = srcTR.x; |
| pVtx->texCoord.y = (1.0 - srcTR.y); |
| } |
| |
| template <size_t N> |
| void MVKCmdBlitImage<N>::encode(MVKCommandEncoder* cmdEncoder, MVKCommandUse commandUse) { |
| |
| size_t vkIBCnt = _vkImageBlits.size(); |
| VkImageCopy vkImageCopies[vkIBCnt]; |
| MVKImageBlitRender mvkBlitRenders[vkIBCnt]; |
| uint32_t copyCnt = 0; |
| uint32_t blitCnt = 0; |
| |
// Separate BLITs into those that are really just simple texture region copies,
// and those that require rendering
| for (auto& vkIB : _vkImageBlits) { |
| if (canCopyFormats(vkIB) && canCopy(vkIB)) { |
| |
| const VkOffset3D& so0 = vkIB.srcOffsets[0]; |
| const VkOffset3D& so1 = vkIB.srcOffsets[1]; |
| |
| auto& vkIC = vkImageCopies[copyCnt++]; |
| vkIC.srcSubresource = vkIB.srcSubresource; |
| vkIC.srcOffset = vkIB.srcOffsets[0]; |
| vkIC.dstSubresource = vkIB.dstSubresource; |
| vkIC.dstOffset = vkIB.dstOffsets[0]; |
| vkIC.extent.width = so1.x - so0.x; |
| vkIC.extent.height = so1.y - so0.y; |
| vkIC.extent.depth = so1.z - so0.z; |
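// canCopy() has already verified that the source and destination sizes are equal,
// so the extent can be taken from the source offsets.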
| |
| } else { |
| auto& mvkIBR = mvkBlitRenders[blitCnt++]; |
| mvkIBR.region = vkIB; |
| populateVertices(mvkIBR.vertices, vkIB); |
| } |
| } |
| |
| // Perform those BLITs that can be covered by simple texture copying. |
| if (copyCnt) { |
| MVKCmdCopyImage<N> copyCmd; |
| copyCmd.setContent(cmdEncoder->_cmdBuffer, |
| (VkImage)_srcImage, _srcLayout, |
| (VkImage)_dstImage, _dstLayout, |
| copyCnt, vkImageCopies); |
| copyCmd.encode(cmdEncoder, kMVKCommandUseBlitImage); |
| } |
| |
| // Perform those BLITs that require rendering to destination texture. |
| for (uint32_t blitIdx = 0; blitIdx < blitCnt; blitIdx++) { |
| auto& mvkIBR = mvkBlitRenders[blitIdx]; |
| |
| uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(mvkIBR.region.srcSubresource.aspectMask); |
| uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(mvkIBR.region.dstSubresource.aspectMask); |
| |
| id<MTLTexture> srcMTLTex = _srcImage->getMTLTexture(srcPlaneIndex); |
| id<MTLTexture> dstMTLTex = _dstImage->getMTLTexture(dstPlaneIndex); |
| if (blitCnt && srcMTLTex && dstMTLTex) { |
| cmdEncoder->endCurrentMetalEncoding(); |
| |
| MTLRenderPassDescriptor* mtlRPD = [MTLRenderPassDescriptor renderPassDescriptor]; |
| MTLRenderPassColorAttachmentDescriptor* mtlColorAttDesc = mtlRPD.colorAttachments[0]; |
| MTLRenderPassDepthAttachmentDescriptor* mtlDepthAttDesc = mtlRPD.depthAttachment; |
| MTLRenderPassStencilAttachmentDescriptor* mtlStencilAttDesc = mtlRPD.stencilAttachment; |
| if (mvkIsAnyFlagEnabled(mvkIBR.region.dstSubresource.aspectMask, (VK_IMAGE_ASPECT_DEPTH_BIT))) { |
| mtlDepthAttDesc.loadAction = MTLLoadActionLoad; |
| mtlDepthAttDesc.storeAction = MTLStoreActionStore; |
| mtlDepthAttDesc.texture = dstMTLTex; |
| } else { |
| mtlDepthAttDesc.loadAction = MTLLoadActionDontCare; |
| mtlDepthAttDesc.storeAction = MTLStoreActionDontCare; |
| mtlDepthAttDesc.texture = nil; |
| } |
| if (mvkIsAnyFlagEnabled(mvkIBR.region.dstSubresource.aspectMask, (VK_IMAGE_ASPECT_STENCIL_BIT))) { |
| mtlStencilAttDesc.loadAction = MTLLoadActionLoad; |
| mtlStencilAttDesc.storeAction = MTLStoreActionStore; |
| mtlStencilAttDesc.texture = dstMTLTex; |
| } else { |
| mtlStencilAttDesc.loadAction = MTLLoadActionDontCare; |
| mtlStencilAttDesc.storeAction = MTLStoreActionDontCare; |
| mtlStencilAttDesc.texture = nil; |
| } |
| if (!mvkIsAnyFlagEnabled(mvkIBR.region.dstSubresource.aspectMask, (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) { |
| mtlColorAttDesc.loadAction = MTLLoadActionLoad; |
| mtlColorAttDesc.storeAction = MTLStoreActionStore; |
| mtlColorAttDesc.texture = dstMTLTex; |
| } |
| |
| MVKRPSKeyBlitImg blitKey; |
| blitKey.srcMTLPixelFormat = _srcImage->getMTLPixelFormat(srcPlaneIndex); |
| blitKey.srcMTLTextureType = _srcImage->getMTLTextureType(); |
| if (blitKey.srcMTLTextureType == MTLTextureTypeCube || blitKey.srcMTLTextureType == MTLTextureTypeCubeArray) { |
| // In this case, I'll use a temp 2D array view. That way, I don't have to |
| // deal with mapping the blit coordinates to a cube direction vector. |
| blitKey.srcMTLTextureType = MTLTextureType2DArray; |
| srcMTLTex = [srcMTLTex newTextureViewWithPixelFormat: blitKey.getSrcMTLPixelFormat() |
| textureType: MTLTextureType2DArray |
| levels: NSMakeRange(0, srcMTLTex.mipmapLevelCount) |
| slices: NSMakeRange(0, srcMTLTex.arrayLength)]; |
| [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) { |
| [srcMTLTex release]; |
| }]; |
| } |
| blitKey.dstMTLPixelFormat = _dstImage->getMTLPixelFormat(dstPlaneIndex); |
| blitKey.srcFilter = mvkMTLSamplerMinMagFilterFromVkFilter(_filter); |
| blitKey.srcAspect = mvkIBR.region.srcSubresource.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); |
| blitKey.dstSampleCount = mvkSampleCountFromVkSampleCountFlagBits(_dstImage->getSampleCount()); |
| id<MTLRenderPipelineState> mtlRPS = cmdEncoder->getCommandEncodingPool()->getCmdBlitImageMTLRenderPipelineState(blitKey); |
| bool isBlittingDepth = mvkIsAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_DEPTH_BIT)); |
| bool isBlittingStencil = mvkIsAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_STENCIL_BIT)); |
| id<MTLDepthStencilState> mtlDSS = cmdEncoder->getCommandEncodingPool()->getMTLDepthStencilState(isBlittingDepth, isBlittingStencil); |
| |
| uint32_t vtxBuffIdx = cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKVertexContentBufferIndex); |
| |
| mtlColorAttDesc.level = mvkIBR.region.dstSubresource.mipLevel; |
| mtlDepthAttDesc.level = mvkIBR.region.dstSubresource.mipLevel; |
| mtlStencilAttDesc.level = mvkIBR.region.dstSubresource.mipLevel; |
| |
| bool isLayeredBlit = blitKey.dstSampleCount > 1 ? cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering : cmdEncoder->getDevice()->_pMetalFeatures->layeredRendering; |
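// A layered draw needs layered rendering, and a multisample destination
// additionally needs multisample layered rendering.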
| |
| uint32_t layCnt = mvkIBR.region.srcSubresource.layerCount; |
| if (_dstImage->getMTLTextureType() == MTLTextureType3D) { |
| layCnt = mvkAbsDiff(mvkIBR.region.dstOffsets[1].z, mvkIBR.region.dstOffsets[0].z); |
| } |
| if (isLayeredBlit) { |
| // In this case, I can blit all layers at once with a layered draw. |
| mtlRPD.renderTargetArrayLengthMVK = layCnt; |
| layCnt = 1; // Only need to run the loop once. |
| } |
| for (uint32_t layIdx = 0; layIdx < layCnt; layIdx++) { |
| // Update the render pass descriptor for the texture level and slice, and create a render encoder. |
| if (_dstImage->getMTLTextureType() == MTLTextureType3D) { |
| if (isLayeredBlit) { |
| // For layered blits, the layers are always in ascending order. I'll reverse the order |
| // of the 'r' coordinates if the destination is mirrored. |
| uint32_t depthPlane = std::min(mvkIBR.region.dstOffsets[0].z, mvkIBR.region.dstOffsets[1].z); |
| mtlColorAttDesc.depthPlane = depthPlane; |
| mtlDepthAttDesc.depthPlane = depthPlane; |
| mtlStencilAttDesc.depthPlane = depthPlane; |
| } else { |
| uint32_t depthPlane = mvkIBR.region.dstOffsets[0].z + (mvkIBR.region.dstOffsets[1].z > mvkIBR.region.dstOffsets[0].z ? layIdx : -(layIdx + 1)); |
| mtlColorAttDesc.depthPlane = depthPlane; |
| mtlDepthAttDesc.depthPlane = depthPlane; |
| mtlStencilAttDesc.depthPlane = depthPlane; |
| } |
| } else { |
| mtlColorAttDesc.slice = mvkIBR.region.dstSubresource.baseArrayLayer + layIdx; |
| mtlDepthAttDesc.slice = mvkIBR.region.dstSubresource.baseArrayLayer + layIdx; |
| mtlStencilAttDesc.slice = mvkIBR.region.dstSubresource.baseArrayLayer + layIdx; |
| } |
| id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPD]; |
| setLabelIfNotNil(mtlRendEnc, mvkMTLRenderCommandEncoderLabel(commandUse)); |
| |
| float zIncr; |
| if (blitKey.srcMTLTextureType == MTLTextureType3D) { |
| // In this case, I need to interpolate along the third dimension manually. |
VkExtent3D srcExtent = _srcImage->getExtent3D(srcPlaneIndex, mvkIBR.region.srcSubresource.mipLevel);
| VkOffset3D so0 = mvkIBR.region.srcOffsets[0], so1 = mvkIBR.region.srcOffsets[1]; |
| VkOffset3D do0 = mvkIBR.region.dstOffsets[0], do1 = mvkIBR.region.dstOffsets[1]; |
| float startZ = (float)so0.z / (float)srcExtent.depth; |
| float endZ = (float)so1.z / (float)srcExtent.depth; |
| if (isLayeredBlit && do0.z > do1.z) { |
| // Swap start and end points so interpolation moves in the right direction. |
| std::swap(startZ, endZ); |
| } |
| zIncr = (endZ - startZ) / mvkAbsDiff(do1.z, do0.z); |
| float z = startZ + (isLayeredBlit ? 0.0 : (layIdx + 0.5)) * zIncr; |
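// A per-layer draw samples at the center of this layer's slab. A layered draw starts
// at startZ, and zIncr is passed to the vertex shader below to advance z for each layer.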
| for (uint32_t i = 0; i < kMVKBlitVertexCount; ++i) { |
| mvkIBR.vertices[i].texCoord.z = z; |
| } |
| } |
| [mtlRendEnc pushDebugGroup: @"vkCmdBlitImage"]; |
| [mtlRendEnc setRenderPipelineState: mtlRPS]; |
| [mtlRendEnc setDepthStencilState: mtlDSS]; |
| cmdEncoder->setVertexBytes(mtlRendEnc, mvkIBR.vertices, sizeof(mvkIBR.vertices), vtxBuffIdx); |
| if (isLayeredBlit) { |
| cmdEncoder->setVertexBytes(mtlRendEnc, &zIncr, sizeof(zIncr), 0); |
| } |
| if (!mvkIsOnlyAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_STENCIL_BIT))) { |
| [mtlRendEnc setFragmentTexture: srcMTLTex atIndex: 0]; |
| } |
| if (isBlittingStencil) { |
| // For stencil blits of packed depth/stencil images, I need to use a stencil view. |
| MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats(); |
| if (pixFmts->isDepthFormat(blitKey.getSrcMTLPixelFormat()) && |
| pixFmts->isStencilFormat(blitKey.getSrcMTLPixelFormat())) { |
| MTLPixelFormat stencilFmt = blitKey.getSrcMTLPixelFormat(); |
if (stencilFmt == MTLPixelFormatDepth32Float_Stencil8) {
stencilFmt = MTLPixelFormatX32_Stencil8;
| #if MVK_MACOS |
| } else if (stencilFmt == MTLPixelFormatDepth24Unorm_Stencil8) { |
| stencilFmt = MTLPixelFormatX24_Stencil8; |
| #endif |
| } |
| id<MTLTexture> stencilMTLTex = [srcMTLTex newTextureViewWithPixelFormat: stencilFmt]; |
| [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer>) { |
| [stencilMTLTex release]; |
| }]; |
| [mtlRendEnc setFragmentTexture: stencilMTLTex atIndex: 1]; |
| } else { |
| [mtlRendEnc setFragmentTexture: srcMTLTex atIndex: 1]; |
| } |
| } |
| |
// Source slice and mip level, passed to the blit fragment shader.
struct {
| uint slice; |
| float lod; |
| } texSubRez; |
| texSubRez.slice = mvkIBR.region.srcSubresource.baseArrayLayer + layIdx; |
| texSubRez.lod = mvkIBR.region.srcSubresource.mipLevel; |
| cmdEncoder->setFragmentBytes(mtlRendEnc, &texSubRez, sizeof(texSubRez), 0); |
| |
| NSUInteger instanceCount = isLayeredBlit ? mtlRPD.renderTargetArrayLengthMVK : 1; |
| [mtlRendEnc drawPrimitives: MTLPrimitiveTypeTriangleStrip vertexStart: 0 vertexCount: kMVKBlitVertexCount instanceCount: instanceCount]; |
| [mtlRendEnc popDebugGroup]; |
| [mtlRendEnc endEncoding]; |
| } |
| } |
| } |
| } |
| |
| template class MVKCmdBlitImage<1>; |
| template class MVKCmdBlitImage<4>; |
| |
| |
| #pragma mark - |
| #pragma mark MVKCmdResolveImage |
| |
| template <size_t N> |
| VkResult MVKCmdResolveImage<N>::setContent(MVKCommandBuffer* cmdBuff, |
| VkImage srcImage, |
| VkImageLayout srcImageLayout, |
| VkImage dstImage, |
| VkImageLayout dstImageLayout, |
| uint32_t regionCount, |
| const VkImageResolve* pRegions) { |
| _srcImage = (MVKImage*)srcImage; |
| _srcLayout = srcImageLayout; |
| _dstImage = (MVKImage*)dstImage; |
| _dstLayout = dstImageLayout; |
| |
| _vkImageResolves.clear(); // Clear for reuse |
| _vkImageResolves.reserve(regionCount); |
| for (uint32_t regionIdx = 0; regionIdx < regionCount; regionIdx++) { |
| auto& vkIR = pRegions[regionIdx]; |
| uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIR.dstSubresource.aspectMask); |
| |
| // Validate |
| MVKPixelFormats* pixFmts = cmdBuff->getPixelFormats(); |
| if ( !mvkAreAllFlagsEnabled(pixFmts->getCapabilities(_dstImage->getMTLPixelFormat(dstPlaneIndex)), kMVKMTLFmtCapsResolve) ) { |
| return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdResolveImage(): %s cannot be used as a resolve destination on this device.", pixFmts->getName(_dstImage->getVkFormat())); |
| } |
| |
| _vkImageResolves.push_back(vkIR); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| template <size_t N> |
| void MVKCmdResolveImage<N>::encode(MVKCommandEncoder* cmdEncoder) { |
| |
| size_t vkIRCnt = _vkImageResolves.size(); |
| VkImageBlit expansionRegions[vkIRCnt]; |
| VkImageCopy copyRegions[vkIRCnt]; |
| |
// If we can do layered rendering to a multisample texture, we can resolve all the layers at once.
| uint32_t layerCnt = 0; |
| if (cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering) { |
| layerCnt = (uint32_t)_vkImageResolves.size(); |
| } else { |
| for (VkImageResolve& vkIR : _vkImageResolves) { layerCnt += vkIR.dstSubresource.layerCount; } |
| } |
| MVKMetalResolveSlice mtlResolveSlices[layerCnt]; |
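// One resolve slice per region when layered rendering is available; otherwise one per destination layer.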
| |
| uint32_t expCnt = 0; |
| uint32_t copyCnt = 0; |
| uint32_t sliceCnt = 0; |
| |
| for (VkImageResolve& vkIR : _vkImageResolves) { |
| uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIR.srcSubresource.aspectMask); |
| uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIR.dstSubresource.aspectMask); |
| |
| VkExtent3D srcImgExt = _srcImage->getExtent3D(srcPlaneIndex, vkIR.srcSubresource.mipLevel); |
| VkExtent3D dstImgExt = _dstImage->getExtent3D(dstPlaneIndex, vkIR.dstSubresource.mipLevel); |
| |
| // If the region does not cover the entire content of the destination level, expand |
| // the destination content in the region to the temporary image. The purpose of this |
| // expansion is to render the existing content of the destination image to the |
| // temporary transfer multisample image, so that regions of that temporary transfer |
| // image can then be overwritten with content from the source image, prior to |
| // resolving it back to the destination image. |
| if ( !mvkVkExtent3DsAreEqual(dstImgExt, vkIR.extent) ) { |
| VkImageBlit& expRgn = expansionRegions[expCnt++]; |
| expRgn.srcSubresource = vkIR.dstSubresource; |
| expRgn.srcOffsets[0] = { 0, 0, 0 }; |
| expRgn.srcOffsets[1] = { int32_t(dstImgExt.width), int32_t(dstImgExt.height), int32_t(dstImgExt.depth) }; |
| expRgn.dstSubresource = vkIR.dstSubresource; |
| expRgn.dstOffsets[0] = { 0, 0, 0 }; |
| expRgn.dstOffsets[1] = { int32_t(dstImgExt.width), int32_t(dstImgExt.height), int32_t(dstImgExt.depth) }; |
| } |
| |
| // Copy the region from the source image to the temporary multisample image, |
| // prior to the temporary image being resolved back to the destination image. |
| // The source of this copy stage is the source image, and the destination of |
| // this copy stage is the temporary transfer image. |
| bool needXfrImage = !mvkVkExtent3DsAreEqual(srcImgExt, vkIR.extent) || !mvkVkExtent3DsAreEqual(dstImgExt, vkIR.extent); |
| if ( needXfrImage ) { |
| VkImageCopy& cpyRgn = copyRegions[copyCnt++]; |
| cpyRgn.srcSubresource = vkIR.srcSubresource; |
| cpyRgn.srcOffset = vkIR.srcOffset; |
| cpyRgn.dstSubresource = vkIR.dstSubresource; |
| cpyRgn.dstOffset = vkIR.dstOffset; |
| cpyRgn.extent = vkIR.extent; |
| } |
| |
| // Adds a resolve slice struct for each destination layer in the resolve region. |
| // Note that the source subresource for this is that of the SOURCE image if we're doing a |
| // direct resolve, but that of the DESTINATION if we need a temporary transfer image. |
| mtlResolveSlices[sliceCnt].dstSubresource = vkIR.dstSubresource; |
| mtlResolveSlices[sliceCnt].srcSubresource = needXfrImage ? vkIR.dstSubresource : vkIR.srcSubresource; |
| if (cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering) { |
| sliceCnt++; |
| } else { |
| uint32_t layCnt = vkIR.dstSubresource.layerCount; |
| mtlResolveSlices[sliceCnt].dstSubresource.layerCount = 1; |
| mtlResolveSlices[sliceCnt].srcSubresource.layerCount = 1; |
| sliceCnt++; |
| for (uint32_t layIdx = 1; layIdx < layCnt; layIdx++) { |
| MVKMetalResolveSlice& rslvSlice = mtlResolveSlices[sliceCnt]; |
| rslvSlice = mtlResolveSlices[sliceCnt - 1]; |
| rslvSlice.dstSubresource.baseArrayLayer++; |
| rslvSlice.srcSubresource.baseArrayLayer++; |
| sliceCnt++; |
| } |
| } |
| } |
| |
// If neither expansion nor copying is required, each mip level of the source image
// is being resolved entirely, so resolve directly from the source image.
| MVKImage* xfrImage = _srcImage; |
| if (copyCnt) { |
| // Expansion and/or copying is required. Acquire a temporary transfer image, expand |
| // the destination image into it if necessary, copy from the source image to the |
| // temporary image, and then resolve from the temporary image to the destination image. |
| MVKImageDescriptorData xferImageData; |
| _dstImage->getTransferDescriptorData(xferImageData); |
| xferImageData.samples = _srcImage->getSampleCount(); |
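// The temporary transfer image must use the sample count of the source image
// so it can act as the multisample source of the resolve.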
| xfrImage = cmdEncoder->getCommandEncodingPool()->getTransferMVKImage(xferImageData); |
| |
| if (expCnt) { |
| // Expand the current content of the destination image to the temporary transfer image. |
| MVKCmdBlitImage<N> expCmd; |
| expCmd.setContent(cmdEncoder->_cmdBuffer, |
| (VkImage)_dstImage, _dstLayout, (VkImage)xfrImage, _dstLayout, |
| expCnt, expansionRegions, VK_FILTER_LINEAR); |
| expCmd.encode(cmdEncoder, kMVKCommandUseResolveExpandImage); |
| } |
| |
| // Copy the resolve regions of the source image to the temporary transfer image. |
| MVKCmdCopyImage<N> copyCmd; |
| copyCmd.setContent(cmdEncoder->_cmdBuffer, |
| (VkImage)_srcImage, _srcLayout, |
| (VkImage)xfrImage, _dstLayout, |
| copyCnt, copyRegions); |
| copyCmd.encode(cmdEncoder, kMVKCommandUseResolveCopyImage); |
| } |
| |
| cmdEncoder->endCurrentMetalEncoding(); |
| |
| MTLRenderPassDescriptor* mtlRPD = [MTLRenderPassDescriptor renderPassDescriptor]; |
| MTLRenderPassColorAttachmentDescriptor* mtlColorAttDesc = mtlRPD.colorAttachments[0]; |
| mtlColorAttDesc.loadAction = MTLLoadActionLoad; |
| mtlColorAttDesc.storeAction = MTLStoreActionMultisampleResolve; |
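// No geometry is drawn in this render pass; loading the multisample texture and
// storing it with a resolve store action performs the entire resolve.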
| |
| // For each resolve slice, update the render pass descriptor for |
| // the texture level and slice and create a render encoder. |
| for (uint32_t sIdx = 0; sIdx < sliceCnt; sIdx++) { |
| MVKMetalResolveSlice& rslvSlice = mtlResolveSlices[sIdx]; |
| uint8_t srcPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(rslvSlice.srcSubresource.aspectMask); |
| uint8_t dstPlaneIndex = MVKImage::getPlaneFromVkImageAspectFlags(rslvSlice.dstSubresource.aspectMask); |
| |
| mtlColorAttDesc.texture = xfrImage->getMTLTexture(srcPlaneIndex); |
| mtlColorAttDesc.resolveTexture = _dstImage->getMTLTexture(dstPlaneIndex); |
| mtlColorAttDesc.level = rslvSlice.srcSubresource.mipLevel; |
| mtlColorAttDesc.slice = rslvSlice.srcSubresource.baseArrayLayer; |
| mtlColorAttDesc.resolveLevel = rslvSlice.dstSubresource.mipLevel; |
| mtlColorAttDesc.resolveSlice = rslvSlice.dstSubresource.baseArrayLayer; |
| if (rslvSlice.dstSubresource.layerCount > 1) { |
| mtlRPD.renderTargetArrayLengthMVK = rslvSlice.dstSubresource.layerCount; |
| } |
| id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPD]; |
| setLabelIfNotNil(mtlRendEnc, mvkMTLRenderCommandEncoderLabel(kMVKCommandUseResolveImage)); |
| |
| [mtlRendEnc pushDebugGroup: @"vkCmdResolveImage"]; |
| [mtlRendEnc popDebugGroup]; |
| [mtlRendEnc endEncoding]; |
| } |
| } |
| |
| template class MVKCmdResolveImage<1>; |
| template class MVKCmdResolveImage<4>; |
| |
| |
| #pragma mark - |
| #pragma mark MVKCmdCopyBuffer |
| |
| // Matches shader struct. |
| typedef struct { |
| uint32_t srcOffset; |
| uint32_t dstOffset; |
| uint32_t size; |
| } MVKCmdCopyBufferInfo; |
| |
| template <size_t N> |
| VkResult MVKCmdCopyBuffer<N>::setContent(MVKCommandBuffer* cmdBuff, |
| VkBuffer srcBuffer, |
| VkBuffer destBuffer, |
| uint32_t regionCount, |
| const VkBufferCopy* pRegions) { |
| _srcBuffer = (MVKBuffer*)srcBuffer; |
| _dstBuffer = (MVKBuffer*)destBuffer; |
| |
| // Add buffer regions |
| _bufferCopyRegions.clear(); // Clear for reuse |
| _bufferCopyRegions.reserve(regionCount); |
| for (uint32_t i = 0; i < regionCount; i++) { |
| _bufferCopyRegions.push_back(pRegions[i]); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| template <size_t N> |
| void MVKCmdCopyBuffer<N>::encode(MVKCommandEncoder* cmdEncoder) { |
| id<MTLBuffer> srcMTLBuff = _srcBuffer->getMTLBuffer(); |
| NSUInteger srcMTLBuffOffset = _srcBuffer->getMTLBufferOffset(); |
| |
| id<MTLBuffer> dstMTLBuff = _dstBuffer->getMTLBuffer(); |
| NSUInteger dstMTLBuffOffset = _dstBuffer->getMTLBufferOffset(); |
| |
| VkDeviceSize buffAlign = cmdEncoder->getDevice()->_pMetalFeatures->mtlCopyBufferAlignment; |
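// Metal imposes an alignment on blit-encoder buffer copies; regions that don't meet
// mtlCopyBufferAlignment fall back to a byte-wise compute copy below.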
| |
| for (auto& cpyRgn : _bufferCopyRegions) { |
| const bool useComputeCopy = buffAlign > 1 && (cpyRgn.srcOffset % buffAlign != 0 || |
| cpyRgn.dstOffset % buffAlign != 0 || |
| cpyRgn.size % buffAlign != 0); |
| if (useComputeCopy) { |
| MVKAssert(mvkFits<uint32_t>(cpyRgn.srcOffset) && mvkFits<uint32_t>(cpyRgn.dstOffset) && mvkFits<uint32_t>(cpyRgn.size), |
| "Byte-aligned buffer copy region offsets and size must each fit into a 32-bit unsigned integer."); |
| |
| MVKCmdCopyBufferInfo copyInfo; |
| copyInfo.srcOffset = (uint32_t)cpyRgn.srcOffset; |
| copyInfo.dstOffset = (uint32_t)cpyRgn.dstOffset; |
| copyInfo.size = (uint32_t)cpyRgn.size; |
| |
| id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseCopyBuffer); |
| [mtlComputeEnc pushDebugGroup: @"vkCmdCopyBuffer"]; |
| [mtlComputeEnc setComputePipelineState: cmdEncoder->getCommandEncodingPool()->getCmdCopyBufferBytesMTLComputePipelineState()]; |
| [mtlComputeEnc setBuffer:srcMTLBuff offset: srcMTLBuffOffset atIndex: 0]; |
| [mtlComputeEnc setBuffer:dstMTLBuff offset: dstMTLBuffOffset atIndex: 1]; |
| [mtlComputeEnc setBytes: ©Info length: sizeof(copyInfo) atIndex: 2]; |
| [mtlComputeEnc dispatchThreadgroups: MTLSizeMake(1, 1, 1) threadsPerThreadgroup: MTLSizeMake(1, 1, 1)]; |
| [mtlComputeEnc popDebugGroup]; |
| } else { |
| id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseCopyBuffer); |
| [mtlBlitEnc copyFromBuffer: srcMTLBuff |
| sourceOffset: (srcMTLBuffOffset + cpyRgn.srcOffset) |
| toBuffer: dstMTLBuff |
| destinationOffset: (dstMTLBuffOffset + cpyRgn.dstOffset) |
| size: cpyRgn.size]; |
| } |
| } |
| } |
| |
| template class MVKCmdCopyBuffer<1>; |
| template class MVKCmdCopyBuffer<4>; |
| |
| |
| #pragma mark - |
| #pragma mark MVKCmdBufferImageCopy |
| |
| // Matches shader struct. |
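// 64-bit strides are passed to the shader as low/high 32-bit pairs.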
| typedef struct { |
| uint32_t srcRowStride; |
| uint32_t srcRowStrideHigh; |
| uint32_t srcDepthStride; |
| uint32_t srcDepthStrideHigh; |
| uint32_t destRowStride; |
| uint32_t destRowStrideHigh; |
| uint32_t destDepthStride; |
| uint32_t destDepthStrideHigh; |
| VkFormat format; |
| VkOffset3D offset; |
| VkExtent3D extent; |
| } MVKCmdCopyBufferToImageInfo; |
| |
| template <size_t N> |
| VkResult MVKCmdBufferImageCopy<N>::setContent(MVKCommandBuffer* cmdBuff, |
| VkBuffer buffer, |
| VkImage image, |
| VkImageLayout imageLayout, |
| uint32_t regionCount, |
| const VkBufferImageCopy* pRegions, |
| bool toImage) { |
| _buffer = (MVKBuffer*)buffer; |
| _image = (MVKImage*)image; |
| _toImage = toImage; |
| |
| // Add buffer regions |
| _bufferImageCopyRegions.clear(); // Clear for reuse |
| _bufferImageCopyRegions.reserve(regionCount); |
| for (uint32_t i = 0; i < regionCount; i++) { |
| _bufferImageCopyRegions.push_back(pRegions[i]); |
| |
| // Validate |
| if ( !_image->hasExpectedTexelSize() ) { |
| MTLPixelFormat mtlPixFmt = _image->getMTLPixelFormat(MVKImage::getPlaneFromVkImageAspectFlags(pRegions[i].imageSubresource.aspectMask)); |
| const char* cmdName = _toImage ? "vkCmdCopyBufferToImage" : "vkCmdCopyImageToBuffer"; |
| return cmdBuff->reportError(VK_ERROR_FORMAT_NOT_SUPPORTED, "%s(): The image is using Metal format %s as a substitute for Vulkan format %s. Since the pixel size is different, content for the image cannot be copied to or from a buffer.", cmdName, cmdBuff->getPixelFormats()->getName(mtlPixFmt), cmdBuff->getPixelFormats()->getName(_image->getVkFormat())); |
| } |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| template <size_t N> |
| void MVKCmdBufferImageCopy<N>::encode(MVKCommandEncoder* cmdEncoder) { |
| id<MTLBuffer> mtlBuffer = _buffer->getMTLBuffer(); |
| if ( !mtlBuffer ) { return; } |
| |
| NSUInteger mtlBuffOffsetBase = _buffer->getMTLBufferOffset(); |
| MVKCommandUse cmdUse = _toImage ? kMVKCommandUseCopyBufferToImage : kMVKCommandUseCopyImageToBuffer; |
| MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats(); |
| |
| for (auto& cpyRgn : _bufferImageCopyRegions) { |
| uint8_t planeIndex = MVKImage::getPlaneFromVkImageAspectFlags(cpyRgn.imageSubresource.aspectMask); |
| MTLPixelFormat mtlPixFmt = _image->getMTLPixelFormat(planeIndex); |
| id<MTLTexture> mtlTexture = _image->getMTLTexture(planeIndex); |
| if ( !mtlTexture ) { continue; } |
| |
| uint32_t mipLevel = cpyRgn.imageSubresource.mipLevel; |
| MTLOrigin mtlTxtOrigin = mvkMTLOriginFromVkOffset3D(cpyRgn.imageOffset); |
| MTLSize mtlTxtSize = mvkClampMTLSize(mvkMTLSizeFromVkExtent3D(cpyRgn.imageExtent), |
| mtlTxtOrigin, |
| mvkMTLSizeFromVkExtent3D(_image->getExtent3D(planeIndex, mipLevel))); |
| NSUInteger mtlBuffOffset = mtlBuffOffsetBase + cpyRgn.bufferOffset; |
| |
| uint32_t buffImgWd = cpyRgn.bufferRowLength; |
| if (buffImgWd == 0) { buffImgWd = cpyRgn.imageExtent.width; } |
| |
| uint32_t buffImgHt = cpyRgn.bufferImageHeight; |
| if (buffImgHt == 0) { buffImgHt = cpyRgn.imageExtent.height; } |
| |
| NSUInteger bytesPerRow = pixFmts->getBytesPerRow(mtlPixFmt, buffImgWd); |
| NSUInteger bytesPerImg = pixFmts->getBytesPerLayer(mtlPixFmt, bytesPerRow, buffImgHt); |
| |
| // If the format combines BOTH depth and stencil, determine whether one or both |
| // components are to be copied, and adjust the byte counts and copy options accordingly. |
| MTLBlitOption blitOptions = MTLBlitOptionNone; |
| if (pixFmts->isDepthFormat(mtlPixFmt) && pixFmts->isStencilFormat(mtlPixFmt)) { |
| |
| VkImageAspectFlags imgFlags = cpyRgn.imageSubresource.aspectMask; |
| bool wantDepth = mvkAreAllFlagsEnabled(imgFlags, VK_IMAGE_ASPECT_DEPTH_BIT); |
| bool wantStencil = mvkAreAllFlagsEnabled(imgFlags, VK_IMAGE_ASPECT_STENCIL_BIT); |
| |
| // The stencil component is always 1 byte per pixel. |
| // Don't reduce depths of 32-bit depth/stencil formats. |
| if (wantDepth && !wantStencil) { |
| if (pixFmts->getBytesPerTexel(mtlPixFmt) != 4) { |
| bytesPerRow -= buffImgWd; |
| bytesPerImg -= buffImgWd * buffImgHt; |
| } |
| blitOptions |= MTLBlitOptionDepthFromDepthStencil; |
| } else if (wantStencil && !wantDepth) { |
| bytesPerRow = buffImgWd; |
| bytesPerImg = buffImgWd * buffImgHt; |
| blitOptions |= MTLBlitOptionStencilFromDepthStencil; |
| } |
| } |
| |
| #if MVK_APPLE_SILICON |
| if (pixFmts->isPVRTCFormat(mtlPixFmt)) { |
| blitOptions |= MTLBlitOptionRowLinearPVRTC; |
| } |
| #endif |
| |
| #if MVK_MACOS |
// If we're copying to a compressed 3D image, the image data needs to be decompressed.
| // If we're copying to mip level 0, we can skip the copy and just decode |
| // directly into the image. Otherwise, we need to use an intermediate buffer. |
| if (_toImage && _image->getIsCompressed() && mtlTexture.textureType == MTLTextureType3D && |
| !cmdEncoder->getDevice()->_pMetalFeatures->native3DCompressedTextures) { |
| |
| MVKCmdCopyBufferToImageInfo info; |
| info.srcRowStride = bytesPerRow & 0xffffffff; |
| info.srcRowStrideHigh = bytesPerRow >> 32; |
| info.srcDepthStride = bytesPerImg & 0xffffffff; |
| info.srcDepthStrideHigh = bytesPerImg >> 32; |
| info.destRowStride = info.destRowStrideHigh = 0; |
| info.destDepthStride = info.destDepthStrideHigh = 0; |
| info.format = _image->getVkFormat(); |
| info.offset = cpyRgn.imageOffset; |
| info.extent = cpyRgn.imageExtent; |
| bool needsTempBuff = mipLevel != 0; |
| id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(cmdUse); |
| id<MTLComputePipelineState> mtlComputeState = cmdEncoder->getCommandEncodingPool()->getCmdCopyBufferToImage3DDecompressMTLComputePipelineState(needsTempBuff); |
| [mtlComputeEnc pushDebugGroup: @"vkCmdCopyBufferToImage"]; |
| [mtlComputeEnc setComputePipelineState: mtlComputeState]; |
| [mtlComputeEnc setBuffer: mtlBuffer offset: mtlBuffOffset atIndex: 0]; |
| MVKBuffer* tempBuff; |
| if (needsTempBuff) { |
| NSUInteger bytesPerDestRow = pixFmts->getBytesPerRow(mtlTexture.pixelFormat, info.extent.width); |
| NSUInteger bytesPerDestImg = pixFmts->getBytesPerLayer(mtlTexture.pixelFormat, bytesPerDestRow, info.extent.height); |
| // We're going to copy from the temporary buffer now, so use the |
| // temp buffer parameters in the copy below. |
| bytesPerRow = bytesPerDestRow; |
| bytesPerImg = bytesPerDestImg; |
| MVKBufferDescriptorData tempBuffData; |
| tempBuffData.size = bytesPerDestImg * mtlTxtSize.depth; |
| tempBuffData.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; |
| tempBuff = cmdEncoder->getCommandEncodingPool()->getTransferMVKBuffer(tempBuffData); |
| mtlBuffer = tempBuff->getMTLBuffer(); |
| mtlBuffOffset = tempBuff->getMTLBufferOffset(); |
| info.destRowStride = bytesPerDestRow & 0xffffffff; |
| info.destRowStrideHigh = bytesPerDestRow >> 32; |
| info.destDepthStride = bytesPerDestImg & 0xffffffff; |
| info.destDepthStrideHigh = bytesPerDestImg >> 32; |
| [mtlComputeEnc setBuffer: mtlBuffer offset: mtlBuffOffset atIndex: 1]; |
| } else { |
| [mtlComputeEnc setTexture: mtlTexture atIndex: 0]; |
| } |
| cmdEncoder->setComputeBytes(mtlComputeEnc, &info, sizeof(info), 2); |
| |
| // Now work out how big to make the grid, and from there, the size and number of threadgroups. |
| // One thread is run per block. Each block decompresses to an m x n array of texels. |
| // So the size of the grid is (ceil(width/m), ceil(height/n), depth). |
| VkExtent2D blockExtent = pixFmts->getBlockTexelSize(mtlPixFmt); |
| MTLSize mtlGridSize = MTLSizeMake(mvkCeilingDivide<NSUInteger>(mtlTxtSize.width, blockExtent.width), |
| mvkCeilingDivide<NSUInteger>(mtlTxtSize.height, blockExtent.height), |
| mtlTxtSize.depth); |
| // Use four times the thread execution width as the threadgroup size. |
| MTLSize mtlTgrpSize = MTLSizeMake(2, 2, mtlComputeState.threadExecutionWidth); |
| // Then the number of threadgroups is (ceil(x/2), ceil(y/2), ceil(z/t)), |
| // where 't' is the thread execution width. |
| mtlGridSize.width = mvkCeilingDivide(mtlGridSize.width, mtlTgrpSize.width); |
| mtlGridSize.height = mvkCeilingDivide(mtlGridSize.height, mtlTgrpSize.height); |
| mtlGridSize.depth = mvkCeilingDivide(mtlGridSize.depth, mtlTgrpSize.depth); |
| // There may be extra threads, but that's OK; the shader does bounds checking to |
| // ensure it doesn't try to write out of bounds. |
| // Alternatively, we could use the newer -[MTLComputeCommandEncoder dispatchThreads:threadsPerThreadgroup:] method, |
| // but that needs Metal 2.0. |
| [mtlComputeEnc dispatchThreadgroups: mtlGridSize threadsPerThreadgroup: mtlTgrpSize]; |
| [mtlComputeEnc popDebugGroup]; |
| |
| if (!needsTempBuff) { continue; } |
| } |
| #endif |
| |
| // Don't supply bytes per image if not an arrayed texture |
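// (Metal's blit API expects a value of 0 in that case.)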
| if ( !isArrayTexture() ) { bytesPerImg = 0; } |
| |
| id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(cmdUse); |
| |
| for (uint32_t lyrIdx = 0; lyrIdx < cpyRgn.imageSubresource.layerCount; lyrIdx++) { |
| if (_toImage) { |
| [mtlBlitEnc copyFromBuffer: mtlBuffer |
| sourceOffset: (mtlBuffOffset + (bytesPerImg * lyrIdx)) |
| sourceBytesPerRow: bytesPerRow |
| sourceBytesPerImage: bytesPerImg |
| sourceSize: mtlTxtSize |
| toTexture: mtlTexture |
| destinationSlice: (cpyRgn.imageSubresource.baseArrayLayer + lyrIdx) |
| destinationLevel: mipLevel |
| destinationOrigin: mtlTxtOrigin |
| options: blitOptions]; |
| } else { |
| [mtlBlitEnc copyFromTexture: mtlTexture |
| sourceSlice: (cpyRgn.imageSubresource.baseArrayLayer + lyrIdx) |
| sourceLevel: mipLevel |
| sourceOrigin: mtlTxtOrigin |
| sourceSize: mtlTxtSize |
| toBuffer: mtlBuffer |
| destinationOffset: (mtlBuffOffset + (bytesPerImg * lyrIdx)) |
| destinationBytesPerRow: bytesPerRow |
| destinationBytesPerImage: bytesPerImg |
| options: blitOptions]; |
| } |
| } |
| } |
| } |
| |
| template <size_t N> |
| bool MVKCmdBufferImageCopy<N>::isArrayTexture() { |
| MTLTextureType mtlTexType = _image->getMTLTextureType(); |
| return (mtlTexType == MTLTextureType3D || |
| mtlTexType == MTLTextureType2DArray || |
| #if MVK_MACOS_OR_IOS |
| mtlTexType == MTLTextureType2DMultisampleArray || |
| #endif |
| mtlTexType == MTLTextureType1DArray); |
| } |
| |
| template class MVKCmdBufferImageCopy<1>; |
| template class MVKCmdBufferImageCopy<4>; // To support MVKCmdCopyImage |
| template class MVKCmdBufferImageCopy<8>; |
| template class MVKCmdBufferImageCopy<16>; |
| |
| |
| #pragma mark - |
| #pragma mark MVKCmdClearAttachments |
| |
| template <size_t N> |
| VkResult MVKCmdClearAttachments<N>::setContent(MVKCommandBuffer* cmdBuff, |
| uint32_t attachmentCount, |
| const VkClearAttachment* pAttachments, |
| uint32_t rectCount, |
| const VkClearRect* pRects) { |
| _rpsKey.reset(); |
| _mtlDepthVal = 0.0; |
| _mtlStencilValue = 0; |
| _isClearingDepth = false; |
| _isClearingStencil = false; |
| MVKPixelFormats* pixFmts = cmdBuff->getPixelFormats(); |
| |
| // For each attachment to be cleared, mark it so in the render pipeline state |
| // attachment key, and populate the clear color value into a uniform array. |
| // Also set the depth and stencil clear value to the last clear attachment that specifies them. |
| for (uint32_t i = 0; i < attachmentCount; i++) { |
| auto& clrAtt = pAttachments[i]; |
| |
| if (mvkIsAnyFlagEnabled(clrAtt.aspectMask, VK_IMAGE_ASPECT_COLOR_BIT)) { |
| uint32_t caIdx = clrAtt.colorAttachment; // Might be VK_ATTACHMENT_UNUSED |
| if (caIdx != VK_ATTACHMENT_UNUSED) { |
| _rpsKey.enableAttachment(caIdx); |
| setClearValue(caIdx, clrAtt.clearValue); |
| } |
| } |
| |
| if (mvkIsAnyFlagEnabled(clrAtt.aspectMask, VK_IMAGE_ASPECT_DEPTH_BIT)) { |
| _isClearingDepth = true; |
| _rpsKey.enableAttachment(kMVKClearAttachmentDepthStencilIndex); |
| _mtlDepthVal = pixFmts->getMTLClearDepthValue(clrAtt.clearValue); |
| } |
| |
| if (mvkIsAnyFlagEnabled(clrAtt.aspectMask, VK_IMAGE_ASPECT_STENCIL_BIT)) { |
| _isClearingStencil = true; |
| _rpsKey.enableAttachment(kMVKClearAttachmentDepthStencilIndex); |
| _mtlStencilValue = pixFmts->getMTLClearStencilValue(clrAtt.clearValue); |
| } |
| } |
| |
| _clearRects.clear(); // Clear for reuse |
| _clearRects.reserve(rectCount); |
| for (uint32_t i = 0; i < rectCount; i++) { |
| _clearRects.push_back(pRects[i]); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
// Returns the total number of vertices needed to clear all layers of all rectangles
// (six per rectangle per layer, forming two triangles).
| template <size_t N> |
| uint32_t MVKCmdClearAttachments<N>::getVertexCount(MVKCommandEncoder* cmdEncoder) { |
| uint32_t vtxCnt = 0; |
| if (cmdEncoder->getSubpass()->isMultiview()) { |
| // In this case, all the layer counts will be one. We want to use the number of views in the current multiview pass. |
| vtxCnt = (uint32_t)_clearRects.size() * cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()) * 6; |
| } else { |
| for (auto& rect : _clearRects) { |
| vtxCnt += 6 * rect.layerCount; |
| } |
| } |
| return vtxCnt; |
| } |
| |
| // Populates the vertices for all clear rectangles within an attachment of the specified size. |
| template <size_t N> |
| void MVKCmdClearAttachments<N>::populateVertices(MVKCommandEncoder* cmdEncoder, simd::float4* vertices, |
| float attWidth, float attHeight) { |
| uint32_t vtxIdx = 0; |
| for (auto& rect : _clearRects) { |
| vtxIdx = populateVertices(cmdEncoder, vertices, vtxIdx, rect, attWidth, attHeight); |
| } |
| } |
| |
// Populates the vertices, starting at the specified vertex index, from the specified rectangle
// within an attachment of the specified size. Returns the index of the next vertex to be populated.
| template <size_t N> |
| uint32_t MVKCmdClearAttachments<N>::populateVertices(MVKCommandEncoder* cmdEncoder, |
| simd::float4* vertices, |
| uint32_t startVertex, |
| VkClearRect& clearRect, |
| float attWidth, |
| float attHeight) { |
| // Determine the positions of the four edges of the |
| // clear rectangle as a fraction of the attachment size. |
| float leftPos = (float)(clearRect.rect.offset.x) / attWidth; |
| float rightPos = (float)(clearRect.rect.extent.width) / attWidth + leftPos; |
| float bottomPos = (float)(clearRect.rect.offset.y) / attHeight; |
| float topPos = (float)(clearRect.rect.extent.height) / attHeight + bottomPos; |
| |
// Now transform to clip-space coordinates,
// which are bounded by (-1.0 < p < 1.0).
| leftPos = (leftPos * 2.0) - 1.0; |
| rightPos = (rightPos * 2.0) - 1.0; |
| bottomPos = (bottomPos * 2.0) - 1.0; |
| topPos = (topPos * 2.0) - 1.0; |
| |
| simd::float4 vtx; |
| |
| uint32_t vtxIdx = startVertex; |
| uint32_t startLayer, endLayer; |
| if (cmdEncoder->getSubpass()->isMultiview()) { |
| // In a multiview pass, the baseArrayLayer will be 0 and the layerCount will be 1. |
| // Use the view count instead. We already set the base slice properly in the |
| // MTLRenderPassDescriptor, so we don't need to offset the starting layer. |
| startLayer = 0; |
| endLayer = cmdEncoder->getSubpass()->getViewCountInMetalPass(cmdEncoder->getMultiviewPassIndex()); |
| } else { |
| startLayer = clearRect.baseArrayLayer; |
| endLayer = startLayer + clearRect.layerCount; |
| } |
| for (uint32_t layer = startLayer; layer < endLayer; layer++) { |
| |
| vtx.z = 0.0; |
| vtx.w = layer; |
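// Positions here are 2D, so the W component is free to carry the layer index to the shader.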
| |
| // Top left vertex - First triangle |
| vtx.y = topPos; |
| vtx.x = leftPos; |
| vertices[vtxIdx++] = vtx; |
| |
| // Bottom left vertex |
| vtx.y = bottomPos; |
| vtx.x = leftPos; |
| vertices[vtxIdx++] = vtx; |
| |
| // Bottom right vertex |
| vtx.y = bottomPos; |
| vtx.x = rightPos; |
| vertices[vtxIdx++] = vtx; |
| |
| // Bottom right vertex - Second triangle |
| vertices[vtxIdx++] = vtx; |
| |
| // Top right vertex |
| vtx.y = topPos; |
| vtx.x = rightPos; |
| vertices[vtxIdx++] = vtx; |
| |
| // Top left vertex |
| vtx.y = topPos; |
| vtx.x = leftPos; |
| vertices[vtxIdx++] = vtx; |
| } |
| |
| return vtxIdx; |
| } |
| |
| template <size_t N> |
| void MVKCmdClearAttachments<N>::encode(MVKCommandEncoder* cmdEncoder) { |
| |
| uint32_t vtxCnt = getVertexCount(cmdEncoder); |
| simd::float4 vertices[vtxCnt]; |
| simd::float4 clearColors[kMVKClearAttachmentCount]; |
| |
| VkExtent2D fbExtent = cmdEncoder->getFramebufferExtent(); |
| #if MVK_MACOS_OR_IOS |
| // I need to know if the 'renderTargetWidth' and 'renderTargetHeight' properties |
| // actually do something, but [MTLRenderPassDescriptor instancesRespondToSelector: @selector(renderTargetWidth)] |
| // returns NO even on systems that do support it. So we have to check an actual instance. |
| MTLRenderPassDescriptor* tempRPDesc = [MTLRenderPassDescriptor new]; // temp retain |
| if ([tempRPDesc respondsToSelector: @selector(renderTargetWidth)]) { |
| VkRect2D renderArea = cmdEncoder->clipToRenderArea({{0, 0}, fbExtent}); |
| fbExtent = {renderArea.offset.x + renderArea.extent.width, renderArea.offset.y + renderArea.extent.height}; |
| } |
| [tempRPDesc release]; // temp release |
| #endif |
| populateVertices(cmdEncoder, vertices, fbExtent.width, fbExtent.height); |
| |
| MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats(); |
| MVKRenderSubpass* subpass = cmdEncoder->getSubpass(); |
| uint32_t vtxBuffIdx = cmdEncoder->getDevice()->getMetalBufferIndexForVertexAttributeBinding(kMVKVertexContentBufferIndex); |
| |
| // Populate the render pipeline state attachment key with info from the subpass and framebuffer. |
| _rpsKey.mtlSampleCount = mvkSampleCountFromVkSampleCountFlagBits(subpass->getSampleCount()); |
| if (cmdEncoder->_canUseLayeredRendering && |
| (cmdEncoder->getFramebufferLayerCount() > 1 || cmdEncoder->getSubpass()->isMultiview())) { |
| _rpsKey.enableLayeredRendering(); |
| } |
| |
| uint32_t caCnt = subpass->getColorAttachmentCount(); |
| for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) { |
| if (!subpass->isColorAttachmentUsed(caIdx)) { |
| // If the subpass attachment isn't actually used, don't try to clear it. |
| _rpsKey.disableAttachment(caIdx); |
| continue; |
| } |
| VkFormat vkAttFmt = subpass->getColorAttachmentFormat(caIdx); |
| _rpsKey.attachmentMTLPixelFormats[caIdx] = pixFmts->getMTLPixelFormat(vkAttFmt); |
| MTLClearColor mtlCC = pixFmts->getMTLClearColor(getClearValue(caIdx), vkAttFmt); |
| clearColors[caIdx] = { (float)mtlCC.red, (float)mtlCC.green, (float)mtlCC.blue, (float)mtlCC.alpha}; |
| } |
| |
| // The depth value (including vertex position Z value) is held in the last index. |
| clearColors[kMVKClearAttachmentDepthStencilIndex] = { _mtlDepthVal, _mtlDepthVal, _mtlDepthVal, _mtlDepthVal }; |
| |
| VkFormat vkAttFmt = subpass->getDepthStencilFormat(); |
| MTLPixelFormat mtlAttFmt = pixFmts->getMTLPixelFormat(vkAttFmt); |
| _rpsKey.attachmentMTLPixelFormats[kMVKClearAttachmentDepthStencilIndex] = mtlAttFmt; |
| |
| bool isClearingDepth = _isClearingDepth && pixFmts->isDepthFormat(mtlAttFmt); |
| bool isClearingStencil = _isClearingStencil && pixFmts->isStencilFormat(mtlAttFmt); |
| if (!isClearingDepth && !isClearingStencil) { |
| // If neither the depth nor the stencil aspect is being cleared, don't try to clear the depth/stencil attachment. |
| _rpsKey.disableAttachment(kMVKClearAttachmentDepthStencilIndex); |
| } |
| |
| if (!_rpsKey.isAnyAttachmentEnabled()) { |
| // Nothing to do. |
| return; |
| } |
| |
| // Render the clear colors to the attachments |
| MVKCommandEncodingPool* cmdEncPool = cmdEncoder->getCommandEncodingPool(); |
| id<MTLRenderCommandEncoder> mtlRendEnc = cmdEncoder->_mtlRenderEncoder; |
| [mtlRendEnc pushDebugGroup: @"vkCmdClearAttachments"]; |
| [mtlRendEnc setRenderPipelineState: cmdEncPool->getCmdClearMTLRenderPipelineState(_rpsKey)]; |
| [mtlRendEnc setDepthStencilState: cmdEncPool->getMTLDepthStencilState(isClearingDepth, isClearingStencil)]; |
| [mtlRendEnc setStencilReferenceValue: _mtlStencilValue]; |
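| // Establish a neutral raster state so the clear rectangles are drawn exactly as specified: |
| // no culling, filled triangles, no depth bias, and a viewport and scissor covering the framebuffer. |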
| [mtlRendEnc setCullMode: MTLCullModeNone]; |
| [mtlRendEnc setTriangleFillMode: MTLTriangleFillModeFill]; |
| [mtlRendEnc setDepthBias: 0 slopeScale: 0 clamp: 0]; |
| [mtlRendEnc setViewport: {0, 0, (double) fbExtent.width, (double) fbExtent.height, 0.0, 1.0}]; |
| [mtlRendEnc setScissorRect: {0, 0, fbExtent.width, fbExtent.height}]; |
| |
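| // Bind the clear colors to both the vertex and fragment stages: the vertex stage reads the |
| // depth value from the last index, and the fragment stage writes the per-attachment colors. |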
| cmdEncoder->setVertexBytes(mtlRendEnc, clearColors, sizeof(clearColors), 0); |
| cmdEncoder->setFragmentBytes(mtlRendEnc, clearColors, sizeof(clearColors), 0); |
| cmdEncoder->setVertexBytes(mtlRendEnc, vertices, vtxCnt * sizeof(vertices[0]), vtxBuffIdx); |
| [mtlRendEnc drawPrimitives: MTLPrimitiveTypeTriangle vertexStart: 0 vertexCount: vtxCnt]; |
| [mtlRendEnc popDebugGroup]; |
| |
| // Apple GPUs do not support rendering/writing to an attachment and then reading from |
| // that attachment within a single Metal renderpass. So, if any of the attachments just |
| // cleared is also an input attachment, we must restart rendering in a separate Metal renderpass. |
| if (cmdEncoder->getDevice()->_pMetalFeatures->tileBasedDeferredRendering) { |
| bool needsRenderpassRestart = false; |
| for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) { |
| if (_rpsKey.isAttachmentEnabled(caIdx) && subpass->isColorAttachmentAlsoInputAttachment(caIdx)) { |
| needsRenderpassRestart = true; |
| break; |
| } |
| } |
| if (needsRenderpassRestart) { |
| cmdEncoder->encodeStoreActions(true); |
| cmdEncoder->beginMetalRenderPass(kMVKCommandUseRestartSubpass); |
| } |
| } |
| |
| // Return to the previous rendering state on the next render activity |
| cmdEncoder->_graphicsPipelineState.markDirty(); |
| cmdEncoder->_depthStencilState.markDirty(); |
| cmdEncoder->_stencilReferenceValueState.markDirty(); |
| cmdEncoder->_depthBiasState.markDirty(); |
| cmdEncoder->_viewportState.markDirty(); |
| cmdEncoder->_scissorState.markDirty(); |
| cmdEncoder->_graphicsResourcesState.beginMetalRenderPass(); |
| } |
| |
| template class MVKCmdClearAttachments<1>; |
| template class MVKCmdClearAttachments<4>; |
| |
| template class MVKCmdClearSingleAttachment<1>; |
| template class MVKCmdClearSingleAttachment<4>; |
| |
| template class MVKCmdClearMultiAttachments<1>; |
| template class MVKCmdClearMultiAttachments<4>; |
| |
| |
| #pragma mark - |
| #pragma mark MVKCmdClearImage |
| |
| template <size_t N> |
| VkResult MVKCmdClearImage<N>::setContent(MVKCommandBuffer* cmdBuff, |
| VkImage image, |
| VkImageLayout imageLayout, |
| const VkClearValue& clearValue, |
| uint32_t rangeCount, |
| const VkImageSubresourceRange* pRanges) { |
| _image = (MVKImage*)image; |
| _clearValue = clearValue; |
| |
| // Add subresource ranges |
| _subresourceRanges.clear(); // Clear for reuse |
| _subresourceRanges.reserve(rangeCount); |
| bool isDS = isDepthStencilClear(); |
| for (uint32_t rangeIdx = 0; rangeIdx < rangeCount; rangeIdx++) { |
| auto& vkIR = pRanges[rangeIdx]; |
| uint8_t planeIndex = MVKImage::getPlaneFromVkImageAspectFlags(vkIR.aspectMask); |
| |
| // Validate |
| MVKMTLFmtCaps mtlFmtCaps = cmdBuff->getPixelFormats()->getCapabilities(_image->getMTLPixelFormat(planeIndex)); |
| bool isDestUnwritableLinear = MVK_MACOS && !cmdBuff->getDevice()->_pMetalFeatures->renderLinearTextures && _image->getIsLinear(); |
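| // On macOS devices that cannot render to linear textures, the clear is performed by a compute |
| // shader write instead, so such images require write capability rather than attachment capability. |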
| uint32_t reqCap = isDS ? kMVKMTLFmtCapsDSAtt : (isDestUnwritableLinear ? kMVKMTLFmtCapsWrite : kMVKMTLFmtCapsColorAtt); |
| if (!mvkAreAllFlagsEnabled(mtlFmtCaps, reqCap)) { |
| return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdClear%sImage(): Format %s cannot be cleared on this device.", (isDS ? "DepthStencil" : "Color"), cmdBuff->getPixelFormats()->getName(_image->getVkFormat())); |
| } |
| |
| _subresourceRanges.push_back(vkIR); |
| } |
| |
| // Validate |
| if (_image->getImageType() == VK_IMAGE_TYPE_1D) { |
| return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdClear%sImage(): Native 1D images cannot be cleared on this device. Consider enabling MVK_CONFIG_TEXTURE_1D_AS_2D.", (isDS ? "DepthStencil" : "Color")); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| template <size_t N> |
| void MVKCmdClearImage<N>::encode(MVKCommandEncoder* cmdEncoder) { |
| bool isDS = isDepthStencilClear(); |
| NSString* mtlRendEncName = (isDS |
| ? mvkMTLRenderCommandEncoderLabel(kMVKCommandUseClearDepthStencilImage) |
| : mvkMTLRenderCommandEncoderLabel(kMVKCommandUseClearColorImage)); |
| |
| cmdEncoder->endCurrentMetalEncoding(); |
| |
| MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats(); |
| for (auto& srRange : _subresourceRanges) { |
| uint8_t planeIndex = MVKImage::getPlaneFromVkImageAspectFlags(srRange.aspectMask); |
| id<MTLTexture> imgMTLTex = _image->getMTLTexture(planeIndex); |
| if ( !imgMTLTex ) { continue; } |
| |
| #if MVK_MACOS |
| if ( _image->getIsLinear() && !cmdEncoder->getDevice()->_pMetalFeatures->renderLinearTextures ) { |
| // These images cannot be rendered to. Instead, use a compute shader to write the clear color. |
| // Luckily for us, linear images have only one mip level and one array layer under Metal. |
| assert( !isDS ); |
| id<MTLComputePipelineState> mtlClearState = cmdEncoder->getCommandEncodingPool()->getCmdClearColorImageMTLComputePipelineState(pixFmts->getFormatType(_image->getVkFormat())); |
| id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseClearColorImage); |
| [mtlComputeEnc pushDebugGroup: @"vkCmdClearColorImage"]; |
| [mtlComputeEnc setComputePipelineState: mtlClearState]; |
| [mtlComputeEnc setTexture: imgMTLTex atIndex: 0]; |
| cmdEncoder->setComputeBytes(mtlComputeEnc, &_clearValue, sizeof(_clearValue), 0); |
| MTLSize gridSize = mvkMTLSizeFromVkExtent3D(_image->getExtent3D()); |
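| // Dispatch one thread per texel, in threadgroups one SIMD-group wide. |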
| MTLSize tgSize = MTLSizeMake(mtlClearState.threadExecutionWidth, 1, 1); |
| if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { |
| [mtlComputeEnc dispatchThreads: gridSize threadsPerThreadgroup: tgSize]; |
| } else { |
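| // Without support for non-uniform threadgroups, round the threadgroup count up so the grid is fully covered. |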
| MTLSize tgCount = MTLSizeMake(gridSize.width / tgSize.width, gridSize.height, gridSize.depth); |
| if (gridSize.width % tgSize.width) { tgCount.width += 1; } |
| [mtlComputeEnc dispatchThreadgroups: tgCount threadsPerThreadgroup: tgSize]; |
| } |
| [mtlComputeEnc popDebugGroup]; |
| continue; |
| } |
| #endif |
| |
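| // Build a throwaway render pass whose attachment load actions perform the clear. |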
| MTLRenderPassDescriptor* mtlRPDesc = [MTLRenderPassDescriptor renderPassDescriptor]; |
| MTLRenderPassColorAttachmentDescriptor* mtlRPCADesc = nil; |
| MTLRenderPassDepthAttachmentDescriptor* mtlRPDADesc = nil; |
| MTLRenderPassStencilAttachmentDescriptor* mtlRPSADesc = nil; |
| |
| bool isClearingColor = !isDS && mvkIsAnyFlagEnabled(srRange.aspectMask, VK_IMAGE_ASPECT_COLOR_BIT); |
| bool isClearingDepth = isDS && mvkIsAnyFlagEnabled(srRange.aspectMask, VK_IMAGE_ASPECT_DEPTH_BIT); |
| bool isClearingStencil = isDS && mvkIsAnyFlagEnabled(srRange.aspectMask, VK_IMAGE_ASPECT_STENCIL_BIT); |
| |
| if (isClearingColor) { |
| mtlRPCADesc = mtlRPDesc.colorAttachments[0]; |
| mtlRPCADesc.texture = imgMTLTex; |
| mtlRPCADesc.loadAction = MTLLoadActionClear; |
| mtlRPCADesc.storeAction = MTLStoreActionStore; |
| mtlRPCADesc.clearColor = pixFmts->getMTLClearColor(_clearValue, _image->getVkFormat()); |
| } |
| |
| if (isClearingDepth) { |
| mtlRPDADesc = mtlRPDesc.depthAttachment; |
| mtlRPDADesc.texture = imgMTLTex; |
| mtlRPDADesc.loadAction = MTLLoadActionClear; |
| mtlRPDADesc.storeAction = MTLStoreActionStore; |
| mtlRPDADesc.clearDepth = pixFmts->getMTLClearDepthValue(_clearValue); |
| } |
| |
| if (isClearingStencil) { |
| mtlRPSADesc = mtlRPDesc.stencilAttachment; |
| mtlRPSADesc.texture = imgMTLTex; |
| mtlRPSADesc.loadAction = MTLLoadActionClear; |
| mtlRPSADesc.storeAction = MTLStoreActionStore; |
| mtlRPSADesc.clearStencil = pixFmts->getMTLClearStencilValue(_clearValue); |
| } |
| |
| // Extract the mipmap levels that are to be updated |
| uint32_t mipLvlStart = srRange.baseMipLevel; |
| uint32_t mipLvlCnt = srRange.levelCount; |
| uint32_t mipLvlEnd = (mipLvlCnt == VK_REMAINING_MIP_LEVELS |
| ? _image->getMipLevelCount() |
| : (mipLvlStart + mipLvlCnt)); |
| |
| // Extract the cube or array layers (slices) that are to be updated |
| bool is3D = _image->getMTLTextureType() == MTLTextureType3D; |
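| // Metal addresses the content of a 3D texture as depth planes rather than array slices. |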
| uint32_t layerStart = is3D ? 0 : srRange.baseArrayLayer; |
| uint32_t layerCnt = srRange.layerCount; |
| uint32_t layerEnd = (layerCnt == VK_REMAINING_ARRAY_LAYERS |
| ? _image->getLayerCount() |
| : (layerStart + layerCnt)); |
| |
| // Iterate across mipmap levels and layers, and perform an empty render to clear each. |
| for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) { |
| mtlRPCADesc.level = mipLvl; |
| mtlRPDADesc.level = mipLvl; |
| mtlRPSADesc.level = mipLvl; |
| |
| // If a 3D image, we need to get the depth for each level. |
| if (is3D) { |
| layerCnt = _image->getExtent3D(planeIndex, mipLvl).depth; |
| layerEnd = layerStart + layerCnt; |
| } |
| |
| // If we can do layered rendering, we can clear all the layers at once. |
| if (cmdEncoder->getDevice()->_pMetalFeatures->layeredRendering && |
| (_image->getSampleCount() == VK_SAMPLE_COUNT_1_BIT || cmdEncoder->getDevice()->_pMetalFeatures->multisampleLayeredRendering)) { |
| if (is3D) { |
| mtlRPCADesc.depthPlane = layerStart; |
| mtlRPDADesc.depthPlane = layerStart; |
| mtlRPSADesc.depthPlane = layerStart; |
| } else { |
| mtlRPCADesc.slice = layerStart; |
| mtlRPDADesc.slice = layerStart; |
| mtlRPSADesc.slice = layerStart; |
| } |
| mtlRPDesc.renderTargetArrayLengthMVK = (layerCnt == VK_REMAINING_ARRAY_LAYERS |
| ? (_image->getLayerCount() - layerStart) |
| : layerCnt); |
| |
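| // A render pass with no draw commands still executes its load and store actions, |
| // so creating the encoder and immediately ending it performs the clear. |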
| id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc]; |
| setLabelIfNotNil(mtlRendEnc, mtlRendEncName); |
| [mtlRendEnc endEncoding]; |
| } else { |
| for (uint32_t layer = layerStart; layer < layerEnd; layer++) { |
| if (is3D) { |
| mtlRPCADesc.depthPlane = layer; |
| mtlRPDADesc.depthPlane = layer; |
| mtlRPSADesc.depthPlane = layer; |
| } else { |
| mtlRPCADesc.slice = layer; |
| mtlRPDADesc.slice = layer; |
| mtlRPSADesc.slice = layer; |
| } |
| |
| id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc]; |
| setLabelIfNotNil(mtlRendEnc, mtlRendEncName); |
| [mtlRendEnc endEncoding]; |
| } |
| } |
| } |
| } |
| } |
| |
| template class MVKCmdClearImage<1>; |
| template class MVKCmdClearImage<4>; |
| |
| template class MVKCmdClearColorImage<1>; |
| template class MVKCmdClearColorImage<4>; |
| |
| template class MVKCmdClearDepthStencilImage<1>; |
| template class MVKCmdClearDepthStencilImage<4>; |
| |
| |
| #pragma mark - |
| #pragma mark MVKCmdFillBuffer |
| |
| VkResult MVKCmdFillBuffer::setContent(MVKCommandBuffer* cmdBuff, |
| VkBuffer dstBuffer, |
| VkDeviceSize dstOffset, |
| VkDeviceSize size, |
| uint32_t data) { |
| _dstBuffer = (MVKBuffer*)dstBuffer; |
| _dstOffset = dstOffset; |
| _dataValue = data; |
| |
| // Round down in case of VK_WHOLE_SIZE on a buffer size which is not aligned to 4 bytes. |
| VkDeviceSize byteCnt = (size == VK_WHOLE_SIZE) ? (_dstBuffer->getByteCount() - _dstOffset) : size; |
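| // Convert the byte count to a count of 32-bit words. Vulkan requires dstOffset, and size |
| // unless it is VK_WHOLE_SIZE, to be multiples of 4 bytes. |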
| VkDeviceSize wdCnt = byteCnt >> 2; |
| if (mvkFits<uint32_t>(wdCnt)) { |
| _wordCount = (uint32_t)wdCnt; |
| } else { |
| _wordCount = std::numeric_limits<uint32_t>::max(); |
| return cmdBuff->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "vkCmdFillBuffer(): Buffer fill size must fit into a 32-bit unsigned integer. Fill size of %llu words is too large.", wdCnt); |
| } |
| |
| return VK_SUCCESS; |
| } |
| |
| void MVKCmdFillBuffer::encode(MVKCommandEncoder* cmdEncoder) { |
| if (_wordCount == 0) { return; } |
| |
| id<MTLBuffer> dstMTLBuff = _dstBuffer->getMTLBuffer(); |
| NSUInteger dstMTLBuffOffset = _dstBuffer->getMTLBufferOffset() + _dstOffset; |
| |
| // Determine the number of full threadgroups we can dispatch to cover the buffer content efficiently. |
| // Some GPUs report different values for max threadgroup width between the pipeline state and the device, |
| // so conservatively use the minimum of the two reported values. |
| id<MTLComputePipelineState> cps = cmdEncoder->getCommandEncodingPool()->getCmdFillBufferMTLComputePipelineState(); |
| NSUInteger tgWidth = std::min(cps.maxTotalThreadsPerThreadgroup, cmdEncoder->getMTLDevice().maxThreadsPerThreadgroup.width); |
| NSUInteger tgCount = _wordCount / tgWidth; |
| |
| id<MTLComputeCommandEncoder> mtlComputeEnc = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseFillBuffer); |
| [mtlComputeEnc pushDebugGroup: @"vkCmdFillBuffer"]; |
| [mtlComputeEnc setComputePipelineState: cps]; |
| [mtlComputeEnc setBytes: &_dataValue length: sizeof(_dataValue) atIndex: 1]; |
| [mtlComputeEnc setBuffer: dstMTLBuff offset: dstMTLBuffOffset atIndex: 0]; |
| |
| // Run as many full threadgroups as will fit into the buffer content. |
| if (tgCount > 0) { |
| [mtlComputeEnc dispatchThreadgroups: MTLSizeMake(tgCount, 1, 1) |
| threadsPerThreadgroup: MTLSizeMake(tgWidth, 1, 1)]; |
| } |
| |
| // If there is left-over buffer content after running full threadgroups, or if the buffer content |
| // fits within a single threadgroup, run a single partial threadgroup of the appropriate size. |
| uint32_t remainderWordCount = _wordCount % tgWidth; |
| if (remainderWordCount > 0) { |
| if (tgCount > 0) { // If we've already written full threadgroups, skip ahead to unwritten content |
| dstMTLBuffOffset += tgCount * tgWidth * sizeof(_dataValue); |
| [mtlComputeEnc setBufferOffset: dstMTLBuffOffset atIndex: 0]; |
| } |
| [mtlComputeEnc dispatchThreadgroups: MTLSizeMake(1, 1, 1) |
| threadsPerThreadgroup: MTLSizeMake(remainderWordCount, 1, 1)]; |
| } |
| |
| [mtlComputeEnc popDebugGroup]; |
| } |
| |
| |
| #pragma mark - |
| #pragma mark MVKCmdUpdateBuffer |
| |
| VkResult MVKCmdUpdateBuffer::setContent(MVKCommandBuffer* cmdBuff, |
| VkBuffer dstBuffer, |
| VkDeviceSize dstOffset, |
| VkDeviceSize dataSize, |
| const void* pData) { |
| _dstBuffer = (MVKBuffer*)dstBuffer; |
| _dstOffset = dstOffset; |
| _dataSize = dataSize; |
| |
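| // Cache the source data, because encoding may occur after the caller's pData memory is released. |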
| _srcDataCache.resize(_dataSize); // resize(), not reserve(), so that data() is valid across the full _dataSize range |
| memcpy(_srcDataCache.data(), pData, _dataSize); |
| |
| return VK_SUCCESS; |
| } |
| |
| void MVKCmdUpdateBuffer::encode(MVKCommandEncoder* cmdEncoder) { |
| |
| id<MTLBlitCommandEncoder> mtlBlitEnc = cmdEncoder->getMTLBlitEncoder(kMVKCommandUseUpdateBuffer); |
| |
| id<MTLBuffer> dstMTLBuff = _dstBuffer->getMTLBuffer(); |
| NSUInteger dstMTLBuffOffset = _dstBuffer->getMTLBufferOffset() + _dstOffset; |
| |
| // Copy data to the source MTLBuffer |
| MVKMTLBufferAllocation* srcMTLBufferAlloc = cmdEncoder->getCommandEncodingPool()->acquireMTLBufferAllocation(_dataSize); |
| void* pBuffData = srcMTLBufferAlloc->getContents(); |
| memcpy(pBuffData, _srcDataCache.data(), _dataSize); |
| |
| [mtlBlitEnc copyFromBuffer: srcMTLBufferAlloc->_mtlBuffer |
| sourceOffset: srcMTLBufferAlloc->_offset |
| toBuffer: dstMTLBuff |
| destinationOffset: dstMTLBuffOffset |
| size: _dataSize]; |
| |
| // Return the MTLBuffer allocation to the pool once the command buffer is done with it |
| [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id<MTLCommandBuffer> mcb) { |
| srcMTLBufferAlloc->returnToPool(); |
| }]; |
| } |
| |