/*
* Copyright 2025 Rive
*/
#include <optional>
#include <sstream>
#include <string>
#include <vulkan/vulkan.h>
#include "rive/renderer/vulkan/render_context_vulkan_impl.hpp"
#include "rive/renderer/stack_vector.hpp"
#include "shaders/constants.glsl"
#include "common_layouts.hpp"
#include "draw_pipeline_layout_vulkan.hpp"
#include "pipeline_manager_vulkan.hpp"
#include "render_pass_vulkan.hpp"
namespace rive::gpu
{
constexpr static VkAttachmentLoadOp vk_load_op(gpu::LoadAction loadAction,
gpu::InterlockMode interlockMode)
{
switch (loadAction)
{
case gpu::LoadAction::preserveRenderTarget:
return (interlockMode == gpu::InterlockMode::msaa)
// In MSAA we need to implement the loadOp with a manual
// draw instead, since the MSAA attachment is transient
// and its color is seeded from the actual render target.
? VK_ATTACHMENT_LOAD_OP_DONT_CARE
: VK_ATTACHMENT_LOAD_OP_LOAD;
case gpu::LoadAction::clear:
return VK_ATTACHMENT_LOAD_OP_CLEAR;
case gpu::LoadAction::dontCare:
return VK_ATTACHMENT_LOAD_OP_DONT_CARE;
}
RIVE_UNREACHABLE();
}
constexpr static VkFormat LAST_NON_SPARSE_VK_FORMAT =
VK_FORMAT_ASTC_12x12_SRGB_BLOCK;
// The VkFormat values are very sparse after LAST_NON_SPARSE_VK_FORMAT. This
// table converts the sparse formats to a 0-based, tightly-packed index that can
// be used to build a key.
static uint32_t vk_sparse_format_index(VkFormat format)
{
assert(format > LAST_NON_SPARSE_VK_FORMAT);
switch (format)
{
// Turn off clang-format so we can fit our case labels on one line.
// clang-format off
case VK_FORMAT_G8B8G8R8_422_UNORM: return 0;
case VK_FORMAT_B8G8R8G8_422_UNORM: return 1;
case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: return 2;
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: return 3;
case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM: return 4;
case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM: return 5;
case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM: return 6;
case VK_FORMAT_R10X6_UNORM_PACK16: return 7;
case VK_FORMAT_R10X6G10X6_UNORM_2PACK16: return 8;
case VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16: return 9;
case VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16: return 10;
case VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16: return 11;
case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16: return 12;
case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16: return 13;
case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16: return 14;
case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16: return 15;
case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16: return 16;
case VK_FORMAT_R12X4_UNORM_PACK16: return 17;
case VK_FORMAT_R12X4G12X4_UNORM_2PACK16: return 18;
case VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16: return 19;
case VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16: return 20;
case VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16: return 21;
case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16: return 22;
case VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16: return 23;
case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16: return 24;
case VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16: return 25;
case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16: return 26;
case VK_FORMAT_G16B16G16R16_422_UNORM: return 27;
case VK_FORMAT_B16G16R16G16_422_UNORM: return 28;
case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM: return 29;
case VK_FORMAT_G16_B16R16_2PLANE_420_UNORM: return 30;
case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM: return 31;
case VK_FORMAT_G16_B16R16_2PLANE_422_UNORM: return 32;
case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM: return 33;
case VK_FORMAT_G8_B8R8_2PLANE_444_UNORM: return 34;
case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16: return 35;
case VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16: return 36;
case VK_FORMAT_G16_B16R16_2PLANE_444_UNORM: return 37;
case VK_FORMAT_A4R4G4B4_UNORM_PACK16: return 38;
case VK_FORMAT_A4B4G4R4_UNORM_PACK16: return 39;
case VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK: return 40;
case VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK: return 41;
case VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK: return 42;
case VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK: return 43;
case VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK: return 44;
case VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK: return 45;
case VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK: return 46;
case VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK: return 47;
case VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK: return 48;
case VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK: return 49;
case VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK: return 50;
case VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK: return 51;
case VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK: return 52;
case VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK: return 53;
case VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG: return 56;
case VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG: return 57;
case VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG: return 58;
case VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG: return 59;
case VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG: return 60;
case VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG: return 61;
case VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG: return 62;
case VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG: return 63;
#ifndef __APPLE__
// Apple clang++ intentionally prioritizes '/usr/local/include' over any
// search paths provided via -I or -isystem. This means we get the
// locally installed MoltenVK headers instead of the Rive-official
// Vulkan headers when building for Apple.
// The following VkFormats are not defined in MoltenVK's headers.
case VK_FORMAT_A1B5G5R5_UNORM_PACK16: return 54;
case VK_FORMAT_A8_UNORM: return 55;
case VK_FORMAT_R8_BOOL_ARM: return 64;
case VK_FORMAT_R16G16_SFIXED5_NV: return 65;
case VK_FORMAT_R10X6_UINT_PACK16_ARM: return 66;
case VK_FORMAT_R10X6G10X6_UINT_2PACK16_ARM: return 67;
case VK_FORMAT_R10X6G10X6B10X6A10X6_UINT_4PACK16_ARM: return 68;
case VK_FORMAT_R12X4_UINT_PACK16_ARM: return 69;
case VK_FORMAT_R12X4G12X4_UINT_2PACK16_ARM: return 70;
case VK_FORMAT_R12X4G12X4B12X4A12X4_UINT_4PACK16_ARM: return 71;
case VK_FORMAT_R14X2_UINT_PACK16_ARM: return 72;
case VK_FORMAT_R14X2G14X2_UINT_2PACK16_ARM: return 73;
case VK_FORMAT_R14X2G14X2B14X2A14X2_UINT_4PACK16_ARM: return 74;
case VK_FORMAT_R14X2_UNORM_PACK16_ARM: return 75;
case VK_FORMAT_R14X2G14X2_UNORM_2PACK16_ARM: return 76;
case VK_FORMAT_R14X2G14X2B14X2A14X2_UNORM_4PACK16_ARM: return 77;
case VK_FORMAT_G14X2_B14X2R14X2_2PLANE_420_UNORM_3PACK16_ARM: return 78;
case VK_FORMAT_G14X2_B14X2R14X2_2PLANE_422_UNORM_3PACK16_ARM: return 79;
#endif
default: break;
// clang-format on
}
assert(false && "Given sparse VkFormat is not supported");
return (1 << RenderPassVulkan::FORMAT_BIT_COUNT) - 1 -
(LAST_NON_SPARSE_VK_FORMAT + 1);
}
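// Converts a VkFormat to a key that fits in FORMAT_BIT_COUNT bits:
// non-sparse formats map to their own enum value, and sparse formats are
// packed immediately after LAST_NON_SPARSE_VK_FORMAT via
// vk_sparse_format_index().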
static uint32_t vk_format_key(VkFormat format)
{
if (format <= LAST_NON_SPARSE_VK_FORMAT)
{
// Basic case: Almost all normal formats already fit in 8 bits.
return static_cast<uint32_t>(format);
}
else
{
// Pack the sparse VkFormats into a tighter key.
return vk_sparse_format_index(format) + LAST_NON_SPARSE_VK_FORMAT + 1;
}
}
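// Packs everything except the interlock mode into a single key.
// Bit layout, from high to low: loadAction, VkFormat key, renderPassOptions.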
uint32_t RenderPassVulkan::KeyNoInterlockMode(
RenderPassOptionsVulkan renderPassOptions,
VkFormat renderTargetFormat,
gpu::LoadAction loadAction)
{
// gpu::LoadAction.
assert(static_cast<uint32_t>(loadAction) < 1 << LOAD_OP_BIT_COUNT);
uint32_t key = static_cast<uint32_t>(loadAction);
// VkFormat.
const uint32_t renderFormatKey = vk_format_key(renderTargetFormat);
assert(renderFormatKey < 1 << FORMAT_BIT_COUNT);
assert(key << FORMAT_BIT_COUNT >> FORMAT_BIT_COUNT == key);
key = (key << FORMAT_BIT_COUNT) | renderFormatKey;
// RenderPassOptionsVulkan.
assert(static_cast<uint32_t>(renderPassOptions) <
1 << RENDER_PASS_OPTION_COUNT);
assert(key << RENDER_PASS_OPTION_COUNT >> RENDER_PASS_OPTION_COUNT == key);
key = (key << RENDER_PASS_OPTION_COUNT) |
static_cast<uint32_t>(renderPassOptions);
assert(key < 1 << KEY_NO_INTERLOCK_MODE_BIT_COUNT);
return key;
}
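// Full render pass key: the KeyNoInterlockMode() bits shifted up, with the
// interlock mode packed into the low INTERLOCK_MODE_BIT_COUNT bits.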
uint32_t RenderPassVulkan::Key(gpu::InterlockMode interlockMode,
RenderPassOptionsVulkan renderPassOptions,
VkFormat renderTargetFormat,
gpu::LoadAction loadAction)
{
uint32_t key =
KeyNoInterlockMode(renderPassOptions, renderTargetFormat, loadAction);
// gpu::InterlockMode.
assert(key << gpu::INTERLOCK_MODE_BIT_COUNT >>
gpu::INTERLOCK_MODE_BIT_COUNT ==
key);
assert(static_cast<uint32_t>(interlockMode) <
1 << gpu::INTERLOCK_MODE_BIT_COUNT);
key = (key << gpu::INTERLOCK_MODE_BIT_COUNT) |
static_cast<uint32_t>(interlockMode);
assert(key < 1 << KEY_BIT_COUNT);
return key;
}
RenderPassVulkan::RenderPassVulkan(PipelineManagerVulkan* pipelineManager,
gpu::InterlockMode interlockMode,
RenderPassOptionsVulkan renderPassOptions,
VkFormat renderTargetFormat,
gpu::LoadAction loadAction) :
m_vk(ref_rcp(pipelineManager->vulkanContext()))
{
m_drawPipelineLayout =
&pipelineManager->getDrawPipelineLayoutSynchronous(interlockMode,
renderPassOptions);
// COLOR attachment.
const VkImageLayout colorAttachmentLayout =
(renderPassOptions & RenderPassOptionsVulkan::fixedFunctionColorOutput)
? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
: VK_IMAGE_LAYOUT_GENERAL;
const VkSampleCountFlagBits msaaSampleCount =
(interlockMode == gpu::InterlockMode::msaa) ? VK_SAMPLE_COUNT_4_BIT
: VK_SAMPLE_COUNT_1_BIT;
StackVector<VkAttachmentDescription, layout::MAX_RENDER_PASS_ATTACHMENTS>
attachments;
StackVector<VkAttachmentReference, PLS_PLANE_COUNT> colorAttachmentRefs;
std::optional<VkAttachmentReference> depthStencilAttachmentRef;
std::optional<VkAttachmentReference> resolveAttachmentRef;
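// Build the attachment descriptions and references for this interlock mode.
// The asserts below verify that each attachment lands at the index named by
// its *_PLANE_IDX / *_IDX constant.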
if (pipelineManager->plsBackingType(interlockMode) ==
PipelineManagerVulkan::PLSBackingType::inputAttachment ||
(renderPassOptions & RenderPassOptionsVulkan::fixedFunctionColorOutput))
{
assert(attachments.size() == COLOR_PLANE_IDX);
assert(colorAttachmentRefs.size() == COLOR_PLANE_IDX);
attachments.push_back({
.format = renderTargetFormat,
.samples = msaaSampleCount,
.loadOp = vk_load_op(loadAction, interlockMode),
.storeOp = ((renderPassOptions &
(RenderPassOptionsVulkan::manuallyResolved |
RenderPassOptionsVulkan::
atomicCoalescedResolveAndTransfer)) ||
interlockMode == gpu::InterlockMode::msaa)
? VK_ATTACHMENT_STORE_OP_DONT_CARE
: VK_ATTACHMENT_STORE_OP_STORE,
// This could be VK_IMAGE_LAYOUT_UNDEFINED more often, but it would
// invalidate the portion outside the renderArea when it isn't the
// full renderTarget, and currently we don't have separate render
// passes for "full renderTarget bounds" and "partial renderTarget
// bounds". Instead, we rely on
// vkutil::ImageAccessAction::invalidateContents to invalidate the
// color attachment when we can.
.initialLayout =
(((renderPassOptions & RenderPassOptionsVulkan::
atomicCoalescedResolveAndTransfer) &&
loadAction != gpu::LoadAction::preserveRenderTarget) ||
interlockMode == gpu::InterlockMode::msaa)
? VK_IMAGE_LAYOUT_UNDEFINED
: colorAttachmentLayout,
.finalLayout = colorAttachmentLayout,
});
colorAttachmentRefs.push_back({
.attachment = COLOR_PLANE_IDX,
.layout = colorAttachmentLayout,
});
}
if (interlockMode == gpu::InterlockMode::rasterOrdering ||
interlockMode == gpu::InterlockMode::atomics)
{
// CLIP attachment.
assert(attachments.size() == CLIP_PLANE_IDX);
assert(colorAttachmentRefs.size() == CLIP_PLANE_IDX);
attachments.push_back({
// The clip buffer is encoded as RGBA8 in atomic mode so we can
// block writes by emitting alpha=0.
.format = (interlockMode == gpu::InterlockMode::atomics)
? VK_FORMAT_R8G8B8A8_UNORM
: VK_FORMAT_R32_UINT,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = (interlockMode == gpu::InterlockMode::rasterOrdering &&
(renderPassOptions &
RenderPassOptionsVulkan::rasterOrderingResume))
? VK_ATTACHMENT_LOAD_OP_LOAD
: VK_ATTACHMENT_LOAD_OP_CLEAR,
.storeOp = (interlockMode == gpu::InterlockMode::rasterOrdering &&
(renderPassOptions &
RenderPassOptionsVulkan::rasterOrderingInterruptible))
? VK_ATTACHMENT_STORE_OP_STORE
: VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout =
(interlockMode == gpu::InterlockMode::rasterOrdering &&
(renderPassOptions &
RenderPassOptionsVulkan::rasterOrderingResume))
? VK_IMAGE_LAYOUT_GENERAL
: VK_IMAGE_LAYOUT_UNDEFINED,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
});
colorAttachmentRefs.push_back({
.attachment = CLIP_PLANE_IDX,
.layout = VK_IMAGE_LAYOUT_GENERAL,
});
}
if (interlockMode == gpu::InterlockMode::rasterOrdering)
{
// SCRATCH_COLOR attachment.
assert(attachments.size() == SCRATCH_COLOR_PLANE_IDX);
assert(colorAttachmentRefs.size() == SCRATCH_COLOR_PLANE_IDX);
attachments.push_back({
.format = VK_FORMAT_R8G8B8A8_UNORM,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = (renderPassOptions &
RenderPassOptionsVulkan::rasterOrderingResume)
? VK_ATTACHMENT_LOAD_OP_LOAD
: VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.storeOp = (renderPassOptions &
RenderPassOptionsVulkan::rasterOrderingInterruptible)
? VK_ATTACHMENT_STORE_OP_STORE
: VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = (renderPassOptions &
RenderPassOptionsVulkan::rasterOrderingResume)
? VK_IMAGE_LAYOUT_GENERAL
: VK_IMAGE_LAYOUT_UNDEFINED,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
});
colorAttachmentRefs.push_back({
.attachment = SCRATCH_COLOR_PLANE_IDX,
.layout = VK_IMAGE_LAYOUT_GENERAL,
});
// COVERAGE attachment.
assert(attachments.size() == COVERAGE_PLANE_IDX);
assert(colorAttachmentRefs.size() == COVERAGE_PLANE_IDX);
attachments.push_back({
.format = VK_FORMAT_R32_UINT,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = (renderPassOptions &
RenderPassOptionsVulkan::rasterOrderingResume)
? VK_ATTACHMENT_LOAD_OP_LOAD
: VK_ATTACHMENT_LOAD_OP_CLEAR,
.storeOp = (renderPassOptions &
RenderPassOptionsVulkan::rasterOrderingInterruptible)
? VK_ATTACHMENT_STORE_OP_STORE
: VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = (renderPassOptions &
RenderPassOptionsVulkan::rasterOrderingResume)
? VK_IMAGE_LAYOUT_GENERAL
: VK_IMAGE_LAYOUT_UNDEFINED,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
});
colorAttachmentRefs.push_back({
.attachment = COVERAGE_PLANE_IDX,
.layout = VK_IMAGE_LAYOUT_GENERAL,
});
if (renderPassOptions & RenderPassOptionsVulkan::manuallyResolved)
{
// The renderTarget does not support
// VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, so we will instead use an
// offscreen color texture for the main subpass, and then transfer
// it into the renderTarget at the end of the render pass.
assert(attachments.size() == PLS_PLANE_COUNT);
attachments.push_back({
.format = renderTargetFormat,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
});
resolveAttachmentRef = {
.attachment = PLS_PLANE_COUNT,
.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
};
}
}
else if (interlockMode == gpu::InterlockMode::atomics)
{
if (renderPassOptions &
RenderPassOptionsVulkan::atomicCoalescedResolveAndTransfer)
{
// COALESCED_ATOMIC_RESOLVE attachment (primary render target).
assert(attachments.size() == COALESCED_ATOMIC_RESOLVE_IDX);
attachments.push_back({
.format = renderTargetFormat,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
// This could sometimes be VK_IMAGE_LAYOUT_UNDEFINED, but it
// would invalidate the portion outside the renderArea when it
// isn't the full renderTarget, and currently we don't have
// separate render passes for "full renderTarget bounds" and
// "partial renderTarget bounds". Instead, we rely on
// vkutil::ImageAccessAction::invalidateContents to invalidate
// the atomic resolve attachment when we can.
.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
});
// The resolve subpass only renders to the resolve texture, and the
// "coalesced" resolve shader outputs to color attachment 0, so alias
// the COALESCED_ATOMIC_RESOLVE attachment on output 0 for this
// subpass.
assert(!resolveAttachmentRef.has_value());
resolveAttachmentRef = {
.attachment = COALESCED_ATOMIC_RESOLVE_IDX,
.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
};
}
else
{
// When not in "coalesced" mode, the resolve texture is the
// same as the COLOR texture.
static_assert(COLOR_PLANE_IDX == 0);
assert(!resolveAttachmentRef.has_value());
resolveAttachmentRef = colorAttachmentRefs[0];
}
}
else if (interlockMode == gpu::InterlockMode::msaa)
{
// DEPTH attachment.
assert(attachments.size() == MSAA_DEPTH_STENCIL_IDX);
attachments.push_back({
.format = vkutil::get_preferred_depth_stencil_format(
m_vk->supportsD24S8()),
.samples = msaaSampleCount,
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
});
depthStencilAttachmentRef = {
.attachment = MSAA_DEPTH_STENCIL_IDX,
.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
};
// MSAA_RESOLVE attachment.
const bool readsMSAAResolveAttachment =
loadAction == gpu::LoadAction::preserveRenderTarget &&
!(renderPassOptions &
RenderPassOptionsVulkan::msaaSeedFromOffscreenTexture);
const VkImageLayout msaaResolveLayout =
readsMSAAResolveAttachment
? VK_IMAGE_LAYOUT_GENERAL
: VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
assert(attachments.size() == MSAA_RESOLVE_IDX);
attachments.push_back({
.format = renderTargetFormat,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = readsMSAAResolveAttachment
? VK_ATTACHMENT_LOAD_OP_LOAD
: VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout =
(readsMSAAResolveAttachment ||
(renderPassOptions &
RenderPassOptionsVulkan::manuallyResolved))
? msaaResolveLayout
// NOTE: This can be VK_IMAGE_LAYOUT_UNDEFINED only because
// Vulkan does not support partial resolves to MSAA resolve
// attachments, so every MSAA render pass without
// "manuallyResolved" covers the entire render area.
: VK_IMAGE_LAYOUT_UNDEFINED,
.finalLayout = msaaResolveLayout,
});
resolveAttachmentRef = {
.attachment = MSAA_RESOLVE_IDX,
.layout = msaaResolveLayout,
};
assert(colorAttachmentRefs.size() == 1);
if (renderPassOptions &
RenderPassOptionsVulkan::msaaSeedFromOffscreenTexture)
{
// MSAA_SEED attachment.
assert(loadAction == gpu::LoadAction::preserveRenderTarget);
assert(attachments.size() == MSAA_COLOR_SEED_IDX);
attachments.push_back({
.format = renderTargetFormat,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
});
}
}
// Input attachments.
StackVector<VkAttachmentReference, PLS_PLANE_COUNT> inputAttachmentRefs;
StackVector<VkAttachmentReference, 1> msaaColorSeedInputAttachmentRef;
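// clockwiseAtomic mode doesn't read any PLS planes as input attachments (it
// synchronizes through the shader read/write self dependency added below),
// so only build input attachment refs for the other interlock modes.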
if (interlockMode != gpu::InterlockMode::clockwiseAtomic)
{
inputAttachmentRefs.push_back_n(colorAttachmentRefs.size(),
colorAttachmentRefs.data());
if (renderPassOptions &
RenderPassOptionsVulkan::fixedFunctionColorOutput)
{
// COLOR is not an input attachment if we're using fixed function
// blending.
if (inputAttachmentRefs.size() > 1)
{
inputAttachmentRefs[0] = {.attachment = VK_ATTACHMENT_UNUSED};
}
else
{
inputAttachmentRefs.clear();
}
}
if (interlockMode == gpu::InterlockMode::msaa &&
loadAction == gpu::LoadAction::preserveRenderTarget)
{
msaaColorSeedInputAttachmentRef.push_back({
.attachment =
(renderPassOptions &
RenderPassOptionsVulkan::msaaSeedFromOffscreenTexture)
? MSAA_COLOR_SEED_IDX
: MSAA_RESOLVE_IDX,
.layout = VK_IMAGE_LAYOUT_GENERAL,
});
}
}
const bool rasterOrderedAttachmentAccess =
interlockMode == gpu::InterlockMode::rasterOrdering &&
m_vk->features.rasterizationOrderColorAttachmentAccess;
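// Subpass layout: [optional MSAA color-load subpass] -> main draw subpass ->
// [optional resolve subpass (atomics or manual resolve)].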
constexpr uint32_t MAX_SUBPASSES = 3;
StackVector<VkSubpassDescription, MAX_SUBPASSES> subpassDescs;
constexpr uint32_t MAX_SUBPASS_DEPS = 9;
StackVector<VkSubpassDependency, MAX_SUBPASS_DEPS> subpassDeps;
// The standard initial external input dependency, to ensure that all
// previous writes to subpass 0's color attachment are completed before this
// render pass starts.
static constexpr VkSubpassDependency EXTERNAL_COLOR_INPUT_DEPENDENCY = {
.srcSubpass = VK_SUBPASS_EXTERNAL,
.dstSubpass = 0,
.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.srcAccessMask = VK_ACCESS_NONE,
.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT,
.dependencyFlags = 0,
};
// Helper to add the typical dependency between the current subpass and the
// next one: color attachment writes in the current subpass must complete
// before fragment shader reads in the next.
auto addStandardColorDependencyToNextSubpass =
[&](uint32_t dstSubpassIndex) {
subpassDeps.push_back({
.srcSubpass = dstSubpassIndex - 1,
.dstSubpass = dstSubpassIndex,
.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
});
};
// MSAA color-load subpass.
if (interlockMode == gpu::InterlockMode::msaa &&
loadAction == gpu::LoadAction::preserveRenderTarget)
{
assert(msaaColorSeedInputAttachmentRef.size() ==
colorAttachmentRefs.size());
assert(subpassDescs.size() == 0);
// The color-load subpass reads the seed texture (which may be the same
// as the resolve texture) and draws it into the MSAA color attachment.
subpassDescs.push_back({
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = msaaColorSeedInputAttachmentRef.size(),
.pInputAttachments = msaaColorSeedInputAttachmentRef.data(),
.colorAttachmentCount = colorAttachmentRefs.size(),
.pColorAttachments = colorAttachmentRefs.data(),
});
// The color-load subpass has a self dependency because it reads the
// result of the seed attachment's loadOp when it draws it into the MSAA
// attachment. (loadOps always occur in
// VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT.)
// NOTE: Per Vulkan synchronization validation, this self dependency
// should not be necessary, since the external input subpass dependency
// should handle it, but in practice, without this extra barrier,
// everything fails to render properly on Adreno devices.
subpassDeps.push_back({
.srcSubpass = 0,
.dstSubpass = 0,
.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
});
// This subpass needs an external dependency on the color stages to
// ensure that all color rendering from before this render pass has
// completed.
subpassDeps.push_back(EXTERNAL_COLOR_INPUT_DEPENDENCY);
if (renderPassOptions &
RenderPassOptionsVulkan::msaaSeedFromOffscreenTexture)
{
// If we're seeding from an offscreen texture, this pass needs an
// external output dependency to ensure that any future writes happen
// after we're done with it.
subpassDeps.push_back({
.srcSubpass = 0,
.dstSubpass = VK_SUBPASS_EXTERNAL,
.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_NONE,
.dependencyFlags = 0,
});
}
// The next subpass (the main subpass) needs an external dependency on
// the depth buffer (which is not used in this subpass but is used in
// that one).
VkSubpassDependency externalInputDeps = {
.srcSubpass = VK_SUBPASS_EXTERNAL,
.dstSubpass = 1,
.srcStageMask = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
.dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.srcAccessMask = VK_ACCESS_NONE,
.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dependencyFlags = 0,
};
if (!(renderPassOptions & RenderPassOptionsVulkan::manuallyResolved))
{
// If we are not doing the manual MSAA resolve, this pass also needs
// barriers to protect the layout transition of the resolve target
// from the load op (even though it's LOAD_OP_DONT_CARE, it is
// possible that it performs a write), so we also need to specify
// COLOR_ATTACHMENT_WRITE as a destination access flag.
// (If we *were* doing the manual resolve, the transition and load
// would happen in that subpass instead of this one.)
externalInputDeps.dstStageMask |=
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
externalInputDeps.dstAccessMask |=
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
}
subpassDeps.push_back(externalInputDeps);
// Finally, the standard color dependency from subpass 0 -> subpass 1
addStandardColorDependencyToNextSubpass(subpassDescs.size());
}
else
{
// Without the extra color-load subpass, we need an external dependency
// into the main subpass.
auto externalInDep = EXTERNAL_COLOR_INPUT_DEPENDENCY;
if (interlockMode == gpu::InterlockMode::msaa)
{
// For MSAA, where the main subpass comes first, the external
// dependency additionally needs to cover depth/stencil.
externalInDep.srcStageMask |=
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
externalInDep.dstStageMask |=
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
externalInDep.dstAccessMask |=
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
}
subpassDeps.push_back(externalInDep);
}
// Main subpass.
const uint32_t mainSubpassIdx = subpassDescs.size();
assert(colorAttachmentRefs.size() ==
m_drawPipelineLayout->colorAttachmentCount(0, renderPassOptions));
subpassDescs.push_back({
.flags =
rasterOrderedAttachmentAccess
? VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT
: 0u,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = inputAttachmentRefs.size(),
.pInputAttachments = inputAttachmentRefs.data(),
.colorAttachmentCount = colorAttachmentRefs.size(),
.pColorAttachments = colorAttachmentRefs.data(),
.pResolveAttachments =
(interlockMode == gpu::InterlockMode::msaa &&
!(renderPassOptions & RenderPassOptionsVulkan::manuallyResolved))
? &resolveAttachmentRef.value()
: nullptr,
.pDepthStencilAttachment = depthStencilAttachmentRef.has_value()
? &depthStencilAttachmentRef.value()
: nullptr,
});
// Add any main subpass self-dependencies, if needed.
if ((interlockMode == gpu::InterlockMode::rasterOrdering &&
!rasterOrderedAttachmentAccess) ||
interlockMode == gpu::InterlockMode::atomics ||
(interlockMode == gpu::InterlockMode::msaa &&
!(renderPassOptions &
RenderPassOptionsVulkan::fixedFunctionColorOutput)))
{
// Any subpass that reads the framebuffer or PLS planes has a self
// dependency.
//
// In implicit rasterOrdering mode (meaning
// EXT_rasterization_order_attachment_access is not present, but
// we're on ARM hardware and know the hardware is raster ordered
// anyway), we also need to declare this dependency even though
// we won't be issuing any barriers.
subpassDeps.push_back({
.srcSubpass = mainSubpassIdx,
.dstSubpass = mainSubpassIdx,
.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
// TODO: We should add SHADER_READ/SHADER_WRITE flags for the
// coverage buffer as well, but ironically, adding those seems to
// cause artifacts on Qualcomm. Leave them out for now until we can
// investigate further.
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
});
}
else if (interlockMode == gpu::InterlockMode::clockwiseAtomic)
{
// clockwiseAtomic mode has a self dependency at the point where we
// switch from borrowed coverage to forward coverage.
subpassDeps.push_back({
.srcSubpass = mainSubpassIdx,
.dstSubpass = mainSubpassIdx,
.srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
});
}
if (interlockMode == gpu::InterlockMode::msaa)
{
// The main subpass needs a separate external dependency for depth/stencil.
subpassDeps.push_back({
.srcSubpass = subpassDescs.size() - 1,
.dstSubpass = VK_SUBPASS_EXTERNAL,
.srcStageMask = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_NONE,
.dependencyFlags = 0,
});
}
// PLS-resolve subpass (atomic mode only).
if (interlockMode == gpu::InterlockMode::atomics)
{
// Add the dependency from main subpass to the resolve subpass.
addStandardColorDependencyToNextSubpass(subpassDescs.size());
// The resolve happens in a separate subpass.
assert(subpassDescs.size() == 1);
assert(
m_drawPipelineLayout->colorAttachmentCount(1, renderPassOptions) ==
1);
assert(resolveAttachmentRef.has_value());
subpassDescs.push_back({
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = inputAttachmentRefs.size(),
.pInputAttachments = inputAttachmentRefs.data(),
.colorAttachmentCount = 1,
.pColorAttachments = &resolveAttachmentRef.value(),
});
}
else if (renderPassOptions & RenderPassOptionsVulkan::manuallyResolved)
{
assert(!(renderPassOptions &
RenderPassOptionsVulkan::fixedFunctionColorOutput));
// Manually resolved render passes aren't currently compatible with
// interruptions.
assert(!(renderPassOptions &
RenderPassOptionsVulkan::rasterOrderingInterruptible));
assert(inputAttachmentRefs[0].attachment == COLOR_PLANE_IDX);
addStandardColorDependencyToNextSubpass(subpassDescs.size());
subpassDescs.push_back({
.flags =
rasterOrderedAttachmentAccess
? VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT
: 0u,
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = 1u,
.pInputAttachments = inputAttachmentRefs.data(),
.colorAttachmentCount = 1u,
.pColorAttachments = &resolveAttachmentRef.value(),
});
}
// There always needs to be a final external output dependency for the
// render target.
subpassDeps.push_back({
.srcSubpass = subpassDescs.size() - 1,
.dstSubpass = VK_SUBPASS_EXTERNAL,
.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_TRANSFER_BIT,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_NONE,
.dependencyFlags = 0,
});
VkRenderPassCreateInfo renderPassCreateInfo = {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.attachmentCount = attachments.size(),
.pAttachments = attachments.data(),
.subpassCount = subpassDescs.size(),
.pSubpasses = subpassDescs.data(),
.dependencyCount = subpassDeps.size(),
.pDependencies = subpassDeps.data(),
};
VK_CHECK(m_vk->CreateRenderPass(m_vk->device,
&renderPassCreateInfo,
nullptr,
&m_renderPass));
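// Label the render pass with its key parameters so it's identifiable in
// debugging tools.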
const std::string renderPipelineLabel =
(std::ostringstream()
<< "RIVE_Draw{interlockMode=" << int(interlockMode)
<< ", renderPassOptions=" << int(renderPassOptions)
<< ", renderTargetFormat=" << int(renderTargetFormat)
<< ", loadAction=" << int(loadAction) << '}')
.str();
m_vk->setDebugNameIfEnabled(uint64_t(m_renderPass),
VK_OBJECT_TYPE_RENDER_PASS,
renderPipelineLabel.c_str());
}
RenderPassVulkan::~RenderPassVulkan()
{
// Don't touch m_drawPipelineLayout in the destructor since the destruction
// order between us and impl->m_drawPipelineLayouts is uncertain.
m_vk->DestroyRenderPass(m_vk->device, m_renderPass, VK_NULL_HANDLE);
}
} // namespace rive::gpu