| /* |
| * Copyright 2023 Rive |
| */ |
| |
| #include "rive/renderer/vulkan/render_context_vulkan_impl.hpp" |
| |
| #include "vulkan_shaders.hpp" |
| #include "rive/renderer/stack_vector.hpp" |
| #include "rive/renderer/texture.hpp" |
| #include "rive/renderer/rive_render_buffer.hpp" |
| #include "rive/renderer/vulkan/render_target_vulkan.hpp" |
| #include "shaders/constants.glsl" |
| #include "common_layouts.hpp" |
| #include "draw_pipeline_vulkan.hpp" |
| #include "draw_pipeline_layout_vulkan.hpp" |
| #include "draw_shader_vulkan.hpp" |
| #include "pipeline_manager_vulkan.hpp" |
| #include "render_pass_vulkan.hpp" |
| #include "instance_chunker.hpp" |
| #include <sstream> |
| |
| namespace rive::gpu |
| { |
| using PLSBackingType = PipelineManagerVulkan::PLSBackingType; |
| |
| static VkBufferUsageFlagBits render_buffer_usage_flags( |
| RenderBufferType renderBufferType) |
| { |
| switch (renderBufferType) |
| { |
| case RenderBufferType::index: |
| return VK_BUFFER_USAGE_INDEX_BUFFER_BIT; |
| case RenderBufferType::vertex: |
| return VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; |
| } |
| RIVE_UNREACHABLE(); |
| } |
| |
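| // Vulkan implementation of RiveRenderBuffer, backed by a vkutil::BufferPool |
| // so that each map() can cycle to a fresh buffer while previously mapped |
| // contents may still be referenced by in-flight command buffers. |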
| class RenderBufferVulkanImpl |
| : public LITE_RTTI_OVERRIDE(RiveRenderBuffer, RenderBufferVulkanImpl) |
| { |
| public: |
| RenderBufferVulkanImpl(rcp<VulkanContext> vk, |
| RenderBufferType renderBufferType, |
| RenderBufferFlags renderBufferFlags, |
| size_t sizeInBytes) : |
| lite_rtti_override(renderBufferType, renderBufferFlags, sizeInBytes), |
| m_bufferPool(make_rcp<vkutil::BufferPool>( |
| std::move(vk), |
| render_buffer_usage_flags(renderBufferType), |
| sizeInBytes)) |
| {} |
| |
| vkutil::Buffer* currentBuffer() { return m_currentBuffer.get(); } |
| |
| protected: |
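| // Cycle to a new buffer from the pool on each map; the previous buffer is |
| // recycled rather than overwritten in place. |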
| void* onMap() override |
| { |
| m_bufferPool->recycle(std::move(m_currentBuffer)); |
| m_currentBuffer = m_bufferPool->acquire(); |
| return m_currentBuffer->contents(); |
| } |
| |
| void onUnmap() override { m_currentBuffer->flushContents(); } |
| |
| private: |
| rcp<vkutil::BufferPool> m_bufferPool; |
| rcp<vkutil::Buffer> m_currentBuffer; |
| }; |
| |
| rcp<RenderBuffer> RenderContextVulkanImpl::makeRenderBuffer( |
| RenderBufferType type, |
| RenderBufferFlags flags, |
| size_t sizeInBytes) |
| { |
| return make_rcp<RenderBufferVulkanImpl>(m_vk, type, flags, sizeInBytes); |
| } |
| |
| rcp<Texture> RenderContextVulkanImpl::makeImageTexture( |
| uint32_t width, |
| uint32_t height, |
| uint32_t mipLevelCount, |
| const uint8_t imageDataRGBAPremul[]) |
| { |
| auto texture = m_vk->makeTexture2D( |
| { |
| .format = VK_FORMAT_R8G8B8A8_UNORM, |
| .extent = {width, height}, |
| .mipLevels = mipLevelCount, |
| }, |
| "RenderContext imageTexture"); |
| texture->scheduleUpload(imageDataRGBAPremul, height * width * 4); |
| return texture; |
| } |
| |
| // Common base class for a pipeline that renders a texture resource at the |
| // beginning of a flush, which is then read during the main draw pass. |
| class RenderContextVulkanImpl::ResourceTexturePipeline |
| { |
| public: |
| ResourceTexturePipeline(rcp<VulkanContext> vk, |
| VkFormat format, |
| VkAttachmentLoadOp loadOp, |
| VkPipelineStageFlags resourceConsumptionStage, |
| const char* label, |
| const DriverWorkarounds& workarounds) : |
| m_vk(std::move(vk)) |
| { |
| const VkAttachmentDescription attachment = { |
| .format = format, |
| .samples = VK_SAMPLE_COUNT_1_BIT, |
| .loadOp = loadOp, |
| .storeOp = VK_ATTACHMENT_STORE_OP_STORE, |
| .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| .finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| }; |
| |
| const VkSubpassDependency dependencies[] = { |
| { |
| .srcSubpass = VK_SUBPASS_EXTERNAL, |
| .dstSubpass = 0, |
| .srcStageMask = resourceConsumptionStage, |
| .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, |
| .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, |
| }, |
| { |
| .srcSubpass = 0, |
| .dstSubpass = VK_SUBPASS_EXTERNAL, |
| .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| .dstStageMask = resourceConsumptionStage, |
| .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, |
| .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, |
| }, |
| }; |
| |
| const VkRenderPassCreateInfo renderPassCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, |
| .attachmentCount = 1, |
| .pAttachments = &attachment, |
| .subpassCount = 1, |
| .pSubpasses = &layout::SINGLE_ATTACHMENT_SUBPASS, |
| .dependencyCount = std::size(dependencies), |
| .pDependencies = dependencies, |
| }; |
| |
| VK_CHECK(m_vk->CreateRenderPass(m_vk->device, |
| &renderPassCreateInfo, |
| nullptr, |
| &m_renderPass)); |
| |
| const std::string renderPassLabel = |
| std::string(label) + " RenderPass"; |
| m_vk->setDebugNameIfEnabled(uint64_t(m_renderPass), |
| VK_OBJECT_TYPE_RENDER_PASS, |
| renderPassLabel.c_str()); |
| |
| if (workarounds.needsInterruptibleRenderPasses()) |
| { |
| // We are running on a device that is known to crash if a render |
| // pass is too complex. Create another render pass that is designed |
| // to resume atlas rendering in the event that the previous render |
| // pass had to be interrupted (in order to work around the crash). |
| auto resumingAttachment = attachment; |
| resumingAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; |
| resumingAttachment.initialLayout = |
| VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; |
| |
| VkRenderPassCreateInfo resumingRenderPassCreateInfo = |
| renderPassCreateInfo; |
| resumingRenderPassCreateInfo.pAttachments = &resumingAttachment; |
| |
| VK_CHECK(m_vk->CreateRenderPass(m_vk->device, |
| &resumingRenderPassCreateInfo, |
| nullptr, |
| &m_resumingRenderPass)); |
| |
| const std::string resumingRenderPassLabel = |
| std::string(label) + " RESUME RenderPass"; |
| m_vk->setDebugNameIfEnabled(uint64_t(m_resumingRenderPass), |
| VK_OBJECT_TYPE_RENDER_PASS, |
| resumingRenderPassLabel.c_str()); |
| } |
| } |
| |
| virtual ~ResourceTexturePipeline() |
| { |
| m_vk->DestroyRenderPass(m_vk->device, m_renderPass, nullptr); |
| if (m_resumingRenderPass != VK_NULL_HANDLE) |
| { |
| m_vk->DestroyRenderPass(m_vk->device, |
| m_resumingRenderPass, |
| nullptr); |
| } |
| } |
| |
| VkRenderPass renderPass() const { return m_renderPass; } |
| |
| void beginRenderPass(VkCommandBuffer commandBuffer, |
| VkRect2D renderArea, |
| VkFramebuffer framebuffer) |
| { |
| beginRenderPass(commandBuffer, |
| renderArea, |
| framebuffer, |
| RenderPassType::primary); |
| } |
| |
| // Some early Android tilers are known to crash when a render pass is too |
| // complex. This is a mechanism to interrupt and begin a new render pass on |
| // affected devices after a pre-set complexity is reached. |
| void interruptRenderPassIfNeeded(VkCommandBuffer commandBuffer, |
| VkRect2D renderArea, |
| VkFramebuffer framebuffer, |
| uint32_t nextInstanceCount, |
| const DriverWorkarounds& workarounds) |
| { |
| assert(m_instanceCountInCurrentRenderPass <= |
| workarounds.maxInstancesPerRenderPass); |
| assert(nextInstanceCount <= workarounds.maxInstancesPerRenderPass); |
| |
| if (m_instanceCountInCurrentRenderPass + nextInstanceCount > |
| workarounds.maxInstancesPerRenderPass) |
| { |
| m_vk->CmdEndRenderPass(commandBuffer); |
| |
| // We don't need to bind new pipelines, even though we changed |
| // the render pass, because Vulkan allows for pipelines to be |
| // used interchangeably with "compatible" render passes. |
| beginRenderPass(commandBuffer, |
| renderArea, |
| framebuffer, |
| RenderPassType::resume); |
| } |
| m_instanceCountInCurrentRenderPass += nextInstanceCount; |
| } |
| |
| protected: |
| const rcp<VulkanContext> m_vk; |
| |
| private: |
| enum class RenderPassType |
| { |
| primary, |
| resume, |
| }; |
| |
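| // Begins either the primary or the resuming render pass (used when a prior |
| // pass was interrupted) and resets the per-pass instance counter. |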
| void beginRenderPass(VkCommandBuffer commandBuffer, |
| VkRect2D renderArea, |
| VkFramebuffer framebuffer, |
| RenderPassType renderPassType) |
| { |
| constexpr static VkClearValue CLEAR_ZERO = {}; |
| |
| const VkRenderPass renderPass = |
| (renderPassType == RenderPassType::primary) ? m_renderPass |
| : m_resumingRenderPass; |
| assert(renderPass != VK_NULL_HANDLE); |
| |
| VkRenderPassBeginInfo renderPassBeginInfo = { |
| .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, |
| .renderPass = renderPass, |
| .framebuffer = framebuffer, |
| .renderArea = renderArea, |
| .clearValueCount = 1, |
| .pClearValues = &CLEAR_ZERO, |
| }; |
| |
| m_vk->CmdBeginRenderPass(commandBuffer, |
| &renderPassBeginInfo, |
| VK_SUBPASS_CONTENTS_INLINE); |
| |
| m_instanceCountInCurrentRenderPass = 0; |
| } |
| |
| VkRenderPass m_renderPass; |
| VkRenderPass m_resumingRenderPass = VK_NULL_HANDLE; |
| uint32_t m_instanceCountInCurrentRenderPass = 0; |
| }; |
| |
| // Renders color ramps to the gradient texture. |
| class RenderContextVulkanImpl::ColorRampPipeline |
| : public ResourceTexturePipeline |
| { |
| public: |
| ColorRampPipeline(PipelineManagerVulkan* pipelineManager, |
| const DriverWorkarounds& workarounds) : |
| ResourceTexturePipeline(ref_rcp(pipelineManager->vulkanContext()), |
| VK_FORMAT_R8G8B8A8_UNORM, |
| VK_ATTACHMENT_LOAD_OP_DONT_CARE, |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, |
| "ColorRamp", |
| workarounds) |
| { |
| VkDescriptorSetLayout perFlushDescriptorSetLayout = |
| pipelineManager->perFlushDescriptorSetLayout(); |
| VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, |
| .setLayoutCount = 1, |
| .pSetLayouts = &perFlushDescriptorSetLayout, |
| }; |
| |
| VK_CHECK(m_vk->CreatePipelineLayout(m_vk->device, |
| &pipelineLayoutCreateInfo, |
| nullptr, |
| &m_pipelineLayout)); |
| |
| VkShaderModuleCreateInfo shaderModuleCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| .codeSize = spirv::color_ramp_vert.size_bytes(), |
| .pCode = spirv::color_ramp_vert.data(), |
| }; |
| |
| VkShaderModule vertexShader; |
| VK_CHECK(m_vk->CreateShaderModule(m_vk->device, |
| &shaderModuleCreateInfo, |
| nullptr, |
| &vertexShader)); |
| |
| shaderModuleCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| .codeSize = spirv::color_ramp_frag.size_bytes(), |
| .pCode = spirv::color_ramp_frag.data(), |
| }; |
| |
| VkShaderModule fragmentShader; |
| VK_CHECK(m_vk->CreateShaderModule(m_vk->device, |
| &shaderModuleCreateInfo, |
| nullptr, |
| &fragmentShader)); |
| |
| VkPipelineShaderStageCreateInfo stages[] = { |
| { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| .stage = VK_SHADER_STAGE_VERTEX_BIT, |
| .module = vertexShader, |
| .pName = "main", |
| }, |
| { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| .stage = VK_SHADER_STAGE_FRAGMENT_BIT, |
| .module = fragmentShader, |
| .pName = "main", |
| }, |
| }; |
| |
| VkVertexInputBindingDescription vertexInputBindingDescription = { |
| .binding = 0, |
| .stride = sizeof(gpu::GradientSpan), |
| .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE, |
| }; |
| |
| VkVertexInputAttributeDescription vertexAttributeDescription = { |
| .location = 0, |
| .binding = 0, |
| .format = VK_FORMAT_R32G32B32A32_UINT, |
| }; |
| |
| VkPipelineVertexInputStateCreateInfo |
| pipelineVertexInputStateCreateInfo = { |
| .sType = |
| VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, |
| .vertexBindingDescriptionCount = 1, |
| .pVertexBindingDescriptions = &vertexInputBindingDescription, |
| .vertexAttributeDescriptionCount = 1, |
| .pVertexAttributeDescriptions = &vertexAttributeDescription, |
| }; |
| |
| VkGraphicsPipelineCreateInfo pipelineCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, |
| .stageCount = 2, |
| .pStages = stages, |
| .pVertexInputState = &pipelineVertexInputStateCreateInfo, |
| .pInputAssemblyState = &layout::INPUT_ASSEMBLY_TRIANGLE_STRIP, |
| .pViewportState = &layout::SINGLE_VIEWPORT, |
| .pRasterizationState = &layout::RASTER_STATE_CULL_BACK_CCW, |
| .pMultisampleState = &layout::MSAA_DISABLED, |
| .pColorBlendState = &layout::SINGLE_ATTACHMENT_BLEND_DISABLED, |
| .pDynamicState = &layout::DYNAMIC_VIEWPORT_SCISSOR, |
| .layout = m_pipelineLayout, |
| .renderPass = renderPass(), |
| }; |
| |
| VK_CHECK(m_vk->CreateGraphicsPipelines(m_vk->device, |
| VK_NULL_HANDLE, |
| 1, |
| &pipelineCreateInfo, |
| nullptr, |
| &m_renderPipeline)); |
| m_vk->setDebugNameIfEnabled(uint64_t(m_renderPipeline), |
| VK_OBJECT_TYPE_PIPELINE, |
| "Color Ramp Pipeline"); |
| |
| m_vk->DestroyShaderModule(m_vk->device, vertexShader, nullptr); |
| m_vk->DestroyShaderModule(m_vk->device, fragmentShader, nullptr); |
| } |
| |
| ~ColorRampPipeline() override |
| { |
| m_vk->DestroyPipelineLayout(m_vk->device, m_pipelineLayout, nullptr); |
| m_vk->DestroyPipeline(m_vk->device, m_renderPipeline, nullptr); |
| } |
| |
| VkPipelineLayout pipelineLayout() const { return m_pipelineLayout; } |
| VkPipeline renderPipeline() const { return m_renderPipeline; } |
| |
| private: |
| VkPipelineLayout m_pipelineLayout; |
| VkPipeline m_renderPipeline; |
| }; |
| |
| // Renders tessellated vertices to the tessellation texture. |
| class RenderContextVulkanImpl::TessellatePipeline |
| : public ResourceTexturePipeline |
| { |
| public: |
| TessellatePipeline(PipelineManagerVulkan* pipelineManager, |
| const DriverWorkarounds& workarounds) : |
| ResourceTexturePipeline(ref_rcp(pipelineManager->vulkanContext()), |
| VK_FORMAT_R32G32B32A32_UINT, |
| VK_ATTACHMENT_LOAD_OP_DONT_CARE, |
| VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, |
| "Tessellate", |
| workarounds) |
| { |
| VkDescriptorSetLayout pipelineDescriptorSetLayouts[] = { |
| pipelineManager->perFlushDescriptorSetLayout(), |
| pipelineManager->emptyDescriptorSetLayout(), |
| pipelineManager->immutableSamplerDescriptorSetLayout(), |
| }; |
| static_assert(PER_FLUSH_BINDINGS_SET == 0); |
| static_assert(IMMUTABLE_SAMPLER_BINDINGS_SET == 2); |
| |
| VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, |
| .setLayoutCount = std::size(pipelineDescriptorSetLayouts), |
| .pSetLayouts = pipelineDescriptorSetLayouts, |
| }; |
| |
| VK_CHECK(m_vk->CreatePipelineLayout(m_vk->device, |
| &pipelineLayoutCreateInfo, |
| nullptr, |
| &m_pipelineLayout)); |
| |
| VkShaderModuleCreateInfo shaderModuleCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| .codeSize = spirv::tessellate_vert.size_bytes(), |
| .pCode = spirv::tessellate_vert.data(), |
| }; |
| |
| VkShaderModule vertexShader; |
| VK_CHECK(m_vk->CreateShaderModule(m_vk->device, |
| &shaderModuleCreateInfo, |
| nullptr, |
| &vertexShader)); |
| |
| shaderModuleCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| .codeSize = spirv::tessellate_frag.size_bytes(), |
| .pCode = spirv::tessellate_frag.data(), |
| }; |
| |
| VkShaderModule fragmentShader; |
| VK_CHECK(m_vk->CreateShaderModule(m_vk->device, |
| &shaderModuleCreateInfo, |
| nullptr, |
| &fragmentShader)); |
| |
| VkPipelineShaderStageCreateInfo stages[] = { |
| { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| .stage = VK_SHADER_STAGE_VERTEX_BIT, |
| .module = vertexShader, |
| .pName = "main", |
| }, |
| { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| .stage = VK_SHADER_STAGE_FRAGMENT_BIT, |
| .module = fragmentShader, |
| .pName = "main", |
| }, |
| }; |
| |
| VkVertexInputBindingDescription vertexInputBindingDescription = { |
| .binding = 0, |
| .stride = sizeof(gpu::TessVertexSpan), |
| .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE, |
| }; |
| |
| VkVertexInputAttributeDescription vertexAttributeDescriptions[] = { |
| { |
| .location = 0, |
| .binding = 0, |
| .format = VK_FORMAT_R32G32B32A32_SFLOAT, |
| .offset = 0, |
| }, |
| { |
| .location = 1, |
| .binding = 0, |
| .format = VK_FORMAT_R32G32B32A32_SFLOAT, |
| .offset = 4 * sizeof(float), |
| }, |
| { |
| .location = 2, |
| .binding = 0, |
| .format = VK_FORMAT_R32G32B32A32_SFLOAT, |
| .offset = 8 * sizeof(float), |
| }, |
| { |
| .location = 3, |
| .binding = 0, |
| .format = VK_FORMAT_R32G32B32A32_UINT, |
| .offset = 12 * sizeof(float), |
| }, |
| }; |
| |
| VkPipelineVertexInputStateCreateInfo |
| pipelineVertexInputStateCreateInfo = { |
| .sType = |
| VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, |
| .vertexBindingDescriptionCount = 1, |
| .pVertexBindingDescriptions = &vertexInputBindingDescription, |
| .vertexAttributeDescriptionCount = 4, |
| .pVertexAttributeDescriptions = vertexAttributeDescriptions, |
| }; |
| |
| VkGraphicsPipelineCreateInfo pipelineCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, |
| .stageCount = 2, |
| .pStages = stages, |
| .pVertexInputState = &pipelineVertexInputStateCreateInfo, |
| .pInputAssemblyState = &layout::INPUT_ASSEMBLY_TRIANGLE_LIST, |
| .pViewportState = &layout::SINGLE_VIEWPORT, |
| .pRasterizationState = &layout::RASTER_STATE_CULL_BACK_CCW, |
| .pMultisampleState = &layout::MSAA_DISABLED, |
| .pColorBlendState = &layout::SINGLE_ATTACHMENT_BLEND_DISABLED, |
| .pDynamicState = &layout::DYNAMIC_VIEWPORT_SCISSOR, |
| .layout = m_pipelineLayout, |
| .renderPass = renderPass(), |
| }; |
| |
| VK_CHECK(m_vk->CreateGraphicsPipelines(m_vk->device, |
| VK_NULL_HANDLE, |
| 1, |
| &pipelineCreateInfo, |
| nullptr, |
| &m_renderPipeline)); |
| m_vk->setDebugNameIfEnabled(uint64_t(m_renderPipeline), |
| VK_OBJECT_TYPE_PIPELINE, |
| "Tesselation Pipeline"); |
| |
| m_vk->DestroyShaderModule(m_vk->device, vertexShader, nullptr); |
| m_vk->DestroyShaderModule(m_vk->device, fragmentShader, nullptr); |
| } |
| |
| ~TessellatePipeline() override |
| { |
| m_vk->DestroyPipelineLayout(m_vk->device, m_pipelineLayout, nullptr); |
| m_vk->DestroyPipeline(m_vk->device, m_renderPipeline, nullptr); |
| } |
| |
| VkPipelineLayout pipelineLayout() const { return m_pipelineLayout; } |
| VkPipeline renderPipeline() const { return m_renderPipeline; } |
| |
| private: |
| VkPipelineLayout m_pipelineLayout; |
| VkPipeline m_renderPipeline; |
| }; |
| |
| // Renders feathers to the atlas. |
| class RenderContextVulkanImpl::AtlasPipeline : public ResourceTexturePipeline |
| { |
| public: |
| AtlasPipeline(PipelineManagerVulkan* pipelineManager, |
| const DriverWorkarounds& workarounds) : |
| ResourceTexturePipeline(ref_rcp(pipelineManager->vulkanContext()), |
| pipelineManager->atlasFormat(), |
| VK_ATTACHMENT_LOAD_OP_CLEAR, |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, |
| "Atlas", |
| workarounds) |
| { |
| VkDescriptorSetLayout pipelineDescriptorSetLayouts[] = { |
| pipelineManager->perFlushDescriptorSetLayout(), |
| pipelineManager->emptyDescriptorSetLayout(), |
| pipelineManager->immutableSamplerDescriptorSetLayout(), |
| }; |
| static_assert(PER_FLUSH_BINDINGS_SET == 0); |
| static_assert(IMMUTABLE_SAMPLER_BINDINGS_SET == 2); |
| |
| VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, |
| .setLayoutCount = std::size(pipelineDescriptorSetLayouts), |
| .pSetLayouts = pipelineDescriptorSetLayouts, |
| }; |
| |
| VK_CHECK(m_vk->CreatePipelineLayout(m_vk->device, |
| &pipelineLayoutCreateInfo, |
| nullptr, |
| &m_pipelineLayout)); |
| |
| VkShaderModuleCreateInfo shaderModuleCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| .codeSize = spirv::render_atlas_vert.size_bytes(), |
| .pCode = spirv::render_atlas_vert.data(), |
| }; |
| |
| VkShaderModule vertexShader; |
| VK_CHECK(m_vk->CreateShaderModule(m_vk->device, |
| &shaderModuleCreateInfo, |
| nullptr, |
| &vertexShader)); |
| |
| shaderModuleCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| .codeSize = spirv::render_atlas_fill_frag.size_bytes(), |
| .pCode = spirv::render_atlas_fill_frag.data(), |
| }; |
| |
| VkShaderModule fragmentFillShader; |
| VK_CHECK(m_vk->CreateShaderModule(m_vk->device, |
| &shaderModuleCreateInfo, |
| nullptr, |
| &fragmentFillShader)); |
| |
| shaderModuleCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| .codeSize = spirv::render_atlas_stroke_frag.size_bytes(), |
| .pCode = spirv::render_atlas_stroke_frag.data(), |
| }; |
| |
| VkShaderModule fragmentStrokeShader; |
| VK_CHECK(m_vk->CreateShaderModule(m_vk->device, |
| &shaderModuleCreateInfo, |
| nullptr, |
| &fragmentStrokeShader)); |
| |
| VkPipelineShaderStageCreateInfo stages[] = { |
| { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| .stage = VK_SHADER_STAGE_VERTEX_BIT, |
| .module = vertexShader, |
| .pName = "main", |
| }, |
| { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| .stage = VK_SHADER_STAGE_FRAGMENT_BIT, |
| // Set for individual fill/stroke pipelines. |
| .module = VK_NULL_HANDLE, |
| .pName = "main", |
| }, |
| }; |
| |
| VkPipelineColorBlendAttachmentState blendState = |
| VkPipelineColorBlendAttachmentState{ |
| .blendEnable = VK_TRUE, |
| .srcColorBlendFactor = VK_BLEND_FACTOR_ONE, |
| .dstColorBlendFactor = VK_BLEND_FACTOR_ONE, |
| .colorWriteMask = VK_COLOR_COMPONENT_R_BIT, |
| }; |
| VkPipelineColorBlendStateCreateInfo blendStateCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, |
| .attachmentCount = 1u, |
| .pAttachments = &blendState, |
| }; |
| |
| VkGraphicsPipelineCreateInfo pipelineCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, |
| .stageCount = std::size(stages), |
| .pStages = stages, |
| .pVertexInputState = &layout::PATH_VERTEX_INPUT_STATE, |
| .pInputAssemblyState = &layout::INPUT_ASSEMBLY_TRIANGLE_LIST, |
| .pViewportState = &layout::SINGLE_VIEWPORT, |
| .pRasterizationState = &layout::RASTER_STATE_CULL_BACK_CW, |
| .pMultisampleState = &layout::MSAA_DISABLED, |
| .pColorBlendState = &blendStateCreateInfo, |
| .pDynamicState = &layout::DYNAMIC_VIEWPORT_SCISSOR, |
| .layout = m_pipelineLayout, |
| .renderPass = renderPass(), |
| }; |
| |
| stages[1].module = fragmentFillShader; |
| blendState.colorBlendOp = VK_BLEND_OP_ADD; |
| VK_CHECK(m_vk->CreateGraphicsPipelines(m_vk->device, |
| VK_NULL_HANDLE, |
| 1, |
| &pipelineCreateInfo, |
| nullptr, |
| &m_fillPipeline)); |
| m_vk->setDebugNameIfEnabled(uint64_t(m_fillPipeline), |
| VK_OBJECT_TYPE_PIPELINE, |
| "Atlas Fill Pipeline"); |
| |
| stages[1].module = fragmentStrokeShader; |
| blendState.colorBlendOp = VK_BLEND_OP_MAX; |
| VK_CHECK(m_vk->CreateGraphicsPipelines(m_vk->device, |
| VK_NULL_HANDLE, |
| 1, |
| &pipelineCreateInfo, |
| nullptr, |
| &m_strokePipeline)); |
| m_vk->setDebugNameIfEnabled(uint64_t(m_strokePipeline), |
| VK_OBJECT_TYPE_PIPELINE, |
| "Atlas Stroke Pipeline"); |
| |
| m_vk->DestroyShaderModule(m_vk->device, vertexShader, nullptr); |
| m_vk->DestroyShaderModule(m_vk->device, fragmentFillShader, nullptr); |
| m_vk->DestroyShaderModule(m_vk->device, fragmentStrokeShader, nullptr); |
| } |
| |
| ~AtlasPipeline() override |
| { |
| m_vk->DestroyPipelineLayout(m_vk->device, m_pipelineLayout, nullptr); |
| m_vk->DestroyPipeline(m_vk->device, m_fillPipeline, nullptr); |
| m_vk->DestroyPipeline(m_vk->device, m_strokePipeline, nullptr); |
| } |
| |
| VkPipelineLayout pipelineLayout() const { return m_pipelineLayout; } |
| VkPipeline fillPipeline() const { return m_fillPipeline; } |
| VkPipeline strokePipeline() const { return m_strokePipeline; } |
| |
| private: |
| VkPipelineLayout m_pipelineLayout; |
| VkPipeline m_fillPipeline; |
| VkPipeline m_strokePipeline; |
| }; |
| |
| RenderContextVulkanImpl::RenderContextVulkanImpl( |
| rcp<VulkanContext> vk, |
| const ContextOptions& contextOptions) : |
| m_vk(std::move(vk)), |
| m_workarounds({ |
| .maxInstancesPerRenderPass = |
| (m_vk->physicalDeviceProperties().apiVersion < VK_API_VERSION_1_3 && |
| (m_vk->physicalDeviceProperties().vendorID == VULKAN_VENDOR_ARM || |
| m_vk->physicalDeviceProperties().vendorID == |
| VULKAN_VENDOR_IMG_TEC)) |
| // Early Mali and PowerVR devices are known to crash when a |
| // single render pass is too complex. |
| ? (1u << 13) - 1u |
| : UINT32_MAX, |
| // Early Xclipse drivers struggle with our manual MSAA resolve, so we |
| // always do automatic fullscreen resolves on that GPU family. |
| .avoidManualMSAAResolves = |
| m_vk->physicalDeviceProperties().vendorID == VULKAN_VENDOR_SAMSUNG, |
| // Some Android drivers (some Adreno drivers on Android 12 and earlier) |
| // have issues with render passes that have both a self-dependency for |
| // dst reads and resolve attachments. For now, we always manually |
| // resolve render passes that use advanced blend on Qualcomm. |
| .needsManualMSAAResolveAfterDstRead = |
| m_vk->physicalDeviceProperties().vendorID == VULKAN_VENDOR_QUALCOMM, |
| }), |
| m_flushUniformBufferPool(m_vk, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), |
| m_imageDrawUniformBufferPool(m_vk, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT), |
| m_pathBufferPool(m_vk, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), |
| m_paintBufferPool(m_vk, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), |
| m_paintAuxBufferPool(m_vk, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), |
| m_contourBufferPool(m_vk, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), |
| m_gradSpanBufferPool(m_vk, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT), |
| m_tessSpanBufferPool(m_vk, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT), |
| m_triangleBufferPool(m_vk, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT), |
| m_descriptorSetPoolPool(make_rcp<DescriptorSetPoolPool>(m_vk)) |
| { |
| const auto& physicalDeviceProps = m_vk->physicalDeviceProperties(); |
| |
| m_platformFeatures.supportsRasterOrderingMode = |
| !contextOptions.forceAtomicMode && |
| m_vk->features.rasterizationOrderColorAttachmentAccess; |
| #ifdef RIVE_ANDROID |
| m_platformFeatures.supportsAtomicMode = |
| m_vk->features.fragmentStoresAndAtomics && |
| // For now, disable gpu::InterlockMode::atomics on Android unless |
| // explicitly requested. We will focus on stabilizing MSAA first, and |
| // then roll this mode back in. |
| contextOptions.forceAtomicMode; |
| #else |
| m_platformFeatures.supportsAtomicMode = |
| m_vk->features.fragmentStoresAndAtomics; |
| m_platformFeatures.supportsClockwiseMode = |
| m_platformFeatures.supportsClockwiseFixedFunctionMode = |
| m_vk->features.fragmentShaderPixelInterlock && |
| !contextOptions.forceAtomicMode && |
| // TODO: Before we can support rasterOrdering and clockwise at the |
| // same time, we need to figure out barriers between transient PLS |
| // attachments being bound as both input attachments and storage |
| // textures. (Probably by using |
| // VK_EXT_rasterization_order_attachment_access whenever possible in |
| // clockwise mode.) |
| !m_platformFeatures.supportsRasterOrderingMode; |
| #endif |
| m_platformFeatures.supportsClockwiseAtomicMode = |
| m_platformFeatures.supportsAtomicMode; |
| m_platformFeatures.supportsClipPlanes = |
| m_vk->features.shaderClipDistance && |
| // The Vulkan spec mandates that the minimum value for maxClipDistances |
| // is 8, but we check >= 4 anyway to be clearer about how many clip |
| // distances we actually use. |
| physicalDeviceProps.limits.maxClipDistances >= 4; |
| m_platformFeatures.clipSpaceBottomUp = false; |
| m_platformFeatures.framebufferBottomUp = false; |
| // Vulkan can't load color from a different texture into the transient MSAA |
| // texture. We need to draw the previous renderTarget contents into it |
| // manually when LoadAction::preserveRenderTarget is specified. |
| m_platformFeatures.msaaColorPreserveNeedsDraw = true; |
| m_platformFeatures.maxTextureSize = |
| physicalDeviceProps.limits.maxImageDimension2D; |
| m_platformFeatures.maxCoverageBufferLength = |
| std::min(physicalDeviceProps.limits.maxStorageBufferRange, 1u << 28) / |
| sizeof(uint32_t); |
| |
| switch (physicalDeviceProps.vendorID) |
| { |
| case VULKAN_VENDOR_QUALCOMM: |
| // Qualcomm advertises EXT_rasterization_order_attachment_access, |
| // but it's slow. Use atomics instead on this platform. |
| m_platformFeatures.supportsRasterOrderingMode = false; |
| // Pixel4 struggles with fine-grained fp16 path IDs. |
| m_platformFeatures.pathIDGranularity = 2; |
| break; |
| |
| case VULKAN_VENDOR_ARM: |
| // Raster ordering is known to work on ARM hardware, even on old |
| // drivers without EXT_rasterization_order_attachment_access, as |
| // long as you define a subpass self-dependency. |
| m_platformFeatures.supportsRasterOrderingMode = |
| !contextOptions.forceAtomicMode; |
| break; |
| |
| case VULKAN_VENDOR_IMG_TEC: |
| // Raster ordering is known to work on IMG hardware, even without |
| // EXT_rasterization_order_attachment_access, as long as you define |
| // a subpass self-dependency. |
| // IMG just can't expose the extension because they do _not_ |
| // guarantee raster ordering across samples, which is required by |
| // the extension, but irrelevant to Rive. |
| // THAT BEING SAID: while Google Chrome relies on implicit raster |
| // ordering on all IMG devices, it has only been observed to work |
| // with Rive on Vulkan 1.3 contexts (PowerVR Rogue GE9215 -- driver |
| // 1.555, and PowerVR D-Series DXT-48-1536 (Pixel 10) -- driver |
| // 1.602). |
| m_platformFeatures.supportsRasterOrderingMode = |
| !contextOptions.forceAtomicMode && |
| m_vk->features.apiVersion >= VK_API_VERSION_1_3; |
| break; |
| } |
| } |
| |
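| // Creates the GPU resources that live for the lifetime of the context: the |
| // null image texture, the pipeline manager, the color ramp / tessellate / |
| // atlas pipelines, the feather lookup texture, and the static vertex/index |
| // buffers for tessellation spans, path patches, and image rects. |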
| void RenderContextVulkanImpl::initGPUObjects( |
| ShaderCompilationMode shaderCompilationMode) |
| { |
| // Bound when there is not an image paint. |
| constexpr static uint8_t black[] = {0, 0, 0, 1}; |
| m_nullImageTexture = m_vk->makeTexture2D( |
| { |
| .format = VK_FORMAT_R8G8B8A8_UNORM, |
| .extent = {1, 1}, |
| }, |
| "null image texture"); |
| m_nullImageTexture->scheduleUpload(black, sizeof(black)); |
| |
| if (strstr(m_vk->physicalDeviceProperties().deviceName, "Adreno (TM) 8") != |
| nullptr) |
| { |
| // The Adreno 8 series (at least on the Galaxy S25) has a strange |
| // synchronization issue around our tessellation texture, where barriers |
| // appear not to work properly (leading to tessellation texture |
| // corruption, even across frames). |
| // Blitting to a 1x1 texture, however, seems to make the synchronization |
| // play nice. |
| m_tesselationSyncIssueWorkaroundTexture = m_vk->makeTexture2D( |
| { |
| .format = VK_FORMAT_R8G8B8A8_UINT, |
| .extent = {1, 1}, |
| .usage = VK_IMAGE_USAGE_SAMPLED_BIT | |
| VK_IMAGE_USAGE_TRANSFER_DST_BIT, |
| }, |
| "tesselation sync bug workaround texture"); |
| } |
| |
| m_pipelineManager = std::make_unique<PipelineManagerVulkan>( |
| m_vk, |
| shaderCompilationMode, |
| m_nullImageTexture->vkImageView()); |
| |
| // These pipelines reference Vulkan objects created above, so they must be |
| // destroyed before those objects during teardown. |
| m_colorRampPipeline = |
| std::make_unique<ColorRampPipeline>(m_pipelineManager.get(), |
| m_workarounds); |
| m_tessellatePipeline = |
| std::make_unique<TessellatePipeline>(m_pipelineManager.get(), |
| m_workarounds); |
| m_atlasPipeline = |
| std::make_unique<AtlasPipeline>(m_pipelineManager.get(), m_workarounds); |
| |
| // Determine usage flags for transient PLS backing textures. |
| m_plsTransientUsageFlags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | |
| VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; |
| if (m_platformFeatures.supportsClockwiseMode) |
| { |
| // When clockwise mode (pixel interlock) is supported, PLS backings can |
| // also be bound as storage textures. |
| m_plsTransientUsageFlags |= VK_IMAGE_USAGE_STORAGE_BIT | |
| // For vkCmdClearColorImage. |
| VK_IMAGE_USAGE_TRANSFER_DST_BIT; |
| } |
| else if (!m_workarounds.needsInterruptibleRenderPasses()) |
| { |
| // Otherwise, and as long as we don't have to build interruptible render |
| // passes, PLS backings are transient input attachments. |
| m_plsTransientUsageFlags |= VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT; |
| } |
| |
| // Emulate the feather texture1d array as a 2d texture until we add |
| // texture1d support in Vulkan. |
| uint16_t featherTextureData[gpu::GAUSSIAN_TABLE_SIZE * |
| FEATHER_TEXTURE_1D_ARRAY_LENGTH]; |
| memcpy(featherTextureData, |
| gpu::g_gaussianIntegralTableF16, |
| sizeof(gpu::g_gaussianIntegralTableF16)); |
| memcpy(featherTextureData + gpu::GAUSSIAN_TABLE_SIZE, |
| gpu::g_inverseGaussianIntegralTableF16, |
| sizeof(gpu::g_inverseGaussianIntegralTableF16)); |
| static_assert(FEATHER_FUNCTION_ARRAY_INDEX == 0); |
| static_assert(FEATHER_INVERSE_FUNCTION_ARRAY_INDEX == 1); |
| m_featherTexture = m_vk->makeTexture2D( |
| { |
| .format = VK_FORMAT_R16_SFLOAT, |
| .extent = |
| { |
| .width = gpu::GAUSSIAN_TABLE_SIZE, |
| .height = FEATHER_TEXTURE_1D_ARRAY_LENGTH, |
| }, |
| }, |
| "feather texture"); |
| m_featherTexture->scheduleUpload(featherTextureData, |
| sizeof(featherTextureData)); |
| |
| m_tessSpanIndexBuffer = m_vk->makeBuffer( |
| { |
| .size = sizeof(gpu::kTessSpanIndices), |
| .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT, |
| }, |
| vkutil::Mappability::writeOnly); |
| memcpy(m_tessSpanIndexBuffer->contents(), |
| gpu::kTessSpanIndices, |
| sizeof(gpu::kTessSpanIndices)); |
| m_tessSpanIndexBuffer->flushContents(); |
| |
| m_pathPatchVertexBuffer = m_vk->makeBuffer( |
| { |
| .size = kPatchVertexBufferCount * sizeof(gpu::PatchVertex), |
| .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, |
| }, |
| vkutil::Mappability::writeOnly); |
| m_pathPatchIndexBuffer = m_vk->makeBuffer( |
| { |
| .size = kPatchIndexBufferCount * sizeof(uint16_t), |
| .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT, |
| }, |
| vkutil::Mappability::writeOnly); |
| gpu::GeneratePatchBufferData( |
| reinterpret_cast<PatchVertex*>(m_pathPatchVertexBuffer->contents()), |
| reinterpret_cast<uint16_t*>(m_pathPatchIndexBuffer->contents())); |
| m_pathPatchVertexBuffer->flushContents(); |
| m_pathPatchIndexBuffer->flushContents(); |
| |
| m_imageRectVertexBuffer = m_vk->makeBuffer( |
| { |
| .size = sizeof(gpu::kImageRectVertices), |
| .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, |
| }, |
| vkutil::Mappability::writeOnly); |
| memcpy(m_imageRectVertexBuffer->contents(), |
| gpu::kImageRectVertices, |
| sizeof(gpu::kImageRectVertices)); |
| m_imageRectVertexBuffer->flushContents(); |
| m_imageRectIndexBuffer = m_vk->makeBuffer( |
| { |
| .size = sizeof(gpu::kImageRectIndices), |
| .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT, |
| }, |
| vkutil::Mappability::writeOnly); |
| memcpy(m_imageRectIndexBuffer->contents(), |
| gpu::kImageRectIndices, |
| sizeof(gpu::kImageRectIndices)); |
| m_imageRectIndexBuffer->flushContents(); |
| } |
| |
| RenderContextVulkanImpl::~RenderContextVulkanImpl() |
| { |
| // These should all have gotten recycled at the end of the last frame. |
| assert(m_flushUniformBuffer == nullptr); |
| assert(m_imageDrawUniformBuffer == nullptr); |
| assert(m_pathBuffer == nullptr); |
| assert(m_paintBuffer == nullptr); |
| assert(m_paintAuxBuffer == nullptr); |
| assert(m_contourBuffer == nullptr); |
| assert(m_gradSpanBuffer == nullptr); |
| assert(m_tessSpanBuffer == nullptr); |
| assert(m_triangleBuffer == nullptr); |
| |
| // Tell the context we are entering our shutdown cycle. After this point, |
| // all resources will be deleted immediately upon their refCount reaching |
| // zero, as opposed to being kept alive for in-flight command buffers. |
| m_vk->shutdown(); |
| } |
| |
| void RenderContextVulkanImpl::resizeGradientTexture(uint32_t width, |
| uint32_t height) |
| { |
| width = std::max(width, 1u); |
| height = std::max(height, 1u); |
| |
| m_gradTexture = m_vk->makeTexture2D( |
| { |
| .format = VK_FORMAT_R8G8B8A8_UNORM, |
| .extent = {width, height}, |
| .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | |
| VK_IMAGE_USAGE_SAMPLED_BIT, |
| }, |
| "gradient texture"); |
| |
| m_gradTextureFramebuffer = m_vk->makeFramebuffer({ |
| .renderPass = m_colorRampPipeline->renderPass(), |
| .attachmentCount = 1, |
| .pAttachments = m_gradTexture->vkImageViewAddressOf(), |
| .width = width, |
| .height = height, |
| .layers = 1, |
| }); |
| } |
| |
| void RenderContextVulkanImpl::resizeTessellationTexture(uint32_t width, |
| uint32_t height) |
| { |
| width = std::max(width, 1u); |
| height = std::max(height, 1u); |
| |
| VkImageUsageFlags usage = |
| VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; |
| |
| // If we are doing the Adreno synchronization workaround, we also need |
| // the TRANSFER_SRC bit. |
| if (m_tesselationSyncIssueWorkaroundTexture != nullptr) |
| { |
| usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; |
| } |
| |
| m_tessTexture = m_vk->makeTexture2D( |
| { |
| .format = VK_FORMAT_R32G32B32A32_UINT, |
| .extent = {width, height}, |
| .usage = usage, |
| }, |
| "tesselation texture"); |
| |
| m_tessTextureFramebuffer = m_vk->makeFramebuffer({ |
| .renderPass = m_tessellatePipeline->renderPass(), |
| .attachmentCount = 1, |
| .pAttachments = m_tessTexture->vkImageViewAddressOf(), |
| .width = width, |
| .height = height, |
| .layers = 1, |
| }); |
| } |
| |
| void RenderContextVulkanImpl::resizeAtlasTexture(uint32_t width, |
| uint32_t height) |
| { |
| width = std::max(width, 1u); |
| height = std::max(height, 1u); |
| |
| m_atlasTexture = m_vk->makeTexture2D( |
| { |
| .format = m_pipelineManager->atlasFormat(), |
| .extent = {width, height}, |
| .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | |
| VK_IMAGE_USAGE_SAMPLED_BIT, |
| }, |
| "atlas texture"); |
| |
| m_atlasFramebuffer = m_vk->makeFramebuffer({ |
| .renderPass = m_atlasPipeline->renderPass(), |
| .attachmentCount = 1, |
| .pAttachments = m_atlasTexture->vkImageViewAddressOf(), |
| .width = width, |
| .height = height, |
| .layers = 1, |
| }); |
| } |
| |
| void RenderContextVulkanImpl::resizeTransientPLSBacking(uint32_t width, |
| uint32_t height, |
| uint32_t planeCount) |
| { |
| // Erase the backings and allocate them lazily. Our Vulkan backend needs |
| // different allocations based on interlock mode and other factors. |
| m_plsExtent = {width, height, 1}; |
| m_plsTransientPlaneCount = planeCount; |
| m_plsTransientImageArray.reset(); |
| m_plsTransientCoverageView.reset(); |
| m_plsTransientClipView.reset(); |
| m_plsTransientScratchColorTexture.reset(); |
| m_plsOffscreenColorTexture.reset(); |
| } |
| |
| void RenderContextVulkanImpl::resizeAtomicCoverageBacking(uint32_t width, |
| uint32_t height) |
| { |
| m_plsAtomicCoverageTexture.reset(); |
| |
| if (width != 0 && height != 0) |
| { |
| m_plsAtomicCoverageTexture = m_vk->makeTexture2D( |
| { |
| .format = VK_FORMAT_R32_UINT, |
| .extent = {width, height}, |
| .usage = |
| VK_IMAGE_USAGE_STORAGE_BIT | |
| VK_IMAGE_USAGE_TRANSFER_DST_BIT, // For vkCmdClearColorImage |
| }, |
| "atomic coverage backing"); |
| } |
| } |
| |
| void RenderContextVulkanImpl::resizeCoverageBuffer(size_t sizeInBytes) |
| { |
| if (sizeInBytes == 0) |
| { |
| m_coverageBuffer = nullptr; |
| } |
| else |
| { |
| m_coverageBuffer = m_vk->makeBuffer( |
| { |
| .size = sizeInBytes, |
| .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | |
| VK_BUFFER_USAGE_TRANSFER_DST_BIT, |
| }, |
| vkutil::Mappability::none); |
| } |
| } |
| |
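| // Lazily allocates the transient PLS backing image on first use. Coverage |
| // and clip share a single r32ui image, using separate array layers when the |
| // plane count requires both. |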
| vkutil::Image* RenderContextVulkanImpl::plsTransientImageArray() |
| { |
| assert(m_plsExtent.width != 0); |
| assert(m_plsExtent.height != 0); |
| assert(m_plsExtent.depth == 1); |
| assert(m_plsTransientPlaneCount != 0); |
| |
| if (m_plsTransientImageArray == nullptr) |
| { |
| m_plsTransientImageArray = m_vk->makeImage( |
| { |
| .imageType = VK_IMAGE_TYPE_2D, |
| .format = VK_FORMAT_R32_UINT, |
| .extent = m_plsExtent, |
| .arrayLayers = std::min(m_plsTransientPlaneCount, 2u), |
| .usage = m_plsTransientUsageFlags, |
| }, |
| "plsTransientImageArray"); |
| } |
| |
| return m_plsTransientImageArray.get(); |
| } |
| |
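| // Lazily creates a single-layer view of layer 0 of the transient PLS image |
| // array, used as the coverage plane. |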
| vkutil::ImageView* RenderContextVulkanImpl::plsTransientCoverageView() |
| { |
| if (m_plsTransientCoverageView == nullptr) |
| { |
| m_plsTransientCoverageView = m_vk->makeImageView( |
| ref_rcp(plsTransientImageArray()), |
| { |
| .viewType = VK_IMAGE_VIEW_TYPE_2D, |
| .format = VK_FORMAT_R32_UINT, |
| .subresourceRange = |
| { |
| .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, |
| .levelCount = 1, |
| .baseArrayLayer = 0, |
| .layerCount = 1, |
| }, |
| }, |
| "plsTransientCoverageView"); |
| } |
| |
| return m_plsTransientCoverageView.get(); |
| } |
| |
| vkutil::ImageView* RenderContextVulkanImpl::plsTransientClipView() |
| { |
| if (m_plsTransientClipView == nullptr) |
| { |
| assert(m_plsTransientPlaneCount != 0); |
| if (m_plsTransientPlaneCount == 1) |
| { |
| // When planeCount is 1, the shaders are guaranteed to use only a |
| // single plane for an entire render pass, so we can just alias the |
| // coverage & clip views to each other to keep the bindings and |
| // validation happy. |
| m_plsTransientClipView = ref_rcp(plsTransientCoverageView()); |
| } |
| else |
| { |
| m_plsTransientClipView = m_vk->makeImageView( |
| ref_rcp(plsTransientImageArray()), |
| { |
| .viewType = VK_IMAGE_VIEW_TYPE_2D, |
| .format = VK_FORMAT_R32_UINT, |
| .subresourceRange = |
| { |
| .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, |
| .levelCount = 1, |
| .baseArrayLayer = 1, |
| .layerCount = 1, |
| }, |
| }, |
| "plsTransientClipView"); |
| } |
| } |
| |
| return m_plsTransientClipView.get(); |
| } |
| |
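| // Lazily allocates the rgba8 scratch color texture used as a transient PLS |
| // plane during the draw pass. |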
| vkutil::Texture2D* RenderContextVulkanImpl::plsTransientScratchColorTexture() |
| { |
| assert(m_plsExtent.width != 0); |
| assert(m_plsExtent.height != 0); |
| assert(m_plsExtent.depth == 1); |
| assert(m_plsTransientPlaneCount != 0); |
| |
| if (m_plsTransientScratchColorTexture == nullptr) |
| { |
| m_plsTransientScratchColorTexture = m_vk->makeTexture2D( |
| { |
| .format = VK_FORMAT_R8G8B8A8_UNORM, |
| .extent = m_plsExtent, |
| .usage = m_plsTransientUsageFlags, |
| }, |
| "plsTransientScratchColorTexture"); |
| } |
| |
| return m_plsTransientScratchColorTexture.get(); |
| } |
| |
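| // Lazily allocates an offscreen color texture that can later be copied back |
| // to the main render target, and inserts a barrier transitioning it to |
| // 'dstAccess' before returning it. |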
| vkutil::Texture2D* RenderContextVulkanImpl::accessPLSOffscreenColorTexture( |
| VkCommandBuffer commandBuffer, |
| const vkutil::ImageAccess& dstAccess, |
| vkutil::ImageAccessAction imageAccessAction) |
| { |
| assert(m_plsExtent.width != 0); |
| assert(m_plsExtent.height != 0); |
| assert(m_plsExtent.depth == 1); |
| assert(m_plsTransientPlaneCount != 0); |
| |
| if (m_plsOffscreenColorTexture == nullptr) |
| { |
| m_plsOffscreenColorTexture = m_vk->makeTexture2D( |
| { |
| .format = VK_FORMAT_R8G8B8A8_UNORM, |
| .extent = m_plsExtent, |
| .usage = (m_plsTransientUsageFlags | |
| // For copying back to the main render target. |
| VK_IMAGE_USAGE_TRANSFER_SRC_BIT) & |
| // This texture is never transient. |
| ~VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT, |
| }, |
| "PLSOffscreenColorTexture"); |
| } |
| |
| m_plsOffscreenColorTexture->barrier(commandBuffer, |
| dstAccess, |
| imageAccessAction); |
| |
| return m_plsOffscreenColorTexture.get(); |
| } |
| |
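| // Clears the offscreen color texture with vkCmdClearColorImage, then leaves |
| // it in 'dstAccessAfterClear' for subsequent use. |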
| vkutil::Texture2D* RenderContextVulkanImpl::clearPLSOffscreenColorTexture( |
| VkCommandBuffer commandBuffer, |
| ColorInt clearColor, |
| const vkutil::ImageAccess& dstAccessAfterClear) |
| { |
| m_vk->clearColorImage( |
| commandBuffer, |
| clearColor, |
| accessPLSOffscreenColorTexture( |
| commandBuffer, |
| { |
| .pipelineStages = VK_PIPELINE_STAGE_TRANSFER_BIT, |
| .accessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| .layout = VK_IMAGE_LAYOUT_GENERAL, |
| }, |
| vkutil::ImageAccessAction::invalidateContents) |
| ->vkImage(), |
| VK_IMAGE_LAYOUT_GENERAL); |
| |
| return accessPLSOffscreenColorTexture(commandBuffer, dstAccessAfterClear); |
| } |
| |
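| // Blits 'copyBounds' from the render target into the offscreen color |
| // texture, then leaves it in 'dstAccessAfterCopy'. |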
| vkutil::Texture2D* RenderContextVulkanImpl:: |
| copyRenderTargetToPLSOffscreenColorTexture( |
| VkCommandBuffer commandBuffer, |
| RenderTargetVulkan* renderTarget, |
| const IAABB& copyBounds, |
| const vkutil::ImageAccess& dstAccessAfterCopy) |
| { |
| m_vk->blitSubRect(commandBuffer, |
| renderTarget->accessTargetImage( |
| commandBuffer, |
| { |
| .pipelineStages = VK_PIPELINE_STAGE_TRANSFER_BIT, |
| .accessMask = VK_ACCESS_TRANSFER_READ_BIT, |
| .layout = VK_IMAGE_LAYOUT_GENERAL, |
| }), |
| VK_IMAGE_LAYOUT_GENERAL, |
| accessPLSOffscreenColorTexture( |
| commandBuffer, |
| { |
| .pipelineStages = VK_PIPELINE_STAGE_TRANSFER_BIT, |
| .accessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| .layout = VK_IMAGE_LAYOUT_GENERAL, |
| }, |
| vkutil::ImageAccessAction::invalidateContents) |
| ->vkImage(), |
| VK_IMAGE_LAYOUT_GENERAL, |
| copyBounds); |
| |
| return accessPLSOffscreenColorTexture(commandBuffer, dstAccessAfterCopy); |
| } |
| |
| bool RenderContextVulkanImpl::wantsManualRenderPassResolve( |
| gpu::InterlockMode interlockMode, |
| const RenderTarget* renderTarget, |
| const IAABB& renderTargetUpdateBounds, |
| gpu::DrawContents combinedDrawContents) const |
| { |
| if (interlockMode == gpu::InterlockMode::rasterOrdering && |
| !m_workarounds.needsInterruptibleRenderPasses()) |
| { |
| #ifndef __APPLE__ |
| // If the render target doesn't support input attachment usage, we will |
| // render to an offscreen texture that does. Add a resolve operation at |
| // the end of the render pass that transfers the offscreen data back to |
| // the main render target. On tilers, this saves the memory bandwidth of |
| // a fullscreen copy. |
| // NOTE: The manual resolve doesn't seem to work on MoltenVK, so don't |
| // do it on Apple. |
| auto renderTargetVulkan = |
| static_cast<const RenderTargetVulkan*>(renderTarget); |
| return !(renderTargetVulkan->targetUsageFlags() & |
| VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT); |
| #endif |
| } |
| if (interlockMode == gpu::InterlockMode::msaa && |
| !m_workarounds.avoidManualMSAAResolves) |
| { |
| if (!renderTargetUpdateBounds.contains(renderTarget->bounds())) |
| { |
| // Do manual resolves after partial updates because automatic |
| // resolves only support fullscreen. |
| // TODO: Identify when and if this is actually better than just |
| // taking the hit of an automatic fullscreen resolve. |
| return true; |
| } |
| if (m_workarounds.needsManualMSAAResolveAfterDstRead && |
| (combinedDrawContents & gpu::DrawContents::advancedBlend)) |
| { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| void RenderContextVulkanImpl::prepareToFlush(uint64_t nextFrameNumber, |
| uint64_t safeFrameNumber) |
| { |
| // These should all have gotten recycled at the end of the last frame. |
| assert(m_flushUniformBuffer == nullptr); |
| assert(m_imageDrawUniformBuffer == nullptr); |
| assert(m_pathBuffer == nullptr); |
| assert(m_paintBuffer == nullptr); |
| assert(m_paintAuxBuffer == nullptr); |
| assert(m_contourBuffer == nullptr); |
| assert(m_gradSpanBuffer == nullptr); |
| assert(m_tessSpanBuffer == nullptr); |
| assert(m_triangleBuffer == nullptr); |
| |
| // Advance the context frame and delete resources that are no longer |
| // referenced by in-flight command buffers. |
| m_vk->advanceFrameNumber(nextFrameNumber, safeFrameNumber); |
| |
| // Acquire buffers for the flush. |
| m_flushUniformBuffer = m_flushUniformBufferPool.acquire(); |
| m_imageDrawUniformBuffer = m_imageDrawUniformBufferPool.acquire(); |
| m_pathBuffer = m_pathBufferPool.acquire(); |
| m_paintBuffer = m_paintBufferPool.acquire(); |
| m_paintAuxBuffer = m_paintAuxBufferPool.acquire(); |
| m_contourBuffer = m_contourBufferPool.acquire(); |
| m_gradSpanBuffer = m_gradSpanBufferPool.acquire(); |
| m_tessSpanBuffer = m_tessSpanBufferPool.acquire(); |
| m_triangleBuffer = m_triangleBufferPool.acquire(); |
| } |
| |
| namespace descriptor_pool_limits |
| { |
| constexpr static uint32_t kMaxUniformUpdates = 3; |
| constexpr static uint32_t kMaxDynamicUniformUpdates = 1; |
| constexpr static uint32_t kMaxImageTextureUpdates = 256; |
| constexpr static uint32_t kMaxSampledImageUpdates = |
| 4 + kMaxImageTextureUpdates; // tess + grad + feather + atlas + images |
| constexpr static uint32_t kMaxStorageImageUpdates = |
| 4; // color/coverage/clip/scratch in clockwise mode. |
| constexpr static uint32_t kMaxStorageBufferUpdates = |
| 7 + // path/paint/uniform buffers |
| 1; // coverage buffer in clockwiseAtomic mode |
| constexpr static uint32_t kMaxDescriptorSets = 3 + kMaxImageTextureUpdates; |
| } // namespace descriptor_pool_limits |
| |
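| // Each DescriptorSetPool is sized according to descriptor_pool_limits, |
| // enough for one flush's worth of descriptor sets. |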
| RenderContextVulkanImpl::DescriptorSetPool::DescriptorSetPool( |
| rcp<VulkanContext> vulkanContext) : |
| vkutil::Resource(std::move(vulkanContext)) |
| { |
| VkDescriptorPoolSize descriptorPoolSizes[] = { |
| { |
| .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, |
| .descriptorCount = descriptor_pool_limits::kMaxUniformUpdates, |
| }, |
| { |
| .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, |
| .descriptorCount = |
| descriptor_pool_limits::kMaxDynamicUniformUpdates, |
| }, |
| { |
| .type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, |
| .descriptorCount = descriptor_pool_limits::kMaxSampledImageUpdates, |
| }, |
| { |
| .type = VK_DESCRIPTOR_TYPE_SAMPLER, |
| .descriptorCount = descriptor_pool_limits::kMaxImageTextureUpdates, |
| }, |
| { |
| .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, |
| .descriptorCount = descriptor_pool_limits::kMaxStorageImageUpdates, |
| }, |
| { |
| .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| .descriptorCount = descriptor_pool_limits::kMaxStorageBufferUpdates, |
| }, |
| { |
| .type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, |
| .descriptorCount = 4, |
| }, |
| }; |
| |
| VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = { |
| .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, |
| .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, |
| .maxSets = descriptor_pool_limits::kMaxDescriptorSets, |
| .poolSizeCount = std::size(descriptorPoolSizes), |
| .pPoolSizes = descriptorPoolSizes, |
| }; |
| |
| VK_CHECK(vk()->CreateDescriptorPool(vk()->device, |
| &descriptorPoolCreateInfo, |
| nullptr, |
| &m_vkDescriptorPool)); |
| } |
| |
| RenderContextVulkanImpl::DescriptorSetPool::~DescriptorSetPool() |
| { |
| vk()->DestroyDescriptorPool(vk()->device, m_vkDescriptorPool, nullptr); |
| } |
| |
| VkDescriptorSet RenderContextVulkanImpl::DescriptorSetPool:: |
| allocateDescriptorSet(VkDescriptorSetLayout layout) |
| { |
| VkDescriptorSetAllocateInfo descriptorSetAllocateInfo = { |
| .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, |
| .descriptorPool = m_vkDescriptorPool, |
| .descriptorSetCount = 1, |
| .pSetLayouts = &layout, |
| }; |
| |
| VkDescriptorSet descriptorSet; |
| VK_CHECK(vk()->AllocateDescriptorSets(vk()->device, |
| &descriptorSetAllocateInfo, |
| &descriptorSet)); |
| |
| return descriptorSet; |
| } |
| |
| void RenderContextVulkanImpl::DescriptorSetPool::reset() |
| { |
| vk()->ResetDescriptorPool(vk()->device, m_vkDescriptorPool, 0); |
| } |
| |
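| // Acquires a descriptor set pool for this flush, creating a new one if none |
| // is free, or resetting a recycled pool before reuse. |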
| rcp<RenderContextVulkanImpl::DescriptorSetPool> RenderContextVulkanImpl:: |
| DescriptorSetPoolPool::acquire() |
| { |
| auto descriptorSetPool = |
| static_rcp_cast<DescriptorSetPool>(GPUResourcePool::acquire()); |
| if (descriptorSetPool == nullptr) |
| { |
| descriptorSetPool = make_rcp<DescriptorSetPool>( |
| static_rcp_cast<VulkanContext>(m_manager)); |
| } |
| else |
| { |
| descriptorSetPool->reset(); |
| } |
| return descriptorSetPool; |
| } |
| |
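| // Looks up (or builds) the render pass for this flush's interlock mode and |
| // options, assembles the framebuffer attachments, begins the render pass, |
| // and sets the viewport/scissor. Returns the pipeline layout the draws in |
| // this render pass will use. |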
| const DrawPipelineLayoutVulkan& RenderContextVulkanImpl::beginDrawRenderPass( |
| const FlushDescriptor& desc, |
| RenderPassOptionsVulkan renderPassOptions, |
| const IAABB& drawBounds, |
| VkImageView colorImageView, |
| VkImageView msaaColorSeedImageView, |
| VkImageView msaaResolveImageView) |
| { |
| const auto commandBuffer = |
| reinterpret_cast<VkCommandBuffer>(desc.externalCommandBuffer); |
| auto* const renderTarget = |
| static_cast<RenderTargetVulkan*>(desc.renderTarget); |
| |
| RenderPassVulkan& renderPass = m_pipelineManager->getRenderPassSynchronous( |
| desc.interlockMode, |
| renderPassOptions, |
| renderTarget->framebufferFormat(), |
| desc.colorLoadAction); |
| |
| const DrawPipelineLayoutVulkan& pipelineLayout = |
| *renderPass.drawPipelineLayout(); |
| |
| // Create the framebuffer. |
| StackVector<VkImageView, layout::MAX_RENDER_PASS_ATTACHMENTS> |
| framebufferViews; |
| StackVector<VkClearValue, layout::MAX_RENDER_PASS_ATTACHMENTS> clearValues; |
| if (m_pipelineManager->plsBackingType(desc.interlockMode) == |
| PLSBackingType::inputAttachment || |
| (pipelineLayout.renderPassOptions() & |
| RenderPassOptionsVulkan::fixedFunctionColorOutput)) |
| { |
| assert(framebufferViews.size() == COLOR_PLANE_IDX); |
| framebufferViews.push_back(colorImageView); |
| clearValues.push_back( |
| {.color = vkutil::color_clear_rgba32f(desc.colorClearValue)}); |
| } |
| if (desc.interlockMode == gpu::InterlockMode::rasterOrdering) |
| { |
| assert(framebufferViews.size() == CLIP_PLANE_IDX); |
| framebufferViews.push_back(*plsTransientClipView()); |
| clearValues.push_back({}); |
| |
| assert(framebufferViews.size() == SCRATCH_COLOR_PLANE_IDX); |
| framebufferViews.push_back( |
| plsTransientScratchColorTexture()->vkImageView()); |
| clearValues.push_back({}); |
| |
| assert(framebufferViews.size() == COVERAGE_PLANE_IDX); |
| framebufferViews.push_back(*plsTransientCoverageView()); |
| clearValues.push_back( |
| {.color = vkutil::color_clear_r32ui(desc.coverageClearValue)}); |
| |
| if (renderPassOptions & RenderPassOptionsVulkan::manuallyResolved) |
| { |
| // The render pass will transfer the color data back into the |
| // renderTarget at the end. |
| assert(framebufferViews.size() == PLS_PLANE_COUNT); |
| framebufferViews.push_back(renderTarget->accessTargetImageView( |
| commandBuffer, |
| { |
| .pipelineStages = |
| VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| .accessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, |
| .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, |
| }, |
| vkutil::ImageAccessAction::invalidateContents)); |
| clearValues.push_back({}); |
| } |
| } |
| else if (desc.interlockMode == gpu::InterlockMode::atomics) |
| { |
| assert(framebufferViews.size() == CLIP_PLANE_IDX); |
| framebufferViews.push_back( |
| plsTransientScratchColorTexture()->vkImageView()); |
| clearValues.push_back({}); |
| |
| if (pipelineLayout.renderPassOptions() & |
| RenderPassOptionsVulkan::atomicCoalescedResolveAndTransfer) |
| { |
| assert(framebufferViews.size() == COALESCED_ATOMIC_RESOLVE_IDX); |
| framebufferViews.push_back(renderTarget->accessTargetImageView( |
| commandBuffer, |
| { |
| .pipelineStages = |
| VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| .accessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, |
| .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, |
| }, |
| drawBounds.contains( |
| IAABB{0, |
| 0, |
| static_cast<int32_t>(renderTarget->width()), |
| static_cast<int32_t>(renderTarget->height())}) |
| ? vkutil::ImageAccessAction::invalidateContents |
| : vkutil::ImageAccessAction::preserveContents)); |
| clearValues.push_back({}); |
| } |
| } |
| else if (desc.interlockMode == gpu::InterlockMode::msaa) |
| { |
| assert(framebufferViews.size() == MSAA_DEPTH_STENCIL_IDX); |
| framebufferViews.push_back( |
| renderTarget->msaaDepthStencilTexture()->vkImageView()); |
| clearValues.push_back( |
| {.depthStencil = {desc.depthClearValue, desc.stencilClearValue}}); |
| |
| assert(framebufferViews.size() == MSAA_RESOLVE_IDX); |
| framebufferViews.push_back(msaaResolveImageView); |
| clearValues.push_back({}); |
| |
| if (pipelineLayout.renderPassOptions() & |
| RenderPassOptionsVulkan::msaaSeedFromOffscreenTexture) |
| { |
| assert(desc.colorLoadAction == |
| gpu::LoadAction::preserveRenderTarget); |
| assert(msaaColorSeedImageView != VK_NULL_HANDLE); |
| assert(msaaColorSeedImageView != msaaResolveImageView); |
| assert(framebufferViews.size() == MSAA_COLOR_SEED_IDX); |
| framebufferViews.push_back(msaaColorSeedImageView); |
| clearValues.push_back({}); |
| } |
| } |
| |
| rcp<vkutil::Framebuffer> framebuffer = m_vk->makeFramebuffer({ |
| .renderPass = renderPass, |
| .attachmentCount = framebufferViews.size(), |
| .pAttachments = framebufferViews.data(), |
| .width = static_cast<uint32_t>(renderTarget->width()), |
| .height = static_cast<uint32_t>(renderTarget->height()), |
| .layers = 1, |
| }); |
| |
| VkRect2D renderArea = { |
| .offset = {drawBounds.left, drawBounds.top}, |
| .extent = {static_cast<uint32_t>(drawBounds.width()), |
| static_cast<uint32_t>(drawBounds.height())}, |
| }; |
| |
| assert(clearValues.size() == framebufferViews.size()); |
| VkRenderPassBeginInfo renderPassBeginInfo = { |
| .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, |
| .renderPass = renderPass, |
| .framebuffer = *framebuffer, |
| .renderArea = renderArea, |
| .clearValueCount = clearValues.size(), |
| .pClearValues = clearValues.data(), |
| }; |
| |
| m_vk->CmdBeginRenderPass(commandBuffer, |
| &renderPassBeginInfo, |
| VK_SUBPASS_CONTENTS_INLINE); |
| |
| m_vk->CmdSetViewport( |
| commandBuffer, |
| 0, |
| 1, |
| vkutil::ViewportFromRect2D( |
| {.extent = {renderTarget->width(), renderTarget->height()}})); |
| |
| m_vk->CmdSetScissor(commandBuffer, 0, 1, &renderArea); |
| |
| return pipelineLayout; |
| } |
| |
| void RenderContextVulkanImpl::flush(const FlushDescriptor& desc) |
| { |
| constexpr static VkDeviceSize ZERO_OFFSET[1] = {0}; |
| constexpr static uint32_t ZERO_OFFSET_32[1] = {0}; |
| |
| auto* const renderTarget = |
| static_cast<RenderTargetVulkan*>(desc.renderTarget); |
| |
| IAABB drawBounds = desc.renderTargetUpdateBounds; |
| if (drawBounds.empty()) |
| { |
| return; |
| } |
| |
| auto renderPassOptions = RenderPassOptionsVulkan::none; |
| if (desc.fixedFunctionColorOutput) |
| { |
| // In the case of Vulkan, fixedFunctionColorOutput means the color |
| // buffer will never be bound as an input attachment. |
| renderPassOptions |= RenderPassOptionsVulkan::fixedFunctionColorOutput; |
| } |
| if (desc.manuallyResolved) |
| { |
|         // The drawList ends with a batch of type
| // DrawType::renderPassResolve, and the render pass needs to be set up |
| // to handle manual resolving. |
| renderPassOptions |= RenderPassOptionsVulkan::manuallyResolved; |
| } |
| else if (desc.interlockMode == gpu::InterlockMode::msaa) |
| { |
| // Vulkan does not support partial MSAA resolves when using resolve |
| // attachments. |
| drawBounds = renderTarget->bounds(); |
| } |
| // Vulkan builtin MSAA resolves don't support partial drawBounds. |
| assert(desc.interlockMode != gpu::InterlockMode::msaa || |
| desc.manuallyResolved || drawBounds == renderTarget->bounds()); |
| |
| const auto commandBuffer = |
| reinterpret_cast<VkCommandBuffer>(desc.externalCommandBuffer); |
| rcp<DescriptorSetPool> descriptorSetPool = |
| m_descriptorSetPoolPool->acquire(); |
| |
| m_featherTexture->prepareForVertexOrFragmentShaderRead(commandBuffer); |
| m_nullImageTexture->prepareForFragmentShaderRead(commandBuffer); |
| |
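|     // Pre-pass over the draw list: apply pending image texture updates and
|     // total the tessellation patches, so we can decide below whether this
|     // render pass needs to be interruptible.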
| uint32_t pendingTessPatchCount = 0; |
| for (const DrawBatch& batch : *desc.drawList) |
| { |
| // Apply pending image texture updates. |
| if (auto imageTextureVulkan = |
| static_cast<vkutil::Texture2D*>(batch.imageTexture)) |
| { |
| imageTextureVulkan->prepareForFragmentShaderRead(commandBuffer); |
| } |
| |
| // Count up the complexity in tessellation patches of this flush. |
| switch (batch.drawType) |
| { |
| case DrawType::midpointFanPatches: |
| case DrawType::midpointFanCenterAAPatches: |
| case DrawType::outerCurvePatches: |
| case DrawType::msaaOuterCubics: |
| case DrawType::msaaStrokes: |
| case DrawType::msaaMidpointFanBorrowedCoverage: |
| case DrawType::msaaMidpointFans: |
| case DrawType::msaaMidpointFanStencilReset: |
| case DrawType::msaaMidpointFanPathsStencil: |
| case DrawType::msaaMidpointFanPathsCover: |
| pendingTessPatchCount += batch.elementCount; |
| break; |
| case DrawType::msaaStencilClipReset: |
| case DrawType::interiorTriangulation: |
| case DrawType::atlasBlit: |
| case DrawType::imageRect: |
| case DrawType::imageMesh: |
| case DrawType::renderPassInitialize: |
| case DrawType::renderPassResolve: |
| break; |
| } |
| } |
| if (desc.interlockMode == gpu::InterlockMode::rasterOrdering && |
| pendingTessPatchCount > m_workarounds.maxInstancesPerRenderPass) |
| { |
| // Some early Android tilers are known to crash when a render pass is |
| // too complex. Make this render pass interruptible so we can break it |
| // up and avoid the crash. |
| assert(!(m_plsTransientUsageFlags & |
| VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT)); |
| // Manually resolved render passes aren't currently compatible with |
| // interruptions. |
| assert(!desc.manuallyResolved); |
| renderPassOptions |= |
| RenderPassOptionsVulkan::rasterOrderingInterruptible; |
| } |
| |
| // Create a per-flush descriptor set. |
| VkDescriptorSet perFlushDescriptorSet = |
| descriptorSetPool->allocateDescriptorSet( |
| m_pipelineManager->perFlushDescriptorSetLayout()); |
| |
| m_vk->updateBufferDescriptorSets( |
| perFlushDescriptorSet, |
| { |
| .dstBinding = FLUSH_UNIFORM_BUFFER_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, |
| }, |
| {{ |
| .buffer = *m_flushUniformBuffer, |
| .offset = desc.flushUniformDataOffsetInBytes, |
| .range = sizeof(gpu::FlushUniforms), |
| }}); |
| |
| m_vk->updateBufferDescriptorSets( |
| perFlushDescriptorSet, |
| { |
| .dstBinding = IMAGE_DRAW_UNIFORM_BUFFER_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, |
| }, |
| {{ |
| .buffer = *m_imageDrawUniformBuffer, |
| .offset = 0, |
| .range = sizeof(gpu::ImageDrawUniforms), |
| }}); |
| |
| m_vk->updateBufferDescriptorSets( |
| perFlushDescriptorSet, |
| { |
| .dstBinding = PATH_BUFFER_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| }, |
| {{ |
| .buffer = *m_pathBuffer, |
| .offset = desc.firstPath * sizeof(gpu::PathData), |
| .range = VK_WHOLE_SIZE, |
| }}); |
| |
| m_vk->updateBufferDescriptorSets( |
| perFlushDescriptorSet, |
| { |
| .dstBinding = PAINT_BUFFER_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| }, |
| {{ |
| .buffer = *m_paintBuffer, |
| .offset = desc.firstPaint * sizeof(gpu::PaintData), |
| .range = VK_WHOLE_SIZE, |
| }}); |
| |
| // NOTE: This technically could be part of the above call (passing two |
| // buffers instead of one to set them both at once), but there is a bug on |
| // some Adreno devices where the second one does not get applied properly |
| // (all reads from PaintAuxBuffer end up reading 0s), so instead we'll do it |
| // as a separate call. |
| m_vk->updateBufferDescriptorSets( |
| perFlushDescriptorSet, |
| { |
| .dstBinding = PAINT_AUX_BUFFER_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| }, |
| {{ |
| .buffer = *m_paintAuxBuffer, |
| .offset = desc.firstPaintAux * sizeof(gpu::PaintAuxData), |
| .range = VK_WHOLE_SIZE, |
| }}); |
| static_assert(PAINT_AUX_BUFFER_IDX == PAINT_BUFFER_IDX + 1); |
| |
| m_vk->updateBufferDescriptorSets( |
| perFlushDescriptorSet, |
| { |
| .dstBinding = CONTOUR_BUFFER_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| }, |
| {{ |
| .buffer = *m_contourBuffer, |
| .offset = desc.firstContour * sizeof(gpu::ContourData), |
| .range = VK_WHOLE_SIZE, |
| }}); |
| |
| if (desc.interlockMode == gpu::InterlockMode::clockwiseAtomic && |
| m_coverageBuffer != nullptr) |
| { |
| m_vk->updateBufferDescriptorSets( |
| perFlushDescriptorSet, |
| { |
| .dstBinding = COVERAGE_BUFFER_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| }, |
| {{ |
| .buffer = *m_coverageBuffer, |
| .offset = 0, |
| .range = VK_WHOLE_SIZE, |
| }}); |
| } |
| |
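|     // Bind the per-flush sampled textures: tessellation, gradient, feather,
|     // and atlas.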
| m_vk->updateImageDescriptorSets( |
| perFlushDescriptorSet, |
| { |
| .dstBinding = TESS_VERTEX_TEXTURE_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, |
| }, |
| {{ |
| .imageView = m_tessTexture->vkImageView(), |
| .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| }}); |
| |
| m_vk->updateImageDescriptorSets( |
| perFlushDescriptorSet, |
| { |
| .dstBinding = GRAD_TEXTURE_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, |
| }, |
| {{ |
| .imageView = m_gradTexture->vkImageView(), |
| .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| }}); |
| |
| m_vk->updateImageDescriptorSets( |
| perFlushDescriptorSet, |
| { |
| .dstBinding = FEATHER_TEXTURE_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, |
| }, |
| {{ |
| .imageView = m_featherTexture->vkImageView(), |
| .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| }}); |
| |
| m_vk->updateImageDescriptorSets( |
| perFlushDescriptorSet, |
| { |
| .dstBinding = ATLAS_TEXTURE_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, |
| }, |
| {{ |
| .imageView = m_atlasTexture->vkImageView(), |
| .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| }}); |
| |
| // Render the complex color ramps to the gradient texture. |
| if (desc.gradSpanCount > 0) |
| { |
| VkRect2D renderArea = { |
| .extent = {gpu::kGradTextureWidth, desc.gradDataHeight}, |
| }; |
| |
| m_colorRampPipeline->beginRenderPass(commandBuffer, |
| renderArea, |
| *m_gradTextureFramebuffer); |
| |
| m_vk->CmdSetViewport(commandBuffer, |
| 0, |
| 1, |
| vkutil::ViewportFromRect2D(renderArea)); |
| |
| m_vk->CmdSetScissor(commandBuffer, 0, 1, &renderArea); |
| |
| VkBuffer gradSpanBuffer = *m_gradSpanBuffer; |
| VkDeviceSize gradSpanOffset = |
| desc.firstGradSpan * sizeof(gpu::GradientSpan); |
| m_vk->CmdBindVertexBuffers(commandBuffer, |
| 0, |
| 1, |
| &gradSpanBuffer, |
| &gradSpanOffset); |
| |
| m_vk->CmdBindDescriptorSets(commandBuffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| m_colorRampPipeline->pipelineLayout(), |
| PER_FLUSH_BINDINGS_SET, |
| 1, |
| &perFlushDescriptorSet, |
| 1, |
| ZERO_OFFSET_32); |
| |
| m_vk->CmdBindPipeline(commandBuffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| m_colorRampPipeline->renderPipeline()); |
| |
| for (auto [chunkInstanceCount, chunkFirstInstance] : |
| InstanceChunker(desc.gradSpanCount, |
| 0, |
| m_workarounds.maxInstancesPerRenderPass)) |
| { |
| m_colorRampPipeline->interruptRenderPassIfNeeded( |
| commandBuffer, |
| renderArea, |
| *m_gradTextureFramebuffer, |
| chunkInstanceCount, |
| m_workarounds); |
| m_vk->CmdDraw(commandBuffer, |
| gpu::GRAD_SPAN_TRI_STRIP_VERTEX_COUNT, |
| chunkInstanceCount, |
| 0, |
| chunkFirstInstance); |
| } |
| |
| m_vk->CmdEndRenderPass(commandBuffer); |
| |
| // The render pass transitioned the gradient texture to |
| // VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL. |
| m_gradTexture->lastAccess().layout = |
| VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; |
| } |
| else |
| { |
|         // The render pass above contains the necessary barriers, but since
|         // it did not run, we are still going to bind the gradient texture as
|         // READ_ONLY_OPTIMAL and therefore need to transition it here.
|         // TODO: Perhaps we should have a "null" texture we can bind for
|         // cases like this, where we need to bind all the textures but know
|         // this one isn't actually needed.
| m_gradTexture->barrier( |
| commandBuffer, |
| { |
| .pipelineStages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, |
| .accessMask = VK_ACCESS_SHADER_READ_BIT, |
| .layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| }); |
| } |
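|     // The immutable sampler descriptor set is shared by the tessellation,
|     // atlas, and main draw passes below.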
| VkDescriptorSet immutableSamplerDescriptorSet = |
| m_pipelineManager->immutableSamplerDescriptorSet(); |
| |
| // Tessellate all curves into vertices in the tessellation texture. |
| if (desc.tessVertexSpanCount > 0) |
| { |
|         // Don't render new vertices until the previous flush has finished
|         // using the tessellation texture.
|         // TODO: What this barrier does is also part of the tessellation
|         // render pass, and should be handled automatically, but on early
|         // PowerVR devices (Reno 3 Plus, Vivo Y21) tessellation is still
|         // incorrect without this explicit barrier. Figure out why.
| m_tessTexture->barrier( |
| commandBuffer, |
| { |
| .pipelineStages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| .accessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, |
| .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, |
| }, |
| vkutil::ImageAccessAction::invalidateContents); |
| |
| const VkRect2D tessellateArea = { |
| .extent = {gpu::kTessTextureWidth, desc.tessDataHeight}, |
| }; |
| |
| m_tessellatePipeline->beginRenderPass(commandBuffer, |
| tessellateArea, |
| *m_tessTextureFramebuffer); |
| |
| m_vk->CmdSetViewport(commandBuffer, |
| 0, |
| 1, |
| vkutil::ViewportFromRect2D(tessellateArea)); |
| |
| m_vk->CmdSetScissor(commandBuffer, 0, 1, &tessellateArea); |
| |
| VkBuffer tessBuffer = *m_tessSpanBuffer; |
| VkDeviceSize tessOffset = |
| desc.firstTessVertexSpan * sizeof(gpu::TessVertexSpan); |
| m_vk->CmdBindVertexBuffers(commandBuffer, |
| 0, |
| 1, |
| &tessBuffer, |
| &tessOffset); |
| |
| m_vk->CmdBindIndexBuffer(commandBuffer, |
| *m_tessSpanIndexBuffer, |
| 0, |
| VK_INDEX_TYPE_UINT16); |
| |
| m_vk->CmdBindDescriptorSets(commandBuffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| m_tessellatePipeline->pipelineLayout(), |
| PER_FLUSH_BINDINGS_SET, |
| 1, |
| &perFlushDescriptorSet, |
| 1, |
| ZERO_OFFSET_32); |
| m_vk->CmdBindDescriptorSets(commandBuffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| m_tessellatePipeline->pipelineLayout(), |
| IMMUTABLE_SAMPLER_BINDINGS_SET, |
| 1, |
| &immutableSamplerDescriptorSet, |
| 0, |
| nullptr); |
| |
| m_vk->CmdBindPipeline(commandBuffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| m_tessellatePipeline->renderPipeline()); |
| |
| for (auto [chunkInstanceCount, chunkFirstInstance] : |
| InstanceChunker(desc.tessVertexSpanCount, |
| 0, |
| m_workarounds.maxInstancesPerRenderPass)) |
| { |
| m_tessellatePipeline->interruptRenderPassIfNeeded( |
| commandBuffer, |
| tessellateArea, |
| *m_tessTextureFramebuffer, |
| chunkInstanceCount, |
| m_workarounds); |
| m_vk->CmdDrawIndexed(commandBuffer, |
| std::size(gpu::kTessSpanIndices), |
| chunkInstanceCount, |
| 0, |
| 0, |
| chunkFirstInstance); |
| } |
| |
| m_vk->CmdEndRenderPass(commandBuffer); |
| |
| // The render pass transitioned the tessellation texture to |
| // VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL. |
| m_tessTexture->lastAccess().layout = |
| VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; |
| |
| if (m_tesselationSyncIssueWorkaroundTexture != nullptr) |
| { |
|             // On the Adreno 8xx series drivers we've encountered, there is
|             // some sort of synchronization issue with the tessellation
|             // texture that causes the barriers to not work, and it ends up
|             // being corrupted. However, if we first blit it to an offscreen
|             // texture (even just a 1x1 texture), the render corruption goes
|             // away.
| m_tessTexture->barrier( |
| commandBuffer, |
| { |
| .pipelineStages = VK_PIPELINE_STAGE_TRANSFER_BIT, |
| .accessMask = VK_ACCESS_TRANSFER_READ_BIT, |
| .layout = VK_IMAGE_LAYOUT_GENERAL, |
| }); |
| |
|             // This transition only needs to happen once, so skip it if the
|             // texture is already in TRANSFER_DST_OPTIMAL.
| if (m_tesselationSyncIssueWorkaroundTexture->lastAccess().layout != |
| VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) |
| { |
| m_tesselationSyncIssueWorkaroundTexture->barrier( |
| commandBuffer, |
| { |
| .pipelineStages = VK_PIPELINE_STAGE_TRANSFER_BIT, |
| .accessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| }, |
| vkutil::ImageAccessAction::invalidateContents); |
| } |
| |
| m_vk->blitSubRect( |
| commandBuffer, |
| m_tessTexture->vkImage(), |
| VK_IMAGE_LAYOUT_GENERAL, |
| m_tesselationSyncIssueWorkaroundTexture->vkImage(), |
| VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| IAABB::MakeWH( |
| m_tesselationSyncIssueWorkaroundTexture->width(), |
| m_tesselationSyncIssueWorkaroundTexture->height())); |
| |
| // NOTE: Technically there should be a barrier after this blit to |
| // prevent a write-after-write hazard. However, we don't use this |
| // texture at all and thus don't care if we overwrite it, so there |
| // is intentionally no barrier here...but you will get a failure on |
| // this texture if you enable synchronization validation on a device |
| // with the workaround enabled. |
| } |
| } |
| |
| // Ensure the tessellation texture has finished rendering before the path |
| // vertex shaders read it. |
|     // TODO: Similar to the barrier on the way into rendering the
|     // tessellation texture, this barrier should already be covered by the
|     // tessellation render pass, but it fails on early PowerVR devices.
| m_tessTexture->barrier( |
| commandBuffer, |
| { |
| .pipelineStages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, |
| .accessMask = VK_ACCESS_SHADER_READ_BIT, |
| .layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| }); |
| |
| // Render the atlas if we have any offscreen feathers. |
| if ((desc.atlasFillBatchCount | desc.atlasStrokeBatchCount) != 0) |
| { |
| VkRect2D renderArea = { |
| .extent = {desc.atlasContentWidth, desc.atlasContentHeight}, |
| }; |
| |
| // Begin the render pass before binding buffers or updating descriptor |
| // sets. It's valid Vulkan to do these tasks in any order, but Adreno |
| // 730, 740, and 840 appreciate it when we begin the render pass first. |
| m_atlasPipeline->beginRenderPass(commandBuffer, |
| renderArea, |
| *m_atlasFramebuffer); |
| |
| m_vk->CmdSetViewport(commandBuffer, |
| 0, |
| 1, |
| vkutil::ViewportFromRect2D(renderArea)); |
| |
| m_vk->CmdBindVertexBuffers(commandBuffer, |
| 0, |
| 1, |
| m_pathPatchVertexBuffer->vkBufferAddressOf(), |
| ZERO_OFFSET); |
| m_vk->CmdBindIndexBuffer(commandBuffer, |
| *m_pathPatchIndexBuffer, |
| 0, |
| VK_INDEX_TYPE_UINT16); |
| m_vk->CmdBindDescriptorSets(commandBuffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| m_atlasPipeline->pipelineLayout(), |
| PER_FLUSH_BINDINGS_SET, |
| 1, |
| &perFlushDescriptorSet, |
| 1, |
| ZERO_OFFSET_32); |
| m_vk->CmdBindDescriptorSets(commandBuffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| m_atlasPipeline->pipelineLayout(), |
| IMMUTABLE_SAMPLER_BINDINGS_SET, |
| 1, |
| &immutableSamplerDescriptorSet, |
| 0, |
| nullptr); |
| |
| if (desc.atlasFillBatchCount != 0) |
| { |
| m_vk->CmdBindPipeline(commandBuffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| m_atlasPipeline->fillPipeline()); |
| for (size_t i = 0; i < desc.atlasFillBatchCount; ++i) |
| { |
| const gpu::AtlasDrawBatch& fillBatch = desc.atlasFillBatches[i]; |
| VkRect2D scissor = { |
| .offset = {fillBatch.scissor.left, fillBatch.scissor.top}, |
| .extent = {fillBatch.scissor.width(), |
| fillBatch.scissor.height()}, |
| }; |
| m_vk->CmdSetScissor(commandBuffer, 0, 1, &scissor); |
| for (auto [chunkPatchCount, chunkFirstPatch] : |
| InstanceChunker(fillBatch.patchCount, |
| fillBatch.basePatch, |
| m_workarounds.maxInstancesPerRenderPass)) |
| { |
| m_atlasPipeline->interruptRenderPassIfNeeded( |
| commandBuffer, |
| renderArea, |
| *m_atlasFramebuffer, |
| chunkPatchCount, |
| m_workarounds); |
| m_vk->CmdDrawIndexed( |
| commandBuffer, |
| gpu::kMidpointFanCenterAAPatchIndexCount, |
| chunkPatchCount, |
| gpu::kMidpointFanCenterAAPatchBaseIndex, |
| 0, |
| chunkFirstPatch); |
| } |
| } |
| } |
| |
| if (desc.atlasStrokeBatchCount != 0) |
| { |
| m_vk->CmdBindPipeline(commandBuffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| m_atlasPipeline->strokePipeline()); |
| for (size_t i = 0; i < desc.atlasStrokeBatchCount; ++i) |
| { |
| const gpu::AtlasDrawBatch& strokeBatch = |
| desc.atlasStrokeBatches[i]; |
| VkRect2D scissor = { |
| .offset = {strokeBatch.scissor.left, |
| strokeBatch.scissor.top}, |
| .extent = {strokeBatch.scissor.width(), |
| strokeBatch.scissor.height()}, |
| }; |
| m_vk->CmdSetScissor(commandBuffer, 0, 1, &scissor); |
| for (auto [chunkPatchCount, chunkFirstPatch] : |
| InstanceChunker(strokeBatch.patchCount, |
| strokeBatch.basePatch, |
| m_workarounds.maxInstancesPerRenderPass)) |
| { |
| m_atlasPipeline->interruptRenderPassIfNeeded( |
| commandBuffer, |
| renderArea, |
| *m_atlasFramebuffer, |
| chunkPatchCount, |
| m_workarounds); |
| m_vk->CmdDrawIndexed(commandBuffer, |
| gpu::kMidpointFanPatchBorderIndexCount, |
| chunkPatchCount, |
| gpu::kMidpointFanPatchBaseIndex, |
| 0, |
| chunkFirstPatch); |
| } |
| } |
| } |
| |
| m_vk->CmdEndRenderPass(commandBuffer); |
| |
| // The render pass transitioned the atlas texture to |
| // VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL. |
| m_atlasTexture->lastAccess().layout = |
| VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; |
| } |
| |
| // Ensures any previous accesses to a color attachment complete before we |
| // begin rendering. |
| const vkutil::ImageAccess colorLoadAccess = { |
| // "Load" operations always occur in |
| // VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT. |
| .pipelineStages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| .accessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, |
| .layout = (renderPassOptions & |
| RenderPassOptionsVulkan::fixedFunctionColorOutput) |
| ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL |
| : VK_IMAGE_LAYOUT_GENERAL, |
| }; |
| |
| const bool renderAreaIsFullTarget = drawBounds.contains( |
| IAABB{0, |
| 0, |
| static_cast<int32_t>(renderTarget->width()), |
| static_cast<int32_t>(renderTarget->height())}); |
| |
| const vkutil::ImageAccessAction targetAccessAction = |
| renderAreaIsFullTarget && |
| desc.colorLoadAction != gpu::LoadAction::preserveRenderTarget |
| ? vkutil::ImageAccessAction::invalidateContents |
| : vkutil::ImageAccessAction::preserveContents; |
| |
| const PLSBackingType plsBackingType = |
| m_pipelineManager->plsBackingType(desc.interlockMode); |
| |
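|     // Select the image view that will back the color attachment of the main
|     // draw pass. Depending on the interlock mode and what the render target
|     // supports, color is rendered either directly into the target image or
|     // into an offscreen texture that is later transferred back to the
|     // target.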
| VkImageView colorImageView = VK_NULL_HANDLE; |
| bool colorAttachmentIsOffscreen = false; |
| |
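|     // For MSAA, the resolve attachment receives the final single-sampled
|     // result; the optional color-seed attachment supplies the existing
|     // render target contents when colorLoadAction is preserveRenderTarget.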
| VkImageView msaaResolveImageView = VK_NULL_HANDLE; |
| VkImageView msaaColorSeedImageView = VK_NULL_HANDLE; |
| |
| if (desc.interlockMode == gpu::InterlockMode::msaa) |
| { |
| colorImageView = renderTarget->msaaColorTexture()->vkImageView(); |
| |
| #if 0 |
| // TODO: Some early Qualcomm devices struggle when seeding from and |
| // resolving to the same texture, even if we implement the MSAA resolve |
| // manually. For now, always copy out the render target to a separate |
| // texture when there's a preserve, but we should investigate this |
| // further. |
| if (desc.colorLoadAction == gpu::LoadAction::preserveRenderTarget && |
| (renderTarget->targetUsageFlags() & |
| VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) |
| { |
|         // We can seed from, and resolve to, the same texture.
| msaaColorSeedImageView = msaaResolveImageView = |
| renderTarget->accessTargetImageView( |
| commandBuffer, |
| { |
| // Apply a barrier for reading from this texture as an |
| // input attachment. |
| // vkCmdNextSubpass() will handle the barrier between |
| // reading this texture and resolving to it. |
| .pipelineStages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, |
| .accessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT, |
| .layout = VK_IMAGE_LAYOUT_GENERAL, |
| }); |
| } |
| else |
| #endif |
| { |
| if (desc.colorLoadAction == gpu::LoadAction::preserveRenderTarget) |
| { |
| // We have to seed the MSAA attachment from a separate texture |
| // because the render target doesn't support being bound as an |
| // input attachment. |
| msaaColorSeedImageView = |
| renderTarget |
| ->copyTargetImageToOffscreenColorTexture( |
| commandBuffer, |
| { |
| .pipelineStages = |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, |
| .accessMask = |
| VK_ACCESS_INPUT_ATTACHMENT_READ_BIT, |
| .layout = VK_IMAGE_LAYOUT_GENERAL, |
| }, |
| drawBounds) |
| ->vkImageView(); |
| renderPassOptions |= |
| RenderPassOptionsVulkan::msaaSeedFromOffscreenTexture; |
| } |
| msaaResolveImageView = renderTarget->accessTargetImageView( |
| commandBuffer, |
| { |
| .pipelineStages = |
| VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| .accessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, |
| .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, |
| }, |
| renderAreaIsFullTarget |
| ? vkutil::ImageAccessAction::invalidateContents |
| : vkutil::ImageAccessAction::preserveContents); |
| } |
| } |
| else if ((renderPassOptions & |
| RenderPassOptionsVulkan::fixedFunctionColorOutput) || |
| ((desc.interlockMode == gpu::InterlockMode::rasterOrdering || |
| desc.interlockMode == gpu::InterlockMode::atomics) && |
| (renderTarget->targetUsageFlags() & |
| VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))) |
| { |
| // We can render directly to the render target. |
| colorImageView = |
| renderTarget->accessTargetImageView(commandBuffer, |
| colorLoadAccess, |
| targetAccessAction); |
| } |
| else if (plsBackingType == PLSBackingType::storageTexture) |
| { |
| constexpr static vkutil::ImageAccess PLS_STORAGE_TEXTURE_ACCESS = { |
| .pipelineStages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, |
| .accessMask = |
| VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, |
| .layout = VK_IMAGE_LAYOUT_GENERAL, |
| }; |
| |
| if ((renderTarget->targetUsageFlags() & VK_IMAGE_USAGE_STORAGE_BIT) && |
| renderTarget->framebufferFormat() == VK_FORMAT_R8G8B8A8_UNORM) |
| { |
|             // We can bind the renderTarget as a storage texture directly to
|             // the color plane.
| if (desc.colorLoadAction == gpu::LoadAction::clear) |
| { |
| colorImageView = renderTarget->clearTargetImageView( |
| commandBuffer, |
| desc.colorClearValue, |
| PLS_STORAGE_TEXTURE_ACCESS); |
| } |
| else |
| { |
| colorImageView = renderTarget->accessTargetImageView( |
| commandBuffer, |
| PLS_STORAGE_TEXTURE_ACCESS, |
| targetAccessAction); |
| } |
| } |
| else |
| { |
| // We have to bind a separate texture to the color plane. |
| switch (desc.colorLoadAction) |
| { |
| case gpu::LoadAction::clear: |
| colorImageView = clearPLSOffscreenColorTexture( |
| commandBuffer, |
| desc.colorClearValue, |
| PLS_STORAGE_TEXTURE_ACCESS) |
| ->vkImageView(); |
| break; |
| case gpu::LoadAction::preserveRenderTarget: |
| // Preserve the target texture by copying its contents into |
| // our offscreen color texture. |
| colorImageView = copyRenderTargetToPLSOffscreenColorTexture( |
| commandBuffer, |
| renderTarget, |
| drawBounds, |
| PLS_STORAGE_TEXTURE_ACCESS) |
| ->vkImageView(); |
| break; |
| case gpu::LoadAction::dontCare: |
| colorImageView = |
| accessPLSOffscreenColorTexture( |
| commandBuffer, |
| PLS_STORAGE_TEXTURE_ACCESS, |
| vkutil::ImageAccessAction::invalidateContents) |
| ->vkImageView(); |
| break; |
| } |
| colorAttachmentIsOffscreen = true; |
| } |
| } |
| else |
| { |
| // The renderTarget doesn't support input attachments, so we have to |
| // attach a separate texture to the framebuffer for color. |
| if (desc.colorLoadAction == gpu::LoadAction::preserveRenderTarget) |
| { |
| // Preserve the target texture by copying its contents into our |
| // offscreen color texture. |
| colorImageView = |
| renderTarget |
| ->copyTargetImageToOffscreenColorTexture(commandBuffer, |
| colorLoadAccess, |
| drawBounds) |
| ->vkImageView(); |
| } |
| else |
| { |
| colorImageView = |
| renderTarget |
| ->accessOffscreenColorTexture( |
| commandBuffer, |
| colorLoadAccess, |
| vkutil::ImageAccessAction::invalidateContents) |
| ->vkImageView(); |
| } |
| if (desc.interlockMode == gpu::InterlockMode::atomics) |
| { |
| renderPassOptions |= |
| RenderPassOptionsVulkan::atomicCoalescedResolveAndTransfer; |
| } |
| colorAttachmentIsOffscreen = true; |
| } |
| |
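|     // Storage images are not render pass attachments and have no load op,
|     // so any PLS planes backed by storage textures must be cleared
|     // explicitly with vkCmdClearColorImage before the draw pass begins.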
| const bool usesStorageTextures = |
| plsBackingType == PLSBackingType::storageTexture || |
| desc.interlockMode == gpu::InterlockMode::atomics; |
| if (usesStorageTextures) |
| { |
| // Clear the PLS planes that are bound as storage textures. |
| const VkImage storageImageToClear = |
| (desc.interlockMode == gpu::InterlockMode::atomics) |
| ? m_plsAtomicCoverageTexture->vkImage() |
| : *plsTransientImageArray(); |
| |
| VkPipelineStageFlags srcStages = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; |
| VkPipelineStageFlags dstStages = VK_PIPELINE_STAGE_TRANSFER_BIT; |
| StackVector<VkImageMemoryBarrier, 2> barriers; |
| |
| // Don't clear the storageImageToClear until shaders in previous flushes |
| // have finished using it. |
|         // NOTE: This currently only works because we never support PLS as
| // storage texture (i.e., clockwise) and rasterOrdering at the same |
| // time. If that changes, we will need to consider that the |
| // plsTransientImageArray may have been bound previously as input |
| // attachments. |
| assert(plsBackingType == PLSBackingType::inputAttachment || |
| !m_platformFeatures.supportsRasterOrderingMode); |
| barriers.push_back({ |
| .srcAccessMask = |
| VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, |
| .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| .newLayout = VK_IMAGE_LAYOUT_GENERAL, |
| .image = storageImageToClear, |
| }); |
| |
| // The scratch color texture may also need a barrier. |
| // NOTE: atomic mode uses the scratch color texture for clipping because |
| // of its RGBA format. |
| const bool usesScratchColorTexture = |
| desc.interlockMode == gpu::InterlockMode::atomics || |
| !(renderPassOptions & |
| RenderPassOptionsVulkan::fixedFunctionColorOutput); |
| if (usesScratchColorTexture) |
| { |
| // Don't use the scratch color texture until shaders in previous |
| // render passes have finished using it. |
| // NOTE: The scratch texture may have been bound as an input |
| // attachment OR a storage texture. |
| const VkAccessFlags storageTextureAccess = |
| m_platformFeatures.supportsClockwiseMode |
| ? VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
| : 0; |
| srcStages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; |
| dstStages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; |
| barriers.push_back({ |
| .srcAccessMask = |
| VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | storageTextureAccess, |
| .dstAccessMask = |
| VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | storageTextureAccess, |
| .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| .newLayout = VK_IMAGE_LAYOUT_GENERAL, |
| .image = plsTransientScratchColorTexture()->vkImage(), |
| }); |
| } |
| |
| m_vk->imageMemoryBarriers(commandBuffer, |
| srcStages, |
| dstStages, |
| 0, |
| barriers.size(), |
| barriers.data()); |
| |
|         // Clear the entire storageImageToClear, even if we aren't going to
|         // use the whole thing. In the future we may want to consider a
|         // "renderPassInitialize" draw that clears only the
|         // renderTargetUpdateBounds, but in most cases a full clear will
|         // almost certainly be faster due to hardware optimizations.
| if (desc.interlockMode == gpu::InterlockMode::atomics) |
| { |
| const VkClearColorValue coverageClearValue = |
| vkutil::color_clear_r32ui(desc.coverageClearValue); |
| |
| const VkImageSubresourceRange clearRange = { |
| .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, |
| .levelCount = 1, |
| .layerCount = 1, |
| }; |
| |
| m_vk->CmdClearColorImage(commandBuffer, |
| m_plsAtomicCoverageTexture->vkImage(), |
| VK_IMAGE_LAYOUT_GENERAL, |
| &coverageClearValue, |
| 1, |
| &clearRange); |
| } |
| else |
| { |
| assert(desc.coverageClearValue == 0); |
| const VkClearColorValue zeroClearValue = {}; |
| |
| const VkImageSubresourceRange transientClearRange = { |
| .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, |
| .levelCount = 1, |
| .baseArrayLayer = 0, |
| .layerCount = (desc.combinedShaderFeatures & |
| gpu::ShaderFeatures::ENABLE_CLIPPING) |
| ? 2u // coverage and clip. |
| : 1u, // coverage only. |
| }; |
| |
| m_vk->CmdClearColorImage(commandBuffer, |
| *plsTransientImageArray(), |
| VK_IMAGE_LAYOUT_GENERAL, |
| &zeroClearValue, |
| 1, |
| &transientClearRange); |
| } |
| |
| // Don't use the storageImageToClear in shaders until the clear |
| // finishes. |
| m_vk->imageMemoryBarrier( |
| commandBuffer, |
| VK_PIPELINE_STAGE_TRANSFER_BIT, |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, |
| 0, |
| { |
| .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| .dstAccessMask = |
| VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, |
| .oldLayout = VK_IMAGE_LAYOUT_GENERAL, |
| .newLayout = VK_IMAGE_LAYOUT_GENERAL, |
| .image = storageImageToClear, |
| }); |
| } |
| else |
| { |
| // Any color writes in prior frames should already be complete before |
| // the following load operations. NOTE: This currently only works |
| // because we never support PLS as storage texture (i.e. clockwise) and |
| // rasterOrdering at the same time. If that changes, we will need to |
| // consider that the plsTransientImageArray may have been bound |
| // previously as storage textures. |
| assert(desc.interlockMode != gpu::InterlockMode::rasterOrdering || |
| !m_platformFeatures.supportsClockwiseMode); |
| } |
| |
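|     // clockwiseAtomic mode accumulates coverage in a storage buffer, which
|     // needs explicit barriers (and, when requested, a clear via
|     // vkCmdFillBuffer) between flushes.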
| if (desc.interlockMode == gpu::InterlockMode::clockwiseAtomic) |
| { |
| VkPipelineStageFlags lastCoverageBufferStage = |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; |
| VkAccessFlags lastCoverageBufferAccess = VK_ACCESS_SHADER_WRITE_BIT; |
| |
| if (desc.needsCoverageBufferClear) |
| { |
| assert(m_coverageBuffer != nullptr); |
| |
| // Don't clear the coverage buffer until shaders in the previous |
| // flush have finished accessing it. |
| m_vk->bufferMemoryBarrier( |
| commandBuffer, |
| lastCoverageBufferStage, |
| VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 0, |
| { |
| .srcAccessMask = lastCoverageBufferAccess, |
| .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| .buffer = *m_coverageBuffer, |
| }); |
| |
| m_vk->CmdFillBuffer(commandBuffer, |
| *m_coverageBuffer, |
| 0, |
| m_coverageBuffer->info().size, |
| 0); |
| |
| lastCoverageBufferStage = VK_PIPELINE_STAGE_TRANSFER_BIT; |
| lastCoverageBufferAccess = VK_ACCESS_TRANSFER_WRITE_BIT; |
| } |
| |
| if (m_coverageBuffer != nullptr) |
| { |
| // Don't use the coverage buffer until prior clears/accesses |
| // have completed. |
| m_vk->bufferMemoryBarrier( |
| commandBuffer, |
| lastCoverageBufferStage, |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, |
| 0, |
| { |
| .srcAccessMask = lastCoverageBufferAccess, |
| .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, |
| .buffer = *m_coverageBuffer, |
| }); |
| } |
| } |
| |
| // Begin the render pass before binding buffers or updating descriptor sets. |
| // It's valid Vulkan to do these tasks in any order, but Adreno 730, 740, |
| // and 840 appreciate it when we begin the render pass first. |
| const DrawPipelineLayoutVulkan& pipelineLayout = |
| beginDrawRenderPass(desc, |
| renderPassOptions, |
| drawBounds, |
| colorImageView, |
| msaaColorSeedImageView, |
| msaaResolveImageView); |
| |
| // Some early Android tilers are known to crash when a render pass is too |
| // complex. This is a mechanism to interrupt and begin a new render pass on |
| // affected devices after a pre-set complexity is reached. |
| uint32_t patchCountInCurrentDrawPass = 0; |
| auto interruptDrawPassIfNeeded = [&](uint32_t nextTessPatchCount) { |
| assert(nextTessPatchCount <= m_workarounds.maxInstancesPerRenderPass); |
| if (desc.interlockMode == gpu::InterlockMode::rasterOrdering && |
| patchCountInCurrentDrawPass + nextTessPatchCount > |
| m_workarounds.maxInstancesPerRenderPass) |
| { |
| assert(renderPassOptions & |
| RenderPassOptionsVulkan::rasterOrderingInterruptible); |
| // Manually resolved render passes aren't currently compatible with |
| // interruptions. |
| assert(!(renderPassOptions & |
| RenderPassOptionsVulkan::manuallyResolved)); |
| m_vk->CmdEndRenderPass(commandBuffer); |
| |
| auto resumingDesc = desc; |
| resumingDesc.colorLoadAction = |
| gpu::LoadAction::preserveRenderTarget; |
| renderPassOptions |= RenderPassOptionsVulkan::rasterOrderingResume; |
| if (pendingTessPatchCount <= |
| m_workarounds.maxInstancesPerRenderPass) |
| { |
| renderPassOptions &= |
| ~RenderPassOptionsVulkan::rasterOrderingInterruptible; |
| } |
| const DrawPipelineLayoutVulkan& resumingLayout RIVE_MAYBE_UNUSED = |
| beginDrawRenderPass(resumingDesc, |
| renderPassOptions, |
| drawBounds, |
| colorImageView, |
| msaaColorSeedImageView, |
| msaaResolveImageView); |
| // We don't need to bind new pipelines, even though we changed the |
| // render pass, because Vulkan allows for pipelines to be used |
| // interchangeably with "compatible" render passes. |
| |
| // The renderPassOptions dealing with interrupting a render pass |
| // don't affect the layout. We count on the new render pass having |
| // the same VkPipelineLayout so we don't have to update any |
| // descriptor sets after the interruption. |
| assert(&resumingLayout == &pipelineLayout); |
| |
| patchCountInCurrentDrawPass = 0; |
| } |
| patchCountInCurrentDrawPass += nextTessPatchCount; |
| pendingTessPatchCount -= nextTessPatchCount; |
| }; |
| |
| // Update the PLS input attachment descriptor sets. |
| VkDescriptorSet inputAttachmentDescriptorSet = VK_NULL_HANDLE; |
| if (pipelineLayout.plsLayout() != VK_NULL_HANDLE) |
| { |
| const VkDescriptorType plsDescriptorType = |
| (plsBackingType == |
| PipelineManagerVulkan::PLSBackingType::storageTexture) |
| ? VK_DESCRIPTOR_TYPE_STORAGE_IMAGE |
| : VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; |
| inputAttachmentDescriptorSet = descriptorSetPool->allocateDescriptorSet( |
| pipelineLayout.plsLayout()); |
| |
| if (!(renderPassOptions & |
| RenderPassOptionsVulkan::fixedFunctionColorOutput)) |
| { |
| m_vk->updateImageDescriptorSets( |
| inputAttachmentDescriptorSet, |
| { |
| .dstBinding = COLOR_PLANE_IDX, |
| .descriptorType = plsDescriptorType, |
| }, |
| {{ |
| .imageView = colorImageView, |
| .imageLayout = VK_IMAGE_LAYOUT_GENERAL, |
| }}); |
| } |
| |
| if (desc.interlockMode == gpu::InterlockMode::rasterOrdering || |
| desc.interlockMode == gpu::InterlockMode::atomics || |
| desc.interlockMode == gpu::InterlockMode::clockwise) |
| { |
| m_vk->updateImageDescriptorSets( |
| inputAttachmentDescriptorSet, |
| { |
| .dstBinding = CLIP_PLANE_IDX, |
| .descriptorType = plsDescriptorType, |
| }, |
| {{ |
| .imageView = |
| (desc.interlockMode == gpu::InterlockMode::atomics) |
| ? plsTransientScratchColorTexture()->vkImageView() |
| : *plsTransientClipView(), |
| .imageLayout = VK_IMAGE_LAYOUT_GENERAL, |
| }}); |
| |
| if (desc.interlockMode == gpu::InterlockMode::rasterOrdering || |
| (desc.interlockMode == gpu::InterlockMode::clockwise && |
| !(renderPassOptions & |
| RenderPassOptionsVulkan::fixedFunctionColorOutput))) |
| { |
| m_vk->updateImageDescriptorSets( |
| inputAttachmentDescriptorSet, |
| { |
| .dstBinding = SCRATCH_COLOR_PLANE_IDX, |
| .descriptorType = plsDescriptorType, |
| }, |
| {{ |
| .imageView = |
| plsTransientScratchColorTexture()->vkImageView(), |
| .imageLayout = VK_IMAGE_LAYOUT_GENERAL, |
| }}); |
| } |
| |
| m_vk->updateImageDescriptorSets( |
| inputAttachmentDescriptorSet, |
| { |
| .dstBinding = COVERAGE_PLANE_IDX, |
| .descriptorType = |
| (desc.interlockMode == gpu::InterlockMode::atomics) |
| ? VK_DESCRIPTOR_TYPE_STORAGE_IMAGE |
| : plsDescriptorType, |
| }, |
| {{ |
| .imageView = |
| (desc.interlockMode == gpu::InterlockMode::atomics) |
| ? m_plsAtomicCoverageTexture->vkImageView() |
| : *plsTransientCoverageView(), |
| .imageLayout = VK_IMAGE_LAYOUT_GENERAL, |
| }}); |
| } |
| |
| if (msaaColorSeedImageView != VK_NULL_HANDLE) |
| { |
| assert(desc.interlockMode == gpu::InterlockMode::msaa && |
| desc.colorLoadAction == |
| gpu::LoadAction::preserveRenderTarget); |
| m_vk->updateImageDescriptorSets( |
| inputAttachmentDescriptorSet, |
| { |
| .dstBinding = MSAA_COLOR_SEED_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, |
| }, |
| {{ |
| .imageView = msaaColorSeedImageView, |
| .imageLayout = VK_IMAGE_LAYOUT_GENERAL, |
| }}); |
| } |
| } |
| |
| // Bind the descriptor sets for this draw pass. |
| // (The imageTexture and imageDraw dynamic uniform offsets might have to |
| // update between draws, but this is otherwise all we need to bind!) |
| VkDescriptorSet drawDescriptorSets[] = { |
| perFlushDescriptorSet, |
| m_pipelineManager->nullImageDescriptorSet(), |
| m_pipelineManager->immutableSamplerDescriptorSet(), |
| inputAttachmentDescriptorSet, |
| }; |
| static_assert(PER_FLUSH_BINDINGS_SET == 0); |
| static_assert(PER_DRAW_BINDINGS_SET == 1); |
| static_assert(IMMUTABLE_SAMPLER_BINDINGS_SET == 2); |
| static_assert(PLS_TEXTURE_BINDINGS_SET == 3); |
| static_assert(BINDINGS_SET_COUNT == 4); |
| |
| m_vk->CmdBindDescriptorSets(commandBuffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| *pipelineLayout, |
| PER_FLUSH_BINDINGS_SET, |
| pipelineLayout.plsLayout() != VK_NULL_HANDLE |
| ? BINDINGS_SET_COUNT |
| : BINDINGS_SET_COUNT - 1, |
| drawDescriptorSets, |
| 1, |
| ZERO_OFFSET_32); |
| |
| // Execute the DrawList. |
| uint32_t imageTextureUpdateCount = 0; |
| for (const DrawBatch& batch : *desc.drawList) |
| { |
| assert(batch.elementCount > 0); |
| const DrawType drawType = batch.drawType; |
| |
| if (batch.imageTexture != nullptr) |
| { |
| // Update the imageTexture binding and the dynamic offset into the |
| // imageDraw uniform buffer. |
| auto imageTexture = |
| static_cast<vkutil::Texture2D*>(batch.imageTexture); |
| VkDescriptorSet imageDescriptorSet = |
| imageTexture->getCachedDescriptorSet(m_vk->currentFrameNumber(), |
| batch.imageSampler); |
| if (imageDescriptorSet == VK_NULL_HANDLE) |
| { |
| // Update the image's "texture binding" descriptor set. (These |
| // expire every frame, so we need to make a new one each frame.) |
| if (imageTextureUpdateCount >= |
| descriptor_pool_limits::kMaxImageTextureUpdates) |
| { |
| // We ran out of room for image texture updates. Allocate a |
| // new pool. |
| m_descriptorSetPoolPool->recycle( |
| std::move(descriptorSetPool)); |
| descriptorSetPool = m_descriptorSetPoolPool->acquire(); |
| imageTextureUpdateCount = 0; |
| } |
| |
| imageDescriptorSet = descriptorSetPool->allocateDescriptorSet( |
| m_pipelineManager->perDrawDescriptorSetLayout()); |
| |
| m_vk->updateImageDescriptorSets( |
| imageDescriptorSet, |
| { |
| .dstBinding = IMAGE_TEXTURE_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, |
| }, |
| {{ |
| .imageView = imageTexture->vkImageView(), |
| .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, |
| }}); |
| |
| m_vk->updateImageDescriptorSets( |
| imageDescriptorSet, |
| { |
| .dstBinding = IMAGE_SAMPLER_IDX, |
| .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER, |
| }, |
| {{ |
| .sampler = m_pipelineManager->imageSampler( |
| batch.imageSampler.asKey()), |
| }}); |
| |
| ++imageTextureUpdateCount; |
| imageTexture->updateCachedDescriptorSet( |
| imageDescriptorSet, |
| m_vk->currentFrameNumber(), |
| batch.imageSampler); |
| } |
| |
| VkDescriptorSet imageDescriptorSets[] = { |
| perFlushDescriptorSet, // Dynamic offset to imageDraw uniforms. |
| imageDescriptorSet, // imageTexture. |
| }; |
| static_assert(PER_DRAW_BINDINGS_SET == PER_FLUSH_BINDINGS_SET + 1); |
| |
| m_vk->CmdBindDescriptorSets(commandBuffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| *pipelineLayout, |
| PER_FLUSH_BINDINGS_SET, |
| std::size(imageDescriptorSets), |
| imageDescriptorSets, |
| 1, |
| &batch.imageDrawDataOffset); |
| } |
| |
| // Setup the pipeline for this specific drawType and shaderFeatures. |
| gpu::ShaderFeatures shaderFeatures = |
| desc.interlockMode == gpu::InterlockMode::atomics |
| ? desc.combinedShaderFeatures |
| : batch.shaderFeatures; |
| |
| auto shaderMiscFlags = batch.shaderMiscFlags; |
| if ((renderPassOptions & |
| RenderPassOptionsVulkan::atomicCoalescedResolveAndTransfer) && |
| drawType == gpu::DrawType::renderPassResolve) |
| { |
| assert(desc.interlockMode == gpu::InterlockMode::atomics); |
| shaderMiscFlags |= |
| gpu::ShaderMiscFlags::coalescedResolveAndTransfer; |
| } |
| |
| auto drawPipelineOptions = DrawPipelineVulkan::Options::none; |
| if (desc.wireframe && m_vk->features.fillModeNonSolid) |
| { |
| drawPipelineOptions |= DrawPipelineVulkan::Options::wireframe; |
| } |
| |
| gpu::PipelineState pipelineState; |
| gpu::get_pipeline_state(batch, |
| desc, |
| m_platformFeatures, |
| &pipelineState); |
| |
| if (batch.barriers & (gpu::BarrierFlags::plsAtomicPreResolve | |
| gpu::BarrierFlags::msaaPostInit | |
| gpu::BarrierFlags::preManualResolve)) |
| { |
| // vkCmdNextSubpass() supersedes the pipeline barrier we would |
| // insert for plsAtomic | dstBlend. So if those flags are also in |
| // the barrier, we can just call vkCmdNextSubpass() and skip |
| // vkCmdPipelineBarrier(). |
| assert(!(batch.barriers & |
| ~(gpu::BarrierFlags::plsAtomicPreResolve | |
| gpu::BarrierFlags::msaaPostInit | |
| gpu::BarrierFlags::preManualResolve | |
| BarrierFlags::plsAtomic | BarrierFlags::dstBlend | |
| BarrierFlags::drawBatchBreak))); |
| m_vk->CmdNextSubpass(commandBuffer, VK_SUBPASS_CONTENTS_INLINE); |
| } |
| else if (batch.barriers & |
| (BarrierFlags::plsAtomic | BarrierFlags::dstBlend)) |
| { |
| // Wait for color attachment writes to complete before we read the |
| // input attachments again. |
| assert(desc.interlockMode == gpu::InterlockMode::atomics || |
| desc.interlockMode == gpu::InterlockMode::msaa); |
| assert(drawType != gpu::DrawType::renderPassResolve); |
| assert(!(batch.barriers & |
| ~(BarrierFlags::plsAtomic | BarrierFlags::dstBlend | |
| BarrierFlags::drawBatchBreak))); |
| m_vk->memoryBarrier( |
| commandBuffer, |
| VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, |
| VK_DEPENDENCY_BY_REGION_BIT, |
| { |
| // TODO: We should add SHADER_READ/SHADER_WRITE flags for |
| // the coverage buffer as well, but ironically, adding those |
| // causes artifacts on Qualcomm. Leave them out for now |
| // unless we find a case where we don't work without them. |
| .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, |
| .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT, |
| }); |
| } |
| else if (batch.barriers & BarrierFlags::clockwiseBorrowedCoverage) |
| { |
| // Wait for prior fragment shaders to finish updating the coverage |
| // buffer before we read it again. |
| assert(desc.interlockMode == gpu::InterlockMode::clockwiseAtomic); |
| assert( |
| !(batch.barriers & ~(BarrierFlags::clockwiseBorrowedCoverage | |
| BarrierFlags::drawBatchBreak))); |
| m_vk->memoryBarrier(commandBuffer, |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, |
| VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, |
| VK_DEPENDENCY_BY_REGION_BIT, |
| { |
| .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, |
| .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, |
| }); |
| } |
| |
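|         // Fetch the pipeline for this batch. tryGetPipeline() may return
|         // null (e.g., if the pipeline hasn't been compiled yet), in which
|         // case the draw calls below are skipped for this batch.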
| const DrawPipelineVulkan* drawPipeline = |
| m_pipelineManager->tryGetPipeline( |
| { |
| .drawType = drawType, |
| .shaderFeatures = shaderFeatures, |
| .interlockMode = desc.interlockMode, |
| .shaderMiscFlags = shaderMiscFlags, |
| .pipelineState = pipelineState, |
| .drawPipelineOptions = drawPipelineOptions, |
| .renderPassOptions = renderPassOptions, |
| .renderTargetFormat = renderTarget->framebufferFormat(), |
| .colorLoadAction = desc.colorLoadAction, |
| #ifdef WITH_RIVE_TOOLS |
| .synthesizedFailureType = desc.synthesizedFailureType, |
| #endif |
| }, |
| m_platformFeatures); |
| |
| if (drawPipeline != nullptr) |
| { |
| m_vk->CmdBindPipeline(commandBuffer, |
| VK_PIPELINE_BIND_POINT_GRAPHICS, |
| *drawPipeline); |
| } |
| |
| switch (drawType) |
| { |
| case DrawType::midpointFanPatches: |
| case DrawType::midpointFanCenterAAPatches: |
| case DrawType::outerCurvePatches: |
| case DrawType::msaaOuterCubics: |
| case DrawType::msaaStrokes: |
| case DrawType::msaaMidpointFanBorrowedCoverage: |
| case DrawType::msaaMidpointFans: |
| case DrawType::msaaMidpointFanStencilReset: |
| case DrawType::msaaMidpointFanPathsStencil: |
| case DrawType::msaaMidpointFanPathsCover: |
| { |
| // Draw patches that connect the tessellation vertices. |
| m_vk->CmdBindVertexBuffers( |
| commandBuffer, |
| 0, |
| 1, |
| m_pathPatchVertexBuffer->vkBufferAddressOf(), |
| ZERO_OFFSET); |
| m_vk->CmdBindIndexBuffer(commandBuffer, |
| *m_pathPatchIndexBuffer, |
| 0, |
| VK_INDEX_TYPE_UINT16); |
| for (auto [chunkPatchCount, chunkFirstPatch] : |
| InstanceChunker(batch.elementCount, |
| batch.baseElement, |
| m_workarounds.maxInstancesPerRenderPass)) |
| { |
| interruptDrawPassIfNeeded(chunkPatchCount); |
| if (drawPipeline != nullptr) |
| { |
| m_vk->CmdDrawIndexed(commandBuffer, |
| gpu::PatchIndexCount(drawType), |
| chunkPatchCount, |
| gpu::PatchBaseIndex(drawType), |
| 0, |
| chunkFirstPatch); |
| } |
| } |
| break; |
| } |
| |
| case DrawType::msaaStencilClipReset: |
| case DrawType::interiorTriangulation: |
| case DrawType::atlasBlit: |
| { |
| VkBuffer buffer = *m_triangleBuffer; |
| m_vk->CmdBindVertexBuffers(commandBuffer, |
| 0, |
| 1, |
| &buffer, |
| ZERO_OFFSET); |
| if (drawPipeline != nullptr) |
| { |
| m_vk->CmdDraw(commandBuffer, |
| batch.elementCount, |
| 1, |
| batch.baseElement, |
| 0); |
| } |
| break; |
| } |
| |
| case DrawType::imageRect: |
| { |
| assert(desc.interlockMode == gpu::InterlockMode::atomics); |
| m_vk->CmdBindVertexBuffers( |
| commandBuffer, |
| 0, |
| 1, |
| m_imageRectVertexBuffer->vkBufferAddressOf(), |
| ZERO_OFFSET); |
| m_vk->CmdBindIndexBuffer(commandBuffer, |
| *m_imageRectIndexBuffer, |
| 0, |
| VK_INDEX_TYPE_UINT16); |
| if (drawPipeline != nullptr) |
| { |
| m_vk->CmdDrawIndexed(commandBuffer, |
| std::size(gpu::kImageRectIndices), |
| 1, |
| batch.baseElement, |
| 0, |
| 0); |
| } |
| break; |
| } |
| |
| case DrawType::imageMesh: |
| { |
| LITE_RTTI_CAST_OR_BREAK(vertexBuffer, |
| RenderBufferVulkanImpl*, |
| batch.vertexBuffer); |
| LITE_RTTI_CAST_OR_BREAK(uvBuffer, |
| RenderBufferVulkanImpl*, |
| batch.uvBuffer); |
| LITE_RTTI_CAST_OR_BREAK(indexBuffer, |
| RenderBufferVulkanImpl*, |
| batch.indexBuffer); |
| m_vk->CmdBindVertexBuffers( |
| commandBuffer, |
| 0, |
| 1, |
| vertexBuffer->currentBuffer()->vkBufferAddressOf(), |
| ZERO_OFFSET); |
| m_vk->CmdBindVertexBuffers( |
| commandBuffer, |
| 1, |
| 1, |
| uvBuffer->currentBuffer()->vkBufferAddressOf(), |
| ZERO_OFFSET); |
| m_vk->CmdBindIndexBuffer(commandBuffer, |
| *indexBuffer->currentBuffer(), |
| 0, |
| VK_INDEX_TYPE_UINT16); |
| if (drawPipeline != nullptr) |
| { |
| m_vk->CmdDrawIndexed(commandBuffer, |
| batch.elementCount, |
| 1, |
| batch.baseElement, |
| 0, |
| 0); |
| } |
| break; |
| } |
| |
| case DrawType::renderPassInitialize: |
| case DrawType::renderPassResolve: |
| { |
| if (drawPipeline != nullptr) |
| { |
| m_vk->CmdDraw(commandBuffer, 4, 1, 0, 0); |
| } |
| break; |
| } |
| } |
| } |
| |
| assert(pendingTessPatchCount == 0); |
| m_vk->CmdEndRenderPass(commandBuffer); |
| |
| // If the color attachment is offscreen and wasn't resolved already, copy |
| // it back to the main renderTarget. |
| if (colorAttachmentIsOffscreen && |
| !(renderPassOptions & |
| (RenderPassOptionsVulkan::manuallyResolved | |
| RenderPassOptionsVulkan::atomicCoalescedResolveAndTransfer))) |
| { |
| #ifndef __APPLE__ |
| // In general, rasterOrdering flushes should not need this copy because |
|         // they transfer from offscreen back to the renderTarget as part of the
| // draw pass (the one exception being interruptible render passes, which |
| // aren't currently compatible with manual resolves). |
| // NOTE: The manual resolve doesn't seem to work on MoltenVK, so don't |
| // do it on Apple. |
| assert(desc.interlockMode != gpu::InterlockMode::rasterOrdering || |
| m_workarounds.needsInterruptibleRenderPasses()); |
| #endif |
| |
| // Atomic flushes don't need this copy either because we always use |
| // "atomicCoalescedResolveAndTransfer" when the color attachment is |
| // offscreen. |
| assert(desc.interlockMode != gpu::InterlockMode::atomics); |
| |
| // MSAA never needs this copy. It handles resolves differently. |
| assert(desc.interlockMode != gpu::InterlockMode::msaa); |
| |
| constexpr static vkutil::ImageAccess ACCESS_COPY_FROM = { |
| .pipelineStages = VK_PIPELINE_STAGE_TRANSFER_BIT, |
| .accessMask = VK_ACCESS_TRANSFER_READ_BIT, |
| .layout = VK_IMAGE_LAYOUT_GENERAL, |
| }; |
| |
| m_vk->blitSubRect( |
| commandBuffer, |
| ((plsBackingType == PLSBackingType::storageTexture) |
| ? accessPLSOffscreenColorTexture(commandBuffer, |
| ACCESS_COPY_FROM) |
| : renderTarget->accessOffscreenColorTexture(commandBuffer, |
| ACCESS_COPY_FROM)) |
| ->vkImage(), |
| ACCESS_COPY_FROM.layout, |
| renderTarget->accessTargetImage( |
| commandBuffer, |
| { |
| .pipelineStages = VK_PIPELINE_STAGE_TRANSFER_BIT, |
| .accessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| .layout = VK_IMAGE_LAYOUT_GENERAL, |
| }, |
| vkutil::ImageAccessAction::invalidateContents), |
| VK_IMAGE_LAYOUT_GENERAL, |
| drawBounds); |
| } |
| |
| m_descriptorSetPoolPool->recycle(std::move(descriptorSetPool)); |
| } |
| |
| void RenderContextVulkanImpl::postFlush(const RenderContext::FlushResources&) |
| { |
| // Recycle buffers. |
| m_flushUniformBufferPool.recycle(std::move(m_flushUniformBuffer)); |
| m_imageDrawUniformBufferPool.recycle(std::move(m_imageDrawUniformBuffer)); |
| m_pathBufferPool.recycle(std::move(m_pathBuffer)); |
| m_paintBufferPool.recycle(std::move(m_paintBuffer)); |
| m_paintAuxBufferPool.recycle(std::move(m_paintAuxBuffer)); |
| m_contourBufferPool.recycle(std::move(m_contourBuffer)); |
| m_gradSpanBufferPool.recycle(std::move(m_gradSpanBuffer)); |
| m_tessSpanBufferPool.recycle(std::move(m_tessSpanBuffer)); |
| m_triangleBufferPool.recycle(std::move(m_triangleBuffer)); |
| } |
| |
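| // Swaps in newly hot-loaded SPIR-V, clears the cached draw pipelines, and
| // rebuilds the color ramp, tessellation, and atlas pipelines so the new
| // shaders take effect.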
| void RenderContextVulkanImpl::hotloadShaders( |
| rive::Span<const uint32_t> spirvData) |
| { |
| m_pipelineManager->clearCache(); |
| spirv::hotload_shaders(spirvData); |
| |
| // Delete and replace old shaders |
| m_colorRampPipeline = |
| std::make_unique<ColorRampPipeline>(m_pipelineManager.get(), |
| m_workarounds); |
| m_tessellatePipeline = |
| std::make_unique<TessellatePipeline>(m_pipelineManager.get(), |
| m_workarounds); |
| m_atlasPipeline = |
| std::make_unique<AtlasPipeline>(m_pipelineManager.get(), m_workarounds); |
| } |
| |
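| // Validates minimum Vulkan requirements for the Rive renderer and creates
| // the RenderContext, returning null if the device or requested options
| // aren't supported.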
| std::unique_ptr<RenderContext> RenderContextVulkanImpl::MakeContext( |
| VkInstance instance, |
| VkPhysicalDevice physicalDevice, |
| VkDevice device, |
| const VulkanFeatures& features, |
| PFN_vkGetInstanceProcAddr pfnvkGetInstanceProcAddr, |
| const ContextOptions& contextOptions) |
| { |
| rcp<VulkanContext> vk = make_rcp<VulkanContext>(instance, |
| physicalDevice, |
| device, |
| features, |
| pfnvkGetInstanceProcAddr); |
| |
| if (vk->physicalDeviceProperties().apiVersion < VK_API_VERSION_1_1) |
| { |
| fprintf( |
| stderr, |
| "ERROR: Rive Vulkan renderer requires a driver that supports at least Vulkan 1.1.\n"); |
| return nullptr; |
| } |
| |
| if (vk->physicalDeviceProperties().vendorID == VULKAN_VENDOR_IMG_TEC && |
| vk->physicalDeviceProperties().apiVersion < VK_API_VERSION_1_3) |
| { |
| fprintf( |
| stderr, |
| "ERROR: Rive Vulkan renderer requires a driver that supports at least Vulkan 1.3 on PowerVR chipsets.\n"); |
| return nullptr; |
| } |
| |
| std::unique_ptr<RenderContextVulkanImpl> impl( |
| new RenderContextVulkanImpl(std::move(vk), contextOptions)); |
| |
| if (contextOptions.forceAtomicMode && |
| !impl->platformFeatures().supportsAtomicMode) |
| { |
| fprintf( |
| stderr, |
| "ERROR: Requested \"atomic\" mode but Vulkan does not support fragmentStoresAndAtomics on this platform.\n"); |
| return nullptr; |
| } |
| |
| impl->initGPUObjects(contextOptions.shaderCompilationMode); |
| return std::make_unique<RenderContext>(std::move(impl)); |
| } |
| } // namespace rive::gpu |