fix(vk): Implement manual MSAA resolves (#11120) 756dc2db91 * fix(vk): Implement manual MSAA resolves Some Android devices have issues with MSAA resolves when the MSAA color buffer is also read as an input attachment. In the past we've worked around this by adding an empty subpass at the end of the render pass. This PR implements fully manual resolves instead, which we now use when there are blend modes and partial updates. This is hopefully a more stable workaround than a mystery subpass, and will ideally get better performance as well when we don't need to resolve the entire render target. * Fix synchronization validation (had a write/write hazard between the image state transition and the load op) * fix(vk): Implement manual MSAA resolves Some Android devices have issues with MSAA resolves when the MSAA color buffer is also read as an input attachment. In the past we've worked around this by adding an empty subpass at the end of the render pass. This PR implements fully manual resolves instead, which we now use when there are blend modes and partial updates. This is hopefully a more stable workaround than a mystery subpass, and will ideally get better performance as well when we don't need to resolve the entire render target. * Fix synchronization validation (had a write/write hazard between the image state transition and the load op) Co-authored-by: Chris Dalton <99840794+csmartdalton@users.noreply.github.com> Co-authored-by: JoshJRive <joshua@rive.app>

diff --git a/.rive_head b/.rive_head
index 031a0d9..185999f 100644
--- a/.rive_head
+++ b/.rive_head

@@ -1 +1 @@
-376052977bd85cc87cb9f31885344ae771474be8
+756dc2db913a7211e349859a2e2df4f01100b0e4

diff --git a/renderer/include/rive/renderer/gpu.hpp b/renderer/include/rive/renderer/gpu.hpp
index 407ef23..2e2f634 100644
--- a/renderer/include/rive/renderer/gpu.hpp
+++ b/renderer/include/rive/renderer/gpu.hpp

@@ -199,6 +199,11 @@
     // DrawType::renderPassInitialize when LoadAction::preserveRenderTarget is
     // specified.
     bool msaaColorPreserveNeedsDraw = false;
+    // Define the conditions under which MSAA must be resolved manually in a
+    // shader instead of relying on the graphics API's automatic MSAA resolve.
+    bool msaaResolveNeedsDraw = false;
+    bool msaaResolveWithPartialBoundsNeedsDraw = false;
+    bool msaaResolveAfterDstReadNeedsDraw = false;
     // Workaround for precision issues. Determines how far apart we space unique
     // path IDs when they will be bit-casted to fp16.
     uint8_t pathIDGranularity = 1;
@@ -908,7 +913,7 @@
         case DrawType::imageRect:
         case DrawType::imageMesh:
         case DrawType::atlasBlit:
-            if (interlockMode != gpu::InterlockMode::atomics)
+            if (interlockMode != InterlockMode::atomics)
             {
                 mask = ShaderFeatures::ENABLE_CLIPPING |
                        ShaderFeatures::ENABLE_CLIP_RECT |
@@ -936,7 +941,7 @@
             mask = ShaderFeatures::NONE;
             break;
         case DrawType::renderPassInitialize:
-            if (interlockMode == gpu::InterlockMode::atomics)
+            if (interlockMode == InterlockMode::atomics)
             {
                 // Atomic mode initializes clipping and color (when advanced
                 // blend is active).
@@ -945,7 +950,7 @@
             }
             else
             {
-                assert(interlockMode == gpu::InterlockMode::msaa);
+                assert(interlockMode == InterlockMode::msaa);
                 // MSAA mode only needs to initialize color, and only when
                 // preserving the render target but using a transient MSAA
                 // attachment.
@@ -953,8 +958,15 @@
             }
             break;
         case DrawType::renderPassResolve:
-            assert(interlockMode == gpu::InterlockMode::atomics);
-            mask = kAllShaderFeatures;
+            if (interlockMode == InterlockMode::atomics)
+            {
+                mask = kAllShaderFeatures;
+            }
+            else
+            {
+                assert(interlockMode == InterlockMode::msaa);
+                mask = ShaderFeatures::NONE;
+            }
             break;
     }
     return mask & ShaderFeaturesMaskFor(interlockMode);
@@ -1044,20 +1056,24 @@
     // loading the render target into the transient MSAA attachment.
     msaaPostInit = 1 << 2,
 
+    // Special barrier (e.g., subpass transition) issued prior to a manual MSAA
+    // resolve. (Only applicable with FlushDescriptor::msaaManualResolve.)
+    msaaPreResolve = 1 << 3,
+
     // Pixel-local dependency in the coverage buffer. (clockwiseAtomic mode
     // only.) All "borrowed coverage" draws have now been issued. Ensure they
     // complete at each pixel before beginning the "forward coverage" draws.
-    clockwiseBorrowedCoverage = 1 << 3,
+    clockwiseBorrowedCoverage = 1 << 4,
 
     // The next DrawBatch needs to perform an advanced blend, but the current
     // hardware requires an implementation-dependent barrier before reading the
     // dstColor (pipeline barrier for input attachments, KHR blend barrier, or
     // even a full MSAA resolve & blit into a separate texture.)
-    dstBlend = 1 << 4,
+    dstBlend = 1 << 5,
 
     // Only prevent future DrawBatches from being combined with the current
     // drawList. (No GPU dependencies.)
-    drawBatchBreak = 1 << 5,
+    drawBatchBreak = 1 << 6,
 };
 RIVE_MAKE_ENUM_BITSET(BarrierFlags);
 
@@ -1162,13 +1178,6 @@
     ShaderFeatures combinedShaderFeatures = ShaderFeatures::NONE;
     InterlockMode interlockMode = InterlockMode::rasterOrdering;
     int msaaSampleCount = 0; // (0 unless interlockMode is msaa.)
-    // True if shaders will never read the color buffer, meaning, the render
-    // pass can use a more efficient setup that renders to a standard color
-    // attachment and handles all blending via built-in blend state.
-    // NOTE: This may be false even if all paints use srcOver because some
-    // rendering modes (e.g., rasterOrdering with evenOdd/nonZero) always read
-    // the color buffer, regardless of blend mode.
-    bool fixedFunctionColorOutput = false;
 
     LoadAction colorLoadAction = LoadAction::clear;
     ColorInt colorClearValue = 0; // When loadAction == LoadAction::clear.
@@ -1179,6 +1188,18 @@
     IAABB renderTargetUpdateBounds; // drawBounds, or renderTargetBounds if
                                     // loadAction == LoadAction::clear.
 
+    // True if we are MSAA and the drawList ends with a "renderPassResolve" draw
+    // to resolve MSAA manually in a shader.
+    bool msaaManualResolve = false;
+
+    // True if shaders will never read the color buffer, meaning, the render
+    // pass can use a more efficient setup that renders to a standard color
+    // attachment and handles all blending via built-in blend state.
+    // NOTE: This may be false even if all paints use srcOver because some
+    // rendering modes (e.g., rasterOrdering with evenOdd/nonZero) always read
+    // the color buffer, regardless of blend mode.
+    bool fixedFunctionColorOutput = false;
+
     // Physical size of the atlas texture.
     uint16_t atlasTextureWidth;
     uint16_t atlasTextureHeight;

diff --git a/renderer/include/rive/renderer/vulkan/vulkan_context.hpp b/renderer/include/rive/renderer/vulkan/vulkan_context.hpp
index bd2b2a0..d70f6fe 100644
--- a/renderer/include/rive/renderer/vulkan/vulkan_context.hpp
+++ b/renderer/include/rive/renderer/vulkan/vulkan_context.hpp

@@ -110,6 +110,11 @@
 
     VmaAllocator allocator() const { return m_vmaAllocator; }
 
+    const VkPhysicalDeviceProperties& physicalDeviceProperties() const
+    {
+        return m_physicalDeviceProperties;
+    }
+
     bool isFormatSupportedWithFeatureFlags(VkFormat, VkFormatFeatureFlagBits);
     bool supportsD24S8() const { return m_supportsD24S8; }
 
@@ -194,6 +199,8 @@
 private:
     const VmaAllocator m_vmaAllocator;
 
+    VkPhysicalDeviceProperties m_physicalDeviceProperties;
+
     // Vulkan spec: must support one of D24S8 and D32S8.
     bool m_supportsD24S8 = false;
 };

diff --git a/renderer/shader_hotload/shader_hotload.cpp b/renderer/shader_hotload/shader_hotload.cpp
index b39838a..1b87cf8 100644
--- a/renderer/shader_hotload/shader_hotload.cpp
+++ b/renderer/shader_hotload/shader_hotload.cpp

@@ -106,13 +106,13 @@
         riveSpirvPath / "draw_clockwise_path.fixedcolor_frag.spirv",
         riveSpirvPath / "draw_clockwise_clip.frag.spirv",
         riveSpirvPath / "draw_clockwise_clip.fixedcolor_frag.spirv",
-        riveSpirvPath / "draw_clockwise_interior.triangles_vert.spirv",
-        riveSpirvPath / "draw_clockwise_interior.triangles_frag.spirv",
+        riveSpirvPath / "draw_clockwise_interior_triangles.vert.spirv",
+        riveSpirvPath / "draw_clockwise_interior_triangles.frag.spirv",
         riveSpirvPath /
-            "draw_clockwise_interior.triangles_fixedcolor_frag.spirv",
-        riveSpirvPath / "draw_clockwise_interior.triangles_clip_frag.spirv",
+            "draw_clockwise_interior_triangles.fixedcolor_frag.spirv",
+        riveSpirvPath / "draw_clockwise_interior_triangles_clip.frag.spirv",
         riveSpirvPath /
-            "draw_clockwise_interior.triangles_clip_fixedcolor_frag.spirv",
+            "draw_clockwise_interior_triangles_clip.fixedcolor_frag.spirv",
         riveSpirvPath / "draw_clockwise_atlas_blit.vert.spirv",
         riveSpirvPath / "draw_clockwise_atlas_blit.frag.spirv",
         riveSpirvPath / "draw_clockwise_atlas_blit.fixedcolor_frag.spirv",
@@ -145,8 +145,9 @@
         riveSpirvPath / "draw_msaa_image_mesh.noclipdistance_vert.spirv",
         riveSpirvPath / "draw_msaa_image_mesh.frag.spirv",
         riveSpirvPath / "draw_msaa_image_mesh.fixedcolor_frag.spirv",
-        riveSpirvPath / "copy_attachment_to_attachment.vert.spirv",
+        riveSpirvPath / "draw_fullscreen_quad.vert.spirv",
         riveSpirvPath / "copy_attachment_to_attachment.frag.spirv",
+        riveSpirvPath / "draw_msaa_resolve.frag.spirv",
     };
     constexpr size_t numFiles = std::size(spirvFileNames);
 

diff --git a/renderer/src/gpu.cpp b/renderer/src/gpu.cpp
index faa93e7..f2cecd6 100644
--- a/renderer/src/gpu.cpp
+++ b/renderer/src/gpu.cpp

@@ -72,7 +72,8 @@
             drawTypeKey = 5;
             break;
         case DrawType::renderPassResolve:
-            assert(interlockMode == InterlockMode::atomics);
+            assert(interlockMode == InterlockMode::atomics ||
+                   interlockMode == InterlockMode::msaa);
             drawTypeKey = 6;
             break;
     }
@@ -819,6 +820,7 @@
             break;
 
         case DrawType::renderPassInitialize:
+        case DrawType::renderPassResolve:
             pipelineState->depthTestEnabled = false;
             pipelineState->depthWriteEnabled = false;
             break;
@@ -826,7 +828,6 @@
         case DrawType::interiorTriangulation:
         case DrawType::midpointFanPatches:
         case DrawType::midpointFanCenterAAPatches:
-        case DrawType::renderPassResolve:
             RIVE_UNREACHABLE();
     }
 }
@@ -1093,6 +1094,7 @@
         }
 
         case DrawType::renderPassInitialize:
+        case DrawType::renderPassResolve:
         {
             pipelineState->stencilTestEnabled = false;
             pipelineState->stencilWriteMask = 0;
@@ -1103,11 +1105,11 @@
         case DrawType::midpointFanPatches:
         case DrawType::midpointFanCenterAAPatches:
         case DrawType::outerCurvePatches:
-        case DrawType::renderPassResolve:
             RIVE_UNREACHABLE();
     }
 
-    assert(stencilKey != 0 || drawType == DrawType::renderPassInitialize);
+    assert(stencilKey != 0 || drawType == DrawType::renderPassInitialize ||
+           drawType == DrawType::renderPassResolve);
     assert(stencilKey < 1 << 4);
     if (effectiveDrawContents.hasActiveClip)
         stencilKey |= (1 << 4);
@@ -1187,7 +1189,8 @@
                 // When drawing an advanced blend mode, the shader only does the
                 // "color" portion of the blend equation, and relies on the
                 // hardware blend unit to finish the "alpha" portion.
-                assert(batch.drawType != DrawType::renderPassInitialize);
+                assert(batch.drawType != DrawType::renderPassInitialize &&
+                       batch.drawType != DrawType::renderPassResolve);
                 return BlendEquation::srcOver;
             }
             else
@@ -1195,7 +1198,8 @@
                 // When m_platformFeatures.supportsBlendAdvancedKHR is true in
                 // MSAA mode, the renderContext does not combine draws that have
                 // different blend modes.
-                assert(batch.drawType != DrawType::renderPassInitialize);
+                assert(batch.drawType != DrawType::renderPassInitialize &&
+                       batch.drawType != DrawType::renderPassResolve);
                 return static_cast<BlendEquation>(batch.firstBlendMode);
             }
     }
@@ -1269,7 +1273,8 @@
 
         case DrawType::imageRect:
         case DrawType::renderPassResolve:
-            assert(flushDesc.interlockMode == InterlockMode::atomics);
+            assert(flushDesc.interlockMode == InterlockMode::atomics ||
+                   flushDesc.interlockMode == InterlockMode::msaa);
             break;
 
         case DrawType::renderPassInitialize:

diff --git a/renderer/src/render_context.cpp b/renderer/src/render_context.cpp
index 2d86163..0838ced 100644
--- a/renderer/src/render_context.cpp
+++ b/renderer/src/render_context.cpp

@@ -958,11 +958,35 @@
     RIVE_UNREACHABLE();
 }
 
-static bool wants_fixed_function_color_output(
-    gpu::PlatformFeatures platformFeatures,
-    gpu::InterlockMode interlockMode,
+static bool wants_msaa_manual_resolve(
+    const gpu::PlatformFeatures& platformFeatures,
+    const RenderTarget* renderTarget,
+    const IAABB& renderTargetUpdateBounds,
     gpu::DrawContents combinedDrawContents)
 {
+    if (platformFeatures.msaaResolveNeedsDraw)
+    {
+        return true;
+    }
+    if (platformFeatures.msaaResolveWithPartialBoundsNeedsDraw &&
+        !renderTargetUpdateBounds.contains(renderTarget->bounds()))
+    {
+        return true;
+    }
+    if (platformFeatures.msaaResolveAfterDstReadNeedsDraw &&
+        (combinedDrawContents & gpu::DrawContents::advancedBlend))
+    {
+        return true;
+    }
+    return false;
+}
+
+static bool wants_fixed_function_color_output(
+    const gpu::PlatformFeatures& platformFeatures,
+    gpu::InterlockMode interlockMode,
+    gpu::DrawContents combinedDrawContents,
+    bool msaaManualResolve)
+{
     switch (interlockMode)
     {
         case gpu::InterlockMode::rasterOrdering:
@@ -971,7 +995,6 @@
             return false;
 
         case gpu::InterlockMode::atomics:
-        case gpu::InterlockMode::msaa:
             return !(combinedDrawContents & gpu::DrawContents::advancedBlend);
 
         case gpu::InterlockMode::clockwise:
@@ -983,6 +1006,12 @@
         case gpu::InterlockMode::clockwiseAtomic:
             // clockwiseAtomic currently always sets fixedFunctionColorOutput.
             return true;
+
+        case gpu::InterlockMode::msaa:
+            // Manual MSAA resolves read the framebuffer, so they can't use
+            // fixedFunctionColorOutput.
+            return !msaaManualResolve &&
+                   !(combinedDrawContents & gpu::DrawContents::advancedBlend);
     }
 
     RIVE_UNREACHABLE();
@@ -1085,15 +1114,6 @@
     m_flushDesc.renderTarget = flushResources.renderTarget;
     m_flushDesc.interlockMode = m_ctx->frameInterlockMode();
     m_flushDesc.msaaSampleCount = frameDescriptor.msaaSampleCount;
-    m_flushDesc.fixedFunctionColorOutput =
-        wants_fixed_function_color_output(m_ctx->platformFeatures(),
-                                          m_ctx->frameInterlockMode(),
-                                          m_combinedDrawContents);
-    if (m_flushDesc.fixedFunctionColorOutput)
-    {
-        m_baselineShaderMiscFlags |=
-            gpu::ShaderMiscFlags::fixedFunctionColorOutput;
-    }
 
     // In atomic mode, we may be able to skip the explicit clear of the color
     // buffer and fold it into the atomic "resolve" operation instead.
@@ -1170,6 +1190,23 @@
         m_flushDesc.renderTargetUpdateBounds = {0, 0, 0, 0};
     }
 
+    m_flushDesc.msaaManualResolve =
+        m_flushDesc.interlockMode == gpu::InterlockMode::msaa &&
+        wants_msaa_manual_resolve(m_ctx->platformFeatures(),
+                                  m_flushDesc.renderTarget,
+                                  m_flushDesc.renderTargetUpdateBounds,
+                                  m_combinedDrawContents);
+
+    m_flushDesc.fixedFunctionColorOutput =
+        wants_fixed_function_color_output(m_ctx->platformFeatures(),
+                                          m_ctx->frameInterlockMode(),
+                                          m_combinedDrawContents,
+                                          m_flushDesc.msaaManualResolve);
+    if (m_flushDesc.fixedFunctionColorOutput)
+    {
+        m_baselineShaderMiscFlags |=
+            gpu::ShaderMiscFlags::fixedFunctionColorOutput;
+    }
     m_flushDesc.atlasContentWidth = m_atlasMaxX;
     m_flushDesc.atlasContentHeight = m_atlasMaxY;
 
@@ -1685,22 +1722,27 @@
             m_draws[drawIndex]->pushToRenderContext(this, subpassIndex);
             priorSignedKey = signedKey;
         }
+    }
 
-        // Atomic mode needs one more draw to resolve all the pixels.
-        if (m_ctx->frameInterlockMode() == gpu::InterlockMode::atomics)
-        {
-            m_drawList
-                .emplace_back(m_ctx->perFrameAllocator(),
-                              gpu::DrawType::renderPassResolve,
-                              m_baselineShaderMiscFlags,
-                              gpu::DrawContents::none,
-                              1,
-                              0,
-                              BlendMode::srcOver,
-                              ImageSampler::LinearClamp(),
-                              BarrierFlags::plsAtomicPreResolve)
-                ->shaderFeatures = m_combinedShaderFeatures;
-        }
+    // Some modes need one more draw to resolve all the pixels.
+    if (m_ctx->frameInterlockMode() == gpu::InterlockMode::atomics ||
+        m_flushDesc.msaaManualResolve)
+    {
+        m_drawList.emplace_back(
+            m_ctx->perFrameAllocator(),
+            gpu::DrawType::renderPassResolve,
+            m_baselineShaderMiscFlags,
+            (m_ctx->frameInterlockMode() == gpu::InterlockMode::atomics)
+                ? gpu::DrawContents::none
+                : gpu::DrawContents::opaquePaint,
+            1,
+            0,
+            BlendMode::srcOver,
+            ImageSampler::LinearClamp(),
+            (m_ctx->frameInterlockMode() == gpu::InterlockMode::atomics)
+                ? BarrierFlags::plsAtomicPreResolve
+                : BarrierFlags::msaaPreResolve);
+        m_combinedDrawContents |= m_drawList.tail()->drawContents;
     }
 
     // Write out the draws to the feather atlas. Do this after the main draws

diff --git a/renderer/src/shaders/constants.glsl b/renderer/src/shaders/constants.glsl
index 845ac1f..b13f19e 100644
--- a/renderer/src/shaders/constants.glsl
+++ b/renderer/src/shaders/constants.glsl

@@ -251,6 +251,7 @@
 #define VULKAN_VENDOR_ARM 0x13B5u
 #define VULKAN_VENDOR_QUALCOMM 0x5143u
 #define VULKAN_VENDOR_INTEL 0x8086u
+#define VULKAN_VENDOR_SAMSUNG 0x144Du
 
 // Indices for SPIRV specialization constants (used in lieu of #defines in
 // Vulkan.)

diff --git a/renderer/src/shaders/copy_attachment_to_attachment.glsl b/renderer/src/shaders/copy_attachment_to_attachment.frag
similarity index 64%
rename from renderer/src/shaders/copy_attachment_to_attachment.glsl
rename to renderer/src/shaders/copy_attachment_to_attachment.frag
index 2a4cb0c..62dd5d8 100644
--- a/renderer/src/shaders/copy_attachment_to_attachment.glsl
+++ b/renderer/src/shaders/copy_attachment_to_attachment.frag

@@ -2,18 +2,6 @@
  * Copyright 2025 Rive
  */
 
-#ifdef @VERTEX
-void main()
-{
-    // Fill the entire screen. The caller will use a scissor test to control the
-    // bounds being drawn.
-    gl_Position.x = (gl_VertexID & 1) == 0 ? -1. : 1.;
-    gl_Position.y = (gl_VertexID & 2) == 0 ? -1. : 1.;
-    gl_Position.z = 0.;
-    gl_Position.w = 1.;
-}
-#endif
-
 #ifdef @FRAGMENT
 layout(input_attachment_index = 0,
        // TODO: This shader is currently only used by MSAA to seed the color

diff --git a/renderer/src/shaders/draw_fullscreen_quad.vert b/renderer/src/shaders/draw_fullscreen_quad.vert
new file mode 100644
index 0000000..56d7782
--- /dev/null
+++ b/renderer/src/shaders/draw_fullscreen_quad.vert

@@ -0,0 +1,15 @@
+/*
+ * Copyright 2025 Rive
+ */
+
+#ifdef @VERTEX
+void main()
+{
+    // Fill the entire screen. The caller will use a scissor test to control the
+    // bounds being drawn.
+    gl_Position.x = (gl_VertexID & 1) == 0 ? -1. : 1.;
+    gl_Position.y = (gl_VertexID & 2) == 0 ? -1. : 1.;
+    gl_Position.z = 0.;
+    gl_Position.w = 1.;
+}
+#endif

diff --git a/renderer/src/shaders/draw_msaa_resolve.frag b/renderer/src/shaders/draw_msaa_resolve.frag
new file mode 100644
index 0000000..c901d4e
--- /dev/null
+++ b/renderer/src/shaders/draw_msaa_resolve.frag

@@ -0,0 +1,18 @@
+/*
+ * Copyright 2025 Rive
+ */
+
+#ifdef @FRAGMENT
+layout(input_attachment_index = 0,
+       binding = COLOR_PLANE_IDX,
+       set = PLS_TEXTURE_BINDINGS_SET) uniform lowp subpassInputMS msaaColor;
+
+layout(location = 0) out half4 outputColor;
+
+void main()
+{
+    outputColor = (subpassLoad(msaaColor, 0) + subpassLoad(msaaColor, 1) +
+                   subpassLoad(msaaColor, 2) + subpassLoad(msaaColor, 3)) *
+                  .25;
+}
+#endif

diff --git a/renderer/src/shaders/spirv/copy_attachment_to_attachment.main b/renderer/src/shaders/spirv/copy_attachment_to_attachment.frag
similarity index 74%
copy from renderer/src/shaders/spirv/copy_attachment_to_attachment.main
copy to renderer/src/shaders/spirv/copy_attachment_to_attachment.frag
index 545390a..9d88845 100644
--- a/renderer/src/shaders/spirv/copy_attachment_to_attachment.main
+++ b/renderer/src/shaders/spirv/copy_attachment_to_attachment.frag

@@ -3,4 +3,4 @@
 #include "glsl.minified.glsl"
 #include "constants.minified.glsl"
 #include "common.minified.glsl"
-#include "copy_attachment_to_attachment.minified.glsl"
+#include "copy_attachment_to_attachment.minified.frag"

diff --git a/renderer/src/shaders/spirv/copy_attachment_to_attachment.main b/renderer/src/shaders/spirv/draw_fullscreen_quad.vert
similarity index 74%
rename from renderer/src/shaders/spirv/copy_attachment_to_attachment.main
rename to renderer/src/shaders/spirv/draw_fullscreen_quad.vert
index 545390a..641a126 100644
--- a/renderer/src/shaders/spirv/copy_attachment_to_attachment.main
+++ b/renderer/src/shaders/spirv/draw_fullscreen_quad.vert

@@ -3,4 +3,4 @@
 #include "glsl.minified.glsl"
 #include "constants.minified.glsl"
 #include "common.minified.glsl"
-#include "copy_attachment_to_attachment.minified.glsl"
+#include "draw_fullscreen_quad.minified.vert"

diff --git a/renderer/src/shaders/spirv/copy_attachment_to_attachment.main b/renderer/src/shaders/spirv/draw_msaa_resolve.frag
similarity index 74%
copy from renderer/src/shaders/spirv/copy_attachment_to_attachment.main
copy to renderer/src/shaders/spirv/draw_msaa_resolve.frag
index 545390a..b1c841f 100644
--- a/renderer/src/shaders/spirv/copy_attachment_to_attachment.main
+++ b/renderer/src/shaders/spirv/draw_msaa_resolve.frag

@@ -3,4 +3,4 @@
 #include "glsl.minified.glsl"
 #include "constants.minified.glsl"
 #include "common.minified.glsl"
-#include "copy_attachment_to_attachment.minified.glsl"
+#include "draw_msaa_resolve.minified.frag"

diff --git a/renderer/src/vulkan/draw_pipeline_layout_vulkan.cpp b/renderer/src/vulkan/draw_pipeline_layout_vulkan.cpp
index 0ee6f62..5cc29c8 100644
--- a/renderer/src/vulkan/draw_pipeline_layout_vulkan.cpp
+++ b/renderer/src/vulkan/draw_pipeline_layout_vulkan.cpp

@@ -143,19 +143,19 @@
     {
         case gpu::InterlockMode::rasterOrdering:
             assert(subpassIndex == 0);
-            return 4;
+            return 4u;
         case gpu::InterlockMode::atomics:
             assert(subpassIndex <= 1);
             return 2u - subpassIndex; // Subpass 0 -> 2, subpass 1 -> 1.
         case gpu::InterlockMode::clockwise:
             assert(subpassIndex == 0);
-            return (options & Options::fixedFunctionColorOutput) ? 1 : 0;
+            return (options & Options::fixedFunctionColorOutput) ? 1u : 0u;
         case gpu::InterlockMode::clockwiseAtomic:
             assert(subpassIndex == 0);
-            return 1;
+            return 1u;
         case gpu::InterlockMode::msaa:
-            assert(subpassIndex == 0 || subpassIndex == 1);
-            return 1;
+            assert(0 <= subpassIndex && subpassIndex <= 2);
+            return 1u;
     }
     RIVE_UNREACHABLE();
 }

diff --git a/renderer/src/vulkan/draw_pipeline_layout_vulkan.hpp b/renderer/src/vulkan/draw_pipeline_layout_vulkan.hpp
index 8438073..397b30d 100644
--- a/renderer/src/vulkan/draw_pipeline_layout_vulkan.hpp
+++ b/renderer/src/vulkan/draw_pipeline_layout_vulkan.hpp

@@ -37,9 +37,13 @@
         // texture bound as an input attachment, because the final render target
         // itself can't be bound as an input attachment.
         msaaSeedFromOffscreenTexture = 1 << 2,
+
+        // MSAA will be resolved manually in a shader instead of setting up the
+        // render pass with a resolve attachment.
+        msaaManualResolve = 1 << 3,
     };
 
-    constexpr static int OPTION_COUNT = 3;
+    constexpr static int OPTION_COUNT = 4;
 
     DrawPipelineLayoutVulkan(PipelineManagerVulkan*,
                              gpu::InterlockMode,

diff --git a/renderer/src/vulkan/draw_pipeline_vulkan.cpp b/renderer/src/vulkan/draw_pipeline_vulkan.cpp
index 1dc8cd1..8125f13 100644
--- a/renderer/src/vulkan/draw_pipeline_vulkan.cpp
+++ b/renderer/src/vulkan/draw_pipeline_vulkan.cpp

@@ -434,9 +434,11 @@
 
     VkPipelineMultisampleStateCreateInfo msaaState = {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
-        .rasterizationSamples = interlockMode == gpu::InterlockMode::msaa
-                                    ? VK_SAMPLE_COUNT_4_BIT
-                                    : VK_SAMPLE_COUNT_1_BIT,
+        .rasterizationSamples =
+            (interlockMode == gpu::InterlockMode::msaa &&
+             props.drawType != gpu::DrawType::renderPassResolve)
+                ? VK_SAMPLE_COUNT_4_BIT
+                : VK_SAMPLE_COUNT_1_BIT,
     };
 
     VkGraphicsPipelineCreateInfo pipelineCreateInfo = {

diff --git a/renderer/src/vulkan/draw_shader_vulkan.cpp b/renderer/src/vulkan/draw_shader_vulkan.cpp
index 995f8ba..8be34a2 100644
--- a/renderer/src/vulkan/draw_shader_vulkan.cpp
+++ b/renderer/src/vulkan/draw_shader_vulkan.cpp

@@ -318,12 +318,16 @@
                     // MSAA render passes get initialized by drawing the
                     // previous contents into the framebuffer.
                     // (LoadAction::preserveRenderTarget only.)
-                    vertCode = spirv::copy_attachment_to_attachment_vert;
+                    vertCode = spirv::draw_fullscreen_quad_vert;
                     fragCode = spirv::copy_attachment_to_attachment_frag;
                     break;
 
-                case DrawType::imageRect:
                 case DrawType::renderPassResolve:
+                    vertCode = spirv::draw_fullscreen_quad_vert;
+                    fragCode = spirv::draw_msaa_resolve_frag;
+                    break;
+
+                case DrawType::imageRect:
                     RIVE_UNREACHABLE();
             }
             break;

diff --git a/renderer/src/vulkan/render_context_vulkan_impl.cpp b/renderer/src/vulkan/render_context_vulkan_impl.cpp
index fe5bf36..57eca1f 100644
--- a/renderer/src/vulkan/render_context_vulkan_impl.cpp
+++ b/renderer/src/vulkan/render_context_vulkan_impl.cpp

@@ -727,6 +727,24 @@
     // texture. We need to draw the previous renderTarget contents into it
     // manually when LoadAction::preserveRenderTarget is specified.
     m_platformFeatures.msaaColorPreserveNeedsDraw = true;
+    if (physicalDeviceProps.vendorID != VULKAN_VENDOR_SAMSUNG)
+    {
+        // Vulkan builtin MSAA resolves only support resolving the entire render
+        // target. Opting for manual resolves when there are only partial
+        // updates will allow us to implement our own partial resolve, and
+        // hopefully get better performance.
+        // NOTE: Early Xclipse drivers struggle with the manual resolve, so we
+        // always do automatic fullscreen resolves on that GPU family.
+        m_platformFeatures.msaaResolveWithPartialBoundsNeedsDraw = true;
+    }
+    if (physicalDeviceProps.vendorID == VULKAN_VENDOR_QUALCOMM)
+    {
+        // Some Android drivers (some Android 12 and earlier Adreno drivers)
+        // have issues with having both a self-dependency for dst reads and
+        // resolve attachments. For now we just always manually resolve these
+        // render passes that use advanced blend on Qualcomm.
+        m_platformFeatures.msaaResolveAfterDstReadNeedsDraw = true;
+    }
     m_platformFeatures.maxCoverageBufferLength =
         std::min(physicalDeviceProps.limits.maxStorageBufferRange, 1u << 28) /
         sizeof(uint32_t);
@@ -1373,13 +1391,33 @@
         return;
     }
 
+    auto pipelineLayoutOptions = DrawPipelineLayoutVulkan::Options::none;
+    if (desc.fixedFunctionColorOutput)
+    {
+        // In the case of Vulkan, fixedFunctionColorOutput means the color
+        // buffer will never be bound as an input attachment.
+        pipelineLayoutOptions |=
+            DrawPipelineLayoutVulkan::Options::fixedFunctionColorOutput;
+    }
     if (desc.interlockMode == gpu::InterlockMode::msaa)
     {
-        // Vulkan does not support partial MSAA resolves.
-        // TODO: We should consider adding a new subpass that reads the MSAA
-        // buffer and resolves it manually for partial updates.
-        drawBounds = renderTarget->bounds();
+        if (desc.msaaManualResolve)
+        {
+            // We're going to resolve MSAA manually in a shader instead of using
+            // a resolve attachment.
+            pipelineLayoutOptions |=
+                DrawPipelineLayoutVulkan::Options::msaaManualResolve;
+        }
+        else
+        {
+            // Vulkan does not support partial MSAA resolves when using resolve
+            // attachments.
+            drawBounds = renderTarget->bounds();
+        }
     }
+    // Vulkan builtin MSAA resolves don't support partial drawBounds.
+    assert(desc.interlockMode != gpu::InterlockMode::msaa ||
+           desc.msaaManualResolve || drawBounds == renderTarget->bounds());
 
     auto commandBuffer =
         reinterpret_cast<VkCommandBuffer>(desc.externalCommandBuffer);
@@ -1829,10 +1867,6 @@
             VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
     }
 
-    // In the case of Vulkan, fixedFunctionColorOutput means the color buffer
-    // will never be bound as an input attachment.
-    const bool fixedFunctionColorOutput = desc.fixedFunctionColorOutput;
-
     // Ensures any previous accesses to a color attachment complete before we
     // begin rendering.
     const vkutil::ImageAccess colorLoadAccess = {
@@ -1840,7 +1874,8 @@
         // VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT.
         .pipelineStages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
         .accessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
-        .layout = fixedFunctionColorOutput
+        .layout = (pipelineLayoutOptions &
+                   DrawPipelineLayoutVulkan::Options::fixedFunctionColorOutput)
                       ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
                       : VK_IMAGE_LAYOUT_GENERAL,
     };
@@ -1867,13 +1902,6 @@
     VkImageView msaaResolveImageView = VK_NULL_HANDLE;
     VkImageView msaaColorSeedImageView = VK_NULL_HANDLE;
 
-    auto pipelineLayoutOptions = DrawPipelineLayoutVulkan::Options::none;
-    if (fixedFunctionColorOutput)
-    {
-        pipelineLayoutOptions |=
-            DrawPipelineLayoutVulkan::Options::fixedFunctionColorOutput;
-    }
-
     if (desc.interlockMode == gpu::InterlockMode::msaa)
     {
         colorImageView = renderTarget->msaaColorTexture()->vkImageView();
@@ -1939,7 +1967,8 @@
                     : vkutil::ImageAccessAction::preserveContents);
         }
     }
-    else if (fixedFunctionColorOutput ||
+    else if ((pipelineLayoutOptions &
+              DrawPipelineLayoutVulkan::Options::fixedFunctionColorOutput) ||
              ((desc.interlockMode == gpu::InterlockMode::rasterOrdering ||
                desc.interlockMode == gpu::InterlockMode::atomics) &&
               (renderTarget->targetUsageFlags() &
@@ -2059,7 +2088,8 @@
     StackVector<VkImageView, PLS_PLANE_COUNT> framebufferViews;
     StackVector<VkClearValue, PLS_PLANE_COUNT> clearValues;
     if (plsBackingType == PLSBackingType::inputAttachment ||
-        fixedFunctionColorOutput)
+        (pipelineLayoutOptions &
+         DrawPipelineLayoutVulkan::Options::fixedFunctionColorOutput))
     {
         assert(framebufferViews.size() == COLOR_PLANE_IDX);
         framebufferViews.push_back(colorImageView);
@@ -2185,7 +2215,8 @@
         // of its RGBA format.
         const bool usesScratchColorTexture =
             desc.interlockMode == gpu::InterlockMode::atomics ||
-            !fixedFunctionColorOutput;
+            !(pipelineLayoutOptions &
+              DrawPipelineLayoutVulkan::Options::fixedFunctionColorOutput);
         if (usesScratchColorTexture)
         {
             // Don't use the scratch color texture until shaders in previous
@@ -2375,7 +2406,8 @@
         inputAttachmentDescriptorSet = descriptorSetPool->allocateDescriptorSet(
             pipelineLayout.plsLayout());
 
-        if (!fixedFunctionColorOutput)
+        if (!(pipelineLayoutOptions &
+              DrawPipelineLayoutVulkan::Options::fixedFunctionColorOutput))
         {
             m_vk->updateImageDescriptorSets(
                 inputAttachmentDescriptorSet,
@@ -2409,7 +2441,8 @@
 
             if (desc.interlockMode == gpu::InterlockMode::rasterOrdering ||
                 (desc.interlockMode == gpu::InterlockMode::clockwise &&
-                 !fixedFunctionColorOutput))
+                 !(pipelineLayoutOptions & DrawPipelineLayoutVulkan::Options::
+                                               fixedFunctionColorOutput)))
             {
                 m_vk->updateImageDescriptorSets(
                     inputAttachmentDescriptorSet,
@@ -2613,17 +2646,19 @@
                 m_platformFeatures);
 
         if (batch.barriers & (gpu::BarrierFlags::plsAtomicPreResolve |
-                              gpu::BarrierFlags::msaaPostInit))
+                              gpu::BarrierFlags::msaaPostInit |
+                              gpu::BarrierFlags::msaaPreResolve))
         {
             // vkCmdNextSubpass() supersedes the pipeline barrier we would
             // insert for plsAtomic | dstBlend. So if those flags are also in
             // the barrier, we can just call vkCmdNextSubpass() and skip
             // vkCmdPipelineBarrier().
-            assert(
-                !(batch.barriers &
-                  ~(gpu::BarrierFlags::plsAtomicPreResolve |
-                    gpu::BarrierFlags::msaaPostInit | BarrierFlags::plsAtomic |
-                    BarrierFlags::dstBlend | BarrierFlags::drawBatchBreak)));
+            assert(!(batch.barriers &
+                     ~(gpu::BarrierFlags::plsAtomicPreResolve |
+                       gpu::BarrierFlags::msaaPostInit |
+                       gpu::BarrierFlags::msaaPreResolve |
+                       BarrierFlags::plsAtomic | BarrierFlags::dstBlend |
+                       BarrierFlags::drawBatchBreak)));
             m_vk->CmdNextSubpass(commandBuffer, VK_SUBPASS_CONTENTS_INLINE);
         }
         else if (batch.barriers &
@@ -2796,16 +2831,6 @@
         }
     }
 
-    if (desc.interlockMode == InterlockMode::msaa)
-    {
-        // MSAA needs a follow-up subpass to resolve the MSAA buffer to the
-        // single-sampled render target. No actually rendering is necessary, it
-        // is taken care of entirely via the render pass mechanisms. This is
-        // a workaround for what appears to be a driver bug in some early Adreno
-        // drivers.
-        m_vk->CmdNextSubpass(commandBuffer, VK_SUBPASS_CONTENTS_INLINE);
-    }
-
     m_vk->CmdEndRenderPass(commandBuffer);
 
     if (colorAttachmentIsOffscreen &&

diff --git a/renderer/src/vulkan/render_pass_vulkan.cpp b/renderer/src/vulkan/render_pass_vulkan.cpp
index fb76624..09443db 100644
--- a/renderer/src/vulkan/render_pass_vulkan.cpp
+++ b/renderer/src/vulkan/render_pass_vulkan.cpp

@@ -304,14 +304,14 @@
                           : VK_ATTACHMENT_LOAD_OP_DONT_CARE,
             .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
             .initialLayout =
-                readsMSAAResolveAttachment
+                (readsMSAAResolveAttachment ||
+                 (layoutOptions &
+                  DrawPipelineLayoutVulkan::Options::msaaManualResolve))
                     ? msaaResolveLayout
                     // NOTE: This can only be VK_IMAGE_LAYOUT_UNDEFINED because
-                    // Vulkan does not support partial MSAA resolves, so every
-                    // MSAA render pass covers the entire render area.
-                    // TODO: If we add a new subpass that reads the MSAA buffer
-                    // and resolves it manually for partial updates, this will
-                    // need to change to "msaaResolveLayout".
+                    // Vulkan does not support partial resolves to MSAA resolve
+                    // attachments. So every MSAA render pass without
+                    // "msaaManualResolve" covers the entire render area.
                     : VK_IMAGE_LAYOUT_UNDEFINED,
             .finalLayout = msaaResolveLayout,
         });
@@ -319,6 +319,7 @@
             .attachment = MSAA_RESOLVE_IDX,
             .layout = msaaResolveLayout,
         };
+        assert(colorAttachmentRefs.size() == 1);
 
         if (layoutOptions &
             DrawPipelineLayoutVulkan::Options::msaaSeedFromOffscreenTexture)
@@ -472,16 +473,36 @@
 
         // The next subpass (the main subpass) needs an external dependency on
         //  the depth buffer (which is not used in this subpass but is used in
-        //  that one)
-        subpassDeps.push_back({
+        //  that one).
+        VkSubpassDependency externalInputDeps = {
             .srcSubpass = VK_SUBPASS_EXTERNAL,
             .dstSubpass = 1,
             .srcStageMask = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
-            .dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT,
+            .dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+                            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
             .srcAccessMask = VK_ACCESS_NONE,
-            .dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
             .dependencyFlags = 0,
-        });
+        };
+
+        if (!(layoutOptions &
+              DrawPipelineLayoutVulkan::Options::msaaManualResolve))
+        {
+            // Without the manual MSAA resolve, this pass must also protect the
+            // resolve target's layout transition from the load op (which may
+            // perform a write even though it's LOAD_OP_DONT_CARE), hence
+            // COLOR_ATTACHMENT_WRITE as a destination access flag. (With the
+            // manual resolve, the transition and load happen in that subpass.)
+            // NOTE(review): externalInputDeps is already initialized with both
+            // of these bits above, so this branch is a no-op as written.
+            externalInputDeps.dstStageMask |=
+                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+            externalInputDeps.dstAccessMask |=
+                VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+        }
+
+        subpassDeps.push_back(externalInputDeps);
 
         // Finally, the standard color dependency from subpass 0 -> subpass 1
         addStandardColorDependencyToNextSubpass(subpassDescs.size());
@@ -519,6 +540,12 @@
         .pInputAttachments = inputAttachmentRefs.data(),
         .colorAttachmentCount = colorAttachmentRefs.size(),
         .pColorAttachments = colorAttachmentRefs.data(),
+        .pResolveAttachments =
+            (interlockMode == gpu::InterlockMode::msaa &&
+             !(layoutOptions &
+               DrawPipelineLayoutVulkan::Options::msaaManualResolve))
+                ? &msaaResolveAttachmentRef.value()
+                : nullptr,
         .pDepthStencilAttachment = depthStencilAttachmentRef.has_value()
                                        ? &depthStencilAttachmentRef.value()
                                        : nullptr,
@@ -585,57 +612,26 @@
             .dependencyFlags = 0,
         });
 
-        // Add the dependency from main subpass to the resolve subpass (note
-        // that this is different than the standard COLOR_ATTACHMENT_WRITE ->
-        // INPUT_ATTACHMENT_READ barrier between subpasses).
-        // NOTE: The fragment/input attachment bits here seem like they should
-        // not be necessary (nothing renders during this path, it's purely a
-        // resolve), but it seems on some Android devices at least, the resolve
-        // may be getting done *as* a fragment shader (instead of during
-        // COLOR_ATTACHMENT_OUTPUT, which is what seems to happen on desktop),
-        // so it's necessary. Ditto the input attachment for this subpass, as
-        // just having color and resolve *should* work, but doesn't on Android.
-        subpassDeps.push_back({
-            .srcSubpass = subpassDescs.size() - 1,
-            .dstSubpass = subpassDescs.size(),
-            .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
-            .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
-                            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
-            .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
-            .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
-                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
-                             VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
-            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
-        });
+        // Manual MSAA resolve, if needed.
+        if ((layoutOptions &
+             DrawPipelineLayoutVulkan::Options::msaaManualResolve))
+        {
+            assert(
+                !(layoutOptions &
+                  DrawPipelineLayoutVulkan::Options::fixedFunctionColorOutput));
+            assert(inputAttachmentRefs[0].attachment == COLOR_PLANE_IDX);
 
-        // Some Android drivers (some Android 12 and earlier Adreno drivers)
-        // have issues with having both a self-dependency and resolve
-        // attachments. The resolve can instead be done as a second pass (in
-        // which no actual rendering occurs), which eliminates some corruption
-        // during blending on the affected devices.
+            addStandardColorDependencyToNextSubpass(subpassDescs.size());
 
-        // This should be MSAA and there should only be a single color
-        // attachment.
-        assert(interlockMode == gpu::InterlockMode::msaa);
-        assert(colorAttachmentRefs.size() == 1);
-        assert(msaaResolveAttachmentRef.has_value());
-
-        VkAttachmentReference msaaResolveInputAttachmentRef =
-            colorAttachmentRefs[0];
-
-        // the layout is not allowed to be COLOR_ATTACHMENT_OPTIMAL, so instead
-        // use GENERAL.
-        msaaResolveInputAttachmentRef.layout = VK_IMAGE_LAYOUT_GENERAL;
-
-        subpassDescs.push_back({
-            .flags = 0u,
-            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-            .inputAttachmentCount = 1,
-            .pInputAttachments = &msaaResolveInputAttachmentRef,
-            .colorAttachmentCount = 1,
-            .pColorAttachments = colorAttachmentRefs.data(),
-            .pResolveAttachments = &msaaResolveAttachmentRef.value(),
-        });
+            subpassDescs.push_back({
+                .flags = 0u,
+                .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+                .inputAttachmentCount = 1u,
+                .pInputAttachments = inputAttachmentRefs.data(),
+                .colorAttachmentCount = 1u,
+                .pColorAttachments = &msaaResolveAttachmentRef.value(),
+            });
+        }
     }
 
     // PLS-resolve subpass (atomic mode only).

diff --git a/renderer/src/vulkan/vulkan_context.cpp b/renderer/src/vulkan/vulkan_context.cpp
index baad551..026ef31 100644
--- a/renderer/src/vulkan/vulkan_context.cpp
+++ b/renderer/src/vulkan/vulkan_context.cpp

@@ -54,16 +54,11 @@
 #undef LOAD_VULKAN_DEVICE_COMMAND
             m_vmaAllocator(make_vma_allocator(this, pfnvkGetInstanceProcAddr))
 {
-#ifdef NDEBUG
+    GetPhysicalDeviceProperties(physicalDevice, &m_physicalDeviceProperties);
+
     // Check that we weren't told the device was more capable than it is
-    {
-        VkPhysicalDeviceProperties props{};
-        GetPhysicalDeviceProperties(physicalDevice, &props);
-        assert(
-            props.apiVersion >= features.apiVersion &&
-            "Supplied API version should not be newer than the physical device");
-    }
-#endif
+    assert(m_physicalDeviceProperties.apiVersion >= features.apiVersion &&
+           "Supplied API version should not be newer than the physical device");
 
     // VK spec says between D24_S8 and D32_S8, one of them must be supported
     m_supportsD24S8 = isFormatSupportedWithFeatureFlags(

diff --git a/renderer/src/vulkan/vulkan_shaders.cpp b/renderer/src/vulkan/vulkan_shaders.cpp
index 3f327cc..4aeae03 100644
--- a/renderer/src/vulkan/vulkan_shaders.cpp
+++ b/renderer/src/vulkan/vulkan_shaders.cpp

@@ -97,8 +97,9 @@
 #include "generated/shaders/spirv/draw_msaa_image_mesh.frag.h"
 #include "generated/shaders/spirv/draw_msaa_image_mesh.fixedcolor_frag.h"
 #include "generated/shaders/spirv/draw_msaa_image_mesh.noclipdistance_vert.h"
-#include "generated/shaders/spirv/copy_attachment_to_attachment.vert.h"
+#include "generated/shaders/spirv/draw_fullscreen_quad.vert.h"
 #include "generated/shaders/spirv/copy_attachment_to_attachment.frag.h"
+#include "generated/shaders/spirv/draw_msaa_resolve.frag.h"
 } // namespace embedded
 
 // Draw setup shaders.
@@ -264,10 +265,12 @@
     rive::make_span(embedded::draw_msaa_image_mesh_frag);
 rive::Span<const uint32_t> draw_msaa_image_mesh_fixedcolor_frag =
     rive::make_span(embedded::draw_msaa_image_mesh_fixedcolor_frag);
-rive::Span<const uint32_t> copy_attachment_to_attachment_vert =
-    rive::make_span(embedded::copy_attachment_to_attachment_vert);
+rive::Span<const uint32_t> draw_fullscreen_quad_vert =
+    rive::make_span(embedded::draw_fullscreen_quad_vert);
 rive::Span<const uint32_t> copy_attachment_to_attachment_frag =
     rive::make_span(embedded::copy_attachment_to_attachment_frag);
+rive::Span<const uint32_t> draw_msaa_resolve_frag =
+    rive::make_span(embedded::draw_msaa_resolve_frag);
 
 void hotload_shaders(rive::Span<const uint32_t> spirvData)
 {
@@ -365,7 +368,8 @@
     spirv::draw_msaa_image_mesh_noclipdistance_vert = readNextBytecodeSpan();
     spirv::draw_msaa_image_mesh_frag = readNextBytecodeSpan();
     spirv::draw_msaa_image_mesh_fixedcolor_frag = readNextBytecodeSpan();
-    spirv::copy_attachment_to_attachment_vert = readNextBytecodeSpan();
+    spirv::draw_fullscreen_quad_vert = readNextBytecodeSpan();
     spirv::copy_attachment_to_attachment_frag = readNextBytecodeSpan();
+    spirv::draw_msaa_resolve_frag = readNextBytecodeSpan();
 }
 } // namespace rive::gpu::spirv

diff --git a/renderer/src/vulkan/vulkan_shaders.hpp b/renderer/src/vulkan/vulkan_shaders.hpp
index 6ec8ea9..10baed0 100644
--- a/renderer/src/vulkan/vulkan_shaders.hpp
+++ b/renderer/src/vulkan/vulkan_shaders.hpp

@@ -99,8 +99,9 @@
 extern rive::Span<const uint32_t> draw_msaa_image_mesh_noclipdistance_vert;
 extern rive::Span<const uint32_t> draw_msaa_image_mesh_frag;
 extern rive::Span<const uint32_t> draw_msaa_image_mesh_fixedcolor_frag;
-extern rive::Span<const uint32_t> copy_attachment_to_attachment_vert;
+extern rive::Span<const uint32_t> draw_fullscreen_quad_vert;
 extern rive::Span<const uint32_t> copy_attachment_to_attachment_frag;
+extern rive::Span<const uint32_t> draw_msaa_resolve_frag;
 
 // Reload global SPIRV buffers from runtime data.
 void hotload_shaders(rive::Span<const uint32_t> spirvData);

diff --git a/tests/common/offscreen_render_target_vulkan.cpp b/tests/common/offscreen_render_target_vulkan.cpp
index f0da60c..71c0d50 100644
--- a/tests/common/offscreen_render_target_vulkan.cpp
+++ b/tests/common/offscreen_render_target_vulkan.cpp

@@ -23,6 +23,7 @@
 #include "rive/renderer/rive_render_image.hpp"
 #include "rive/renderer/vulkan/vulkan_context.hpp"
 #include "rive/renderer/vulkan/render_target_vulkan.hpp"
+#include "shaders/constants.glsl"
 
 namespace rive_tests
 {
@@ -58,15 +59,20 @@
                       uint32_t height,
                       bool riveRenderable) :
             RenderTargetVulkan(
-                std::move(vk),
+                vk,
                 width,
                 height,
                 // BGRA is not riveRenderable when using storage textures for
                 // PLS (like in clockwise mode) because storage textures have to
                 // be RGBA8. Let's test both formats, but make sure to use RGBA
                 // for the riveRenderable case.
-                riveRenderable ? VK_FORMAT_R8G8B8A8_UNORM
-                               : VK_FORMAT_B8G8R8A8_UNORM,
+                // Also don't test BGRA on Qualcomm. Various Adreno 6 devices
+                // can get the RGB order wrong when resolving BGRA multisampled
+                // data.
+                (riveRenderable || vk->physicalDeviceProperties().vendorID ==
+                                       VULKAN_VENDOR_QUALCOMM)
+                    ? VK_FORMAT_R8G8B8A8_UNORM
+                    : VK_FORMAT_B8G8R8A8_UNORM,
                 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
                     VK_IMAGE_USAGE_SAMPLED_BIT |
                     // Rendering scenarios that use an offscreen color buffer