refactor: Start splitting shaders into separate files (#10698) b73733a052
* refactor: Start splitting shaders into separate files

The draw shaders are getting unruly with #defines all over the place.
Before we can add more, we need to clean these up a little. This PR
splits out the fragment portions of raster ordered and msaa shaders into
separate files. This also required teaching minify.py to handle .vert and
.frag files.

* apply_frag_coverage

* fixwebgpu

* unreal

* fixmetalagain

* formats

* oops

* rm draw_image_mesh.glsl

Co-authored-by: Chris Dalton <99840794+csmartdalton@users.noreply.github.com>
diff --git a/.rive_head b/.rive_head
index 8a9bed8..d4b2016 100644
--- a/.rive_head
+++ b/.rive_head
@@ -1 +1 @@
-986c92af9ab29f5c68bb3a6a39c1f05924683028
+b73733a0525518a76e8a6b32a2529a2dea319ace
diff --git a/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp b/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp
index 8118137..e448b00 100644
--- a/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp
+++ b/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp
@@ -277,9 +277,9 @@
     glutils::Texture m_atlasTexture = glutils::Texture::Zero();
     glutils::Framebuffer m_atlasFBO;
 
-    // Wraps a compiled GL shader of draw_path.glsl or draw_image_mesh.glsl,
-    // either vertex or fragment, with a specific set of features enabled via
-    // #define. The set of features to enable is dictated by ShaderFeatures.
+    // Wraps a compiled GL "draw" shader, either vertex or fragment, with a
+    // specific set of features enabled via #define. The set of features to
+    // enable is dictated by ShaderFeatures.
     class DrawShader
     {
     public:
@@ -304,9 +304,9 @@
         GLuint m_id = 0;
     };
 
-    // Wraps a compiled and linked GL program of draw_path.glsl or
-    // draw_image_mesh.glsl, with a specific set of features enabled via
-    // #define. The set of features to enable is dictated by ShaderFeatures.
+    // Wraps a compiled and linked GL "draw" program, with a specific set of
+    // features enabled via #define. The set of features to enable is dictated
+    // by ShaderFeatures.
     class DrawProgram
     {
     public:
diff --git a/renderer/src/d3d/pipeline_manager.cpp b/renderer/src/d3d/pipeline_manager.cpp
index 5ba4c20..c410df0 100644
--- a/renderer/src/d3d/pipeline_manager.cpp
+++ b/renderer/src/d3d/pipeline_manager.cpp
@@ -10,16 +10,14 @@
 
 #include "generated/shaders/advanced_blend.glsl.hpp"
 #include "generated/shaders/atomic_draw.glsl.hpp"
-#include "generated/shaders/color_ramp.glsl.hpp"
 #include "generated/shaders/constants.glsl.hpp"
 #include "generated/shaders/common.glsl.hpp"
-#include "generated/shaders/draw_image_mesh.glsl.hpp"
+#include "generated/shaders/draw_image_mesh.vert.hpp"
+#include "generated/shaders/draw_raster_order_image_mesh.frag.hpp"
 #include "generated/shaders/draw_path_common.glsl.hpp"
-#include "generated/shaders/draw_path.glsl.hpp"
+#include "generated/shaders/draw_path.vert.hpp"
+#include "generated/shaders/draw_raster_order_path.frag.hpp"
 #include "generated/shaders/hlsl.glsl.hpp"
-#include "generated/shaders/bezier_utils.glsl.hpp"
-#include "generated/shaders/render_atlas.glsl.hpp"
-#include "generated/shaders/tessellate.glsl.hpp"
 
 namespace rive::gpu::d3d_utils
 {
@@ -125,18 +123,30 @@
         case DrawType::midpointFanCenterAAPatches:
         case DrawType::outerCurvePatches:
             s << glsl::draw_path_common << '\n';
-            s << (interlockMode == InterlockMode::rasterOrdering
-                      ? glsl::draw_path
-                      : glsl::atomic_draw)
-              << '\n';
+            if (interlockMode == gpu::InterlockMode::rasterOrdering)
+            {
+                s << glsl::draw_path_vert << '\n';
+                s << glsl::draw_raster_order_path_frag << '\n';
+            }
+            else
+            {
+                assert(interlockMode == gpu::InterlockMode::atomics);
+                s << glsl::atomic_draw << '\n';
+            }
             break;
         case DrawType::interiorTriangulation:
         case DrawType::atlasBlit:
             s << glsl::draw_path_common << '\n';
-            s << (interlockMode == InterlockMode::rasterOrdering
-                      ? glsl::draw_path
-                      : glsl::atomic_draw)
-              << '\n';
+            if (interlockMode == gpu::InterlockMode::rasterOrdering)
+            {
+                s << glsl::draw_path_vert << '\n';
+                s << glsl::draw_raster_order_path_frag << '\n';
+            }
+            else
+            {
+                assert(interlockMode == gpu::InterlockMode::atomics);
+                s << glsl::atomic_draw << '\n';
+            }
             break;
         case DrawType::imageRect:
             assert(interlockMode == InterlockMode::atomics);
@@ -146,7 +156,8 @@
         case DrawType::imageMesh:
             if (interlockMode == InterlockMode::rasterOrdering)
             {
-                s << glsl::draw_image_mesh << '\n';
+                s << glsl::draw_image_mesh_vert << '\n';
+                s << glsl::draw_raster_order_image_mesh_frag << '\n';
             }
             else
             {
diff --git a/renderer/src/d3d11/render_context_d3d_impl.cpp b/renderer/src/d3d11/render_context_d3d_impl.cpp
index 07c0263..1bdf529 100644
--- a/renderer/src/d3d11/render_context_d3d_impl.cpp
+++ b/renderer/src/d3d11/render_context_d3d_impl.cpp
@@ -9,20 +9,8 @@
 #include "rive/renderer/texture.hpp"
 
 #include <D3DCompiler.h>
-#include <sstream>
 
-#include "generated/shaders/advanced_blend.glsl.hpp"
-#include "generated/shaders/atomic_draw.glsl.hpp"
-#include "generated/shaders/color_ramp.glsl.hpp"
-#include "generated/shaders/constants.glsl.hpp"
-#include "generated/shaders/common.glsl.hpp"
-#include "generated/shaders/draw_image_mesh.glsl.hpp"
-#include "generated/shaders/draw_path_common.glsl.hpp"
-#include "generated/shaders/draw_path.glsl.hpp"
-#include "generated/shaders/hlsl.glsl.hpp"
-#include "generated/shaders/bezier_utils.glsl.hpp"
-#include "generated/shaders/render_atlas.glsl.hpp"
-#include "generated/shaders/tessellate.glsl.hpp"
+#include "generated/shaders/tessellate.glsl.exports.h"
 
 // offline shaders
 namespace shader
diff --git a/renderer/src/d3d12/d3d12_pipeline_manager.cpp b/renderer/src/d3d12/d3d12_pipeline_manager.cpp
index 50270d4..7db9c48 100644
--- a/renderer/src/d3d12/d3d12_pipeline_manager.cpp
+++ b/renderer/src/d3d12/d3d12_pipeline_manager.cpp
@@ -4,18 +4,7 @@
 #include "rive/renderer/d3d12/d3d12_pipeline_manager.hpp"
 #include "rive/renderer/d3d/d3d_constants.hpp"
 
-#include "generated/shaders/advanced_blend.glsl.hpp"
-#include "generated/shaders/atomic_draw.glsl.hpp"
-#include "generated/shaders/color_ramp.glsl.hpp"
-#include "generated/shaders/constants.glsl.hpp"
-#include "generated/shaders/common.glsl.hpp"
-#include "generated/shaders/draw_image_mesh.glsl.hpp"
-#include "generated/shaders/draw_path_common.glsl.hpp"
-#include "generated/shaders/draw_path.glsl.hpp"
-#include "generated/shaders/hlsl.glsl.hpp"
-#include "generated/shaders/bezier_utils.glsl.hpp"
-#include "generated/shaders/render_atlas.glsl.hpp"
-#include "generated/shaders/tessellate.glsl.hpp"
+#include "generated/shaders/tessellate.glsl.exports.h"
 
 // offline shaders
 namespace shader
diff --git a/renderer/src/gl/pls_impl_ext_native.cpp b/renderer/src/gl/pls_impl_ext_native.cpp
index 33a3735..82a2a87 100644
--- a/renderer/src/gl/pls_impl_ext_native.cpp
+++ b/renderer/src/gl/pls_impl_ext_native.cpp
@@ -10,7 +10,7 @@
 #include "shaders/constants.glsl"
 #include <sstream>
 
-#include "generated/shaders/pls_load_store_ext.exports.h"
+#include "generated/shaders/pls_load_store_ext.glsl.exports.h"
 
 namespace rive::gpu
 {
diff --git a/renderer/src/gl/pls_impl_rw_texture.cpp b/renderer/src/gl/pls_impl_rw_texture.cpp
index 668f47b..9d6772b 100644
--- a/renderer/src/gl/pls_impl_rw_texture.cpp
+++ b/renderer/src/gl/pls_impl_rw_texture.cpp
@@ -8,7 +8,7 @@
 #include "shaders/constants.glsl"
 #include "rive/renderer/gl/gl_utils.hpp"
 
-#include "generated/shaders/glsl.exports.h"
+#include "generated/shaders/glsl.glsl.exports.h"
 
 namespace rive::gpu
 {
diff --git a/renderer/src/gl/pls_impl_webgl.cpp b/renderer/src/gl/pls_impl_webgl.cpp
index e4c39af..73100ae 100644
--- a/renderer/src/gl/pls_impl_webgl.cpp
+++ b/renderer/src/gl/pls_impl_webgl.cpp
@@ -8,7 +8,7 @@
 #include "rive/renderer/gl/render_target_gl.hpp"
 #include "shaders/constants.glsl"
 
-#include "generated/shaders/glsl.exports.h"
+#include "generated/shaders/glsl.glsl.exports.h"
 
 #ifdef RIVE_WEBGL
 #include <emscripten/emscripten.h>
diff --git a/renderer/src/gl/render_context_gl_impl.cpp b/renderer/src/gl/render_context_gl_impl.cpp
index ddd5974..a49d497 100644
--- a/renderer/src/gl/render_context_gl_impl.cpp
+++ b/renderer/src/gl/render_context_gl_impl.cpp
@@ -18,8 +18,12 @@
 #include "generated/shaders/constants.glsl.hpp"
 #include "generated/shaders/common.glsl.hpp"
 #include "generated/shaders/draw_path_common.glsl.hpp"
-#include "generated/shaders/draw_path.glsl.hpp"
-#include "generated/shaders/draw_image_mesh.glsl.hpp"
+#include "generated/shaders/draw_path.vert.hpp"
+#include "generated/shaders/draw_raster_order_path.frag.hpp"
+#include "generated/shaders/draw_msaa_path.frag.hpp"
+#include "generated/shaders/draw_image_mesh.vert.hpp"
+#include "generated/shaders/draw_raster_order_image_mesh.frag.hpp"
+#include "generated/shaders/draw_msaa_image_mesh.frag.hpp"
 #include "generated/shaders/bezier_utils.glsl.hpp"
 #include "generated/shaders/tessellate.glsl.hpp"
 #include "generated/shaders/render_atlas.glsl.hpp"
@@ -1098,9 +1102,18 @@
             }
             defines.push_back(GLSL_DRAW_PATH);
             sources.push_back(gpu::glsl::draw_path_common);
-            sources.push_back(interlockMode == gpu::InterlockMode::atomics
-                                  ? gpu::glsl::atomic_draw
-                                  : gpu::glsl::draw_path);
+            if (interlockMode == gpu::InterlockMode::atomics)
+            {
+                sources.push_back(gpu::glsl::atomic_draw);
+            }
+            else
+            {
+
+                sources.push_back(gpu::glsl::draw_path_vert);
+                sources.push_back(interlockMode == gpu::InterlockMode::msaa
+                                      ? gpu::glsl::draw_msaa_path_frag
+                                      : gpu::glsl::draw_raster_order_path_frag);
+            }
             break;
         case gpu::DrawType::msaaStencilClipReset:
             assert(interlockMode == gpu::InterlockMode::msaa);
@@ -1128,9 +1141,18 @@
         case gpu::DrawType::interiorTriangulation:
             defines.push_back(GLSL_DRAW_INTERIOR_TRIANGLES);
             sources.push_back(gpu::glsl::draw_path_common);
-            sources.push_back(interlockMode == gpu::InterlockMode::atomics
-                                  ? gpu::glsl::atomic_draw
-                                  : gpu::glsl::draw_path);
+            if (interlockMode == gpu::InterlockMode::atomics)
+            {
+                sources.push_back(gpu::glsl::atomic_draw);
+            }
+            else
+            {
+
+                sources.push_back(gpu::glsl::draw_path_vert);
+                sources.push_back(interlockMode == gpu::InterlockMode::msaa
+                                      ? gpu::glsl::draw_msaa_path_frag
+                                      : gpu::glsl::draw_raster_order_path_frag);
+            }
             break;
         case gpu::DrawType::imageRect:
             assert(interlockMode == gpu::InterlockMode::atomics);
@@ -1149,7 +1171,11 @@
             }
             else
             {
-                sources.push_back(gpu::glsl::draw_image_mesh);
+                sources.push_back(gpu::glsl::draw_image_mesh_vert);
+                sources.push_back(
+                    interlockMode == gpu::InterlockMode::msaa
+                        ? gpu::glsl::draw_msaa_image_mesh_frag
+                        : gpu::glsl::draw_raster_order_image_mesh_frag);
             }
             break;
         case gpu::DrawType::renderPassResolve:
diff --git a/renderer/src/gpu.cpp b/renderer/src/gpu.cpp
index 97d2c3e..d431841 100644
--- a/renderer/src/gpu.cpp
+++ b/renderer/src/gpu.cpp
@@ -10,7 +10,7 @@
 #include "rive_render_paint.hpp"
 #include "gradient.hpp"
 
-#include "generated/shaders/draw_path.exports.h"
+#include "generated/shaders/draw_path.vert.exports.h"
 
 namespace rive::gpu
 {
diff --git a/renderer/src/metal/background_shader_compiler.h b/renderer/src/metal/background_shader_compiler.h
index 115969d..3e0993a 100644
--- a/renderer/src/metal/background_shader_compiler.h
+++ b/renderer/src/metal/background_shader_compiler.h
@@ -14,8 +14,8 @@
 
 namespace rive::gpu
 {
-// Defines a job to compile a "draw" shader -- either draw_path.glsl or
-// draw_image_mesh.glsl, with a specific set of features enabled.
+// Defines a job to compile a "draw" shader, with a specific set of features
+// enabled.
 struct BackgroundCompileJob
 {
     gpu::DrawType drawType;
@@ -29,9 +29,8 @@
 #endif
 };
 
-// Compiles "draw" shaders in a background thread. A "draw" shaders is either
-// draw_path.glsl or draw_image_mesh.glsl, with a specific set of features
-// enabled.
+// Compiles "draw" shaders in a background thread, with a specific set of
+// features enabled.
 class BackgroundShaderCompiler
 {
 public:
diff --git a/renderer/src/metal/background_shader_compiler.mm b/renderer/src/metal/background_shader_compiler.mm
index 4715ec0..96aee57 100644
--- a/renderer/src/metal/background_shader_compiler.mm
+++ b/renderer/src/metal/background_shader_compiler.mm
@@ -9,8 +9,10 @@
 #include "generated/shaders/common.glsl.hpp"
 #include "generated/shaders/advanced_blend.glsl.hpp"
 #include "generated/shaders/draw_path_common.glsl.hpp"
-#include "generated/shaders/draw_path.glsl.hpp"
-#include "generated/shaders/draw_image_mesh.glsl.hpp"
+#include "generated/shaders/draw_path.vert.hpp"
+#include "generated/shaders/draw_raster_order_path.frag.hpp"
+#include "generated/shaders/draw_image_mesh.vert.hpp"
+#include "generated/shaders/draw_raster_order_image_mesh.frag.hpp"
 
 #ifndef RIVE_IOS
 // iOS doesn't need the atomic shaders; every non-simulated iOS device supports
@@ -144,15 +146,20 @@
                 defines[@GLSL_ENABLE_INSTANCE_INDEX] = @"";
                 defines[@GLSL_DRAW_PATH] = @"";
                 [source appendFormat:@"%s\n", gpu::glsl::draw_path_common];
-#ifdef RIVE_IOS
-                [source appendFormat:@"%s\n", gpu::glsl::draw_path];
-#else
-                [source appendFormat:@"%s\n",
-                                     interlockMode ==
-                                             gpu::InterlockMode::rasterOrdering
-                                         ? gpu::glsl::draw_path
-                                         : gpu::glsl::atomic_draw];
+#ifndef RIVE_IOS
+                if (interlockMode == gpu::InterlockMode::atomics)
+                {
+                    [source appendFormat:@"%s\n", gpu::glsl::atomic_draw];
+                }
+                else
 #endif
+                {
+                    assert(interlockMode == gpu::InterlockMode::rasterOrdering);
+                    [source appendFormat:@"%s\n", gpu::glsl::draw_path_vert];
+                    [source
+                        appendFormat:@"%s\n",
+                                     gpu::glsl::draw_raster_order_path_frag];
+                }
                 break;
             case DrawType::atlasBlit:
                 defines[@GLSL_ATLAS_BLIT] = @"1";
@@ -160,15 +167,20 @@
             case DrawType::interiorTriangulation:
                 defines[@GLSL_DRAW_INTERIOR_TRIANGLES] = @"";
                 [source appendFormat:@"%s\n", gpu::glsl::draw_path_common];
-#ifdef RIVE_IOS
-                [source appendFormat:@"%s\n", gpu::glsl::draw_path];
-#else
-                [source appendFormat:@"%s\n",
-                                     interlockMode ==
-                                             gpu::InterlockMode::rasterOrdering
-                                         ? gpu::glsl::draw_path
-                                         : gpu::glsl::atomic_draw];
+#ifndef RIVE_IOS
+                if (interlockMode == gpu::InterlockMode::atomics)
+                {
+                    [source appendFormat:@"%s\n", gpu::glsl::atomic_draw];
+                }
+                else
 #endif
+                {
+                    assert(interlockMode == gpu::InterlockMode::rasterOrdering);
+                    [source appendFormat:@"%s\n", gpu::glsl::draw_path_vert];
+                    [source
+                        appendFormat:@"%s\n",
+                                     gpu::glsl::draw_raster_order_path_frag];
+                }
                 break;
             case DrawType::imageRect:
 #ifdef RIVE_IOS
@@ -184,19 +196,22 @@
             case DrawType::imageMesh:
                 defines[@GLSL_DRAW_IMAGE] = @"";
                 defines[@GLSL_DRAW_IMAGE_MESH] = @"";
-#ifdef RIVE_IOS
-                [source appendFormat:@"%s\n", gpu::glsl::draw_image_mesh];
-#else
-                if (interlockMode == gpu::InterlockMode::rasterOrdering)
-                {
-                    [source appendFormat:@"%s\n", gpu::glsl::draw_image_mesh];
-                }
-                else
+#ifndef RIVE_IOS
+                if (interlockMode == gpu::InterlockMode::atomics)
                 {
                     [source appendFormat:@"%s\n", gpu::glsl::draw_path_common];
                     [source appendFormat:@"%s\n", gpu::glsl::atomic_draw];
                 }
+                else
 #endif
+                {
+                    assert(interlockMode == gpu::InterlockMode::rasterOrdering);
+                    [source
+                        appendFormat:@"%s\n", gpu::glsl::draw_image_mesh_vert];
+                    [source appendFormat:@"%s\n",
+                                         gpu::glsl::
+                                             draw_raster_order_image_mesh_frag];
+                }
                 break;
             case DrawType::renderPassInitialize:
 #ifdef RIVE_IOS
diff --git a/renderer/src/metal/render_context_metal_impl.mm b/renderer/src/metal/render_context_metal_impl.mm
index 4626399..ad779a7 100644
--- a/renderer/src/metal/render_context_metal_impl.mm
+++ b/renderer/src/metal/render_context_metal_impl.mm
@@ -11,8 +11,8 @@
 #include "shaders/constants.glsl"
 #include <sstream>
 
-#include "generated/shaders/color_ramp.exports.h"
-#include "generated/shaders/tessellate.exports.h"
+#include "generated/shaders/color_ramp.glsl.exports.h"
+#include "generated/shaders/tessellate.glsl.exports.h"
 
 #if defined(RIVE_IOS_SIMULATOR)
 #import <mach-o/arch.h>
diff --git a/renderer/src/shaders/Makefile b/renderer/src/shaders/Makefile
index ba0e804..f7ae789 100644
--- a/renderer/src/shaders/Makefile
+++ b/renderer/src/shaders/Makefile
@@ -7,7 +7,7 @@
 FLAGS :=
 
 ## Shader minification.
-MINIFY_INPUTS := $(wildcard *.glsl)
+MINIFY_INPUTS := $(wildcard *.glsl) $(wildcard *.vert) $(wildcard *.frag)
 MINIFY_EXPORT_OUTPUTS := $(addprefix $(OUT)/, $(patsubst %.glsl, %.exports.h, $(MINIFY_INPUTS)))
 MINIFY_GLSL_OUTPUTS := $(addprefix $(OUT)/, $(patsubst %.glsl, %.minified.glsl, $(MINIFY_INPUTS)))
 MINIFY_HPP_OUTPUTS := $(addprefix $(OUT)/, $(patsubst %.glsl, %.glsl.hpp, $(MINIFY_INPUTS)))
diff --git a/renderer/src/shaders/copy_attachment_to_attachment.glsl b/renderer/src/shaders/copy_attachment_to_attachment.glsl
index 1e2f6ab..2a4cb0c 100644
--- a/renderer/src/shaders/copy_attachment_to_attachment.glsl
+++ b/renderer/src/shaders/copy_attachment_to_attachment.glsl
@@ -23,6 +23,8 @@
        binding = MSAA_COLOR_SEED_IDX,
        set = PLS_TEXTURE_BINDINGS_SET) uniform lowp subpassInput
     inputAttachment;
+
 layout(location = 0) out half4 outputColor;
+
 void main() { outputColor = subpassLoad(inputAttachment); }
 #endif
diff --git a/renderer/src/shaders/draw_image_mesh.glsl b/renderer/src/shaders/draw_image_mesh.glsl
deleted file mode 100644
index b606c15..0000000
--- a/renderer/src/shaders/draw_image_mesh.glsl
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright 2023 Rive
- */
-
-#ifdef @VERTEX
-ATTR_BLOCK_BEGIN(PositionAttr)
-ATTR(0, float2, @a_position);
-ATTR_BLOCK_END
-
-ATTR_BLOCK_BEGIN(UVAttr)
-ATTR(1, float2, @a_texCoord);
-ATTR_BLOCK_END
-#endif
-
-VARYING_BLOCK_BEGIN
-NO_PERSPECTIVE VARYING(0, float2, v_texCoord);
-#ifdef @ENABLE_CLIPPING
-@OPTIONALLY_FLAT VARYING(1, half, v_clipID);
-#endif
-#if defined(@ENABLE_CLIP_RECT) && !defined(@RENDER_MODE_MSAA)
-NO_PERSPECTIVE VARYING(2, float4, v_clipRect);
-#endif
-VARYING_BLOCK_END
-
-#ifdef @VERTEX
-VERTEX_TEXTURE_BLOCK_BEGIN
-VERTEX_TEXTURE_BLOCK_END
-
-IMAGE_MESH_VERTEX_MAIN(@drawVertexMain,
-                       PositionAttr,
-                       position,
-                       UVAttr,
-                       uv,
-                       _vertexID)
-{
-    ATTR_UNPACK(_vertexID, position, @a_position, float2);
-    ATTR_UNPACK(_vertexID, uv, @a_texCoord, float2);
-
-    VARYING_INIT(v_texCoord, float2);
-#ifdef @ENABLE_CLIPPING
-    VARYING_INIT(v_clipID, half);
-#endif
-#if defined(@ENABLE_CLIP_RECT) && !defined(@RENDER_MODE_MSAA)
-    VARYING_INIT(v_clipRect, float4);
-#endif
-
-    float2 vertexPosition =
-        MUL(make_float2x2(imageDrawUniforms.viewMatrix), @a_position) +
-        imageDrawUniforms.translate;
-    v_texCoord = @a_texCoord;
-#ifdef @ENABLE_CLIPPING
-    if (@ENABLE_CLIPPING)
-    {
-        v_clipID = id_bits_to_f16(imageDrawUniforms.clipID,
-                                  uniforms.pathIDGranularity);
-    }
-#endif
-#ifdef @ENABLE_CLIP_RECT
-    if (@ENABLE_CLIP_RECT)
-    {
-#ifndef @RENDER_MODE_MSAA
-        v_clipRect = find_clip_rect_coverage_distances(
-            make_float2x2(imageDrawUniforms.clipRectInverseMatrix),
-            imageDrawUniforms.clipRectInverseTranslate,
-            vertexPosition);
-#else
-        set_clip_rect_plane_distances(
-            make_float2x2(imageDrawUniforms.clipRectInverseMatrix),
-            imageDrawUniforms.clipRectInverseTranslate,
-            vertexPosition);
-#endif
-    }
-#endif // ENABLE_CLIP_RECT
-    float4 pos = RENDER_TARGET_COORD_TO_CLIP_COORD(vertexPosition);
-#ifdef @POST_INVERT_Y
-    pos.y = -pos.y;
-#endif
-#ifdef @RENDER_MODE_MSAA
-    pos.z = normalize_z_index(imageDrawUniforms.zIndex);
-#endif
-
-    VARYING_PACK(v_texCoord);
-#ifdef @ENABLE_CLIPPING
-    VARYING_PACK(v_clipID);
-#endif
-#ifdef @ENABLE_CLIP_RECT
-    VARYING_PACK(v_clipRect);
-#endif
-    EMIT_VERTEX(pos);
-}
-#endif
-
-#ifdef @FRAGMENT
-FRAG_TEXTURE_BLOCK_BEGIN
-TEXTURE_RGBA8(PER_DRAW_BINDINGS_SET, IMAGE_TEXTURE_IDX, @imageTexture);
-#if defined(@RENDER_MODE_MSAA) && defined(@ENABLE_ADVANCED_BLEND)
-DST_COLOR_TEXTURE(@dstColorTexture);
-#endif
-FRAG_TEXTURE_BLOCK_END
-
-DYNAMIC_SAMPLER_BLOCK_BEGIN
-SAMPLER_DYNAMIC(PER_DRAW_BINDINGS_SET, IMAGE_SAMPLER_IDX, imageSampler)
-DYNAMIC_SAMPLER_BLOCK_END
-
-FRAG_STORAGE_BUFFER_BLOCK_BEGIN
-FRAG_STORAGE_BUFFER_BLOCK_END
-
-#ifndef @RENDER_MODE_MSAA
-
-PLS_BLOCK_BEGIN
-PLS_DECL4F(COLOR_PLANE_IDX, colorBuffer);
-PLS_DECLUI(CLIP_PLANE_IDX, clipBuffer);
-PLS_DECL4F(SCRATCH_COLOR_PLANE_IDX, scratchColorBuffer);
-PLS_DECLUI(COVERAGE_PLANE_IDX, coverageCountBuffer);
-PLS_BLOCK_END
-
-PLS_MAIN_WITH_IMAGE_UNIFORMS(@drawFragmentMain)
-{
-    VARYING_UNPACK(v_texCoord, float2);
-#ifdef @ENABLE_CLIPPING
-    VARYING_UNPACK(v_clipID, half);
-#endif
-#ifdef @ENABLE_CLIP_RECT
-    VARYING_UNPACK(v_clipRect, float4);
-#endif
-
-    half4 color = TEXTURE_SAMPLE_DYNAMIC_LODBIAS(@imageTexture,
-                                                 imageSampler,
-                                                 v_texCoord,
-                                                 uniforms.mipMapLODBias);
-
-    half coverage = 1.;
-
-#ifdef @ENABLE_CLIP_RECT
-    if (@ENABLE_CLIP_RECT)
-    {
-        half clipRectCoverage = min_value(cast_float4_to_half4(v_clipRect));
-        coverage = clamp(clipRectCoverage, make_half(.0), coverage);
-    }
-#endif
-
-    PLS_INTERLOCK_BEGIN;
-
-#ifdef @ENABLE_CLIPPING
-    if (@ENABLE_CLIPPING && v_clipID != .0)
-    {
-        half2 clipData = unpackHalf2x16(PLS_LOADUI(clipBuffer));
-        half clipContentID = clipData.g;
-        half clipCoverage =
-            clipContentID == v_clipID ? clipData.r : make_half(.0);
-        coverage = min(coverage, clipCoverage);
-    }
-#endif
-
-    // Blend with the framebuffer color.
-    half4 dstColorPremul = PLS_LOAD4F(colorBuffer);
-#ifdef @ENABLE_ADVANCED_BLEND
-    if (@ENABLE_ADVANCED_BLEND && imageDrawUniforms.blendMode != BLEND_SRC_OVER)
-    {
-        color.rgb = advanced_color_blend(
-                        unmultiply_rgb(color),
-                        dstColorPremul,
-                        cast_uint_to_ushort(imageDrawUniforms.blendMode)) *
-                    color.a;
-    }
-#endif
-    color *= imageDrawUniforms.opacity * coverage;
-    color += dstColorPremul * (1. - color.a);
-
-    PLS_STORE4F(colorBuffer, color);
-    PLS_PRESERVE_UI(clipBuffer);
-    PLS_PRESERVE_UI(coverageCountBuffer);
-
-    PLS_INTERLOCK_END;
-
-    EMIT_PLS;
-}
-
-#else // !@RENDER_MODE_MSAA => @RENDER_MODE_MSAA
-
-FRAG_DATA_MAIN(half4, @drawFragmentMain)
-{
-    VARYING_UNPACK(v_texCoord, float2);
-
-    half4 color = TEXTURE_SAMPLE_DYNAMIC_LODBIAS(@imageTexture,
-                                                 imageSampler,
-                                                 v_texCoord,
-                                                 uniforms.mipMapLODBias) *
-                  imageDrawUniforms.opacity;
-
-#if defined(@ENABLE_ADVANCED_BLEND) && !defined(@FIXED_FUNCTION_COLOR_OUTPUT)
-    if (@ENABLE_ADVANCED_BLEND)
-    {
-        // Do the color portion of the blend mode in the shader.
-        half4 dstColorPremul = DST_COLOR_FETCH(@dstColorTexture);
-        color.rgb = advanced_color_blend(unmultiply_rgb(color),
-                                         dstColorPremul,
-                                         imageDrawUniforms.blendMode);
-        // Src-over blending is enabled, so just premultiply and let the HW
-        // finish the the the alpha portion of the blend mode.
-        color.rgb *= color.a;
-    }
-#endif // @ENABLE_ADVANCED_BLEND && !@FIXED_FUNCTION_COLOR_OUTPUT
-
-    EMIT_FRAG_DATA(color);
-}
-
-#endif // @RENDER_MODE_MSAA
-#endif // FRAGMENT
diff --git a/renderer/src/shaders/draw_image_mesh.vert b/renderer/src/shaders/draw_image_mesh.vert
new file mode 100644
index 0000000..e6873e3
--- /dev/null
+++ b/renderer/src/shaders/draw_image_mesh.vert
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2023 Rive
+ */
+
+#ifdef @VERTEX
+ATTR_BLOCK_BEGIN(PositionAttr)
+ATTR(0, float2, @a_position);
+ATTR_BLOCK_END
+
+ATTR_BLOCK_BEGIN(UVAttr)
+ATTR(1, float2, @a_texCoord);
+ATTR_BLOCK_END
+#endif
+
+VARYING_BLOCK_BEGIN
+NO_PERSPECTIVE VARYING(0, float2, v_texCoord);
+#ifdef @ENABLE_CLIPPING
+@OPTIONALLY_FLAT VARYING(1, half, v_clipID);
+#endif
+#if defined(@ENABLE_CLIP_RECT) && !defined(@RENDER_MODE_MSAA)
+NO_PERSPECTIVE VARYING(2, float4, v_clipRect);
+#endif
+VARYING_BLOCK_END
+
+#ifdef @VERTEX
+VERTEX_TEXTURE_BLOCK_BEGIN
+VERTEX_TEXTURE_BLOCK_END
+
+IMAGE_MESH_VERTEX_MAIN(@drawVertexMain,
+                       PositionAttr,
+                       position,
+                       UVAttr,
+                       uv,
+                       _vertexID)
+{
+    ATTR_UNPACK(_vertexID, position, @a_position, float2);
+    ATTR_UNPACK(_vertexID, uv, @a_texCoord, float2);
+
+    VARYING_INIT(v_texCoord, float2);
+#ifdef @ENABLE_CLIPPING
+    VARYING_INIT(v_clipID, half);
+#endif
+#if defined(@ENABLE_CLIP_RECT) && !defined(@RENDER_MODE_MSAA)
+    VARYING_INIT(v_clipRect, float4);
+#endif
+
+    float2 vertexPosition =
+        MUL(make_float2x2(imageDrawUniforms.viewMatrix), @a_position) +
+        imageDrawUniforms.translate;
+    v_texCoord = @a_texCoord;
+#ifdef @ENABLE_CLIPPING
+    if (@ENABLE_CLIPPING)
+    {
+        v_clipID = id_bits_to_f16(imageDrawUniforms.clipID,
+                                  uniforms.pathIDGranularity);
+    }
+#endif
+#ifdef @ENABLE_CLIP_RECT
+    if (@ENABLE_CLIP_RECT)
+    {
+#ifndef @RENDER_MODE_MSAA
+        v_clipRect = find_clip_rect_coverage_distances(
+            make_float2x2(imageDrawUniforms.clipRectInverseMatrix),
+            imageDrawUniforms.clipRectInverseTranslate,
+            vertexPosition);
+#else
+        set_clip_rect_plane_distances(
+            make_float2x2(imageDrawUniforms.clipRectInverseMatrix),
+            imageDrawUniforms.clipRectInverseTranslate,
+            vertexPosition);
+#endif
+    }
+#endif // ENABLE_CLIP_RECT
+    float4 pos = RENDER_TARGET_COORD_TO_CLIP_COORD(vertexPosition);
+#ifdef @POST_INVERT_Y
+    pos.y = -pos.y;
+#endif
+#ifdef @RENDER_MODE_MSAA
+    pos.z = normalize_z_index(imageDrawUniforms.zIndex);
+#endif
+
+    VARYING_PACK(v_texCoord);
+#ifdef @ENABLE_CLIPPING
+    VARYING_PACK(v_clipID);
+#endif
+#ifdef @ENABLE_CLIP_RECT
+    VARYING_PACK(v_clipRect);
+#endif
+    EMIT_VERTEX(pos);
+}
+#endif
diff --git a/renderer/src/shaders/draw_msaa_image_mesh.frag b/renderer/src/shaders/draw_msaa_image_mesh.frag
new file mode 100644
index 0000000..78029b1
--- /dev/null
+++ b/renderer/src/shaders/draw_msaa_image_mesh.frag
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2023 Rive
+ */
+
+#ifdef @FRAGMENT
+
+FRAG_TEXTURE_BLOCK_BEGIN
+TEXTURE_RGBA8(PER_DRAW_BINDINGS_SET, IMAGE_TEXTURE_IDX, @imageTexture);
+#ifdef @ENABLE_ADVANCED_BLEND
+DST_COLOR_TEXTURE(@dstColorTexture);
+#endif
+FRAG_TEXTURE_BLOCK_END
+
+DYNAMIC_SAMPLER_BLOCK_BEGIN
+SAMPLER_DYNAMIC(PER_DRAW_BINDINGS_SET, IMAGE_SAMPLER_IDX, imageSampler)
+DYNAMIC_SAMPLER_BLOCK_END
+
+FRAG_STORAGE_BUFFER_BLOCK_BEGIN
+FRAG_STORAGE_BUFFER_BLOCK_END
+
+FRAG_DATA_MAIN(half4, @drawFragmentMain)
+{
+    VARYING_UNPACK(v_texCoord, float2);
+
+    half4 color = TEXTURE_SAMPLE_DYNAMIC_LODBIAS(@imageTexture,
+                                                 imageSampler,
+                                                 v_texCoord,
+                                                 uniforms.mipMapLODBias) *
+                  imageDrawUniforms.opacity;
+
+#if defined(@ENABLE_ADVANCED_BLEND) && !defined(@FIXED_FUNCTION_COLOR_OUTPUT)
+    if (@ENABLE_ADVANCED_BLEND)
+    {
+        // Do the color portion of the blend mode in the shader.
+        half4 dstColorPremul = DST_COLOR_FETCH(@dstColorTexture);
+        color.rgb = advanced_color_blend(unmultiply_rgb(color),
+                                         dstColorPremul,
+                                         imageDrawUniforms.blendMode);
+        // Src-over blending is enabled, so just premultiply and let the HW
+        // finish the alpha portion of the blend mode.
+        color.rgb *= color.a;
+    }
+#endif // @ENABLE_ADVANCED_BLEND && !@FIXED_FUNCTION_COLOR_OUTPUT
+
+    EMIT_FRAG_DATA(color);
+}
+
+#endif // @FRAGMENT
diff --git a/renderer/src/shaders/draw_msaa_path.frag b/renderer/src/shaders/draw_msaa_path.frag
new file mode 100644
index 0000000..15fa1cb
--- /dev/null
+++ b/renderer/src/shaders/draw_msaa_path.frag
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2022 Rive
+ */
+
+#ifdef @FRAGMENT
+
+FRAG_DATA_MAIN(half4, @drawFragmentMain)
+{
+    VARYING_UNPACK(v_paint, float4);
+#ifdef @ATLAS_BLIT
+    VARYING_UNPACK(v_atlasCoord, float2);
+#endif
+#ifdef @ENABLE_ADVANCED_BLEND
+    VARYING_UNPACK(v_blendMode, half);
+#endif
+
+    half coverage =
+#ifdef @ATLAS_BLIT
+        filter_feather_atlas(
+            v_atlasCoord,
+            uniforms.atlasTextureInverseSize TEXTURE_CONTEXT_FORWARD);
+#else
+        1.;
+#endif
+    half4 color = find_paint_color(v_paint, coverage FRAGMENT_CONTEXT_UNPACK);
+
+#if defined(@ENABLE_ADVANCED_BLEND) && !defined(@FIXED_FUNCTION_COLOR_OUTPUT)
+    if (@ENABLE_ADVANCED_BLEND)
+    {
+        // Do the color portion of the blend mode in the shader.
+        //
+        // NOTE: "color" is already unmultiplied because
+        // GENERATE_PREMULTIPLIED_PAINT_COLORS is false when using advanced
+        // blend.
+        half4 dstColorPremul = DST_COLOR_FETCH(@dstColorTexture);
+        color.rgb = advanced_color_blend(color.rgb,
+                                         dstColorPremul,
+                                         cast_half_to_ushort(v_blendMode));
+        // Src-over blending is enabled, so just premultiply and let the HW
+        // finish the alpha portion of the blend mode.
+        color.rgb *= color.a;
+    }
+#endif // @ENABLE_ADVANCED_BLEND && !@FIXED_FUNCTION_COLOR_OUTPUT
+
+    // Certain platforms give us less control of the format of what we are
+    // rendering to. Specifically, we are auto converted from linear -> sRGB on
+    // render target writes in Unreal. In those cases we may need to end up in
+    // linear color space.
+#ifdef @NEEDS_GAMMA_CORRECTION
+    if (@NEEDS_GAMMA_CORRECTION)
+    {
+        color = gamma_to_linear(color);
+    }
+#endif
+
+    EMIT_FRAG_DATA(color);
+}
+
+#endif // @FRAGMENT
diff --git a/renderer/src/shaders/draw_path.glsl b/renderer/src/shaders/draw_path.glsl
deleted file mode 100644
index b585d32..0000000
--- a/renderer/src/shaders/draw_path.glsl
+++ /dev/null
@@ -1,720 +0,0 @@
-/*
- * Copyright 2022 Rive
- */
-
-#ifdef @ENABLE_ADVANCED_BLEND
-// If advanced blend is enabled, we generate unmultiplied paint colors in the
-// shader. Otherwise we would have to just turn around and unmultiply them in
-// order to run the blend equation.
-#define GENERATE_PREMULTIPLIED_PAINT_COLORS !@ENABLE_ADVANCED_BLEND
-#else
-// As long as advanced blend is not enabled, it's more efficient for the shader
-// to generate premultiplied paint colors from the start.
-#define GENERATE_PREMULTIPLIED_PAINT_COLORS true
-#endif
-
-#ifdef @VERTEX
-ATTR_BLOCK_BEGIN(Attrs)
-#ifdef @DRAW_INTERIOR_TRIANGLES
-ATTR(0, packed_float3, @a_triangleVertex);
-#else
-ATTR(0,
-     float4,
-     @a_patchVertexData); // [localVertexID, outset, fillCoverage, vertexType]
-ATTR(1, float4, @a_mirroredVertexData);
-#endif
-ATTR_BLOCK_END
-#endif
-
-VARYING_BLOCK_BEGIN
-NO_PERSPECTIVE VARYING(0, float4, v_paint);
-
-#ifdef @ATLAS_BLIT
-NO_PERSPECTIVE VARYING(1, float2, v_atlasCoord);
-#elif !defined(@RENDER_MODE_MSAA)
-#ifdef @DRAW_INTERIOR_TRIANGLES
-@OPTIONALLY_FLAT VARYING(1, half, v_windingWeight);
-#elif defined(@ENABLE_FEATHER)
-NO_PERSPECTIVE VARYING(2, float4, v_coverages);
-#else
-NO_PERSPECTIVE VARYING(2, half2, v_coverages);
-#endif //@DRAW_INTERIOR_TRIANGLES
-@OPTIONALLY_FLAT VARYING(3, half, v_pathID);
-#endif // !@RENDER_MODE_MSAA
-
-#ifdef @ENABLE_CLIPPING
-@OPTIONALLY_FLAT VARYING(4, half2, v_clipIDs); // [clipID, outerClipID]
-#endif
-#if defined(@ENABLE_CLIP_RECT) && !defined(@RENDER_MODE_MSAA)
-NO_PERSPECTIVE VARYING(5, float4, v_clipRect);
-#endif
-#ifdef @ENABLE_ADVANCED_BLEND
-@OPTIONALLY_FLAT VARYING(6, half, v_blendMode);
-#endif
-VARYING_BLOCK_END
-
-#ifdef @VERTEX
-VERTEX_MAIN(@drawVertexMain, Attrs, attrs, _vertexID, _instanceID)
-{
-#ifdef @DRAW_INTERIOR_TRIANGLES
-    ATTR_UNPACK(_vertexID, attrs, @a_triangleVertex, float3);
-#else
-    ATTR_UNPACK(_vertexID, attrs, @a_patchVertexData, float4);
-    ATTR_UNPACK(_vertexID, attrs, @a_mirroredVertexData, float4);
-#endif
-
-    VARYING_INIT(v_paint, float4);
-
-#ifdef @ATLAS_BLIT
-    VARYING_INIT(v_atlasCoord, float2);
-#elif !defined(@RENDER_MODE_MSAA)
-#ifdef @DRAW_INTERIOR_TRIANGLES
-    VARYING_INIT(v_windingWeight, half);
-#elif defined(@ENABLE_FEATHER)
-    VARYING_INIT(v_coverages, float4);
-#else
-    VARYING_INIT(v_coverages, half2);
-#endif //@DRAW_INTERIOR_TRIANGLES
-    VARYING_INIT(v_pathID, half);
-#endif // !@RENDER_MODE_MSAA
-
-#ifdef @ENABLE_CLIPPING
-    VARYING_INIT(v_clipIDs, half2);
-#endif
-#if defined(@ENABLE_CLIP_RECT) && !defined(@RENDER_MODE_MSAA)
-    VARYING_INIT(v_clipRect, float4);
-#endif
-#ifdef @ENABLE_ADVANCED_BLEND
-    VARYING_INIT(v_blendMode, half);
-#endif
-
-    bool shouldDiscardVertex = false;
-    uint pathID;
-    float2 vertexPosition;
-#ifdef @RENDER_MODE_MSAA
-    ushort pathZIndex;
-#endif
-
-#ifdef @ATLAS_BLIT
-    vertexPosition =
-        unpack_atlas_coverage_vertex(@a_triangleVertex,
-                                     pathID,
-#ifdef @RENDER_MODE_MSAA
-                                     pathZIndex,
-#endif
-                                     v_atlasCoord VERTEX_CONTEXT_UNPACK);
-#elif defined(@DRAW_INTERIOR_TRIANGLES)
-    vertexPosition = unpack_interior_triangle_vertex(@a_triangleVertex,
-                                                     pathID
-#ifdef @RENDER_MODE_MSAA
-                                                     ,
-                                                     pathZIndex
-#else
-                                                     ,
-                                                     v_windingWeight
-#endif
-                                                         VERTEX_CONTEXT_UNPACK);
-#else // !@DRAW_INTERIOR_TRIANGLES
-    float4 coverages;
-    shouldDiscardVertex =
-        !unpack_tessellated_path_vertex(@a_patchVertexData,
-                                        @a_mirroredVertexData,
-                                        _instanceID,
-                                        pathID,
-                                        vertexPosition
-#ifndef @RENDER_MODE_MSAA
-                                        ,
-                                        coverages
-#else
-                                        ,
-                                        pathZIndex
-#endif
-                                            VERTEX_CONTEXT_UNPACK);
-#ifndef @RENDER_MODE_MSAA
-#ifdef @ENABLE_FEATHER
-    v_coverages = coverages;
-#else
-    v_coverages.xy = cast_float2_to_half2(coverages.xy);
-#endif
-#endif
-#endif // !DRAW_INTERIOR_TRIANGLES
-
-    uint2 paintData = STORAGE_BUFFER_LOAD2(@paintBuffer, pathID);
-
-#if !defined(@ATLAS_BLIT) && !defined(@RENDER_MODE_MSAA)
-    // Encode the integral pathID as a "half" that we know the hardware will see
-    // as a unique value in the fragment shader.
-    v_pathID = id_bits_to_f16(pathID, uniforms.pathIDGranularity);
-
-    // Indicate even-odd fill rule by making pathID negative.
-    if ((paintData.x & PAINT_FLAG_EVEN_ODD_FILL) != 0u)
-        v_pathID = -v_pathID;
-#endif // !@ATLAS_BLIT && !@RENDER_MODE_MSAA
-
-    uint paintType = paintData.x & 0xfu;
-#ifdef @ENABLE_CLIPPING
-    if (@ENABLE_CLIPPING)
-    {
-        uint clipIDBits =
-            (paintType == CLIP_UPDATE_PAINT_TYPE ? paintData.y : paintData.x) >>
-            16;
-        half clipID = id_bits_to_f16(clipIDBits, uniforms.pathIDGranularity);
-        // Negative clipID means to update the clip buffer instead of the color
-        // buffer.
-        if (paintType == CLIP_UPDATE_PAINT_TYPE)
-            clipID = -clipID;
-        v_clipIDs.x = clipID;
-    }
-#endif
-#ifdef @ENABLE_ADVANCED_BLEND
-    if (@ENABLE_ADVANCED_BLEND)
-    {
-        v_blendMode = float((paintData.x >> 4) & 0xfu);
-    }
-#endif
-
-    // Paint matrices operate on the fragment shader's "_fragCoord", which is
-    // bottom-up in GL.
-    float2 fragCoord = vertexPosition;
-#ifdef @FRAMEBUFFER_BOTTOM_UP
-    fragCoord.y = float(uniforms.renderTargetHeight) - fragCoord.y;
-#endif
-
-#ifdef @ENABLE_CLIP_RECT
-    if (@ENABLE_CLIP_RECT)
-    {
-        // clipRectInverseMatrix transforms from pixel coordinates to a space
-        // where the clipRect is the normalized rectangle: [-1, -1, 1, 1].
-        float2x2 clipRectInverseMatrix = make_float2x2(
-            STORAGE_BUFFER_LOAD4(@paintAuxBuffer, pathID * 4u + 2u));
-        float4 clipRectInverseTranslate =
-            STORAGE_BUFFER_LOAD4(@paintAuxBuffer, pathID * 4u + 3u);
-#ifndef @RENDER_MODE_MSAA
-        v_clipRect =
-            find_clip_rect_coverage_distances(clipRectInverseMatrix,
-                                              clipRectInverseTranslate.xy,
-                                              fragCoord);
-#else  // !@RENDER_MODE_MSAA => @RENDER_MODE_MSAA
-        set_clip_rect_plane_distances(clipRectInverseMatrix,
-                                      clipRectInverseTranslate.xy,
-                                      fragCoord);
-#endif // @RENDER_MODE_MSAA
-    }
-#endif // ENABLE_CLIP_RECT
-       // #endif // TARGET_VULKAN
-
-    // Unpack the paint once we have a position.
-    if (paintType == SOLID_COLOR_PAINT_TYPE)
-    {
-        half4 color = unpackUnorm4x8(paintData.y);
-        if (GENERATE_PREMULTIPLIED_PAINT_COLORS)
-            color.rgb *= color.a;
-        v_paint = float4(color);
-    }
-#ifdef @ENABLE_CLIPPING
-    else if (@ENABLE_CLIPPING && paintType == CLIP_UPDATE_PAINT_TYPE)
-    {
-        half outerClipID =
-            id_bits_to_f16(paintData.x >> 16, uniforms.pathIDGranularity);
-        v_clipIDs.y = outerClipID;
-    }
-#endif
-    else
-    {
-        float2x2 paintMatrix =
-            make_float2x2(STORAGE_BUFFER_LOAD4(@paintAuxBuffer, pathID * 4u));
-        float4 paintTranslate =
-            STORAGE_BUFFER_LOAD4(@paintAuxBuffer, pathID * 4u + 1u);
-        float2 paintCoord = MUL(paintMatrix, fragCoord) + paintTranslate.xy;
-        if (paintType == LINEAR_GRADIENT_PAINT_TYPE ||
-            paintType == RADIAL_GRADIENT_PAINT_TYPE)
-        {
-            // v_paint.a contains "-row" of the gradient ramp at texel center,
-            // in normalized space.
-            v_paint.a = -uintBitsToFloat(paintData.y);
-            // abs(v_paint.b) contains either:
-            //   - 2 if the gradient ramp spans an entire row.
-            //   - x0 of the gradient ramp in normalized space, if it's a simple
-            //   2-texel ramp.
-            float gradientSpan = paintTranslate.z;
-            // gradientSpan is either ~1 (complex gradients span the whole width
-            // of the texture minus 1px), or 1/GRAD_TEXTURE_WIDTH (simple
-            // gradients span 1px).
-            if (gradientSpan > .9)
-            {
-                // Complex ramps span an entire row. Set it to 2 to convey this.
-                v_paint.b = 2.;
-            }
-            else
-            {
-                // This is a simple ramp.
-                v_paint.b = paintTranslate.w;
-            }
-            if (paintType == LINEAR_GRADIENT_PAINT_TYPE)
-            {
-                // The paint is a linear gradient.
-                v_paint.g = .0;
-                v_paint.r = paintCoord.x;
-            }
-            else
-            {
-                // The paint is a radial gradient. Mark v_paint.b negative to
-                // indicate this to the fragment shader. (v_paint.b can't be
-                // zero because the gradient ramp is aligned on pixel centers,
-                // so negating it will always produce a negative number.)
-                v_paint.b = -v_paint.b;
-                v_paint.rg = paintCoord.xy;
-            }
-        }
-        else // IMAGE_PAINT_TYPE
-        {
-            // v_paint.a <= -1. signals that the paint is an image.
-            // -v_paint.a - 2 is the texture mipmap level-of-detail.
-            // v_paint.b is the image opacity.
-            // v_paint.rg is the normalized image texture coordinate (built into
-            // the paintMatrix).
-            float opacity = uintBitsToFloat(paintData.y);
-            float lod = paintTranslate.z;
-            v_paint = float4(paintCoord.x, paintCoord.y, opacity, -2. - lod);
-        }
-    }
-
-    float4 pos;
-    if (!shouldDiscardVertex)
-    {
-        pos = RENDER_TARGET_COORD_TO_CLIP_COORD(vertexPosition);
-#ifdef @POST_INVERT_Y
-        pos.y = -pos.y;
-#endif
-#ifdef @RENDER_MODE_MSAA
-        pos.z = normalize_z_index(pathZIndex);
-#endif
-    }
-    else
-    {
-        pos = float4(uniforms.vertexDiscardValue,
-                     uniforms.vertexDiscardValue,
-                     uniforms.vertexDiscardValue,
-                     uniforms.vertexDiscardValue);
-    }
-
-    VARYING_PACK(v_paint);
-#ifdef @ATLAS_BLIT
-    VARYING_PACK(v_atlasCoord);
-#elif !defined(@RENDER_MODE_MSAA)
-#ifdef @DRAW_INTERIOR_TRIANGLES
-    VARYING_PACK(v_windingWeight);
-#elif defined(@ENABLE_FEATHER)
-    VARYING_PACK(v_coverages);
-#else
-    VARYING_PACK(v_coverages);
-#endif //@DRAW_INTERIOR_TRIANGLES
-    VARYING_PACK(v_pathID);
-#endif // !@RENDER_MODE_MSAA
-
-#ifdef @ENABLE_CLIPPING
-    VARYING_PACK(v_clipIDs);
-#endif
-#ifdef @ENABLE_CLIP_RECT
-    VARYING_PACK(v_clipRect);
-#endif
-#ifdef @ENABLE_ADVANCED_BLEND
-    VARYING_PACK(v_blendMode);
-#endif
-    EMIT_VERTEX(pos);
-}
-#endif
-
-#ifdef @FRAGMENT
-FRAG_STORAGE_BUFFER_BLOCK_BEGIN
-FRAG_STORAGE_BUFFER_BLOCK_END
-
-INLINE half4 find_paint_color(float4 paint,
-                              float coverage FRAGMENT_CONTEXT_DECL)
-{
-    half4 color;
-    if (paint.a >= .0) // Is the paint a solid color?
-    {
-        // The vertex shader will have premultiplied 'paint' (or not) based on
-        // GENERATE_PREMULTIPLIED_PAINT_COLORS.
-        color = cast_float4_to_half4(paint);
-        if (GENERATE_PREMULTIPLIED_PAINT_COLORS)
-            color *= coverage;
-        else
-            color.a *= coverage;
-    }
-    else if (paint.a > -1.) // Is paint is a gradient (linear or radial)?
-    {
-        float t =
-            paint.b > .0 ? /*linear*/ paint.r : /*radial*/ length(paint.rg);
-        t = clamp(t, .0, 1.);
-        float span = abs(paint.b);
-        float x = span > 1.
-                      ? /*entire row*/ (1. - 1. / GRAD_TEXTURE_WIDTH) * t +
-                            (.5 / GRAD_TEXTURE_WIDTH)
-                      : /*two texels*/ (1. / GRAD_TEXTURE_WIDTH) * t + span;
-        float row = -paint.a;
-        // Our gradient texture is not mipmapped. Issue a texture-sample that
-        // explicitly does not find derivatives for LOD computation.
-        color =
-            TEXTURE_SAMPLE_LOD(@gradTexture, gradSampler, float2(x, row), .0);
-        color.a *= coverage;
-        // Gradients are always unmultiplied so we don't lose color data while
-        // doing the hardware filter.
-        if (GENERATE_PREMULTIPLIED_PAINT_COLORS)
-            color.rgb *= color.a;
-    }
-    else // The paint is an image.
-    {
-        half lod = -paint.a - 2.;
-        color = TEXTURE_SAMPLE_DYNAMIC_LOD(@imageTexture,
-                                           imageSampler,
-                                           paint.rg,
-                                           lod);
-        half opacity = paint.b * coverage;
-        // Images are always premultiplied so the (transparent) background color
-        // doesn't bleed into the edges during the hardware filter.
-        if (GENERATE_PREMULTIPLIED_PAINT_COLORS)
-            color *= opacity;
-        else
-            color = make_half4(unmultiply_rgb(color), color.a * opacity);
-    }
-    return color;
-}
-
-#ifndef @RENDER_MODE_MSAA
-
-PLS_BLOCK_BEGIN
-PLS_DECL4F(COLOR_PLANE_IDX, colorBuffer);
-PLS_DECLUI(CLIP_PLANE_IDX, clipBuffer);
-PLS_DECL4F(SCRATCH_COLOR_PLANE_IDX, scratchColorBuffer);
-PLS_DECLUI(COVERAGE_PLANE_IDX, coverageCountBuffer);
-PLS_BLOCK_END
-
-PLS_MAIN(@drawFragmentMain)
-{
-    VARYING_UNPACK(v_paint, float4);
-
-#ifdef @ATLAS_BLIT
-    VARYING_UNPACK(v_atlasCoord, float2);
-#elif !defined(@RENDER_MODE_MSAA)
-#ifdef @DRAW_INTERIOR_TRIANGLES
-    VARYING_UNPACK(v_windingWeight, half);
-#elif defined(@ENABLE_FEATHER)
-    VARYING_UNPACK(v_coverages, float4);
-#else
-    VARYING_UNPACK(v_coverages, half2);
-#endif //@DRAW_INTERIOR_TRIANGLES
-    VARYING_UNPACK(v_pathID, half);
-#endif // !@RENDER_MODE_MSAA
-
-#ifdef @ENABLE_CLIPPING
-    VARYING_UNPACK(v_clipIDs, half2);
-#endif
-#ifdef @ENABLE_CLIP_RECT
-    VARYING_UNPACK(v_clipRect, float4);
-#endif
-#ifdef @ENABLE_ADVANCED_BLEND
-    VARYING_UNPACK(v_blendMode, half);
-#endif
-
-#if !defined(@DRAW_INTERIOR_TRIANGLES) || defined(@ATLAS_BLIT)
-    // Interior triangles don't overlap, so don't need raster ordering.
-    PLS_INTERLOCK_BEGIN;
-#endif
-
-    half coverage;
-#ifdef @ATLAS_BLIT
-    coverage = filter_feather_atlas(
-        v_atlasCoord,
-        uniforms.atlasTextureInverseSize TEXTURE_CONTEXT_FORWARD);
-#else
-    half2 coverageData = unpackHalf2x16(PLS_LOADUI(coverageCountBuffer));
-    half coverageBufferID = coverageData.g;
-    half coverageCount =
-        coverageBufferID == v_pathID ? coverageData.r : make_half(.0);
-
-#ifdef @DRAW_INTERIOR_TRIANGLES
-    coverageCount += v_windingWeight;
-    PLS_PRESERVE_UI(coverageCountBuffer);
-#else
-    if (is_stroke(v_coverages))
-    {
-        half fragCoverage;
-#ifdef @ENABLE_FEATHER
-        if (@ENABLE_FEATHER && is_feathered_stroke(v_coverages))
-        {
-            fragCoverage =
-                eval_feathered_stroke(v_coverages TEXTURE_CONTEXT_FORWARD);
-        }
-        else
-#endif // @ENABLE_FEATHER
-        {
-            fragCoverage = min(v_coverages.x, v_coverages.y);
-        }
-        coverageCount = max(fragCoverage, coverageCount);
-    }
-    else // Fill. (Back-face culling handles the sign of v_coverages.x.)
-    {
-        half fragCoverage;
-#if defined(@ENABLE_FEATHER)
-        if (@ENABLE_FEATHER && is_feathered_fill(v_coverages))
-        {
-            fragCoverage =
-                eval_feathered_fill(v_coverages TEXTURE_CONTEXT_FORWARD);
-        }
-        else
-#endif // @CLOCKWISE_FILL && @ENABLE_FEATHER
-        {
-            fragCoverage = v_coverages.x;
-        }
-        coverageCount += fragCoverage;
-    }
-
-    // Save the updated coverage.
-    PLS_STOREUI(coverageCountBuffer,
-                packHalf2x16(make_half2(coverageCount, v_pathID)));
-#endif // !@DRAW_INTERIOR_TRIANGLES
-
-    // Convert coverageCount to coverage.
-#ifdef @CLOCKWISE_FILL
-    if (@CLOCKWISE_FILL)
-    {
-#ifdef @VULKAN_VENDOR_ID
-        if (@VULKAN_VENDOR_ID == VULKAN_VENDOR_ARM)
-        {
-            // ARM hits a bug if we use clamp() here.
-            if (coverageCount < .0)
-                coverage = .0;
-            else if (coverageCount <= 1.)
-                coverage = coverageCount;
-            else
-                coverage = 1.;
-        }
-        else
-#endif
-        {
-            coverage = clamp(coverageCount, make_half(.0), make_half(1.));
-        }
-    }
-    else
-#endif // CLOCKWISE_FILL
-    {
-        coverage = abs(coverageCount);
-#ifdef @ENABLE_EVEN_ODD
-        if (@ENABLE_EVEN_ODD && v_pathID < .0 /*even-odd*/)
-        {
-            coverage = 1. - make_half(abs(fract(coverage * .5) * 2. + -1.));
-        }
-#endif
-        // This also caps stroke coverage, which can be >1.
-        coverage = min(coverage, make_half(1.));
-    }
-#endif // !@ATLAS_BLIT
-
-#ifdef @ENABLE_CLIPPING
-    if (@ENABLE_CLIPPING && v_clipIDs.x < .0) // Update the clip buffer.
-    {
-        half clipID = -v_clipIDs.x;
-#ifdef @ENABLE_NESTED_CLIPPING
-        if (@ENABLE_NESTED_CLIPPING)
-        {
-            half outerClipID = v_clipIDs.y;
-            if (outerClipID != .0)
-            {
-                // This is a nested clip. Intersect coverage with the enclosing
-                // clip (outerClipID).
-                half2 clipData = unpackHalf2x16(PLS_LOADUI(clipBuffer));
-                half clipContentID = clipData.g;
-                half outerClipCoverage;
-                if (clipContentID != clipID)
-                {
-                    // First hit: either clipBuffer contains outerClipCoverage,
-                    // or this pixel is not inside the outer clip and
-                    // outerClipCoverage is zero.
-                    outerClipCoverage =
-                        clipContentID == outerClipID ? clipData.r : .0;
-#ifndef @DRAW_INTERIOR_TRIANGLES
-                    // Stash outerClipCoverage before overwriting clipBuffer, in
-                    // case we hit this pixel again and need it. (Not necessary
-                    // when drawing interior triangles because they always go
-                    // last and don't overlap.)
-                    PLS_STORE4F(scratchColorBuffer,
-                                make_half4(outerClipCoverage, .0, .0, .0));
-#endif
-                }
-                else
-                {
-                    // Subsequent hit: outerClipCoverage is stashed in
-                    // scratchColorBuffer.
-                    outerClipCoverage = PLS_LOAD4F(scratchColorBuffer).r;
-#ifndef @DRAW_INTERIOR_TRIANGLES
-                    // Since interior triangles are always last, there's no need
-                    // to preserve this value.
-                    PLS_PRESERVE_4F(scratchColorBuffer);
-#endif
-                }
-                coverage = min(coverage, outerClipCoverage);
-            }
-        }
-#endif // @ENABLE_NESTED_CLIPPING
-        PLS_STOREUI(clipBuffer, packHalf2x16(make_half2(coverage, clipID)));
-        PLS_PRESERVE_4F(colorBuffer);
-    }
-    else // Render to the main framebuffer.
-#endif   // @ENABLE_CLIPPING
-    {
-#ifdef @ENABLE_CLIPPING
-        if (@ENABLE_CLIPPING)
-        {
-            // Apply the clip.
-            half clipID = v_clipIDs.x;
-            if (clipID != .0)
-            {
-                // Clip IDs are not necessarily drawn in monotonically
-                // increasing order, so always check exact equality of the
-                // clipID.
-                half2 clipData = unpackHalf2x16(PLS_LOADUI(clipBuffer));
-                half clipContentID = clipData.g;
-                coverage = (clipContentID == clipID) ? min(clipData.r, coverage)
-                                                     : make_half(.0);
-            }
-        }
-#endif
-#ifdef @ENABLE_CLIP_RECT
-        if (@ENABLE_CLIP_RECT)
-        {
-            half clipRectCoverage = min_value(cast_float4_to_half4(v_clipRect));
-            coverage = clamp(clipRectCoverage, make_half(.0), coverage);
-        }
-#endif // ENABLE_CLIP_RECT
-
-        half4 color =
-            find_paint_color(v_paint, coverage FRAGMENT_CONTEXT_UNPACK);
-
-        half4 dstColorPremul;
-#ifdef @ATLAS_BLIT
-        dstColorPremul = PLS_LOAD4F(colorBuffer);
-#else
-        if (coverageBufferID != v_pathID)
-        {
-            // This is the first fragment from pathID to touch this pixel.
-            dstColorPremul = PLS_LOAD4F(colorBuffer);
-#ifndef @DRAW_INTERIOR_TRIANGLES
-            // We don't need to store coverage when drawing interior triangles
-            // because they always go last and don't overlap, so every fragment
-            // is the final one in the path.
-            PLS_STORE4F(scratchColorBuffer, dstColorPremul);
-#endif
-        }
-        else
-        {
-            dstColorPremul = PLS_LOAD4F(scratchColorBuffer);
-#ifndef @DRAW_INTERIOR_TRIANGLES
-            // Since interior triangles are always last, there's no need to
-            // preserve this value.
-            PLS_PRESERVE_4F(scratchColorBuffer);
-#endif
-        }
-#endif // @ATLAS_BLIT
-
-        // Blend with the framebuffer color.
-#ifdef @ENABLE_ADVANCED_BLEND
-        if (@ENABLE_ADVANCED_BLEND)
-        {
-            // GENERATE_PREMULTIPLIED_PAINT_COLORS is false in this case because
-            // advanced blend needs unmultiplied colors.
-            if (v_blendMode != cast_uint_to_half(BLEND_SRC_OVER))
-            {
-                color.rgb =
-                    advanced_color_blend(color.rgb,
-                                         dstColorPremul,
-                                         cast_half_to_ushort(v_blendMode));
-            }
-            // Premultiply alpha now.
-            color.rgb *= color.a;
-        }
-#endif
-
-        // Certain platforms give us less control of the format of what we are
-        // rendering too. Specifically, we are auto converted from linear ->
-        // sRGB on render target writes in unreal. In those cases we made need
-        // to end up in linear color space
-#ifdef @NEEDS_GAMMA_CORRECTION
-        if (@NEEDS_GAMMA_CORRECTION)
-        {
-            color = gamma_to_linear(color);
-        }
-#endif
-
-        color += dstColorPremul * (1. - color.a);
-
-        PLS_STORE4F(colorBuffer, color);
-        PLS_PRESERVE_UI(clipBuffer);
-    }
-
-#if !defined(@DRAW_INTERIOR_TRIANGLES) || defined(@ATLAS_BLIT)
-    // Interior triangles don't overlap, so don't need raster ordering.
-    PLS_INTERLOCK_END;
-#endif
-
-    EMIT_PLS;
-}
-
-#else // !@RENDER_MODE_MSAA => @RENDER_MODE_MSAA
-
-FRAG_DATA_MAIN(half4, @drawFragmentMain)
-{
-    VARYING_UNPACK(v_paint, float4);
-#ifdef @ATLAS_BLIT
-    VARYING_UNPACK(v_atlasCoord, float2);
-#endif
-#ifdef @ENABLE_ADVANCED_BLEND
-    VARYING_UNPACK(v_blendMode, half);
-#endif
-
-    half coverage =
-#ifdef @ATLAS_BLIT
-        filter_feather_atlas(
-            v_atlasCoord,
-            uniforms.atlasTextureInverseSize TEXTURE_CONTEXT_FORWARD);
-#else
-        1.;
-#endif
-    half4 color = find_paint_color(v_paint, coverage FRAGMENT_CONTEXT_UNPACK);
-
-#if defined(@ENABLE_ADVANCED_BLEND) && !defined(@FIXED_FUNCTION_COLOR_OUTPUT)
-    if (@ENABLE_ADVANCED_BLEND)
-    {
-        // Do the color portion of the blend mode in the shader.
-        //
-        // NOTE: "color" is already unmultiplied because
-        // GENERATE_PREMULTIPLIED_PAINT_COLORS is false when using advanced
-        // blend.
-        half4 dstColorPremul = DST_COLOR_FETCH(@dstColorTexture);
-        color.rgb = advanced_color_blend(color.rgb,
-                                         dstColorPremul,
-                                         cast_half_to_ushort(v_blendMode));
-        // Src-over blending is enabled, so just premultiply and let the HW
-        // finish the the the alpha portion of the blend mode.
-        color.rgb *= color.a;
-    }
-#endif // @ENABLE_ADVANCED_BLEND && !@FIXED_FUNCTION_COLOR_OUTPUT
-
-    // Certain platforms give us less control of the format of what we are
-    // rendering too. Specifically, we are auto converted from linear -> sRGB on
-    // render target writes in unreal. In those cases we made need to end up in
-    // linear color space
-#ifdef @NEEDS_GAMMA_CORRECTION
-    if (@NEEDS_GAMMA_CORRECTION)
-    {
-        color = gamma_to_linear(color);
-    }
-#endif
-
-    EMIT_FRAG_DATA(color);
-}
-
-#endif // @RENDER_MODE_MSAA
-
-#endif // FRAGMENT
diff --git a/renderer/src/shaders/draw_path.vert b/renderer/src/shaders/draw_path.vert
new file mode 100644
index 0000000..6073479
--- /dev/null
+++ b/renderer/src/shaders/draw_path.vert
@@ -0,0 +1,445 @@
+/*
+ * Copyright 2022 Rive
+ */
+
+// undef GENERATE_PREMULTIPLIED_PAINT_COLORS first because this file gets
+// included multiple times with different defines in the Metal library.
+#undef GENERATE_PREMULTIPLIED_PAINT_COLORS
+#ifdef @ENABLE_ADVANCED_BLEND
+// If advanced blend is enabled, we generate unmultiplied paint colors in the
+// shader. Otherwise we would have to just turn around and unmultiply them in
+// order to run the blend equation.
+#define GENERATE_PREMULTIPLIED_PAINT_COLORS !@ENABLE_ADVANCED_BLEND
+#else
+// As long as advanced blend is not enabled, it's more efficient for the shader
+// to generate premultiplied paint colors from the start.
+#define GENERATE_PREMULTIPLIED_PAINT_COLORS true
+#endif
+
+// undef COVERAGE_TYPE first because this file gets included multiple times with
+// different defines in the Metal library.
+#undef COVERAGE_TYPE
+#ifdef @ENABLE_FEATHER
+#define COVERAGE_TYPE float4
+#else
+#define COVERAGE_TYPE half2
+#endif
+
+#ifdef @VERTEX
+ATTR_BLOCK_BEGIN(Attrs)
+#ifdef @DRAW_INTERIOR_TRIANGLES
+ATTR(0, packed_float3, @a_triangleVertex);
+#else
+ATTR(0,
+     float4,
+     @a_patchVertexData); // [localVertexID, outset, fillCoverage, vertexType]
+ATTR(1, float4, @a_mirroredVertexData);
+#endif
+ATTR_BLOCK_END
+#endif
+
+VARYING_BLOCK_BEGIN
+NO_PERSPECTIVE VARYING(0, float4, v_paint);
+
+#ifdef @ATLAS_BLIT
+NO_PERSPECTIVE VARYING(1, float2, v_atlasCoord);
+#elif !defined(@RENDER_MODE_MSAA)
+#ifdef @DRAW_INTERIOR_TRIANGLES
+@OPTIONALLY_FLAT VARYING(1, half, v_windingWeight);
+#else
+NO_PERSPECTIVE VARYING(2, COVERAGE_TYPE, v_coverages);
+#endif //@DRAW_INTERIOR_TRIANGLES
+@OPTIONALLY_FLAT VARYING(3, half, v_pathID);
+#endif // !@RENDER_MODE_MSAA
+
+#ifdef @ENABLE_CLIPPING
+@OPTIONALLY_FLAT VARYING(4, half2, v_clipIDs); // [clipID, outerClipID]
+#endif
+#if defined(@ENABLE_CLIP_RECT) && !defined(@RENDER_MODE_MSAA)
+NO_PERSPECTIVE VARYING(5, float4, v_clipRect);
+#endif
+#ifdef @ENABLE_ADVANCED_BLEND
+@OPTIONALLY_FLAT VARYING(6, half, v_blendMode);
+#endif
+VARYING_BLOCK_END
+
+#ifdef @VERTEX
+VERTEX_MAIN(@drawVertexMain, Attrs, attrs, _vertexID, _instanceID)
+{
+#ifdef @DRAW_INTERIOR_TRIANGLES
+    ATTR_UNPACK(_vertexID, attrs, @a_triangleVertex, float3);
+#else
+    ATTR_UNPACK(_vertexID, attrs, @a_patchVertexData, float4);
+    ATTR_UNPACK(_vertexID, attrs, @a_mirroredVertexData, float4);
+#endif
+
+    VARYING_INIT(v_paint, float4);
+
+#ifdef @ATLAS_BLIT
+    VARYING_INIT(v_atlasCoord, float2);
+#elif !defined(@RENDER_MODE_MSAA)
+#ifdef @DRAW_INTERIOR_TRIANGLES
+    VARYING_INIT(v_windingWeight, half);
+#else
+    VARYING_INIT(v_coverages, COVERAGE_TYPE);
+#endif //@DRAW_INTERIOR_TRIANGLES
+    VARYING_INIT(v_pathID, half);
+#endif // !@RENDER_MODE_MSAA
+
+#ifdef @ENABLE_CLIPPING
+    VARYING_INIT(v_clipIDs, half2);
+#endif
+#if defined(@ENABLE_CLIP_RECT) && !defined(@RENDER_MODE_MSAA)
+    VARYING_INIT(v_clipRect, float4);
+#endif
+#ifdef @ENABLE_ADVANCED_BLEND
+    VARYING_INIT(v_blendMode, half);
+#endif
+
+    bool shouldDiscardVertex = false;
+    uint pathID;
+    float2 vertexPosition;
+#ifdef @RENDER_MODE_MSAA
+    ushort pathZIndex;
+#endif
+
+#ifdef @ATLAS_BLIT
+    vertexPosition =
+        unpack_atlas_coverage_vertex(@a_triangleVertex,
+                                     pathID,
+#ifdef @RENDER_MODE_MSAA
+                                     pathZIndex,
+#endif
+                                     v_atlasCoord VERTEX_CONTEXT_UNPACK);
+#elif defined(@DRAW_INTERIOR_TRIANGLES)
+    vertexPosition = unpack_interior_triangle_vertex(@a_triangleVertex,
+                                                     pathID
+#ifdef @RENDER_MODE_MSAA
+                                                     ,
+                                                     pathZIndex
+#else
+                                                     ,
+                                                     v_windingWeight
+#endif
+                                                         VERTEX_CONTEXT_UNPACK);
+#else // !@DRAW_INTERIOR_TRIANGLES
+    float4 coverages;
+    shouldDiscardVertex =
+        !unpack_tessellated_path_vertex(@a_patchVertexData,
+                                        @a_mirroredVertexData,
+                                        _instanceID,
+                                        pathID,
+                                        vertexPosition
+#ifndef @RENDER_MODE_MSAA
+                                        ,
+                                        coverages
+#else
+                                        ,
+                                        pathZIndex
+#endif
+                                            VERTEX_CONTEXT_UNPACK);
+#ifndef @RENDER_MODE_MSAA
+#ifdef @ENABLE_FEATHER
+    v_coverages = coverages;
+#else
+    v_coverages.xy = cast_float2_to_half2(coverages.xy);
+#endif
+#endif
+#endif // !DRAW_INTERIOR_TRIANGLES
+
+    uint2 paintData = STORAGE_BUFFER_LOAD2(@paintBuffer, pathID);
+
+#if !defined(@ATLAS_BLIT) && !defined(@RENDER_MODE_MSAA)
+    // Encode the integral pathID as a "half" that we know the hardware will see
+    // as a unique value in the fragment shader.
+    v_pathID = id_bits_to_f16(pathID, uniforms.pathIDGranularity);
+
+    // Indicate even-odd fill rule by making pathID negative.
+    if ((paintData.x & PAINT_FLAG_EVEN_ODD_FILL) != 0u)
+        v_pathID = -v_pathID;
+#endif // !@ATLAS_BLIT && !@RENDER_MODE_MSAA
+
+    uint paintType = paintData.x & 0xfu;
+#ifdef @ENABLE_CLIPPING
+    if (@ENABLE_CLIPPING)
+    {
+        uint clipIDBits =
+            (paintType == CLIP_UPDATE_PAINT_TYPE ? paintData.y : paintData.x) >>
+            16;
+        half clipID = id_bits_to_f16(clipIDBits, uniforms.pathIDGranularity);
+        // Negative clipID means to update the clip buffer instead of the color
+        // buffer.
+        if (paintType == CLIP_UPDATE_PAINT_TYPE)
+            clipID = -clipID;
+        v_clipIDs.x = clipID;
+    }
+#endif
+#ifdef @ENABLE_ADVANCED_BLEND
+    if (@ENABLE_ADVANCED_BLEND)
+    {
+        v_blendMode = float((paintData.x >> 4) & 0xfu);
+    }
+#endif
+
+    // Paint matrices operate on the fragment shader's "_fragCoord", which is
+    // bottom-up in GL.
+    float2 fragCoord = vertexPosition;
+#ifdef @FRAMEBUFFER_BOTTOM_UP
+    fragCoord.y = float(uniforms.renderTargetHeight) - fragCoord.y;
+#endif
+
+#ifdef @ENABLE_CLIP_RECT
+    if (@ENABLE_CLIP_RECT)
+    {
+        // clipRectInverseMatrix transforms from pixel coordinates to a space
+        // where the clipRect is the normalized rectangle: [-1, -1, 1, 1].
+        float2x2 clipRectInverseMatrix = make_float2x2(
+            STORAGE_BUFFER_LOAD4(@paintAuxBuffer, pathID * 4u + 2u));
+        float4 clipRectInverseTranslate =
+            STORAGE_BUFFER_LOAD4(@paintAuxBuffer, pathID * 4u + 3u);
+#ifndef @RENDER_MODE_MSAA
+        v_clipRect =
+            find_clip_rect_coverage_distances(clipRectInverseMatrix,
+                                              clipRectInverseTranslate.xy,
+                                              fragCoord);
+#else  // !@RENDER_MODE_MSAA => @RENDER_MODE_MSAA
+        set_clip_rect_plane_distances(clipRectInverseMatrix,
+                                      clipRectInverseTranslate.xy,
+                                      fragCoord);
+#endif // @RENDER_MODE_MSAA
+    }
+#endif // ENABLE_CLIP_RECT
+       // #endif // TARGET_VULKAN
+
+    // Unpack the paint once we have a position.
+    if (paintType == SOLID_COLOR_PAINT_TYPE)
+    {
+        half4 color = unpackUnorm4x8(paintData.y);
+        if (GENERATE_PREMULTIPLIED_PAINT_COLORS)
+            color.rgb *= color.a;
+        v_paint = float4(color);
+    }
+#ifdef @ENABLE_CLIPPING
+    else if (@ENABLE_CLIPPING && paintType == CLIP_UPDATE_PAINT_TYPE)
+    {
+        half outerClipID =
+            id_bits_to_f16(paintData.x >> 16, uniforms.pathIDGranularity);
+        v_clipIDs.y = outerClipID;
+    }
+#endif
+    else
+    {
+        float2x2 paintMatrix =
+            make_float2x2(STORAGE_BUFFER_LOAD4(@paintAuxBuffer, pathID * 4u));
+        float4 paintTranslate =
+            STORAGE_BUFFER_LOAD4(@paintAuxBuffer, pathID * 4u + 1u);
+        float2 paintCoord = MUL(paintMatrix, fragCoord) + paintTranslate.xy;
+        if (paintType == LINEAR_GRADIENT_PAINT_TYPE ||
+            paintType == RADIAL_GRADIENT_PAINT_TYPE)
+        {
+            // v_paint.a contains "-row" of the gradient ramp at texel center,
+            // in normalized space.
+            v_paint.a = -uintBitsToFloat(paintData.y);
+            // abs(v_paint.b) contains either:
+            //   - 2 if the gradient ramp spans an entire row.
+            //   - x0 of the gradient ramp in normalized space, if it's a simple
+            //   2-texel ramp.
+            float gradientSpan = paintTranslate.z;
+            // gradientSpan is either ~1 (complex gradients span the whole width
+            // of the texture minus 1px), or 1/GRAD_TEXTURE_WIDTH (simple
+            // gradients span 1px).
+            if (gradientSpan > .9)
+            {
+                // Complex ramps span an entire row. Set it to 2 to convey this.
+                v_paint.b = 2.;
+            }
+            else
+            {
+                // This is a simple ramp.
+                v_paint.b = paintTranslate.w;
+            }
+            if (paintType == LINEAR_GRADIENT_PAINT_TYPE)
+            {
+                // The paint is a linear gradient.
+                v_paint.g = .0;
+                v_paint.r = paintCoord.x;
+            }
+            else
+            {
+                // The paint is a radial gradient. Mark v_paint.b negative to
+                // indicate this to the fragment shader. (v_paint.b can't be
+                // zero because the gradient ramp is aligned on pixel centers,
+                // so negating it will always produce a negative number.)
+                v_paint.b = -v_paint.b;
+                v_paint.rg = paintCoord.xy;
+            }
+        }
+        else // IMAGE_PAINT_TYPE
+        {
+            // v_paint.a <= -1. signals that the paint is an image.
+            // -v_paint.a - 2 is the texture mipmap level-of-detail.
+            // v_paint.b is the image opacity.
+            // v_paint.rg is the normalized image texture coordinate (built into
+            // the paintMatrix).
+            float opacity = uintBitsToFloat(paintData.y);
+            float lod = paintTranslate.z;
+            v_paint = float4(paintCoord.x, paintCoord.y, opacity, -2. - lod);
+        }
+    }
+
+    float4 pos;
+    if (!shouldDiscardVertex)
+    {
+        pos = RENDER_TARGET_COORD_TO_CLIP_COORD(vertexPosition);
+#ifdef @POST_INVERT_Y
+        pos.y = -pos.y;
+#endif
+#ifdef @RENDER_MODE_MSAA
+        pos.z = normalize_z_index(pathZIndex);
+#endif
+    }
+    else
+    {
+        pos = float4(uniforms.vertexDiscardValue,
+                     uniforms.vertexDiscardValue,
+                     uniforms.vertexDiscardValue,
+                     uniforms.vertexDiscardValue);
+    }
+
+    VARYING_PACK(v_paint);
+#ifdef @ATLAS_BLIT
+    VARYING_PACK(v_atlasCoord);
+#elif !defined(@RENDER_MODE_MSAA)
+#ifdef @DRAW_INTERIOR_TRIANGLES
+    VARYING_PACK(v_windingWeight);
+#else
+    VARYING_PACK(v_coverages);
+#endif //@DRAW_INTERIOR_TRIANGLES
+    VARYING_PACK(v_pathID);
+#endif // !@RENDER_MODE_MSAA
+
+#ifdef @ENABLE_CLIPPING
+    VARYING_PACK(v_clipIDs);
+#endif
+#ifdef @ENABLE_CLIP_RECT
+    VARYING_PACK(v_clipRect);
+#endif
+#ifdef @ENABLE_ADVANCED_BLEND
+    VARYING_PACK(v_blendMode);
+#endif
+    EMIT_VERTEX(pos);
+}
+#endif
+
+#ifdef @FRAGMENT
+
+FRAG_STORAGE_BUFFER_BLOCK_BEGIN
+FRAG_STORAGE_BUFFER_BLOCK_END
+
+// Add a function here for fragments to unpack the paint since we're the ones
+// who packed it in the vertex shader.
+INLINE half4 find_paint_color(float4 paint,
+                              float coverage FRAGMENT_CONTEXT_DECL)
+{
+    half4 color;
+    if (paint.a >= .0) // Is the paint a solid color?
+    {
+        // The vertex shader will have premultiplied 'paint' (or not) based on
+        // GENERATE_PREMULTIPLIED_PAINT_COLORS.
+        color = cast_float4_to_half4(paint);
+        if (GENERATE_PREMULTIPLIED_PAINT_COLORS)
+            color *= coverage;
+        else
+            color.a *= coverage;
+    }
+    else if (paint.a > -1.) // Is the paint a gradient (linear or radial)?
+    {
+        float t =
+            paint.b > .0 ? /*linear*/ paint.r : /*radial*/ length(paint.rg);
+        t = clamp(t, .0, 1.);
+        float span = abs(paint.b);
+        float x = span > 1.
+                      ? /*entire row*/ (1. - 1. / GRAD_TEXTURE_WIDTH) * t +
+                            (.5 / GRAD_TEXTURE_WIDTH)
+                      : /*two texels*/ (1. / GRAD_TEXTURE_WIDTH) * t + span;
+        float row = -paint.a;
+        // Our gradient texture is not mipmapped. Issue a texture-sample that
+        // explicitly does not find derivatives for LOD computation.
+        color =
+            TEXTURE_SAMPLE_LOD(@gradTexture, gradSampler, float2(x, row), .0);
+        color.a *= coverage;
+        // Gradients are always unmultiplied so we don't lose color data while
+        // doing the hardware filter.
+        if (GENERATE_PREMULTIPLIED_PAINT_COLORS)
+            color.rgb *= color.a;
+    }
+    else // The paint is an image.
+    {
+        half lod = -paint.a - 2.;
+        color = TEXTURE_SAMPLE_DYNAMIC_LOD(@imageTexture,
+                                           imageSampler,
+                                           paint.rg,
+                                           lod);
+        half opacity = paint.b * coverage;
+        // Images are always premultiplied so the (transparent) background color
+        // doesn't bleed into the edges during the hardware filter.
+        if (GENERATE_PREMULTIPLIED_PAINT_COLORS)
+            color *= opacity;
+        else
+            color = make_half4(unmultiply_rgb(color), color.a * opacity);
+    }
+    return color;
+}
+
+#ifndef @DRAW_INTERIOR_TRIANGLES
+
+// Add functions here for fragments to unpack and evaluate coverage since we're
+// the ones who packed the coverage components in the vertex shader.
+INLINE half find_stroke_coverage(COVERAGE_TYPE coverages TEXTURE_CONTEXT_DECL)
+{
+#ifdef @ENABLE_FEATHER
+    if (@ENABLE_FEATHER && is_feathered_stroke(coverages))
+        return eval_feathered_stroke(coverages TEXTURE_CONTEXT_FORWARD);
+    else
+#endif // @ENABLE_FEATHER
+        return min(coverages.x, coverages.y);
+}
+
+INLINE half find_fill_coverage(COVERAGE_TYPE coverages TEXTURE_CONTEXT_DECL)
+{
+#if defined(@ENABLE_FEATHER)
+    if (@ENABLE_FEATHER && is_feathered_fill(coverages))
+        return eval_feathered_fill(coverages TEXTURE_CONTEXT_FORWARD);
+    else
+#endif // @ENABLE_FEATHER
+        return coverages.x;
+}
+
+INLINE half find_frag_coverage(COVERAGE_TYPE coverages TEXTURE_CONTEXT_DECL)
+{
+    if (is_stroke(coverages))
+        return find_stroke_coverage(coverages TEXTURE_CONTEXT_FORWARD);
+    else // Fill. (Back-face culling handles the sign of coverages.x.)
+        return find_fill_coverage(coverages TEXTURE_CONTEXT_FORWARD);
+}
+
+INLINE half apply_frag_coverage(half initialCoverage,
+                                COVERAGE_TYPE coverages TEXTURE_CONTEXT_DECL)
+{
+    if (is_stroke(coverages))
+    {
+        half fragCoverage =
+            find_stroke_coverage(coverages TEXTURE_CONTEXT_FORWARD);
+        return max(fragCoverage, initialCoverage);
+    }
+    else // Fill. (Back-face culling handles the sign of coverages.x.)
+    {
+        half fragCoverage =
+            find_fill_coverage(coverages TEXTURE_CONTEXT_FORWARD);
+        return initialCoverage + fragCoverage;
+    }
+}
+
+#endif // !@DRAW_INTERIOR_TRIANGLES
+
+#endif // @FRAGMENT
diff --git a/renderer/src/shaders/draw_raster_order_image_mesh.frag b/renderer/src/shaders/draw_raster_order_image_mesh.frag
new file mode 100644
index 0000000..4fa91b8
--- /dev/null
+++ b/renderer/src/shaders/draw_raster_order_image_mesh.frag
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2023 Rive
+ */
+
+#ifdef @FRAGMENT
+
+PLS_BLOCK_BEGIN
+PLS_DECL4F(COLOR_PLANE_IDX, colorBuffer);
+PLS_DECLUI(CLIP_PLANE_IDX, clipBuffer);
+PLS_DECL4F(SCRATCH_COLOR_PLANE_IDX, scratchColorBuffer);
+PLS_DECLUI(COVERAGE_PLANE_IDX, coverageCountBuffer);
+PLS_BLOCK_END
+
+FRAG_TEXTURE_BLOCK_BEGIN
+TEXTURE_RGBA8(PER_DRAW_BINDINGS_SET, IMAGE_TEXTURE_IDX, @imageTexture);
+FRAG_TEXTURE_BLOCK_END
+
+DYNAMIC_SAMPLER_BLOCK_BEGIN
+SAMPLER_DYNAMIC(PER_DRAW_BINDINGS_SET, IMAGE_SAMPLER_IDX, imageSampler)
+DYNAMIC_SAMPLER_BLOCK_END
+
+FRAG_STORAGE_BUFFER_BLOCK_BEGIN
+FRAG_STORAGE_BUFFER_BLOCK_END
+
+PLS_MAIN_WITH_IMAGE_UNIFORMS(@drawFragmentMain)
+{
+    VARYING_UNPACK(v_texCoord, float2);
+#ifdef @ENABLE_CLIPPING
+    VARYING_UNPACK(v_clipID, half);
+#endif
+#ifdef @ENABLE_CLIP_RECT
+    VARYING_UNPACK(v_clipRect, float4);
+#endif
+
+    half4 color = TEXTURE_SAMPLE_DYNAMIC_LODBIAS(@imageTexture,
+                                                 imageSampler,
+                                                 v_texCoord,
+                                                 uniforms.mipMapLODBias);
+
+    half coverage = 1.;
+
+#ifdef @ENABLE_CLIP_RECT
+    if (@ENABLE_CLIP_RECT)
+    {
+        half clipRectCoverage = min_value(cast_float4_to_half4(v_clipRect));
+        coverage = clamp(clipRectCoverage, make_half(.0), coverage);
+    }
+#endif
+
+    PLS_INTERLOCK_BEGIN;
+
+#ifdef @ENABLE_CLIPPING
+    if (@ENABLE_CLIPPING && v_clipID != .0)
+    {
+        half2 clipData = unpackHalf2x16(PLS_LOADUI(clipBuffer));
+        half clipContentID = clipData.g;
+        half clipCoverage =
+            clipContentID == v_clipID ? clipData.r : make_half(.0);
+        coverage = min(coverage, clipCoverage);
+    }
+#endif
+
+    // Blend with the framebuffer color.
+    half4 dstColorPremul = PLS_LOAD4F(colorBuffer);
+#ifdef @ENABLE_ADVANCED_BLEND
+    if (@ENABLE_ADVANCED_BLEND && imageDrawUniforms.blendMode != BLEND_SRC_OVER)
+    {
+        color.rgb = advanced_color_blend(
+                        unmultiply_rgb(color),
+                        dstColorPremul,
+                        cast_uint_to_ushort(imageDrawUniforms.blendMode)) *
+                    color.a;
+    }
+#endif
+    color *= imageDrawUniforms.opacity * coverage;
+    color += dstColorPremul * (1. - color.a);
+
+    PLS_STORE4F(colorBuffer, color);
+    PLS_PRESERVE_UI(clipBuffer);
+    PLS_PRESERVE_UI(coverageCountBuffer);
+
+    PLS_INTERLOCK_END;
+
+    EMIT_PLS;
+}
+
+#endif // @FRAGMENT
diff --git a/renderer/src/shaders/draw_raster_order_path.frag b/renderer/src/shaders/draw_raster_order_path.frag
new file mode 100644
index 0000000..447f7cc
--- /dev/null
+++ b/renderer/src/shaders/draw_raster_order_path.frag
@@ -0,0 +1,254 @@
+/*
+ * Copyright 2022 Rive
+ */
+
+#ifdef @FRAGMENT
+
+PLS_BLOCK_BEGIN
+PLS_DECL4F(COLOR_PLANE_IDX, colorBuffer);
+PLS_DECLUI(CLIP_PLANE_IDX, clipBuffer);
+PLS_DECL4F(SCRATCH_COLOR_PLANE_IDX, scratchColorBuffer);
+PLS_DECLUI(COVERAGE_PLANE_IDX, coverageCountBuffer);
+PLS_BLOCK_END
+
+PLS_MAIN(@drawFragmentMain)
+{
+    VARYING_UNPACK(v_paint, float4);
+
+#ifdef @ATLAS_BLIT
+    VARYING_UNPACK(v_atlasCoord, float2);
+#else
+#ifdef @DRAW_INTERIOR_TRIANGLES
+    VARYING_UNPACK(v_windingWeight, half);
+#else
+    VARYING_UNPACK(v_coverages, COVERAGE_TYPE);
+#endif //@DRAW_INTERIOR_TRIANGLES
+    VARYING_UNPACK(v_pathID, half);
+#endif
+
+#ifdef @ENABLE_CLIPPING
+    VARYING_UNPACK(v_clipIDs, half2);
+#endif
+#ifdef @ENABLE_CLIP_RECT
+    VARYING_UNPACK(v_clipRect, float4);
+#endif
+#ifdef @ENABLE_ADVANCED_BLEND
+    VARYING_UNPACK(v_blendMode, half);
+#endif
+
+#if !defined(@DRAW_INTERIOR_TRIANGLES) || defined(@ATLAS_BLIT)
+    // Interior triangles don't overlap, so don't need raster ordering.
+    PLS_INTERLOCK_BEGIN;
+#endif
+
+    half coverage;
+#ifdef @ATLAS_BLIT
+    coverage = filter_feather_atlas(
+        v_atlasCoord,
+        uniforms.atlasTextureInverseSize TEXTURE_CONTEXT_FORWARD);
+#else
+    half2 coverageData = unpackHalf2x16(PLS_LOADUI(coverageCountBuffer));
+    half coverageBufferID = coverageData.g;
+    half coverageCount =
+        coverageBufferID == v_pathID ? coverageData.r : make_half(.0);
+
+#ifdef @DRAW_INTERIOR_TRIANGLES
+    coverageCount += v_windingWeight;
+    // Preserve the coverage buffer even though we don't use it, so it doesn't
+    // get overwritten in a way that would corrupt a future draw (e.g., by
+    // accidentally writing the next path's id with a bogus coverage.)
+    PLS_PRESERVE_UI(coverageCountBuffer);
+#else
+    coverageCount =
+        apply_frag_coverage(coverageCount, v_coverages TEXTURE_CONTEXT_FORWARD);
+    // Save the updated coverage.
+    PLS_STOREUI(coverageCountBuffer,
+                packHalf2x16(make_half2(coverageCount, v_pathID)));
+#endif // !@DRAW_INTERIOR_TRIANGLES
+
+    // Convert coverageCount to coverage.
+#ifdef @CLOCKWISE_FILL
+    if (@CLOCKWISE_FILL)
+    {
+#ifdef @VULKAN_VENDOR_ID
+        if (@VULKAN_VENDOR_ID == VULKAN_VENDOR_ARM)
+        {
+            // ARM hits a bug if we use clamp() here.
+            if (coverageCount < .0)
+                coverage = .0;
+            else if (coverageCount <= 1.)
+                coverage = coverageCount;
+            else
+                coverage = 1.;
+        }
+        else
+#endif
+        {
+            coverage = clamp(coverageCount, make_half(.0), make_half(1.));
+        }
+    }
+    else
+#endif // CLOCKWISE_FILL
+    {
+        coverage = abs(coverageCount);
+#ifdef @ENABLE_EVEN_ODD
+        if (@ENABLE_EVEN_ODD && v_pathID < .0 /*even-odd*/)
+        {
+            coverage = 1. - make_half(abs(fract(coverage * .5) * 2. + -1.));
+        }
+#endif
+        // This also caps stroke coverage, which can be >1.
+        coverage = min(coverage, make_half(1.));
+    }
+#endif // !@ATLAS_BLIT
+
+#ifdef @ENABLE_CLIPPING
+    if (@ENABLE_CLIPPING && v_clipIDs.x < .0) // Update the clip buffer.
+    {
+        half clipID = -v_clipIDs.x;
+#ifdef @ENABLE_NESTED_CLIPPING
+        if (@ENABLE_NESTED_CLIPPING)
+        {
+            half outerClipID = v_clipIDs.y;
+            if (outerClipID != .0)
+            {
+                // This is a nested clip. Intersect coverage with the enclosing
+                // clip (outerClipID).
+                half2 clipData = unpackHalf2x16(PLS_LOADUI(clipBuffer));
+                half clipContentID = clipData.g;
+                half outerClipCoverage;
+                if (clipContentID != clipID)
+                {
+                    // First hit: either clipBuffer contains outerClipCoverage,
+                    // or this pixel is not inside the outer clip and
+                    // outerClipCoverage is zero.
+                    outerClipCoverage =
+                        clipContentID == outerClipID ? clipData.r : .0;
+#ifndef @DRAW_INTERIOR_TRIANGLES
+                    // Stash outerClipCoverage before overwriting clipBuffer, in
+                    // case we hit this pixel again and need it. (Not necessary
+                    // when drawing interior triangles because they always go
+                    // last and don't overlap.)
+                    PLS_STORE4F(scratchColorBuffer,
+                                make_half4(outerClipCoverage, .0, .0, .0));
+#endif
+                }
+                else
+                {
+                    // Subsequent hit: outerClipCoverage is stashed in
+                    // scratchColorBuffer.
+                    outerClipCoverage = PLS_LOAD4F(scratchColorBuffer).r;
+#ifndef @DRAW_INTERIOR_TRIANGLES
+                    // Since interior triangles are always last, there's no need
+                    // to preserve this value.
+                    PLS_PRESERVE_4F(scratchColorBuffer);
+#endif
+                }
+                coverage = min(coverage, outerClipCoverage);
+            }
+        }
+#endif // @ENABLE_NESTED_CLIPPING
+        PLS_STOREUI(clipBuffer, packHalf2x16(make_half2(coverage, clipID)));
+        PLS_PRESERVE_4F(colorBuffer);
+    }
+    else // Render to the main framebuffer.
+#endif   // @ENABLE_CLIPPING
+    {
+#ifdef @ENABLE_CLIPPING
+        if (@ENABLE_CLIPPING)
+        {
+            // Apply the clip.
+            half clipID = v_clipIDs.x;
+            if (clipID != .0)
+            {
+                // Clip IDs are not necessarily drawn in monotonically
+                // increasing order, so always check exact equality of the
+                // clipID.
+                half2 clipData = unpackHalf2x16(PLS_LOADUI(clipBuffer));
+                half clipContentID = clipData.g;
+                coverage = (clipContentID == clipID) ? min(clipData.r, coverage)
+                                                     : make_half(.0);
+            }
+        }
+#endif
+#ifdef @ENABLE_CLIP_RECT
+        if (@ENABLE_CLIP_RECT)
+        {
+            half clipRectCoverage = min_value(cast_float4_to_half4(v_clipRect));
+            coverage = clamp(clipRectCoverage, make_half(.0), coverage);
+        }
+#endif // ENABLE_CLIP_RECT
+
+        half4 color =
+            find_paint_color(v_paint, coverage FRAGMENT_CONTEXT_UNPACK);
+
+        half4 dstColorPremul;
+#ifdef @ATLAS_BLIT
+        dstColorPremul = PLS_LOAD4F(colorBuffer);
+#else
+        if (coverageBufferID != v_pathID)
+        {
+            // This is the first fragment from pathID to touch this pixel.
+            dstColorPremul = PLS_LOAD4F(colorBuffer);
+#ifndef @DRAW_INTERIOR_TRIANGLES
+            // We don't need to store coverage when drawing interior triangles
+            // because they always go last and don't overlap, so every fragment
+            // is the final one in the path.
+            PLS_STORE4F(scratchColorBuffer, dstColorPremul);
+#endif
+        }
+        else
+        {
+            dstColorPremul = PLS_LOAD4F(scratchColorBuffer);
+#ifndef @DRAW_INTERIOR_TRIANGLES
+            // Since interior triangles are always last, there's no need to
+            // preserve this value.
+            PLS_PRESERVE_4F(scratchColorBuffer);
+#endif
+        }
+#endif // @ATLAS_BLIT
+
+        // Blend with the framebuffer color.
+#ifdef @ENABLE_ADVANCED_BLEND
+        if (@ENABLE_ADVANCED_BLEND)
+        {
+            // GENERATE_PREMULTIPLIED_PAINT_COLORS is false in this case because
+            // advanced blend needs unmultiplied colors.
+            if (v_blendMode != cast_uint_to_half(BLEND_SRC_OVER))
+            {
+                color.rgb =
+                    advanced_color_blend(color.rgb,
+                                         dstColorPremul,
+                                         cast_half_to_ushort(v_blendMode));
+            }
+            // Premultiply alpha now.
+            color.rgb *= color.a;
+        }
+#endif
+
+        // Certain platforms give us less control of the format of what we are
+        // rendering to. Specifically, we are auto-converted from linear ->
+        // sRGB on render target writes in Unreal. In those cases we may need
+        // to end up in linear color space.
+#ifdef @NEEDS_GAMMA_CORRECTION
+        if (@NEEDS_GAMMA_CORRECTION)
+        {
+            color = gamma_to_linear(color);
+        }
+#endif
+
+        color += dstColorPremul * (1. - color.a);
+
+        PLS_STORE4F(colorBuffer, color);
+        PLS_PRESERVE_UI(clipBuffer);
+    }
+
+#if !defined(@DRAW_INTERIOR_TRIANGLES) || defined(@ATLAS_BLIT)
+    // Interior triangles don't overlap, so don't need raster ordering.
+    PLS_INTERLOCK_END;
+#endif
+
+    EMIT_PLS;
+}
+
+#endif // FRAGMENT
diff --git a/renderer/src/shaders/metal/generate_draw_combinations.py b/renderer/src/shaders/metal/generate_draw_combinations.py
index 31a44ff..a34bed4 100644
--- a/renderer/src/shaders/metal/generate_draw_combinations.py
+++ b/renderer/src/shaders/metal/generate_draw_combinations.py
@@ -87,12 +87,14 @@
                   ('c' if fill_type == FillType.CLOCKWISE else 'p',
                    ''.join(namespace_id)))
         out.write('{\n')
-        out.write('#include "draw_path.minified.glsl"\n')
+        out.write('#include "draw_path.minified.vert"\n')
+        out.write('#include "draw_raster_order_path.minified.frag"\n')
         out.write('}\n')
     else:
         out.write('namespace m%s\n' % ''.join(namespace_id))
         out.write('{\n')
-        out.write('#include "draw_image_mesh.minified.glsl"\n')
+        out.write('#include "draw_image_mesh.minified.vert"\n')
+        out.write('#include "draw_raster_order_image_mesh.minified.frag"\n')
         out.write('}\n')
     for feature in feature_set:
         out.write('#undef %s\n' % feature.name)
diff --git a/renderer/src/shaders/minify.py b/renderer/src/shaders/minify.py
index 0529382..db70014 100644
--- a/renderer/src/shaders/minify.py
+++ b/renderer/src/shaders/minify.py
@@ -549,8 +549,8 @@
 
 
     def write_exports(self, outdir):
-        output_path = os.path.join(outdir, os.path.splitext(self.basename)[0] + ".exports.h")
-        print("Exporting %s <- %s" % (output_path, self.basename))
+        output_path = os.path.join(outdir, f"{self.basename}.exports.h")
+        print(f"Exporting {output_path} <- {self.basename}")
         out = open(output_path, "w", newline='\n')
         out.write('#pragma once\n\n')
         for exp in sorted(self.exports):
@@ -565,13 +565,17 @@
         out = open(output_path, "w", newline='\n')
         out.write("#pragma once\n\n")
 
-        out.write('#include "%s"\n\n' % (os.path.splitext(self.basename)[0] + ".exports.h"))
+        out.write(f'#include "{self.basename}.exports.h"\n\n')
 
         # emit shader code.
+        root, ext = os.path.splitext(self.basename)
+        cpp_name = root
+        if ext != '.glsl':
+            cpp_name = f"{root}_{ext[1:]}"
         out.write("namespace rive {\n")
         out.write("namespace gpu {\n")
         out.write("namespace glsl {\n")
-        out.write('const char %s[] = R"===(' % os.path.splitext(self.basename)[0])
+        out.write(f'const char {cpp_name}[] = R"===(')
 
         is_newline = self.emit_tokens_to_rewritten_glsl(out, preserve_exported_switches=False)
         if not is_newline:
@@ -584,8 +588,9 @@
         out.close()
 
     def write_offline_glsl(self, outdir):
-        output_path = os.path.join(outdir, os.path.splitext(self.basename)[0] + ".minified.glsl")
-        print("Minifying %s <- %s" % (output_path, self.basename))
+        root, ext = os.path.splitext(self.basename)
+        output_path = os.path.join(outdir, f"{root}.minified{ext}")
+        print(f"Minifying {output_path} <- {self.basename}")
         out = open(output_path, "w", newline='\n')
         self.emit_tokens_to_rewritten_glsl(out, preserve_exported_switches=True)
         out.close()
diff --git a/renderer/src/shaders/spirv/draw_atlas_blit.main b/renderer/src/shaders/spirv/draw_atlas_blit.main
index 053fb0e..fd2bf32 100644
--- a/renderer/src/shaders/spirv/draw_atlas_blit.main
+++ b/renderer/src/shaders/spirv/draw_atlas_blit.main
@@ -10,4 +10,5 @@
 #include "common.minified.glsl"
 #include "draw_path_common.minified.glsl"
 #include "advanced_blend.minified.glsl"
-#include "draw_path.minified.glsl"
+#include "draw_path.minified.vert"
+#include "draw_raster_order_path.minified.frag"
diff --git a/renderer/src/shaders/spirv/draw_image_mesh.main b/renderer/src/shaders/spirv/draw_image_mesh.main
index 132c1d3..73e5723 100644
--- a/renderer/src/shaders/spirv/draw_image_mesh.main
+++ b/renderer/src/shaders/spirv/draw_image_mesh.main
@@ -9,4 +9,5 @@
 #include "specialization.minified.glsl"
 #include "common.minified.glsl"
 #include "advanced_blend.minified.glsl"
-#include "draw_image_mesh.minified.glsl"
+#include "draw_image_mesh.minified.vert"
+#include "draw_raster_order_image_mesh.minified.frag"
diff --git a/renderer/src/shaders/spirv/draw_interior_triangles.main b/renderer/src/shaders/spirv/draw_interior_triangles.main
index b05367b..6ac1d79 100644
--- a/renderer/src/shaders/spirv/draw_interior_triangles.main
+++ b/renderer/src/shaders/spirv/draw_interior_triangles.main
@@ -9,4 +9,5 @@
 #include "common.minified.glsl"
 #include "draw_path_common.minified.glsl"
 #include "advanced_blend.minified.glsl"
-#include "draw_path.minified.glsl"
+#include "draw_path.minified.vert"
+#include "draw_raster_order_path.minified.frag"
diff --git a/renderer/src/shaders/spirv/draw_msaa_atlas_blit.main b/renderer/src/shaders/spirv/draw_msaa_atlas_blit.main
index 6e11db2..00d1c84 100644
--- a/renderer/src/shaders/spirv/draw_msaa_atlas_blit.main
+++ b/renderer/src/shaders/spirv/draw_msaa_atlas_blit.main
@@ -11,4 +11,5 @@
 #include "common.minified.glsl"
 #include "draw_path_common.minified.glsl"
 #include "advanced_blend.minified.glsl"
-#include "draw_path.minified.glsl"
+#include "draw_path.minified.vert"
+#include "draw_msaa_path.minified.frag"
diff --git a/renderer/src/shaders/spirv/draw_msaa_image_mesh.main b/renderer/src/shaders/spirv/draw_msaa_image_mesh.main
index 051bb8c..f74c33d 100644
--- a/renderer/src/shaders/spirv/draw_msaa_image_mesh.main
+++ b/renderer/src/shaders/spirv/draw_msaa_image_mesh.main
@@ -10,4 +10,5 @@
 #include "specialization.minified.glsl"
 #include "common.minified.glsl"
 #include "advanced_blend.minified.glsl"
-#include "draw_image_mesh.minified.glsl"
+#include "draw_image_mesh.minified.vert"
+#include "draw_msaa_image_mesh.minified.frag"
diff --git a/renderer/src/shaders/spirv/draw_msaa_path.main b/renderer/src/shaders/spirv/draw_msaa_path.main
index 79b3feb..0a7fcfb 100644
--- a/renderer/src/shaders/spirv/draw_msaa_path.main
+++ b/renderer/src/shaders/spirv/draw_msaa_path.main
@@ -11,4 +11,5 @@
 #include "common.minified.glsl"
 #include "draw_path_common.minified.glsl"
 #include "advanced_blend.minified.glsl"
-#include "draw_path.minified.glsl"
+#include "draw_path.minified.vert"
+#include "draw_msaa_path.minified.frag"
diff --git a/renderer/src/shaders/spirv/draw_path.main b/renderer/src/shaders/spirv/draw_path.main
index a3713d4..4a0c69a 100644
--- a/renderer/src/shaders/spirv/draw_path.main
+++ b/renderer/src/shaders/spirv/draw_path.main
@@ -10,4 +10,5 @@
 #include "common.minified.glsl"
 #include "draw_path_common.minified.glsl"
 #include "advanced_blend.minified.glsl"
-#include "draw_path.minified.glsl"
+#include "draw_path.minified.vert"
+#include "draw_raster_order_path.minified.frag"
diff --git a/renderer/src/shaders/unreal/draw_image_mesh.usf b/renderer/src/shaders/unreal/draw_image_mesh.usf
index 8311b9b..35c23ac 100644
--- a/renderer/src/shaders/unreal/draw_image_mesh.usf
+++ b/renderer/src/shaders/unreal/draw_image_mesh.usf
@@ -14,4 +14,5 @@
 #include "Generated/constants.minified.ush"
 #include "Generated/common.minified.ush"
 #include "Generated/advanced_blend.minified.ush"
-#include "Generated/draw_image_mesh.minified.ush"
+#include "Generated/draw_image_mesh.minified.vert.ush"
+#include "Generated/draw_raster_order_image_mesh.minified.frag.ush"
diff --git a/renderer/src/shaders/unreal/draw_interior_triangles.usf b/renderer/src/shaders/unreal/draw_interior_triangles.usf
index 9a1ca0b..af8451b 100644
--- a/renderer/src/shaders/unreal/draw_interior_triangles.usf
+++ b/renderer/src/shaders/unreal/draw_interior_triangles.usf
@@ -13,4 +13,5 @@
 #include "Generated/common.minified.ush"
 #include "Generated/draw_path_common.minified.ush"
 #include "Generated/advanced_blend.minified.ush"
-#include "Generated/draw_path.minified.ush"
+#include "Generated/draw_path.minified.vert.ush"
+#include "Generated/draw_raster_order_path.minified.frag.ush"
diff --git a/renderer/src/shaders/unreal/draw_path.usf b/renderer/src/shaders/unreal/draw_path.usf
index 8446ea9..cc44614 100644
--- a/renderer/src/shaders/unreal/draw_path.usf
+++ b/renderer/src/shaders/unreal/draw_path.usf
@@ -15,4 +15,5 @@
 #include "Generated/common.minified.ush"
 #include "Generated/draw_path_common.minified.ush"
 #include "Generated/advanced_blend.minified.ush"
-#include "Generated/draw_path.minified.ush"
+#include "Generated/draw_path.minified.vert.ush"
+#include "Generated/draw_raster_order_path.minified.frag.ush"
diff --git a/renderer/src/webgpu/render_context_webgpu_impl.cpp b/renderer/src/webgpu/render_context_webgpu_impl.cpp
index a354144..8a73d65 100644
--- a/renderer/src/webgpu/render_context_webgpu_impl.cpp
+++ b/renderer/src/webgpu/render_context_webgpu_impl.cpp
@@ -69,9 +69,11 @@
 #include "generated/shaders/tessellate.glsl.hpp"
 #include "generated/shaders/render_atlas.glsl.hpp"
 #include "generated/shaders/advanced_blend.glsl.hpp"
-#include "generated/shaders/draw_path.glsl.hpp"
 #include "generated/shaders/draw_path_common.glsl.hpp"
-#include "generated/shaders/draw_image_mesh.glsl.hpp"
+#include "generated/shaders/draw_path.vert.hpp"
+#include "generated/shaders/draw_raster_order_path.frag.hpp"
+#include "generated/shaders/draw_image_mesh.vert.hpp"
+#include "generated/shaders/draw_raster_order_image_mesh.frag.hpp"
 
 // When compiling "glslRaw" shaders, the WebGPU driver will automatically
 // search for a uniform with this name and update its value when draw commands
@@ -845,15 +847,19 @@
                 case DrawType::outerCurvePatches:
                     addDefine(GLSL_DRAW_PATH);
                     glsl << gpu::glsl::draw_path_common << '\n';
-                    glsl << gpu::glsl::draw_path << '\n';
+                    glsl << gpu::glsl::draw_path_vert << '\n';
+                    glsl << gpu::glsl::draw_raster_order_path_frag << '\n';
                     break;
                 case DrawType::interiorTriangulation:
                 case DrawType::atlasBlit:
                     glsl << gpu::glsl::draw_path_common << '\n';
-                    glsl << gpu::glsl::draw_path << '\n';
+                    glsl << gpu::glsl::draw_path_vert << '\n';
+                    glsl << gpu::glsl::draw_raster_order_path_frag << '\n';
                     break;
                 case DrawType::imageMesh:
-                    glsl << gpu::glsl::draw_image_mesh << '\n';
+                    glsl << gpu::glsl::draw_image_mesh_vert << '\n';
+                    glsl << gpu::glsl::draw_raster_order_image_mesh_frag
+                         << '\n';
                     break;
                 case DrawType::imageRect:
                 case DrawType::msaaStrokes: