feather v1

Diffs=
7ed60faf76 Vector feathering v1! (#8799)

Co-authored-by: Chris Dalton <99840794+csmartdalton@users.noreply.github.com>
diff --git a/.rive_head b/.rive_head
index a210b0c..339cbba 100644
--- a/.rive_head
+++ b/.rive_head
@@ -1 +1 @@
-e042c0af4ce3f444ce8447595e88133961425ffa
+7ed60faf763aa791b129995bb07c5f73f5a3f139
diff --git a/include/rive/math/bezier_utils.hpp b/include/rive/math/bezier_utils.hpp
index 914bd3a..31a9ae4 100644
--- a/include/rive/math/bezier_utils.hpp
+++ b/include/rive/math/bezier_utils.hpp
@@ -20,6 +20,82 @@
 {
 namespace math
 {
+// Finds the cubic bezier's power basis coefficients. These define the bezier
+// curve as:
+//
+//                                    |T^3|
+//     Cubic(T) = x,y = |A  3B  3C| * |T^2| + P0
+//                      |.   .   .|   |T  |
+//
+// And the tangent:
+//
+//                                         |T^2|
+//     Tangent(T) = dx,dy = |3A  6B  3C| * |T  |
+//                          | .   .   .|   |1  |
+//
+struct CubicCoeffs
+{
+    RIVE_ALWAYS_INLINE CubicCoeffs(const Vec2D pts[4]) :
+        CubicCoeffs(simd::load2f(&pts[0].x),
+                    simd::load2f(&pts[1].x),
+                    simd::load2f(&pts[2].x),
+                    simd::load2f(&pts[3].x))
+    {}
+
+    RIVE_ALWAYS_INLINE CubicCoeffs(float2 p0, float2 p1, float2 p2, float2 p3)
+    {
+        C = p1 - p0;
+        float2 D = p2 - p1;
+        float2 E = p3 - p0;
+        B = D - C;
+        A = -3.f * D + E;
+    }
+
+    float2 A, B, C;
+};
+
+// Optimized SIMD helper for evaluating a single cubic at many points.
+class EvalCubic
+{
+public:
+    RIVE_ALWAYS_INLINE EvalCubic(const Vec2D pts[]) :
+        EvalCubic(CubicCoeffs(pts), pts[0])
+    {}
+
+    RIVE_ALWAYS_INLINE EvalCubic(const CubicCoeffs& coeffs, Vec2D p0) :
+        EvalCubic(coeffs, simd::load2f(&p0))
+    {}
+
+    RIVE_ALWAYS_INLINE EvalCubic(const CubicCoeffs& coeffs, float2 p0) :
+        // Duplicate coefficients across a float4 so we can evaluate two at
+        // once.
+        A(coeffs.A.xyxy),
+        B((3.f * coeffs.B).xyxy),
+        C((3.f * coeffs.C).xyxy),
+        D(p0.xyxy)
+    {}
+
+    // Evaluates [x, y] at location t.
+    RIVE_ALWAYS_INLINE float2 operator()(float t) const
+    {
+        // At^3 + Bt^2 + Ct + P0
+        return ((A.xy * t + B.xy) * t + C.xy) * t + D.xy;
+    }
+
+    // Evaluates [Xa, Ya, Xb, Yb] at locations [Ta, Ta, Tb, Tb].
+    RIVE_ALWAYS_INLINE float4 operator()(float4 t) const
+    {
+        // At^3 + Bt^2 + Ct + P0
+        return ((A * t + B) * t + C) * t + D;
+    }
+
+private:
+    const float4 A;
+    const float4 B;
+    const float4 C;
+    const float4 D;
+};
+
 // Decides the number of polar segments the tessellator adds for each curve.
 // (Uniform steps in tangent angle.) The tessellator will add this number of
 // polar segments for each radian of rotation in local path space.
@@ -69,58 +145,38 @@
 // on a degenerate flat line.
 int find_cubic_convex_180_chops(const Vec2D[], float T[2], bool* areCusps);
 
-// Optimized SIMD helper for evaluating a single cubic at many points.
-class EvalCubic
+// Returns up to 4 T values at which to chop the given curve in order to
+// guarantee the resulting cubics are convex and rotate no more than 90 degrees.
+//
+// If the curve has any cusp points (proper cusps or 180-degree turnarounds on
+// a degenerate flat line), the cusps are straddled with `cuspPadding` on either
+// side and `areCusps` is set to true. In this case, odd-numbered curves after
+// chopping will always be the small sections that pass through the cusp.
+int find_cubic_convex_90_chops(const Vec2D[],
+                               float T[4],
+                               float cuspPadding,
+                               bool* areCusps);
+
+// Find the location and value of a cubic's maximum height, relative to the
+// baseline p0->p3.
+float find_cubic_max_height(const Vec2D pts[4], float* outT);
+
+// Measure the amount of curvature, in radians, of the given cubic, centered at
+// location T, and covering a spread of width "desiredSpread" in local
+// coordinates. If "desiredSpread" would reach outside the range t=0..1, a
+// smaller spread is used.
+float measure_cubic_local_curvature(const Vec2D pts[4],
+                                    const math::CubicCoeffs& coeffs,
+                                    float T,
+                                    float desiredSpread);
+inline float measure_cubic_local_curvature(const Vec2D pts[4],
+                                           float T,
+                                           float desiredSpread)
 {
-public:
-    inline EvalCubic(const Vec2D pts[])
-    {
-        // Cubic power-basis form:
-        //
-        //                                       | 1  0  0  0|   |P0|
-        //   Cubic(T) = x,y = |1  t  t^2  t^3| * |-3  3  0  0| * |P1|
-        //                                       | 3 -6  3  0|   |P2|
-        //                                       |-1  3 -3  1|   |P3|
-        //
-        // Find the cubic's power basis coefficients. These define the bezier
-        // curve as:
-        //
-        //                                  |t^3|
-        //     Cubic(T) = x,y = |A  B  C| * |t^2| + P0
-        //                      |.  .  .|   |t  |
-        //
-        // (Duplicate coefficients across a float4 so we can evaluate two at
-        // once.)
-        m_P0 = simd::load2f(pts + 0).xyxy;
-        float4 P1 = simd::load2f(pts + 1).xyxy;
-        float4 P2 = simd::load2f(pts + 2).xyxy;
-        float4 P3 = simd::load2f(pts + 3).xyxy;
-        m_C = 3.f * (P1 - m_P0);
-        float4 D = 3.f * (P2 - P1);
-        float4 E = P3 - m_P0;
-        m_B = D - m_C;
-        m_A = E - D;
-    }
-
-    // Evaluates [x, y] at location t.
-    inline float2 operator()(float t) const
-    {
-        // At^3 + Bt^2 + Ct + P0
-        return t * (t * (t * m_A.xy + m_B.xy) + m_C.xy) + m_P0.xy;
-    }
-
-    // Evaluates [Xa, Ya, Xb, Yb] at locations [Ta, Ta, Tb, Tb].
-    inline float4 operator()(float4 t) const
-    {
-        // At^3 + Bt^2 + Ct + P0
-        return t * (t * (t * m_A + m_B) + m_C) + m_P0;
-    }
-
-private:
-    float4 m_A;
-    float4 m_B;
-    float4 m_C;
-    float4 m_P0;
-};
+    return measure_cubic_local_curvature(pts,
+                                         CubicCoeffs(pts),
+                                         T,
+                                         desiredSpread);
+}
 } // namespace math
 } // namespace rive
diff --git a/include/rive/math/raw_path.hpp b/include/rive/math/raw_path.hpp
index 7c67b50..ad14aff 100644
--- a/include/rive/math/raw_path.hpp
+++ b/include/rive/math/raw_path.hpp
@@ -260,6 +260,12 @@
     // Otherwise it does nothing.
     void injectImplicitMoveIfNeeded();
 
+    void reserve(size_t numVerbs, size_t numPts)
+    {
+        m_Verbs.reserve(numVerbs);
+        m_Points.reserve(numPts);
+    }
+
 private:
     std::vector<Vec2D> m_Points;
     std::vector<PathVerb> m_Verbs;
diff --git a/include/rive/math/simd.hpp b/include/rive/math/simd.hpp
index 234d856..c52c995 100644
--- a/include/rive/math/simd.hpp
+++ b/include/rive/math/simd.hpp
@@ -378,6 +378,20 @@
 #endif
 }
 
+template <int N>
+SIMD_ALWAYS_INLINE gvec<float, N> copysign(gvec<float, N> x, gvec<float, N> y)
+{
+    constexpr static uint32_t SIGN_BIT = 0x80000000u;
+    // Type punning is free in vector registers.
+    gvec<uint32_t, N> bitsX, bitsY;
+    SIMD_INLINE_MEMCPY(&bitsX, &x, sizeof(x));
+    SIMD_INLINE_MEMCPY(&bitsY, &y, sizeof(y));
+    gvec<uint32_t, N> bitsRet = (bitsY & SIGN_BIT) | (bitsX & ~SIGN_BIT);
+    gvec<float, N> ret;
+    SIMD_INLINE_MEMCPY(&ret, &bitsRet, sizeof(ret));
+    return ret;
+}
+
 // IEEE compliant sqrt.
 template <int N> SIMD_ALWAYS_INLINE gvec<float, N> sqrt(gvec<float, N> x)
 {
@@ -703,9 +717,16 @@
     assert(simd::all(0.f <= t && t < 1.f));
     return (b - a) * t + a;
 }
-
-// Linearly interpolates between a and b, returning precisely 'a' if t==0 and
-// precisely 'b' if t==1.
+// Linearly interpolates between a and b with no assert on t. For callers that
+// don't mind mix(a, b, 1) being only ~= b (it may not be precisely b).
+template <int N>
+SIMD_ALWAYS_INLINE gvec<float, N> unchecked_mix(gvec<float, N> a,
+                                                gvec<float, N> b,
+                                                gvec<float, N> t)
+{
+    return (b - a) * t + a;
+}
+// Returns precisely 'a' if t==0 and precisely 'b' if t==1.
 template <int N>
 SIMD_ALWAYS_INLINE gvec<float, N> precise_mix(gvec<float, N> a,
                                               gvec<float, N> b,
diff --git a/include/rive/math/simd_gvec_polyfill.hpp b/include/rive/math/simd_gvec_polyfill.hpp
index 1a1b204..3dd9e9b 100644
--- a/include/rive/math/simd_gvec_polyfill.hpp
+++ b/include/rive/math/simd_gvec_polyfill.hpp
@@ -88,6 +88,8 @@
         gvec<T, 2, PackSwizzle2(2, 1, 0)> yx;
         gvec<T, 4, PackSwizzle4(2, 0, 1, 0, 1)> xyxy;
         gvec<T, 4, PackSwizzle4(2, 1, 0, 1, 0)> yxyx;
+        gvec<T, 4, PackSwizzle4(2, 0, 0, 1, 1)> xxyy;
+        gvec<T, 4, PackSwizzle4(2, 1, 1, 0, 0)> yyxx;
     };
 };
 
@@ -134,6 +136,7 @@
         gvec<T, 4, PackSwizzle4(4, 2, 3, 0, 1)> zwxy;
         gvec<T, 4, PackSwizzle4(4, 2, 1, 0, 3)> zyxw;
         gvec<T, 4, PackSwizzle4(4, 0, 3, 2, 1)> xwzy;
+        gvec<T, 4, PackSwizzle4(4, 0, 2, 1, 3)> xzyw;
     };
 };
 
diff --git a/include/rive/renderer.hpp b/include/rive/renderer.hpp
index 6e21f42..cea6520 100644
--- a/include/rive/renderer.hpp
+++ b/include/rive/renderer.hpp
@@ -121,6 +121,7 @@
     virtual void thickness(float value) = 0;
     virtual void join(StrokeJoin value) = 0;
     virtual void cap(StrokeCap value) = 0;
+    virtual void feather(float value) {} // Not supported on all renderers.
     virtual void blendMode(BlendMode value) = 0;
     virtual void shader(rcp<RenderShader>) = 0;
     virtual void invalidateStroke() = 0;
diff --git a/renderer/include/rive/renderer/d3d/render_context_d3d_impl.hpp b/renderer/include/rive/renderer/d3d/render_context_d3d_impl.hpp
index 79c43ac..3ae60e6 100644
--- a/renderer/include/rive/renderer/d3d/render_context_d3d_impl.hpp
+++ b/renderer/include/rive/renderer/d3d/render_context_d3d_impl.hpp
@@ -151,6 +151,10 @@
     ComPtr<ID3D11ShaderResourceView> m_gradTextureSRV;
     ComPtr<ID3D11RenderTargetView> m_gradTextureRTV;
 
+    // Gaussian integral table for feathering.
+    ComPtr<ID3D11Texture2D> m_featherTexture;
+    ComPtr<ID3D11ShaderResourceView> m_featherTextureSRV;
+
     ComPtr<ID3D11Texture2D> m_tessTexture;
     ComPtr<ID3D11ShaderResourceView> m_tessTextureSRV;
     ComPtr<ID3D11RenderTargetView> m_tessTextureRTV;
diff --git a/renderer/include/rive/renderer/draw.hpp b/renderer/include/rive/renderer/draw.hpp
index 51274a2..1975bd3 100644
--- a/renderer/include/rive/renderer/draw.hpp
+++ b/renderer/include/rive/renderer/draw.hpp
@@ -55,14 +55,7 @@
     BlendMode blendMode() const { return m_blendMode; }
     Type type() const { return m_type; }
     gpu::DrawContents drawContents() const { return m_drawContents; }
-    bool isStroked() const
-    {
-        return m_drawContents & gpu::DrawContents::stroke;
-    }
-    RenderPaintStyle renderPaintStyle() const
-    {
-        return isStroked() ? RenderPaintStyle::stroke : RenderPaintStyle::fill;
-    }
+    bool isStroke() const { return m_drawContents & gpu::DrawContents::stroke; }
     bool isEvenOddFill() const
     {
         return m_drawContents & gpu::DrawContents::evenOddFill;
@@ -225,7 +218,17 @@
 
     const Gradient* gradient() const { return m_gradientRef; }
     gpu::PaintType paintType() const { return m_paintType; }
+    bool isFeatheredFill() const
+    {
+        return m_drawContents & gpu::DrawContents::featheredFill;
+    }
+    bool isStrokeOrFeather() const
+    {
+        return m_drawContents &
+               (gpu::DrawContents::stroke | gpu::DrawContents::featheredFill);
+    }
     float strokeRadius() const { return m_strokeRadius; }
+    float featherRadius() const { return m_featherRadius; }
     gpu::ContourDirections contourDirections() const
     {
         return m_contourDirections;
@@ -311,14 +314,16 @@
     const Gradient* m_gradientRef;
     const gpu::PaintType m_paintType;
     float m_strokeRadius = 0;
+    float m_featherRadius = 0;
     gpu::ContourDirections m_contourDirections;
     uint32_t m_contourFlags = 0;
     gpu::CoverageBufferRange m_coverageBufferRange; // clockwiseAtomic only.
     GrInnerFanTriangulator* m_triangulator = nullptr;
 
-    float m_strokeMatrixMaxScale;
     StrokeJoin m_strokeJoin;
     StrokeCap m_strokeCap;
+    float m_strokeMatrixMaxScale;
+    float m_polarSegmentsPerRadian;
 
     struct ContourInfo
     {
diff --git a/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp b/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp
index ec3ede8..a1f818e 100644
--- a/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp
+++ b/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp
@@ -221,6 +221,9 @@
     glutils::Framebuffer m_colorRampFBO;
     GLuint m_gradientTexture = 0;
 
+    // Gaussian integral table for feathering.
+    glutils::Texture m_featherTexture;
+
     // Tessellation texture rendering.
     glutils::Program m_tessellateProgram;
     glutils::VAO m_tessellateVAO;
diff --git a/renderer/include/rive/renderer/gpu.hpp b/renderer/include/rive/renderer/gpu.hpp
index 1dcc43b..9004dcf 100644
--- a/renderer/include/rive/renderer/gpu.hpp
+++ b/renderer/include/rive/renderer/gpu.hpp
@@ -7,7 +7,6 @@
 #include "rive/enum_bitset.hpp"
 #include "rive/math/aabb.hpp"
 #include "rive/math/mat2d.hpp"
-#include "rive/math/path_types.hpp"
 #include "rive/math/vec2d.hpp"
 #include "rive/refcnt.hpp"
 #include "rive/shapes/paint/blend_mode.hpp"
@@ -399,10 +398,15 @@
     // followed by a ~1px AA ramp.
     midpointFan,
 
+    // Similar to midpointFan, except AA ramps are split down the center and
+    // drawn with a ~1/2px outset AA ramp and a ~1/2px inset AA ramp that
+    // overlaps the inner tessellation and has negative coverage.
+    midpointFanCenterAA,
+
     // Patches only cover the AA ramps and interiors of bezier curves. The
     // interior path triangles that connect the outer curves are triangulated on
     // the CPU to eliminate overlap, and are drawn in a separate call. AA ramps
-    // are split down the middle (on the same lines as the interior
+    // are split down the center (on the same lines as the interior
     // triangulation), and drawn with a ~1/2px outset AA ramp and a ~1/2px inset
     // AA ramp that overlaps the inner tessellation and has negative coverage. A
     // lone bowtie join is emitted at the end of the patch to tie the outer
@@ -494,6 +498,21 @@
     3 /*Triangle from path midpoint*/;
 constexpr static uint32_t kMidpointFanPatchBaseIndex = 0;
 static_assert((kMidpointFanPatchBaseIndex * sizeof(uint16_t)) % 4 == 0);
+
+constexpr static uint32_t kMidpointFanCenterAAPatchVertexCount =
+    kMidpointFanPatchSegmentSpan * 4 * 2 /*Stroke and/or AA outer ramp*/ +
+    (kMidpointFanPatchSegmentSpan + 1) /*Curve fan*/ +
+    1 /*Triangle from path midpoint*/;
+constexpr static uint32_t kMidpointFanCenterAAPatchBorderIndexCount =
+    kMidpointFanPatchSegmentSpan * 12 /*Stroke and/or AA outer ramp*/;
+constexpr static uint32_t kMidpointFanCenterAAPatchIndexCount =
+    kMidpointFanCenterAAPatchBorderIndexCount /*Stroke and/or AA outer ramp*/ +
+    (kMidpointFanPatchSegmentSpan - 1) * 3 /*Curve fan*/ +
+    3 /*Triangle from path midpoint*/;
+constexpr static uint32_t kMidpointFanCenterAAPatchBaseIndex =
+    kMidpointFanPatchBaseIndex + kMidpointFanPatchIndexCount;
+static_assert((kMidpointFanCenterAAPatchBaseIndex * sizeof(uint16_t)) % 4 == 0);
+
 constexpr static uint32_t kOuterCurvePatchVertexCount =
     kOuterCurvePatchSegmentSpan * 8 /*AA center ramp with bowtie*/ +
     kOuterCurvePatchSegmentSpan /*Curve fan*/;
@@ -503,20 +522,24 @@
     kOuterCurvePatchBorderIndexCount /*AA center ramp with bowtie*/ +
     (kOuterCurvePatchSegmentSpan - 2) * 3 /*Curve fan*/;
 constexpr static uint32_t kOuterCurvePatchBaseIndex =
-    kMidpointFanPatchIndexCount;
+    kMidpointFanCenterAAPatchBaseIndex + kMidpointFanCenterAAPatchIndexCount;
 static_assert((kOuterCurvePatchBaseIndex * sizeof(uint16_t)) % 4 == 0);
+
 constexpr static uint32_t kPatchVertexBufferCount =
-    kMidpointFanPatchVertexCount + kOuterCurvePatchVertexCount;
+    kMidpointFanPatchVertexCount + kMidpointFanCenterAAPatchVertexCount +
+    kOuterCurvePatchVertexCount;
 constexpr static uint32_t kPatchIndexBufferCount =
-    kMidpointFanPatchIndexCount + kOuterCurvePatchIndexCount;
+    kMidpointFanPatchIndexCount + kMidpointFanCenterAAPatchIndexCount +
+    kOuterCurvePatchIndexCount;
 void GeneratePatchBufferData(PatchVertex[kPatchVertexBufferCount],
                              uint16_t indices[kPatchIndexBufferCount]);
 
 enum class DrawType : uint8_t
 {
-    midpointFanPatches, // Standard paths and/or strokes.
-    outerCurvePatches,  // Just the outer curves of a path; the interior will be
-                        // triangulated.
+    midpointFanPatches,         // Fills, strokes, feathered strokes.
+    midpointFanCenterAAPatches, // Feathered fills.
+    outerCurvePatches, // Just the outer curves of a path; the interior will be
+                       // triangulated.
     interiorTriangulation,
     imageRect,
     imageMesh,
@@ -536,6 +559,7 @@
         case DrawType::imageMesh:
             return true;
         case DrawType::midpointFanPatches:
+        case DrawType::midpointFanCenterAAPatches:
         case DrawType::outerCurvePatches:
         case DrawType::interiorTriangulation:
         case DrawType::atomicInitialize:
@@ -546,30 +570,25 @@
     RIVE_UNREACHABLE();
 }
 
-constexpr static uint32_t PatchSegmentSpan(DrawType drawType)
-{
-    switch (drawType)
-    {
-        case DrawType::midpointFanPatches:
-            return kMidpointFanPatchSegmentSpan;
-        case DrawType::outerCurvePatches:
-            return kOuterCurvePatchSegmentSpan;
-        default:
-            RIVE_UNREACHABLE();
-    }
-}
-
 constexpr static uint32_t PatchIndexCount(DrawType drawType)
 {
     switch (drawType)
     {
         case DrawType::midpointFanPatches:
             return kMidpointFanPatchIndexCount;
+        case DrawType::midpointFanCenterAAPatches:
+            return kMidpointFanCenterAAPatchIndexCount;
         case DrawType::outerCurvePatches:
             return kOuterCurvePatchIndexCount;
-        default:
+        case DrawType::interiorTriangulation:
+        case DrawType::imageRect:
+        case DrawType::imageMesh:
+        case DrawType::atomicInitialize:
+        case DrawType::atomicResolve:
+        case DrawType::stencilClipReset:
             RIVE_UNREACHABLE();
     }
+    RIVE_UNREACHABLE();
 }
 
 constexpr static uint32_t PatchBorderIndexCount(DrawType drawType)
@@ -578,11 +597,19 @@
     {
         case DrawType::midpointFanPatches:
             return kMidpointFanPatchBorderIndexCount;
+        case DrawType::midpointFanCenterAAPatches:
+            return kMidpointFanCenterAAPatchBorderIndexCount;
         case DrawType::outerCurvePatches:
             return kOuterCurvePatchBorderIndexCount;
-        default:
+        case DrawType::interiorTriangulation:
+        case DrawType::imageRect:
+        case DrawType::imageMesh:
+        case DrawType::atomicInitialize:
+        case DrawType::atomicResolve:
+        case DrawType::stencilClipReset:
             RIVE_UNREACHABLE();
     }
+    RIVE_UNREACHABLE();
 }
 
 constexpr static uint32_t PatchFanIndexCount(DrawType drawType)
@@ -596,11 +623,19 @@
     {
         case DrawType::midpointFanPatches:
             return kMidpointFanPatchBaseIndex;
+        case DrawType::midpointFanCenterAAPatches:
+            return kMidpointFanCenterAAPatchBaseIndex;
         case DrawType::outerCurvePatches:
             return kOuterCurvePatchBaseIndex;
-        default:
+        case DrawType::interiorTriangulation:
+        case DrawType::imageRect:
+        case DrawType::imageMesh:
+        case DrawType::atomicInitialize:
+        case DrawType::atomicResolve:
+        case DrawType::stencilClipReset:
             RIVE_UNREACHABLE();
     }
+    RIVE_UNREACHABLE();
 }
 
 constexpr static uint32_t PatchFanBaseIndex(DrawType drawType)
@@ -645,12 +680,13 @@
     ENABLE_ADVANCED_BLEND = 1 << 2,
 
     // Fragment-only features.
-    ENABLE_EVEN_ODD = 1 << 3,
-    ENABLE_NESTED_CLIPPING = 1 << 4,
-    ENABLE_HSL_BLEND_MODES = 1 << 5,
+    ENABLE_FEATHER = 1 << 3,
+    ENABLE_EVEN_ODD = 1 << 4,
+    ENABLE_NESTED_CLIPPING = 1 << 5,
+    ENABLE_HSL_BLEND_MODES = 1 << 6,
 };
 RIVE_MAKE_ENUM_BITSET(ShaderFeatures)
-constexpr static size_t kShaderFeatureCount = 6;
+constexpr static size_t kShaderFeatureCount = 7;
 constexpr static ShaderFeatures kAllShaderFeatures =
     static_cast<gpu::ShaderFeatures>((1 << kShaderFeatureCount) - 1);
 constexpr static ShaderFeatures kVertexShaderFeaturesMask =
@@ -699,6 +735,7 @@
             // consider the same shader features for path draws.
             [[fallthrough]];
         case DrawType::midpointFanPatches:
+        case DrawType::midpointFanCenterAAPatches:
         case DrawType::outerCurvePatches:
         case DrawType::interiorTriangulation:
         case DrawType::atomicResolve:
@@ -775,13 +812,16 @@
 {
     none = 0,
     opaquePaint = 1 << 0,
-    stroke = 1 << 1,
-    clockwiseFill = 1 << 2,
-    nonZeroFill = 1 << 3,
-    evenOddFill = 1 << 4,
-    activeClip = 1 << 5,
-    clipUpdate = 1 << 6,
-    advancedBlend = 1 << 7,
+    // Put feathered fills down low because they only need to draw different
+    // geometry, which isn't really a context switch at all.
+    featheredFill = 1 << 1,
+    stroke = 1 << 2,
+    clockwiseFill = 1 << 3,
+    nonZeroFill = 1 << 4,
+    evenOddFill = 1 << 5,
+    activeClip = 1 << 6,
+    clipUpdate = 1 << 7,
+    advancedBlend = 1 << 8,
 };
 RIVE_MAKE_ENUM_BITSET(DrawContents)
 
@@ -946,7 +986,7 @@
 
 // Returns the area of the (potentially non-rectangular) quadrilateral that
 // results from transforming the given bounds by the given matrix.
-float FindTransformedArea(const AABB& bounds, const Mat2D&);
+float find_transformed_area(const AABB& bounds, const Mat2D&);
 
 // Convert a BlendMode to the tightly-packed range used by PLS shaders.
 uint32_t ConvertBlendModeToPLSBlendMode(BlendMode riveMode);
@@ -1074,9 +1114,9 @@
     constexpr static StorageBufferStructure kBufferStructure =
         StorageBufferStructure::uint32x4;
 
-    void set(const Mat2D&, float strokeRadius, uint32_t zIndex);
     void set(const Mat2D&,
              float strokeRadius,
+             float featherRadius,
              uint32_t zIndex,
              const CoverageBufferRange&);
 
@@ -1084,11 +1124,12 @@
     WRITEONLY float m_matrix[6];
     // "0" indicates that the path is filled, not stroked.
     WRITEONLY float m_strokeRadius;
+    WRITEONLY float m_featherRadius;
     // InterlockMode::msaa.
     WRITEONLY uint32_t m_zIndex;
+    WRITEONLY uint32_t pad[3];
     // InterlockMode::clockwiseAtomic.
     WRITEONLY CoverageBufferRange m_coverageBufferRange;
-    WRITEONLY uint32_t pad[4];
 };
 static_assert(sizeof(PathData) ==
               StorageBufferElementSizeInBytes(PathData::kBufferStructure) * 4);
@@ -1375,4 +1416,18 @@
     yes,
     unknown
 };
+
+// These tables integrate the gaussian function, and its inverse, covering a
+// spread of -FEATHER_TEXTURE_STDDEVS to +FEATHER_TEXTURE_STDDEVS.
+constexpr int GAUSSIAN_TABLE_SIZE = 512;
+extern const uint16_t g_gaussianIntegralTableF16[GAUSSIAN_TABLE_SIZE];
+extern const float g_inverseGaussianIntegralTableF32[GAUSSIAN_TABLE_SIZE];
+
+// Looks up the value of "x" in the given Gaussian table, with linear filtering.
+float gaussian_table_lookup(const float (&table)[GAUSSIAN_TABLE_SIZE], float x);
+
+inline float inverse_gaussian_integral(float y)
+{
+    return gaussian_table_lookup(g_inverseGaussianIntegralTableF32, y);
+}
 } // namespace rive::gpu
diff --git a/renderer/include/rive/renderer/metal/render_context_metal_impl.h b/renderer/include/rive/renderer/metal/render_context_metal_impl.h
index e30467e..3412e86 100644
--- a/renderer/include/rive/renderer/metal/render_context_metal_impl.h
+++ b/renderer/include/rive/renderer/metal/render_context_metal_impl.h
@@ -209,6 +209,9 @@
     std::unique_ptr<ColorRampPipeline> m_colorRampPipeline;
     id<MTLTexture> m_gradientTexture = nullptr;
 
+    // Gaussian integral table for feathering.
+    id<MTLTexture> m_featherTexture = nullptr;
+
     // Renders tessellated vertices to the tessellation texture.
     class TessellatePipeline;
     std::unique_ptr<TessellatePipeline> m_tessPipeline;
diff --git a/renderer/include/rive/renderer/render_context.hpp b/renderer/include/rive/renderer/render_context.hpp
index dcd0916..28ec202 100644
--- a/renderer/include/rive/renderer/render_context.hpp
+++ b/renderer/include/rive/renderer/render_context.hpp
@@ -607,7 +607,7 @@
         // Returns a unique 16-bit "contourID" handle for this specific record.
         // This ID may be or-ed with '*_CONTOUR_FLAG' bits from constants.glsl.
         [[nodiscard]] uint32_t pushContour(uint32_t pathID,
-                                           RenderPaintStyle,
+                                           gpu::DrawContents,
                                            Vec2D midpoint,
                                            bool closed,
                                            uint32_t vertexIndex0);
@@ -801,7 +801,7 @@
         // 'paddingVertexCount' tessellation vertices, colocated at T=0. The
         // caller must use this argument to align the end of the contour on
         // a boundary of the patch size. (See gpu::PaddingToAlignUp().)
-        [[nodiscard]] uint32_t pushContour(RenderPaintStyle,
+        [[nodiscard]] uint32_t pushContour(gpu::DrawContents,
                                            Vec2D midpoint,
                                            bool closed,
                                            uint32_t paddingVertexCount);
diff --git a/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp b/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp
index d034342..6ec800e 100644
--- a/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp
+++ b/renderer/include/rive/renderer/vulkan/render_context_vulkan_impl.hpp
@@ -242,6 +242,9 @@
     rcp<vkutil::TextureView> m_gradTextureView;
     rcp<vkutil::Framebuffer> m_gradTextureFramebuffer;
 
+    // Gaussian integral table for feathering.
+    rcp<TextureVulkanImpl> m_featherTexture;
+
     // Renders tessellated vertices to the tessellation texture.
     class TessellatePipeline;
     std::unique_ptr<TessellatePipeline> m_tessellatePipeline;
@@ -267,8 +270,8 @@
     class DrawPipeline;
     std::map<uint32_t, DrawPipeline> m_drawPipelines;
 
-    rcp<TextureVulkanImpl>
-        m_nullImageTexture; // Bound when there is not an image paint.
+    // Bound when there is not an image paint.
+    rcp<TextureVulkanImpl> m_nullImageTexture;
     VkSampler m_linearSampler;
     VkSampler m_mipmapSampler;
     rcp<vkutil::Buffer> m_pathPatchVertexBuffer;
diff --git a/renderer/path_fiddle/fiddle_context.hpp b/renderer/path_fiddle/fiddle_context.hpp
index bfa9f75..6f4e015 100644
--- a/renderer/path_fiddle/fiddle_context.hpp
+++ b/renderer/path_fiddle/fiddle_context.hpp
@@ -43,6 +43,7 @@
     virtual void tick(){};
 
     static std::unique_ptr<FiddleContext> MakeGLPLS(FiddleContextOptions = {});
+    static std::unique_ptr<FiddleContext> MakeGLSkia();
 #ifdef RIVE_MACOSX
     static std::unique_ptr<FiddleContext> MakeMetalPLS(
         FiddleContextOptions = {});
diff --git a/renderer/path_fiddle/fiddle_context_gl.cpp b/renderer/path_fiddle/fiddle_context_gl.cpp
index 09f6093..080e321 100644
--- a/renderer/path_fiddle/fiddle_context_gl.cpp
+++ b/renderer/path_fiddle/fiddle_context_gl.cpp
@@ -48,11 +48,10 @@
     }
     else if (type == GL_DEBUG_TYPE_PERFORMANCE)
     {
-        if (strcmp(message,
-                   "API_ID_REDUNDANT_FBO performance warning has been "
-                   "generated. Redundant state "
-                   "change in glBindFramebuffer API call, FBO 0, \"\", already "
-                   "bound.") == 0)
+        if (strstr(
+                message,
+                "API_ID_REDUNDANT_FBO performance warning has been generated. "
+                "Redundant state change in glBindFramebuffer API call, FBO"))
         {
             return;
         }
@@ -62,8 +61,7 @@
         }
         if (strcmp(message,
                    "Pixel-path performance warning: Pixel transfer is "
-                   "synchronized with 3D "
-                   "rendering.") == 0)
+                   "synchronized with 3D rendering.") == 0)
         {
             return;
         }
@@ -74,7 +72,111 @@
 #endif
 #endif
 
-class FiddleContextGL : public FiddleContext
+class FiddleContextGLBase : public FiddleContext
+{
+public:
+    ~FiddleContextGLBase() override
+    {
+        glDeleteFramebuffers(1, &m_zoomWindowFBO);
+    }
+
+    float dpiScale(GLFWwindow*) const override
+    {
+#if defined(__APPLE__) || defined(RIVE_WEBGL)
+        return 2;
+#else
+        return 1;
+#endif
+    }
+
+    void toggleZoomWindow() override
+    {
+        if (m_zoomWindowFBO)
+        {
+            glDeleteFramebuffers(1, &m_zoomWindowFBO);
+            m_zoomWindowFBO = 0;
+        }
+        else
+        {
+            GLuint tex;
+            glGenTextures(1, &tex);
+            glActiveTexture(GL_TEXTURE0);
+            glBindTexture(GL_TEXTURE_2D, tex);
+            glTexStorage2D(GL_TEXTURE_2D,
+                           1,
+                           GL_RGB8,
+                           kZoomWindowWidth,
+                           kZoomWindowHeight);
+
+            glGenFramebuffers(1, &m_zoomWindowFBO);
+            glBindFramebuffer(GL_FRAMEBUFFER, m_zoomWindowFBO);
+            glFramebufferTexture2D(GL_FRAMEBUFFER,
+                                   GL_COLOR_ATTACHMENT0,
+                                   GL_TEXTURE_2D,
+                                   tex,
+                                   0);
+            glBindFramebuffer(GL_FRAMEBUFFER, 0);
+
+            glDeleteTextures(1, &tex);
+        }
+    }
+
+    void end(GLFWwindow* window, std::vector<uint8_t>* pixelData) final
+    {
+        onEnd(pixelData);
+        if (m_zoomWindowFBO)
+        {
+            // Blit the zoom window.
+            double xd, yd;
+            glfwGetCursorPos(window, &xd, &yd);
+            xd *= dpiScale(window);
+            yd *= dpiScale(window);
+            int width = 0, height = 0;
+            glfwGetFramebufferSize(window, &width, &height);
+            int x = xd, y = height - yd;
+            glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_zoomWindowFBO);
+            glBlitFramebuffer(x - kZoomWindowWidth / 2,
+                              y - kZoomWindowHeight / 2,
+                              x + kZoomWindowWidth / 2,
+                              y + kZoomWindowHeight / 2,
+                              0,
+                              0,
+                              kZoomWindowWidth,
+                              kZoomWindowHeight,
+                              GL_COLOR_BUFFER_BIT,
+                              GL_NEAREST);
+            glBindFramebuffer(GL_FRAMEBUFFER, 0);
+
+            glEnable(GL_SCISSOR_TEST);
+            glScissor(0,
+                      0,
+                      kZoomWindowWidth * kZoomWindowScale + 2,
+                      kZoomWindowHeight * kZoomWindowScale + 2);
+            glClearColor(.6f, .6f, .6f, 1);
+            glClear(GL_COLOR_BUFFER_BIT);
+            glBindFramebuffer(GL_READ_FRAMEBUFFER, m_zoomWindowFBO);
+            glBlitFramebuffer(0,
+                              0,
+                              kZoomWindowWidth,
+                              kZoomWindowHeight,
+                              0,
+                              0,
+                              kZoomWindowWidth * kZoomWindowScale,
+                              kZoomWindowHeight * kZoomWindowScale,
+                              GL_COLOR_BUFFER_BIT,
+                              GL_NEAREST);
+            glDisable(GL_SCISSOR_TEST);
+        }
+    }
+
+protected:
+    virtual void onEnd(std::vector<uint8_t>* pixelData) = 0;
+
+private:
+    GLuint m_zoomWindowFBO = 0;
+};
+
+class FiddleContextGL : public FiddleContextGLBase
 {
 public:
     FiddleContextGL(FiddleContextOptions options)
@@ -124,49 +226,6 @@
         }
     }
 
-    ~FiddleContextGL() { glDeleteFramebuffers(1, &m_zoomWindowFBO); }
-
-    float dpiScale(GLFWwindow*) const override
-    {
-#if defined(__APPLE__) || defined(RIVE_WEBGL)
-        return 2;
-#else
-        return 1;
-#endif
-    }
-
-    void toggleZoomWindow() final
-    {
-        if (m_zoomWindowFBO)
-        {
-            glDeleteFramebuffers(1, &m_zoomWindowFBO);
-            m_zoomWindowFBO = 0;
-        }
-        else
-        {
-            GLuint tex;
-            glGenTextures(1, &tex);
-            glActiveTexture(GL_TEXTURE0);
-            glBindTexture(GL_TEXTURE_2D, tex);
-            glTexStorage2D(GL_TEXTURE_2D,
-                           1,
-                           GL_RGB8,
-                           kZoomWindowWidth,
-                           kZoomWindowHeight);
-
-            glGenFramebuffers(1, &m_zoomWindowFBO);
-            glBindFramebuffer(GL_FRAMEBUFFER, m_zoomWindowFBO);
-            glFramebufferTexture2D(GL_FRAMEBUFFER,
-                                   GL_COLOR_ATTACHMENT0,
-                                   GL_TEXTURE_2D,
-                                   tex,
-                                   0);
-            glBindFramebuffer(GL_FRAMEBUFFER, 0);
-
-            glDeleteTextures(1, &tex);
-        }
-    }
-
     rive::Factory* factory() override { return m_renderContext.get(); }
 
     rive::gpu::RenderContext* renderContextOrNull() override
@@ -206,7 +265,7 @@
         m_renderContext->flush({.renderTarget = m_renderTarget.get()});
     }
 
-    void end(GLFWwindow* window, std::vector<uint8_t>* pixelData) final
+    void onEnd(std::vector<uint8_t>* pixelData) override
     {
         flushPLSContext();
         m_renderContext->static_impl_cast<RenderContextGLImpl>()
@@ -225,53 +284,9 @@
                          GL_UNSIGNED_BYTE,
                          pixelData->data());
         }
-        if (m_zoomWindowFBO)
-        {
-            // Blit the zoom window.
-            double xd, yd;
-            glfwGetCursorPos(window, &xd, &yd);
-            xd *= dpiScale(window);
-            yd *= dpiScale(window);
-            int width = 0, height = 0;
-            glfwGetFramebufferSize(window, &width, &height);
-            int x = xd, y = height - yd;
-            glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_zoomWindowFBO);
-            glBlitFramebuffer(x - kZoomWindowWidth / 2,
-                              y - kZoomWindowHeight / 2,
-                              x + kZoomWindowWidth / 2,
-                              y + kZoomWindowHeight / 2,
-                              0,
-                              0,
-                              kZoomWindowWidth,
-                              kZoomWindowHeight,
-                              GL_COLOR_BUFFER_BIT,
-                              GL_NEAREST);
-            glBindFramebuffer(GL_FRAMEBUFFER, 0);
-
-            glEnable(GL_SCISSOR_TEST);
-            glScissor(0,
-                      0,
-                      kZoomWindowWidth * kZoomWindowScale + 2,
-                      kZoomWindowHeight * kZoomWindowScale + 2);
-            glClearColor(.6f, .6f, .6f, 1);
-            glClear(GL_COLOR_BUFFER_BIT);
-            glBindFramebuffer(GL_READ_FRAMEBUFFER, m_zoomWindowFBO);
-            glBlitFramebuffer(0,
-                              0,
-                              kZoomWindowWidth,
-                              kZoomWindowHeight,
-                              0,
-                              0,
-                              kZoomWindowWidth * kZoomWindowScale,
-                              kZoomWindowHeight * kZoomWindowScale,
-                              GL_COLOR_BUFFER_BIT,
-                              GL_NEAREST);
-            glDisable(GL_SCISSOR_TEST);
-        }
     }
 
 private:
-    GLuint m_zoomWindowFBO = 0;
     std::unique_ptr<RenderContext> m_renderContext;
     rcp<RenderTargetGL> m_renderTarget;
 };
@@ -282,4 +297,96 @@
     return std::make_unique<FiddleContextGL>(options);
 }
 
+#ifndef RIVE_SKIA
+// Built without RIVE_SKIA: there is no Skia backend, so this returns null.
+std::unique_ptr<FiddleContext> FiddleContext::MakeGLSkia() { return nullptr; }
+
+#else
+
+#include "skia_factory.hpp"
+#include "skia_renderer.hpp"
+#include "skia/include/core/SkCanvas.h"
+#include "skia/include/core/SkSurface.h"
+#include "skia/include/gpu/GrDirectContext.h"
+#include "skia/include/gpu/gl/GrGLAssembleInterface.h"
+#include "skia/include/gpu/gl/GrGLInterface.h"
+#include "include/effects/SkImageFilters.h"
+// GL proc loader handed to GrGLMakeAssembledInterface(); `ctx` is unused.
+static GrGLFuncPtr get_skia_gl_proc_address(void* ctx, const char name[])
+{
+    return glfwGetProcAddress(name);
+}
+// FiddleContext that renders with Skia into GL framebuffer 0.
+class FiddleContextGLSkia : public FiddleContextGLBase
+{
+public:
+    FiddleContextGLSkia() :
+        m_grContext(GrDirectContext::MakeGL(
+            GrGLMakeAssembledInterface(nullptr, get_skia_gl_proc_address)))
+    {
+        if (!m_grContext)
+        {
+            fprintf(stderr, "GrDirectContext::MakeGL failed.\n");
+            abort(); // makeRenderer() needs this context.
+        }
+    }
+
+    rive::Factory* factory() override { return &m_factory; }
+
+    // This backend has no Rive RenderContext or RenderTarget to expose.
+    rive::gpu::RenderContext* renderContextOrNull() override { return nullptr; }
+    rive::gpu::RenderTarget* renderTargetOrNull() override { return nullptr; }
+
+    // Wraps FBO 0 in a new SkSurface and returns a SkiaRenderer for its canvas.
+    std::unique_ptr<Renderer> makeRenderer(int width, int height) override
+    {
+        GrBackendRenderTarget backendRT(width,
+                                        height,
+                                        1 /*samples*/,
+                                        0 /*stencilBits*/,
+                                        {0 /*fbo 0*/, GL_RGBA8});
+        SkSurfaceProps surfProps(0, kUnknown_SkPixelGeometry);
+        m_skSurface =
+            SkSurface::MakeFromBackendRenderTarget(m_grContext.get(),
+                                                   backendRT,
+                                                   kBottomLeft_GrSurfaceOrigin,
+                                                   kRGBA_8888_SkColorType,
+                                                   nullptr,
+                                                   &surfProps);
+        if (!m_skSurface)
+        {
+            fprintf(stderr, "SkSurface::MakeFromBackendRenderTarget failed.\n");
+            abort();
+        }
+        return std::make_unique<SkiaRenderer>(m_skSurface->getCanvas());
+    }
+
+    // Clears to the frame's clear color and opens a save() closed by onEnd().
+    void begin(const RenderContext::FrameDescriptor& frameDescriptor) override
+    {
+        m_skSurface->getCanvas()->clear(frameDescriptor.clearColor);
+        m_grContext->resetContext();
+        m_skSurface->getCanvas()->save();
+    }
+
+    void onEnd(std::vector<uint8_t>* pixelData) override
+    {
+        m_skSurface->getCanvas()->restore();
+        m_skSurface->flush(); // pixelData is never written here.
+    }
+
+    void flushPLSContext() override {}
+
+private:
+    SkiaFactory m_factory;
+    const sk_sp<GrDirectContext> m_grContext;
+    sk_sp<SkSurface> m_skSurface;
+};
+// Factory for the Skia-backed GL fiddle context.
+std::unique_ptr<FiddleContext> FiddleContext::MakeGLSkia()
+{
+    return std::make_unique<FiddleContextGLSkia>();
+}
+#endif
+
 #endif
diff --git a/renderer/path_fiddle/path_fiddle.cpp b/renderer/path_fiddle/path_fiddle.cpp
index 26f19a7..4dd2852 100644
--- a/renderer/path_fiddle/path_fiddle.cpp
+++ b/renderer/path_fiddle/path_fiddle.cpp
@@ -63,6 +63,7 @@
 constexpr static int kNumInteractivePts = sizeof(s_pts) / sizeof(*s_pts);
 
 static float s_strokeWidth = 70;
+static float featherPower = 0;
 
 static float2 s_translate;
 static float s_scale = 1;
@@ -184,6 +185,28 @@
 int fpsFrames = 0;
 static bool s_needsTitleUpdate = false;
 
+enum class API
+{
+    gl,
+    metal,
+    d3d,
+    dawn,
+    vulkan,
+};
+// Default graphics API: Metal on Apple, D3D on Windows, GL everywhere else.
+API api =
+#if defined(__APPLE__)
+    API::metal
+#elif defined(_WIN32)
+    API::d3d
+#else
+    API::gl
+#endif
+    ;
+
+bool angle = false;
+bool skia = false; // Use Skia on GL (set by --skia; Shift+S toggles it).
+
 static void key_callback(GLFWwindow* window,
                          int key,
                          int scancode,
@@ -202,7 +225,6 @@
                 s_forceAtomicMode = !s_forceAtomicMode;
                 fpsLastTime = 0;
                 fpsFrames = 0;
-                s_needsTitleUpdate = true;
                 break;
             case GLFW_KEY_D:
                 printf("static float s_scale = %f;\n", s_scale);
@@ -233,6 +255,12 @@
             case GLFW_KEY_EQUAL:
                 s_strokeWidth *= 1.5f;
                 break;
+            case GLFW_KEY_F:
+                if (!shift)
+                    ++featherPower;
+                else
+                    featherPower = std::max(featherPower - 1, 0.f);
+                break;
             case GLFW_KEY_W:
                 s_wireframe = !s_wireframe;
                 break;
@@ -244,9 +272,27 @@
                 s_doClose = !s_doClose;
                 break;
             case GLFW_KEY_S:
-                s_disableStroke = !s_disableStroke;
+                if (shift)
+                {
+                    // Toggle Skia.
+                    s_scenes.clear();
+                    s_artboards.clear();
+                    s_rivFile = nullptr;
+                    skia = !skia;
+                    s_fiddleContext = skia ? FiddleContext::MakeGLSkia()
+                                           : FiddleContext::MakeGLPLS();
+                    lastWidth = 0;
+                    lastHeight = 0;
+                    fpsLastTime = 0;
+                    fpsFrames = 0;
+                    s_needsTitleUpdate = true;
+                }
+                else
+                {
+                    s_disableStroke = !s_disableStroke;
+                }
                 break;
-            case GLFW_KEY_F:
+            case GLFW_KEY_I:
                 s_disableFill = !s_disableFill;
                 break;
             case GLFW_KEY_X:
@@ -326,28 +372,8 @@
 #endif
 }
 
-enum class API
-{
-    gl,
-    metal,
-    d3d,
-    dawn,
-    vulkan,
-};
-
-API api =
-#if defined(__APPLE__)
-    API::metal
-#elif defined(_WIN32)
-    API::d3d
-#else
-    API::gl
-#endif
-    ;
-
-bool angle = false;
-
 std::unique_ptr<Renderer> renderer;
+const char* s_rivName = nullptr;
 
 void riveMainLoop();
 
@@ -386,7 +412,6 @@
     free(hash);
 #endif
 
-    const char* rivName = nullptr;
     for (int i = 1; i < argc; i++)
     {
         if (!strcmp(argv[i], "--gl"))
@@ -504,6 +529,10 @@
             angle = true;
         }
 #endif
+        else if (!strcmp(argv[i], "--skia"))
+        {
+            skia = true;
+        }
         else if (sscanf(argv[i], "-a%i", &s_animation))
         {
             // Already updated s_animation.
@@ -546,7 +575,7 @@
         }
         else
         {
-            rivName = argv[i];
+            s_rivName = argv[i];
         }
     }
 
@@ -630,7 +659,8 @@
             s_fiddleContext = FiddleContext::MakeVulkanPLS(s_options);
             break;
         case API::gl:
-            s_fiddleContext = FiddleContext::MakeGLPLS();
+            s_fiddleContext =
+                skia ? FiddleContext::MakeGLSkia() : FiddleContext::MakeGLPLS();
             break;
     }
     if (!s_fiddleContext)
@@ -638,15 +668,6 @@
         fprintf(stderr, "Failed to create a fiddle context.\n");
         abort();
     }
-    Factory* factory = s_fiddleContext->factory();
-
-    if (rivName)
-    {
-        std::ifstream rivStream(rivName, std::ios::binary);
-        std::vector<uint8_t> rivBytes(std::istreambuf_iterator<char>(rivStream),
-                                      {});
-        s_rivFile = File::import(rivBytes, factory);
-    }
 
 #ifndef __EMSCRIPTEN__
     if (api == API::gl)
@@ -661,7 +682,7 @@
         {
             glfwSwapBuffers(s_window);
         }
-        if (s_rivFile)
+        if (s_rivName)
         {
             glfwPollEvents();
         }
@@ -691,7 +712,14 @@
     {
         title << " (x" << instances << " instances)";
     }
-    title << " | Rive Renderer";
+    if (skia)
+    {
+        title << " | SKIA Renderer";
+    }
+    else
+    {
+        title << " | RIVE Renderer";
+    }
     if (s_msaa)
     {
         title << " (msaa" << s_msaa << ')';
@@ -706,6 +734,14 @@
 
 void riveMainLoop()
 {
+    if (s_rivName && !s_rivFile)
+    {
+        std::ifstream rivStream(s_rivName, std::ios::binary);
+        std::vector<uint8_t> rivBytes(std::istreambuf_iterator<char>(rivStream),
+                                      {});
+        s_rivFile = File::import(rivBytes, s_fiddleContext->factory());
+    }
+
 #ifdef __EMSCRIPTEN__
     {
         // Fit the canvas to the browser window size.
@@ -749,7 +785,7 @@
     s_fiddleContext->begin({
         .renderTargetWidth = static_cast<uint32_t>(width),
         .renderTargetHeight = static_cast<uint32_t>(height),
-        .clearColor = 0xff404040,
+        .clearColor = 0xff303030,
         .msaaSampleCount = s_msaa,
         .disableRasterOrdering = s_forceAtomicMode,
         .wireframe = s_wireframe,
@@ -820,34 +856,47 @@
         auto path = factory->makeRenderPath(rawPath, FillRule::nonZero);
 
         auto fillPaint = factory->makeRenderPaint();
-        fillPaint->style(RenderPaintStyle::fill);
+        if (featherPower != 0)
+        {
+            fillPaint->feather(powf(1.5f, featherPower));
+        }
         fillPaint->color(0xd0ffffff);
 
-        auto strokePaint = factory->makeRenderPaint();
-        strokePaint->style(RenderPaintStyle::stroke);
-        strokePaint->color(0x8000ffff);
-        strokePaint->thickness(s_strokeWidth);
-        strokePaint->join(s_join);
-        strokePaint->cap(s_cap);
-
         renderer->drawPath(path.get(), fillPaint.get());
-        renderer->drawPath(path.get(), strokePaint.get());
 
-        // Draw the interactive points.
-        auto pointPaint = factory->makeRenderPaint();
-        pointPaint->style(RenderPaintStyle::stroke);
-        pointPaint->color(0xff0000ff);
-        pointPaint->thickness(14);
-        pointPaint->cap(StrokeCap::round);
-
-        auto pointPath = factory->makeEmptyRenderPath();
-        for (int i : {1, 2, 4, 6, 7})
+        if (!s_disableStroke)
         {
-            float2 pt = s_pts[i] + s_translate;
-            pointPath->moveTo(pt.x, pt.y);
-        }
+            auto strokePaint = factory->makeRenderPaint();
+            strokePaint->style(RenderPaintStyle::stroke);
+            strokePaint->color(0x8000ffff);
+            strokePaint->thickness(s_strokeWidth);
+            if (featherPower != 0)
+            {
+                strokePaint->feather(powf(1.5f, featherPower));
+            }
+            strokePaint->join(s_join);
+            strokePaint->cap(s_cap);
 
-        renderer->drawPath(pointPath.get(), pointPaint.get());
+            renderer->drawPath(path.get(), strokePaint.get());
+
+            // Draw the interactive points.
+            auto pointPaint = factory->makeRenderPaint();
+            pointPaint->style(RenderPaintStyle::stroke);
+            pointPaint->color(0xff0000ff);
+            pointPaint->thickness(14);
+            pointPaint->cap(StrokeCap::round);
+            pointPaint->join(StrokeJoin::round);
+
+            auto pointPath = factory->makeEmptyRenderPath();
+            for (int i : {1, 2, 4, 6, 7})
+            {
+                float2 pt = s_pts[i] + s_translate;
+                pointPath->moveTo(pt.x, pt.y);
+                pointPath->close();
+            }
+
+            renderer->drawPath(pointPath.get(), pointPaint.get());
+        }
     }
 
     s_fiddleContext->end(s_window);
diff --git a/renderer/premake5.lua b/renderer/premake5.lua
index 36e0393..bded3c2 100644
--- a/renderer/premake5.lua
+++ b/renderer/premake5.lua
@@ -17,6 +17,7 @@
         'include',
         RIVE_RUNTIME_DIR .. '/include',
         'include',
+        RIVE_RUNTIME_DIR .. '/renderer/src',
     })
     externalincludedirs({
         'glad',
diff --git a/renderer/src/d3d/render_context_d3d_impl.cpp b/renderer/src/d3d/render_context_d3d_impl.cpp
index 9ab656b..ae1bb1e 100644
--- a/renderer/src/d3d/render_context_d3d_impl.cpp
+++ b/renderer/src/d3d/render_context_d3d_impl.cpp
@@ -249,6 +249,30 @@
                                            &m_colorRampPixelShader));
     }
 
+    // Create the feather texture.
+    m_featherTexture = makeSimple2DTexture(DXGI_FORMAT_R16_FLOAT,
+                                           gpu::GAUSSIAN_TABLE_SIZE,
+                                           1,
+                                           1,
+                                           D3D11_BIND_SHADER_RESOURCE);
+    D3D11_BOX box;
+    box.left = 0;
+    box.right = gpu::GAUSSIAN_TABLE_SIZE;
+    box.top = 0;
+    box.bottom = 1;
+    box.front = 0;
+    box.back = 1;
+    m_gpuContext->UpdateSubresource(m_featherTexture.Get(),
+                                    0,
+                                    &box,
+                                    gpu::g_gaussianIntegralTableF16,
+                                    sizeof(gpu::g_gaussianIntegralTableF16),
+                                    0);
+    VERIFY_OK(m_gpu->CreateShaderResourceView(
+        m_featherTexture.Get(),
+        NULL,
+        m_featherTextureSRV.ReleaseAndGetAddressOf()));
+
     // Compile the tessellation shaders.
     {
         std::ostringstream s;
@@ -366,7 +390,7 @@
                                 m_imageDrawUniforms.ReleaseAndGetAddressOf()));
     }
 
-    // Create a linear sampler for the gradient texture.
+    // Create a linear sampler for the gradient & feather textures.
     D3D11_SAMPLER_DESC linearSamplerDesc;
     linearSamplerDesc.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT;
     linearSamplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
@@ -396,10 +420,14 @@
         m_gpu->CreateSamplerState(&mipmapSamplerDesc,
                                   m_mipmapSampler.ReleaseAndGetAddressOf()));
 
-    ID3D11SamplerState* samplers[2] = {m_linearSampler.Get(),
-                                       m_mipmapSampler.Get()};
-    static_assert(IMAGE_TEXTURE_IDX == GRAD_TEXTURE_IDX + 1);
-    m_gpuContext->PSSetSamplers(GRAD_TEXTURE_IDX, 2, samplers);
+    ID3D11SamplerState* samplers[3] = {
+        m_linearSampler.Get(),
+        m_linearSampler.Get(),
+        m_mipmapSampler.Get(),
+    };
+    static_assert(FEATHER_TEXTURE_IDX == GRAD_TEXTURE_IDX + 1);
+    static_assert(IMAGE_TEXTURE_IDX == FEATHER_TEXTURE_IDX + 1);
+    m_gpuContext->PSSetSamplers(GRAD_TEXTURE_IDX, 3, samplers);
 
     D3D11_BLEND_DESC srcOverDesc{};
     srcOverDesc.RenderTarget[0].BlendEnable = TRUE;
@@ -1110,6 +1138,7 @@
         switch (drawType)
         {
             case DrawType::midpointFanPatches:
+            case DrawType::midpointFanCenterAAPatches:
             case DrawType::outerCurvePatches:
                 s << "#define " << GLSL_DRAW_PATH << '\n';
                 break;
@@ -1145,6 +1174,7 @@
         switch (drawType)
         {
             case DrawType::midpointFanPatches:
+            case DrawType::midpointFanCenterAAPatches:
             case DrawType::outerCurvePatches:
                 s << gpu::glsl::draw_path_common << '\n';
                 s << (interlockMode == gpu::InterlockMode::rasterOrdering
@@ -1192,6 +1222,7 @@
             switch (drawType)
             {
                 case DrawType::midpointFanPatches:
+                case DrawType::midpointFanCenterAAPatches:
                 case DrawType::outerCurvePatches:
                     layoutDesc[0] = {GLSL_a_patchVertexData,
                                      0,
@@ -1640,9 +1671,10 @@
     m_gpuContext->VSSetShaderResources(TESS_VERTEX_TEXTURE_IDX,
                                        1,
                                        m_tessTextureSRV.GetAddressOf());
-    m_gpuContext->PSSetShaderResources(GRAD_TEXTURE_IDX,
-                                       1,
-                                       m_gradTextureSRV.GetAddressOf());
+    ID3D11ShaderResourceView* gradFeatherViews[] = {m_gradTextureSRV.Get(),
+                                                    m_featherTextureSRV.Get()};
+    m_gpuContext->PSSetShaderResources(GRAD_TEXTURE_IDX, 2, gradFeatherViews);
+    assert(FEATHER_TEXTURE_IDX == GRAD_TEXTURE_IDX + 1);
 
     const char* const imageDrawUniformData =
         heap_buffer_contents(imageDrawUniformBufferRing());
@@ -1694,6 +1726,7 @@
         switch (drawType)
         {
             case DrawType::midpointFanPatches:
+            case DrawType::midpointFanCenterAAPatches:
             case DrawType::outerCurvePatches:
             {
                 m_gpuContext->IASetPrimitiveTopology(
diff --git a/renderer/src/draw.cpp b/renderer/src/draw.cpp
index e627686..944e883 100644
--- a/renderer/src/draw.cpp
+++ b/renderer/src/draw.cpp
@@ -17,7 +17,6 @@
 {
 namespace
 {
-
 // The final segment in an outerCurve patch is a bowtie join.
 constexpr static size_t kJoinSegmentCount = 1;
 constexpr static size_t kPatchSegmentCountExcludingJoin =
@@ -28,7 +27,7 @@
     (kMaxParametricSegments + kPatchSegmentCountExcludingJoin - 1) /
     kPatchSegmentCountExcludingJoin;
 
-static uint32_t FindSubdivisionCount(
+static uint32_t find_subdivision_count(
     const Vec2D pts[],
     const wangs_formula::VectorXform& vectorXform)
 {
@@ -39,15 +38,16 @@
         math::clamp(numSubdivisions, 1, kMaxCurveSubdivisions));
 }
 
-constexpr static int kNumSegmentsInMiterOrBevelJoin = 5;
-constexpr static int kStrokeStyleFlag = 8;
-constexpr static int kRoundJoinStyleFlag = kStrokeStyleFlag << 1;
-RIVE_ALWAYS_INLINE constexpr int style_flags(bool isStroked,
+constexpr static int NUM_SEGMENTS_IN_MITER_OR_BEVEL_JOIN = 5;
+constexpr static int STROKE_OR_FEATHER_STYLE_FLAG = 8;
+constexpr static int ROUND_JOIN_STYLE_FLAG = STROKE_OR_FEATHER_STYLE_FLAG << 1;
+RIVE_ALWAYS_INLINE constexpr int style_flags(bool isStrokeOrFeather,
                                              bool roundJoinStroked)
 {
-    int styleFlags = (isStroked << 3) | (roundJoinStroked << 4);
-    assert(bool(styleFlags & kStrokeStyleFlag) == isStroked);
-    assert(bool(styleFlags & kRoundJoinStyleFlag) == roundJoinStroked);
+    int styleFlags = (isStrokeOrFeather << 3) | (roundJoinStroked << 4);
+    assert(bool(styleFlags & STROKE_OR_FEATHER_STYLE_FLAG) ==
+           isStrokeOrFeather);
+    assert(bool(styleFlags & ROUND_JOIN_STYLE_FLAG) == roundJoinStroked);
     return styleFlags;
 }
 
@@ -56,28 +56,38 @@
 enum class StyledVerb
 {
     filledMove = static_cast<int>(PathVerb::move),
-    strokedMove = kStrokeStyleFlag | static_cast<int>(PathVerb::move),
-    roundJoinStrokedMove = kStrokeStyleFlag | kRoundJoinStyleFlag |
+    strokedMove =
+        STROKE_OR_FEATHER_STYLE_FLAG | static_cast<int>(PathVerb::move),
+    roundJoinStrokedMove = STROKE_OR_FEATHER_STYLE_FLAG |
+                           ROUND_JOIN_STYLE_FLAG |
                            static_cast<int>(PathVerb::move),
 
     filledLine = static_cast<int>(PathVerb::line),
-    strokedLine = kStrokeStyleFlag | static_cast<int>(PathVerb::line),
-    roundJoinStrokedLine = kStrokeStyleFlag | kRoundJoinStyleFlag |
+    strokedLine =
+        STROKE_OR_FEATHER_STYLE_FLAG | static_cast<int>(PathVerb::line),
+    roundJoinStrokedLine = STROKE_OR_FEATHER_STYLE_FLAG |
+                           ROUND_JOIN_STYLE_FLAG |
                            static_cast<int>(PathVerb::line),
 
     filledQuad = static_cast<int>(PathVerb::quad),
-    strokedQuad = kStrokeStyleFlag | static_cast<int>(PathVerb::quad),
-    roundJoinStrokedQuad = kStrokeStyleFlag | kRoundJoinStyleFlag |
+    strokedQuad =
+        STROKE_OR_FEATHER_STYLE_FLAG | static_cast<int>(PathVerb::quad),
+    roundJoinStrokedQuad = STROKE_OR_FEATHER_STYLE_FLAG |
+                           ROUND_JOIN_STYLE_FLAG |
                            static_cast<int>(PathVerb::quad),
 
     filledCubic = static_cast<int>(PathVerb::cubic),
-    strokedCubic = kStrokeStyleFlag | static_cast<int>(PathVerb::cubic),
-    roundJoinStrokedCubic = kStrokeStyleFlag | kRoundJoinStyleFlag |
+    strokedCubic =
+        STROKE_OR_FEATHER_STYLE_FLAG | static_cast<int>(PathVerb::cubic),
+    roundJoinStrokedCubic = STROKE_OR_FEATHER_STYLE_FLAG |
+                            ROUND_JOIN_STYLE_FLAG |
                             static_cast<int>(PathVerb::cubic),
 
     filledClose = static_cast<int>(PathVerb::close),
-    strokedClose = kStrokeStyleFlag | static_cast<int>(PathVerb::close),
-    roundJoinStrokedClose = kStrokeStyleFlag | kRoundJoinStyleFlag |
+    strokedClose =
+        STROKE_OR_FEATHER_STYLE_FLAG | static_cast<int>(PathVerb::close),
+    roundJoinStrokedClose = STROKE_OR_FEATHER_STYLE_FLAG |
+                            ROUND_JOIN_STYLE_FLAG |
                             static_cast<int>(PathVerb::close),
 };
 RIVE_ALWAYS_INLINE constexpr StyledVerb styled_verb(PathVerb verb,
@@ -311,7 +321,7 @@
         case StrokeJoin::miter:
             return MITER_REVERT_JOIN_CONTOUR_FLAG;
         case StrokeJoin::round:
-            return 0;
+            return ROUND_JOIN_CONTOUR_FLAG;
         case StrokeJoin::bevel:
             return BEVEL_JOIN_CONTOUR_FLAG;
     }
@@ -368,6 +378,8 @@
 {
     assert(path != nullptr);
     assert(paint != nullptr);
+
+    // Compute the screen-space bounding box.
     AABB mappedBounds;
     if (context->frameInterlockMode() == gpu::InterlockMode::rasterOrdering)
     {
@@ -384,17 +396,28 @@
     }
     assert(mappedBounds.width() >= 0);
     assert(mappedBounds.height() >= 0);
-    if (paint->getIsStroked())
+    if (paint->getIsStroked() || paint->getFeather() > 0)
     {
-        // Outset the path's bounding box to account for stroking.
-        float strokeOutset = paint->getThickness() * .5f;
-        if (paint->getJoin() == StrokeJoin::miter)
+        // Outset the path's bounding box to account for stroking & feathering.
+        float strokeOutset = 0;
+        if (paint->getIsStroked())
         {
-            strokeOutset *= 4;
+            strokeOutset = paint->getThickness() * .5f;
+            if (paint->getJoin() == StrokeJoin::miter)
+            {
+                // Miter joins may be longer than the stroke radius.
+                strokeOutset *= RIVE_MITER_LIMIT;
+            }
+            else if (paint->getCap() == StrokeCap::square)
+            {
+                // The diagonal of a square cap is longer than the stroke
+                // radius.
+                strokeOutset *= math::SQRT2;
+            }
         }
-        else if (paint->getCap() == StrokeCap::square)
+        if (paint->getFeather() != 0)
         {
-            strokeOutset *= math::SQRT2;
+            strokeOutset += paint->getFeather() * (FEATHER_TEXTURE_STDDEVS / 2);
         }
         AABB strokePixelOutset =
             matrix.mapBoundingBox({0, 0, strokeOutset, strokeOutset});
@@ -405,6 +428,7 @@
         mappedBounds = mappedBounds.outset(strokePixelOutset.width() + 1,
                                            strokePixelOutset.height() + 1);
     }
+
     IAABB pixelBounds = mappedBounds.roundOut();
     bool doTriangulation = false;
     const AABB& localBounds = path->getBounds();
@@ -412,14 +436,17 @@
     {
         return DrawUniquePtr();
     }
-    if (!paint->getIsStroked())
+    if (!paint->getIsStroked() && paint->getFeather() == 0)
     {
         // Use interior triangulation to draw filled paths if they're large
         // enough to benefit from it.
+        //
+        // FIXME! Implement interior triangulation for feathers.
+        //
         // FIXME! Implement interior triangulation in msaa mode.
         if (context->frameInterlockMode() != gpu::InterlockMode::msaa &&
             path->getRawPath().verbs().count() < 1000 &&
-            gpu::FindTransformedArea(localBounds, matrix) > 512.f * 512.f)
+            gpu::find_transformed_area(localBounds, matrix) > 512.f * 512.f)
         {
             doTriangulation = true;
         }
@@ -473,10 +500,22 @@
     assert(m_pathRef != nullptr);
     assert(!m_pathRef->getRawPath().empty());
     assert(paint != nullptr);
+
     if (m_blendMode == BlendMode::srcOver && paint->getIsOpaque())
     {
         m_drawContents |= gpu::DrawContents::opaquePaint;
     }
+
+    if (paint->getFeather() > 0 &&
+        // MSAA doesn't support feather yet.
+        interlockMode != gpu::InterlockMode::msaa)
+    {
+        m_featherRadius = paint->getFeather() * (FEATHER_TEXTURE_STDDEVS / 2);
+        assert(!std::isnan(m_featherRadius)); // These should get culled in
+                                              // RiveRenderer::drawPath().
+        assert(m_featherRadius > 0);
+    }
+
     if (paint->getIsStroked())
     {
         m_drawContents |= gpu::DrawContents::stroke;
@@ -489,29 +528,36 @@
                                              // RiveRenderer::drawPath().
         assert(m_strokeRadius > 0);
     }
-    else if (initialFillRule == FillRule::clockwise ||
-             frameDesc.clockwiseFillOverride)
+    else
     {
-        m_drawContents |= gpu::DrawContents::clockwiseFill;
-    }
-    else if (initialFillRule == FillRule::nonZero)
-    {
-        m_drawContents |= gpu::DrawContents::nonZeroFill;
-    }
-    else if (initialFillRule == FillRule::evenOdd)
-    {
-        m_drawContents |= gpu::DrawContents::evenOddFill;
-    }
-    if (paint->getType() == gpu::PaintType::clipUpdate)
-    {
-        m_drawContents |= gpu::DrawContents::clipUpdate;
-        if (paint->getSimpleValue().outerClipID != 0)
+        if (m_featherRadius)
         {
-            m_drawContents |= gpu::DrawContents::activeClip;
+            m_drawContents |= gpu::DrawContents::featheredFill;
+        }
+        if (initialFillRule == FillRule::clockwise ||
+            frameDesc.clockwiseFillOverride)
+        {
+            m_drawContents |= gpu::DrawContents::clockwiseFill;
+        }
+        else if (initialFillRule == FillRule::nonZero)
+        {
+            m_drawContents |= gpu::DrawContents::nonZeroFill;
+        }
+        else if (initialFillRule == FillRule::evenOdd)
+        {
+            m_drawContents |= gpu::DrawContents::evenOddFill;
+        }
+        if (paint->getType() == gpu::PaintType::clipUpdate)
+        {
+            m_drawContents |= gpu::DrawContents::clipUpdate;
+            if (paint->getSimpleValue().outerClipID != 0)
+            {
+                m_drawContents |= gpu::DrawContents::activeClip;
+            }
         }
     }
 
-    if (isStroked())
+    if (isStroke())
     {
         // Stroke triangles are always forward.
         m_contourDirections = gpu::ContourDirections::forward;
@@ -578,7 +624,9 @@
     m_simplePaintValue = paint->getSimpleValue();
     RIVE_DEBUG_CODE(m_pathRef->lockRawPathMutations();)
     RIVE_DEBUG_CODE(m_rawPathMutationID = m_pathRef->getRawPathMutationID();)
-    assert(isStroked() == (strokeRadius() > 0));
+    assert(isStroke() == (strokeRadius() > 0));
+    assert(isFeatheredFill() == (!isStroke() && featherRadius() > 0));
+    assert(!isFeatheredFill() || featherRadius() > 0);
 }
 
 RiveRenderPathDraw::RiveRenderPathDraw(
@@ -603,11 +651,11 @@
 
 {
     m_resourceCounts = from.m_resourceCounts;
-    m_strokeMatrixMaxScale = from.m_strokeMatrixMaxScale;
 
-    if (isStroked())
+    if (isStrokeOrFeather())
     {
         m_strokeMatrixMaxScale = from.m_strokeMatrixMaxScale;
+        m_polarSegmentsPerRadian = from.m_polarSegmentsPerRadian;
         m_strokeJoin = from.m_strokeJoin;
         m_strokeCap = from.m_strokeCap;
     }
@@ -643,9 +691,28 @@
     assert(type() == Type::midpointFanPath);
     assert(simd::all(m_resourceCounts.toVec() == 0)); // Only call init() once.
 
-    if (isStroked())
+    if (isStrokeOrFeather())
     {
         m_strokeMatrixMaxScale = m_matrix.findMaxScale();
+
+        float r_ = 0;
+        if (m_featherRadius != 0)
+        {
+            r_ = m_featherRadius * m_strokeMatrixMaxScale;
+            // The Gaussian distribution is very blurry on the outer edges.
+            // Once the radius crosses a certain threshold, we don't ever
+            // need more polar segments.
+            constexpr static float FEATHER_MAX_SCREEN_SPACE_RADIUS =
+                kPolarPrecision * 3;
+            r_ = std::min(r_, FEATHER_MAX_SCREEN_SPACE_RADIUS);
+        }
+        if (isStroke())
+        {
+            r_ += m_strokeRadius * m_strokeMatrixMaxScale;
+        }
+        m_polarSegmentsPerRadian =
+            math::calc_polar_segments_per_radian<kPolarPrecision>(r_);
+
         m_strokeJoin = paint->getJoin();
         m_strokeCap = paint->getCap();
     }
@@ -668,10 +735,10 @@
     size_t pathMaxLinesOrCurvesBeforeChops = rawPath.verbs().size() - 1;
     // Stroked cubics can be chopped into a maximum of 5 segments.
     size_t pathMaxLinesOrCurvesAfterChops =
-        isStroked() ? pathMaxLinesOrCurvesBeforeChops * 5
-                    : pathMaxLinesOrCurvesBeforeChops;
+        isStrokeOrFeather() ? pathMaxLinesOrCurvesBeforeChops * 5
+                            : pathMaxLinesOrCurvesBeforeChops;
     maxCurves += pathMaxLinesOrCurvesAfterChops;
-    if (isStroked())
+    if (isStrokeOrFeather())
     {
         maxStrokedCurvesBeforeChops += pathMaxLinesOrCurvesBeforeChops;
         maxRotations += pathMaxLinesOrCurvesAfterChops;
@@ -696,12 +763,12 @@
     // +3 for each contour because we align each contour's curves and rotations
     // on multiples of 4.
     size_t maxPaddedRotations =
-        isStroked() ? maxRotations + contourCount * 3 : 0;
+        isStrokeOrFeather() ? maxRotations + contourCount * 3 : 0;
     size_t maxPaddedCurves = maxCurves + contourCount * 3;
 
     // Reserve intermediate space for the polar segment counts of each curve and
     // round join.
-    if (isStroked())
+    if (isStrokeOrFeather())
     {
         m_numChops.reset(context->numChopsAllocator(), maxChops);
         m_chopVertices.reset(context->chopVerticesAllocator(), maxChopVertices);
@@ -713,6 +780,19 @@
     m_parametricSegmentCounts =
         context->parametricSegmentCountsAllocator().alloc(maxPaddedCurves);
 
+    float parametricPrecision = gpu::kParametricPrecision;
+    if (m_featherRadius > 1)
+    {
+        // Once the blur radius is above ~50 pixels, we don't have to tessellate
+        // within 1/4px of the edge anymore.
+        // At this point, tessellate within blurRadius/200 pixels of the edge.
+        // (parametricPrecision == 1/tolerance.)
+        parametricPrecision =
+            std::min(parametricPrecision * 100.f /
+                         (m_featherRadius * m_strokeMatrixMaxScale),
+                     parametricPrecision);
+    }
+
     size_t lineCount = 0;
     size_t unpaddedCurveCount = 0;
     size_t unpaddedRotationCount = 0;
@@ -722,16 +802,16 @@
     // every path in the batch, and begin counting tessellated vertices.
     size_t contourIdx = 0;
     size_t curveIdx = 0;
-    size_t rotationIdx =
-        0; // We measure rotations on both curves and round joins.
-    bool roundJoinStroked = isStroked() && m_strokeJoin == StrokeJoin::round;
+    // We measure rotations on both curves and round joins.
+    size_t rotationIdx = 0;
+    bool roundJoinStroked = isStroke() && m_strokeJoin == StrokeJoin::round;
     wangs_formula::VectorXform vectorXform(m_matrix);
     RawPath::Iter startOfContour = rawPath.begin();
     RawPath::Iter end = rawPath.end();
-    int preChopVerbCount =
-        0; // Original number of lines and curves, before chopping.
+    // Original number of lines and curves, before chopping.
+    int preChopVerbCount = 0;
     Vec2D endpointsSum{};
-    bool closed = !isStroked();
+    bool closed = !isStroke();
     Vec2D lastTangent = {0, 1};
     Vec2D firstTangent = {0, 1};
     size_t roundJoinCount = 0;
@@ -786,7 +866,7 @@
             curveIdx,
             contourFirstRotationIdx,
             rotationIdx,
-            isStroked() ? Vec2D() : endpointsSum * (1.f / preChopVerbCount),
+            isStroke() ? Vec2D() : endpointsSum * (1.f / preChopVerbCount),
             closed,
             strokeJoinCount,
             0,                 // strokeCapSegmentCount
@@ -800,7 +880,7 @@
         contourFirstRotationIdx = rotationIdx =
             math::round_up_to_multiple_of<4>(rotationIdx);
     };
-    const int styleFlags = style_flags(isStroked(), roundJoinStroked);
+    const int styleFlags = style_flags(isStrokeOrFeather(), roundJoinStroked);
     for (RawPath::Iter iter = startOfContour; iter != end; ++iter)
     {
         switch (styled_verb(iter.verb(), styleFlags))
@@ -815,7 +895,7 @@
                 }
                 preChopVerbCount = 0;
                 endpointsSum = {0, 0};
-                closed = !isStroked();
+                closed = !isStroke();
                 lastTangent = {0, 1};
                 firstTangent = {0, 1};
                 roundJoinCount = 0;
@@ -885,20 +965,21 @@
                 // convex), and do not rotate more than 180 degrees. This is
                 // required by the GPU parametric/polar sorter.
                 float t[2];
-                bool areCusps;
+                bool areCusps = false;
                 uint8_t numChops =
-                    math::find_cubic_convex_180_chops(p, t, &areCusps);
+                    isStroke()
+                        ? math::find_cubic_convex_180_chops(p, t, &areCusps)
+                        : 0; // Feathers already got chopped.
                 uint8_t chopKey = chop_key(areCusps, numChops);
                 m_numChops.push_back(chopKey);
                 Vec2D localChopBuffer[16];
                 switch (chopKey)
                 {
                     case cusp_chop_key(2): // 2 cusps
-                    case cusp_chop_key(
-                        1): // 1 cusp
-                            // We have to chop carefully around stroked cusps in
-                            // order to avoid rendering artifacts. Luckily,
-                            // cusps are extremely rare in real-world content.
+                    case cusp_chop_key(1): // 1 cusp
+                        // We have to chop carefully around stroked cusps in
+                        // order to avoid rendering artifacts. Luckily, cusps
+                        // are extremely rare in real-world content.
                         m_chopVertices.push_back() = {t[0], t[1]};
                         chop_cubic_around_cusps(p,
                                                 localChopBuffer,
@@ -929,7 +1010,7 @@
                      p += 3, ++curveIdx, ++rotationIdx)
                 {
                     float n4 = wangs_formula::cubic_pow4(p,
-                                                         kParametricPrecision,
+                                                         parametricPrecision,
                                                          vectorXform);
                     // Record n^4 for now. This will get resolved later.
                     assert(curveIdx < maxPaddedCurves);
@@ -947,7 +1028,7 @@
                 ++preChopVerbCount;
                 endpointsSum += p[3];
                 float n4 = wangs_formula::cubic_pow4(p,
-                                                     kParametricPrecision,
+                                                     parametricPrecision,
                                                      vectorXform);
                 // Record n^4 for now. This will get resolved later.
                 assert(curveIdx < maxPaddedCurves);
@@ -970,11 +1051,11 @@
     assert(curveIdx % 4 == 0);
     // Because we write polar segment counts in batches of 4.
     assert(rotationIdx % 4 == 0);
-    assert(isStroked() || maxPaddedRotations == 0);
-    assert(isStroked() || rotationIdx == 0);
+    assert(isStrokeOrFeather() || maxPaddedRotations == 0);
+    assert(isStrokeOrFeather() || rotationIdx == 0);
 
     // Return any data we conservatively allocated but did not use.
-    if (isStroked())
+    if (isStrokeOrFeather())
     {
         m_numChops.shrinkToFit(context->numChopsAllocator(), maxChops);
         m_chopVertices.shrinkToFit(context->chopVerticesAllocator(),
@@ -1021,13 +1102,10 @@
             contourVertexCount -= m_parametricSegmentCounts[j];
         }
 
-        if (isStroked())
+        if (isStrokeOrFeather())
         {
             // Finish calculating and counting polar segments for each stroked
             // curve and round join.
-            const float r_ = m_strokeRadius * m_strokeMatrixMaxScale;
-            const float polarSegmentsPerRad =
-                math::calc_polar_segments_per_radian<kPolarPrecision>(r_);
             for (j = contour->firstRotationIdx; j < contour->endRotationIdx;
                  j += 4)
             {
@@ -1045,7 +1123,7 @@
                 cosTheta = simd::clamp(cosTheta, float4(-1), float4(1));
                 float4 theta = simd::fast_acos(cosTheta);
                 // Find polar segment counts from the rotation angles.
-                float4 n = simd::ceil(theta * polarSegmentsPerRad);
+                float4 n = simd::ceil(theta * m_polarSegmentsPerRadian);
                 n = simd::clamp(n, float4(1), float4(kMaxPolarSegments));
                 uint4 n_ = simd::cast<uint32_t>(n);
                 assert(j + 4 <= rotationIdx);
@@ -1073,7 +1151,19 @@
             }
 
             // Count joins.
-            if (m_strokeJoin == StrokeJoin::round)
+            if (!isStroke())
+            {
+                assert(isFeatheredFill());
+                uint32_t numSegmentsInFeatherJoin =
+                    static_cast<uint32_t>(std::clamp<float>(
+                        ceilf(m_polarSegmentsPerRadian * math::PI),
+                        2,
+                        kMaxPolarSegments - 2)) +
+                    5;
+                contourVertexCount +=
+                    contour->strokeJoinCount * (numSegmentsInFeatherJoin - 1);
+            }
+            else if (m_strokeJoin == StrokeJoin::round)
             {
                 // Round joins share their beginning and ending vertices with
                 // the curve on either side. Therefore, the number of vertices
@@ -1088,20 +1178,20 @@
                 // their beginning and ending vertices with the curve on either
                 // side).
                 contourVertexCount += contour->strokeJoinCount *
-                                      (kNumSegmentsInMiterOrBevelJoin - 1);
+                                      (NUM_SEGMENTS_IN_MITER_OR_BEVEL_JOIN - 1);
             }
 
             // Count stroke caps, if any.
             bool empty = contour->endLineIdx == contourFirstLineIdx &&
                          contour->endCurveIdx == contour->firstCurveIdx;
             StrokeCap cap;
-            bool needsCaps;
+            bool needsCaps = false;
             if (!empty)
             {
                 cap = m_strokeCap;
                 needsCaps = !contour->closed;
             }
-            else
+            else if (isStroke())
             {
                 cap = empty_stroke_cap(contour->closed,
                                        m_strokeJoin,
@@ -1116,7 +1206,7 @@
                 {
                     // Round caps rotate 180 degrees.
                     float strokeCapSegmentCount =
-                        ceilf(polarSegmentsPerRad * math::PI);
+                        ceilf(m_polarSegmentsPerRadian * math::PI);
                     // +2 because round caps emulated as joins need to emit
                     // vertices at T=0 and T=1, unlike normal round joins.
                     strokeCapSegmentCount += 2;
@@ -1129,7 +1219,7 @@
                 else
                 {
                     contour->strokeCapSegmentCount =
-                        kNumSegmentsInMiterOrBevelJoin;
+                        NUM_SEGMENTS_IN_MITER_OR_BEVEL_JOIN;
                 }
                 // PLS expects all patches to have >0 tessellation vertices, so
                 // for the case of an empty patch with a stroke cap,
@@ -1201,7 +1291,7 @@
 {
     assert(type() == Type::interiorTriangulationPath);
     assert(simd::all(m_resourceCounts.toVec() == 0)); // Only call init() once.
-    assert(!isStroked());
+    assert(!isStrokeOrFeather());
     assert(m_strokeRadius == 0);
 
     // Every path has at least 1 (non-cubic) move.
@@ -1267,7 +1357,7 @@
     m_prepassCount = 0;
 
     if (flush->interlockMode() == gpu::InterlockMode::clockwiseAtomic &&
-        !isStroked())
+        !isStroke())
     {
         // clockwiseAtomic fills need a prepass to render their borrowed
         // coverage.
@@ -1337,7 +1427,7 @@
     assert(m_pathID != 0);
 
     bool clockwiseAtomicFill =
-        !isStroked() &&
+        !isStroke() &&
         flush->desc().interlockMode == gpu::InterlockMode::clockwiseAtomic;
 
     if (clockwiseAtomicFill)
@@ -1415,7 +1505,7 @@
             // Determine where to fill in forward and mirrored tessellations.
             uint32_t forwardTessVertexCount, forwardTessLocation,
                 mirroredTessVertexCount, mirroredTessLocation;
-            if (isStroked())
+            if (isStroke())
             {
                 // Strokes use a single forward tessellation.
                 assert(m_contourDirections == gpu::ContourDirections::forward);
@@ -1575,9 +1665,9 @@
         // Push a contour and curve records.
         const ContourInfo& contour = m_contours[i];
         assert(startOfContour.verb() == PathVerb::move);
-        assert(isStroked() || contour.closed); // Fills are always closed.
+        assert(isStroke() || contour.closed); // Fills are always closed.
         RIVE_DEBUG_CODE(m_pendingStrokeJoinCount =
-                            isStroked() ? contour.strokeJoinCount : 0;)
+                            isStrokeOrFeather() ? contour.strokeJoinCount : 0;)
         RIVE_DEBUG_CODE(m_pendingStrokeCapCount =
                             contour.strokeCapSegmentCount != 0 ? 2 : 0;)
 
@@ -1590,25 +1680,30 @@
         // Emit a starting cap before the next cubic?
         bool needsFirstEmulatedCapAsJoin = false;
         uint32_t emulatedCapAsJoinFlags = 0;
-        if (isStroked())
+        if (isStrokeOrFeather())
         {
-            joinTypeFlags = join_type_flags(m_strokeJoin);
-            roundJoinStroked = joinTypeFlags == 0;
+            joinTypeFlags = isStroke() ? join_type_flags(m_strokeJoin)
+                                       : FEATHER_JOIN_CONTOUR_FLAG;
+            roundJoinStroked = joinTypeFlags == ROUND_JOIN_CONTOUR_FLAG;
             if (contour.strokeCapSegmentCount != 0)
             {
                 StrokeCap cap =
                     !contour.closed
                         ? m_strokeCap
                         : empty_stroke_cap(true, m_strokeJoin, m_strokeCap);
-                emulatedCapAsJoinFlags = EMULATED_STROKE_CAP_CONTOUR_FLAG;
-                if (cap == StrokeCap::square)
+                switch (cap)
                 {
-                    emulatedCapAsJoinFlags |= MITER_CLIP_JOIN_CONTOUR_FLAG;
+                    case StrokeCap::butt:
+                        emulatedCapAsJoinFlags = BEVEL_JOIN_CONTOUR_FLAG;
+                        break;
+                    case StrokeCap::square:
+                        emulatedCapAsJoinFlags = MITER_CLIP_JOIN_CONTOUR_FLAG;
+                        break;
+                    case StrokeCap::round:
+                        emulatedCapAsJoinFlags = ROUND_JOIN_CONTOUR_FLAG;
+                        break;
                 }
-                else if (cap == StrokeCap::butt)
-                {
-                    emulatedCapAsJoinFlags |= BEVEL_JOIN_CONTOUR_FLAG;
-                }
+                emulatedCapAsJoinFlags |= EMULATED_STROKE_CAP_CONTOUR_FLAG;
                 needsFirstEmulatedCapAsJoin = true;
             }
         }
@@ -1616,13 +1711,32 @@
         // Make a data record for this current contour on the GPU.
         uint32_t contourIDWithFlags =
             m_contourFlags |
-            tessWriter->pushContour(renderPaintStyle(),
+            tessWriter->pushContour(m_drawContents,
                                     contour.midpoint,
                                     contour.closed,
                                     contour.paddingVertexCount);
 
+        // When we don't have round joins, the number of segments per join is
+        // constant. (Round joins have a variable number of segments per join,
+        // depending on the angle.)
+        uint32_t numSegmentsInNotRoundJoin;
+        if (isFeatheredFill())
+        {
+            numSegmentsInNotRoundJoin =
+                static_cast<uint32_t>(std::clamp<float>(
+                    ceilf(m_polarSegmentsPerRadian * math::PI),
+                    2,
+                    kMaxPolarSegments - 2)) +
+                5;
+        }
+        else
+        {
+            numSegmentsInNotRoundJoin = NUM_SEGMENTS_IN_MITER_OR_BEVEL_JOIN;
+        }
+
         // Convert all curves in the contour to cubics and push them to the GPU.
-        const int styleFlags = style_flags(isStroked(), roundJoinStroked);
+        const int styleFlags =
+            style_flags(isStrokeOrFeather(), roundJoinStroked);
         Vec2D joinTangent = {0, 1};
         int joinSegmentCount = 1;
         Vec2D implicitClose[2]; // In case we need an implicit closing line.
@@ -1671,7 +1785,7 @@
                                                         end.rawPtsPtr(),
                                                         contour.closed,
                                                         pts);
-                        joinSegmentCount = kNumSegmentsInMiterOrBevelJoin;
+                        joinSegmentCount = numSegmentsInNotRoundJoin;
                         RIVE_DEBUG_CODE(--m_pendingStrokeJoinCount;)
                     }
                     else
@@ -1819,7 +1933,7 @@
                                                             end.rawPtsPtr(),
                                                             contour.closed,
                                                             pts);
-                            joinSegmentCount = kNumSegmentsInMiterOrBevelJoin;
+                            joinSegmentCount = numSegmentsInNotRoundJoin;
                         }
                         RIVE_DEBUG_CODE(--m_pendingStrokeJoinCount;)
                     }
@@ -1897,10 +2011,10 @@
                     RIVE_DEBUG_CODE(--m_pendingRotationCount;)
                     RIVE_DEBUG_CODE(--m_pendingStrokeJoinCount;)
                 }
-                else if (isStroked())
+                else if (isStrokeOrFeather())
                 {
                     joinTangent = find_starting_tangent(pts, end.rawPtsPtr());
-                    joinSegmentCount = kNumSegmentsInMiterOrBevelJoin;
+                    joinSegmentCount = numSegmentsInNotRoundJoin;
                     RIVE_DEBUG_CODE(--m_pendingStrokeJoinCount;)
                 }
                 tessWriter->pushCubic(cubic.data(),
@@ -2003,11 +2117,10 @@
                 else
                 {
                     contourIDWithFlags =
-                        m_contourFlags |
-                        tessWriter->pushContour(renderPaintStyle(),
-                                                {0, 0},
-                                                true,
-                                                0);
+                        m_contourFlags | tessWriter->pushContour(m_drawContents,
+                                                                 {0, 0},
+                                                                 true,
+                                                                 0);
                 }
                 p0 = pts[0];
                 ++contourCount;
@@ -2038,7 +2151,7 @@
                 uint32_t numSubdivisions;
                 if (op == InteriorTriangulationOp::countDataAndTriangulate)
                 {
-                    numSubdivisions = FindSubdivisionCount(pts, vectorXform);
+                    numSubdivisions = find_subdivision_count(pts, vectorXform);
                     m_numChops.push_back(numSubdivisions);
                 }
                 else
diff --git a/renderer/src/gl/render_context_gl_impl.cpp b/renderer/src/gl/render_context_gl_impl.cpp
index 4edc271..939383b 100644
--- a/renderer/src/gl/render_context_gl_impl.cpp
+++ b/renderer/src/gl/render_context_gl_impl.cpp
@@ -106,6 +106,21 @@
     glEnableVertexAttribArray(0);
     glVertexAttribDivisor(0, 1);
 
+    glActiveTexture(GL_TEXTURE0 + kPLSTexIdxOffset + FEATHER_TEXTURE_IDX);
+    glBindTexture(GL_TEXTURE_2D, m_featherTexture);
+    glTexStorage2D(GL_TEXTURE_2D, 1, GL_R16F, gpu::GAUSSIAN_TABLE_SIZE, 1);
+    m_state->bindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+    glTexSubImage2D(GL_TEXTURE_2D,
+                    0,
+                    0,
+                    0,
+                    gpu::GAUSSIAN_TABLE_SIZE,
+                    1,
+                    GL_RED,
+                    GL_HALF_FLOAT,
+                    gpu::g_gaussianIntegralTableF16);
+    glutils::SetTexture2DSamplingParams(GL_LINEAR, GL_LINEAR);
+
     const char* tessellateSources[] = {glsl::constants,
                                        glsl::common,
                                        glsl::tessellate};
@@ -241,6 +256,9 @@
     glActiveTexture(GL_TEXTURE0 + kPLSTexIdxOffset + GRAD_TEXTURE_IDX);
     glBindTexture(GL_TEXTURE_2D, m_gradientTexture);
 
+    glActiveTexture(GL_TEXTURE0 + kPLSTexIdxOffset + FEATHER_TEXTURE_IDX);
+    glBindTexture(GL_TEXTURE_2D, m_featherTexture);
+
     m_state->invalidate();
 }
 
@@ -687,6 +705,7 @@
     switch (drawType)
     {
         case gpu::DrawType::midpointFanPatches:
+        case gpu::DrawType::midpointFanCenterAAPatches:
         case gpu::DrawType::outerCurvePatches:
             if (shaderType == GL_VERTEX_SHADER)
             {
@@ -802,6 +821,7 @@
     const bool isImageDraw = gpu::DrawTypeIsImageDraw(drawType);
     const bool isTessellationDraw =
         drawType == gpu::DrawType::midpointFanPatches ||
+        drawType == gpu::DrawType::midpointFanCenterAAPatches ||
         drawType == gpu::DrawType::outerCurvePatches;
     const bool isPathDraw =
         isTessellationDraw || drawType == gpu::DrawType::interiorTriangulation;
@@ -826,6 +846,12 @@
                                  GLSL_gradTexture,
                                  kPLSTexIdxOffset + GRAD_TEXTURE_IDX);
     }
+    if (shaderFeatures & gpu::ShaderFeatures::ENABLE_FEATHER)
+    {
+        assert(isPathDraw || interlockMode == gpu::InterlockMode::atomics);
+        glUniform1i(glGetUniformLocation(m_id, GLSL_featherTexture),
+                    kPLSTexIdxOffset + FEATHER_TEXTURE_IDX);
+    }
     // Atomic mode doesn't support image paints on paths.
     if (isImageDraw ||
         (isPathDraw && interlockMode != gpu::InterlockMode::atomics))
@@ -1355,6 +1381,7 @@
         switch (gpu::DrawType drawType = batch.drawType)
         {
             case DrawType::midpointFanPatches:
+            case DrawType::midpointFanCenterAAPatches:
             case DrawType::outerCurvePatches:
             {
                 // Draw PLS patches that connect the tessellation vertices.
@@ -1413,8 +1440,8 @@
                     break;
                 }
 
-                // MSAA fills only use the "fan" section of the patch (the don't
-                // need AA borders).
+                // MSAA fills only use the "fan" section of the patch (they
+                // don't need AA borders).
                 drawHelper.setIndexRange(gpu::PatchFanIndexCount(drawType),
                                          gpu::PatchFanBaseIndex(drawType));
 
diff --git a/renderer/src/gpu.cpp b/renderer/src/gpu.cpp
index ff9befa..476ef73 100644
--- a/renderer/src/gpu.cpp
+++ b/renderer/src/gpu.cpp
@@ -38,6 +38,7 @@
     switch (drawType)
     {
         case DrawType::midpointFanPatches:
+        case DrawType::midpointFanCenterAAPatches:
         case DrawType::outerCurvePatches:
             drawTypeKey = 0;
             break;
@@ -86,6 +87,8 @@
             return GLSL_ENABLE_CLIP_RECT;
         case ShaderFeatures::ENABLE_ADVANCED_BLEND:
             return GLSL_ENABLE_ADVANCED_BLEND;
+        case ShaderFeatures::ENABLE_FEATHER:
+            return GLSL_ENABLE_FEATHER;
         case ShaderFeatures::ENABLE_EVEN_ODD:
             return GLSL_ENABLE_EVEN_ODD;
         case ShaderFeatures::ENABLE_NESTED_CLIPPING:
@@ -109,10 +112,10 @@
     // AA border vertices. "Inner tessellation curves" have one more segment
     // without a fan triangle whose purpose is to be a bowtie join.
     size_t vertexCount = 0;
-    int32_t patchSegmentSpan = patchType == PatchType::midpointFan
-                                   ? kMidpointFanPatchSegmentSpan
-                                   : kOuterCurvePatchSegmentSpan;
-    for (int32_t i = 0; i < patchSegmentSpan; ++i)
+    int32_t patchSegmentSpan = patchType == PatchType::outerCurves
+                                   ? kOuterCurvePatchSegmentSpan
+                                   : kMidpointFanPatchSegmentSpan;
+    for (int i = 0; i < patchSegmentSpan; ++i)
     {
         float params = pack_params(patchSegmentSpan, STROKE_VERTEX);
         float l = static_cast<float>(i);
@@ -125,12 +128,26 @@
             vertices[vertexCount + 3].set(r, 1.f, .0f, params);
 
             // Give the vertex an alternate position when mirrored so the border
-            // has the same diagonals whether morrored or not.
+            // has the same diagonals whether mirrored or not.
             vertices[vertexCount + 0].setMirroredPosition(r, 0.f, .5f);
             vertices[vertexCount + 1].setMirroredPosition(l, 0.f, .5f);
             vertices[vertexCount + 2].setMirroredPosition(r, 1.f, .0f);
             vertices[vertexCount + 3].setMirroredPosition(l, 1.f, .0f);
         }
+        else if (patchType == PatchType::midpointFanCenterAA)
+        {
+            vertices[vertexCount + 0].set(l, 0.f, .5f, params);
+            vertices[vertexCount + 1].set(l, 1.f, .0f, params);
+            vertices[vertexCount + 2].set(r, 0.f, .5f, params);
+            vertices[vertexCount + 3].set(r, 1.f, .0f, params);
+
+            // Give the vertex an alternate position when mirrored so the border
+            // has the same diagonals whether mirrored or not.
+            vertices[vertexCount + 0].setMirroredPosition(r - 1.f, 0.f, .5f);
+            vertices[vertexCount + 1].setMirroredPosition(l - 1.f, 0.f, .5f);
+            vertices[vertexCount + 2].setMirroredPosition(r - 1.f, 1.f, .0f);
+            vertices[vertexCount + 3].setMirroredPosition(l - 1.f, 1.f, .0f);
+        }
         else
         {
             assert(patchType == PatchType::midpointFan);
@@ -150,26 +167,39 @@
     }
 
     // Bottom (negative coverage) side of the AA border.
-    if (patchType == PatchType::outerCurves)
+    for (int i = 0; i < patchSegmentSpan; ++i)
     {
         float params = pack_params(patchSegmentSpan, STROKE_VERTEX);
-        for (int i = 0; i < patchSegmentSpan; ++i)
+        float l = static_cast<float>(i);
+        float r = l + 1;
+        if (patchType == PatchType::outerCurves)
         {
-            float l = static_cast<float>(i);
-            float r = l + 1;
-
-            vertices[vertexCount + 0].set(l, -.0f, .5f, params);
-            vertices[vertexCount + 1].set(r, -.0f, .5f, params);
+            vertices[vertexCount + 0].set(l, -0.f, .5f, params);
+            vertices[vertexCount + 1].set(r, -0.f, .5f, params);
             vertices[vertexCount + 2].set(l, -1.f, .0f, params);
             vertices[vertexCount + 3].set(r, -1.f, .0f, params);
 
             // Give the vertex an alternate position when mirrored so the border
-            // has the same diagonals whether morrored or not.
+            // has the same diagonals whether mirrored or not.
             vertices[vertexCount + 0].setMirroredPosition(r, -0.f, .5f);
             vertices[vertexCount + 1].setMirroredPosition(r, -1.f, .0f);
             vertices[vertexCount + 2].setMirroredPosition(l, -0.f, .5f);
             vertices[vertexCount + 3].setMirroredPosition(l, -1.f, .0f);
+            vertexCount += 4;
+        }
+        else if (patchType == PatchType::midpointFanCenterAA)
+        {
+            vertices[vertexCount + 0].set(l, -0.f, .5f, params);
+            vertices[vertexCount + 1].set(r, -0.f, .5f, params);
+            vertices[vertexCount + 2].set(l, -1.f, .0f, params);
+            vertices[vertexCount + 3].set(r, -1.f, .0f, params);
 
+            // Give the vertex an alternate position when mirrored so the border
+            // has the same diagonals whether mirrored or not.
+            vertices[vertexCount + 0].setMirroredPosition(r - 1.f, -0.f, .5f);
+            vertices[vertexCount + 1].setMirroredPosition(r - 1.f, -1.f, .0f);
+            vertices[vertexCount + 2].setMirroredPosition(l - 1.f, -0.f, .5f);
+            vertices[vertexCount + 3].setMirroredPosition(l - 1.f, -1.f, .0f);
             vertexCount += 4;
         }
     }
@@ -177,11 +207,11 @@
     // Triangle fan vertices. (These only touch the first "fanSegmentSpan"
     // segments on inner tessellation curves.
     size_t fanVerticesIdx = vertexCount;
-    size_t fanSegmentSpan = patchType == PatchType::midpointFan
-                                ? patchSegmentSpan
-                                : patchSegmentSpan - 1;
-    assert((fanSegmentSpan & (fanSegmentSpan - 1)) ==
-           0); // The fan must be a power of two.
+    size_t fanSegmentSpan = patchType == PatchType::outerCurves
+                                ? patchSegmentSpan - 1
+                                : patchSegmentSpan;
+    // The fan must be a power of two.
+    assert((fanSegmentSpan & (fanSegmentSpan - 1)) == 0);
     for (int i = 0; i <= fanSegmentSpan; ++i)
     {
         float params = pack_params(patchSegmentSpan, FAN_VERTEX);
@@ -189,6 +219,13 @@
         {
             vertices[vertexCount].set(static_cast<float>(i), 0.f, 1, params);
         }
+        else if (patchType == PatchType::midpointFanCenterAA)
+        {
+            vertices[vertexCount].set(static_cast<float>(i), 0, 1, params);
+            vertices[vertexCount].setMirroredPosition(static_cast<float>(i) - 1,
+                                                      0,
+                                                      1);
+        }
         else
         {
             vertices[vertexCount].set(static_cast<float>(i), -1.f, 1, params);
@@ -199,16 +236,19 @@
         ++vertexCount;
     }
 
-    // The midpoint vertex is only included on midpoint fan patches.
+    // The midpoint vertex isn't included in outer curve patches.
     size_t midpointIdx = vertexCount;
-    if (patchType == PatchType::midpointFan)
+    if (patchType != PatchType::outerCurves)
     {
         vertices[vertexCount++]
             .set(0, 0, 1, pack_params(patchSegmentSpan, FAN_MIDPOINT_VERTEX));
     }
-    assert(vertexCount == (patchType == PatchType::outerCurves
-                               ? kOuterCurvePatchVertexCount
-                               : kMidpointFanPatchVertexCount));
+    if (patchType == PatchType::outerCurves)
+        assert(vertexCount == kOuterCurvePatchVertexCount);
+    else if (patchType == PatchType::midpointFanCenterAA)
+        assert(vertexCount == kMidpointFanCenterAAPatchVertexCount);
+    else
+        assert(vertexCount == kMidpointFanPatchVertexCount);
 
     // AA border indices.
     constexpr static size_t kBorderPatternVertexCount = 4;
@@ -232,7 +272,11 @@
     }
 
     // Bottom (negative coverage) side of the AA border.
-    if (patchType == PatchType::outerCurves)
+    if (patchType == PatchType::midpointFan)
+    {
+        assert(indexCount == kMidpointFanPatchBorderIndexCount);
+    }
+    else
     {
         for (size_t borderSegmentIdx = 0; borderSegmentIdx < patchSegmentSpan;
              ++borderSegmentIdx)
@@ -244,11 +288,10 @@
             }
             borderEdgeVerticesIdx += kBorderPatternVertexCount;
         }
-        assert(indexCount == kOuterCurvePatchBorderIndexCount);
-    }
-    else
-    {
-        assert(indexCount == kMidpointFanPatchBorderIndexCount);
+        if (patchType == PatchType::midpointFanCenterAA)
+            assert(indexCount == kMidpointFanCenterAAPatchBorderIndexCount);
+        else
+            assert(indexCount == kOuterCurvePatchBorderIndexCount);
     }
 
     assert(borderEdgeVerticesIdx == fanVerticesIdx);
@@ -265,13 +308,17 @@
             indices[indexCount++] = fanVerticesIdx + i + step * 2 + baseVertex;
         }
     }
-    if (patchType == PatchType::midpointFan)
+    if (patchType == PatchType::midpointFan ||
+        patchType == PatchType::midpointFanCenterAA)
     {
         // Triangle to the contour midpoint.
         indices[indexCount++] = fanVerticesIdx + baseVertex;
         indices[indexCount++] = fanVerticesIdx + fanSegmentSpan + baseVertex;
         indices[indexCount++] = midpointIdx + baseVertex;
-        assert(indexCount == kMidpointFanPatchIndexCount);
+        if (patchType == PatchType::midpointFan)
+            assert(indexCount == kMidpointFanPatchIndexCount);
+        else
+            assert(indexCount == kMidpointFanCenterAAPatchIndexCount);
     }
     else
     {
@@ -287,10 +334,17 @@
                                         vertices,
                                         indices,
                                         0);
-    generate_buffer_data_for_patch_type(PatchType::outerCurves,
+    generate_buffer_data_for_patch_type(PatchType::midpointFanCenterAA,
                                         vertices + kMidpointFanPatchVertexCount,
                                         indices + kMidpointFanPatchIndexCount,
                                         kMidpointFanPatchVertexCount);
+    generate_buffer_data_for_patch_type(
+        PatchType::outerCurves,
+        vertices + kMidpointFanPatchVertexCount +
+            kMidpointFanCenterAAPatchVertexCount,
+        indices + kMidpointFanPatchIndexCount +
+            kMidpointFanCenterAAPatchIndexCount,
+        kMidpointFanPatchVertexCount + kMidpointFanCenterAAPatchVertexCount);
 }
 
 void ClipRectInverseMatrix::reset(const Mat2D& clipMatrix, const AABB& clipRect)
@@ -416,19 +470,16 @@
     }
 }
 
-void PathData::set(const Mat2D& m, float strokeRadius, uint32_t zIndex)
+void PathData::set(const Mat2D& m,
+                   float strokeRadius,
+                   float featherRadius,
+                   uint32_t zIndex,
+                   const CoverageBufferRange& coverageBufferRange)
 {
     write_matrix(m_matrix, m);
     m_strokeRadius = strokeRadius; // 0 if the path is filled.
     m_zIndex = zIndex;
-}
-
-void PathData::set(const Mat2D& m,
-                   float strokeRadius,
-                   uint32_t zIndex,
-                   const CoverageBufferRange& coverageBufferRange)
-{
-    set(m, strokeRadius, zIndex);
+    m_featherRadius = featherRadius;
     m_coverageBufferRange.offset = coverageBufferRange.offset;
     m_coverageBufferRange.pitch = coverageBufferRange.pitch;
     m_coverageBufferRange.offsetX = coverageBufferRange.offsetX;
@@ -664,7 +715,7 @@
                gpu::StorageBufferElementSizeInBytes(bufferStructure);
 }
 
-float FindTransformedArea(const AABB& bounds, const Mat2D& matrix)
+float find_transformed_area(const AABB& bounds, const Mat2D& matrix)
 {
     Vec2D pts[4] = {{bounds.left(), bounds.top()},
                     {bounds.right(), bounds.top()},
@@ -678,4 +729,255 @@
     return (fabsf(Vec2D::cross(v[0], v[1])) + fabsf(Vec2D::cross(v[1], v[2]))) *
            .5f;
 }
+
+// Code to generate g_gaussianIntegralTableF16.
+#if 0
+static float eval_normal_distribution(float x, float mu, float inverseSigma)
+{ // Evaluates the normal PDF with mean `mu` and stddev 1/inverseSigma at x.
+    constexpr static float ONE_OVER_SQRT_2_PI = 0.398942280401433f;
+    float y = (x - mu) * inverseSigma; // Distance from the mean, in stddevs.
+    return expf(-.5 * y * y) * inverseSigma * ONE_OVER_SQRT_2_PI;
+}
+
+void generate_gausian_integral_table(float table[], size_t tableSize)
+{ // Fills table[0..tableSize) with a running integral of the normal curve.
+    float sigma = tableSize / (FEATHER_TEXTURE_STDDEVS * 2); // Table spans 2 * FEATHER_TEXTURE_STDDEVS sigmas.
+    float inverseSigma = 1 / sigma;
+    float mu = tableSize * .5f; // Center the distribution on the table.
+    float integral = 0;
+    for (size_t i = 0; i < tableSize; ++i)
+    {
+        // Sample the normal distribution in multiple locations for each entry
+        // of the table, in order to get a more accurate integral.
+        constexpr static int SAMPLES = 7;
+        float barCenterX = static_cast<float>(i);
+        for (int sample = 0; sample < SAMPLES; ++sample)
+        {
+            float dx = static_cast<float>(sample - (SAMPLES >> 1)) / SAMPLES; // -3/7 .. +3/7.
+            integral +=
+                eval_normal_distribution(barCenterX + dx, mu, inverseSigma) /
+                SAMPLES;
+        }
+        table[i] = integral; // Cumulative sum through entry i.
+    }
+    // Account for the area under the curve prior to our table by shifting so
+    // the middle value of the table is exactly 1/2.
+    float shift =
+        .5 - ((tableSize & 1)
+                  ? table[tableSize / 2]
+                  : (table[tableSize / 2 - 1] + table[tableSize / 2]) / 2);
+    table[0] = fminf(fmaxf(0, table[0] + shift), 1);
+    for (size_t i = 1; i < tableSize; ++i)
+    {
+        table[i] = fminf(fmaxf(table[i - 1], table[i] + shift), 1); // Keep entries non-decreasing and <= 1.
+    }
+}
+#endif
+
+const uint16_t g_gaussianIntegralTableF16[GAUSSIAN_TABLE_SIZE] = {
+    0x15a3, 0x15db, 0x1616, 0x1652, 0x1691, 0x16d1, 0x1715, 0x175a, 0x17a2,
+    0x17ec, 0x181c, 0x1844, 0x186d, 0x1898, 0x18c4, 0x18f1, 0x1920, 0x1951,
+    0x1983, 0x19b7, 0x19ec, 0x1a23, 0x1a5d, 0x1a98, 0x1ad4, 0x1b13, 0x1b54,
+    0x1b97, 0x1bdc, 0x1c12, 0x1c36, 0x1c5c, 0x1c83, 0x1cac, 0x1cd5, 0x1d00,
+    0x1d2c, 0x1d5a, 0x1d89, 0x1db9, 0x1deb, 0x1e1e, 0x1e53, 0x1e89, 0x1ec1,
+    0x1efb, 0x1f36, 0x1f73, 0x1fb2, 0x1ff3, 0x201b, 0x203d, 0x2060, 0x2084,
+    0x20a9, 0x20d0, 0x20f7, 0x211f, 0x2149, 0x2173, 0x219f, 0x21cc, 0x21fb,
+    0x222a, 0x225b, 0x228d, 0x22c0, 0x22f5, 0x232b, 0x2363, 0x239c, 0x23d6,
+    0x2409, 0x2428, 0x2447, 0x2468, 0x2489, 0x24ab, 0x24cd, 0x24f1, 0x2516,
+    0x253b, 0x2561, 0x2589, 0x25b1, 0x25da, 0x2604, 0x262f, 0x265b, 0x2688,
+    0x26b7, 0x26e6, 0x2716, 0x2748, 0x277a, 0x27ae, 0x27e3, 0x280c, 0x2828,
+    0x2844, 0x2861, 0x287e, 0x289c, 0x28bb, 0x28da, 0x28fa, 0x291b, 0x293c,
+    0x295f, 0x2981, 0x29a5, 0x29c9, 0x29ee, 0x2a13, 0x2a3a, 0x2a61, 0x2a89,
+    0x2ab1, 0x2adb, 0x2b05, 0x2b30, 0x2b5c, 0x2b89, 0x2bb6, 0x2be4, 0x2c0a,
+    0x2c22, 0x2c3a, 0x2c53, 0x2c6c, 0x2c86, 0x2ca0, 0x2cbb, 0x2cd6, 0x2cf2,
+    0x2d0e, 0x2d2a, 0x2d47, 0x2d65, 0x2d82, 0x2da1, 0x2dc0, 0x2ddf, 0x2dff,
+    0x2e1f, 0x2e40, 0x2e62, 0x2e84, 0x2ea6, 0x2ec9, 0x2eec, 0x2f10, 0x2f35,
+    0x2f5a, 0x2f7f, 0x2fa5, 0x2fcc, 0x2ff3, 0x300d, 0x3021, 0x3036, 0x304a,
+    0x305f, 0x3074, 0x308a, 0x309f, 0x30b5, 0x30cc, 0x30e2, 0x30f9, 0x3110,
+    0x3127, 0x313f, 0x3157, 0x316f, 0x3187, 0x31a0, 0x31b9, 0x31d2, 0x31eb,
+    0x3205, 0x321f, 0x323a, 0x3254, 0x326f, 0x328a, 0x32a5, 0x32c1, 0x32dd,
+    0x32f9, 0x3315, 0x3332, 0x334f, 0x336c, 0x338a, 0x33a7, 0x33c5, 0x33e3,
+    0x3401, 0x3410, 0x3420, 0x342f, 0x343f, 0x344f, 0x345f, 0x346f, 0x347f,
+    0x348f, 0x349f, 0x34b0, 0x34c0, 0x34d1, 0x34e2, 0x34f3, 0x3504, 0x3515,
+    0x3526, 0x3537, 0x3548, 0x355a, 0x356b, 0x357d, 0x358f, 0x35a0, 0x35b2,
+    0x35c4, 0x35d6, 0x35e8, 0x35fa, 0x360d, 0x361f, 0x3631, 0x3644, 0x3656,
+    0x3669, 0x367b, 0x368e, 0x36a0, 0x36b3, 0x36c6, 0x36d9, 0x36ec, 0x36ff,
+    0x3711, 0x3724, 0x3737, 0x374a, 0x375d, 0x3771, 0x3784, 0x3797, 0x37aa,
+    0x37bd, 0x37d0, 0x37e3, 0x37f6, 0x3805, 0x380e, 0x3818, 0x3822, 0x382b,
+    0x3835, 0x383e, 0x3848, 0x3851, 0x385b, 0x3864, 0x386e, 0x3877, 0x3881,
+    0x388a, 0x3894, 0x389d, 0x38a6, 0x38b0, 0x38b9, 0x38c2, 0x38cc, 0x38d5,
+    0x38de, 0x38e7, 0x38f1, 0x38fa, 0x3903, 0x390c, 0x3915, 0x391e, 0x3927,
+    0x3930, 0x3939, 0x3942, 0x394a, 0x3953, 0x395c, 0x3964, 0x396d, 0x3976,
+    0x397e, 0x3987, 0x398f, 0x3998, 0x39a0, 0x39a8, 0x39b0, 0x39b9, 0x39c1,
+    0x39c9, 0x39d1, 0x39d9, 0x39e1, 0x39e8, 0x39f0, 0x39f8, 0x3a00, 0x3a07,
+    0x3a0f, 0x3a16, 0x3a1e, 0x3a25, 0x3a2c, 0x3a33, 0x3a3b, 0x3a42, 0x3a49,
+    0x3a50, 0x3a57, 0x3a5d, 0x3a64, 0x3a6b, 0x3a72, 0x3a78, 0x3a7f, 0x3a85,
+    0x3a8b, 0x3a92, 0x3a98, 0x3a9e, 0x3aa4, 0x3aaa, 0x3ab0, 0x3ab6, 0x3abc,
+    0x3ac2, 0x3ac7, 0x3acd, 0x3ad3, 0x3ad8, 0x3ade, 0x3ae3, 0x3ae8, 0x3aed,
+    0x3af3, 0x3af8, 0x3afd, 0x3b02, 0x3b07, 0x3b0b, 0x3b10, 0x3b15, 0x3b19,
+    0x3b1e, 0x3b22, 0x3b27, 0x3b2b, 0x3b30, 0x3b34, 0x3b38, 0x3b3c, 0x3b40,
+    0x3b44, 0x3b48, 0x3b4c, 0x3b50, 0x3b53, 0x3b57, 0x3b5b, 0x3b5e, 0x3b62,
+    0x3b65, 0x3b69, 0x3b6c, 0x3b6f, 0x3b72, 0x3b76, 0x3b79, 0x3b7c, 0x3b7f,
+    0x3b82, 0x3b85, 0x3b87, 0x3b8a, 0x3b8d, 0x3b90, 0x3b92, 0x3b95, 0x3b97,
+    0x3b9a, 0x3b9c, 0x3b9f, 0x3ba1, 0x3ba3, 0x3ba6, 0x3ba8, 0x3baa, 0x3bac,
+    0x3bae, 0x3bb0, 0x3bb2, 0x3bb4, 0x3bb6, 0x3bb8, 0x3bba, 0x3bbc, 0x3bbe,
+    0x3bbf, 0x3bc1, 0x3bc3, 0x3bc4, 0x3bc6, 0x3bc7, 0x3bc9, 0x3bca, 0x3bcc,
+    0x3bcd, 0x3bcf, 0x3bd0, 0x3bd1, 0x3bd2, 0x3bd4, 0x3bd5, 0x3bd6, 0x3bd7,
+    0x3bd8, 0x3bda, 0x3bdb, 0x3bdc, 0x3bdd, 0x3bde, 0x3bdf, 0x3be0, 0x3be1,
+    0x3be2, 0x3be2, 0x3be3, 0x3be4, 0x3be5, 0x3be6, 0x3be7, 0x3be7, 0x3be8,
+    0x3be9, 0x3bea, 0x3bea, 0x3beb, 0x3bec, 0x3bec, 0x3bed, 0x3bed, 0x3bee,
+    0x3bee, 0x3bef, 0x3bf0, 0x3bf0, 0x3bf1, 0x3bf1, 0x3bf2, 0x3bf2, 0x3bf2,
+    0x3bf3, 0x3bf3, 0x3bf4, 0x3bf4, 0x3bf5, 0x3bf5, 0x3bf5, 0x3bf6, 0x3bf6,
+    0x3bf6, 0x3bf7, 0x3bf7, 0x3bf7, 0x3bf8, 0x3bf8, 0x3bf8, 0x3bf8, 0x3bf9,
+    0x3bf9, 0x3bf9, 0x3bf9, 0x3bfa, 0x3bfa, 0x3bfa, 0x3bfa, 0x3bfa, 0x3bfb,
+    0x3bfb, 0x3bfb, 0x3bfb, 0x3bfb, 0x3bfc, 0x3bfc, 0x3bfc, 0x3bfc, 0x3bfc,
+    0x3bfc, 0x3bfc, 0x3bfd, 0x3bfd, 0x3bfd, 0x3bfd, 0x3bfd, 0x3bfd,
+};
+
+// Code to generate g_inverseGaussianIntegralTableF32.
+#if 0
+void generate_inverse_gausian_integral_table(float table[], size_t tableSize)
+{ // Builds the inverse mapping: integral value -> normalized table position.
+    // Evaluate 32 samples for every table value, for better precision.
+    size_t MULTIPLIER = 32;
+    float sigma = tableSize / (FEATHER_TEXTURE_STDDEVS * 2);
+    float inverseSigma = 1 / sigma;
+    float mu = tableSize * .5f;
+    size_t samples = tableSize * MULTIPLIER;
+
+    // Integrate half the curve in order to determine the initial value of our
+    // integral (the table doesn't begin until -FEATHER_TEXTURE_STDDEVS).
+    float integral = 0;
+    for (size_t i = 0; i < (samples + 1) / 2; ++i)
+    {
+        float barCenterX = static_cast<float>(i) / MULTIPLIER;
+        integral +=
+            eval_normal_distribution(barCenterX, mu, inverseSigma) / MULTIPLIER;
+    }
+    integral = .5 - integral; // What remains of .5 lies before the table.
+
+    // Restart the integration now that we know its initial value, and fill in
+    // the inverse table this time around.
+    float lastInverseX = std::numeric_limits<float>::quiet_NaN(), // NaN fails the first <= test.
+          lastInverseY = 0;
+    table[0] = 0;
+    table[tableSize - 1] = 1;
+    for (size_t i = 0; i < samples; ++i)
+    {
+        float barCenterX = static_cast<float>(i) / MULTIPLIER;
+        integral +=
+            eval_normal_distribution(barCenterX, mu, inverseSigma) / MULTIPLIER;
+        float inverseX = fminf(fmaxf(0, integral), 1) * tableSize; // Integral, in table cells.
+        float inverseY = (i + .5f) / samples; // Normalized position of sample i.
+        size_t cell = static_cast<size_t>(inverseX);
+        float cellCenterX = cell + .5f;
+        if (cellCenterX == mu)
+        {
+            // Make sure the center value is exactly .5, just because.
+            table[cell] = .5f;
+        }
+        else if (lastInverseX <= cellCenterX && inverseX >= cellCenterX)
+        {
+            float t = (cellCenterX - lastInverseX) / (inverseX - lastInverseX);
+            float y = lerp(lastInverseY, inverseY, t); // Position where the integral hits the cell center.
+            assert(0 <= cell && cell < tableSize);
+            table[cell] = y;
+        }
+        lastInverseX = inverseX;
+        lastInverseY = inverseY;
+    }
+
+    // Use a large enough tableSize that the beginning and ending values are 0
+    // and 1!
+    assert(table[0] == 0 && table[tableSize - 1] == 1);
+}
+#endif
+
+const float g_inverseGaussianIntegralTableF32[GAUSSIAN_TABLE_SIZE] = {
+    0.000000f, 0.039369f, 0.068465f, 0.088398f, 0.103756f, 0.116343f, 0.127060f,
+    0.136427f, 0.144769f, 0.152305f, 0.159192f, 0.165541f, 0.171439f, 0.176952f,
+    0.182133f, 0.187024f, 0.191659f, 0.196068f, 0.200273f, 0.204296f, 0.208153f,
+    0.211861f, 0.215431f, 0.218875f, 0.222203f, 0.225423f, 0.228545f, 0.231574f,
+    0.234516f, 0.237379f, 0.240166f, 0.242882f, 0.245531f, 0.248118f, 0.250645f,
+    0.253116f, 0.255534f, 0.257902f, 0.260222f, 0.262496f, 0.264727f, 0.266917f,
+    0.269067f, 0.271179f, 0.273256f, 0.275297f, 0.277306f, 0.279282f, 0.281228f,
+    0.283145f, 0.285033f, 0.286894f, 0.288729f, 0.290539f, 0.292324f, 0.294086f,
+    0.295825f, 0.297542f, 0.299238f, 0.300914f, 0.302569f, 0.304206f, 0.305823f,
+    0.307423f, 0.309005f, 0.310570f, 0.312118f, 0.313651f, 0.315167f, 0.316669f,
+    0.318156f, 0.319628f, 0.321087f, 0.322532f, 0.323964f, 0.325383f, 0.326789f,
+    0.328183f, 0.329566f, 0.330936f, 0.332296f, 0.333644f, 0.334981f, 0.336308f,
+    0.337625f, 0.338931f, 0.340228f, 0.341515f, 0.342793f, 0.344061f, 0.345321f,
+    0.346572f, 0.347814f, 0.349048f, 0.350274f, 0.351491f, 0.352701f, 0.353903f,
+    0.355097f, 0.356284f, 0.357464f, 0.358637f, 0.359802f, 0.360961f, 0.362114f,
+    0.363259f, 0.364399f, 0.365532f, 0.366658f, 0.367779f, 0.368894f, 0.370003f,
+    0.371106f, 0.372203f, 0.373296f, 0.374382f, 0.375464f, 0.376540f, 0.377611f,
+    0.378677f, 0.379738f, 0.380794f, 0.381845f, 0.382892f, 0.383934f, 0.384972f,
+    0.386005f, 0.387034f, 0.388058f, 0.389079f, 0.390095f, 0.391107f, 0.392115f,
+    0.393119f, 0.394120f, 0.395116f, 0.396109f, 0.397098f, 0.398083f, 0.399065f,
+    0.400044f, 0.401019f, 0.401990f, 0.402959f, 0.403924f, 0.404886f, 0.405844f,
+    0.406800f, 0.407753f, 0.408702f, 0.409649f, 0.410592f, 0.411533f, 0.412471f,
+    0.413406f, 0.414339f, 0.415269f, 0.416196f, 0.417121f, 0.418043f, 0.418962f,
+    0.419879f, 0.420794f, 0.421706f, 0.422616f, 0.423524f, 0.424429f, 0.425333f,
+    0.426234f, 0.427133f, 0.428029f, 0.428924f, 0.429817f, 0.430707f, 0.431596f,
+    0.432482f, 0.433367f, 0.434250f, 0.435131f, 0.436011f, 0.436888f, 0.437764f,
+    0.438638f, 0.439510f, 0.440381f, 0.441250f, 0.442117f, 0.442983f, 0.443848f,
+    0.444711f, 0.445572f, 0.446432f, 0.447290f, 0.448147f, 0.449003f, 0.449858f,
+    0.450711f, 0.451562f, 0.452413f, 0.453262f, 0.454110f, 0.454957f, 0.455803f,
+    0.456648f, 0.457491f, 0.458333f, 0.459175f, 0.460015f, 0.460854f, 0.461693f,
+    0.462530f, 0.463366f, 0.464202f, 0.465036f, 0.465870f, 0.466703f, 0.467535f,
+    0.468366f, 0.469196f, 0.470026f, 0.470855f, 0.471683f, 0.472511f, 0.473337f,
+    0.474164f, 0.474989f, 0.475814f, 0.476638f, 0.477462f, 0.478285f, 0.479108f,
+    0.479930f, 0.480752f, 0.481573f, 0.482394f, 0.483214f, 0.484034f, 0.484853f,
+    0.485673f, 0.486491f, 0.487310f, 0.488128f, 0.488946f, 0.489764f, 0.490581f,
+    0.491398f, 0.492215f, 0.493032f, 0.493848f, 0.494665f, 0.495481f, 0.496297f,
+    0.497113f, 0.497930f, 0.498746f, 0.499561f, 0.500377f, 0.501193f, 0.502009f,
+    0.502825f, 0.503641f, 0.504458f, 0.505274f, 0.506090f, 0.506907f, 0.507724f,
+    0.508541f, 0.509358f, 0.510175f, 0.510993f, 0.511811f, 0.512629f, 0.513447f,
+    0.514266f, 0.515085f, 0.515905f, 0.516725f, 0.517545f, 0.518366f, 0.519187f,
+    0.520008f, 0.520831f, 0.521653f, 0.522476f, 0.523300f, 0.524124f, 0.524949f,
+    0.525775f, 0.526601f, 0.527427f, 0.528255f, 0.529083f, 0.529912f, 0.530741f,
+    0.531572f, 0.532403f, 0.533235f, 0.534068f, 0.534901f, 0.535736f, 0.536571f,
+    0.537407f, 0.538245f, 0.539083f, 0.539922f, 0.540762f, 0.541603f, 0.542446f,
+    0.543289f, 0.544134f, 0.544979f, 0.545826f, 0.546674f, 0.547523f, 0.548374f,
+    0.549225f, 0.550078f, 0.550933f, 0.551788f, 0.552645f, 0.553504f, 0.554364f,
+    0.555225f, 0.556088f, 0.556952f, 0.557818f, 0.558685f, 0.559554f, 0.560424f,
+    0.561297f, 0.562171f, 0.563046f, 0.563924f, 0.564803f, 0.565684f, 0.566566f,
+    0.567451f, 0.568338f, 0.569226f, 0.570117f, 0.571009f, 0.571904f, 0.572800f,
+    0.573699f, 0.574600f, 0.575503f, 0.576408f, 0.577315f, 0.578225f, 0.579137f,
+    0.580052f, 0.580969f, 0.581888f, 0.582810f, 0.583735f, 0.584662f, 0.585591f,
+    0.586524f, 0.587459f, 0.588396f, 0.589337f, 0.590280f, 0.591227f, 0.592176f,
+    0.593129f, 0.594084f, 0.595042f, 0.596004f, 0.596969f, 0.597937f, 0.598908f,
+    0.599883f, 0.600861f, 0.601843f, 0.602828f, 0.603817f, 0.604809f, 0.605806f,
+    0.606806f, 0.607810f, 0.608817f, 0.609829f, 0.610845f, 0.611865f, 0.612889f,
+    0.613918f, 0.614951f, 0.615988f, 0.617030f, 0.618076f, 0.619127f, 0.620183f,
+    0.621244f, 0.622310f, 0.623380f, 0.624456f, 0.625537f, 0.626623f, 0.627715f,
+    0.628812f, 0.629915f, 0.631024f, 0.632138f, 0.633258f, 0.634385f, 0.635517f,
+    0.636656f, 0.637801f, 0.638953f, 0.640111f, 0.641276f, 0.642449f, 0.643628f,
+    0.644814f, 0.646008f, 0.647210f, 0.648419f, 0.649636f, 0.650861f, 0.652094f,
+    0.653336f, 0.654586f, 0.655845f, 0.657112f, 0.658390f, 0.659676f, 0.660972f,
+    0.662278f, 0.663594f, 0.664920f, 0.666256f, 0.667604f, 0.668962f, 0.670332f,
+    0.671713f, 0.673107f, 0.674512f, 0.675930f, 0.677361f, 0.678805f, 0.680263f,
+    0.681734f, 0.683220f, 0.684721f, 0.686236f, 0.687767f, 0.689315f, 0.690878f,
+    0.692459f, 0.694057f, 0.695674f, 0.697308f, 0.698963f, 0.700637f, 0.702331f,
+    0.704046f, 0.705784f, 0.707544f, 0.709328f, 0.711136f, 0.712969f, 0.714828f,
+    0.716714f, 0.718629f, 0.720573f, 0.722547f, 0.724553f, 0.726592f, 0.728666f,
+    0.730776f, 0.732923f, 0.735109f, 0.737337f, 0.739608f, 0.741925f, 0.744289f,
+    0.746703f, 0.749171f, 0.751694f, 0.754276f, 0.756921f, 0.759632f, 0.762413f,
+    0.765270f, 0.768206f, 0.771228f, 0.774343f, 0.777556f, 0.780876f, 0.784311f,
+    0.787871f, 0.791568f, 0.795413f, 0.799423f, 0.803614f, 0.808006f, 0.812624f,
+    0.817495f, 0.822653f, 0.828140f, 0.834008f, 0.840321f, 0.847164f, 0.854648f,
+    0.862924f, 0.872203f, 0.882806f, 0.895230f, 0.910338f, 0.929832f, 0.957939f,
+    1.000000f,
+};
+
+float gaussian_table_lookup(const float (&table)[GAUSSIAN_TABLE_SIZE], float x)
+{ // Samples `table` at normalized coordinate x with linear interpolation.
+    x = fminf(fmaxf(0, x), 1); // Clamp x to [0, 1].
+    float sampleBoxLeft = x * GAUSSIAN_TABLE_SIZE - .5f; // Entry i is centered at i + .5.
+    int rightIdx =
+        static_cast<int>(fminf(sampleBoxLeft + 1, GAUSSIAN_TABLE_SIZE - 1));
+    int leftIdx = std::max(rightIdx - 1, 0); // Stay in bounds when rightIdx is 0.
+    float t = fminf(fmaxf(0, sampleBoxLeft - leftIdx), 1); // Blend weight in [0, 1].
+    return lerp(table[leftIdx], table[rightIdx], t);
+}
 } // namespace rive::gpu
diff --git a/renderer/src/metal/background_shader_compiler.mm b/renderer/src/metal/background_shader_compiler.mm
index 9235a2e..418fb25 100644
--- a/renderer/src/metal/background_shader_compiler.mm
+++ b/renderer/src/metal/background_shader_compiler.mm
@@ -134,6 +134,7 @@
         switch (drawType)
         {
             case DrawType::midpointFanPatches:
+            case DrawType::midpointFanCenterAAPatches:
             case DrawType::outerCurvePatches:
                 // Add baseInstance to the instanceID for path draws.
                 defines[@GLSL_ENABLE_INSTANCE_INDEX] = @"";
diff --git a/renderer/src/metal/render_context_metal_impl.mm b/renderer/src/metal/render_context_metal_impl.mm
index 8337951..d2fc710 100644
--- a/renderer/src/metal/render_context_metal_impl.mm
+++ b/renderer/src/metal/render_context_metal_impl.mm
@@ -111,32 +111,37 @@
     {
         // Each feature corresponds to a specific index in the namespaceID.
         // These must stay in sync with generate_draw_combinations.py.
-        char namespaceID[] = "0000000";
-        if (drawType == DrawType::interiorTriangulation)
-        {
-            namespaceID[0] = '1';
-        }
+        char namespaceID[] = "00000000";
+        static_assert(sizeof(namespaceID) == gpu::kShaderFeatureCount +
+                                                 1 /*DRAW_INTERIOR_TRIANGLES*/ +
+                                                 1 /*null terminator*/);
         for (size_t i = 0; i < gpu::kShaderFeatureCount; ++i)
         {
             ShaderFeatures feature = static_cast<ShaderFeatures>(1 << i);
             if (shaderFeatures & feature)
             {
-                namespaceID[i + 1] = '1';
+                namespaceID[i] = '1';
             }
             static_assert((int)ShaderFeatures::ENABLE_CLIPPING == 1 << 0);
             static_assert((int)ShaderFeatures::ENABLE_CLIP_RECT == 1 << 1);
             static_assert((int)ShaderFeatures::ENABLE_ADVANCED_BLEND == 1 << 2);
-            static_assert((int)ShaderFeatures::ENABLE_EVEN_ODD == 1 << 3);
+            static_assert((int)ShaderFeatures::ENABLE_FEATHER == 1 << 3);
+            static_assert((int)ShaderFeatures::ENABLE_EVEN_ODD == 1 << 4);
             static_assert((int)ShaderFeatures::ENABLE_NESTED_CLIPPING ==
-                          1 << 4);
-            static_assert((int)ShaderFeatures::ENABLE_HSL_BLEND_MODES ==
                           1 << 5);
+            static_assert((int)ShaderFeatures::ENABLE_HSL_BLEND_MODES ==
+                          1 << 6);
+        }
+        if (drawType == DrawType::interiorTriangulation)
+        {
+            namespaceID[gpu::kShaderFeatureCount] = '1';
         }
 
         char namespacePrefix;
         switch (drawType)
         {
             case DrawType::midpointFanPatches:
+            case DrawType::midpointFanCenterAAPatches:
             case DrawType::outerCurvePatches:
             case DrawType::interiorTriangulation:
                 namespacePrefix =
@@ -448,6 +453,21 @@
 
     m_colorRampPipeline =
         std::make_unique<ColorRampPipeline>(m_gpu, m_plsPrecompiledLibrary);
+
+    MTLTextureDescriptor* desc = [[MTLTextureDescriptor alloc] init];
+    desc.pixelFormat = MTLPixelFormatR16Float;
+    desc.width = gpu::GAUSSIAN_TABLE_SIZE;
+    desc.height = 1;
+    desc.usage = MTLTextureUsageShaderRead;
+    desc.textureType = MTLTextureType2D;
+    desc.mipmapLevelCount = 1;
+    m_featherTexture = [m_gpu newTextureWithDescriptor:desc];
+    [m_featherTexture
+        replaceRegion:MTLRegionMake2D(0, 0, gpu::GAUSSIAN_TABLE_SIZE, 1)
+          mipmapLevel:0
+            withBytes:gpu::g_gaussianIntegralTableF16
+          bytesPerRow:sizeof(gpu::g_gaussianIntegralTableF16)];
+
     m_tessPipeline =
         std::make_unique<TessellatePipeline>(m_gpu, m_plsPrecompiledLibrary);
     m_tessSpanIndexBuffer =
@@ -885,6 +905,7 @@
     [encoder setVertexTexture:m_tessVertexTexture
                       atIndex:TESS_VERTEX_TEXTURE_IDX];
     [encoder setFragmentTexture:m_gradientTexture atIndex:GRAD_TEXTURE_IDX];
+    [encoder setFragmentTexture:m_featherTexture atIndex:FEATHER_TEXTURE_IDX];
     if (flushDesc.pathCount > 0)
     {
         [encoder setVertexBuffer:mtl_buffer(pathBufferRing())
@@ -1215,6 +1236,7 @@
         switch (drawType)
         {
             case DrawType::midpointFanPatches:
+            case DrawType::midpointFanCenterAAPatches:
             case DrawType::outerCurvePatches:
             {
                 // Draw PLS patches that connect the tessellation vertices.
@@ -1229,10 +1251,10 @@
                                  length:sizeof(uint32_t)
                                 atIndex:PATH_BASE_INSTANCE_UNIFORM_BUFFER_IDX];
                 [encoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
-                                    indexCount:PatchIndexCount(drawType)
+                                    indexCount:gpu::PatchIndexCount(drawType)
                                      indexType:MTLIndexTypeUInt16
                                    indexBuffer:m_pathPatchIndexBuffer
-                             indexBufferOffset:PatchBaseIndex(drawType) *
+                             indexBufferOffset:gpu::PatchBaseIndex(drawType) *
                                                sizeof(uint16_t)
                                  instanceCount:batch.elementCount];
                 break;
diff --git a/renderer/src/render_context.cpp b/renderer/src/render_context.cpp
index 3381ec3..1c7b9fa 100644
--- a/renderer/src/render_context.cpp
+++ b/renderer/src/render_context.cpp
@@ -1104,13 +1104,13 @@
         constexpr static int kDrawTypeShift = 45;
         constexpr static int64_t kDrawTypeMask RIVE_MAYBE_UNUSED =
             7llu << kDrawTypeShift;
-        constexpr static int kTextureHashShift = 29;
-        constexpr static int64_t kTextureHashMask = 0xffffllu
+        constexpr static int kTextureHashShift = 30;
+        constexpr static int64_t kTextureHashMask = 0x7fffllu
                                                     << kTextureHashShift;
-        constexpr static int kBlendModeShift = 25;
+        constexpr static int kBlendModeShift = 26;
         constexpr static int kBlendModeMask = 0xf << kBlendModeShift;
         constexpr static int kDrawContentsShift = 17;
-        constexpr static int64_t kDrawContentsMask = 0xffllu
+        constexpr static int64_t kDrawContentsMask = 0x1ffllu
                                                      << kDrawContentsShift;
         constexpr static int kDrawIndexShift = 1;
         constexpr static int64_t kDrawIndexMask = 0x7fff << kDrawIndexShift;
@@ -1748,21 +1748,11 @@
     ++m_currentPathID;
     assert(0 < m_currentPathID && m_currentPathID <= m_ctx->m_maxPathID);
 
-    if (m_flushDesc.interlockMode != gpu::InterlockMode::clockwiseAtomic)
-    {
-        m_ctx->m_pathData.set_back(draw->matrix(),
-                                   draw->strokeRadius(),
-                                   m_currentZIndex);
-    }
-    else
-    {
-
-        m_ctx->m_pathData.set_back(draw->matrix(),
-                                   draw->strokeRadius(),
-                                   m_currentZIndex,
-                                   draw->coverageBufferRange());
-    }
-
+    m_ctx->m_pathData.set_back(draw->matrix(),
+                               draw->strokeRadius(),
+                               draw->featherRadius(),
+                               m_currentZIndex,
+                               draw->coverageBufferRange());
     m_ctx->m_paintData.set_back(draw->drawContents(),
                                 draw->paintType(),
                                 draw->simplePaintValue(),
@@ -1826,16 +1816,17 @@
     assert(m_pathMirroredTessLocation == m_expectedPathMirroredTessEndLocation);
 }
 
-uint32_t RenderContext::LogicalFlush::pushContour(uint32_t pathID,
-                                                  RenderPaintStyle style,
-                                                  Vec2D midpoint,
-                                                  bool closed,
-                                                  uint32_t vertexIndex0)
+uint32_t RenderContext::LogicalFlush::pushContour(
+    uint32_t pathID,
+    gpu::DrawContents drawContents,
+    Vec2D midpoint,
+    bool closed,
+    uint32_t vertexIndex0)
 {
     assert(pathID != 0);
-    assert(style == RenderPaintStyle::stroke || closed);
+    assert((drawContents & gpu::DrawContents::stroke) || closed);
 
-    if (style == RenderPaintStyle::stroke)
+    if (drawContents & gpu::DrawContents::stroke)
     {
         midpoint.x = closed ? 1 : 0;
     }
@@ -1849,7 +1840,7 @@
 }
 
 uint32_t RenderContext::TessellationWriter::pushContour(
-    RenderPaintStyle renderPaintStyle,
+    gpu::DrawContents drawContents,
     Vec2D midpoint,
     bool closed,
     uint32_t paddingVertexCount)
@@ -1861,7 +1852,7 @@
     m_nextCubicPaddingVertexCount = paddingVertexCount;
 
     return m_flush->pushContour(m_pathID,
-                                renderPaintStyle,
+                                drawContents,
                                 midpoint,
                                 closed,
                                 nextVertexIndex());
@@ -2117,7 +2108,9 @@
     assert(instanceCount * kMidpointFanPatchSegmentSpan == tessVertexCount);
 
     pushPathDraw(draw,
-                 gpu::DrawType::midpointFanPatches,
+                 draw->isFeatheredFill()
+                     ? gpu::DrawType::midpointFanCenterAAPatches
+                     : gpu::DrawType::midpointFanPatches,
                  shaderMiscFlags,
                  instanceCount,
                  baseInstance);
@@ -2280,28 +2273,28 @@
                                 vertexCount,
                                 baseVertex);
 
-    if (!(shaderMiscFlags & gpu::ShaderMiscFlags::borrowedCoveragePrepass))
+    auto pathShaderFeatures = gpu::ShaderFeatures::NONE;
+    if (draw->featherRadius() != 0)
     {
-        auto pathShaderFeatures = gpu::ShaderFeatures::NONE;
-        if (draw->isEvenOddFill())
-        {
-            pathShaderFeatures |= ShaderFeatures::ENABLE_EVEN_ODD;
-        }
-        if (draw->paintType() == PaintType::clipUpdate &&
-            draw->simplePaintValue().outerClipID != 0)
-        {
-            pathShaderFeatures |= ShaderFeatures::ENABLE_NESTED_CLIPPING;
-        }
-        batch.shaderFeatures |=
-            pathShaderFeatures & m_ctx->m_frameShaderFeaturesMask;
-        m_combinedShaderFeatures |= batch.shaderFeatures;
+        pathShaderFeatures |= ShaderFeatures::ENABLE_FEATHER;
     }
+    if (draw->isEvenOddFill())
+    {
+        assert(!(shaderMiscFlags & gpu::ShaderMiscFlags::clockwiseFill));
+        pathShaderFeatures |= ShaderFeatures::ENABLE_EVEN_ODD;
+    }
+    if (draw->paintType() == PaintType::clipUpdate &&
+        draw->simplePaintValue().outerClipID != 0)
+    {
+        pathShaderFeatures |= ShaderFeatures::ENABLE_NESTED_CLIPPING;
+    }
+    batch.shaderFeatures |=
+        pathShaderFeatures & m_ctx->m_frameShaderFeaturesMask;
+    m_combinedShaderFeatures |= batch.shaderFeatures;
     assert(
         (batch.shaderFeatures &
          gpu::ShaderFeaturesMaskFor(drawType, m_ctx->frameInterlockMode())) ==
         batch.shaderFeatures);
-    assert(!(shaderMiscFlags & gpu::ShaderMiscFlags::borrowedCoveragePrepass) ||
-           batch.shaderFeatures == gpu::ShaderFeatures::NONE);
     return batch;
 }
 
@@ -2310,6 +2303,11 @@
     gpu::DrawContents batchContents,
     const Draw* draw)
 {
+    // Feathered fills should never attempt to combine with fills, strokes, or
+    // feathered strokes because they use a different DrawType.
+    assert((batchContents & gpu::DrawContents::featheredFill).bits() ==
+           (draw->drawContents() & gpu::DrawContents::featheredFill).bits());
+
     constexpr static auto ANY_FILL = gpu::DrawContents::clockwiseFill |
                                      gpu::DrawContents::evenOddFill |
                                      gpu::DrawContents::nonZeroFill;
@@ -2320,7 +2318,7 @@
         // don't have fills yet.
         (batchContents & ANY_FILL) && (draw->drawContents() & ANY_FILL))
     {
-        assert(!draw->isStroked());
+        assert(!draw->isStroke());
         return (batchContents & gpu::DrawContents::clockwiseFill).bits() ==
                (draw->drawContents() & gpu::DrawContents::clockwiseFill).bits();
     }
@@ -2356,6 +2354,7 @@
     switch (drawType)
     {
         case DrawType::midpointFanPatches:
+        case DrawType::midpointFanCenterAAPatches:
         case DrawType::outerCurvePatches:
         case DrawType::interiorTriangulation:
         case DrawType::stencilClipReset:
@@ -2413,48 +2412,45 @@
     assert(can_combine_draw_images(batch->imageTexture, draw->imageTexture()));
     assert(!batch->needsBarrier);
 
-    if (!(shaderMiscFlags & gpu::ShaderMiscFlags::borrowedCoveragePrepass))
+    auto shaderFeatures = ShaderFeatures::NONE;
+    if (draw->clipID() != 0)
     {
-        auto shaderFeatures = ShaderFeatures::NONE;
-        if (draw->clipID() != 0)
-        {
-            shaderFeatures |= ShaderFeatures::ENABLE_CLIPPING;
-        }
-        if (draw->hasClipRect() && paintType != PaintType::clipUpdate)
-        {
-            shaderFeatures |= ShaderFeatures::ENABLE_CLIP_RECT;
-        }
-        if (paintType != PaintType::clipUpdate)
-        {
-            switch (draw->blendMode())
-            {
-                case BlendMode::hue:
-                case BlendMode::saturation:
-                case BlendMode::color:
-                case BlendMode::luminosity:
-                    shaderFeatures |= ShaderFeatures::ENABLE_HSL_BLEND_MODES;
-                    [[fallthrough]];
-                case BlendMode::screen:
-                case BlendMode::overlay:
-                case BlendMode::darken:
-                case BlendMode::lighten:
-                case BlendMode::colorDodge:
-                case BlendMode::colorBurn:
-                case BlendMode::hardLight:
-                case BlendMode::softLight:
-                case BlendMode::difference:
-                case BlendMode::exclusion:
-                case BlendMode::multiply:
-                    shaderFeatures |= ShaderFeatures::ENABLE_ADVANCED_BLEND;
-                    break;
-                case BlendMode::srcOver:
-                    break;
-            }
-        }
-        batch->shaderFeatures |=
-            shaderFeatures & m_ctx->m_frameShaderFeaturesMask;
-        m_combinedShaderFeatures |= batch->shaderFeatures;
+        shaderFeatures |= ShaderFeatures::ENABLE_CLIPPING;
     }
+    if (draw->hasClipRect() && paintType != PaintType::clipUpdate)
+    {
+        shaderFeatures |= ShaderFeatures::ENABLE_CLIP_RECT;
+    }
+    if (paintType != PaintType::clipUpdate &&
+        !(shaderMiscFlags & gpu::ShaderMiscFlags::borrowedCoveragePrepass))
+    {
+        switch (draw->blendMode())
+        {
+            case BlendMode::hue:
+            case BlendMode::saturation:
+            case BlendMode::color:
+            case BlendMode::luminosity:
+                shaderFeatures |= ShaderFeatures::ENABLE_HSL_BLEND_MODES;
+                [[fallthrough]];
+            case BlendMode::screen:
+            case BlendMode::overlay:
+            case BlendMode::darken:
+            case BlendMode::lighten:
+            case BlendMode::colorDodge:
+            case BlendMode::colorBurn:
+            case BlendMode::hardLight:
+            case BlendMode::softLight:
+            case BlendMode::difference:
+            case BlendMode::exclusion:
+            case BlendMode::multiply:
+                shaderFeatures |= ShaderFeatures::ENABLE_ADVANCED_BLEND;
+                break;
+            case BlendMode::srcOver:
+                break;
+        }
+    }
+    batch->shaderFeatures |= shaderFeatures & m_ctx->m_frameShaderFeaturesMask;
+    m_combinedShaderFeatures |= batch->shaderFeatures;
     assert(
         (batch->shaderFeatures &
          gpu::ShaderFeaturesMaskFor(drawType, m_ctx->frameInterlockMode())) ==
diff --git a/renderer/src/rive_render_paint.cpp b/renderer/src/rive_render_paint.cpp
index a668b1d..e78d281 100644
--- a/renderer/src/rive_render_paint.cpp
+++ b/renderer/src/rive_render_paint.cpp
@@ -3,7 +3,6 @@
  */
 
 #include "rive_render_paint.hpp"
-#include "rive/renderer/rive_render_image.hpp"
 #include "gradient.hpp"
 
 namespace rive
diff --git a/renderer/src/rive_render_paint.hpp b/renderer/src/rive_render_paint.hpp
index 71e472b..bc82693 100644
--- a/renderer/src/rive_render_paint.hpp
+++ b/renderer/src/rive_render_paint.hpp
@@ -4,8 +4,9 @@
 
 #pragma once
 
-#include "rive/renderer/gpu.hpp"
 #include "rive/renderer.hpp"
+#include "rive/renderer/gpu.hpp"
+#include "rive/renderer/texture.hpp"
 
 namespace rive::gpu
 {
@@ -29,6 +30,7 @@
     void thickness(float thickness) override { m_thickness = fabsf(thickness); }
     void join(StrokeJoin join) override { m_join = join; }
     void cap(StrokeCap cap) override { m_cap = cap; }
+    void feather(float feather) override { m_feather = fabsf(feather); }
     void blendMode(BlendMode mode) override { m_blendMode = mode; }
     void shader(rcp<RenderShader> shader) override;
     void image(rcp<const gpu::Texture>, float opacity);
@@ -38,13 +40,22 @@
     gpu::PaintType getType() const { return m_paintType; }
     bool getIsStroked() const { return m_stroked; }
     ColorInt getColor() const { return m_simpleValue.color; }
-    float getThickness() const { return m_thickness; }
     const gpu::Gradient* getGradient() const { return m_gradient.get(); }
     const gpu::Texture* getImageTexture() const { return m_imageTexture.get(); }
     float getImageOpacity() const { return m_simpleValue.imageOpacity; }
     float getOuterClipID() const { return m_simpleValue.outerClipID; }
-    StrokeJoin getJoin() const { return m_join; }
-    StrokeCap getCap() const { return m_cap; }
+    float getThickness() const { return m_thickness; }
+    StrokeJoin getJoin() const
+    {
+        // Feathers ignore the join and always use round.
+        return m_feather != 0 ? StrokeJoin::round : m_join;
+    }
+    StrokeCap getCap() const
+    {
+        // Feathers ignore the cap and always use round.
+        return m_feather != .0 ? StrokeCap::round : m_cap;
+    }
+    float getFeather() const { return m_feather; }
     BlendMode getBlendMode() const { return m_blendMode; }
     gpu::SimplePaintValue getSimpleValue() const { return m_simpleValue; }
     bool getIsOpaque() const;
@@ -57,6 +68,7 @@
     float m_thickness = 1;
     StrokeJoin m_join = StrokeJoin::miter;
     StrokeCap m_cap = StrokeCap::butt;
+    float m_feather = 0;
     BlendMode m_blendMode = BlendMode::srcOver;
     bool m_stroked = false;
 };
diff --git a/renderer/src/rive_render_path.cpp b/renderer/src/rive_render_path.cpp
index 43c6cd8..424a3b6 100644
--- a/renderer/src/rive_render_path.cpp
+++ b/renderer/src/rive_render_path.cpp
@@ -7,12 +7,14 @@
 #include "rive/math/bezier_utils.hpp"
 #include "rive/math/simd.hpp"
 #include "rive/math/wangs_formula.hpp"
+#include "shaders/constants.glsl"
 
 namespace rive
 {
 
 RiveRenderPath::RiveRenderPath(FillRule fillRule, RawPath& rawPath)
 {
+    m_fillRule = fillRule;
     m_rawPath.swap(rawPath);
     m_rawPath.pruneEmptySegments();
 }
@@ -180,6 +182,150 @@
     return m_rawPathMutationID;
 }
 
+// When a blurred shape curves away from the convolution matrix, the curvature
+// makes the blur softer, which does not happen naturally in feathering.
+//
+// To simulate the softening effect from curving away, we flatten curves
+// proportionally to curvature. This works really well for gaussian feathers,
+// but when the flattening would be large enough to become noticeable, we chop
+// the curve at its point of max height and recurse (at most "maxDepth" levels).
+//
+// TODO: Move this work to the GPU.
+static void add_softened_cubic_for_feathering(RawPath* featheredPath,
+                                              const Vec2D p[4],
+                                              float feather,
+                                              float matrixMaxScale,
+                                              int maxDepth = 3)
+{
+    float2 p0 = simd::load2f(p), p1 = simd::load2f(p + 1),
+           p2 = simd::load2f(p + 2), p3 = simd::load2f(p + 3);
+    math::CubicCoeffs coeffs(p);
+
+    // Find the point of maximum height on the cubic.
+    float maxHeightT;
+    float height = math::find_cubic_max_height(p, &maxHeightT);
+
+    // Measure curvature across one standard deviation of the feather.
+    // ("feather" is 2 std devs.)
+    float desiredSpread = feather * .5f;
+
+    // The feather gets dimmer with curvature. Find a dimming factor based on
+    // the strength of curvature at maximum height.
+    float theta = math::measure_cubic_local_curvature(p,
+                                                      coeffs,
+                                                      maxHeightT,
+                                                      desiredSpread);
+    float dimming = 1 - theta * (1 / math::PI);
+
+    // Always dim a little bit in order to avoid artifacts on tight cusps.
+    // FIXME: This is unfortunate. There must be a better way to handle cusps.
+    dimming = fminf(dimming, .925f);
+
+    // Find a new height such that the center of the feather (currently 50%
+    // opacity) is reduced to "50% * dimming".
+    float desiredOpacityOnCenter = .5f * dimming;
+    float x = gpu::inverse_gaussian_integral(desiredOpacityOnCenter) - .5f;
+    float newHeight = height + feather * FEATHER_TEXTURE_STDDEVS * x;
+
+    if (maxDepth > 0 && (height - newHeight) * matrixMaxScale > 8)
+    {
+        // The curve would be flattened by more than 8 (after matrixMaxScale).
+        // Chop at max height and recurse on both halves.
+        Vec2D pp[7];
+        math::chop_cubic_at(p, pp, maxHeightT);
+        add_softened_cubic_for_feathering(featheredPath,
+                                          pp,
+                                          feather,
+                                          matrixMaxScale,
+                                          maxDepth - 1);
+        add_softened_cubic_for_feathering(featheredPath,
+                                          pp + 3,
+                                          feather,
+                                          matrixMaxScale,
+                                          maxDepth - 1);
+        return;
+    }
+
+    // Flatten the curve down to "newHeight". (Height scales linearly as we lerp
+    // the control points to "flatLinePoints".)
+    float4 flatLinePoints =
+        simd::mix(p0.xyxy, p3.xyxy, float4{1.f / 3, 1.f / 3, 2.f / 3, 2.f / 3});
+    float softness = height != 0 ? 1 - newHeight / height : 1;
+    // Do the "min" first so softness is 1 if anything went NaN.
+    softness = fmaxf(0, fminf(softness, 1));
+    assert(softness >= 0 && softness <= 1);
+    float4 softenedPoints = simd::unchecked_mix(simd::join(p1, p2),
+                                                flatLinePoints,
+                                                float4(softness));
+    featheredPath->cubic(math::bit_cast<Vec2D>(softenedPoints.xy),
+                         math::bit_cast<Vec2D>(softenedPoints.zw),
+                         p[3]);
+}
+
+rcp<RiveRenderPath> RiveRenderPath::makeSoftenedCopyForFeathering(
+    float feather,
+    float matrixMaxScale)
+{
+    RawPath featheredPath;
+    // Reserve 4x the original verb and point counts upfront so we hopefully
+    // don't have to reallocate as cubics get chopped into multiple pieces.
+    featheredPath.reserve(m_rawPath.verbs().size() * 4,
+                          m_rawPath.points().size() * 4);
+    for (auto [verb, pts] : m_rawPath)
+    {
+        switch (verb)
+        {
+            case PathVerb::move:
+                featheredPath.move(pts[0]);
+                break;
+            case PathVerb::line:
+                featheredPath.line(pts[1]);
+                break;
+            case PathVerb::cubic:
+            {
+                // Start by chopping all cubics so they are convex and rotate no
+                // more than 90 degrees. The stroke algorithm requires them not
+                // to have inflections.
+                float T[4];
+                Vec2D chops[(std::size(T) + 1) * 3 + 1]; // 4 chops will produce
+                                                         // 16 cubic vertices.
+                bool areCusps;
+                // A generous cusp padding looks better empirically.
+                constexpr static float CUSP_PADDING = 1e-2f;
+                int n = math::find_cubic_convex_90_chops(pts,
+                                                         T,
+                                                         CUSP_PADDING,
+                                                         &areCusps);
+                math::chop_cubic_at(pts, chops, T, n);
+                Vec2D* p = chops;
+                for (int i = 0; i <= n; ++i, p += 3)
+                {
+                    if (areCusps && (i & 1))
+                    {
+                        // Odd-numbered chops pass through a cusp when the chops
+                        // straddle cusps; emit those as plain lines instead.
+                        featheredPath.line(p[3]);
+                    }
+                    else
+                    {
+                        add_softened_cubic_for_feathering(&featheredPath,
+                                                          p,
+                                                          feather,
+                                                          matrixMaxScale);
+                    }
+                }
+                break;
+            }
+            case PathVerb::close:
+                featheredPath.close();
+                break;
+            case PathVerb::quad:
+                RIVE_UNREACHABLE();
+        }
+    }
+    return make_rcp<RiveRenderPath>(m_fillRule, featheredPath);
+}
+
 void RiveRenderPath::setDrawCache(gpu::RiveRenderPathDraw* drawCache,
                                   const Mat2D& mat,
                                   rive::RiveRenderPaint* riveRenderPaint) const
@@ -201,6 +347,7 @@
         m_cachedJoin = riveRenderPaint->getJoin();
         m_cachedCap = riveRenderPaint->getCap();
     }
+    m_cachedFeather = riveRenderPaint->getFeather();
 }
 
 gpu::DrawUniquePtr RiveRenderPath::getDrawCache(
@@ -237,6 +384,11 @@
         }
     }
 
+    if (m_cachedFeather != paint->getFeather())
+    {
+        return nullptr;
+    }
+
     if (matrix.xx() != cache.xx || matrix.xy() != cache.xy ||
         matrix.yx() != cache.yx || matrix.yy() != cache.yy)
     {
diff --git a/renderer/src/rive_render_path.hpp b/renderer/src/rive_render_path.hpp
index b75eb94..92573bc 100644
--- a/renderer/src/rive_render_path.hpp
+++ b/renderer/src/rive_render_path.hpp
@@ -60,6 +60,15 @@
     bool isClockwiseDominant(const Mat2D& viewMatrix) const;
     uint64_t getRawPathMutationID() const;
 
+    // Feathering does not always look like a blur when there is strong
+    // curvature. This method returns a copy of the path with flatter curves
+    // that will more accurately depict a gaussian blur when drawn with the
+    // given feather.
+    //
+    // TODO: Move this work to the GPU.
+    rcp<RiveRenderPath> makeSoftenedCopyForFeathering(float feather,
+                                                      float matrixMaxScale);
+
 #ifdef DEBUG
     // Allows ref holders to guarantee the rawPath doesn't mutate during a
     // specific time.
@@ -131,5 +140,6 @@
     mutable float m_cachedThickness;
     mutable StrokeJoin m_cachedJoin;
     mutable StrokeCap m_cachedCap;
+    mutable float m_cachedFeather;
 };
 } // namespace rive
diff --git a/renderer/src/rive_renderer.cpp b/renderer/src/rive_renderer.cpp
index cfa019d..69125c2 100644
--- a/renderer/src/rive_renderer.cpp
+++ b/renderer/src/rive_renderer.cpp
@@ -9,7 +9,6 @@
 #include "rive/math/math_types.hpp"
 #include "rive/math/simd.hpp"
 #include "rive/renderer/rive_render_image.hpp"
-#include "shaders/constants.glsl"
 
 namespace rive
 {
@@ -117,18 +116,23 @@
         return;
     }
 
-    bool stroked = paint->getIsStroked();
-    if (stroked && m_context->frameDescriptor().strokesDisabled)
+    if (paint->getIsStroked() && m_context->frameDescriptor().strokesDisabled)
     {
         return;
     }
-    if (!stroked && m_context->frameDescriptor().fillsDisabled)
+    if (!paint->getIsStroked() && m_context->frameDescriptor().fillsDisabled)
     {
         return;
     }
-    if (stroked && !(paint->getThickness() >
-                     0)) // Use inverse logic to ensure we abort when stroke
-    {                    // thickness is NaN.
+    if (paint->getIsStroked() &&
+        // Use inverse logic to ensure we abort when stroke thickness is NaN.
+        !(paint->getThickness() > 0))
+    {
+        return;
+    }
+    // Use inverse logic to ensure we abort when the feather is NaN.
+    if (!(paint->getFeather() >= 0))
+    {
         return;
     }
     if (m_stack.back().clipIsEmpty)
@@ -136,6 +140,29 @@
         return;
     }
 
+    if (paint->getFeather() != 0 && !paint->getIsStroked())
+    {
+        if (path->getFillRule() != FillRule::clockwise &&
+            !m_context->frameDescriptor().clockwiseFillOverride)
+        {
+            // Don't draw feathered fills that aren't clockwise.
+            return;
+        }
+        float matrixMaxScale = m_stack.back().matrix.findMaxScale();
+        if (paint->getFeather() * matrixMaxScale > 1)
+        {
+            clipAndPushDraw(gpu::RiveRenderPathDraw::Make(
+                m_context,
+                m_stack.back().matrix,
+                path->makeSoftenedCopyForFeathering(paint->getFeather(),
+                                                    matrixMaxScale),
+                path->getFillRule(),
+                paint,
+                &m_scratchPath));
+            return;
+        }
+    }
+
     gpu::DrawUniquePtr cacheDraw =
         path->getDrawCache(m_stack.back().matrix,
                            paint,
diff --git a/renderer/src/shaders/atomic_draw.glsl b/renderer/src/shaders/atomic_draw.glsl
index d499ff4..7386c77 100644
--- a/renderer/src/shaders/atomic_draw.glsl
+++ b/renderer/src/shaders/atomic_draw.glsl
@@ -284,12 +284,18 @@
 #ifdef @FRAGMENT
 FRAG_TEXTURE_BLOCK_BEGIN
 TEXTURE_RGBA8(PER_FLUSH_BINDINGS_SET, GRAD_TEXTURE_IDX, @gradTexture);
+#ifdef @ENABLE_FEATHER
+TEXTURE_R16F(PER_FLUSH_BINDINGS_SET, FEATHER_TEXTURE_IDX, @featherTexture);
+#endif
 #ifdef NEEDS_IMAGE_TEXTURE
 TEXTURE_RGBA8(PER_DRAW_BINDINGS_SET, IMAGE_TEXTURE_IDX, @imageTexture);
 #endif
 FRAG_TEXTURE_BLOCK_END
 
 SAMPLER_LINEAR(GRAD_TEXTURE_IDX, gradSampler)
+#ifdef @ENABLE_FEATHER
+SAMPLER_LINEAR(FEATHER_TEXTURE_IDX, featherSampler)
+#endif
 #ifdef NEEDS_IMAGE_TEXTURE
 SAMPLER_MIPMAP(IMAGE_TEXTURE_IDX, imageSampler)
 #endif
@@ -343,14 +349,14 @@
 
 INLINE uint to_fixed(float x)
 {
-    return uint(x * FIXED_COVERAGE_FACTOR + FIXED_COVERAGE_ZERO);
+    return uint(round(x * FIXED_COVERAGE_PRECISION + FIXED_COVERAGE_ZERO));
 }
 
 INLINE half from_fixed(uint x)
 {
     return cast_float_to_half(
-        float(x) * FIXED_COVERAGE_INVERSE_FACTOR +
-        (-FIXED_COVERAGE_ZERO * FIXED_COVERAGE_INVERSE_FACTOR));
+        float(x) * FIXED_COVERAGE_INVERSE_PRECISION +
+        (-FIXED_COVERAGE_ZERO * FIXED_COVERAGE_INVERSE_PRECISION));
 }
 
 #ifdef @ENABLE_CLIPPING
@@ -537,8 +543,27 @@
     VARYING_UNPACK(v_edgeDistance, half2);
     VARYING_UNPACK(v_pathID, ushort);
 
-    half fragmentCoverage =
-        min(min(v_edgeDistance.x, abs(v_edgeDistance.y)), make_half(1.));
+    half fragmentCoverage;
+#ifdef @ENABLE_FEATHER
+    if (@ENABLE_FEATHER && is_feathered_stroke(v_edgeDistance))
+    {
+        fragmentCoverage = feathered_stroke_coverage(
+            v_edgeDistance,
+            SAMPLED_R16F(@featherTexture, featherSampler));
+    }
+    else if (@ENABLE_FEATHER && is_feathered_fill(v_edgeDistance))
+    {
+        fragmentCoverage = feathered_fill_coverage(
+            v_edgeDistance,
+            SAMPLED_R16F(@featherTexture, featherSampler));
+    }
+    else
+#endif
+    {
+        // Cover stroke and fill both in a branchless expression.
+        fragmentCoverage =
+            min(min(v_edgeDistance.x, abs(v_edgeDistance.y)), make_half(1.));
+    }
 
     // Since v_pathID increases monotonically with every draw, and since it
     // lives in the most significant bits of the coverage data, an atomic max()
@@ -548,21 +573,22 @@
     //      invocation to hit the new path, and the one that should resolve the
     //      previous path in the framebuffer.
     //    * Properly resets coverage to zero when we do cross over into
-    //    processing
-    //      a new path.
+    //      processing a new path.
     //    * Accumulates coverage for strokes.
     //
     uint fixedCoverage = to_fixed(fragmentCoverage);
-    uint minCoverageData = (make_uint(v_pathID) << 16) | fixedCoverage;
+    uint minCoverageData =
+        (make_uint(v_pathID) << FIXED_COVERAGE_BIT_COUNT) | fixedCoverage;
     uint lastCoverageData =
         PLS_ATOMIC_MAX(coverageAtomicBuffer, minCoverageData);
-    ushort lastPathID = cast_uint_to_ushort(lastCoverageData >> 16);
+    ushort lastPathID =
+        cast_uint_to_ushort(lastCoverageData >> FIXED_COVERAGE_BIT_COUNT);
     if (lastPathID == v_pathID)
     {
         // This is not the first fragment of the current path to touch this
         // pixel. We already resolved the previous path, so just update coverage
         // (if we're a fill) and move on.
-        if (v_edgeDistance.y < .0 /*fill?*/)
+        if (!is_stroke(v_edgeDistance))
         {
             // Only apply the effect of the min() the first time we cross into a
             // path.
@@ -578,7 +604,7 @@
 
     // We crossed into a new path! Resolve the previous path now that we know
     // its exact coverage.
-    half coverageCount = from_fixed(lastCoverageData & 0xffffu);
+    half coverageCount = from_fixed(lastCoverageData & FIXED_COVERAGE_MASK);
     half4 fragColorOut;
 #ifdef @ENABLE_CLIPPING
     CLIP_VALUE_TYPE fragClipOut = MAKE_NON_UPDATING_CLIP_VALUE;
@@ -612,16 +638,19 @@
     VARYING_UNPACK(v_pathID, ushort);
 
     uint lastCoverageData = PLS_LOADUI_ATOMIC(coverageAtomicBuffer);
-    ushort lastPathID = cast_uint_to_ushort(lastCoverageData >> 16);
+    ushort lastPathID =
+        cast_uint_to_ushort(lastCoverageData >> FIXED_COVERAGE_BIT_COUNT);
 
     // Update coverageAtomicBuffer with the coverage weight of the current
     // triangle. This does not need to be atomic since interior triangles don't
     // overlap.
-    int coverageDeltaFixed = int(v_windingWeight * FIXED_COVERAGE_FACTOR);
+    int coverageDeltaFixed =
+        int(round(v_windingWeight * FIXED_COVERAGE_PRECISION));
     uint currPathCoverageData =
         lastPathID == v_pathID
             ? lastCoverageData
-            : (make_uint(v_pathID) << 16) + FIXED_COVERAGE_ZERO_UINT;
+            : (make_uint(v_pathID) << FIXED_COVERAGE_BIT_COUNT) +
+                  FIXED_COVERAGE_ZERO_UINT;
     PLS_STOREUI_ATOMIC(coverageAtomicBuffer,
                        currPathCoverageData + uint(coverageDeltaFixed));
 
@@ -634,7 +663,7 @@
 
     // We crossed into a new path! Resolve the previous path now that we know
     // its exact coverage.
-    half lastCoverageCount = from_fixed(lastCoverageData & 0xffffu);
+    half lastCoverageCount = from_fixed(lastCoverageData & FIXED_COVERAGE_MASK);
     half4 fragColorOut;
 #ifdef @ENABLE_CLIPPING
     CLIP_VALUE_TYPE fragClipOut = MAKE_NON_UPDATING_CLIP_VALUE;
@@ -692,8 +721,9 @@
 
     // Resolve the previous path.
     uint lastCoverageData = PLS_LOADUI_ATOMIC(coverageAtomicBuffer);
-    ushort lastPathID = cast_uint_to_ushort(lastCoverageData >> 16);
-    half lastCoverageCount = from_fixed(lastCoverageData & 0xffffu);
+    ushort lastPathID =
+        cast_uint_to_ushort(lastCoverageData >> FIXED_COVERAGE_BIT_COUNT);
+    half lastCoverageCount = from_fixed(lastCoverageData & FIXED_COVERAGE_MASK);
     half4 fragColorOut;
 #ifdef @ENABLE_CLIPPING
     CLIP_VALUE_TYPE fragClipOut = MAKE_NON_UPDATING_CLIP_VALUE;
@@ -810,8 +840,9 @@
 #endif
 {
     uint lastCoverageData = PLS_LOADUI_ATOMIC(coverageAtomicBuffer);
-    half coverageCount = from_fixed(lastCoverageData & 0xffffu);
-    ushort lastPathID = cast_uint_to_ushort(lastCoverageData >> 16);
+    half coverageCount = from_fixed(lastCoverageData & FIXED_COVERAGE_MASK);
+    ushort lastPathID =
+        cast_uint_to_ushort(lastCoverageData >> FIXED_COVERAGE_BIT_COUNT);
     half4 fragColorOut;
     resolve_paint(lastPathID,
                   coverageCount,
diff --git a/renderer/src/shaders/constants.glsl b/renderer/src/shaders/constants.glsl
index 494da11..62c83d1 100644
--- a/renderer/src/shaders/constants.glsl
+++ b/renderer/src/shaders/constants.glsl
@@ -8,6 +8,11 @@
 #define GRAD_TEXTURE_WIDTH float(512)
 #define GRAD_TEXTURE_INVERSE_WIDTH float(0.001953125)
 
+// Number of standard deviations on either side of the middle of the feather
+// texture. The feather texture integrates the normal distribution from
+// -FEATHER_TEXTURE_STDDEVS to +FEATHER_TEXTURE_STDDEVS in the domain x=0..1.
+#define FEATHER_TEXTURE_STDDEVS float(3)
+
 // Width to use for a texture that emulates a storage buffer.
 //
 // Minimize width since the texture needs to be updated in entire rows from the
@@ -43,26 +48,30 @@
 #define CULL_EXCESS_TESSELLATION_SEGMENTS_CONTOUR_FLAG (1u << 30u)
 
 // Flags for specifying the join type.
-#define JOIN_TYPE_MASK (3u << 28u)
-#define MITER_CLIP_JOIN_CONTOUR_FLAG (3u << 28u)
-#define MITER_REVERT_JOIN_CONTOUR_FLAG (2u << 28u)
-#define BEVEL_JOIN_CONTOUR_FLAG (1u << 28u)
+#define JOIN_TYPE_MASK (7u << 27u)
+#define MITER_CLIP_JOIN_CONTOUR_FLAG (5u << 27u)
+#define MITER_REVERT_JOIN_CONTOUR_FLAG (4u << 27u)
+#define BEVEL_JOIN_CONTOUR_FLAG (3u << 27u)
+#define ROUND_JOIN_CONTOUR_FLAG (2u << 27u)
+#define FEATHER_JOIN_CONTOUR_FLAG (1u << 27u)
 
 // When a join is being used to emulate a stroke cap, the shader emits
 // additional vertices at T=0 and T=1 for round joins, and changes the miter
 // limit to 1 for miter-clip joins.
-#define EMULATED_STROKE_CAP_CONTOUR_FLAG (1u << 27u)
+#define EMULATED_STROKE_CAP_CONTOUR_FLAG (1u << 26u)
 
 // Flip the sign on interpolated fragment coverage for fills. Ignored on
 // strokes. This is used when reversing the winding direction of a path.
-#define NEGATE_PATH_FILL_COVERAGE_FLAG (1u << 26u)
+#define NEGATE_PATH_FILL_COVERAGE_FLAG (1u << 25u)
 
 // Internal contour flags.
-#define MIRRORED_CONTOUR_CONTOUR_FLAG (1u << 25u)
-#define JOIN_TANGENT_0_CONTOUR_FLAG (1u << 24u)
-#define JOIN_TANGENT_INNER_CONTOUR_FLAG (1u << 23u)
-#define LEFT_JOIN_CONTOUR_FLAG (1u << 22u)
-#define RIGHT_JOIN_CONTOUR_FLAG (1u << 21u)
+#define MIRRORED_CONTOUR_CONTOUR_FLAG (1u << 24u)
+// Degenerate outsets are used to implement discontinuities in feather joins.
+#define ZERO_FEATHER_OUTSET_CONTOUR_FLAG (1u << 23u)
+#define JOIN_TANGENT_0_CONTOUR_FLAG (1u << 22u)
+#define JOIN_TANGENT_INNER_CONTOUR_FLAG (1u << 21u)
+#define LEFT_JOIN_CONTOUR_FLAG (1u << 20u)
+#define RIGHT_JOIN_CONTOUR_FLAG (1u << 19u)
 #define CONTOUR_ID_MASK 0xffffu
 
 // Says which part of the patch a vertex belongs to.
@@ -95,18 +104,19 @@
 // Index at which we access each resource.
 #define TESS_VERTEX_TEXTURE_IDX 0
 #define GRAD_TEXTURE_IDX 1
-#define IMAGE_TEXTURE_IDX 2
-#define PATH_BUFFER_IDX 3
-#define PAINT_BUFFER_IDX 4
-#define PAINT_AUX_BUFFER_IDX 5
-#define CONTOUR_BUFFER_IDX 6
-#define FLUSH_UNIFORM_BUFFER_IDX 7
-#define PATH_BASE_INSTANCE_UNIFORM_BUFFER_IDX 8
-#define IMAGE_DRAW_UNIFORM_BUFFER_IDX 9
+#define FEATHER_TEXTURE_IDX 2
+#define IMAGE_TEXTURE_IDX 3
+#define PATH_BUFFER_IDX 4
+#define PAINT_BUFFER_IDX 5
+#define PAINT_AUX_BUFFER_IDX 6
+#define CONTOUR_BUFFER_IDX 7
+#define FLUSH_UNIFORM_BUFFER_IDX 8
+#define PATH_BASE_INSTANCE_UNIFORM_BUFFER_IDX 9
+#define IMAGE_DRAW_UNIFORM_BUFFER_IDX 10
 // Coverage buffer used in coverageAtomic mode.
-#define COVERAGE_BUFFER_IDX 10
-#define DST_COLOR_TEXTURE_IDX 11
-#define DEFAULT_BINDINGS_SET_SIZE 12
+#define COVERAGE_BUFFER_IDX 11
+#define DST_COLOR_TEXTURE_IDX 12
+#define DEFAULT_BINDINGS_SET_SIZE 13
 
 // Samplers are accessed at the same index as their corresponding texture, so we
 // put them in a separate binding set.
@@ -124,7 +134,9 @@
 #define SCRATCH_COLOR_PLANE_IDX 2
 #define COVERAGE_PLANE_IDX 3
 
-// acos(1/4), because the miter limit is always 4.
+// Rive has a hard-coded miter limit of 4 in the editor and all runtimes.
+#define RIVE_MITER_LIMIT float(4)
+// acos(1/4), because the miter limit is 4.
 #define MITER_ANGLE_LIMIT float(1.318116071652817965746)
 
 // Raw bit representation of the largest denormalized fp16 value. We offset all
@@ -152,21 +164,26 @@
 #define BLEND_MODE_LUMINOSITY 15u
 
 // Fixed-point coverage values for atomic mode.
-// Atomic mode uses 7:9 fixed point, so the winding number breaks if a shape has
-// more than 64 levels of self overlap in either winding direction at any point.
-#define FIXED_COVERAGE_FACTOR float(512)
-#define FIXED_COVERAGE_INVERSE_FACTOR float(0.001953125)
-#define FIXED_COVERAGE_ZERO float(1 << 15)
-#define FIXED_COVERAGE_ZERO_UINT (1u << 15)
-#define FIXED_COVERAGE_ONE (FIXED_COVERAGE_FACTOR + FIXED_COVERAGE_ZERO)
+// Atomic mode uses 6:11 fixed point, so the winding number breaks if a shape
+// has more than 32 levels of self overlap in either winding direction at any
+// point.
+#define FIXED_COVERAGE_PRECISION float(2048)
+#define FIXED_COVERAGE_INVERSE_PRECISION float(0.00048828125)
+#define FIXED_COVERAGE_ZERO float(1 << 16)
+#define FIXED_COVERAGE_ZERO_UINT (1u << 16)
+#define FIXED_COVERAGE_ONE (FIXED_COVERAGE_PRECISION + FIXED_COVERAGE_ZERO)
+#define FIXED_COVERAGE_BIT_COUNT 17u
+#define FIXED_COVERAGE_MASK 0x1ffffu
 
 // Fixed-point coverage values for clockwiseAtomic mode.
-// clockwiseAtomic mode uses 5:8 fixed point, so the winding number breaks if a
-// shape has more than 16 levels of self overlap in either winding direction at
+// clockwiseAtomic mode uses 6:11 fixed point, so the winding number breaks if a
+// shape has more than 32 levels of self overlap in either winding direction at
 // any point.
-#define CLOCKWISE_COVERAGE_BIT_COUNT 15u
-#define CLOCKWISE_COVERAGE_MASK (0x7fffu)
-#define CLOCKWISE_FILL_ZERO_VALUE (1u << 14)
+#define CLOCKWISE_COVERAGE_BIT_COUNT 17u
+#define CLOCKWISE_COVERAGE_MASK 0x1ffffu
+#define CLOCKWISE_COVERAGE_PRECISION float(2048)
+#define CLOCKWISE_COVERAGE_INVERSE_PRECISION float(0.00048828125)
+#define CLOCKWISE_FILL_ZERO_VALUE (1u << 16)
 
 // Binding points for storage buffers.
 #define PAINT_STORAGE_BUFFER_IDX 8
@@ -180,9 +197,10 @@
 #define CLIPPING_SPECIALIZATION_IDX 0
 #define CLIP_RECT_SPECIALIZATION_IDX 1
 #define ADVANCED_BLEND_SPECIALIZATION_IDX 2
-#define EVEN_ODD_SPECIALIZATION_IDX 3
-#define NESTED_CLIPPING_SPECIALIZATION_IDX 4
-#define HSL_BLEND_MODES_SPECIALIZATION_IDX 5
-#define CLOCKWISE_FILL_SPECIALIZATION_IDX 6
-#define BORROWED_COVERAGE_PREPASS_SPECIALIZATION_IDX 7
-#define SPECIALIZATION_COUNT 8
+#define FEATHER_SPECIALIZATION_IDX 3
+#define EVEN_ODD_SPECIALIZATION_IDX 4
+#define NESTED_CLIPPING_SPECIALIZATION_IDX 5
+#define HSL_BLEND_MODES_SPECIALIZATION_IDX 6
+#define CLOCKWISE_FILL_SPECIALIZATION_IDX 7
+#define BORROWED_COVERAGE_PREPASS_SPECIALIZATION_IDX 8
+#define SPECIALIZATION_COUNT 9
diff --git a/renderer/src/shaders/draw_clockwise_path.glsl b/renderer/src/shaders/draw_clockwise_path.glsl
index eb08f83..591dd3a 100644
--- a/renderer/src/shaders/draw_clockwise_path.glsl
+++ b/renderer/src/shaders/draw_clockwise_path.glsl
@@ -40,7 +40,7 @@
                                        v_edgeDistance VERTEX_CONTEXT_UNPACK))
     {
         uint4 coverageData =
-            STORAGE_BUFFER_LOAD4(@pathBuffer, pathID * 4u + 2u);
+            STORAGE_BUFFER_LOAD4(@pathBuffer, pathID * 4u + 3u);
         v_pathID = pathID;
         v_coveragePlacement = coverageData.xy;
         v_coverageCoord = vertexPosition + uintBitsToFloat(coverageData.zw);
@@ -92,7 +92,7 @@
         unpack_interior_triangle_vertex(@a_triangleVertex,
                                         pathID,
                                         v_windingWeight VERTEX_CONTEXT_UNPACK);
-    uint4 coverageData = STORAGE_BUFFER_LOAD4(@pathBuffer, pathID * 4u + 2u);
+    uint4 coverageData = STORAGE_BUFFER_LOAD4(@pathBuffer, pathID * 4u + 3u);
     v_pathID = pathID;
     v_coveragePlacement = coverageData.xy;
     v_coverageCoord = vertexPosition + uintBitsToFloat(coverageData.zw);
@@ -110,10 +110,16 @@
 #ifdef @FRAGMENT
 FRAG_TEXTURE_BLOCK_BEGIN
 TEXTURE_RGBA8(PER_FLUSH_BINDINGS_SET, GRAD_TEXTURE_IDX, @gradTexture);
+#ifdef @ENABLE_FEATHER
+TEXTURE_R16F(PER_FLUSH_BINDINGS_SET, FEATHER_TEXTURE_IDX, @featherTexture);
+#endif
 TEXTURE_RGBA8(PER_DRAW_BINDINGS_SET, IMAGE_TEXTURE_IDX, @imageTexture);
 FRAG_TEXTURE_BLOCK_END
 
 SAMPLER_LINEAR(GRAD_TEXTURE_IDX, gradSampler)
+#ifdef @ENABLE_FEATHER
+SAMPLER_LINEAR(FEATHER_TEXTURE_IDX, featherSampler)
+#endif
 SAMPLER_MIPMAP(IMAGE_TEXTURE_IDX, imageSampler)
 
 FRAG_STORAGE_BUFFER_BLOCK_BEGIN
@@ -127,7 +133,8 @@
 {
     // Try to apply borrowedCoverage, assuming the existing coverage value
     // is zero.
-    uint borrowedCoverageFixed = uint(abs(borrowedCoverage) * 255.);
+    uint borrowedCoverageFixed =
+        uint(abs(borrowedCoverage) * CLOCKWISE_COVERAGE_PRECISION + .5);
     uint targetCoverageValue =
         uniforms.coverageBufferPrefix |
         (CLOCKWISE_FILL_ZERO_VALUE - borrowedCoverageFixed);
@@ -159,7 +166,8 @@
     }
 
     half X;
-    uint fragCoverageFixed = uint(abs(fragCoverage) * 255.);
+    uint fragCoverageFixed =
+        uint(abs(fragCoverage) * CLOCKWISE_COVERAGE_PRECISION + .5);
     uint coverageBeforeMax = STORAGE_BUFFER_ATOMIC_MAX(
         coverageBuffer,
         coverageIndex,
@@ -175,7 +183,9 @@
         // This pixel has been touched previously by a fragment in the stroke.
         // Multiply in an incremental coverage value that mixes with what's
         // already in the framebuffer.
-        half c1 = cast_uint_to_half(coverageBeforeMax & 0xffu) * (1. / 255.);
+        half c1 =
+            cast_uint_to_half(coverageBeforeMax & CLOCKWISE_COVERAGE_MASK) *
+            CLOCKWISE_COVERAGE_INVERSE_PRECISION;
         half c2 = max(c1, fragCoverage);
         X = (c2 - c1) / (1. - c1 * paintAlpha);
     }
@@ -201,7 +211,8 @@
     }
 
     half X = .0; // Amount by which to multiply paintAlpha.
-    uint fragCoverageRemainingFixed = uint(abs(fragCoverageRemaining) * 255.);
+    uint fragCoverageRemainingFixed =
+        uint(abs(fragCoverageRemaining) * CLOCKWISE_COVERAGE_PRECISION + .5);
     if (coverageInitialValue < uniforms.coverageBufferPrefix)
     {
         // The initial coverage value does not belong to this path. We *might*
@@ -234,7 +245,8 @@
             // coverageBeforeMax >= 0.
             uint c1Fixed = (coverageBeforeMax & CLOCKWISE_COVERAGE_MASK) -
                            CLOCKWISE_FILL_ZERO_VALUE;
-            half c1 = cast_uint_to_half(c1Fixed) * (1. / 255.);
+            half c1 = cast_uint_to_half(c1Fixed) *
+                      CLOCKWISE_COVERAGE_INVERSE_PRECISION;
             half c2 = fragCoverageRemaining;
 #ifdef @DRAW_INTERIOR_TRIANGLES
             c2 = min(c2, 1.);
@@ -263,7 +275,7 @@
         half c1 =
             cast_int_to_half(int((coverageBeforeAdd & CLOCKWISE_COVERAGE_MASK) -
                                  CLOCKWISE_FILL_ZERO_VALUE)) *
-            (1. / 255.);
+            CLOCKWISE_COVERAGE_INVERSE_PRECISION;
         half c2 = c1 + fragCoverageRemaining;
         c1 = clamp(c1, .0, 1.);
         c2 = clamp(c2, .0, 1.);
@@ -350,7 +362,20 @@
 #ifdef @DRAW_INTERIOR_TRIANGLES
         half borrowedCoverage = -v_windingWeight;
 #else
-        half borrowedCoverage = max(-v_edgeDistance.x, .0);
+        half fragCoverage;
+#ifdef @ENABLE_FEATHER
+        if (@ENABLE_FEATHER && is_feathered_fill(v_edgeDistance))
+        {
+            fragCoverage = feathered_fill_coverage(
+                v_edgeDistance,
+                SAMPLED_R16F(@featherTexture, featherSampler));
+        }
+        else
+#endif
+        {
+            fragCoverage = v_edgeDistance.x;
+        }
+        half borrowedCoverage = max(-fragCoverage, .0);
 #endif
         apply_borrowed_coverage(borrowedCoverage, coverageIndex);
         discard;
@@ -358,10 +383,22 @@
 #endif // BORROWED_COVERAGE_PREPASS
 
 #ifndef @DRAW_INTERIOR_TRIANGLES
-    if (v_edgeDistance.y >= .0) // Is this a stroke?
+    if (is_stroke(v_edgeDistance))
     {
-        half fragCoverage =
-            clamp(min(v_edgeDistance.x, v_edgeDistance.y), .0, 1.);
+        half fragCoverage;
+#ifdef @ENABLE_FEATHER
+        if (@ENABLE_FEATHER && is_feathered_stroke(v_edgeDistance))
+        {
+            fragCoverage = feathered_stroke_coverage(
+                v_edgeDistance,
+                SAMPLED_R16F(@featherTexture, featherSampler));
+        }
+        else
+#endif
+        {
+            fragCoverage = min(v_edgeDistance.x, v_edgeDistance.y);
+        }
+        fragCoverage = clamp(fragCoverage, .0, 1.);
         apply_stroke_coverage(paintColor.a, fragCoverage, coverageIndex);
     }
     else // It's a fill.
@@ -370,7 +407,20 @@
 #ifdef @DRAW_INTERIOR_TRIANGLES
         half fragCoverage = v_windingWeight;
 #else
-        half fragCoverage = clamp(v_edgeDistance.x, .0, 1.);
+        half fragCoverage;
+#ifdef @ENABLE_FEATHER
+        if (@ENABLE_FEATHER && is_feathered_fill(v_edgeDistance))
+        {
+            fragCoverage = feathered_fill_coverage(
+                v_edgeDistance,
+                SAMPLED_R16F(@featherTexture, featherSampler));
+        }
+        else
+#endif
+        {
+            fragCoverage = v_edgeDistance.x;
+        }
+        fragCoverage = clamp(fragCoverage, .0, 1.);
 #endif
         apply_fill_coverage(paintColor.a, fragCoverage, coverageIndex);
     }
diff --git a/renderer/src/shaders/draw_path.glsl b/renderer/src/shaders/draw_path.glsl
index 673cbbe..e619ee7 100644
--- a/renderer/src/shaders/draw_path.glsl
+++ b/renderer/src/shaders/draw_path.glsl
@@ -271,6 +271,9 @@
 #ifdef @FRAGMENT
 FRAG_TEXTURE_BLOCK_BEGIN
 TEXTURE_RGBA8(PER_FLUSH_BINDINGS_SET, GRAD_TEXTURE_IDX, @gradTexture);
+#if defined(@ENABLE_FEATHER)
+TEXTURE_R16F(PER_FLUSH_BINDINGS_SET, FEATHER_TEXTURE_IDX, @featherTexture);
+#endif
 TEXTURE_RGBA8(PER_DRAW_BINDINGS_SET, IMAGE_TEXTURE_IDX, @imageTexture);
 #if defined(@RENDER_MODE_MSAA) && defined(@ENABLE_ADVANCED_BLEND)
 TEXTURE_RGBA8(PER_FLUSH_BINDINGS_SET, DST_COLOR_TEXTURE_IDX, @dstColorTexture);
@@ -278,6 +281,9 @@
 FRAG_TEXTURE_BLOCK_END
 
 SAMPLER_LINEAR(GRAD_TEXTURE_IDX, gradSampler)
+#if defined(@ENABLE_FEATHER)
+SAMPLER_LINEAR(FEATHER_TEXTURE_IDX, featherSampler)
+#endif
 SAMPLER_MIPMAP(IMAGE_TEXTURE_IDX, imageSampler)
 
 FRAG_STORAGE_BUFFER_BLOCK_BEGIN
@@ -361,12 +367,41 @@
     coverageCount += v_windingWeight;
     PLS_PRESERVE_UI(coverageCountBuffer);
 #else
-    if (v_edgeDistance.y >= .0) // Stroke.
-        coverageCount =
-            max(min(v_edgeDistance.x, v_edgeDistance.y), coverageCount);
-    else // Fill. (Back-face culling ensures v_edgeDistance.x is appropriately
-         // signed.)
-        coverageCount += v_edgeDistance.x;
+    if (is_stroke(v_edgeDistance))
+    {
+        half fragCoverage;
+#if defined(@ENABLE_FEATHER)
+        if (@ENABLE_FEATHER && is_feathered_stroke(v_edgeDistance))
+        {
+            fragCoverage = feathered_stroke_coverage(
+                v_edgeDistance,
+                SAMPLED_R16F(@featherTexture, featherSampler));
+        }
+        else
+#endif
+        {
+            fragCoverage = min(v_edgeDistance.x, v_edgeDistance.y);
+        }
+        coverageCount = max(fragCoverage, coverageCount);
+    }
+    else // Fill. (Back-face culling handles the sign of v_edgeDistance.x.)
+    {
+        half fragCoverage;
+#if defined(@CLOCKWISE_FILL) && defined(@ENABLE_FEATHER)
+        if (@CLOCKWISE_FILL && @ENABLE_FEATHER &&
+            is_feathered_fill(v_edgeDistance))
+        {
+            fragCoverage = feathered_fill_coverage(
+                v_edgeDistance,
+                SAMPLED_R16F(@featherTexture, featherSampler));
+        }
+        else
+#endif
+        {
+            fragCoverage = v_edgeDistance.x;
+        }
+        coverageCount += fragCoverage;
+    }
 
     // Save the updated coverage.
     PLS_STOREUI(coverageCountBuffer,
diff --git a/renderer/src/shaders/draw_path_common.glsl b/renderer/src/shaders/draw_path_common.glsl
index 2943bf1..43c8e69 100644
--- a/renderer/src/shaders/draw_path_common.glsl
+++ b/renderer/src/shaders/draw_path_common.glsl
@@ -4,6 +4,14 @@
 
 // Common functions shared by draw shaders.
 
+// Feathered coverage values get shifted by "FEATHER_COVERAGE_BIAS" in order
+// to classify the coverage as belonging to a feather.
+#define FEATHER_COVERAGE_BIAS -2.
+
+// Fragment shaders test if a coverage value is less than
+// "FEATHER_COVERAGE_THRESHOLD" to test if the coverage belongs to a feather.
+#define FEATHER_COVERAGE_THRESHOLD -1.5
+
 #ifdef @VERTEX
 
 VERTEX_TEXTURE_BLOCK_BEGIN
@@ -76,8 +84,12 @@
     float2 translate = uintBitsToFloat(pathData.xy);
 
     float strokeRadius = uintBitsToFloat(pathData.z);
+    float featherRadius = uintBitsToFloat(pathData.w);
+
 #ifdef @RENDER_MODE_MSAA
-    outPathZIndex = cast_uint_to_ushort(pathData.w);
+    // Unpack the rest of the path data.
+    uint4 pathData2 = STORAGE_BUFFER_LOAD4(@pathBuffer, outPathID * 4u + 2u);
+    outPathZIndex = cast_uint_to_ushort(pathData2.r);
 #endif
 
     // Fix the tessellation vertex if we fetched the wrong one in order to
@@ -133,6 +145,16 @@
     float2 origin = uintBitsToFloat(tessVertexData.xy);
     float2 postTransformVertexOffset;
 
+    if (featherRadius != .0)
+    {
+        // Never use a feather harder than 1.5 standard deviations across a
+        // radius of 1/2px. This is the point where feathering just looks like
+        // antialiasing, and any harder looks aliased.
+        featherRadius =
+            max(featherRadius,
+                (FEATHER_TEXTURE_STDDEVS / 3.) / length(MUL(M, norm)));
+    }
+
     if (strokeRadius != .0) // Is this a stroke?
     {
         // Ensure strokes always emit clockwise triangles.
@@ -144,9 +166,11 @@
         if ((contourIDWithFlags & RIGHT_JOIN_CONTOUR_FLAG) != 0u)
             outset = max(outset, .0);
 
-        float aaRadius = manhattan_pixel_width(M, norm) * AA_RADIUS;
+        float aaRadius = featherRadius != .0
+                             ? featherRadius
+                             : manhattan_pixel_width(M, norm) * AA_RADIUS;
         half globalCoverage = 1.;
-        if (aaRadius > strokeRadius)
+        if (aaRadius > strokeRadius && featherRadius == .0)
         {
             // The stroke is narrower than the AA ramp. Instead of emitting
             // subpixel geometry, make the stroke as wide as the AA ramp and
@@ -169,7 +193,7 @@
 #endif
 
         uint joinType = contourIDWithFlags & JOIN_TYPE_MASK;
-        if (joinType != 0u)
+        if (joinType > ROUND_JOIN_CONTOUR_FLAG)
         {
             // This vertex belongs to a miter or bevel join. Begin by finding
             // the bisector, which is the same as the miter line. The first two
@@ -281,6 +305,13 @@
         // outEdgeDistance.y is >= 0 at every pixel. "outEdgeDistance.y < 0" is
         // used to differentiate between strokes and fills.
         outEdgeDistance.y = max(outEdgeDistance.y, make_half(1e-4));
+
+        if (featherRadius != .0)
+        {
+            // Bias x to tell the fragment shader that this is a feathered
+            // stroke.
+            outEdgeDistance.x = FEATHER_COVERAGE_BIAS - outEdgeDistance.x;
+        }
 #endif
 
         postTransformVertexOffset = MUL(M, outset * vertexOffset);
@@ -291,14 +322,6 @@
     }
     else // This is a fill.
     {
-        // Place the fan point.
-        if (vertexType == FAN_MIDPOINT_VERTEX)
-            origin = midpoint;
-
-        // Offset the vertex for Manhattan AA.
-        postTransformVertexOffset =
-            sign(MUL(outset * norm, inverse(M))) * AA_RADIUS;
-
         if (bool(contourIDWithFlags & MIRRORED_CONTOUR_CONTOUR_FLAG) !=
             bool(contourIDWithFlags & NEGATE_PATH_FILL_COVERAGE_FLAG))
         {
@@ -309,13 +332,81 @@
         // "outEdgeDistance.y < 0" indicates to the fragment shader that this is
         // a fill.
         outEdgeDistance = make_half2(fillCoverage, -1.);
-#endif
+
+        if (featherRadius != .0)
+        {
+            if (vertexType == STROKE_VERTEX)
+            {
+                // Bias y to tell the fragment shader that this is a feathered
+                // fill.
+                outEdgeDistance.y = FEATHER_COVERAGE_BIAS;
+            }
+            if ((contourIDWithFlags & JOIN_TYPE_MASK) ==
+                FEATHER_JOIN_CONTOUR_FLAG)
+            {
+                // Feather joins need some dimming data, but there wasn't any
+                // room left in the tessellation texture. Luckily, since feather
+                // joins are all colocated on the same point, we were able to
+                // slip in a sneaky backset to a different texel that has the
+                // location, which freed up 32 bits for our dimming data.
+                int backset = int(tessVertexData.x);
+                if ((contourIDWithFlags & MIRRORED_CONTOUR_CONTOUR_FLAG) != 0u)
+                    backset = -backset;
+                // Replace origin with the real one.
+                origin = uintBitsToFloat(
+                    TEXEL_FETCH(@tessVertexTexture,
+                                tess_texel_coord(tessVertexIdx + backset))
+                        .xy);
+                if (vertexType == STROKE_VERTEX)
+                {
+                    // The dimming factor was placed where we typically would
+                    // have found the origin.
+                    half2 featherCorrection = unpackHalf2x16(tessVertexData.y);
+                    // The dimming factor is "y^(x+1)" on the outer edge of the
+                    // feather (y <= 1, x >= 0) and just "y^0" at the center.
+                    //
+                    // TODO: can we accomplish this by just making the local
+                    // feather thinner instead?
+                    half y = featherCorrection.y;
+                    half x = fillCoverage == .0 ? featherCorrection.x : .0;
+                    // If we change the base to 2, x becomes negative and this
+                    // exponent can be expressed with one single scalar value:
+                    //
+                    //   y^(x+1) == 2^((x+1) * log2(y))
+                    //
+                    x = min((x + 1.) * log2(max(y, make_half(1e-5))), .0);
+                    // Bias y to tell the fragment shader that this is a
+                    // feather.
+                    outEdgeDistance.y = x + FEATHER_COVERAGE_BIAS;
+                }
+                if ((contourIDWithFlags & ZERO_FEATHER_OUTSET_CONTOUR_FLAG) !=
+                    0u)
+                {
+                    outset = .0;
+                }
+            }
+            // Offset the vertex for feathering.
+            postTransformVertexOffset = MUL(M, (outset * featherRadius) * norm);
+        }
+        else
+        {
+            // Offset the vertex for Manhattan AA.
+            postTransformVertexOffset =
+                sign(MUL(outset * norm, inverse(M))) * AA_RADIUS;
+        }
+#endif // !RENDER_MODE_MSAA
+
+        // Place the fan point.
+        if (vertexType == FAN_MIDPOINT_VERTEX)
+            origin = midpoint;
 
         // If we're actually just drawing a triangle, throw away the entire
         // patch except a single fan triangle.
         if ((contourIDWithFlags & RETROFITTED_TRIANGLE_CONTOUR_FLAG) != 0u &&
             vertexType != FAN_VERTEX)
+        {
             return false;
+        }
     }
 
     outVertexPosition = MUL(M, origin) + postTransformVertexOffset + translate;
@@ -340,3 +431,63 @@
 #endif // @DRAW_INTERIOR_TRIANGLES
 
 #endif // @VERTEX
+
+#ifdef @FRAGMENT
+// Strokes always carry a non-negative y; fills carry a negative one.
+INLINE bool is_stroke(half2 edgeDistance) { return edgeDistance.y >= .0; }
+
+// Feathered strokes have FEATHER_COVERAGE_BIAS folded into x.
+INLINE bool is_feathered_stroke(half2 edgeDistance)
+{
+    return edgeDistance.x < FEATHER_COVERAGE_THRESHOLD;
+}
+
+// Feathered fills have FEATHER_COVERAGE_BIAS folded into y.
+INLINE bool is_feathered_fill(half2 edgeDistance)
+{
+    return edgeDistance.y < FEATHER_COVERAGE_THRESHOLD;
+}
+
+// Feathered stroke: 1 - feather(1 - leftCoverage) - feather(1 - rightCoverage)
+INLINE half feathered_stroke_coverage(half2 edgeDistance,
+                                      SAMPLED_R16F_REF(featherTextureRef,
+                                                       featherSamplerRef))
+{
+    // The portion OUTSIDE the coverage is "1 - coverage". edgeDistance.x was
+    // biased to classify this stroke as a feather, so remove that bias too.
+    half leftOutsideCoverage = (1. - FEATHER_COVERAGE_BIAS) + edgeDistance.x;
+    half rightOutsideCoverage = 1. - edgeDistance.y;
+    float2 leftCoord = float2(leftOutsideCoverage, .5);
+    float2 rightCoord = float2(rightOutsideCoverage, .5);
+    half strokeCoverage = 1.;
+    strokeCoverage -= TEXTURE_REF_SAMPLE_LOD(featherTextureRef,
+                                             featherSamplerRef,
+                                             leftCoord,
+                                             .0)
+                          .r;
+    strokeCoverage -= TEXTURE_REF_SAMPLE_LOD(featherTextureRef,
+                                             featherSamplerRef,
+                                             rightCoord,
+                                             .0)
+                          .r;
+    return strokeCoverage;
+}
+
+INLINE half feathered_fill_coverage(half2 edgeDistance,
+                                    SAMPLED_R16F_REF(featherTextureRef,
+                                                     featherSamplerRef))
+{
+    // Sample by unsigned distance, then restore the sign of edgeDistance.x.
+    float2 featherCoord = float2(abs(edgeDistance.x), .5);
+    half featherValue = TEXTURE_REF_SAMPLE_LOD(featherTextureRef,
+                                               featherSamplerRef,
+                                               featherCoord,
+                                               .0)
+                            .r;
+    half fillCoverage = featherValue * sign(edgeDistance.x);
+    // Corners fall off nonlinearly. edgeDistance.y was biased to classify
+    // this fill as a feather, so remove that bias before exponentiating.
+    half cornerExponent = min(edgeDistance.y - FEATHER_COVERAGE_BIAS, .0);
+    return fillCoverage * exp2(cornerExponent);
+}
+#endif // @FRAGMENT
diff --git a/renderer/src/shaders/glsl.glsl b/renderer/src/shaders/glsl.glsl
index f5272df..b3b4bf4 100644
--- a/renderer/src/shaders/glsl.glsl
+++ b/renderer/src/shaders/glsl.glsl
@@ -136,6 +136,8 @@
     layout(set = SET, binding = IDX) uniform highp texture2D NAME
 #define TEXTURE_RGBA8(SET, IDX, NAME)                                          \
     layout(set = SET, binding = IDX) uniform mediump texture2D NAME
+#define TEXTURE_R16F(SET, IDX, NAME)                                           \
+    layout(set = SET, binding = IDX) uniform mediump texture2D NAME
 #elif @GLSL_VERSION >= 310
 #define TEXTURE_RGBA32UI(SET, IDX, NAME)                                       \
     layout(binding = IDX) uniform highp usampler2D NAME
@@ -143,11 +145,15 @@
     layout(binding = IDX) uniform highp sampler2D NAME
 #define TEXTURE_RGBA8(SET, IDX, NAME)                                          \
     layout(binding = IDX) uniform mediump sampler2D NAME
+#define TEXTURE_R16F(SET, IDX, NAME)                                           \
+    layout(binding = IDX) uniform mediump sampler2D NAME
 #else
 #define TEXTURE_RGBA32UI(SET, IDX, NAME) uniform highp usampler2D NAME
 #define TEXTURE_RGBA32F(SET, IDX, NAME) uniform highp sampler2D NAME
 #define TEXTURE_RGBA8(SET, IDX, NAME) uniform mediump sampler2D NAME
+#define TEXTURE_R16F(SET, IDX, NAME) uniform mediump sampler2D NAME
 #endif
+
 #define TEXTURE_RG32UI(SET, IDX, NAME) TEXTURE_RGBA32UI(SET, IDX, NAME)
 
 #ifdef @TARGET_VULKAN
@@ -163,6 +169,9 @@
     textureLod(sampler2D(NAME, SAMPLER_NAME), COORD, LOD)
 #define TEXTURE_SAMPLE_GRAD(NAME, SAMPLER_NAME, COORD, DDX, DDY)               \
     textureGrad(sampler2D(NAME, SAMPLER_NAME), COORD, DDX, DDY)
+#define SAMPLED_R16F_REF(NAME, SAMPLER_NAME)                                   \
+    mediump texture2D NAME, mediump sampler SAMPLER_NAME
+#define SAMPLED_R16F(NAME, SAMPLER_NAME) NAME, SAMPLER_NAME
 #else
 // SAMPLER_LINEAR and SAMPLER_MIPMAP are no-ops because in GL, sampling
 // parameters are API-level state tied to the texture.
@@ -173,8 +182,11 @@
     textureLod(NAME, COORD, LOD)
 #define TEXTURE_SAMPLE_GRAD(NAME, SAMPLER_NAME, COORD, DDX, DDY)               \
     textureGrad(NAME, COORD, DDX, DDY)
-#endif
+#define SAMPLED_R16F_REF(NAME, SAMPLER_NAME) mediump sampler2D NAME
+#define SAMPLED_R16F(NAME, SAMPLER_NAME) NAME
+#endif // !@TARGET_VULKAN
 
+#define TEXTURE_REF_SAMPLE_LOD TEXTURE_SAMPLE_LOD
 #define TEXEL_FETCH(NAME, COORD) texelFetch(NAME, COORD, 0)
 
 #define VERTEX_STORAGE_BUFFER_BLOCK_BEGIN
diff --git a/renderer/src/shaders/hlsl.glsl b/renderer/src/shaders/hlsl.glsl
index 382791a..3ed6f18 100644
--- a/renderer/src/shaders/hlsl.glsl
+++ b/renderer/src/shaders/hlsl.glsl
@@ -125,6 +125,11 @@
     uniform $Texture2D<float4> NAME : $register($t##IDX)
 #define TEXTURE_RGBA8(SET, IDX, NAME)                                          \
     uniform $Texture2D<$unorm float4> NAME : $register($t##IDX)
+#define TEXTURE_R16F(SET, IDX, NAME)                                           \
+    uniform $Texture2D<half> NAME : $register($t##IDX)
+#define SAMPLED_R16F_REF(NAME, SAMPLER_NAME)                                   \
+    $Texture2D<half> NAME, $SamplerState SAMPLER_NAME
+#define SAMPLED_R16F(NAME, SAMPLER_NAME) NAME, SAMPLER_NAME
 
 // SAMPLER_LINEAR and SAMPLER_MIPMAP are the same because in d3d11, sampler
 // parameters are defined at the API level.
@@ -138,6 +143,7 @@
     NAME.$Sample(SAMPLER_NAME, COORD)
 #define TEXTURE_SAMPLE_LOD(NAME, SAMPLER_NAME, COORD, LOD)                     \
     NAME.$SampleLevel(SAMPLER_NAME, COORD, LOD)
+#define TEXTURE_REF_SAMPLE_LOD TEXTURE_SAMPLE_LOD
 #define TEXTURE_SAMPLE_GRAD(NAME, SAMPLER_NAME, COORD, DDX, DDY)               \
     NAME.$SampleGrad(SAMPLER_NAME, COORD, DDX, DDY)
 
diff --git a/renderer/src/shaders/metal.glsl b/renderer/src/shaders/metal.glsl
index fdf3611..c7c1bcf 100644
--- a/renderer/src/shaders/metal.glsl
+++ b/renderer/src/shaders/metal.glsl
@@ -133,6 +133,10 @@
 #define TEXTURE_RGBA32UI(SET, IDX, NAME) [[$texture(IDX)]] $texture2d<uint> NAME
 #define TEXTURE_RGBA32F(SET, IDX, NAME) [[$texture(IDX)]] $texture2d<float> NAME
 #define TEXTURE_RGBA8(SET, IDX, NAME) [[$texture(IDX)]] $texture2d<half> NAME
+#define TEXTURE_R16F(SET, IDX, NAME) [[$texture(IDX)]] $texture2d<half> NAME
+#define SAMPLED_R16F_REF(NAME, SAMPLER_NAME)                                   \
+    $thread $const $texture2d<half>&NAME, $const $thread $sampler &SAMPLER_NAME
+#define SAMPLED_R16F(NAME, SAMPLER_NAME) _textures.NAME, SAMPLER_NAME
 
 #define SAMPLER_LINEAR(TEXTURE_IDX, NAME)                                      \
     $constexpr $sampler NAME($filter::$linear, $mip_filter::$none);
@@ -142,8 +146,10 @@
 #define TEXEL_FETCH(TEXTURE, COORD) _textures.TEXTURE.$read(uint2(COORD))
 #define TEXTURE_SAMPLE(TEXTURE, SAMPLER_NAME, COORD)                           \
     _textures.TEXTURE.$sample(SAMPLER_NAME, COORD)
+#define TEXTURE_REF_SAMPLE_LOD(TEXTURE_REF, SAMPLER_NAME, COORD, LOD)          \
+    TEXTURE_REF.$sample(SAMPLER_NAME, COORD, $level(LOD))
 #define TEXTURE_SAMPLE_LOD(TEXTURE, SAMPLER_NAME, COORD, LOD)                  \
-    _textures.TEXTURE.$sample(SAMPLER_NAME, COORD, $level(LOD))
+    TEXTURE_REF_SAMPLE_LOD(_textures.TEXTURE, SAMPLER_NAME, COORD, LOD)
 #define TEXTURE_SAMPLE_GRAD(TEXTURE, SAMPLER_NAME, COORD, DDX, DDY)            \
     _textures.TEXTURE.$sample(SAMPLER_NAME, COORD, $gradient2d(DDX, DDY))
 
diff --git a/renderer/src/shaders/metal/generate_draw_combinations.py b/renderer/src/shaders/metal/generate_draw_combinations.py
index 6a55b47..ed62e96 100644
--- a/renderer/src/shaders/metal/generate_draw_combinations.py
+++ b/renderer/src/shaders/metal/generate_draw_combinations.py
@@ -10,21 +10,23 @@
         self.name = name
         self.index = index
 
-# Each feature has a specific index. These must stay in sync with pls_render_context_metal_impl.mm.
-DRAW_INTERIOR_TRIANGLES = Feature('DRAW_INTERIOR_TRIANGLES', 0)
-ENABLE_CLIPPING = Feature('ENABLE_CLIPPING', 1)
-ENABLE_CLIP_RECT =  Feature('ENABLE_CLIP_RECT', 2)
-ENABLE_ADVANCED_BLEND = Feature('ENABLE_ADVANCED_BLEND', 3)
+# Each feature has a specific index. These must stay in sync with render_context_metal_impl.mm.
+ENABLE_CLIPPING = Feature('ENABLE_CLIPPING', 0)
+ENABLE_CLIP_RECT =  Feature('ENABLE_CLIP_RECT', 1)
+ENABLE_ADVANCED_BLEND = Feature('ENABLE_ADVANCED_BLEND', 2)
+ENABLE_FEATHER = Feature('ENABLE_FEATHER', 3)
 ENABLE_EVEN_ODD = Feature('ENABLE_EVEN_ODD', 4)
 ENABLE_NESTED_CLIPPING = Feature('ENABLE_NESTED_CLIPPING', 5)
 ENABLE_HSL_BLEND_MODES = Feature('ENABLE_HSL_BLEND_MODES', 6)
+DRAW_INTERIOR_TRIANGLES = Feature('DRAW_INTERIOR_TRIANGLES', 7)
 
 whole_program_features = {DRAW_INTERIOR_TRIANGLES,
                           ENABLE_CLIPPING,
                           ENABLE_CLIP_RECT,
                           ENABLE_ADVANCED_BLEND}
 
-fragment_only_features = {ENABLE_EVEN_ODD,
+fragment_only_features = {ENABLE_FEATHER,
+                          ENABLE_EVEN_ODD,
                           ENABLE_NESTED_CLIPPING,
                           ENABLE_HSL_BLEND_MODES}
 
@@ -47,6 +49,7 @@
     return True
 
 non_image_mesh_features = {DRAW_INTERIOR_TRIANGLES,
+                           ENABLE_FEATHER,
                            ENABLE_EVEN_ODD,
                            ENABLE_NESTED_CLIPPING}
 
@@ -67,7 +70,7 @@
         out.write('#define FRAGMENT\n')
     if draw_type == DrawType.IMAGE_MESH:
         assert(is_image_mesh_feature_set(feature_set))
-    namespace_id = ['0', '0', '0', '0', '0', '0', '0']
+    namespace_id = ['0', '0', '0', '0', '0', '0', '0', '0']
     for feature in feature_set:
         namespace_id[feature.index] = '1'
     for feature in feature_set:
diff --git a/renderer/src/shaders/rhi.glsl b/renderer/src/shaders/rhi.glsl
index f36e961..7516a99 100644
--- a/renderer/src/shaders/rhi.glsl
+++ b/renderer/src/shaders/rhi.glsl
@@ -107,6 +107,11 @@
 #define TEXTURE_RGBA32UI(SET, IDX, NAME) uniform $Texture2D<uint4> NAME
 #define TEXTURE_RGBA32F(SET, IDX, NAME) uniform $Texture2D<float4> NAME
 #define TEXTURE_RGBA8(SET, IDX, NAME) uniform $Texture2D<$unorm float4> NAME
+#define TEXTURE_R16F(SET, IDX, NAME)                                           \
+    uniform $Texture2D<half> NAME
+#define SAMPLED_R16F_REF(NAME, SAMPLER_NAME)                                   \
+    $Texture2D<half> NAME, $SamplerState SAMPLER_NAME
+#define SAMPLED_R16F(NAME, SAMPLER_NAME) NAME, SAMPLER_NAME
 
 // SAMPLER_LINEAR and SAMPLER_MIPMAP are the same because in d3d11, sampler
 // parameters are defined at the API level.
@@ -119,6 +124,7 @@
     NAME.$Sample(SAMPLER_NAME, COORD)
 #define TEXTURE_SAMPLE_LOD(NAME, SAMPLER_NAME, COORD, LOD)                     \
     NAME.$SampleLevel(SAMPLER_NAME, COORD, LOD)
+#define TEXTURE_REF_SAMPLE_LOD TEXTURE_SAMPLE_LOD
 #define TEXTURE_SAMPLE_GRAD(NAME, SAMPLER_NAME, COORD, DDX, DDY)               \
     NAME.$SampleGrad(SAMPLER_NAME, COORD, DDX, DDY)
 
diff --git a/renderer/src/shaders/specialization.glsl b/renderer/src/shaders/specialization.glsl
index fce15c8..a7a4735 100644
--- a/renderer/src/shaders/specialization.glsl
+++ b/renderer/src/shaders/specialization.glsl
@@ -4,6 +4,8 @@
     bool kEnableClipRect = false;
 layout(constant_id = ADVANCED_BLEND_SPECIALIZATION_IDX) const
     bool kEnableAdvancedBlend = false;
+layout(constant_id = FEATHER_SPECIALIZATION_IDX) const
+    bool kEnableFeather = false;
 layout(constant_id = EVEN_ODD_SPECIALIZATION_IDX) const
     bool kEnableEvenOdd = false;
 layout(constant_id = NESTED_CLIPPING_SPECIALIZATION_IDX) const
@@ -18,6 +20,7 @@
 #define @ENABLE_CLIPPING kEnableClipping
 #define @ENABLE_CLIP_RECT kEnableClipRect
 #define @ENABLE_ADVANCED_BLEND kEnableAdvancedBlend
+#define @ENABLE_FEATHER kEnableFeather
 #define @ENABLE_EVEN_ODD kEnableEvenOdd
 #define @ENABLE_NESTED_CLIPPING kEnableNestedClipping
 #define @ENABLE_HSL_BLEND_MODES kEnableHSLBlendModes
diff --git a/renderer/src/shaders/tessellate.glsl b/renderer/src/shaders/tessellate.glsl
index 555572e..bcf3498 100644
--- a/renderer/src/shaders/tessellate.glsl
+++ b/renderer/src/shaders/tessellate.glsl
@@ -65,7 +65,7 @@
 STORAGE_BUFFER_U32x4(CONTOUR_BUFFER_IDX, ContourBuffer, @contourBuffer);
 VERTEX_STORAGE_BUFFER_BLOCK_END
 
-float cosine_between_vectors(float2 a, float2 b)
+INLINE float cosine_between_vectors(float2 a, float2 b)
 {
     // FIXME(crbug.com/800804,skbug.com/11268): This can overflow if we don't
     // normalize exponents.
@@ -200,7 +200,7 @@
         float joinSpan = float(joinSegmentCount);
         if ((contourIDWithFlags &
              (JOIN_TYPE_MASK | EMULATED_STROKE_CAP_CONTOUR_FLAG)) ==
-            EMULATED_STROKE_CAP_CONTOUR_FLAG)
+            (ROUND_JOIN_CONTOUR_FLAG | EMULATED_STROKE_CAP_CONTOUR_FLAG))
         {
             // Round caps emulated as joins need to emit vertices at T=0 and
             // T=1, unlike normal round joins. The fragment shader will handle
@@ -265,6 +265,8 @@
     // Begin with the assumption that we belong to the curve section.
     float mergedSegmentCount = totalVertexCount - joinSegmentCount;
     float mergedVertexID = vertexIdx;
+    half2 featherCorrection = make_half2(.0, .0);
+    int featherVertexIndex0;
     if (mergedVertexID <= mergedSegmentCount)
     {
         // We do belong to the curve section. Clear out any stroke join flags.
@@ -279,7 +281,8 @@
         parametricSegmentCount = 1.;
         mergedVertexID -= mergedSegmentCount;
         mergedSegmentCount = joinSegmentCount;
-        if ((contourIDWithFlags & JOIN_TYPE_MASK) != 0u)
+        radsPerPolarSegment = v_joinArgs.z; // radsPerJoinSegment.
+        if ((contourIDWithFlags & JOIN_TYPE_MASK) > ROUND_JOIN_CONTOUR_FLAG)
         {
             // Miter or bevel join vertices snap to either tangents[0] or
             // tangents[1], and get adjusted in the shader that follows.
@@ -298,9 +301,71 @@
             // accounted for this in radsPerJoinSegment), but adjust our
             // stepping parameters so we begin at T=0 and end at T=1.
             mergedSegmentCount -= 2.;
-            mergedVertexID--;
+            --mergedVertexID;
         }
-        radsPerPolarSegment = v_joinArgs.z; // radsPerJoinSegment.
+        else if ((contourIDWithFlags & JOIN_TYPE_MASK) ==
+                 FEATHER_JOIN_CONTOUR_FLAG)
+        {
+            featherVertexIndex0 = -int(mergedVertexID);
+            --mergedVertexID; // Duplicate the vertex at T=1 on the join also.
+
+            // radsPerPolarSegment was calculated under the assumption that all
+            // joinSegmentCount vertices were meant for the angle between tan0
+            // and tan1. However, feather joins always have a rotation of PI.
+            float rads = radsPerPolarSegment * joinSegmentCount;
+            float joinVertexCount = joinSegmentCount - 1.;
+            float joinNonEmptySegmentCount = joinVertexCount - 1. - 3.;
+            // Feather joins draw backwards segments across the angle outside
+            // the join, in order to erase some of the coverage that got
+            // written.
+            float backwardSegmentCount =
+                clamp(round(abs(rads) / PI * joinNonEmptySegmentCount),
+                      1.,
+                      joinNonEmptySegmentCount - 1.);
+            // Forward segments are in the normal join angle.
+            float forwardSegmentCount =
+                joinNonEmptySegmentCount - backwardSegmentCount;
+            featherCorrection.y = cast_float_to_half(abs(rads) / PI);
+            if (mergedVertexID <= forwardSegmentCount)
+            {
+                // We're a backwards segment of the feather join.
+                tangents[1] = -tangents[1];
+                radsPerPolarSegment =
+                    -(PI * sign(rads) - rads) / forwardSegmentCount;
+                mergedSegmentCount = forwardSegmentCount;
+                featherCorrection.x = 1.;
+            }
+            else if (mergedVertexID == forwardSegmentCount + 1.)
+            {
+                // There's a discontinuous jump between the backwards and
+                // forward segments. Duplicate the final backwards vertex with
+                // zero height.
+                tangents[0] = tangents[1] = -tangents[1];
+                mergedVertexID = .0;
+                mergedSegmentCount = 1.;
+                contourIDWithFlags |= ZERO_FEATHER_OUTSET_CONTOUR_FLAG;
+            }
+            else if (mergedVertexID == forwardSegmentCount + 2.)
+            {
+                // There's a discontinuous jump between the backwards and
+                // forward segments. Duplicate the first forward vertex with
+                // zero height.
+                tangents[1] = tangents[0];
+                mergedVertexID = .0;
+                mergedSegmentCount = 1.;
+                contourIDWithFlags |= ZERO_FEATHER_OUTSET_CONTOUR_FLAG;
+            }
+            else
+            {
+                // We're a forward segment of the feather join.
+                mergedVertexID -= forwardSegmentCount + 3.;
+                mergedSegmentCount = backwardSegmentCount;
+                radsPerPolarSegment = rads / backwardSegmentCount;
+                featherCorrection.y =
+                    cast_float_to_half(1. - featherCorrection.y);
+                featherCorrection.x = 1.;
+            }
+        }
         contourIDWithFlags |= radsPerPolarSegment < .0
                                   ? LEFT_JOIN_CONTOUR_FLAG
                                   : RIGHT_JOIN_CONTOUR_FLAG;
@@ -309,7 +374,7 @@
     float2 tessCoord;
     float theta = .0;
     if (mergedVertexID == .0 || mergedVertexID == mergedSegmentCount ||
-        (contourIDWithFlags & JOIN_TYPE_MASK) != 0u)
+        (contourIDWithFlags & JOIN_TYPE_MASK) > ROUND_JOIN_CONTOUR_FLAG)
     {
         // Tessellated vertices at the beginning and end of the strip use exact
         // endpoints and tangents. This ensures crack-free seaming between
@@ -317,6 +382,9 @@
         bool isTan0 = mergedVertexID < mergedSegmentCount * .5;
         tessCoord = isTan0 ? p0 : p3;
         theta = atan2(isTan0 ? tangents[0] : tangents[1]);
+        // Never do feather correction on the ends; the falloff is linear here
+        // plus we need to blend smoothly with the edges.
+        featherCorrection.x = .0;
     }
     else if ((contourIDWithFlags & RETROFITTED_TRIANGLE_CONTOUR_FLAG) != 0u)
     {
@@ -362,11 +430,11 @@
             // Now find the coefficients that give a tangent direction from a
             // parametric vertex ID:
             //
-            //                                                                |parametricVertexID^2|
-            //  Tangent_Direction(parametricVertexID) = dx,dy = |A  B_  C_| *
-            //  |parametricVertexID  |
-            //                                                  |.   .   .|   |1
-            //                                                  |
+            //  Tangent_Direction(parametricVertexID) = dx,dy =
+            //
+            //                     |parametricVertexID^2|
+            //      |A  B_  C_| *  |parametricVertexID  |
+            //      |.   .   .|    |1                   |
             //
             float2 B_ = B * (parametricSegmentCount * 2.);
             float2 C_ = C * (parametricSegmentCount * parametricSegmentCount);
@@ -462,6 +530,15 @@
             // polar vertices, our final T value for mergedVertexID is whichever
             // is larger.
             T = max(parametricT, polarT);
+
+            if (featherCorrection.x == 1.)
+            {
+                // This is a total hack to give feather joins nonlinear falloff
+                // on the bisector while keeping it linear on the ends.
+                // TODO: This needs a more methodical approach.
+                featherCorrection.x =
+                    sqrt(sin(lastPolarVertexID / mergedSegmentCount * PI));
+            }
         }
 
         // Evaluate the cubic at T. Use De Casteljau's for its accuracy and
@@ -480,7 +557,20 @@
             theta = atan2(bcd - abc);
     }
 
-    EMIT_FRAG_DATA(
-        uint4(floatBitsToUint(float3(tessCoord, theta)), contourIDWithFlags));
+    uint4 tessData =
+        uint4(floatBitsToUint(float3(tessCoord, theta)), contourIDWithFlags);
+    if ((contourIDWithFlags & JOIN_TYPE_MASK) == FEATHER_JOIN_CONTOUR_FLAG)
+    {
+        // Feathered corners need an extra dimming factor. Luckily, the corner
+        // spokes are all centered on the same point, so we can store a backset
+        // to that point instead of the point itself, and thus make room for the
+        // dimming factor.
+        tessData.x = uint(featherVertexIndex0);
+        // The coverage dimming factor is "y^(x+1)" on the outer edge of the
+        // feather (y <= 1, x >= 0), and just "y^0" at the center. We provide x
+        // and y, and the vertex shader works out the rest.
+        tessData.y = packHalf2x16(featherCorrection);
+    }
+    EMIT_FRAG_DATA(tessData);
 }
 #endif
diff --git a/renderer/src/vulkan/render_context_vulkan_impl.cpp b/renderer/src/vulkan/render_context_vulkan_impl.cpp
index 422de4d..c3947dc 100644
--- a/renderer/src/vulkan/render_context_vulkan_impl.cpp
+++ b/renderer/src/vulkan/render_context_vulkan_impl.cpp
@@ -114,6 +114,36 @@
     return make_rcp<RenderBufferVulkanImpl>(m_vk, type, flags, sizeInBytes);
 }
 
+enum class TextureFormat
+{
+    rgba8,
+    r16f,
+};
+
+constexpr static VkFormat vulkan_texture_format(TextureFormat format)
+{
+    switch (format)
+    {
+        case TextureFormat::rgba8:
+            return VK_FORMAT_R8G8B8A8_UNORM;
+        case TextureFormat::r16f:
+            return VK_FORMAT_R16_SFLOAT;
+    }
+    RIVE_UNREACHABLE();
+}
+
+constexpr static size_t vulkan_texture_bytes_per_pixel(TextureFormat format)
+{
+    switch (format)
+    {
+        case TextureFormat::rgba8:
+            return 4;
+        case TextureFormat::r16f:
+            return 2;
+    }
+    RIVE_UNREACHABLE();
+}
+
 class TextureVulkanImpl : public Texture
 {
 public:
@@ -121,11 +151,12 @@
                       uint32_t width,
                       uint32_t height,
                       uint32_t mipLevelCount,
-                      const uint8_t imageDataRGBA[]) :
+                      TextureFormat format,
+                      const void* imageData) :
         Texture(width, height),
         m_vk(std::move(vk)),
         m_texture(m_vk->makeTexture({
-            .format = VK_FORMAT_R8G8B8A8_UNORM,
+            .format = vulkan_texture_format(format),
             .extent = {width, height, 1},
             .mipLevels = mipLevelCount,
             .usage =
@@ -134,13 +165,13 @@
         m_textureView(m_vk->makeTextureView(m_texture)),
         m_imageUploadBuffer(m_vk->makeBuffer(
             {
-                .size = height * width * 4,
+                .size = height * width * vulkan_texture_bytes_per_pixel(format),
                 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
             },
             vkutil::Mappability::writeOnly))
     {
         memcpy(m_imageUploadBuffer->contents(),
-               imageDataRGBA,
+               imageData,
                m_imageUploadBuffer->info().size);
         m_imageUploadBuffer->flushContents();
     }
@@ -321,6 +352,7 @@
                                            width,
                                            height,
                                            mipLevelCount,
+                                           TextureFormat::rgba8,
                                            bitmap->bytes());
     }
 #endif
@@ -891,6 +923,12 @@
                 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
             },
             {
+                .binding = FEATHER_TEXTURE_IDX,
+                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+                .descriptorCount = 1,
+                .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+            },
+            {
                 .binding = PATH_BUFFER_IDX,
                 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                 .descriptorCount = 1,
@@ -988,6 +1026,12 @@
                 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
             },
             {
+                .binding = FEATHER_TEXTURE_IDX,
+                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER,
+                .descriptorCount = 1,
+                .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+            },
+            {
                 .binding = IMAGE_TEXTURE_IDX,
                 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER,
                 .descriptorCount = 1,
@@ -1097,7 +1141,7 @@
             },
             {
                 .type = VK_DESCRIPTOR_TYPE_SAMPLER,
-                .descriptorCount = 2, // m_linearSampler, m_mipmapSampler
+                .descriptorCount = 3, // grad, feather, image samplers
             },
         };
         VkDescriptorPoolCreateInfo staticDescriptorPoolCreateInfo = {
@@ -1154,9 +1198,11 @@
             },
             {
                 {.sampler = impl->m_linearSampler},
+                {.sampler = impl->m_linearSampler},
                 {.sampler = impl->m_mipmapSampler},
             });
-        static_assert(IMAGE_TEXTURE_IDX == GRAD_TEXTURE_IDX + 1);
+        static_assert(FEATHER_TEXTURE_IDX == GRAD_TEXTURE_IDX + 1);
+        static_assert(IMAGE_TEXTURE_IDX == FEATHER_TEXTURE_IDX + 1);
     }
 
     VkRenderPass renderPassAt(int renderPassVariantIdx)
@@ -1460,6 +1506,7 @@
             switch (drawType)
             {
                 case DrawType::midpointFanPatches:
+                case DrawType::midpointFanCenterAAPatches:
                 case DrawType::outerCurvePatches:
                     vkutil::set_shader_code(vsInfo, spirv::draw_path_vert);
                     vkutil::set_shader_code(fsInfo, spirv::draw_path_frag);
@@ -1497,6 +1544,7 @@
             switch (drawType)
             {
                 case DrawType::midpointFanPatches:
+                case DrawType::midpointFanCenterAAPatches:
                 case DrawType::outerCurvePatches:
                     vkutil::set_shader_code(vsInfo,
                                             spirv::atomic_draw_path_vert);
@@ -1559,6 +1607,7 @@
             switch (drawType)
             {
                 case DrawType::midpointFanPatches:
+                case DrawType::midpointFanCenterAAPatches:
                 case DrawType::outerCurvePatches:
                     vkutil::set_shader_code(vsInfo,
                                             spirv::draw_clockwise_path_vert);
@@ -1656,6 +1705,7 @@
             shaderFeatures & gpu::ShaderFeatures::ENABLE_CLIPPING,
             shaderFeatures & gpu::ShaderFeatures::ENABLE_CLIP_RECT,
             shaderFeatures & gpu::ShaderFeatures::ENABLE_ADVANCED_BLEND,
+            shaderFeatures & gpu::ShaderFeatures::ENABLE_FEATHER,
             shaderFeatures & gpu::ShaderFeatures::ENABLE_EVEN_ODD,
             shaderFeatures & gpu::ShaderFeatures::ENABLE_NESTED_CLIPPING,
             shaderFeatures & gpu::ShaderFeatures::ENABLE_HSL_BLEND_MODES,
@@ -1665,12 +1715,13 @@
         static_assert(CLIPPING_SPECIALIZATION_IDX == 0);
         static_assert(CLIP_RECT_SPECIALIZATION_IDX == 1);
         static_assert(ADVANCED_BLEND_SPECIALIZATION_IDX == 2);
-        static_assert(EVEN_ODD_SPECIALIZATION_IDX == 3);
-        static_assert(NESTED_CLIPPING_SPECIALIZATION_IDX == 4);
-        static_assert(HSL_BLEND_MODES_SPECIALIZATION_IDX == 5);
-        static_assert(CLOCKWISE_FILL_SPECIALIZATION_IDX == 6);
-        static_assert(BORROWED_COVERAGE_PREPASS_SPECIALIZATION_IDX == 7);
-        static_assert(SPECIALIZATION_COUNT == 8);
+        static_assert(FEATHER_SPECIALIZATION_IDX == 3);
+        static_assert(EVEN_ODD_SPECIALIZATION_IDX == 4);
+        static_assert(NESTED_CLIPPING_SPECIALIZATION_IDX == 5);
+        static_assert(HSL_BLEND_MODES_SPECIALIZATION_IDX == 6);
+        static_assert(CLOCKWISE_FILL_SPECIALIZATION_IDX == 7);
+        static_assert(BORROWED_COVERAGE_PREPASS_SPECIALIZATION_IDX == 8);
+        static_assert(SPECIALIZATION_COUNT == 9);
 
         VkSpecializationMapEntry permutationMapEntries[SPECIALIZATION_COUNT];
         for (uint32_t i = 0; i < SPECIALIZATION_COUNT; ++i)
@@ -1732,6 +1783,7 @@
         switch (drawType)
         {
             case DrawType::midpointFanPatches:
+            case DrawType::midpointFanCenterAAPatches:
             case DrawType::outerCurvePatches:
             {
                 vertexInputBindingDescriptions = {{{
@@ -2076,8 +2128,17 @@
 
 void RenderContextVulkanImpl::initGPUObjects()
 {
+    m_featherTexture =
+        make_rcp<TextureVulkanImpl>(m_vk,
+                                    gpu::GAUSSIAN_TABLE_SIZE,
+                                    1,
+                                    1,
+                                    TextureFormat::r16f,
+                                    gpu::g_gaussianIntegralTableF16);
+
     constexpr static uint8_t black[] = {0, 0, 0, 1};
-    m_nullImageTexture = make_rcp<TextureVulkanImpl>(m_vk, 1, 1, 1, black);
+    m_nullImageTexture =
+        make_rcp<TextureVulkanImpl>(m_vk, 1, 1, 1, TextureFormat::rgba8, black);
 
     VkSamplerCreateInfo linearSamplerCreateInfo = {
         .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
@@ -2295,7 +2356,7 @@
 constexpr static uint32_t kMaxDynamicUniformUpdates = 1;
 constexpr static uint32_t kMaxImageTextureUpdates = 256;
 constexpr static uint32_t kMaxSampledImageUpdates =
-    2 + kMaxImageTextureUpdates; // tess + grad + imageTextures
+    3 + kMaxImageTextureUpdates; // tess + feather + grad + imageTextures
 constexpr static uint32_t kMaxStorageImageUpdates =
     1; // coverageAtomicTexture in atomic mode.
 constexpr static uint32_t kMaxStorageBufferUpdates =
@@ -2487,6 +2548,10 @@
     rcp<DescriptorSetPool> descriptorSetPool = m_descriptorSetPoolPool->make();
 
     // Apply pending texture updates.
+    if (m_featherTexture->hasUpdates())
+    {
+        m_featherTexture->synchronize(commandBuffer);
+    }
     if (m_nullImageTexture->hasUpdates())
     {
         m_nullImageTexture->synchronize(commandBuffer);
@@ -3070,10 +3135,17 @@
             .dstBinding = GRAD_TEXTURE_IDX,
             .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
         },
-        {{
-            .imageView = *m_gradTextureView,
-            .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
-        }});
+        {
+            {
+                .imageView = *m_gradTextureView,
+                .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+            },
+            {
+                .imageView = *m_featherTexture->m_textureView,
+                .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+            },
+        });
+    static_assert(FEATHER_TEXTURE_IDX == GRAD_TEXTURE_IDX + 1);
 
     m_vk->updateBufferDescriptorSets(
         perFlushDescriptorSet,
@@ -3406,9 +3478,10 @@
         switch (drawType)
         {
             case DrawType::midpointFanPatches:
+            case DrawType::midpointFanCenterAAPatches:
             case DrawType::outerCurvePatches:
             {
-                // Draw PLS patches that connect the tessellation vertices.
+                // Draw patches that connect the tessellation vertices.
                 m_vk->CmdBindVertexBuffers(
                     commandBuffer,
                     0,
diff --git a/renderer/src/webgpu/render_context_webgpu_impl.cpp b/renderer/src/webgpu/render_context_webgpu_impl.cpp
index 37f8cdf..8030b22 100644
--- a/renderer/src/webgpu/render_context_webgpu_impl.cpp
+++ b/renderer/src/webgpu/render_context_webgpu_impl.cpp
@@ -627,6 +627,7 @@
             switch (drawType)
             {
                 case DrawType::midpointFanPatches:
+                case DrawType::midpointFanCenterAAPatches:
                 case DrawType::outerCurvePatches:
                     addDefine(GLSL_ENABLE_INSTANCE_INDEX);
                     if (plsType ==
@@ -676,6 +677,7 @@
             switch (drawType)
             {
                 case DrawType::midpointFanPatches:
+                case DrawType::midpointFanCenterAAPatches:
                 case DrawType::outerCurvePatches:
                     addDefine(GLSL_DRAW_PATH);
                     glsl << gpu::glsl::draw_path_common << '\n';
@@ -732,6 +734,7 @@
             switch (drawType)
             {
                 case DrawType::midpointFanPatches:
+                case DrawType::midpointFanCenterAAPatches:
                 case DrawType::outerCurvePatches:
                     vertexShader =
                         m_vertexShaderHandle.compileSPIRVShaderModule(
@@ -1557,6 +1560,7 @@
     switch (drawType)
     {
         case DrawType::midpointFanPatches:
+        case DrawType::midpointFanCenterAAPatches:
         case DrawType::outerCurvePatches:
         {
             attrs = {
@@ -2177,6 +2181,7 @@
         switch (drawType)
         {
             case DrawType::midpointFanPatches:
+            case DrawType::midpointFanCenterAAPatches:
             case DrawType::outerCurvePatches:
             {
                 // Draw PLS patches that connect the tessellation vertices.
diff --git a/skia/renderer/src/skia_factory.cpp b/skia/renderer/src/skia_factory.cpp
index 3a47009..27abb56 100644
--- a/skia/renderer/src/skia_factory.cpp
+++ b/skia/renderer/src/skia_factory.cpp
@@ -14,6 +14,7 @@
 #include "include/core/SkPath.h"
 #include "include/core/SkVertices.h"
 #include "include/effects/SkGradientShader.h"
+#include "include/effects/SkImageFilters.h"
 
 #include "rive/math/vec2d.hpp"
 #include "rive/shapes/paint/color.hpp"
@@ -50,6 +51,7 @@
 {
 private:
     SkPaint m_Paint;
+    bool m_hasFeather = false;
 
 public:
     SkiaRenderPaint();
@@ -61,9 +63,41 @@
     void thickness(float value) override;
     void join(StrokeJoin value) override;
     void cap(StrokeCap value) override;
+    void feather(float value) override;
     void blendMode(BlendMode value) override;
     void shader(rcp<RenderShader>) override;
     void invalidateStroke() override {}
+
+    class OverrideStrokeParamsForFeather
+    {
+    public:
+        OverrideStrokeParamsForFeather(SkiaRenderPaint* paint) :
+            m_skPaint(&paint->m_Paint), m_hasFeather(paint->m_hasFeather)
+        {
+            if (m_hasFeather)
+            {
+                m_overriddenCap = m_skPaint->getStrokeCap();
+                m_overriddenJoin = m_skPaint->getStrokeJoin();
+                m_skPaint->setStrokeCap(SkPaint::kRound_Cap);
+                m_skPaint->setStrokeJoin(SkPaint::kRound_Join);
+            }
+        }
+
+        ~OverrideStrokeParamsForFeather()
+        {
+            if (m_hasFeather)
+            {
+                m_skPaint->setStrokeCap(m_overriddenCap);
+                m_skPaint->setStrokeJoin(m_overriddenJoin);
+            }
+        }
+
+    private:
+        SkPaint* const m_skPaint;
+        const bool m_hasFeather;
+        SkPaint::Cap m_overriddenCap;
+        SkPaint::Join m_overriddenJoin;
+    };
 };
 
 class SkiaRenderImage : public LITE_RTTI_OVERRIDE(RenderImage, SkiaRenderImage)
@@ -136,6 +170,20 @@
     m_Paint.setStrokeCap(ToSkia::convert(value));
 }
 
+void SkiaRenderPaint::feather(float value)
+{
+    m_hasFeather = value != 0;
+    if (m_hasFeather)
+    {
+        m_Paint.setImageFilter(
+            SkImageFilters::Blur(value * .5f, value * .5f, nullptr));
+    }
+    else
+    {
+        m_Paint.setImageFilter(nullptr);
+    }
+}
+
 void SkiaRenderPaint::blendMode(BlendMode value)
 {
     m_Paint.setBlendMode(ToSkia::convert(value));
@@ -155,9 +203,11 @@
 }
 void SkiaRenderer::drawPath(RenderPath* path, RenderPaint* paint)
 {
-    LITE_RTTI_CAST_OR_RETURN(skPath, SkiaRenderPath*, path);
-    LITE_RTTI_CAST_OR_RETURN(skPaint, SkiaRenderPaint*, paint);
-    m_Canvas->drawPath(skPath->path(), skPaint->paint());
+    LITE_RTTI_CAST_OR_RETURN(skiaRenderPath, SkiaRenderPath*, path);
+    LITE_RTTI_CAST_OR_RETURN(skiaRenderPaint, SkiaRenderPaint*, paint);
+
+    SkiaRenderPaint::OverrideStrokeParamsForFeather ospff(skiaRenderPaint);
+    m_Canvas->drawPath(skiaRenderPath->path(), skiaRenderPaint->paint());
 }
 
 void SkiaRenderer::clipPath(RenderPath* path)
diff --git a/src/math/bezier_utils.cpp b/src/math/bezier_utils.cpp
index e6ca87a..02a71a3 100644
--- a/src/math/bezier_utils.cpp
+++ b/src/math/bezier_utils.cpp
@@ -169,50 +169,33 @@
     return acosf(cosTheta);
 }
 
+// If a chop falls within a distance of "TESS_EPSILON" from 0 or 1, throw it
+// out. Tangents become unstable when we chop too close to the boundary. This
+// works out because the tessellation shaders don't allow more than 2^10
+// parametric segments, and they snap the beginning and ending edges at 0 and 1.
+// So if we overstep an inflection or point of 180-degree rotation by a fraction
+// of a tessellation segment, it just gets snapped.
+constexpr static float TESS_EPSILON = 1.f / (1 << 10);
+
 int find_cubic_convex_180_chops(const Vec2D pts[], float T[2], bool* areCusps)
 {
     assert(pts);
     assert(T);
     assert(areCusps);
 
-    // If a chop falls within a distance of "kEpsilon" from 0 or 1, throw it
-    // out. Tangents become unstable when we chop too close to the boundary.
-    // This works out because the tessellation shaders don't allow more than
-    // 2^10 parametric segments, and they snap the beginning and ending edges at
-    // 0 and 1. So if we overstep an inflection or point of 180-degree rotation
-    // by a fraction of a tessellation segment, it just gets snapped.
-    constexpr static float kEpsilon = 1.f / (1 << 10);
-    // Floating-point representation of "1 - 2*kEpsilon".
+    // Floating-point representation of "1 - 2*TESS_EPSILON".
     constexpr static uint32_t kIEEE_one_minus_2_epsilon =
         (127 << 23) - 2 * (1 << (24 - 10));
     // Unfortunately we don't have a way to static_assert this, but we can
     // runtime assert that the kIEEE_one_minus_2_epsilon bits are correct.
     assert(math::bit_cast<float>(kIEEE_one_minus_2_epsilon) ==
-           1 - 2 * kEpsilon);
+           1 - 2 * TESS_EPSILON);
 
     float2 p0 = simd::load2f(&pts[0].x);
     float2 p1 = simd::load2f(&pts[1].x);
     float2 p2 = simd::load2f(&pts[2].x);
     float2 p3 = simd::load2f(&pts[3].x);
-
-    // Find the cubic's power basis coefficients. These define the bezier curve
-    // as:
-    //
-    //                                    |T^3|
-    //     Cubic(T) = x,y = |A  3B  3C| * |T^2| + P0
-    //                      |.   .   .|   |T  |
-    //
-    // And the tangent direction (scaled by a uniform 1/3) will be:
-    //
-    //                                                 |T^2|
-    //     Tangent_Direction(T) = dx,dy = |A  2B  C| * |T  |
-    //                                    |.   .  .|   |1  |
-    //
-    float2 C = p1 - p0;
-    float2 D = p2 - p1;
-    float2 E = p3 - p0;
-    float2 B = D - C;
-    float2 A = -3.f * D + E;
+    CubicCoeffs coeffs(p0, p1, p2, p3);
 
     // Now find the cubic's inflection function.
     // There are inflections where F' x F'' == 0.
@@ -225,16 +208,16 @@
     // https://www.microsoft.com/en-us/research/wp-content/uploads/2005/01/p1000-loop.pdf
     // NOTE: We only need the roots, so a uniform scale factor does not affect
     // the solution.
-    float a = simd::cross(A, B);
-    float b = simd::cross(A, C);
-    float c = simd::cross(B, C);
+    float a = simd::cross(coeffs.A, coeffs.B);
+    float b = simd::cross(coeffs.A, coeffs.C);
+    float c = simd::cross(coeffs.B, coeffs.C);
     float b_over_minus_2 = -.5f * b;
     float discr_over_4 = b_over_minus_2 * b_over_minus_2 - a * c;
 
     // If -cuspThreshold <= discr_over_4 <= cuspThreshold, it means the two
-    // roots are within kEpsilon of one another (in parametric space). This is
-    // close enough for our purposes to consider them a single cusp.
-    float cuspThreshold = a * (kEpsilon / 2);
+    // roots are within TESS_EPSILON of one another (in parametric space). This
+    // is close enough for our purposes to consider them a single cusp.
+    float cuspThreshold = a * (TESS_EPSILON / 2);
     cuspThreshold *= cuspThreshold;
 
     if (discr_over_4 < -cuspThreshold)
@@ -255,8 +238,8 @@
         // equal NaN which returns 0 chops.
         *areCusps = false;
         float root = math::ieee_float_divide(c, b_over_minus_2);
-        // Is "root" inside the range [kEpsilon, 1 - kEpsilon)?
-        if (math::bit_cast<uint32_t>(root - kEpsilon) <
+        // Is "root" inside the range [TESS_EPSILON, 1 - TESS_EPSILON)?
+        if (math::bit_cast<uint32_t>(root - TESS_EPSILON) <
             kIEEE_one_minus_2_epsilon)
         {
             T[0] = root;
@@ -265,7 +248,7 @@
         return 0;
     }
 
-    *areCusps = (discr_over_4 <= cuspThreshold);
+    *areCusps = discr_over_4 <= cuspThreshold;
     if (*areCusps)
     {
         // The two roots are close enough that we can consider them a single
@@ -274,8 +257,8 @@
         {
             // Pick the average of both roots.
             float root = math::ieee_float_divide(b_over_minus_2, a);
-            // Is "root" inside the range [kEpsilon, 1 - kEpsilon)?
-            if (math::bit_cast<uint32_t>(root - kEpsilon) <
+            // Is "root" inside the range [TESS_EPSILON, 1 - TESS_EPSILON)?
+            if (math::bit_cast<uint32_t>(root - TESS_EPSILON) <
                 kIEEE_one_minus_2_epsilon)
             {
                 T[0] = root;
@@ -309,10 +292,10 @@
         //     tan0 * |A  2B  C| * |T  | == 0
         //            |.   .  .|   |1  |
         //
-        float2 tan0 = simd::any(C != 0.f) ? C : p2 - p0;
-        a = simd::dot(tan0, A);
-        b_over_minus_2 = -simd::dot(tan0, B);
-        c = simd::dot(tan0, C);
+        float2 tan0 = simd::any(coeffs.C != 0.f) ? coeffs.C : p2 - p0;
+        a = simd::dot(tan0, coeffs.A);
+        b_over_minus_2 = -simd::dot(tan0, coeffs.B);
+        c = simd::dot(tan0, coeffs.C);
         discr_over_4 = std::max(b_over_minus_2 * b_over_minus_2 - a * c, 0.f);
     }
 
@@ -323,7 +306,7 @@
     q = q + b_over_minus_2;
     float2 roots = float2{q, c} / float2{a, q};
 
-    auto inside = (roots > kEpsilon) & (roots < (1 - kEpsilon));
+    auto inside = (roots > TESS_EPSILON) & (roots < (1 - TESS_EPSILON));
     if (inside[0])
     {
         if (inside[1] && roots[0] != roots[1])
@@ -345,5 +328,260 @@
     }
     return 0;
 }
+
+int find_cubic_convex_90_chops(const Vec2D pts[],
+                               float outT[4],
+                               float cuspPadding,
+                               bool* areCusps)
+{
+    assert(pts);
+    assert(outT);
+    assert(areCusps);
+
+    // Find the cubic's inflection function.
+    // There are inflections where F' x F'' == 0.
+    //
+    // We formulate this as a quadratic equation:
+    //
+    //     F' x F'' == a * T^2 + b * T + c == 0.
+    //
+    // See:
+    // https://www.microsoft.com/en-us/research/wp-content/uploads/2005/01/p1000-loop.pdf
+    // NOTE: We only need the roots, so a uniform scale factor does not affect
+    // the solution.
+    CubicCoeffs coeffs(pts);
+    float a = simd::cross(coeffs.A, coeffs.B);
+    float b_over_2 = simd::cross(coeffs.A, coeffs.C) * .5f;
+    float c = simd::cross(coeffs.B, coeffs.C);
+    float discr_over_4 = b_over_2 * b_over_2 - a * c;
+
+    // If -cuspThreshold <= discr_over_4 <= cuspThreshold, it means the two
+    // roots are within TESS_EPSILON of one another (in parametric space). This
+    // is close enough for our purposes to consider them a single cusp.
+    float cuspThreshold = a * (TESS_EPSILON / 2);
+    cuspThreshold *= cuspThreshold;
+
+    // Find the first two chops, based on curve classification. Also fill in
+    // "tan90", which will define the second pair of chops as the two points
+    // perpendicular to "tan90".
+    float4 T;
+    float2 tan90;
+    if (discr_over_4 < -cuspThreshold ||
+        // Check if it's quadratic.
+        std::max(fabs(a), fabs(b_over_2)) < fabs(c) * TESS_EPSILON)
+    {
+        // The curve is a loop or quadratic.
+        // One chop is where rotation == 180 deg (which happens at infinity if
+        // the curve is quadratic).
+        // (This is the 2nd root where the tangent is parallel to tan0.)
+        //
+        //    Tangent_Direction(T) x tan0 == 0
+        //    (AT^2 x tan0) + (2BT x tan0) + (C x tan0) == 0
+        //    (A x C)T^2 + (2B x C)T + (C x C) == 0
+        //        [[because tan0 == P1 - P0 == C]]
+        //    bT^2 + 2cT + 0 == 0  [[because A x C == b, B x C == c]]
+        //    T = [0, -2c/b]
+        //
+        // NOTE: if C == 0, then C != tan0. But this is fine because the curve
+        // can only rotate 180 degrees if the endpoints are colocated, and this
+        // gets handled next.
+        T.xy = {-c / b_over_2, 1};
+
+        // Next chop 90 degrees from the starting tangent of the curve.
+        tan90 = simd::any(coeffs.C != 0.f)
+                    ? coeffs.C
+                    : math::bit_cast<float2>(pts[2] - pts[0]);
+        *areCusps = false;
+    }
+    else if (discr_over_4 > cuspThreshold)
+    {
+        // The curve is serpentine. Solve for the two inflection points.
+        float q = sqrtf(discr_over_4);
+        q = -b_over_2 - copysignf(q, b_over_2);
+        T.xy = float2{q, c} / float2{a, q};
+
+        // Next chop 90 degrees from whichever inflection point is closest to
+        // the middle.
+        float t = fabsf(T.x - .5f) < fabsf(T.y - .5f) ? T.x : T.y;
+        tan90 = (coeffs.A * t + 2.f * coeffs.B) * t + coeffs.C;
+        *areCusps = false;
+    }
+    else
+    {
+        // The curve is a cusp. A proper cusp is at T=-b/2a, but just solving
+        // for 90 degrees from the starting tangent will also find it, in
+        // addition to finding cusps from degenerate flat lines reversing
+        // direction. Since 180 degrees of rotation is lost to the cusp, we only
+        // need to find 2 roots max.
+        T.xy = 1;
+        tan90 = simd::any(coeffs.C != 0.f)
+                    ? coeffs.C
+                    : math::bit_cast<float2>(pts[2] - pts[0]);
+        *areCusps = true;
+    }
+
+    // Find a second set of chops where the curve is perpendicular to tan90.
+    //
+    //   Tangent_Direction(T) dot tan90 == 0
+    //   (A dot tan90) * T^2 + (2B dot tan90) * T + (C dot tan90) == 0
+    //
+    a = simd::dot(coeffs.A, tan90);
+    b_over_2 = simd::dot(coeffs.B, tan90);
+    c = simd::dot(coeffs.C, tan90);
+    discr_over_4 = b_over_2 * b_over_2 - a * c;
+    float q = sqrtf(discr_over_4);
+    q = -b_over_2 - copysignf(q, b_over_2);
+    T.zw = float2{q, c} / float2{a, q};
+
+    // Throw out T <= 0 and T >= 1 by converting them to 1.
+    // (Use logic such that NaN also converts to 1.)
+    T = simd::if_then_else((T > 0) & (T < 1), T, float4(1));
+    assert(simd::all(T > 0));
+    assert(simd::all(T <= 1));
+
+    // Sort the roots.
+    T = simd::if_then_else((float2{T.x, T.z} < float2{T.y, T.w}).xxyy,
+                           T,
+                           T.yxwz);
+    T = simd::if_then_else((float2{T.x, T.y} < float2{T.z, T.w}).xyxy,
+                           T,
+                           T.zwxy);
+    T = T.y < T.z ? T : T.xzyw;
+
+    // Count the number of roots that != 1 and store T.
+    int4 n4 = (T != 1) & 1;
+    n4.xy += n4.zw;
+    int n = n4.x + n4.y;
+    RIVE_INLINE_MEMCPY(outT, &T, 4 * sizeof(float));
+
+    if (*areCusps)
+    {
+        // Generate padding around cusp points. Odd numbered chops are always
+        // padding sections that pass through a cusp.
+        assert(n <= 2);
+        for (int i = n - 1; i >= 0; --i)
+        {
+            float maxT = i == n - 1 ? 1 : outT[i * 2 + 1];
+            float minT = i == 0 ? 0 : (outT[i - 1] + outT[i]) * .5f;
+            outT[i * 2 + 1] = std::min(outT[i] + cuspPadding, maxT);
+            outT[i * 2 + 0] = std::max(outT[i] - cuspPadding, minT);
+        }
+        n *= 2;
+    }
+
+    return n;
+}
+
+float find_cubic_max_height(const Vec2D p[4], float* outT)
+{
+    // Calculate the cubic height function: 3(dht^3 - (h1 + dh)t^2 + h1t)
+    Vec2D n = (p[3] - p[0]).normalized();
+    n = {-n.y, n.x};
+    float h2 = Vec2D::dot(n, p[2] - p[0]);
+    float h1 = Vec2D::dot(n, p[1] - p[0]);
+    float dh = h1 - h2;
+
+    // A cubic's height function has two local extrema. Find both.
+    float a = 3 * dh;
+    float b_over_minus_2 = dh + h1;
+    float c = h1;
+    float q = sqrtf(std::max(dh * dh + h2 * h1, 0.f));
+    q = b_over_minus_2 + copysignf(q, b_over_minus_2);
+    float2 tt = float2{q, c} / float2{a, q};
+    tt = simd::clamp(tt, float2{0, 0}, float2{1, 1});
+    float2 hh = 3.f * (tt * (tt * (tt * dh - (h1 + dh)) + h1));
+
+    // Go with whichever extremum has the larger absolute height.
+    hh = simd::abs(hh);
+    if (outT != nullptr)
+        *outT = hh.x > hh.y ? tt.x : tt.y;
+    return fmaxf(hh.x, hh.y);
+}
+
+float measure_cubic_local_curvature(const Vec2D p[4],
+                                    const math::CubicCoeffs& coeffs,
+                                    float T,
+                                    float desiredSpread)
+{
+    float2 tan = 3.f * (((coeffs.A * T) + 2.f * coeffs.B) * T + coeffs.C);
+    float lengthTan = sqrtf(simd::dot(tan, tan));
+    if (lengthTan == 0)
+    {
+        return 0;
+    }
+
+    // Define the function
+    //
+    //    Spread(dt) = A__*dt^3 + C__*dt
+    //
+    // Which calculates the spread of the curve in local coordinates, parallel
+    // to tan, over the range "T - dt .. T + dt".
+    tan *= 1 / lengthTan;
+    float A__ = 2 * simd::dot(coeffs.A, tan);
+    float C__ = 3 * (A__ * T + 4 * simd::dot(coeffs.B, tan)) * T +
+                6 * simd::dot(coeffs.C, tan);
+
+    // Decide the "targetSpread" across which we will measure curvature. Ideally
+    // this is "desiredSpread", but use less than that if that would reach
+    // outside T=0..1.
+    float maxDT = fminf(T, 1 - T);
+    float maxSpread = (A__ * maxDT * maxDT + C__) * maxDT;
+    // Pad the maxSpread to guarantee we won't step outside T=0..1.
+    float targetSpread = fminf(desiredSpread, maxSpread * .9999f);
+
+    // Solve for dt, where Spread(dt) == targetSpread.
+    float dt;
+    if (A__ == 0)
+    {
+        // Degenerate case: Spread(dt) == C__*dt.
+        dt = targetSpread / C__;
+    }
+    else
+    {
+        // Solve the normalized cubic x^3 + ax^2 + bx + c == 0.
+        // (Numerical Recipes in C, 5.6 Quadratic and Cubic Equations,
+        // https://hd.fizyka.umk.pl/~jacek/docs/nrc/c5-6.pdf)
+        float r = 1 / A__;
+        float /*a = 0,*/ b = C__ * r, c = -targetSpread * r;
+        float Q = (-1.f / 3) * b, R = .5f * c;
+        float discr = R * R - Q * Q * Q;
+        if (discr < 0)
+        {
+            float sqrtQ = sqrtf(Q);
+            float theta = acosf(R / (sqrtQ * sqrtQ * sqrtQ));
+            // The 3 roots are: (because a == 0)
+            //   -2 * sqrt(Q) * cos(theta/3 + float3{0, 1, -1} * 2*pi/3)
+            // We want the root closest to zero, which will be the 3rd root
+            // because its argument for cos() is always closest to +-pi/2.
+            dt = -2 * sqrtQ * cosf(theta * (1.f / 3) + (-math::PI * 2 / 3));
+        }
+        else
+        {
+            float A = -copysignf(cbrtf(fabsf(R) + sqrtf(discr)), R);
+            dt = A != 0 ? A + Q / A : 0;
+        }
+    }
+    dt = fabsf(dt);
+
+    // Measure curvature over the spread T - dt .. T + dt.
+    float4 t0011 = T + float4{-dt, -dt, dt, dt};
+    float4 tanDirs =
+        (coeffs.A.xyxy * t0011 + 2.f * coeffs.B.xyxy) * t0011 + coeffs.C.xyxy;
+    Vec2D tan0 = math::bit_cast<Vec2D>(tanDirs.xy);
+    Vec2D tan1 = math::bit_cast<Vec2D>(tanDirs.zw);
+    if (t0011.x < 1e-3f) // Calculate a more stable tangent at T <= 0 in case
+    {                    // we've encountered a cusp.
+        tan0 = (p[0] != p[1] ? p[1] : p[1] != p[2] ? p[2] : p[3]) - p[0];
+    }
+    if (t0011.z > 1 - 1e-3f) // Calculate a more stable tangent at T >= 1 in
+    {                        // case we've encountered a cusp.
+        tan1 = p[3] - (p[3] != p[2] ? p[2] : p[2] != p[1] ? p[1] : p[0]);
+    }
+    // NOTE: this will not capture the total absolute curvature if there is an
+    // inflection point, but it's arguably what we want anyway since this will
+    // return the composite curvature over the spread (i.e., clockwise curvature
+    // minus counterclockwise).
+    return math::measure_angle_between_vectors(tan0, tan1);
+}
 } // namespace math
 } // namespace rive
diff --git a/tests/common/testing_gl_renderer.cpp b/tests/common/testing_gl_renderer.cpp
index 4be0c6f..f97277f 100644
--- a/tests/common/testing_gl_renderer.cpp
+++ b/tests/common/testing_gl_renderer.cpp
@@ -76,9 +76,7 @@
             return std::make_unique<rive::RiveRenderer>(m_renderContext.get());
         }
 
-        void beginFrame(rive::ColorInt clearColor,
-                        bool doClear,
-                        bool wireframe) override
+        void beginFrame(const TestingWindow::FrameOptions& options) override
         {
             // For testing, reset GPU resources to their initial sizes every
             // frame. This will stress intermediate flushes more, as well as
@@ -89,10 +87,10 @@
             rive::gpu::RenderContext::FrameDescriptor frameDescriptor = {
                 .renderTargetWidth = m_renderTarget->width(),
                 .renderTargetHeight = m_renderTarget->height(),
-                .loadAction = doClear
+                .loadAction = options.doClear
                                   ? rive::gpu::LoadAction::clear
                                   : rive::gpu::LoadAction::preserveRenderTarget,
-                .clearColor = clearColor,
+                .clearColor = options.clearColor,
                 .msaaSampleCount =
                     (m_rendererFlags & TestingWindow::RendererFlags::useMSAA)
                         ? 4
@@ -100,8 +98,11 @@
                 .disableRasterOrdering =
                     (m_rendererFlags &
                      TestingWindow::RendererFlags::disableRasterOrdering),
-                .wireframe = wireframe,
-            };
+                .wireframe = options.wireframe,
+                .clockwiseFillOverride =
+                    (m_rendererFlags &
+                     TestingWindow::RendererFlags::clockwiseFillOverride) ||
+                    options.clockwiseFillOverride};
             m_renderContext->beginFrame(frameDescriptor);
         }
 
diff --git a/tests/common/testing_gl_renderer.hpp b/tests/common/testing_gl_renderer.hpp
index 17e2ee2..d21a5f8 100644
--- a/tests/common/testing_gl_renderer.hpp
+++ b/tests/common/testing_gl_renderer.hpp
@@ -31,9 +31,7 @@
         int width,
         int height,
         uint32_t targetTextureID = 0) = 0;
-    virtual void beginFrame(rive::ColorInt clearColor,
-                            bool doClear,
-                            bool wireframe) = 0;
+    virtual void beginFrame(const TestingWindow::FrameOptions&) = 0;
     virtual void flush(int dpiScale = 1) = 0;
 
     // For testing directly on RenderContext.
diff --git a/tests/common/testing_window.cpp b/tests/common/testing_window.cpp
index ca35964..48e0622 100644
--- a/tests/common/testing_window.cpp
+++ b/tests/common/testing_window.cpp
@@ -57,10 +57,12 @@
             return "anglemsaa";
         case TestingWindow::Backend::dawn:
             return "dawn";
-        case TestingWindow::Backend::coregraphics:
-            return "coregraphics";
         case Backend::rhi:
             return "rhi";
+        case TestingWindow::Backend::coregraphics:
+            return "coregraphics";
+        case TestingWindow::Backend::skia:
+            return "skia";
     }
     RIVE_UNREACHABLE();
 }
@@ -128,10 +130,12 @@
         return Backend::anglemsaa;
     if (nameStr == "dawn")
         return Backend::dawn;
-    if (nameStr == "coregraphics")
-        return Backend::coregraphics;
     if (nameStr == "rhi")
         return Backend::rhi;
+    if (nameStr == "coregraphics")
+        return Backend::coregraphics;
+    if (nameStr == "skia")
+        return Backend::skia;
     fprintf(stderr, "'%s': invalid TestingWindow::Backend\n", name);
     abort();
 }
@@ -266,10 +270,13 @@
                                                                gpuNameFilter,
                                                                platformWindow);
             break;
+        case Backend::rhi:
+            break;
         case Backend::coregraphics:
             s_TestingWindow = MakeCoreGraphics();
             break;
-        case Backend::rhi:
+        case Backend::skia:
+            s_TestingWindow = MakeSkia();
             break;
     }
     if (!s_TestingWindow)
diff --git a/tests/common/testing_window.hpp b/tests/common/testing_window.hpp
index f1091f2..2defe40 100644
--- a/tests/common/testing_window.hpp
+++ b/tests/common/testing_window.hpp
@@ -66,9 +66,11 @@
         angle,
         anglemsaa,
         dawn,
-        coregraphics,
 
         rhi,
+
+        coregraphics,
+        skia,
     };
 
     constexpr static bool IsGL(Backend backend)
@@ -95,8 +97,9 @@
             case Backend::swiftshader:
             case Backend::swiftshadercore:
             case Backend::dawn:
-            case Backend::coregraphics:
             case Backend::rhi:
+            case Backend::coregraphics:
+            case Backend::skia:
                 return false;
         }
         RIVE_UNREACHABLE();
@@ -126,8 +129,9 @@
             case Backend::swiftshader:
             case Backend::swiftshadercore:
             case Backend::dawn:
-            case Backend::coregraphics:
             case Backend::rhi:
+            case Backend::coregraphics:
+            case Backend::skia:
                 return false;
         }
         RIVE_UNREACHABLE();
@@ -155,10 +159,11 @@
             case Backend::metalcw:
             case Backend::metalatomic:
             case Backend::dawn:
-            case Backend::coregraphics:
             case Backend::angle:
             case Backend::anglemsaa:
             case Backend::rhi:
+            case Backend::coregraphics:
+            case Backend::skia:
                 return false;
         }
         RIVE_UNREACHABLE();
@@ -190,6 +195,7 @@
             case Backend::anglemsaa:
             case Backend::dawn:
             case Backend::coregraphics:
+            case Backend::skia:
                 return false;
         }
         RIVE_UNREACHABLE();
@@ -219,8 +225,9 @@
             case Backend::angle:
             case Backend::anglemsaa:
             case Backend::dawn:
-            case Backend::coregraphics:
             case Backend::rhi:
+            case Backend::coregraphics:
+            case Backend::skia:
                 return false;
         }
         RIVE_UNREACHABLE();
@@ -250,8 +257,9 @@
             case Backend::swiftshader:
             case Backend::angle:
             case Backend::dawn:
-            case Backend::coregraphics:
             case Backend::rhi:
+            case Backend::coregraphics:
+            case Backend::skia:
                 return false;
         }
         RIVE_UNREACHABLE();
@@ -281,8 +289,9 @@
             case Backend::angle:
             case Backend::anglemsaa:
             case Backend::dawn:
-            case Backend::coregraphics:
             case Backend::rhi:
+            case Backend::coregraphics:
+            case Backend::skia:
                 return false;
         }
         RIVE_UNREACHABLE();
@@ -293,6 +302,7 @@
         none = 0,
         useMSAA = 1 << 0,
         disableRasterOrdering = 1 << 1,
+        clockwiseFillOverride = 1 << 2,
     };
 
     enum class Visibility
@@ -321,10 +331,14 @@
         m_width = width;
         m_height = height;
     }
-    virtual std::unique_ptr<rive::Renderer> beginFrame(
-        uint32_t clearColor,
-        bool doClear = true,
-        bool wireframe = false) = 0;
+    struct FrameOptions
+    {
+        uint32_t clearColor;
+        bool doClear = true;
+        bool wireframe = false;
+        bool clockwiseFillOverride = false;
+    };
+    virtual std::unique_ptr<rive::Renderer> beginFrame(const FrameOptions&) = 0;
     virtual void endFrame(std::vector<uint8_t>* pixelData = nullptr) = 0;
 
     // For testing directly on RenderContext.
@@ -374,6 +388,7 @@
     static TestingWindow* MakeAndroidVulkan(void* platformWindow,
                                             bool coreFeaturesOnly,
                                             bool clockwiseFill);
+    static TestingWindow* MakeSkia();
 };
 
 RIVE_MAKE_ENUM_BITSET(TestingWindow::RendererFlags);
diff --git a/tests/common/testing_window_android_vulkan.cpp b/tests/common/testing_window_android_vulkan.cpp
index b35e72a..990a5e9 100644
--- a/tests/common/testing_window_android_vulkan.cpp
+++ b/tests/common/testing_window_android_vulkan.cpp
@@ -199,18 +199,19 @@
         abort();
     }
 
-    std::unique_ptr<rive::Renderer> beginFrame(uint32_t clearColor,
-                                               bool doClear,
-                                               bool wireframe) override
+    std::unique_ptr<rive::Renderer> beginFrame(
+        const FrameOptions& options) override
     {
         m_renderContext->beginFrame(RenderContext::FrameDescriptor{
             .renderTargetWidth = m_width,
             .renderTargetHeight = m_height,
-            .loadAction = doClear ? gpu::LoadAction::clear
-                                  : gpu::LoadAction::preserveRenderTarget,
-            .clearColor = clearColor,
-            .wireframe = wireframe,
-            .clockwiseFillOverride = m_clockwiseFill,
+            .loadAction = options.doClear
+                              ? gpu::LoadAction::clear
+                              : gpu::LoadAction::preserveRenderTarget,
+            .clearColor = options.clearColor,
+            .wireframe = options.wireframe,
+            .clockwiseFillOverride =
+                m_clockwiseFill || options.clockwiseFillOverride,
         });
 
         return std::make_unique<RiveRenderer>(m_renderContext.get());
diff --git a/tests/common/testing_window_coregraphics.cpp b/tests/common/testing_window_coregraphics.cpp
index 370a633..481191e 100644
--- a/tests/common/testing_window_coregraphics.cpp
+++ b/tests/common/testing_window_coregraphics.cpp
@@ -48,14 +48,13 @@
         m_height = h;
     }
 
-    std::unique_ptr<rive::Renderer> beginFrame(uint32_t clearColor,
-                                               bool doClear,
-                                               bool wireframe) override
+    std::unique_ptr<rive::Renderer> beginFrame(
+        const FrameOptions& options) override
     {
         CGContextFlush(m_ctx);
-        if (doClear)
+        if (options.doClear)
         {
-            std::fill(m_pixels.begin(), m_pixels.end(), clearColor);
+            std::fill(m_pixels.begin(), m_pixels.end(), options.clearColor);
         }
         return std::make_unique<rive::CGRenderer>(m_ctx, m_width, m_height);
     }
diff --git a/tests/common/testing_window_egl.cpp b/tests/common/testing_window_egl.cpp
index 7f4d15b..a10b6b6 100644
--- a/tests/common/testing_window_egl.cpp
+++ b/tests/common/testing_window_egl.cpp
@@ -520,13 +520,12 @@
         m_height = height;
     }
 
-    std::unique_ptr<rive::Renderer> beginFrame(uint32_t clearColor,
-                                               bool doClear,
-                                               bool wireframe) override
+    std::unique_ptr<rive::Renderer> beginFrame(
+        const FrameOptions& options) override
     {
         auto renderer =
             m_renderer->reset(m_width, m_height, m_headlessRenderTexture);
-        m_renderer->beginFrame(clearColor, doClear, wireframe);
+        m_renderer->beginFrame(options);
         return renderer;
     }
 
@@ -640,12 +639,17 @@
         case Backend::swiftshader:
         case Backend::swiftshadercore:
         case Backend::dawn:
-        case Backend::coregraphics:
         case Backend::rhi:
+        case Backend::coregraphics:
+        case Backend::skia:
             printf("Invalid backend for TestingWindow::MakeEGLPbuffer.");
             abort();
             break;
     }
+    if (IsClockwiseFill(backend))
+    {
+        rendererFlags |= RendererFlags::clockwiseFillOverride;
+    }
     return new TestingWindowEGL(angleBackend,
                                 samples,
                                 TestingGLRenderer::Make(rendererFlags),
diff --git a/tests/common/testing_window_fiddle_context.cpp b/tests/common/testing_window_fiddle_context.cpp
index f158d45..caff160 100644
--- a/tests/common/testing_window_fiddle_context.cpp
+++ b/tests/common/testing_window_fiddle_context.cpp
@@ -207,6 +207,7 @@
         {
             case Backend::rhi:
             case Backend::coregraphics:
+            case Backend::skia:
                 break;
             case Backend::gl:
             case Backend::glatomic:
@@ -279,19 +280,20 @@
         }
     }
 
-    std::unique_ptr<rive::Renderer> beginFrame(uint32_t clearColor,
-                                               bool doClear,
-                                               bool wireframe) override
+    std::unique_ptr<rive::Renderer> beginFrame(
+        const FrameOptions& options) override
     {
         rive::gpu::RenderContext::FrameDescriptor frameDescriptor = {
             .renderTargetWidth = static_cast<uint32_t>(m_width),
             .renderTargetHeight = static_cast<uint32_t>(m_height),
-            .loadAction = doClear ? rive::gpu::LoadAction::clear
-                                  : rive::gpu::LoadAction::preserveRenderTarget,
-            .clearColor = clearColor,
+            .loadAction = options.doClear
+                              ? rive::gpu::LoadAction::clear
+                              : rive::gpu::LoadAction::preserveRenderTarget,
+            .clearColor = options.clearColor,
             .msaaSampleCount = m_msaaSampleCount,
-            .wireframe = wireframe,
-            .clockwiseFillOverride = m_clockwiseFill,
+            .wireframe = options.wireframe,
+            .clockwiseFillOverride =
+                m_clockwiseFill || options.clockwiseFillOverride,
         };
         m_fiddleContext->begin(std::move(frameDescriptor));
         return m_fiddleContext->makeRenderer(m_width, m_height);
diff --git a/tests/common/testing_window_metal_texture.mm b/tests/common/testing_window_metal_texture.mm
index d05281b..68f8b58 100644
--- a/tests/common/testing_window_metal_texture.mm
+++ b/tests/common/testing_window_metal_texture.mm
@@ -37,17 +37,18 @@
     // rive::gpu::RenderTarget* renderTarget() const override { return
     // m_renderTarget.get(); }
 
-    std::unique_ptr<rive::Renderer> beginFrame(uint32_t clearColor,
-                                               bool doClear,
-                                               bool wireframe) override
+    std::unique_ptr<rive::Renderer> beginFrame(
+        const FrameOptions& options) override
     {
         rive::gpu::RenderContext::FrameDescriptor frameDescriptor = {
             .renderTargetWidth = m_width,
             .renderTargetHeight = m_height,
-            .loadAction = doClear ? rive::gpu::LoadAction::clear
-                                  : rive::gpu::LoadAction::preserveRenderTarget,
-            .clearColor = clearColor,
-            .wireframe = wireframe,
+            .loadAction = options.doClear
+                              ? rive::gpu::LoadAction::clear
+                              : rive::gpu::LoadAction::preserveRenderTarget,
+            .clearColor = options.clearColor,
+            .wireframe = options.wireframe,
+            .clockwiseFillOverride = options.clockwiseFillOverride,
         };
         m_renderContext->beginFrame(frameDescriptor);
         m_flushCommandBuffer = [m_queue commandBuffer];
diff --git a/tests/common/testing_window_skia.cpp b/tests/common/testing_window_skia.cpp
new file mode 100644
index 0000000..aba63ea
--- /dev/null
+++ b/tests/common/testing_window_skia.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2022 Rive
+ */
+#include "testing_window.hpp"
+
+#if defined(TESTING) || !defined(RIVE_SKIA)
+
+// Skia is not available in this build, so no Skia window can be created.
+TestingWindow* TestingWindow::MakeSkia() { return nullptr; }
+
+#else
+
+#include "skia_factory.hpp"
+#include "skia_renderer.hpp"
+#include "skia/include/core/SkCanvas.h"
+#include "skia/include/core/SkSurface.h"
+#include "skia/include/gpu/GrDirectContext.h"
+#include "skia/include/gpu/gl/GrGLAssembleInterface.h"
+
+// Returns a new, heap-allocated TestingWindow that draws with SkiaRenderer
+// into an in-memory raster SkSurface.
+TestingWindow* TestingWindow::MakeSkia()
+{
+    class TestingWindowSkiaRaster : public TestingWindow
+    {
+    public:
+        rive::Factory* factory() override { return &m_factory; }
+
+        // Replaces the surface with a fresh N32 premul raster surface of the
+        // requested size. beginFrame()/endFrame() tolerate a null surface.
+        void resize(int width, int height) override
+        {
+            // Record the new size on the base class, like the other
+            // TestingWindow backends do in their resize() overrides.
+            m_width = width;
+            m_height = height;
+            auto info = SkImageInfo::MakeN32Premul(width, height);
+            m_surface = SkSurface::MakeRaster(info);
+        }
+
+        // Optionally clears the canvas and returns a renderer that targets it.
+        // Returns nullptr when there is no surface to draw into (for example
+        // because resize() has not been called yet).
+        std::unique_ptr<rive::Renderer> beginFrame(
+            const FrameOptions& options) override
+        {
+            if (!m_surface)
+            {
+                return nullptr;
+            }
+            SkCanvas* canvas = m_surface->getCanvas();
+            if (options.doClear)
+            {
+                canvas->clear(options.clearColor);
+            }
+            return std::make_unique<rive::SkiaRenderer>(canvas);
+        }
+
+        // Flushes the canvas. When pixelData is non-null, also fills it with
+        // tightly packed RGBA8888 premul rows, bottom row of the image first.
+        void endFrame(std::vector<uint8_t>* pixelData) override
+        {
+            if (!m_surface)
+            {
+                return;
+            }
+            m_surface->getCanvas()->flush();
+            if (pixelData)
+            {
+                auto img = m_surface->makeImageSnapshot();
+                int w = img->width();
+                int h = img->height();
+                // Do the byte math in size_t so that w * h * 4 can't overflow
+                // int on large surfaces.
+                const size_t rowBytes = static_cast<size_t>(w) * 4;
+                pixelData->resize(rowBytes * static_cast<size_t>(h));
+                SkColorInfo colorInfo(kRGBA_8888_SkColorType,
+                                      kPremul_SkAlphaType,
+                                      nullptr);
+                // Read the canvas back upside down to match GL's orientation.
+                for (int y = 0; y < h; ++y)
+                {
+                    img->readPixels(
+                        nullptr,
+                        SkPixmap(SkImageInfo::Make({w, 1}, colorInfo),
+                                 pixelData->data() + y * rowBytes,
+                                 rowBytes),
+                        0,
+                        h - y - 1);
+                }
+            }
+        }
+
+    private:
+        rive::SkiaFactory m_factory;
+        sk_sp<SkSurface> m_surface;
+    };
+
+    return new TestingWindowSkiaRaster();
+}
+
+#endif
diff --git a/tests/common/testing_window_vulkan_texture.cpp b/tests/common/testing_window_vulkan_texture.cpp
index b897ce0..8c86d57 100644
--- a/tests/common/testing_window_vulkan_texture.cpp
+++ b/tests/common/testing_window_vulkan_texture.cpp
@@ -104,9 +104,8 @@
         return m_renderContext.get();
     }
 
-    std::unique_ptr<rive::Renderer> beginFrame(uint32_t clearColor,
-                                               bool doClear,
-                                               bool wireframe) override
+    std::unique_ptr<rive::Renderer> beginFrame(
+        const FrameOptions& options) override
     {
         if (m_lastFrameFence != nullptr)
         {
@@ -124,11 +123,13 @@
         rive::gpu::RenderContext::FrameDescriptor frameDescriptor = {
             .renderTargetWidth = m_width,
             .renderTargetHeight = m_height,
-            .loadAction = doClear ? rive::gpu::LoadAction::clear
-                                  : rive::gpu::LoadAction::preserveRenderTarget,
-            .clearColor = clearColor,
-            .wireframe = wireframe,
-            .clockwiseFillOverride = m_clockwiseFill,
+            .loadAction = options.doClear
+                              ? rive::gpu::LoadAction::clear
+                              : rive::gpu::LoadAction::preserveRenderTarget,
+            .clearColor = options.clearColor,
+            .wireframe = options.wireframe,
+            .clockwiseFillOverride =
+                m_clockwiseFill || options.clockwiseFillOverride,
         };
         m_renderContext->beginFrame(frameDescriptor);
         return std::make_unique<RiveRenderer>(m_renderContext.get());
diff --git a/tests/gm/emptypath.cpp b/tests/gm/emptypath.cpp
index 26d67c5..a92c985 100644
--- a/tests/gm/emptypath.cpp
+++ b/tests/gm/emptypath.cpp
@@ -64,7 +64,14 @@
 class EmptyStrokeGM : public GM
 {
 public:
-    EmptyStrokeGM() : GM(180, 540, "emptystroke") {}
+    struct Options
+    {
+        bool stroke = false, feather = false;
+    };
+
+    EmptyStrokeGM(Options options, const char* name) :
+        GM(180, 540, name), m_options(options)
+    {}
 
 private:
     void onDraw(Renderer* renderer) override
@@ -76,9 +83,16 @@
             make_path_move_mix,   // expect red black black,
         };
 
-        Paint strokePaint;
-        strokePaint->style(RenderPaintStyle::stroke);
-        strokePaint->thickness(21);
+        Paint paint;
+        if (m_options.stroke)
+        {
+            paint->style(RenderPaintStyle::stroke);
+            paint->thickness(21);
+        }
+        if (m_options.feather)
+        {
+            paint->feather(21);
+        }
 
         Paint dotPaint;
         dotPaint->color(0xffff0000);
@@ -86,8 +100,8 @@
 
         for (size_t j = 0; j < 3; ++j)
         {
-            strokePaint->cap(static_cast<StrokeCap>((3 - j) % 3));
-            strokePaint->join(static_cast<StrokeJoin>(j));
+            paint->cap(static_cast<StrokeCap>((3 - j) % 3));
+            paint->join(static_cast<StrokeJoin>(j));
             for (auto proc : kProcs)
             {
                 for (int i = 0; i < 3; ++i)
@@ -98,10 +112,19 @@
                                                           kPts[i].y + 3.5f}),
                                        dotPaint);
                 }
-                renderer->drawPath(proc(), strokePaint);
+                Path path = proc();
+                path->fillRule(FillRule::clockwise);
+                renderer->drawPath(path, paint);
                 renderer->translate(0, 40);
             }
         }
     }
+
+    Options m_options;
 };
-GMREGISTER(return new EmptyStrokeGM;)
+GMREGISTER(return new EmptyStrokeGM({.stroke = true, .feather = false},
+                                    "emptystroke");)
+GMREGISTER(return new EmptyStrokeGM({.stroke = false, .feather = true},
+                                    "emptyfeather");)
+GMREGISTER(return new EmptyStrokeGM({.stroke = true, .feather = true},
+                                    "emptystrokefeather");)
diff --git a/tests/gm/feather.cpp b/tests/gm/feather.cpp
new file mode 100644
index 0000000..f16ba88
--- /dev/null
+++ b/tests/gm/feather.cpp
@@ -0,0 +1,391 @@
+/*
+ * Copyright 2024 Rive
+ */
+
+#include "gm.hpp"
+#include "gmutils.hpp"
+#include "rive/math/bezier_utils.hpp"
+#include "rive/math/math_types.hpp"
+#include "rive/math/wangs_formula.hpp"
+
+using namespace rivegm;
+
+namespace rive::gpu
+{
+class FeatherGM : public GM
+{
+public:
+    FeatherGM(const char* tag) :
+        GM(1800, 2100, (std::string("feather_") + tag).c_str()),
+        m_paint(TestingWindow::Get()->factory()->makeRenderPaint())
+    {
+        m_paint->color(0xffffffff);
+    }
+
+    ColorInt clearColor() const override { return 0xff000000; }
+
+    // Draws 7 rows of 6 cells at 1.5x scale, calling drawCell() for each one.
+    void onDraw(Renderer* renderer) override
+    {
+        renderer->scale(1.5f, 1.5f);
+        for (int row = 0; row < 7; ++row)
+        {
+            renderer->save();
+            for (int col = 0; col < 6; ++col)
+            {
+                renderer->save();
+                renderer->translate(50, 50);
+                // e^row; row 0 checks epsilon size feathers get smooth AA.
+                m_paint->feather(expf(row));
+                drawCell(renderer, col, row, m_paint.get());
+                renderer->restore();
+                renderer->translate(200, 0);
+            }
+            renderer->restore();
+            renderer->translate(0, 200);
+        }
+    }
+
+private:
+    // Draws the cell at column x, row y using the given feathered paint.
+    virtual void drawCell(Renderer*, int x, int y, RenderPaint*) = 0;
+    rcp<RenderPaint> m_paint;
+};
+
+// Check that basic shapes feather correctly (enough).
+class FeatherShapesGM : public FeatherGM
+{
+public:
+    FeatherShapesGM(const char* tag = "shapes") : FeatherGM(tag) {}
+    // Builds the six shapes; m_shapes[x] is what drawCell() draws in column x.
+    void onOnceBeforeDraw() override
+    {
+        m_shapes.reserve(6); // Keeps the element references below valid.
+        // Column 0: 100x100 square.
+        auto& square = m_shapes.emplace_back(makeShape());
+        square->addRect(0, 0, 100, 100);
+        // Column 1: oval filling the same 100x100 box.
+        auto& circle = m_shapes.emplace_back(makeShape());
+        path_addOval(circle.get(),
+                     AABB{0, 0, 100, 100},
+                     rivegm::PathDirection::clockwise);
+        // Column 2: "shark" of four cubics, scaled/translated into the cell.
+        auto shark = makeShape();
+        shark->moveTo(376, 1422);
+        shark->cubicTo(774, 526, 60, 660, 398, 329);
+        shark->cubicTo(639.333374f,
+                       149.666656f,
+                       905.333313f,
+                       258.666656f,
+                       1196,
+                       656);
+        shark->cubicTo(686, 460, 686, 660, 1370, 1006);
+        // "cuspy" flat line with control points outside T=0..1.
+        shark->cubicTo(1701.333374f,
+                       867.333374f,
+                       44.666626f,
+                       1560.666626f,
+                       376,
+                       1422);
+        m_shapes.emplace_back(makeShape())
+            ->addRenderPath(shark.get(),
+                            Mat2D().scale({.11f, .11f}).translate({-40, -38}));
+        // Column 3: "cusp" shape; no moveTo, so Shape's pen starts at {0, 0}.
+        auto& cusp = m_shapes.emplace_back(makeShape());
+        cusp->lineTo(100, 0);
+        cusp->cubicTo(0, 100, 0, 0, 100, 100);
+        cusp->lineTo(0, 100);
+        cusp->cubicTo(50, 67, -50, 33, 0, 0);
+        // Column 4: rounded rect whose four corners each use different handles.
+        float r = 40;
+        auto& rrect = m_shapes.emplace_back(makeShape());
+        rrect->moveTo(r, 0);
+        rrect->lineTo(100 - r, 0);
+        rrect->cubicTo(100 - r / 2, 0, 100, r / 2, 100, r);
+        rrect->lineTo(100, 100 - r);
+        rrect->cubicTo(100, 100 - r / 5, 100 - r / 5, 100, 100 - r, 100);
+        rrect->lineTo(r, 100);
+        rrect->cubicTo(0 + r / 3, 100, 0, 100 - r / 3, 0, 100 - r);
+        rrect->lineTo(0, r);
+        rrect->cubicTo(0, 0, 0, 0, r, 0);
+        // Column 5: the square plus an x-mirrored, 60%-scaled copy of rrect.
+        auto& irrect = m_shapes.emplace_back(makeShape());
+        irrect->addRenderPath(square.get(), Mat2D());
+        irrect->addRenderPath(rrect.get(),
+                              Mat2D(-60.f / 100, 0, 0, 60.f / 100, 80, 20));
+    }
+
+protected:
+    class Shape : public RenderPath
+    {
+    public:
+        Shape() : m_path(FillRule::clockwise) {}
+        // RenderPath interface: forwards to m_path and tracks the pen position.
+        RenderPath* renderPath() override { return m_path.get(); }
+        void rewind() override { m_path = Path(); }
+        void fillRule(FillRule value) override { m_path->fillRule(value); }
+        void moveTo(float x, float y) override
+        {
+            m_path->moveTo(x, y);
+            m_begin = m_pen = {x, y};
+        }
+        void lineTo(float x, float y) override
+        {
+            m_path->lineTo(x, y);
+            m_pen = {x, y};
+        }
+        void cubicTo(float ox, float oy, float ix, float iy, float x, float y)
+            override
+        {
+            m_path->cubicTo(ox, oy, ix, iy, x, y);
+            m_pen = {x, y};
+        }
+        void close() override
+        {
+            m_path->close();
+            m_pen = m_begin;
+        }
+        void addRenderPath(RenderPath* path, const Mat2D& transform) override
+        {
+            auto shape = static_cast<Shape*>(path); // path must be a Shape.
+            m_path->addRenderPath(shape->renderPath(), transform);
+            m_pen = shape->m_pen;
+            m_begin = shape->m_begin;
+        }
+
+    protected:
+        Path m_path;
+        Vec2D m_pen = {0, 0};
+        Vec2D m_begin = {0, 0};
+    };
+    // Factory hook: subclasses may override this to supply their own Shape.
+    virtual std::unique_ptr<Shape> makeShape()
+    {
+        return std::make_unique<Shape>();
+    }
+    // Draws the x'th shape with the given paint; y is not used here.
+    void drawCell(Renderer* renderer, int x, int y, RenderPaint* paint) override
+    {
+        renderer->drawPath(m_shapes[x]->renderPath(), paint);
+    }
+
+    std::vector<std::unique_ptr<Shape>> m_shapes;
+};
+GMREGISTER(return new FeatherShapesGM)
+
+// Validate corners by tessellating shapes into polygons and then feathering
+// them.
+class FeatherPolyShapesGM : public FeatherShapesGM
+{
+public:
+    FeatherPolyShapesGM() : FeatherShapesGM("polyshapes") {}
+
+private:
+    class PolyShape : public Shape
+    {
+    public:
+        void cubicTo(float ox, float oy, float ix, float iy, float x, float y)
+            override
+        {
+            Vec2D p[4] = {m_pen, {ox, oy}, {ix, iy}, {x, y}};
+            // Odd n >= 3: the loop emits points two at a time, all with T < 1.
+            int n = std::max(static_cast<int>(wangs_formula::cubic(p, 8)), 3);
+            n = (n & ~1) + 1;
+            math::EvalCubic ec(p);
+            float dt = 2.f / n;
+            float4 t = dt * float4{.5f, .5f, 1, 1};
+            for (int i = 1; i < n; i += 2, t += dt)
+            {
+                float4 result = ec(t);
+                Shape::lineTo(result.x, result.y);
+                Shape::lineTo(result.z, result.w);
+            }
+            Shape::lineTo(x, y);
+        }
+    };
+    // Makes FeatherShapesGM build each of its shapes as a PolyShape.
+    std::unique_ptr<Shape> makeShape() override
+    {
+        return std::make_unique<PolyShape>();
+    }
+};
+GMREGISTER(return new FeatherPolyShapesGM)
+
+// Check that corners don't have artifacts.
+class FeatherCornerGM : public FeatherGM
+{
+public:
+    FeatherCornerGM() : FeatherGM("corner") {}
+
+private:
+    // Angle between the corner's two edges: pi, pi/2, then shrinking with x.
+    static float cornerAngle(int x)
+    {
+        switch (x)
+        {
+            case 0:
+                return math::PI;
+            case 1:
+                return math::PI / 2;
+            default:
+                return math::PI * powf((5 - x) / 5.f, 2.71828f);
+        }
+    }
+
+    // Draws a corner at the origin, clipped to the cell's surroundings.
+    void drawCell(Renderer* renderer, int x, int y, RenderPaint* paint) override
+    {
+        const float theta = cornerAngle(x);
+        renderer->clipPath(PathBuilder::Rect({-20, -20, 120, 120}));
+        const float left = 3 * math::PI;
+        const Vec2D dir0(cosf(left), sinf(left));
+        const Vec2D dir1(cosf(left - theta), sinf(left - theta));
+        Path path(FillRule::clockwise);
+        path->move(200 * dir0 + Vec2D{0, 200});
+        path->line(200 * dir0);
+        path->lineTo(0, 0);
+        path->line(200 * dir1);
+        path->line(200 * dir1 + Vec2D{0, 200});
+        renderer->translate(50, 50);
+        renderer->drawPath(path, paint);
+    }
+};
+GMREGISTER(return new FeatherCornerGM)
+
+// Check that tightly rounded corners don't have artifacts.
+class FeatherRoundCornerGM : public FeatherGM
+{
+public:
+    FeatherRoundCornerGM() : FeatherGM("roundcorner") {}
+
+private:
+    // The corner's two edges are theta apart (shrinking as x grows); its tip
+    // is rounded by one cubic whose control points both sit at the origin.
+    void drawCell(Renderer* renderer, int x, int y, RenderPaint* paint) override
+    {
+        renderer->clipPath(PathBuilder::Rect({-20, -20, 120, 120}));
+        const float theta = math::PI * powf((5 - x) / 5.f, 1.5f);
+        const float halfTheta = theta / 2;
+        const float down = math::PI / 2;
+        const Vec2D edge0(cosf(down + halfTheta), sinf(down + halfTheta));
+        const Vec2D edge1(cosf(down - halfTheta), sinf(down - halfTheta));
+        Path path(FillRule::clockwise);
+        path->move(200 * edge0 + Vec2D{0, 200});
+        path->line(200 * edge0);
+        path->line(75 * edge0);
+        path->cubic({0, 0}, {0, 0}, 75 * edge1);
+        path->line(200 * edge1);
+        path->line(200 * edge1 + Vec2D{0, 200});
+        renderer->translate(50, 50);
+        renderer->drawPath(path, paint);
+    }
+};
+GMREGISTER(return new FeatherRoundCornerGM)
+
+// Check that the cusp points on a squashed ellipse don't have artifacts.
+class FeatherEllipseGM : public FeatherGM
+{
+public:
+    FeatherEllipseGM() : FeatherGM("ellipse") {}
+
+private:
+    // Scales a unit circle by (50 * squash, 50), where squash falls from 1 in
+    // column 0 to 0 in column 5.
+    void drawCell(Renderer* renderer, int x, int y, RenderPaint* paint) override
+    {
+        auto unitCircle = PathBuilder::Circle(0, 0, 1);
+        const float squash = powf((5 - x) / 5.f, 2.71828f);
+        const Mat2D squashMatrix = Mat2D::fromScale(50 * squash, 50);
+        Path ellipse(FillRule::clockwise);
+        ellipse->addRenderPath(unitCircle, squashMatrix);
+        renderer->translate(50, 50);
+        renderer->drawPath(ellipse, paint);
+    }
+};
+GMREGISTER(return new FeatherEllipseGM)
+
+// Check that non-degenerate cubic cusps and near-cusps don't have artifacts.
+class FeatherCuspGM : public FeatherGM
+{
+public:
+    FeatherCuspGM() : FeatherGM("cusp") {}
+
+private:
+    // Column 3 has dx == 0, which puts an exact cusp at T = .5. The other
+    // columns move the control points together (dx < 0) or apart (dx > 0).
+    void drawCell(Renderer* renderer, int x, int y, RenderPaint* paint) override
+    {
+        const float offset = x - 3.f;
+        const float dx = 10 * copysignf(powf(fabsf(offset), 1.75f), offset);
+        Path cusp(FillRule::clockwise);
+        cusp->moveTo(0, 100);
+        cusp->moveTo(0, 100); // NOTE(review): repeated moveTo -- intentional?
+        cusp->cubicTo(100 + dx, 0, 0 - dx, 0, 100, 100);
+        renderer->drawPath(cusp, paint);
+    }
+};
+GMREGISTER(return new FeatherCuspGM)
+
+// Check that basic strokes feather correctly (enough).
+class FeatherStrokesGM : public FeatherGM
+{
+public:
+    FeatherStrokesGM() : FeatherGM("strokes")
+    {
+        // Reserve up front so the element references below stay valid.
+        m_strokes.reserve(6);
+        Path& squarePath = m_strokes.emplace_back();
+        squarePath->addRect(0, 0, 100, 100);
+
+        m_strokes.emplace_back(PathBuilder::Circle(50, 50, 50));
+
+        Path& serpentine = m_strokes.emplace_back();
+        serpentine->moveTo(0, 100);
+        serpentine->cubicTo(60, 0, 30, 0, 100, 100);
+
+        Path& cuspy = m_strokes.emplace_back();
+        cuspy->lineTo(100, 0);
+        cuspy->cubicTo(0, 100, 0, 0, 100, 100);
+        cuspy->lineTo(0, 100);
+        cuspy->cubicTo(50, 67, -50, 33, 0, 0);
+
+        Path& looped = m_strokes.emplace_back();
+        looped->moveTo(25, 100);
+        looped->cubicTo(250, -20, -150, -20, 75, 100);
+        // Last path: the square plus an x-mirrored, 80%-scaled rounded rect.
+        float r = 40;
+        Path inner;
+        inner->moveTo(r, 0);
+        inner->lineTo(100 - r, 0);
+        inner->cubicTo(100 - r / 2, 0, 100, r / 2, 100, r);
+        inner->lineTo(100, 100 - r);
+        inner->cubicTo(100, 100 - r / 5, 100 - r / 5, 100, 100 - r, 100);
+        inner->lineTo(r, 100);
+        inner->cubicTo(0 + r / 3, 100, 0, 100 - r / 3, 0, 100 - r);
+        inner->lineTo(0, r);
+        inner->cubicTo(0, 0, 0, 0, r, 0);
+        Path& framed = m_strokes.emplace_back();
+        framed->addRenderPath(squarePath, Mat2D());
+        framed->addRenderPath(inner,
+                              Mat2D(-80.f / 100, 0, 0, 80.f / 100, 90, 10));
+    }
+
+private:
+    // Strokes the x'th path at thickness 15 with the paint handed in by
+    // FeatherGM::onDraw(), switching that paint to stroke style.
+    void drawCell(Renderer* renderer, int x, int y, RenderPaint* paint) override
+    {
+        const bool oddRow = (y & 1) != 0;
+        paint->style(RenderPaintStyle::stroke);
+        paint->thickness(15);
+        // Feathers ignore the join and the cap; both are still varied by row
+        // here.
+        paint->join(oddRow ? StrokeJoin::bevel : StrokeJoin::miter);
+        paint->cap(oddRow ? StrokeCap::square : StrokeCap::butt);
+        renderer->drawPath(m_strokes[x].get(), paint);
+    }
+
+    std::vector<Path> m_strokes;
+};
+GMREGISTER(return new FeatherStrokesGM)
+} // namespace rive::gpu
diff --git a/tests/gm/feathertext.cpp b/tests/gm/feathertext.cpp
new file mode 100644
index 0000000..2dd181a
--- /dev/null
+++ b/tests/gm/feathertext.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2024 Rive
+ */
+
+#include "gm.hpp"
+#include "gmutils.hpp"
+
+#ifdef WITH_RIVE_TEXT
+
+#include "assets/roboto_flex.ttf.hpp"
+#include "assets/montserrat.ttf.hpp"
+#include "common/testing_window.hpp"
+#include "rive/text/font_hb.hpp"
+#include "rive/text/raw_text.hpp"
+
+using namespace rivegm;
+
+constexpr static char LOREM_IPSUM[] =
+    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam lorem "
+    "magna, pharetra at eros a, malesuada vehicula ante. Cras pharetra metus "
+    "eu augue venenatis, sed semper justo vulputate. Suspendisse a velit "
+    "neque. In massa odio, auctor ac tincidunt ac, cursus sit amet sapien. "
+    "Donec in sem vitae nisi commodo feugiat. Lorem ipsum dolor sit amet, "
+    "consectetur adipiscing elit.";
+
+class FeatherTextGM : public GM
+{
+public:
+    FeatherTextGM(rive::Span<uint8_t> fontBytes, const char* name) :
+        GM(1600, 1840, name), m_text(TestingWindow::Get()->factory())
+    {
+        m_paint->color(0xff000000);
+        m_text.maxWidth(720);
+        m_text.sizing(rive::TextSizing::fixed);
+        m_text.append(LOREM_IPSUM,
+                      ref_rcp(m_paint.get()),
+                      HBFont::Decode(fontBytes),
+                      40.0f);
+    }
+
+    void updateFrameOptions(TestingWindow::FrameOptions* options) const override
+    {
+        // Force the RawText to be drawn with clockwise fill.
+        options->clockwiseFillOverride = true;
+    }
+
+    void onDraw(rive::Renderer* renderer) override
+    {
+        renderer->translate(40, 40);
+
+        for (int i = 0; i < 3; ++i)
+        {
+            m_paint->feather(powf(2.2f, i * 2 + 0) - .99999f);
+            m_text.render(renderer);
+            renderer->translate(800, 0);
+
+            m_paint->feather(powf(2.2f, i * 2 + 1) - .99999f);
+            m_text.render(renderer);
+            renderer->translate(-800, 600);
+        }
+    }
+
+private:
+    rive::RawText m_text;
+    Paint m_paint;
+};
+
+GMREGISTER_SLOW(return new FeatherTextGM(assets::roboto_flex_ttf(),
+                                         "feathertext_roboto"))
+GMREGISTER_SLOW(return new FeatherTextGM(assets::montserrat_ttf(),
+                                         "feathertext_montserrat"))
+
+#endif
diff --git a/tests/gm/gm.cpp b/tests/gm/gm.cpp
index 9d24f74..94c3410 100644
--- a/tests/gm/gm.cpp
+++ b/tests/gm/gm.cpp
@@ -4,7 +4,7 @@
 
 #include "gm.hpp"
 
-#include "gmutils.hpp"
+#include "common/testing_window.hpp"
 
 using namespace rivegm;
 
@@ -12,7 +12,9 @@
 
 void GM::run(std::vector<uint8_t>* pixels)
 {
-    auto renderer = TestingWindow::Get()->beginFrame(clearColor());
+    TestingWindow::FrameOptions frameOptions = {.clearColor = clearColor()};
+    updateFrameOptions(&frameOptions);
+    auto renderer = TestingWindow::Get()->beginFrame(frameOptions);
     draw(renderer.get());
     TestingWindow::Get()->endFrame(pixels);
 }
diff --git a/tests/gm/gm.hpp b/tests/gm/gm.hpp
index 40f5e28..30318b3 100644
--- a/tests/gm/gm.hpp
+++ b/tests/gm/gm.hpp
@@ -5,6 +5,7 @@
 #ifndef _RIVEGM_GM_HPP_
 #define _RIVEGM_GM_HPP_
 
+#include "common/testing_window.hpp"
 #include "rive/renderer.hpp"
 #include <cstring>
 #include <memory>
@@ -31,13 +32,15 @@
 
     void onceBeforeDraw() { this->onOnceBeforeDraw(); }
 
-    // Calls clearColor(), TestingWindow::beginFrame(), draw(),
-    // TestingWindow::flush(). (Most GMs just need to override onDraw() instead
-    // of overriding this method.)
+    // Calls clearColor(), updateFrameOptions(),
+    // TestingWindow::beginFrame(), draw(), TestingWindow::endFrame(). (Most
+    // GMs just need to override onDraw() instead of overriding this method.)
     virtual void run(std::vector<uint8_t>* pixels);
 
     virtual rive::ColorInt clearColor() const { return 0xffffffff; }
 
+    virtual void updateFrameOptions(TestingWindow::FrameOptions*) const {}
+
     void draw(rive::Renderer*);
 
 protected:
diff --git a/tests/gm/gmutils.hpp b/tests/gm/gmutils.hpp
index b5de656..c7a5794 100644
--- a/tests/gm/gmutils.hpp
+++ b/tests/gm/gmutils.hpp
@@ -58,6 +58,7 @@
 
 public:
     Path() : m_Path(TestingWindow::Get()->factory()->makeEmptyRenderPath()) {}
+    Path(rive::FillRule rule) : Path() { m_Path->fillRule(rule); }
     Path(Path&& path) : m_Path(std::move(path.m_Path)) {}
     Path(const Path&) = delete;
 
diff --git a/tests/gm/interleavedfeather.cpp b/tests/gm/interleavedfeather.cpp
new file mode 100644
index 0000000..fe8d62d
--- /dev/null
+++ b/tests/gm/interleavedfeather.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2024 Rive
+ */
+
+#include "gm.hpp"
+#include "gmutils.hpp"
+#include "common/rand.hpp"
+#include "rive/math/math_types.hpp"
+
+using namespace rivegm;
+
+namespace rive::gpu
+{
+// Verifies that draw call batching works correctly when drawing strokes and
+// fills with and without feathers.
+class InterleavedFeatherGM : public GM
+{
+public:
+    InterleavedFeatherGM() : GM(1000, 1000, "interleavedfeather") {}
+
+    ColorInt clearColor() const override { return 0; }
+
+    void onDraw(Renderer* renderer) override
+    {
+        Rand rando;
+        rando.seed(1);
+
+        Path flower;
+        flower->fillRule(FillRule::clockwise);
+        constexpr static int NUM_PETALS = 7;
+        constexpr static float R = 100;
+        flower->moveTo(R, 0);
+        for (int i = 1; i <= NUM_PETALS; ++i)
+        {
+            float c1 = 2 * M_PI * (i - 2 / 3.f) / NUM_PETALS;
+            float c2 = 2 * M_PI * (i - 1 / 3.f) / NUM_PETALS;
+            float theta = 2 * M_PI * i / NUM_PETALS;
+            flower->cubicTo(cosf(c1) * R * 1.65,
+                            sinf(c1) * R * 1.65,
+                            cosf(c2) * R * 1.65,
+                            sinf(c2) * R * 1.65,
+                            cosf(theta) * R,
+                            sinf(theta) * R);
+        }
+        path_addOval(flower,
+                     AABB(-.6f * R, -.6f * R, .6f * R, .6f * R),
+                     rivegm::PathDirection::ccw);
+        flower->close();
+
+        Paint paint;
+        paint->blendMode(BlendMode::colorBurn);
+        for (int i = 0; i < 300; ++i)
+        {
+            if (rando.boolean())
+            {
+                paint->feather(rando.f32(20));
+            }
+            else
+            {
+                paint->feather(expf(rando.f32(4)));
+            }
+            renderer->save();
+            renderer->translate(rando.f32(0, 1000), rando.f32(0, 1000));
+            float s = rando.f32(.25f, .5f);
+            renderer->scale(s, s);
+            renderer->rotate(rando.f32(math::PI * 2));
+
+            paint->style(RenderPaintStyle::fill);
+            paint->color(rando.u32() | 0x40808080);
+            renderer->drawPath(flower.get(), paint.get());
+
+            if (rando.boolean())
+            {
+                paint->style(RenderPaintStyle::stroke);
+                paint->thickness(rando.f32(1, 8));
+                paint->join(static_cast<StrokeJoin>(rando.u32(0, 2)));
+                paint->color(rando.u32() & 0xff000000);
+                renderer->drawPath(flower.get(), paint.get());
+            }
+
+            renderer->restore();
+        }
+    }
+};
+GMREGISTER(return new InterleavedFeatherGM)
+} // namespace rive::gpu
diff --git a/tests/gm/mandoline.cpp b/tests/gm/mandoline.cpp
index 300337a..3ec3964 100644
--- a/tests/gm/mandoline.cpp
+++ b/tests/gm/mandoline.cpp
@@ -18,189 +18,28 @@
 using namespace rive;
 using namespace rive::math;
 
-static Vec2D SkFindBisector(Vec2D a, Vec2D b)
-{
-    std::array<Vec2D, 2> v;
-    if (Vec2D::dot(a, b) >= 0)
-    {
-        // a,b are within +/-90 degrees apart.
-        v = {a, b};
-    }
-    else if (Vec2D::cross(a, b) >= 0)
-    {
-        // a,b are >90 degrees apart. Find the bisector of their interior
-        // normals instead. (Above 90 degrees, the original vectors start
-        // cancelling each other out which eventually becomes unstable.)
-        v[0] = {-a.y, +a.x};
-        v[1] = {+b.y, -b.x};
-    }
-    else
-    {
-        // a,b are <-90 degrees apart. Find the bisector of their interior
-        // normals instead. (Below -90 degrees, the original vectors start
-        // cancelling each other out which eventually becomes unstable.)
-        v[0] = {+a.y, -a.x};
-        v[1] = {-b.y, +b.x};
-    }
-    // Return "normalize(v[0]) + normalize(v[1])".
-    float2 x0_x1{v[0].x, v[1].x};
-    float2 y0_y1{v[0].y, v[1].y};
-    auto invLengths = 1.0f / simd::sqrt(x0_x1 * x0_x1 + y0_y1 * y0_y1);
-    x0_x1 *= invLengths;
-    y0_y1 *= invLengths;
-    return Vec2D{x0_x1[0] + x0_x1[1], y0_y1[0] + y0_y1[1]};
-}
-
-static float SkFindQuadMidTangent(const Vec2D src[3])
-{
-    // Tangents point in the direction of increasing T, so tan0 and -tan1 both
-    // point toward the midtangent. The bisector of tan0 and -tan1 is orthogonal
-    // to the midtangent:
-    //
-    //     n dot midtangent = 0
-    //
-    Vec2D tan0 = src[1] - src[0];
-    Vec2D tan1 = src[2] - src[1];
-    Vec2D bisector = SkFindBisector(tan0, -tan1);
-
-    // The midtangent can be found where (F' dot bisector) = 0:
-    //
-    //   0 = (F'(T) dot bisector) = |2*T 1| * |p0 - 2*p1 + p2| * |bisector.x|
-    //                                        |-2*p0 + 2*p1  |   |bisector.y|
-    //
-    //                     = |2*T 1| * |tan1 - tan0| * |nx|
-    //                                 |2*tan0     |   |ny|
-    //
-    //                     = 2*T * ((tan1 - tan0) dot bisector) + (2*tan0 dot
-    //                     bisector)
-    //
-    //   T = (tan0 dot bisector) / ((tan0 - tan1) dot bisector)
-    float T = Vec2D::dot(tan0, bisector) / Vec2D::dot(tan0 - tan1, bisector);
-    if (!(T > 0 && T < 1))
-    {           // Use "!(positive_logic)" so T=nan will take this branch.
-        T = .5; // The quadratic was a line or near-line. Just chop at .5.
-    }
-
-    return T;
-}
-
-// Finds the root nearest 0.5. Returns 0.5 if the roots are undefined or outside
-// 0..1.
-static float solve_quadratic_equation_for_midtangent(float a,
-                                                     float b,
-                                                     float c,
-                                                     float discr)
-{
-    // Quadratic formula from Numerical Recipes in C:
-    float q = -.5f * (b + copysignf(sqrtf(discr), b));
-    // The roots are q/a and c/q. Pick the midtangent closer to T=.5.
-    float _5qa = -.5f * q * a;
-    float T = fabsf(q * q + _5qa) < fabsf(a * c + _5qa) ? q / a : c / q;
-    if (!(T > 0 && T < 1))
-    { // Use "!(positive_logic)" so T=NaN will take this branch.
-        // Either the curve is a flat line with no rotation or FP precision
-        // failed us. Chop at .5.
-        T = .5;
-    }
-    return T;
-}
-
-static float4 fma(float4 f, float4 m, float4 a) { return f * m + a; }
-
-float SkFindCubicMidTangent(const Vec2D src[4])
-{
-    // Tangents point in the direction of increasing T, so tan0 and -tan1 both
-    // point toward the midtangent. The bisector of tan0 and -tan1 is orthogonal
-    // to the midtangent:
-    //
-    //     bisector dot midtangent == 0
-    //
-    Vec2D tan0 = (src[0] == src[1]) ? src[2] - src[0] : src[1] - src[0];
-    Vec2D tan1 = (src[2] == src[3]) ? src[3] - src[1] : src[3] - src[2];
-    Vec2D bisector = SkFindBisector(tan0, -tan1);
-
-    // Find the T value at the midtangent. This is a simple quadratic equation:
-    //
-    //     midtangent dot bisector == 0, or using a tangent matrix C' in power
-    //     basis form:
-    //
-    //                   |C'x  C'y|
-    //     |T^2  T  1| * |.    .  | * |bisector.x| == 0
-    //                   |.    .  |   |bisector.y|
-    //
-    // The coeffs for the quadratic equation we need to solve are therefore:  C'
-    // * bisector
-    static const float4 kM[3] = {float4{-1, 2, -1, 0},
-                                 float4{3, -4, 1, 0},
-                                 float4{-3, 2, 0, 0}};
-    auto C_x = fma(
-        kM[0],
-        src[0].x,
-        fma(kM[1], src[1].x, fma(kM[2], src[2].x, float4{src[3].x, 0, 0, 0})));
-    auto C_y = fma(
-        kM[0],
-        src[0].y,
-        fma(kM[1], src[1].y, fma(kM[2], src[2].y, float4{src[3].y, 0, 0, 0})));
-    auto coeffs = C_x * bisector.x + C_y * bisector.y;
-
-    // Now solve the quadratic for T.
-    float T = 0;
-    float a = coeffs[0], b = coeffs[1], c = coeffs[2];
-    float discr = b * b - 4 * a * c;
-    if (discr > 0)
-    { // This will only be false if the curve is a line.
-        return solve_quadratic_equation_for_midtangent(a, b, c, discr);
-    }
-    else
-    {
-        // This is a 0- or 360-degree flat line. It doesn't have single points
-        // of midtangent. (tangent == midtangent at every point on the curve
-        // except the cusp points.) Chop in between both cusps instead, if any.
-        // There can be up to two cusps on a flat line, both where the tangent
-        // is perpendicular to the starting tangent:
-        //
-        //     tangent dot tan0 == 0
-        //
-        coeffs = C_x * tan0.x + C_y * tan0.y;
-        a = coeffs[0];
-        b = coeffs[1];
-        if (a != 0)
-        {
-            // We want the point in between both cusps. The midpoint of:
-            //
-            //     (-b +/- sqrt(b^2 - 4*a*c)) / (2*a)
-            //
-            // Is equal to:
-            //
-            //     -b / (2*a)
-            T = -b / (2 * a);
-        }
-        if (!(T > 0 && T < 1))
-        { // Use "!(positive_logic)" so T=NaN will take this branch.
-            // Either the curve is a flat line with no rotation or FP precision
-            // failed us. Chop at .5.
-            T = .5;
-        }
-        return T;
-    }
-}
+// Atomic mode uses 6:11 fixed point and clockwiseAtomic uses 7:8, so the
+// winding number breaks if a shape has more than +/-32 [ == +/-2^(6-1)] levels
+// of self overlap at any point.
+constexpr static float SLICE_LENGTH = 2;
 
 // Slices paths into sliver-size contours shaped like ice cream cones.
 class MandolineSlicer
 {
 public:
-    MandolineSlicer(Vec2D anchorPt) { this->reset(anchorPt); }
+    MandolineSlicer(Vec2D anchorPt, Vec2D p0) { this->reset(anchorPt, p0); }
 
-    void reset(Vec2D anchorPt)
+    void reset(Vec2D anchorPt, Vec2D p0)
     {
         fPath = Path();
         fPath->fillRule(FillRule::evenOdd);
-        fLastPt = fAnchorPt = anchorPt;
+        fAnchorPt = anchorPt;
+        fLastPt = p0;
     }
 
-    void sliceLine(Vec2D pt, int subdivisionDepth)
+    void sliceLine(Vec2D pt)
     {
-        if (subdivisionDepth <= 0)
+        if ((pt - fLastPt).length() < SLICE_LENGTH)
         {
             fPath->moveTo(fAnchorPt.x, fAnchorPt.y);
             fPath->lineTo(fLastPt.x, fLastPt.y);
@@ -215,13 +54,13 @@
             return;
         }
         Vec2D midpt = fLastPt * (1 - T) + pt * T;
-        this->sliceLine(midpt, subdivisionDepth - 1);
-        this->sliceLine(pt, subdivisionDepth - 1);
+        this->sliceLine(midpt);
+        this->sliceLine(pt);
     }
 
-    void sliceQuadratic(Vec2D p1, Vec2D p2, int subdivisionDepth)
+    void sliceQuadratic(Vec2D p1, Vec2D p2)
     {
-        if (subdivisionDepth <= 0)
+        if ((p2 - fLastPt).length() < SLICE_LENGTH)
         {
             fPath->moveTo(fAnchorPt.x, fAnchorPt.y);
             fPath->lineTo(fLastPt.x, fLastPt.y);
@@ -235,26 +74,20 @@
             fLastPt = p2;
             return;
         }
-        float T = SkFindQuadMidTangent(
-            std::array<Vec2D, 3>{fAnchorPt, p1, p2}.data());
-        if (0 == T)
-        {
-            return;
-        }
         Vec2D P[4] = {fLastPt,
                       Vec2D::lerp(fLastPt, p1, 2 / 3.f),
                       Vec2D::lerp(p2, p1, 2 / 3.f),
                       p2},
               PP[7];
-        math::chop_cubic_at(P, PP, T);
+        math::chop_cubic_at(P, PP, .5f);
 
-        this->sliceCubic(PP[1], PP[2], PP[3], subdivisionDepth - 1);
-        this->sliceCubic(PP[4], PP[5], PP[6], subdivisionDepth - 1);
+        this->sliceCubic(PP[1], PP[2], PP[3]);
+        this->sliceCubic(PP[4], PP[5], PP[6]);
     }
 
-    void sliceCubic(Vec2D p1, Vec2D p2, Vec2D p3, int subdivisionDepth)
+    void sliceCubic(Vec2D p1, Vec2D p2, Vec2D p3)
     {
-        if (subdivisionDepth <= 0)
+        if ((p3 - fLastPt).length() < SLICE_LENGTH)
         {
             fPath->moveTo(fAnchorPt.x, fAnchorPt.y);
             fPath->lineTo(fLastPt.x, fLastPt.y);
@@ -263,16 +96,10 @@
             fLastPt = p3;
             return;
         }
-        float T = SkFindCubicMidTangent(
-            std::array<Vec2D, 4>{fAnchorPt, p1, p2, p3}.data());
-        if (0 == T)
-        {
-            return;
-        }
         Vec2D P[4] = {fLastPt, p1, p2, p3}, PP[7];
-        math::chop_cubic_at(P, PP, T);
-        this->sliceCubic(PP[1], PP[2], PP[3], subdivisionDepth - 1);
-        this->sliceCubic(PP[4], PP[5], PP[6], subdivisionDepth - 1);
+        math::chop_cubic_at(P, PP, .5f);
+        this->sliceCubic(PP[1], PP[2], PP[3]);
+        this->sliceCubic(PP[4], PP[5], PP[6]);
     }
 
     rive::RenderPath* path() const { return fPath; }
@@ -294,63 +121,51 @@
 
     void onDraw(Renderer* renderer) override
     {
-        // Atomic mode uses 7:9 fixed point and clockwiseAtomic uses 7:8, so the
-        // winding number breaks if a shape has more than +/-64 [ == +/-2^(7-1)]
-        // levels of self overlap at any point.
-        constexpr int SUBDIVISION_DEPTH = 4;
-
         Paint paint;
         paint->color(0xc0c0c0c0);
 
-        MandolineSlicer mandoline({41, 43});
-        mandoline.sliceCubic({5, 277},
-                             {381, -74},
-                             {243, 162},
-                             SUBDIVISION_DEPTH);
-        mandoline.sliceLine({41, 43}, SUBDIVISION_DEPTH);
+        MandolineSlicer mandoline({0, -500}, {41, 43});
+        mandoline.sliceCubic({5, 277}, {381, -74}, {243, 162});
+        mandoline.sliceLine({41, 43});
         renderer->drawPath(mandoline.path(), paint);
 
-        mandoline.reset({357.049988f, 446.049988f});
+        mandoline.reset({700, 700}, {357.049988f, 446.049988f});
         mandoline.sliceCubic({472.750000f, -71.950012f},
                              {639.750000f, 531.950012f},
-                             {309.049988f, 347.950012f},
-                             SUBDIVISION_DEPTH);
-        mandoline.sliceLine({309.049988f, 419}, SUBDIVISION_DEPTH);
-        mandoline.sliceLine({357.049988f, 446.049988f}, SUBDIVISION_DEPTH);
+                             {309.049988f, 347.950012f});
+        mandoline.sliceLine({309.049988f, 419});
+        mandoline.sliceLine({357.049988f, 446.049988f});
         renderer->drawPath(mandoline.path(), paint);
 
         renderer->save();
         renderer->translate(421, 105);
         renderer->scale(100, 81);
         mandoline.reset(
+            {0, 500},
             {-cosf(degreesToRadians(-60)), sinf(degreesToRadians(-60))});
         mandoline.sliceQuadratic(
             {-2, 0},
-            {-cosf(degreesToRadians(60)), sinf(degreesToRadians(60))},
-            SUBDIVISION_DEPTH);
+            {-cosf(degreesToRadians(60)), sinf(degreesToRadians(60))});
         mandoline.sliceQuadratic(
             {-cosf(degreesToRadians(120)) * 2, sinf(degreesToRadians(120)) * 2},
-            {1, 0},
-            SUBDIVISION_DEPTH);
-        mandoline.sliceLine({0, 0}, SUBDIVISION_DEPTH);
+            {1, 0});
+        mandoline.sliceLine({0, 0});
         mandoline.sliceLine(
-            {-cosf(degreesToRadians(-60)), sinf(degreesToRadians(-60))},
-            SUBDIVISION_DEPTH);
+            {-cosf(degreesToRadians(-60)), sinf(degreesToRadians(-60))});
         renderer->drawPath(mandoline.path(), paint);
         renderer->restore();
 
         renderer->save();
         renderer->translate(150, 300);
         renderer->scale(75, 75);
-        mandoline.reset({1, 0});
+        mandoline.reset({0, 0}, {1, 0});
         constexpr int nquads = 5;
         for (int i = 0; i < nquads; ++i)
         {
             float theta1 = 2 * PI / nquads * (i + .5f);
             float theta2 = 2 * PI / nquads * (i + 1);
             mandoline.sliceQuadratic({cosf(theta1) * 2, sinf(theta1) * 2},
-                                     {cosf(theta2), sinf(theta2)},
-                                     SUBDIVISION_DEPTH);
+                                     {cosf(theta2), sinf(theta2)});
         }
         renderer->drawPath(mandoline.path(), paint);
         renderer->restore();
diff --git a/tests/gm/preserverendertarget.cpp b/tests/gm/preserverendertarget.cpp
index 572174c..5eafa9b 100644
--- a/tests/gm/preserverendertarget.cpp
+++ b/tests/gm/preserverendertarget.cpp
@@ -27,7 +27,8 @@
         yellow->color(0xffffff00);
 
         // Set the render target to a cyan background with a yellow circle.
-        auto renderer = TestingWindow::Get()->beginFrame(0xff008080);
+        auto renderer =
+            TestingWindow::Get()->beginFrame({.clearColor = 0xff008080});
         renderer->drawPath(PathBuilder::Circle(32, 32, 20), yellow);
 
         // Don't clear to red!
@@ -45,8 +46,10 @@
         else
         {
             TestingWindow::Get()->endFrame();
-            renderer =
-                TestingWindow::Get()->beginFrame(0xffff0000, false /*doClear*/);
+            renderer = TestingWindow::Get()->beginFrame({
+                .clearColor = 0xffff0000,
+                .doClear = false,
+            });
         }
 
         if (!m_empty)
diff --git a/tests/gm/retrofittedcubictriangles.cpp b/tests/gm/retrofittedcubictriangles.cpp
index e926654..f5b0480 100644
--- a/tests/gm/retrofittedcubictriangles.cpp
+++ b/tests/gm/retrofittedcubictriangles.cpp
@@ -119,7 +119,7 @@
 
             // PushRetrofittedTrianglesGMDraw specific push to render
             uint32_t contourID = tessWriter.pushContour(
-                renderPaintStyle(),
+                m_drawContents,
                 {0, 0},
                 true,
                 0 /* gpu::kOuterCurvePatchSegmentSpan - 2 */);
@@ -164,11 +164,11 @@
             TestingWindow::Get()->renderContext();
         if (!renderContext)
         {
-            TestingWindow::Get()->beginFrame(0xffff0000, true);
+            TestingWindow::Get()->beginFrame({.clearColor = 0xffff0000});
         }
         else
         {
-            TestingWindow::Get()->beginFrame(0xff000000, true);
+            TestingWindow::Get()->beginFrame({.clearColor = 0xff000000});
             RiveRenderPaint paint;
             paint.color(0xffffffff);
             DrawUniquePtr draw(
diff --git a/tests/gm/trickycubicstrokes.cpp b/tests/gm/trickycubicstrokes.cpp
index 7549832..066f1f9 100644
--- a/tests/gm/trickycubicstrokes.cpp
+++ b/tests/gm/trickycubicstrokes.cpp
@@ -164,8 +164,14 @@
 class TrickyCubicsGM : public GM
 {
 public:
-    TrickyCubicsGM(StrokeCap cap, StrokeJoin join, const char* name) :
-        GM(kTestWidth, kTestHeight, name), m_Cap(cap), m_Join(join)
+    TrickyCubicsGM(StrokeCap cap,
+                   StrokeJoin join,
+                   float feather,
+                   const char* name) :
+        GM(kTestWidth, kTestHeight, name),
+        m_Cap(cap),
+        m_Join(join),
+        m_feather(feather)
     {}
 
     ColorInt clearColor() const override { return 0xff000000; }
@@ -247,6 +253,10 @@
                 Vec2D c1 = p[2] + (p[1] - p[2]) * (2 / 3.f);
                 path->cubicTo(c0.x, c0.y, c1.x, c1.y, p[2].x, p[2].y);
             }
+            if (m_feather != 0)
+            {
+                strokePaint->feather(m_feather / matrix.findMaxScale());
+            }
             renderer->drawPath(path, strokePaint);
             renderer->restore();
         }
@@ -255,11 +265,19 @@
 private:
     StrokeCap m_Cap;
     StrokeJoin m_Join;
+    float m_feather;
 };
 
 GMREGISTER(return new TrickyCubicsGM(StrokeCap::butt,
                                      StrokeJoin::miter,
+                                     0,
                                      "trickycubicstrokes"))
 GMREGISTER(return new TrickyCubicsGM(StrokeCap::round,
                                      StrokeJoin::round,
+                                     0,
                                      "trickycubicstrokes_roundcaps"))
+// Feathers ignore cap and join.
+GMREGISTER(return new TrickyCubicsGM(StrokeCap::butt,
+                                     StrokeJoin::miter,
+                                     20,
+                                     "trickycubicstrokes_feather"))
diff --git a/tests/goldens/goldens.cpp b/tests/goldens/goldens.cpp
index 67aeb9e..ea74ebb 100644
--- a/tests/goldens/goldens.cpp
+++ b/tests/goldens/goldens.cpp
@@ -44,7 +44,8 @@
         const rive::AABB cellBounds = rive::AABB(0, 0, cellSize, cellSize);
 
         // Render the scene in a grid.
-        auto renderer = TestingWindow::Get()->beginFrame(0xffffffff);
+        auto renderer =
+            TestingWindow::Get()->beginFrame({.clearColor = 0xffffffff});
         renderer->save();
         scene->advanceAndApply(0);
         for (int y = 0; y < s_args.rows(); ++y)
@@ -54,7 +55,7 @@
                 if ((x | y) != 0)
                 {
                     TestingWindow::Get()->endFrame();
-                    TestingWindow::Get()->beginFrame(0, false);
+                    TestingWindow::Get()->beginFrame({.doClear = false});
                     scene->advanceAndApply(frameDuration);
                 }
 
diff --git a/tests/goldens/goldens_arguments.hpp b/tests/goldens/goldens_arguments.hpp
index 5ed9dd6..5f8d672 100644
--- a/tests/goldens/goldens_arguments.hpp
+++ b/tests/goldens/goldens_arguments.hpp
@@ -59,7 +59,7 @@
             optional,
             "backend",
             "backend type: [gl, metal, angle_gl, angle_d3d, "
-            "angle_vk, angle_mtl, coregraphics, skia_raster, rhi]",
+            "angle_vk, angle_mtl, coregraphics, skia, rhi]",
             {'b', "backend"});
         args::Flag headless(optional,
                             "headless",
diff --git a/tests/player/player.cpp b/tests/player/player.cpp
index b7ab54e..3aaf8e7 100644
--- a/tests/player/player.cpp
+++ b/tests/player/player.cpp
@@ -272,8 +272,11 @@
             lastReportedPauseState = paused;
         }
 
-        auto renderer =
-            TestingWindow::Get()->beginFrame(0xff303030, true, wireframe);
+        auto renderer = TestingWindow::Get()->beginFrame({
+            .clearColor = 0xff303030,
+            .doClear = true,
+            .wireframe = wireframe,
+        });
         renderer->save();
 
         uint32_t width = TestingWindow::Get()->width();
diff --git a/tests/rive_tools_project.lua b/tests/rive_tools_project.lua
index 010e291..945e046 100644
--- a/tests/rive_tools_project.lua
+++ b/tests/rive_tools_project.lua
@@ -9,6 +9,11 @@
 dofile(RIVE_RUNTIME_DIR .. '/decoders/premake5_v2.lua')
 dofile(RIVE_PLS_DIR .. '/premake5_pls_renderer.lua')
 
+newoption({ trigger = 'with-skia', description = 'use skia' })
+if _OPTIONS['with-skia'] then
+    dofile(RIVE_RUNTIME_DIR .. '/skia/renderer/build/premake5.lua')
+end
+
 function rive_tools_project(name, project_kind)
     project(name)
     cppdialect('C++17')
@@ -82,6 +87,18 @@
         dofile(RIVE_PLS_DIR .. '/rive_vk_bootstrap/bootstrap_project.lua')
     end
 
+    filter('options:with-skia')
+    do
+        includedirs({
+            RIVE_RUNTIME_DIR .. '/skia/renderer/include',
+            RIVE_RUNTIME_DIR .. '/skia/dependencies',
+            RIVE_RUNTIME_DIR .. '/skia/dependencies/skia',
+        })
+        defines({ 'RIVE_SKIA' })
+        libdirs({ RIVE_RUNTIME_DIR .. '/skia/dependencies/skia/out/static' })
+        links({ 'skia', 'rive_skia_renderer' })
+    end
+
     filter({ 'toolset:not msc' })
     do
         buildoptions({ '-Wshorten-64-to-32' })
diff --git a/tests/unit_tests/renderer/gpu_namespace_test.cpp b/tests/unit_tests/renderer/gpu_namespace_test.cpp
new file mode 100644
index 0000000..f94b4fd
--- /dev/null
+++ b/tests/unit_tests/renderer/gpu_namespace_test.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright 2024 Rive
+ */
+
+#include "rive/math/math_types.hpp"
+#include "rive/renderer/gpu.hpp"
+#include "shaders/constants.glsl"
+#include <catch.hpp>
+
+namespace rive
+{
+TEST_CASE("find_transformed_area", "[gpu]")
+{
+    AABB unitSquare{0, 0, 1, 1};
+    CHECK(gpu::find_transformed_area(unitSquare, Mat2D()) == 1);
+    CHECK(gpu::find_transformed_area(unitSquare, Mat2D::fromScale(2, 2)) == 4);
+    CHECK(gpu::find_transformed_area(unitSquare, Mat2D::fromScale(2, 1)) == 2);
+    CHECK(gpu::find_transformed_area(unitSquare, Mat2D::fromScale(0, 1)) == 0);
+    CHECK(gpu::find_transformed_area(unitSquare,
+                                     Mat2D::fromRotation(math::PI / 4)) ==
+          Approx(1.f));
+    CHECK(gpu::find_transformed_area(
+              unitSquare,
+              Mat2D::fromRotation(math::PI / 8).scale({2, 2})) == Approx(4.f));
+    CHECK(gpu::find_transformed_area(
+              unitSquare,
+              Mat2D::fromRotation(math::PI / 16).scale({2, 1})) == Approx(2.f));
+    CHECK(
+        gpu::find_transformed_area(unitSquare, {1, .87f, 8, 8 * .87f, 0, 0}) ==
+        Approx(0.f).margin(math::EPSILON));
+}
+
+// Borrowed from:
+// https://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
+float half_to_float(uint16_t x)
+{
+    // IEEE-754 16-bit floating-point format (without infinity): 1-5-10, exp-15,
+    // +-131008.0, +-6.1035156E-5, +-5.9604645E-8, 3.311 digits
+    const uint32_t e = (x & 0x7C00) >> 10; // exponent
+    const uint32_t m = (x & 0x03FF) << 13; // mantissa
+    // evil log2 bit hack to count leading zeros in denormalized format
+    const uint32_t v = math::bit_cast<uint32_t>(static_cast<float>(m)) >> 23;
+    return math::bit_cast<float>(
+        (x & 0x8000) << 16 | (e != 0) * ((e + 112) << 23 | m) |
+        ((e == 0) & (m != 0)) *
+            ((v - 37) << 23 |
+             ((m << (150 - v)) &
+              0x007FE000))); // sign : normalized : denormalized
+}
+
+TEST_CASE("gaussian_integral_table", "[gpu]")
+{
+    float gaussianTable[gpu::GAUSSIAN_TABLE_SIZE];
+    for (int i = 0; i < gpu::GAUSSIAN_TABLE_SIZE; ++i)
+    {
+        gaussianTable[i] = half_to_float(gpu::g_gaussianIntegralTableF16[i]);
+    }
+
+    CHECK(gaussianTable[0] >= 0);
+    CHECK(gaussianTable[0] <= expf(-.5f * FEATHER_TEXTURE_STDDEVS));
+    CHECK(gaussianTable[gpu::GAUSSIAN_TABLE_SIZE - 1] <= 1);
+    CHECK(gaussianTable[gpu::GAUSSIAN_TABLE_SIZE - 1] >=
+          1 - expf(-.5f * FEATHER_TEXTURE_STDDEVS));
+    if (gpu::GAUSSIAN_TABLE_SIZE & 1)
+    {
+        CHECK(gaussianTable[gpu::GAUSSIAN_TABLE_SIZE / 2] == .5f);
+    }
+    else
+    {
+        CHECK(gaussianTable[gpu::GAUSSIAN_TABLE_SIZE / 2 - 1] <= .5f);
+        CHECK(gaussianTable[gpu::GAUSSIAN_TABLE_SIZE / 2] >= .5f);
+        CHECK((gaussianTable[gpu::GAUSSIAN_TABLE_SIZE / 2 - 1] +
+               gaussianTable[gpu::GAUSSIAN_TABLE_SIZE / 2]) /
+                  2 ==
+              Approx(.5f).margin(1e-3f));
+    }
+    for (int i = 1; i < gpu::GAUSSIAN_TABLE_SIZE; ++i)
+    {
+        CHECK(gaussianTable[i - 1] <= gaussianTable[i]);
+    }
+    for (int i = 0; i < (gpu::GAUSSIAN_TABLE_SIZE + 1) / 2; ++i)
+    {
+        CHECK(gaussianTable[i] +
+                  gaussianTable[gpu::GAUSSIAN_TABLE_SIZE - 1 - i] ==
+              Approx(1).margin(1e-3f));
+    }
+}
+
+TEST_CASE("inverse_gaussian_integral_table", "[gpu]")
+{
+    CHECK(gpu::g_inverseGaussianIntegralTableF32[0] == 0);
+    CHECK(
+        gpu::g_inverseGaussianIntegralTableF32[gpu::GAUSSIAN_TABLE_SIZE - 1] ==
+        1);
+    if (gpu::GAUSSIAN_TABLE_SIZE & 1)
+    {
+        CHECK(gpu::g_inverseGaussianIntegralTableF32[gpu::GAUSSIAN_TABLE_SIZE /
+                                                     2] == .5f);
+    }
+    else
+    {
+        CHECK((gpu::g_inverseGaussianIntegralTableF32
+                   [gpu::GAUSSIAN_TABLE_SIZE / 2 - 1] +
+               gpu::g_inverseGaussianIntegralTableF32[gpu::GAUSSIAN_TABLE_SIZE /
+                                                      2]) /
+                  2 ==
+              Approx(.5f).margin(1e-4f));
+    }
+    for (int i = 1; i < gpu::GAUSSIAN_TABLE_SIZE; ++i)
+    {
+        CHECK(gpu::g_inverseGaussianIntegralTableF32[i - 1] <=
+              gpu::g_inverseGaussianIntegralTableF32[i]);
+    }
+    for (int i = 0; i < (gpu::GAUSSIAN_TABLE_SIZE + 1) / 2 - 4; ++i)
+    {
+        CHECK(gpu::g_inverseGaussianIntegralTableF32[i] +
+                  gpu::g_inverseGaussianIntegralTableF32
+                      [gpu::GAUSSIAN_TABLE_SIZE - 1 - i] ==
+              Approx(1).margin(i > 100 ? 1e-4f
+                               : i > 4 ? 1e-3f
+                                       : 1e-2f));
+    }
+
+    // Check that the inverse table is actually an inverse of the gaussian
+    // integral.
+    float gaussianTable[gpu::GAUSSIAN_TABLE_SIZE];
+    for (int i = 0; i < gpu::GAUSSIAN_TABLE_SIZE; ++i)
+    {
+        gaussianTable[i] = half_to_float(gpu::g_gaussianIntegralTableF16[i]);
+    }
+    float M = 21;
+    for (float x = 0; x <= 1; x += 1.f / (gpu::GAUSSIAN_TABLE_SIZE * M))
+    {
+        float y = gpu::gaussian_table_lookup(gaussianTable, x);
+        float inverseY = gpu::inverse_gaussian_integral(y);
+        // The inverse table loses precision at the inner and outer cells.
+        float margin = x > .125f && x < .875f ? 1.f / 512
+                       : x > .04f && x < .96f ? 1.f / 256
+                       : x > .02f && x < .98f ? 1.f / 128
+                                              : 1.f / 95;
+        CHECK(inverseY == Approx(x).margin(margin));
+    }
+
+    // Check inverse_gaussian_integral edge cases.
+    CHECK(gpu::inverse_gaussian_integral(-1) == 0);
+    CHECK(gpu::inverse_gaussian_integral(2) == 1);
+    CHECK(gpu::inverse_gaussian_integral(
+              -std::numeric_limits<float>::infinity()) == 0);
+    CHECK(gpu::inverse_gaussian_integral(
+              std::numeric_limits<float>::infinity()) == 1);
+    CHECK(gpu::inverse_gaussian_integral(
+              std::numeric_limits<float>::quiet_NaN()) == 0);
+}
+} // namespace rive
diff --git a/tests/unit_tests/renderer/pls_test.cpp b/tests/unit_tests/renderer/pls_test.cpp
deleted file mode 100644
index b382ef4..0000000
--- a/tests/unit_tests/renderer/pls_test.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright 2020 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "rive/math/math_types.hpp"
-#include "rive/renderer/gpu.hpp"
-#include <catch.hpp>
-
-namespace rive
-{
-TEST_CASE("FindTransformedArea", "[pls]")
-{
-    AABB unitSquare{0, 0, 1, 1};
-    CHECK(gpu::FindTransformedArea(unitSquare, Mat2D()) == 1);
-    CHECK(gpu::FindTransformedArea(unitSquare, Mat2D::fromScale(2, 2)) == 4);
-    CHECK(gpu::FindTransformedArea(unitSquare, Mat2D::fromScale(2, 1)) == 2);
-    CHECK(gpu::FindTransformedArea(unitSquare, Mat2D::fromScale(0, 1)) == 0);
-    CHECK(gpu::FindTransformedArea(unitSquare,
-                                   Mat2D::fromRotation(math::PI / 4)) ==
-          Approx(1.f));
-    CHECK(gpu::FindTransformedArea(
-              unitSquare,
-              Mat2D::fromRotation(math::PI / 8).scale({2, 2})) == Approx(4.f));
-    CHECK(gpu::FindTransformedArea(
-              unitSquare,
-              Mat2D::fromRotation(math::PI / 16).scale({2, 1})) == Approx(2.f));
-    CHECK(gpu::FindTransformedArea(unitSquare, {1, .87f, 8, 8 * .87f, 0, 0}) ==
-          Approx(0.f).margin(math::EPSILON));
-}
-} // namespace rive
diff --git a/tests/unit_tests/runtime/bezier_utils_test.cpp b/tests/unit_tests/runtime/bezier_utils_test.cpp
index 3cb119a..d74c957 100644
--- a/tests/unit_tests/runtime/bezier_utils_test.cpp
+++ b/tests/unit_tests/runtime/bezier_utils_test.cpp
@@ -8,6 +8,7 @@
 #include "rive/math/math_types.hpp"
 #include "rive/math/bezier_utils.hpp"
 #include "rive/math/simd.hpp"
+#include "common/rand.hpp"
 #include <catch.hpp>
 
 namespace rive
@@ -150,20 +151,25 @@
     }
 }
 
-static float SkMeasureNonInflectCubicRotation(const Vec2D pts[4])
+static float measure_non_inflect_cubic_rotation(const Vec2D pts[4])
 {
     Vec2D a = pts[1] - pts[0];
     Vec2D b = pts[2] - pts[1];
     Vec2D c = pts[3] - pts[2];
-    if (a == Vec2D())
+    float lengthA = a.length();
+    float lengthB = b.length();
+    float lengthC = c.length();
+    float lengthMax = std::max(std::max(lengthA, lengthB), lengthC);
+    float zeroThreshold = std::max(lengthMax, 1.f) * 1e-4f;
+    if (lengthA <= zeroThreshold)
     {
         return math::measure_angle_between_vectors(b, c);
     }
-    if (b == Vec2D())
+    if (lengthB <= zeroThreshold)
     {
         return math::measure_angle_between_vectors(a, c);
     }
-    if (c == Vec2D())
+    if (lengthC <= zeroThreshold)
     {
         return math::measure_angle_between_vectors(a, b);
     }
@@ -174,25 +180,26 @@
            math::measure_angle_between_vectors(b, -c);
 }
 
-TEST_CASE("SkMeasureNonInflectCubicRotation", "[bezier_utils]")
+TEST_CASE("measure_non_inflect_cubic_rotation", "[bezier_utils]")
 {
     static Vec2D kFlatCubic[4] = {{0, 0}, {0, 1}, {0, 2}, {0, 3}};
-    CHECK(fuzzy_equal(SkMeasureNonInflectCubicRotation(kFlatCubic), 0));
+    CHECK(fuzzy_equal(measure_non_inflect_cubic_rotation(kFlatCubic), 0));
 
     static Vec2D kFlatCubic180_1[4] = {{0, 0}, {1, 0}, {3, 0}, {2, 0}};
-    CHECK(fuzzy_equal(SkMeasureNonInflectCubicRotation(kFlatCubic180_1),
+    CHECK(fuzzy_equal(measure_non_inflect_cubic_rotation(kFlatCubic180_1),
                       math::PI));
 
     static Vec2D kFlatCubic180_2[4] = {{0, 1}, {0, 0}, {0, 2}, {0, 3}};
-    CHECK(fuzzy_equal(SkMeasureNonInflectCubicRotation(kFlatCubic180_2),
+    CHECK(fuzzy_equal(measure_non_inflect_cubic_rotation(kFlatCubic180_2),
                       math::PI));
 
     static Vec2D kFlatCubic360[4] = {{0, 1}, {0, 0}, {0, 3}, {0, 2}};
-    CHECK(fuzzy_equal(SkMeasureNonInflectCubicRotation(kFlatCubic360),
+    CHECK(fuzzy_equal(measure_non_inflect_cubic_rotation(kFlatCubic360),
                       2 * math::PI));
 
     static Vec2D kSquare180[4] = {{0, 0}, {0, 1}, {1, 1}, {1, 0}};
-    CHECK(fuzzy_equal(SkMeasureNonInflectCubicRotation(kSquare180), math::PI));
+    CHECK(
+        fuzzy_equal(measure_non_inflect_cubic_rotation(kSquare180), math::PI));
 
     auto checkQuadRotation = [](const Vec2D pts[3], float expectedRotation) {
 #if 0
@@ -200,15 +207,15 @@
         CHECK(fuzzy_equal(r, expectedRotation));
 #endif
         Vec2D cubic1[4] = {pts[0], pts[0], pts[1], pts[2]};
-        CHECK(fuzzy_equal(SkMeasureNonInflectCubicRotation(cubic1),
+        CHECK(fuzzy_equal(measure_non_inflect_cubic_rotation(cubic1),
                           expectedRotation));
 
         Vec2D cubic2[4] = {pts[0], pts[1], pts[1], pts[2]};
-        CHECK(fuzzy_equal(SkMeasureNonInflectCubicRotation(cubic2),
+        CHECK(fuzzy_equal(measure_non_inflect_cubic_rotation(cubic2),
                           expectedRotation));
 
         Vec2D cubic3[4] = {pts[0], pts[1], pts[2], pts[2]};
-        CHECK(fuzzy_equal(SkMeasureNonInflectCubicRotation(cubic3),
+        CHECK(fuzzy_equal(measure_non_inflect_cubic_rotation(cubic3),
                           expectedRotation));
     };
 
@@ -367,7 +374,7 @@
     }
     else
     {
-        float totalRotation = SkMeasureNonInflectCubicRotation(p);
+        float totalRotation = measure_non_inflect_cubic_rotation(p);
         int convex180N =
             math::find_cubic_convex_180_chops(p, convex180T, &areCusps);
         Vec2D chops[10];
@@ -402,7 +409,7 @@
         }
         for (int i = 0; i <= convex180N; ++i)
         {
-            float rads = SkMeasureNonInflectCubicRotation(chops + i * 3);
+            float rads = measure_non_inflect_cubic_rotation(chops + i * 3);
             assert(rads < math::PI + kEpsilon);
             radsSum += rads;
         }
@@ -420,10 +427,11 @@
                 // This works because cusps take the "inflection" path above, so
                 // we don't get non-lilnear curves that lose rotation when
                 // chopped.
-                REQUIRE(fuzzy_equal(SkMeasureNonInflectCubicRotation(chops),
+                REQUIRE(fuzzy_equal(measure_non_inflect_cubic_rotation(chops),
                                     math::PI));
-                REQUIRE(fuzzy_equal(SkMeasureNonInflectCubicRotation(chops + 3),
-                                    totalRotation - math::PI));
+                REQUIRE(
+                    fuzzy_equal(measure_non_inflect_cubic_rotation(chops + 3),
+                                totalRotation - math::PI));
             }
             REQUIRE(!areCusps);
         }
@@ -543,10 +551,35 @@
            3 * (1 - t) * pow2(t) * p[2] + pow3(t) * p[3];
 }
 
-void check_eval_cubic(const EvalCubic& evalCubic,
-                      const Vec2D* cubic,
-                      float t0,
-                      float t1)
+constexpr static Vec2D TEST_CUBICS[][4] = { // Shared fixtures: 4 points per cubic.
+    {{199, 1225}, {197, 943}, {349, 607}, {549, 427}},
+    {{549, 427}, {349, 607}, {197, 943}, {199, 1225}}, // Previous entry, reversed
+    {{460, 1060}, {403, -320}, {60, 660}, {1181, 634}},
+    {{1181, 634}, {60, 660}, {403, -320}, {460, 1060}}, // Previous entry, reversed
+    {{0, 0}, {0, 0}, {0, 0}, {0, 0}}, // All four points coincide
+    {{0, 0}, {0, 0}, {0, 0}, {100, 100}}, // First three points coincide
+    {{0, 0}, {0, 0}, {100, 100}, {100, 100}}, // Same points as "Line" below
+    {{0, 0}, {100, 100}, {100, 100}, {0, 0}}, // Starts and ends at (0, 0)
+    {{-100, -100}, {100, 100}, {100, -100}, {-100, 100}}, // Cusp
+    {{0, 0}, {0, 0}, {100, 100}, {100, 100}},             // Line
+    {{0, 0}, {-100, -100}, {200, 200}, {100, 100}},       // Line w/ 2 cusps
+    {{0, 0},
+     {50 * 2.f / 3.f, 100 * 2.f / 3.f},
+     {100 - 50 * 2.f / 3.f, 100 * 2.f / 3.f},
+     {100, 0}}, // Quadratic (0,0), (50,100), (100,0) written as a cubic
+    // The remaining cubics had some sort of issue during development. They're
+    // in the list to make sure they don't regress.
+    {{0, 0},
+     {50 * 2.f / 3.f, 100 * 2.f / 3.f},
+     {100 - 50 * 2.f / 3.f, 100 * 2.f / 3.f},
+     {100, 100}}, // Same as "Quadratic" above except for the last point
+    {{100, 0}, {0, 0}, {0, 0}, {0, 0}}, // Last three points coincide
+};
+
+static void check_eval_cubic(const EvalCubic& evalCubic,
+                             const Vec2D* cubic,
+                             float t0,
+                             float t1)
 {
     float4 pp = evalCubic(float4{t0, t0, t1, t1});
     Vec2D p0ref = eval_cubic(cubic, t0);
@@ -565,18 +598,7 @@
 
 TEST_CASE("EvalCubic", "[bezier_utils]")
 {
-    Vec2D cubics[][4] = {
-        {{199, 1225}, {197, 943}, {349, 607}, {549, 427}},
-        {{549, 427}, {349, 607}, {197, 943}, {199, 1225}},
-        {{460, 1060}, {403, -320}, {60, 660}, {1181, 634}},
-        {{1181, 634}, {60, 660}, {403, -320}, {460, 1060}},
-        {{0, 0}, {0, 0}, {0, 0}, {0, 0}},
-        {{0, 0}, {0, 0}, {0, 0}, {100, 100}},
-        {{0, 0}, {0, 0}, {100, 100}, {100, 100}},
-        {{0, 0}, {100, 100}, {100, 100}, {0, 0}},
-        {{-100, -100}, {100, 100}, {100, -100}, {-100, 100}}, // Cusp
-    };
-    for (auto cubic : cubics)
+    for (auto cubic : TEST_CUBICS)
     {
         math::EvalCubic evalCubic(cubic);
         check_eval_cubic(evalCubic, cubic, 0, 1);
@@ -586,5 +608,229 @@
         }
     }
 }
+
+static void check_cubic_max_height(const Vec2D* pts) // Checks find_cubic_max_height() by sampling.
+{
+    float t, h = math::find_cubic_max_height(pts, &t); // h: reported height; t: its T value.
+    CHECK(h >= 0);
+    CHECK(t >= 0);
+    CHECK(t <= 1);
+    Vec2D norm = (pts[3] - pts[0]).normalized(); // Direction of the baseline pts[0] -> pts[3].
+    norm = {-norm.y, norm.x}; // Rotate 90 degrees: now perpendicular to the baseline.
+    float k = -Vec2D::dot(norm, pts[0]); // Offset so that dot(norm, pts[0]) + k == 0.
+    auto heightAt = [=](float t) { // NOTE: this parameter shadows the outer `t`.
+        return fabsf(Vec2D::dot(norm, math::eval_cubic_at(pts, t)) + k);
+    };
+    constexpr static float EPSILON = 5e-5f; // Tolerance used by every check below.
+    CHECK(heightAt(t) == Approx(h).margin(EPSILON)); // Reported h matches the curve at t.
+    CHECK(h + EPSILON > heightAt(0));
+    CHECK(h + EPSILON > heightAt(1));
+    for (float t2 = 0; t2 <= 1; t2 += .005137f) // Sample across [0, 1].
+    {
+        CHECK(h + EPSILON > heightAt(t2)); // No sample may exceed h (within EPSILON).
+    }
+}
+
+TEST_CASE("find_cubic_max_height", "[bezier_utils]") // Fixed, corner, and random cubics.
+{
+    for (auto pts : TEST_CUBICS)
+    {
+        check_cubic_max_height(pts);
+    }
+    // Test all combinations of corners from the square [0,0,1,1] (scaled by
+    // 100). This covers every cubic type as well as a wide variety of special
+    // cases for cusps, lines, loops, and inflections.
+    for (int i = 0; i < (1 << 8); ++i) // 2 bits per point * 4 points = 256 cubics.
+    {
+        Vec2D pts[4] = {Vec2D((i >> 0) & 1, (i >> 1) & 1) * 100, // bits 0-1
+                        Vec2D((i >> 2) & 1, (i >> 3) & 1) * 100, // bits 2-3
+                        Vec2D((i >> 4) & 1, (i >> 5) & 1) * 100, // bits 4-5
+                        Vec2D((i >> 6) & 1, (i >> 7) & 1) * 100}; // bits 6-7
+        check_cubic_max_height(pts);
+    }
+    Rand rando;
+    rando.seed(0); // Constant seed; presumably keeps the run deterministic.
+    for (int i = 0; i < 100; ++i) // 100 randomly generated cubics.
+    {
+        Vec2D randos[] = {
+            {rando.f32(-100, 100), rando.f32(-100, 100)},
+            {rando.f32(-100, 100), rando.f32(-100, 100)},
+            {rando.f32(-100, 100), rando.f32(-100, 100)},
+            {rando.f32(-100, 100), rando.f32(-100, 100)},
+        };
+        check_cubic_max_height(randos);
+    }
+}
+
+static float guess_cubic_local_curvature(const Vec2D* p,
+                                         const CubicCoeffs& coeffs,
+                                         float t,
+                                         float desiredSpread)
+{
+    // Search for the dt whose span [t - dt, t + dt] has the desired spread.
+    float maxDT = fminf(t, 1 - t) * .9999f; // Upper bound: keeps t +/- dt within [0, 1].
+    float2 tan = 3.f * ((coeffs.A * t + 2.f * coeffs.B) * t + coeffs.C); // Tangent(t).
+    float lengthTan = sqrtf(simd::dot(tan, tan));
+    tan /= lengthTan; // Normalize; yields NaN when the tangent is zero.
+    float dt = 0; // Lower bound of the search.
+    math::EvalCubic evalCubic(coeffs, p[0]);
+    // This would converge faster if we used the derivative of the Spread(dt)
+    // function from math::measure_cubic_local_curvature, but since the whole
+    // point of this test is to validate that function, use a binary search
+    // instead.
+    for (int i = 0; i < 24; ++i) // 24 bisection steps between dt and maxDT.
+    {
+        float guessDT = (dt + maxDT) * .5f;
+        float guessT0 = t - guessDT;
+        float guessT1 = t + guessDT;
+        float4 endpts = evalCubic(float4{guessT0, guessT0, guessT1, guessT1}); // xy = Cubic(T0), zw = Cubic(T1).
+        float spread = fabsf(simd::dot(endpts.zw - endpts.xy, tan)); // Span projected onto the tangent.
+        if (spread <= desiredSpread)
+            dt = guessDT; // Not too wide: raise the lower bound.
+        else
+            maxDT = guessDT; // Too wide (or NaN): lower the upper bound.
+    }
+    Vec2D chops[10]; // 3 cubics sharing endpoints = 3 * 3 + 1 points.
+    math::chop_cubic_at(p, chops, t - dt, t + dt);
+    return measure_non_inflect_cubic_rotation(chops + 3); // Presumably the middle section, [t - dt, t + dt].
+}
+
+constexpr static float FEATHERING_CUSP_PADDING = 1e-3f; // 3rd argument to find_cubic_convex_90_chops() in the checks below.
+
+static void check_cubic_convex_90_chops(const Vec2D* pts) // Checks each chopped section's rotation.
+{
+    float T[4]; // Receives the chop T values.
+    bool areCusps;
+    int n = math::find_cubic_convex_90_chops(pts,
+                                             T,
+                                             FEATHERING_CUSP_PADDING,
+                                             &areCusps);
+    CHECK(n <= 4); // T[] only has room for 4 values.
+
+    Vec2D chops[16];
+    assert(n * 3 + 1 <= std::size(chops)); // n chops -> n + 1 cubics -> 3n + 4 points; presumably should be n * 3 + 4 -- TODO confirm.
+    math::chop_cubic_at(pts, chops, T, n);
+    Vec2D* p = chops;
+    for (int i = 0; i <= n; ++i, p += 3) // Sections share an endpoint, hence the stride of 3.
+    {
+        if (areCusps && (i & 1))
+        {
+            // If the chops are around a cusp, odd-numbered chops are a padding
+            // section that passes through a cusp.
+            continue;
+        }
+        CHECK(measure_non_inflect_cubic_rotation(p) <=
+              Approx(math::PI / 2).margin(1e-2f)); // Section turns at most ~90 degrees.
+    }
+}
+
+TEST_CASE("find_cubic_convex_90_chops", "[bezier_utils]") // Fixed, corner, and random cubics.
+{
+    for (auto pts : TEST_CUBICS)
+    {
+        check_cubic_convex_90_chops(pts);
+    }
+    // Test all combinations of corners from the square [0,0,1,1] (scaled by
+    // 100). This covers every cubic type as well as a wide variety of special
+    // cases for cusps, lines, loops, and inflections.
+    for (int i = 0; i < (1 << 8); ++i) // 2 bits per point * 4 points = 256 cubics.
+    {
+        Vec2D pts[4] = {Vec2D((i >> 0) & 1, (i >> 1) & 1) * 100, // bits 0-1
+                        Vec2D((i >> 2) & 1, (i >> 3) & 1) * 100, // bits 2-3
+                        Vec2D((i >> 4) & 1, (i >> 5) & 1) * 100, // bits 4-5
+                        Vec2D((i >> 6) & 1, (i >> 7) & 1) * 100}; // bits 6-7
+        check_cubic_convex_90_chops(pts);
+    }
+    Rand rando;
+    rando.seed(0); // Constant seed; presumably keeps the run deterministic.
+    for (int i = 0; i < 100; ++i) // 100 randomly generated cubics.
+    {
+        Vec2D randos[] = {
+            {rando.f32(-100, 100), rando.f32(-100, 100)},
+            {rando.f32(-100, 100), rando.f32(-100, 100)},
+            {rando.f32(-100, 100), rando.f32(-100, 100)},
+            {rando.f32(-100, 100), rando.f32(-100, 100)},
+        };
+        check_cubic_convex_90_chops(randos);
+    }
+}
+
+static void check_cubic_local_curvature(const Vec2D* pts) // Compares measure_cubic_local_curvature() with a binary-search estimate.
+{
+    float T[4]; // Receives the chop T values.
+    bool areCusps;
+    int n = math::find_cubic_convex_90_chops(pts,
+                                             T,
+                                             FEATHERING_CUSP_PADDING,
+                                             &areCusps);
+    CHECK(n <= 4); // T[] only has room for 4 values.
+
+    Vec2D chops[16];
+    assert(n * 3 + 1 <= std::size(chops)); // n chops -> n + 1 cubics -> 3n + 4 points; presumably should be n * 3 + 4 -- TODO confirm.
+    math::chop_cubic_at(pts, chops, T, n);
+    Vec2D* p = chops;
+    for (int i = 0; i <= n; ++i, p += 3) // Sections share an endpoint, hence the stride of 3.
+    {
+        if (areCusps && (i & 1))
+        {
+            // If the chops are around a cusp, odd-numbered chops are a padding
+            // section that passes through a cusp.
+            continue;
+        }
+        float maxHeightT; // Filled in by find_cubic_max_height() below.
+        CHECK(measure_non_inflect_cubic_rotation(p) <=
+              Approx(math::PI / 2).margin(2.6e-3f)); // Tighter margin than check_cubic_convex_90_chops.
+        math::find_cubic_max_height(p, &maxHeightT); // Only the T value is used; the height is discarded.
+        CubicCoeffs coeffs(p);
+        for (float desiredSpread : {1.f, 10.f, 100.f}) // Three spreads, a factor of 10 apart.
+        {
+            for (float t : {maxHeightT, .5f}) // Measure at the max-height T and at the midpoint.
+            {
+                float theta =
+                    math::measure_cubic_local_curvature(p,
+                                                        coeffs,
+                                                        t,
+                                                        desiredSpread);
+                CHECK(theta >= 0);
+                CHECK(theta <= math::PI);
+
+                float guess =
+                    guess_cubic_local_curvature(p, coeffs, t, desiredSpread);
+                CHECK(theta == Approx(guess).margin(1e-2f)); // Must agree with the binary-search reference.
+            }
+        }
+    }
+}
+
+TEST_CASE("measure_cubic_local_curvature", "[bezier_utils]") // Fixed, corner, and random cubics.
+{
+    for (auto pts : TEST_CUBICS)
+    {
+        check_cubic_local_curvature(pts);
+    }
+    // Test all combinations of corners from the square [0,0,1,1] (scaled by
+    // 100). This covers every cubic type as well as a wide variety of special
+    // cases for cusps, lines, loops, and inflections.
+    for (int i = 0; i < (1 << 8); ++i) // 2 bits per point * 4 points = 256 cubics.
+    {
+        Vec2D pts[4] = {Vec2D((i >> 0) & 1, (i >> 1) & 1) * 100, // bits 0-1
+                        Vec2D((i >> 2) & 1, (i >> 3) & 1) * 100, // bits 2-3
+                        Vec2D((i >> 4) & 1, (i >> 5) & 1) * 100, // bits 4-5
+                        Vec2D((i >> 6) & 1, (i >> 7) & 1) * 100}; // bits 6-7
+        check_cubic_local_curvature(pts);
+    }
+    Rand rando;
+    rando.seed(0); // Constant seed; presumably keeps the run deterministic.
+    for (int i = 0; i < 100; ++i) // 100 randomly generated cubics.
+    {
+        Vec2D randos[] = {
+            {rando.f32(-100, 100), rando.f32(-100, 100)},
+            {rando.f32(-100, 100), rando.f32(-100, 100)},
+            {rando.f32(-100, 100), rando.f32(-100, 100)},
+            {rando.f32(-100, 100), rando.f32(-100, 100)},
+        };
+        check_cubic_local_curvature(randos);
+    }
+}
 } // namespace math
 } // namespace rive
diff --git a/tests/unit_tests/runtime/simd_test.cpp b/tests/unit_tests/runtime/simd_test.cpp
index 08d6b42..45f21bc 100644
--- a/tests/unit_tests/runtime/simd_test.cpp
+++ b/tests/unit_tests/runtime/simd_test.cpp
@@ -542,6 +542,23 @@
     CHECK_ALL((simd::isnan(simd::ceil(float2{kNaN, -kNaN}))));
 }
 
+// Check simd::copysign: magnitude from the 1st argument, sign from the 2nd.
+TEST_CASE("copysign", "[simd]")
+{
+    CHECK_ALL((simd::copysign(float4{-1, 2, -3, 4},
+                              float4{-999.2f, -kInf, 123.4f, .0000001f}) ==
+               float4{-1, -2, 3, 4})); // Finite magnitudes; sign sources include -inf and a tiny positive.
+    CHECK_ALL((simd::copysign(float4{kInf, -kInf, kInf, -kInf},
+                              float4{-999.2f, -kInf, 123.4f, .0000001f}) ==
+               float4{-kInf, -kInf, kInf, kInf})); // Infinite magnitudes take the sign too.
+    CHECK_ALL(
+        (simd::copysign(float2{998, -23}, float2{-1, 1}) == float2{-998, 23})); // float2 overload.
+    CHECK_ALL(
+        (simd::isnan(simd::copysign(float2{kNaN, -kNaN}, float2{-1, 1})))); // NaN stays NaN.
+    CHECK_ALL((simd::isnan(simd::copysign(float4{kNaN, -kNaN, kNaN, -kNaN},
+                                          float4{-1, -1, 1, 1})))); // NaN stays NaN for either sign.
+}
+
 // Check simd::sqrt.
 TEST_CASE("sqrt", "[simd]")
 {