Consolidate tessellation tolerance tracking into helper class

Moving it into the helper class serves a few purposes:
1. Consolidates the more complicated stroke vertex count calculations so
   that they can be reused between graphite and ganesh more easily.
2. Gives us an object that the PatchWriter can pass to its
   PatchAllocators that is itself not a template, so the allocators can
   be easily reused across different PatchWriter configurations.
3. Gives us a good place to start experimenting with tighter tolerance
   tracking (particularly around internal rotation and join rotation).

Bug: skia:13056, skia:13012
Change-Id: I6086d459a21ac7ab8e833a988cc7c403983c3dd2
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/537083
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Robert Phillips <robertphillips@google.com>
diff --git a/public.bzl b/public.bzl
index 638eb9f..048c618 100644
--- a/public.bzl
+++ b/public.bzl
@@ -1248,6 +1248,7 @@
     "src/gpu/tessellate/CullTest.h",
     "src/gpu/tessellate/FixedCountBufferUtils.cpp",
     "src/gpu/tessellate/FixedCountBufferUtils.h",
+    "src/gpu/tessellate/LinearTolerances.h",
     "src/gpu/tessellate/MiddleOutPolygonTriangulator.h",
     "src/gpu/tessellate/MidpointContourParser.h",
     "src/gpu/tessellate/PatchWriter.h",
diff --git a/src/gpu/ganesh/tessellate/PathTessellator.cpp b/src/gpu/ganesh/tessellate/PathTessellator.cpp
index ccad1bf..e017a97 100644
--- a/src/gpu/ganesh/tessellate/PathTessellator.cpp
+++ b/src/gpu/ganesh/tessellate/PathTessellator.cpp
@@ -35,7 +35,7 @@
 int write_curve_patches(CurveWriter&& patchWriter,
                         const SkMatrix& shaderMatrix,
                         const PathTessellator::PathDrawList& pathDrawList) {
-    wangs_formula::VectorXform shaderXform(shaderMatrix);
+    patchWriter.setShaderTransform(wangs_formula::VectorXform{shaderMatrix});
     for (auto [pathMatrix, path, color] : pathDrawList) {
         AffineMatrix m(pathMatrix);
         if (patchWriter.attribs() & PatchAttribs::kColor) {
@@ -47,7 +47,7 @@
                     auto [p0, p1] = m.map2Points(pts);
                     auto p2 = m.map1Point(pts+2);
 
-                    patchWriter.writeQuadratic(p0, p1, p2, shaderXform);
+                    patchWriter.writeQuadratic(p0, p1, p2);
                     break;
                 }
 
@@ -55,7 +55,7 @@
                     auto [p0, p1] = m.map2Points(pts);
                     auto p2 = m.map1Point(pts+2);
 
-                    patchWriter.writeConic(p0, p1, p2, *w, shaderXform);
+                    patchWriter.writeConic(p0, p1, p2, *w);
                     break;
                 }
 
@@ -63,7 +63,7 @@
                     auto [p0, p1] = m.map2Points(pts);
                     auto [p2, p3] = m.map2Points(pts+2);
 
-                    patchWriter.writeCubic(p0, p1, p2, p3, shaderXform);
+                    patchWriter.writeCubic(p0, p1, p2, p3);
                     break;
                 }
 
@@ -72,7 +72,7 @@
         }
     }
 
-    return patchWriter.requiredResolveLevel();
+    return patchWriter.tolerances().requiredResolveLevel();
 }
 
 using WedgeWriter = PatchWriter<VertexChunkPatchAllocator,
@@ -84,7 +84,7 @@
 int write_wedge_patches(WedgeWriter&& patchWriter,
                         const SkMatrix& shaderMatrix,
                         const PathTessellator::PathDrawList& pathDrawList) {
-    wangs_formula::VectorXform shaderXform(shaderMatrix);
+    patchWriter.setShaderTransform(wangs_formula::VectorXform{shaderMatrix});
     for (auto [pathMatrix, path, color] : pathDrawList) {
         AffineMatrix m(pathMatrix);
         if (patchWriter.attribs() & PatchAttribs::kColor) {
@@ -114,7 +114,7 @@
                         auto [p0, p1] = m.map2Points(pts);
                         auto p2 = m.map1Point(pts+2);
 
-                        patchWriter.writeQuadratic(p0, p1, p2, shaderXform);
+                        patchWriter.writeQuadratic(p0, p1, p2);
                         lastPoint = pts[2];
                         break;
                     }
@@ -123,7 +123,7 @@
                         auto [p0, p1] = m.map2Points(pts);
                         auto p2 = m.map1Point(pts+2);
 
-                        patchWriter.writeConic(p0, p1, p2, *w, shaderXform);
+                        patchWriter.writeConic(p0, p1, p2, *w);
                         lastPoint = pts[2];
                         break;
                     }
@@ -132,7 +132,7 @@
                         auto [p0, p1] = m.map2Points(pts);
                         auto [p2, p3] = m.map2Points(pts+2);
 
-                        patchWriter.writeCubic(p0, p1, p2, p3, shaderXform);
+                        patchWriter.writeCubic(p0, p1, p2, p3);
                         lastPoint = pts[3];
                         break;
                     }
@@ -149,7 +149,7 @@
         }
     }
 
-    return patchWriter.requiredResolveLevel();
+    return patchWriter.tolerances().requiredResolveLevel();
 }
 
 }  // namespace
diff --git a/src/gpu/ganesh/tessellate/StrokeTessellator.cpp b/src/gpu/ganesh/tessellate/StrokeTessellator.cpp
index b438d56..73b7075 100644
--- a/src/gpu/ganesh/tessellate/StrokeTessellator.cpp
+++ b/src/gpu/ganesh/tessellate/StrokeTessellator.cpp
@@ -35,38 +35,21 @@
 int write_fixed_count_patches(FixedCountStrokeWriter&& patchWriter,
                               const SkMatrix& shaderMatrix,
                               StrokeTessellator::PathStrokeList* pathStrokeList) {
-    int maxEdgesInJoin = 0;
-    float maxRadialSegmentsPerRadian = 0;
+    // The vector xform approximates how the control points are transformed by the shader to
+    // more accurately compute how many *parametric* segments are needed.
     // getMaxScale() returns -1 if it can't compute a scale factor (e.g. perspective), taking the
     // absolute value automatically converts that to an identity scale factor for our purposes.
     float maxScale = std::abs(shaderMatrix.getMaxScale());
-
+    patchWriter.setShaderTransform(wangs_formula::VectorXform{shaderMatrix}, maxScale);
     if (!(patchWriter.attribs() & PatchAttribs::kStrokeParams)) {
         // Strokes are static. Calculate tolerances once.
-        const SkStrokeRec& stroke = pathStrokeList->fStroke;
-        float approxDevStrokeWidth = stroke.isHairlineStyle() ? 1.f : maxScale * stroke.getWidth();
-        float numRadialSegmentsPerRadian =
-                CalcNumRadialSegmentsPerRadian(0.5f * approxDevStrokeWidth);
-        maxEdgesInJoin = WorstCaseEdgesInJoin(stroke.getJoin(), numRadialSegmentsPerRadian);
-        maxRadialSegmentsPerRadian = numRadialSegmentsPerRadian;
+        patchWriter.tolerances().accumulateStroke(pathStrokeList->fStroke, maxScale);
     }
 
-    // The vector xform approximates how the control points are transformed by the shader to
-    // more accurately compute how many *parametric* segments are needed.
-    wangs_formula::VectorXform shaderXform{shaderMatrix};
     for (auto* pathStroke = pathStrokeList; pathStroke; pathStroke = pathStroke->fNext) {
         const SkStrokeRec& stroke = pathStroke->fStroke;
         if (patchWriter.attribs() & PatchAttribs::kStrokeParams) {
             // Strokes are dynamic. Calculate tolerances every time.
-            float approxDevStrokeWidth =
-                    stroke.isHairlineStyle() ? 1.f : maxScale * stroke.getWidth();
-            float numRadialSegmentsPerRadian =
-                    CalcNumRadialSegmentsPerRadian(0.5f * approxDevStrokeWidth);
-            maxEdgesInJoin = std::max(
-                    WorstCaseEdgesInJoin(stroke.getJoin(), numRadialSegmentsPerRadian),
-                    maxEdgesInJoin);
-            maxRadialSegmentsPerRadian = std::max(numRadialSegmentsPerRadian,
-                                                  maxRadialSegmentsPerRadian);
             patchWriter.updateStrokeParamsAttrib(stroke);
         }
         if (patchWriter.attribs() & PatchAttribs::kColor) {
@@ -103,7 +86,7 @@
                         patchWriter.writeLine(p[0], cusp);
                         patchWriter.writeLine(cusp, p[2]);
                     } else {
-                        patchWriter.writeQuadratic(p, shaderXform);
+                        patchWriter.writeQuadratic(p);
                     }
                     break;
                 case Verb::kConic:
@@ -116,7 +99,7 @@
                         patchWriter.writeLine(p[0], cusp);
                         patchWriter.writeLine(cusp, p[2]);
                     } else {
-                        patchWriter.writeConic(p, strokeIter.w(), shaderXform);
+                        patchWriter.writeConic(p, strokeIter.w());
                     }
                     break;
                 case Verb::kCubic:
@@ -125,7 +108,7 @@
                     bool areCusps;
                     numChops = FindCubicConvex180Chops(p, T, &areCusps);
                     if (numChops == 0) {
-                        patchWriter.writeCubic(p, shaderXform);
+                        patchWriter.writeCubic(p);
                     } else if (numChops == 1) {
                         SkChopCubicAt(p, chops, T[0]);
                         if (areCusps) {
@@ -134,8 +117,8 @@
                             // on a cusp.
                             chops[2] = chops[4] = chops[3];
                         }
-                        patchWriter.writeCubic(chops, shaderXform);
-                        patchWriter.writeCubic(chops + 3, shaderXform);
+                        patchWriter.writeCubic(chops);
+                        patchWriter.writeCubic(chops + 3);
                     } else {
                         SkASSERT(numChops == 2);
                         SkChopCubicAt(p, chops, T[0], T[1]);
@@ -148,9 +131,9 @@
                             patchWriter.writeLine(chops[3], chops[6]);
                             patchWriter.writeLine(chops[6], chops[9]);
                         } else {
-                            patchWriter.writeCubic(chops, shaderXform);
-                            patchWriter.writeCubic(chops + 3, shaderXform);
-                            patchWriter.writeCubic(chops + 6, shaderXform);
+                            patchWriter.writeCubic(chops);
+                            patchWriter.writeCubic(chops + 3);
+                            patchWriter.writeCubic(chops + 6);
                         }
                     }
                     break;
@@ -158,35 +141,7 @@
         }
     }
 
-    // The maximum rotation we can have in a stroke is 180 degrees (SK_ScalarPI radians).
-    int maxRadialSegmentsInStroke =
-            std::max(SkScalarCeilToInt(maxRadialSegmentsPerRadian * SK_ScalarPI), 1);
-
-    int maxParametricSegmentsInStroke = patchWriter.requiredFixedSegments();
-    SkASSERT(maxParametricSegmentsInStroke >= 1);
-
-    // Now calculate the maximum number of edges we will need in the stroke portion of the instance.
-    // The first and last edges in a stroke are shared by both the parametric and radial sets of
-    // edges, so the total number of edges is:
-    //
-    //   numCombinedEdges = numParametricEdges + numRadialEdges - 2
-    //
-    // It's also important to differentiate between the number of edges and segments in a strip:
-    //
-    //   numSegments = numEdges - 1
-    //
-    // So the total number of combined edges in the stroke is:
-    //
-    //   numEdgesInStroke = numParametricSegments + 1 + numRadialSegments + 1 - 2
-    //                    = numParametricSegments + numRadialSegments
-    //
-    int maxEdgesInStroke = maxRadialSegmentsInStroke + maxParametricSegmentsInStroke;
-
-    // Each triangle strip has two sections: It starts with a join then transitions to a stroke. The
-    // number of edges in an instance is the sum of edges from the join and stroke sections both.
-    // NOTE: The final join edge and the first stroke edge are co-located, however we still need to
-    // emit both because the join's edge is half-width and the stroke's is full-width.
-    return maxEdgesInJoin + maxEdgesInStroke;
+    return patchWriter.tolerances().requiredStrokeEdges();
 }
 
 }  // namespace
diff --git a/src/gpu/graphite/render/TessellateCurvesRenderStep.cpp b/src/gpu/graphite/render/TessellateCurvesRenderStep.cpp
index 2bc6a12..d0a3aed 100644
--- a/src/gpu/graphite/render/TessellateCurvesRenderStep.cpp
+++ b/src/gpu/graphite/render/TessellateCurvesRenderStep.cpp
@@ -106,7 +106,7 @@
     // uniform data to upload, dependent on push constants or storage buffers for good batching)
 
     // Currently no additional transform is applied by the GPU.
-    wangs_formula::VectorXform shaderXform(SkMatrix::I());
+    writer.setShaderTransform(wangs_formula::VectorXform{});
     // TODO: This doesn't handle perspective yet, and ideally wouldn't go through SkMatrix.
     // It may not be relevant, though, if transforms are applied on the GPU and we only need to
     // determine an approximate 2x2 for 'shaderXform' and Wang's formula evaluation.
@@ -125,7 +125,7 @@
                 auto [p0, p1] = m.map2Points(pts);
                 auto p2 = m.map1Point(pts+2);
 
-                writer.writeQuadratic(p0, p1, p2, shaderXform);
+                writer.writeQuadratic(p0, p1, p2);
                 break;
             }
 
@@ -133,7 +133,7 @@
                 auto [p0, p1] = m.map2Points(pts);
                 auto p2 = m.map1Point(pts+2);
 
-                writer.writeConic(p0, p1, p2, *w, shaderXform);
+                writer.writeConic(p0, p1, p2, *w);
                 break;
             }
 
@@ -141,7 +141,7 @@
                 auto [p0, p1] = m.map2Points(pts);
                 auto [p2, p3] = m.map2Points(pts+2);
 
-                writer.writeCubic(p0, p1, p2, p3, shaderXform);
+                writer.writeCubic(p0, p1, p2, p3);
                 break;
             }
 
diff --git a/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp b/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp
index 670e5f7..9c2a67f 100644
--- a/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp
+++ b/src/gpu/graphite/render/TessellateWedgesRenderStep.cpp
@@ -110,7 +110,7 @@
     // uniform data to upload, dependent on push constants or storage buffers for good batching)
 
     // Currently no additional transform is applied by the GPU.
-    wangs_formula::VectorXform shaderXform(SkMatrix::I());
+    writer.setShaderTransform(wangs_formula::VectorXform{});
     // TODO: This doesn't handle perspective yet, and ideally wouldn't go through SkMatrix.
     // It may not be relevant, though, if transforms are applied on the GPU and we only need to
     // determine an approximate 2x2 for 'shaderXform' and Wang's formula evaluation.
@@ -145,7 +145,7 @@
                     auto [p0, p1] = m.map2Points(pts);
                     auto p2 = m.map1Point(pts+2);
 
-                    writer.writeQuadratic(p0, p1, p2, shaderXform);
+                    writer.writeQuadratic(p0, p1, p2);
                     lastPoint = p2;
                     break;
                 }
@@ -154,7 +154,7 @@
                     auto [p0, p1] = m.map2Points(pts);
                     auto p2 = m.map1Point(pts+2);
 
-                    writer.writeConic(p0, p1, p2, *w, shaderXform);
+                    writer.writeConic(p0, p1, p2, *w);
                     lastPoint = p2;
                     break;
                 }
@@ -163,7 +163,7 @@
                     auto [p0, p1] = m.map2Points(pts);
                     auto [p2, p3] = m.map2Points(pts+2);
 
-                    writer.writeCubic(p0, p1, p2, p3, shaderXform);
+                    writer.writeCubic(p0, p1, p2, p3);
                     lastPoint = p3;
                     break;
                 }
diff --git a/src/gpu/tessellate/BUILD.bazel b/src/gpu/tessellate/BUILD.bazel
index 8e7d84c..1416d3d 100644
--- a/src/gpu/tessellate/BUILD.bazel
+++ b/src/gpu/tessellate/BUILD.bazel
@@ -42,6 +42,7 @@
     hdrs = ["PatchWriter.h"],
     visibility = ["//:__subpackages__"],
     deps = [
+        ":LinearTolerances_hdr",
         ":MiddleOutPolygonTriangulator_hdr",
         ":Tessellation_hdr",
         ":WangsFormula_hdr",
@@ -132,3 +133,13 @@
         "//src/gpu:BufferWriter_hdr",
     ],
 )
+
+generated_cc_atom(
+    name = "LinearTolerances_hdr",
+    hdrs = ["LinearTolerances.h"],
+    visibility = ["//:__subpackages__"],
+    deps = [
+        ":Tessellation_hdr",
+        ":WangsFormula_hdr",
+    ],
+)
diff --git a/src/gpu/tessellate/LinearTolerances.h b/src/gpu/tessellate/LinearTolerances.h
new file mode 100644
index 0000000..22b0263
--- /dev/null
+++ b/src/gpu/tessellate/LinearTolerances.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef skgpu_tessellate_LinearTolerances_DEFINED
+#define skgpu_tessellate_LinearTolerances_DEFINED
+
+#include "src/gpu/tessellate/Tessellation.h"
+#include "src/gpu/tessellate/WangsFormula.h"
+
+namespace skgpu::tess {
+
+/**
+ * LinearTolerances stores state to approximate the final device-space transform applied
+ * to curves, and uses that to calculate segmentation levels for both the parametric curves and
+ * radial components (when stroking, where you have to represent the offset of a curve).
+ * These tolerances determine the worst-case number of parametric and radial segments required to
+ * accurately linearize curves.
+ * - segments = a linear subsection on the curve, either defined as parametric (linear in t) or
+ *   radial (linear in curve's internal rotation).
+ * - edges = orthogonal geometry to segments, used in stroking to offset from the central curve by
+ *   half the stroke width, or to construct the join geometry.
+ *
+ * The tolerance values and decisions are estimated in the local path space, although PatchWriter
+ * uses a 2x2 vector transform that approximates the scale/skew (as-best-as-possible) of the full
+ * local-to-device transform applied in the vertex shader.
+ *
+ * The properties tracked in LinearTolerances can be used to compute the final segmentation factor
+ * for filled paths (the resolve level) or stroked paths (the number of edges).
+ */
+class LinearTolerances {
+public:
+    float numParametricSegments_pow4() const { return fNumParametricSegments_pow4; }
+    float numRadialSegmentsPerRadian() const { return fNumRadialSegmentsPerRadian; }
+    int   numEdgesInJoins() const { return fEdgesInJoins; }
+
+    // Fast log2 of minimum required # of segments per tracked Wang's formula calculations.
+    int requiredResolveLevel() const {
+        // log16(n^4) == log2(n)
+        return wangs_formula::nextlog16(fNumParametricSegments_pow4);
+    }
+
+    // Worst-case number of edges in a stroke instance: the join edges plus the stroke edges.
+    int requiredStrokeEdges() const {
+        // The maximum rotation we can have in a stroke is 180 degrees (SK_ScalarPI radians).
+        int maxRadialSegmentsInStroke =
+                std::max(SkScalarCeilToInt(fNumRadialSegmentsPerRadian * SK_ScalarPI), 1);
+
+        int maxParametricSegmentsInStroke =
+                SkScalarCeilToInt(wangs_formula::root4(fNumParametricSegments_pow4));
+        SkASSERT(maxParametricSegmentsInStroke >= 1);
+
+        // Now calculate the maximum number of edges we will need in the stroke portion of the
+        // instance. The first and last edges in a stroke are shared by both the parametric and
+        // radial sets of edges, so the total number of edges is:
+        //
+        //   numCombinedEdges = numParametricEdges + numRadialEdges - 2
+        //
+        // It's important to differentiate between the number of edges and segments in a strip:
+        //
+        //   numSegments = numEdges - 1
+        //
+        // So the total number of combined edges in the stroke is:
+        //
+        //   numEdgesInStroke = numParametricSegments + 1 + numRadialSegments + 1 - 2
+        //                    = numParametricSegments + numRadialSegments
+        int maxEdgesInStroke = maxRadialSegmentsInStroke + maxParametricSegmentsInStroke;
+
+        // Each triangle strip has two sections: It starts with a join then transitions to a
+        // stroke. The number of edges in an instance is the sum of edges from the join and
+        // stroke sections both.
+        // NOTE: The final join edge and the first stroke edge are co-located, however we still
+        // need to emit both because the join's edge is half-width and the stroke is full-width.
+        return fEdgesInJoins + maxEdgesInStroke;
+    }
+
+    // Keep the max of each tolerance recorded so far. TODO: These will be renamed to setFoo()
+    // when accumulation of worst case tolerances is moved outside of PatchWriter.
+    void accumulateParametricSegments(float n4) {
+        SkASSERT(n4 >= 0.f);
+        fNumParametricSegments_pow4 = std::max(n4, fNumParametricSegments_pow4);
+    }
+
+    // Folds a stroke's radial segments per radian and join edge count into the running maximums.
+    void accumulateStroke(const StrokeParams& strokeParams, float maxScale) {
+        float approxDeviceStrokeRadius;
+        if (strokeParams.fRadius == 0.f) {
+            // Hairlines are always 1 px wide
+            approxDeviceStrokeRadius = 0.5f;
+        } else {
+            // Approximate max scale * local stroke width / 2
+            approxDeviceStrokeRadius = strokeParams.fRadius * maxScale;
+        }
+
+        float numRadialSegmentsPerRadian = CalcNumRadialSegmentsPerRadian(approxDeviceStrokeRadius);
+        fNumRadialSegmentsPerRadian = std::max(fNumRadialSegmentsPerRadian,
+                                               numRadialSegmentsPerRadian);
+
+        int edgesInJoins = NumFixedEdgesInJoin(strokeParams);
+        if (strokeParams.fJoinType < 0.f && numRadialSegmentsPerRadian > 0.f) {
+            // For round joins we need to count the radial edges on our own. Account for a
+            // worst-case join of 180 degrees (SK_ScalarPI radians).
+            edgesInJoins += SkScalarCeilToInt(numRadialSegmentsPerRadian * SK_ScalarPI) - 1;
+        }
+        fEdgesInJoins = std::max(fEdgesInJoins, edgesInJoins);
+    }
+
+private:
+    // Used for both fills and strokes, always at least one parametric segment
+    float fNumParametricSegments_pow4 = 1.f;
+    // Used for strokes, adding additional segments along the curve to account for its rotation
+    // TODO: Currently we assume the worst case 180 degree rotation for any curve, but tracking
+    // max(radialSegments * patch curvature) would be tighter. This would require computing
+    // rotation per patch, which could be approximated by tracking min of the tangent dot
+    // products, but then we'd be left with the slightly less accurate
+    // "max(radialSegments) * acos(min(tan dot product))". It is also unknown if requesting
+    // tighter bounds pays off with less GPU work for more CPU work.
+    float fNumRadialSegmentsPerRadian = 0.f;
+    // Used for strokes, tracking the number of additional vertices required to handle joins
+    // based on the join type and stroke width.
+    // TODO: For round joins, we could also track the rotation angle of the join, instead of
+    // assuming 180 degrees. PatchWriter has all necessary control points to do so, but runs
+    // into similar trade offs between CPU vs GPU work, and accuracy vs. reducing calls to acos.
+    int   fEdgesInJoins = 0;
+};
+
+}  // namespace skgpu::tess
+
+#endif // skgpu_tessellate_LinearTolerances_DEFINED
diff --git a/src/gpu/tessellate/PatchWriter.h b/src/gpu/tessellate/PatchWriter.h
index fe674c3..ac7c31a 100644
--- a/src/gpu/tessellate/PatchWriter.h
+++ b/src/gpu/tessellate/PatchWriter.h
@@ -10,6 +10,7 @@
 
 #include "include/private/SkColorData.h"
 #include "src/gpu/BufferWriter.h"
+#include "src/gpu/tessellate/LinearTolerances.h"
 #include "src/gpu/tessellate/MiddleOutPolygonTriangulator.h"
 #include "src/gpu/tessellate/Tessellation.h"
 #include "src/gpu/tessellate/WangsFormula.h"
@@ -231,7 +232,6 @@
             PatchStorage<kMaxStride>, std::monostate>;
     using InnerTriangulator = std::conditional_t<kAddTrianglesWhenChopping,
             MiddleOutPolygonTriangulator, NullTriangulator>;
-    using VectorXform = wangs_formula::VectorXform;
 
     static_assert(!kTrackJoinControlPoints || req_attrib<PatchAttribs::kJoinControlPoint>::value,
                   "Deferred patches and auto-updating joins requires kJoinControlPoint attrib");
@@ -240,7 +240,6 @@
     PatchWriter(PatchAttribs attribs,
                 Args&&... allocArgs)
             : fAttribs(attribs)
-            , fCurrMinSegments_pow4(1.f)
             , fPatchAllocator(PatchStride(attribs), std::forward<Args>(allocArgs)...)
             , fJoin(attribs)
             , fFanPoint(attribs)
@@ -267,13 +266,15 @@
 
     PatchAttribs attribs() const { return fAttribs; }
 
-    // Fast log2 of minimum required # of segments per tracked Wang's formula calculations.
-    int requiredResolveLevel() const {
-        return wangs_formula::nextlog16(fCurrMinSegments_pow4); // log16(n^4) == log2(n)
-    }
-    // Fast minimum required # of segments from tracked Wang's formula calculations.
-    int requiredFixedSegments() const {
-        return SkScalarCeilToInt(wangs_formula::root4(fCurrMinSegments_pow4));
+    LinearTolerances& tolerances() { return fTolerances; }
+    const LinearTolerances& tolerances() const { return fTolerances; }
+
+    // The max scale factor should be derived from the same matrix that 'xform' was. It's only used
+    // in stroking calculations, so can be ignored for path filling.
+    void setShaderTransform(const wangs_formula::VectorXform& xform,
+                            float maxScale = 1.f) {
+        fApproxTransform = xform;
+        fMaxScale = maxScale;
     }
 
     // Completes a closed contour of a stroke by rewriting a deferred patch with now-available
@@ -316,6 +317,7 @@
     ENABLE_IF(StrokeAttrib::kEnabled) updateStrokeParamsAttrib(StrokeParams strokeParams) {
         SkASSERT(fAttribs & PatchAttribs::kStrokeParams);
         fStrokeParams = strokeParams;
+        fTolerances.accumulateStroke(strokeParams, fMaxScale);
     }
 
     // Updates the color that will be written out with each patch.
@@ -346,9 +348,8 @@
      */
 
     // Write a cubic curve with its four control points.
-    AI void writeCubic(float2 p0, float2 p1, float2 p2, float2 p3,
-                       const VectorXform& shaderXform) {
-        float n4 = wangs_formula::cubic_pow4(kPrecision, p0, p1, p2, p3, shaderXform);
+    AI void writeCubic(float2 p0, float2 p1, float2 p2, float2 p3) {
+        float n4 = wangs_formula::cubic_pow4(kPrecision, p0, p1, p2, p3, fApproxTransform);
         if constexpr (kDiscardFlatCurves) {
             if (n4 <= 1.f) {
                 // This cubic only needs one segment (e.g. a line) but we're not filling space with
@@ -356,26 +357,22 @@
                 return;
             }
         }
-        if (this->curveFitsInMaxSegments(n4)) {
-            this->writeCubicPatch(p0, p1, p2, p3);
-        } else {
-            int numPatches = SkScalarCeilToInt(wangs_formula::root4(
-                    std::min(n4, pow4(kMaxSegmentsPerCurve)) / pow4(kMaxParametricSegments)));
+        if (int numPatches = this->accountForCurve(n4)) {
             this->chopAndWriteCubics(p0, p1, p2, p3, numPatches);
+        } else {
+            this->writeCubicPatch(p0, p1, p2, p3);
         }
     }
-    AI void writeCubic(const SkPoint pts[4],
-                       const VectorXform& shaderXform) {
+    AI void writeCubic(const SkPoint pts[4]) {
         float4 p0p1 = float4::Load(pts);
         float4 p2p3 = float4::Load(pts + 2);
-        this->writeCubic(p0p1.lo, p0p1.hi, p2p3.lo, p2p3.hi, shaderXform);
+        this->writeCubic(p0p1.lo, p0p1.hi, p2p3.lo, p2p3.hi);
     }
 
     // Write a conic curve with three control points and 'w', with the last coord of the last
     // control point signaling a conic by being set to infinity.
-    AI void writeConic(float2 p0, float2 p1, float2 p2, float w,
-                       const VectorXform& shaderXform) {
-        float n2 = wangs_formula::conic_pow2(kPrecision, p0, p1, p2, w, shaderXform);
+    AI void writeConic(float2 p0, float2 p1, float2 p2, float w) {
+        float n2 = wangs_formula::conic_pow2(kPrecision, p0, p1, p2, w, fApproxTransform);
         if constexpr (kDiscardFlatCurves) {
             if (n2 <= 1.f) {
                 // This conic only needs one segment (e.g. a line) but we're not filling space with
@@ -383,27 +380,23 @@
                 return;
             }
         }
-        if (this->curveFitsInMaxSegments(n2*n2)) {
-            this->writeConicPatch(p0, p1, p2, w);
-        } else {
-            int numPatches = SkScalarCeilToInt(sqrtf(
-                    std::min(n2, pow2(kMaxSegmentsPerCurve)) / pow2(kMaxParametricSegments)));
+        if (int numPatches = this->accountForCurve(n2 * n2)) {
             this->chopAndWriteConics(p0, p1, p2, w, numPatches);
+        } else {
+            this->writeConicPatch(p0, p1, p2, w);
         }
     }
-    AI void writeConic(const SkPoint pts[3], float w,
-                       const VectorXform& shaderXform) {
+    AI void writeConic(const SkPoint pts[3], float w) {
         this->writeConic(skvx::bit_pun<float2>(pts[0]),
                          skvx::bit_pun<float2>(pts[1]),
                          skvx::bit_pun<float2>(pts[2]),
-                         w, shaderXform);
+                         w);
     }
 
     // Write a quadratic curve that automatically converts its three control points into an
     // equivalent cubic.
-    AI void writeQuadratic(float2 p0, float2 p1, float2 p2,
-                           const VectorXform& shaderXform) {
-        float n4 = wangs_formula::quadratic_pow4(kPrecision, p0, p1, p2, shaderXform);
+    AI void writeQuadratic(float2 p0, float2 p1, float2 p2) {
+        float n4 = wangs_formula::quadratic_pow4(kPrecision, p0, p1, p2, fApproxTransform);
         if constexpr (kDiscardFlatCurves) {
             if (n4 <= 1.f) {
                 // This quad only needs one segment (e.g. a line) but we're not filling space with
@@ -411,25 +404,22 @@
                 return;
             }
         }
-        if (this->curveFitsInMaxSegments(n4)) {
-            this->writeQuadPatch(p0, p1, p2);
-        } else {
-            int numPatches = SkScalarCeilToInt(wangs_formula::root4(
-                    std::min(n4, pow4(kMaxSegmentsPerCurve)) / pow4(kMaxParametricSegments)));
+        if (int numPatches = this->accountForCurve(n4)) {
             this->chopAndWriteQuads(p0, p1, p2, numPatches);
+        } else {
+            this->writeQuadPatch(p0, p1, p2);
         }
     }
-    AI void writeQuadratic(const SkPoint pts[3],
-                           const VectorXform& shaderXform) {
+    AI void writeQuadratic(const SkPoint pts[3]) {
         this->writeQuadratic(skvx::bit_pun<float2>(pts[0]),
                              skvx::bit_pun<float2>(pts[1]),
-                             skvx::bit_pun<float2>(pts[2]),
-                             shaderXform);
+                             skvx::bit_pun<float2>(pts[2]));
     }
 
     // Write a line that is automatically converted into an equivalent cubic.
     AI void writeLine(float4 p0p1) {
-        // No chopping needed, minimum segments is always at least 1
+        // No chopping needed, a line only ever requires one segment (the minimum required already).
+        fTolerances.accumulateParametricSegments(1.f);
         if constexpr (kReplicateLineEndPoints) {
             // Visually this cubic is still a line, but 't' does not move linearly over the line,
             // so Wang's formula is more pessimistic. Shaders should avoid evaluating Wang's
@@ -452,7 +442,8 @@
     AI void writeTriangle(float2 p0, float2 p1, float2 p2) {
         // No chopping needed, the max supported segment count should always support 2 lines
         // (which form a triangle when implicitly closed).
-        SkAssertResult(this->curveFitsInMaxSegments(2.f * 2.f * 2.f * 2.f));
+        static constexpr float kTriangleSegments_pow4 = 2.f * 2.f * 2.f * 2.f;
+        fTolerances.accumulateParametricSegments(kTriangleSegments_pow4);
         this->writePatch(p0, p1, p2, {SK_FloatInfinity, SK_FloatInfinity},
                          kTriangularConicCurveType);
     }
@@ -533,14 +524,17 @@
         this->writePatch(p0, p1, p2, {w, SK_FloatInfinity}, kConicCurveType);
     }
 
-    // Returns true if curve can be written w/o needing to chop (e.g. represented by one instance)
-    bool curveFitsInMaxSegments(float n4) {
+    int accountForCurve(float n4) {
         if (n4 <= pow4(kMaxParametricSegments)) {
-            fCurrMinSegments_pow4 = std::max(n4, fCurrMinSegments_pow4);
-            return true;
+            // Record n^4 and return 0 to signal no chopping
+            fTolerances.accumulateParametricSegments(n4);
+            return 0;
         } else {
-            fCurrMinSegments_pow4 = pow4(kMaxParametricSegments);
-            return false;
+            // Clamp to max allowed segmentation for a patch and return required number of chops
+            // to achieve visual correctness.
+            fTolerances.accumulateParametricSegments(pow4(kMaxParametricSegments));
+            return SkScalarCeilToInt(wangs_formula::root4(std::min(n4, pow4(kMaxSegmentsPerCurve)) /
+                                                          pow4(kMaxParametricSegments)));
         }
     }
 
@@ -683,7 +677,15 @@
     // attribs enabled (e.g. depending on caps or batching).
     const PatchAttribs fAttribs;
 
-    float fCurrMinSegments_pow4;
+    // The 2x2 approximation of the local-to-device transform that will affect subsequently
+    // recorded curves (when fully transformed in the vertex shader).
+    wangs_formula::VectorXform fApproxTransform = {};
+    // A maximum scale factor extracted from the current approximate transform.
+    float fMaxScale = 1.0f;
+    // Tracks the linear tolerances for the worst-case written patches.
+    // TODO: This will change to be just the most-recent patch when accumulation is moved outside
+    // of PatchWriter.
+    LinearTolerances fTolerances;
 
     PatchAllocator fPatchAllocator;
     DeferredPatch  fDeferredPatch; // only usable if kTrackJoinControlPoints is true
diff --git a/src/gpu/tessellate/Tessellation.h b/src/gpu/tessellate/Tessellation.h
index d286be6..2dbe1ed 100644
--- a/src/gpu/tessellate/Tessellation.h
+++ b/src/gpu/tessellate/Tessellation.h
@@ -232,17 +232,9 @@
     }
     SkUNREACHABLE;
 }
-
-// Returns the worst-case number of edges we will need in order to draw a join of the given type.
-constexpr int WorstCaseEdgesInJoin(SkPaint::Join joinType,
-                                   float numRadialSegmentsPerRadian) {
-    int numEdges = NumFixedEdgesInJoin(joinType);
-    if (joinType == SkPaint::kRound_Join) {
-        // For round joins we need to count the radial edges on our own. Account for a worst-case
-        // join of 180 degrees (SK_ScalarPI radians).
-        numEdges += std::max(SkScalarCeilToInt(numRadialSegmentsPerRadian * SK_ScalarPI) - 1, 0);
-    }
-    return numEdges;
+constexpr int NumFixedEdgesInJoin(const StrokeParams& strokeParams) {
+    // Fixed edges only; the caller must count the variable number of segments for round joins.
+    return strokeParams.fJoinType > 0.f ? /* miter */ 4 : /* round or bevel */ 3;
+}
 
 // Decides the number of radial segments the tessellator adds for each curve. (Uniform steps
diff --git a/src/gpu/tessellate/WangsFormula.h b/src/gpu/tessellate/WangsFormula.h
index 22cc8767..ff48150 100644
--- a/src/gpu/tessellate/WangsFormula.h
+++ b/src/gpu/tessellate/WangsFormula.h
@@ -62,7 +62,7 @@
 //
 class VectorXform {
 public:
-    AI explicit VectorXform() : fType(Type::kIdentity) {}
+    AI VectorXform() : fType(Type::kIdentity) {}
     AI explicit VectorXform(const SkMatrix& m) { *this = m; }
     AI VectorXform& operator=(const SkMatrix& m) {
         SkASSERT(!m.hasPerspective());