Revert "Lift recursive curve culling out of tessellators"

This reverts commit 75e1f4c028c956064d351196605dfa557b318d30.

Reason for revert: The parent CL needs to be reverted, and this change must be reverted first.

Original change's description:
> Lift recursive curve culling out of tessellators
>
> We need recursive chopping/culling logic in order to draw
> astronomically large paths. But rather than do that at the same time
> the tessellators chop curves, this CL moves that logic into an
> SkPath -> SkPath transformation that runs ahead of time (and only if
> the path is extremely large to begin with). This will enable us to
> remove recursion from the tessellators and quickly determine ahead of
> time the size of buffers they need.
>
> Bug: skia:12524
> Change-Id: Ib2800fb23054f1548501811203173e58273fbc83
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/463936
> Commit-Queue: Chris Dalton <csmartdalton@google.com>
> Reviewed-by: Greg Daniel <egdaniel@google.com>

Bug: skia:12524
Change-Id: Ib075e8398767fd7b9f49e50ecee98ce47a2ad6b1
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/464296
Auto-Submit: Greg Daniel <egdaniel@google.com>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Commit-Queue: Greg Daniel <egdaniel@google.com>
diff --git a/bench/TessellateBench.cpp b/bench/TessellateBench.cpp
index 5fcbb8c..b60478b 100644
--- a/bench/TessellateBench.cpp
+++ b/bench/TessellateBench.cpp
@@ -128,7 +128,8 @@
                                            fTarget->caps().minPathVerbsForHwTessellation(),
                                            noVaryingsPipeline,
                                            fTarget->caps());
-    tess->prepare(fTarget.get(), {gAlmostIdentity, fPath}, fPath.countVerbs());
+    tess->prepare(fTarget.get(), SkRectPriv::MakeLargest(), {gAlmostIdentity, fPath},
+                  fPath.countVerbs());
 }
 
 DEF_PATH_TESS_BENCH(GrPathWedgeTessellator, make_cubic_path(8), SkMatrix::I()) {
@@ -141,7 +142,8 @@
                                            fTarget->caps().minPathVerbsForHwTessellation(),
                                            noVaryingsPipeline,
                                            fTarget->caps());
-    tess->prepare(fTarget.get(), {gAlmostIdentity, fPath}, fPath.countVerbs());
+    tess->prepare(fTarget.get(), SkRectPriv::MakeLargest(), {gAlmostIdentity, fPath},
+                  fPath.countVerbs());
 }
 
 static void benchmark_wangs_formula_cubic_log2(const SkMatrix& matrix, const SkPath& path) {
@@ -229,16 +231,19 @@
                                                                 const GrShaderCaps&,
                                                                 const SkMatrix&,
                                                                 PathStrokeList*,
-                                                                std::array<float, 2>);
+                                                                std::array<float, 2>,
+                                                                const SkRect&);
 
 static std::unique_ptr<StrokeTessellator> make_hw_tessellator(
         ShaderFlags shaderFlags,
         const GrShaderCaps& shaderCaps,
         const SkMatrix& viewMatrix,
         PathStrokeList* pathStrokeList,
-        std::array<float,2> matrixMinMaxScales) {
+        std::array<float,2> matrixMinMaxScales,
+        const SkRect& strokeCullBounds) {
     return std::make_unique<StrokeHardwareTessellator>(shaderCaps, shaderFlags, viewMatrix,
-                                                       pathStrokeList, matrixMinMaxScales);
+                                                       pathStrokeList, matrixMinMaxScales,
+                                                       strokeCullBounds);
 }
 
 static std::unique_ptr<StrokeTessellator> make_fixed_count_tessellator(
@@ -246,9 +251,11 @@
         const GrShaderCaps& shaderCaps,
         const SkMatrix& viewMatrix,
         PathStrokeList* pathStrokeList,
-        std::array<float, 2> matrixMinMaxScales) {
+        std::array<float, 2> matrixMinMaxScales,
+        const SkRect& strokeCullBounds) {
     return std::make_unique<StrokeFixedCountTessellator>(shaderCaps, shaderFlags, viewMatrix,
-                                                         pathStrokeList, matrixMinMaxScales);
+                                                         pathStrokeList, matrixMinMaxScales,
+                                                         strokeCullBounds);
 }
 
 using MakePathStrokesFn = std::vector<PathStrokeList>(*)();
@@ -350,7 +357,8 @@
 
         fTessellator = fMakeTessellatorFn(fShaderFlags, *fTarget->caps().shaderCaps(),
                                           SkMatrix::Scale(fMatrixScale, fMatrixScale),
-                                          fPathStrokes.data(), {fMatrixScale, fMatrixScale});
+                                          fPathStrokes.data(), {fMatrixScale, fMatrixScale},
+                                          SkRectPriv::MakeLargest());
     }
 
     void onDraw(int loops, SkCanvas*) final {
diff --git a/gn/gpu.gni b/gn/gpu.gni
index 72c54df..aa2d570 100644
--- a/gn/gpu.gni
+++ b/gn/gpu.gni
@@ -323,6 +323,7 @@
   "$_src/gpu/gradients/GrGradientShader.h",
 
   # tessellate
+  "$_src/gpu/tessellate/CullTest.h",
   "$_src/gpu/tessellate/PathCurveTessellator.cpp",
   "$_src/gpu/tessellate/PathCurveTessellator.h",
   "$_src/gpu/tessellate/PathTessellator.h",
@@ -335,6 +336,7 @@
   "$_src/gpu/tessellate/StrokeHardwareTessellator.h",
   "$_src/gpu/tessellate/StrokeIterator.h",
   "$_src/gpu/tessellate/StrokeTessellator.h",
+  "$_src/gpu/tessellate/WangsFormula.h",
 
   # tessellate/shaders
   "$_src/gpu/tessellate/shaders/GrPathTessellationShader.cpp",
@@ -827,9 +829,7 @@
   "$_src/gpu/BufferWriter.h",
 
   # tessellate
-  "$_src/gpu/tessellate/CullTest.h",
   "$_src/gpu/tessellate/MiddleOutPolygonTriangulator.h",
   "$_src/gpu/tessellate/Tessellation.cpp",
   "$_src/gpu/tessellate/Tessellation.h",
-  "$_src/gpu/tessellate/WangsFormula.h",
 ]
diff --git a/samplecode/SamplePathTessellators.cpp b/samplecode/SamplePathTessellators.cpp
index ab92f8a..824cbb7 100644
--- a/samplecode/SamplePathTessellators.cpp
+++ b/samplecode/SamplePathTessellators.cpp
@@ -122,7 +122,7 @@
                                                           caps);
                 break;
         }
-        fTessellator->prepare(flushState, {pathMatrix, fPath}, fPath.countVerbs());
+        fTessellator->prepare(flushState, this->bounds(), {pathMatrix, fPath}, fPath.countVerbs());
         fProgram = GrTessellationShader::MakeProgram({alloc, flushState->writeView(),
                                                      flushState->usesMSAASurface(),
                                                      &flushState->dstProxyView(),
diff --git a/src/gpu/ops/PathInnerTriangulateOp.cpp b/src/gpu/ops/PathInnerTriangulateOp.cpp
index 043c336..ef794d0 100644
--- a/src/gpu/ops/PathInnerTriangulateOp.cpp
+++ b/src/gpu/ops/PathInnerTriangulateOp.cpp
@@ -408,10 +408,8 @@
 
     if (fTessellator) {
         // Must be called after polysToTriangles() in order for fFanBreadcrumbs to be complete.
-        fTessellator->prepare(flushState,
-                              {SkMatrix::I(), fPath},
-                              fPath.countVerbs(),
-                              &fFanBreadcrumbs);
+        fTessellator->prepare(flushState, this->bounds(), {SkMatrix::I(), fPath},
+                              fPath.countVerbs(), &fFanBreadcrumbs);
     }
 
     if (!flushState->caps().shaderCaps()->vertexIDSupport()) {
diff --git a/src/gpu/ops/PathStencilCoverOp.cpp b/src/gpu/ops/PathStencilCoverOp.cpp
index 07f908d..df30fd1 100644
--- a/src/gpu/ops/PathStencilCoverOp.cpp
+++ b/src/gpu/ops/PathStencilCoverOp.cpp
@@ -259,7 +259,7 @@
         vertexAlloc.unlock(fFanVertexCount);
     }
 
-    fTessellator->prepare(flushState, *fPathDrawList, fTotalCombinedPathVerbCnt);
+    fTessellator->prepare(flushState, this->bounds(), *fPathDrawList, fTotalCombinedPathVerbCnt);
 
     if (fCoverBBoxProgram) {
         size_t instanceStride = fCoverBBoxProgram->geomProc().instanceStride();
diff --git a/src/gpu/ops/PathTessellateOp.cpp b/src/gpu/ops/PathTessellateOp.cpp
index 3be1b86..09176da 100644
--- a/src/gpu/ops/PathTessellateOp.cpp
+++ b/src/gpu/ops/PathTessellateOp.cpp
@@ -64,7 +64,7 @@
                                  &flushState->caps()}, flushState->detachAppliedClip());
         SkASSERT(fTessellator);
     }
-    fTessellator->prepare(flushState, {SkMatrix::I(), fPath}, fPath.countVerbs());
+    fTessellator->prepare(flushState, this->bounds(), {SkMatrix::I(), fPath}, fPath.countVerbs());
 }
 
 void PathTessellateOp::onExecute(GrOpFlushState* flushState, const SkRect& chainBounds) {
diff --git a/src/gpu/ops/StrokeTessellateOp.cpp b/src/gpu/ops/StrokeTessellateOp.cpp
index e9f1c77..cb83f8a 100644
--- a/src/gpu/ops/StrokeTessellateOp.cpp
+++ b/src/gpu/ops/StrokeTessellateOp.cpp
@@ -54,15 +54,15 @@
     SkRect devBounds = path.getBounds();
     if (!this->headStroke().isHairlineStyle()) {
         // Non-hairlines inflate in local path space (pre-transform).
-        float r = stroke.getInflationRadius();
-        devBounds.outset(r, r);
+        fInflationRadius = stroke.getInflationRadius();
+        devBounds.outset(fInflationRadius, fInflationRadius);
     }
     viewMatrix.mapRect(&devBounds, devBounds);
     if (this->headStroke().isHairlineStyle()) {
         // Hairlines inflate in device space (post-transform).
-        float r = SkStrokeRec::GetInflationRadius(stroke.getJoin(), stroke.getMiter(),
-                                                  stroke.getCap(), 1);
-        devBounds.outset(r, r);
+        fInflationRadius = SkStrokeRec::GetInflationRadius(stroke.getJoin(), stroke.getMiter(),
+                                                           stroke.getCap(), 1);
+        devBounds.outset(fInflationRadius, fInflationRadius);
     }
     this->setBounds(devBounds, HasAABloat::kNo, IsHairline::kNo);
 }
@@ -143,6 +143,7 @@
     fPathStrokeTail = (op->fPathStrokeTail == &op->fPathStrokeList.fNext) ? &headCopy->fNext
                                                                           : op->fPathStrokeTail;
 
+    fInflationRadius = std::max(fInflationRadius, op->fInflationRadius);
     fTotalCombinedVerbCnt += op->fTotalCombinedVerbCnt;
     return CombineResult::kMerged;
 }
@@ -186,6 +187,12 @@
         matrixMinMaxScales.fill(1);
     }
 
+    float devInflationRadius = fInflationRadius;
+    if (!this->headStroke().isHairlineStyle()) {
+        devInflationRadius *= matrixMinMaxScales[1];
+    }
+    SkRect strokeCullBounds = this->bounds().makeOutset(devInflationRadius, devInflationRadius);
+
     auto* pipeline = GrTessellationShader::MakePipeline(args, fAAType, std::move(clip),
                                                         std::move(fProcessors));
 
@@ -196,13 +203,15 @@
                                                               fShaderFlags,
                                                               fViewMatrix,
                                                               &fPathStrokeList,
-                                                              matrixMinMaxScales);
+                                                              matrixMinMaxScales,
+                                                              strokeCullBounds);
     } else {
         fTessellator = arena->make<StrokeFixedCountTessellator>(*caps.shaderCaps(),
                                                                 fShaderFlags,
                                                                 fViewMatrix,
                                                                 &fPathStrokeList,
-                                                                matrixMinMaxScales);
+                                                                matrixMinMaxScales,
+                                                                strokeCullBounds);
     }
 
     auto fillStencil = &GrUserStencilSettings::kUnused;
diff --git a/src/gpu/ops/StrokeTessellateOp.h b/src/gpu/ops/StrokeTessellateOp.h
index a8e2ef6..8934865 100644
--- a/src/gpu/ops/StrokeTessellateOp.h
+++ b/src/gpu/ops/StrokeTessellateOp.h
@@ -69,6 +69,7 @@
     ShaderFlags fShaderFlags = ShaderFlags::kNone;
     PathStrokeList fPathStrokeList;
     PathStrokeList** fPathStrokeTail = &fPathStrokeList.fNext;
+    float fInflationRadius = 0;
     int fTotalCombinedVerbCnt = 0;
     GrProcessorSet fProcessors;
     bool fNeedsStencil;
diff --git a/src/gpu/ops/TessellationPathRenderer.cpp b/src/gpu/ops/TessellationPathRenderer.cpp
index 77105b2..ce4ed0e 100644
--- a/src/gpu/ops/TessellationPathRenderer.cpp
+++ b/src/gpu/ops/TessellationPathRenderer.cpp
@@ -19,8 +19,6 @@
 #include "src/gpu/ops/PathStencilCoverOp.h"
 #include "src/gpu/ops/PathTessellateOp.h"
 #include "src/gpu/ops/StrokeTessellateOp.h"
-#include "src/gpu/tessellate/Tessellation.h"
-#include "src/gpu/tessellate/WangsFormula.h"
 #include "src/gpu/v1/SurfaceDrawContext_v1.h"
 
 namespace {
@@ -115,33 +113,6 @@
     SkPath path;
     args.fShape->asPath(&path);
 
-    const SkRect pathDevBounds = args.fViewMatrix->mapRect(args.fShape->bounds());
-    float n = wangs_formula::worst_case_cubic_pow4(kTessellationPrecision,
-                                                   pathDevBounds.width(),
-                                                   pathDevBounds.height());
-    if (n > pow4(kMaxTessellationSegmentsPerCurve)) {
-        // The path is extremely large. Pre-chop its curves to keep the number of tessellation
-        // segments tractable. This will also flatten curves that fall completely outside the
-        // viewport.
-        SkRect viewport = SkRect::Make(*args.fClipConservativeBounds);
-        if (!args.fShape->style().isSimpleFill()) {
-            // Outset the viewport to pad for the stroke width.
-            const SkStrokeRec& stroke = args.fShape->style().strokeRec();
-            float inflationRadius;
-            if (stroke.isHairlineStyle()) {
-                // SkStrokeRec::getInflationRadius() doesn't handle hairlines robustly. Instead
-                // find the inflation of an equivalent stroke in device space with a width of 1.
-                inflationRadius = SkStrokeRec::GetInflationRadius(stroke.getJoin(),
-                                                                  stroke.getMiter(),
-                                                                  stroke.getCap(), 1);
-            } else {
-                inflationRadius = stroke.getInflationRadius() * args.fViewMatrix->getMaxScale();
-            }
-            viewport.outset(inflationRadius, inflationRadius);
-        }
-        path = PreChopPathCurves(path, *args.fViewMatrix, viewport);
-    }
-
     // Handle strokes first.
     if (!args.fShape->style().isSimpleFill()) {
         SkASSERT(!path.isInverseFillType());  // See onGetStencilSupport().
@@ -155,6 +126,7 @@
     }
 
     // Handle empty paths.
+    const SkRect pathDevBounds = args.fViewMatrix->mapRect(args.fShape->bounds());
     if (pathDevBounds.isEmpty()) {
         if (path.isInverseFillType()) {
             args.fSurfaceDrawContext->drawPaint(args.fClip, std::move(args.fPaint),
@@ -201,14 +173,6 @@
     SkPath path;
     args.fShape->asPath(&path);
 
-    float n = wangs_formula::worst_case_cubic_pow4(kTessellationPrecision,
-                                                   pathDevBounds.width(),
-                                                   pathDevBounds.height());
-    if (n > pow4(kMaxTessellationSegmentsPerCurve)) {
-        SkRect viewport = SkRect::Make(*args.fClipConservativeBounds);
-        path = PreChopPathCurves(path, *args.fViewMatrix, viewport);
-    }
-
     if (args.fShape->knownToBeConvex()) {
         constexpr static GrUserStencilSettings kMarkStencil(
             GrUserStencilSettings::StaticInit<
diff --git a/src/gpu/tessellate/PathCurveTessellator.cpp b/src/gpu/tessellate/PathCurveTessellator.cpp
index bc76286..2985369 100644
--- a/src/gpu/tessellate/PathCurveTessellator.cpp
+++ b/src/gpu/tessellate/PathCurveTessellator.cpp
@@ -11,6 +11,7 @@
 #include "src/gpu/GrMeshDrawTarget.h"
 #include "src/gpu/GrResourceProvider.h"
 #include "src/gpu/geometry/GrPathUtils.h"
+#include "src/gpu/tessellate/CullTest.h"
 #include "src/gpu/tessellate/MiddleOutPolygonTriangulator.h"
 #include "src/gpu/tessellate/PathXform.h"
 #include "src/gpu/tessellate/Tessellation.h"
@@ -34,10 +35,12 @@
             , fMaxSegments_pow4(fMaxSegments_pow2 * fMaxSegments_pow2) {
     }
 
-    void setMatrices(const SkMatrix& shaderMatrix,
+    void setMatrices(const SkRect& cullBounds,
+                     const SkMatrix& shaderMatrix,
                      const SkMatrix& pathMatrix) {
         SkMatrix totalMatrix;
         totalMatrix.setConcat(shaderMatrix, pathMatrix);
+        fCullTest.set(cullBounds, totalMatrix);
         fTotalVectorXform = totalMatrix;
         fPathXform = pathMatrix;
     }
@@ -108,8 +111,12 @@
                                const SkPoint p[3]) {
         SkPoint chops[5];
         SkChopQuadAtHalf(p, chops);
-        this->writeQuadratic(shaderCaps, chunker, chops);
-        this->writeQuadratic(shaderCaps, chunker, chops + 2);
+        for (int i = 0; i < 2; ++i) {
+            const SkPoint* q = chops + i*2;
+            if (fCullTest.areVisible3(q)) {
+                this->writeQuadratic(shaderCaps, chunker, q);
+            }
+        }
         // Connect the two halves.
         this->writeTriangle(shaderCaps, chunker, chops[0], chops[2], chops[4]);
     }
@@ -120,8 +127,11 @@
         if (!conic.chopAt(.5, chops)) {
             return;
         }
-        this->writeConic(shaderCaps, chunker, chops[0].fPts, chops[0].fW);
-        this->writeConic(shaderCaps, chunker, chops[1].fPts, chops[1].fW);
+        for (int i = 0; i < 2; ++i) {
+            if (fCullTest.areVisible3(chops[i].fPts)) {
+                this->writeConic(shaderCaps, chunker, chops[i].fPts, chops[i].fW);
+            }
+        }
         // Connect the two halves.
         this->writeTriangle(shaderCaps, chunker, conic.fPts[0], chops[0].fPts[2], chops[1].fPts[2]);
     }
@@ -130,8 +140,12 @@
                            const SkPoint p[4]) {
         SkPoint chops[7];
         SkChopCubicAtHalf(p, chops);
-        this->writeCubic(shaderCaps, chunker, chops);
-        this->writeCubic(shaderCaps, chunker, chops + 3);
+        for (int i = 0; i < 2; ++i) {
+            const SkPoint* c = chops + i*3;
+            if (fCullTest.areVisible4(c)) {
+                this->writeCubic(shaderCaps, chunker, c);
+            }
+        }
         // Connect the two halves.
         this->writeTriangle(shaderCaps, chunker, chops[0], chops[3], chops[6]);
     }
@@ -149,6 +163,7 @@
         }
     }
 
+    CullTest fCullTest;
     wangs_formula::VectorXform fTotalVectorXform;
     PathXform fPathXform;
     const float fMaxSegments_pow2;
@@ -191,6 +206,7 @@
 GR_DECLARE_STATIC_UNIQUE_KEY(gFixedCountIndexBufferKey);
 
 void PathCurveTessellator::prepare(GrMeshDrawTarget* target,
+                                   const SkRect& cullBounds,
                                    const PathDrawList& pathDrawList,
                                    int totalCombinedPathVerbCnt,
                                    const BreadcrumbTriangleList* breadcrumbTriangleList) {
@@ -296,7 +312,7 @@
 
     CurveWriter curveWriter(maxSegments);
     for (auto [pathMatrix, path] : pathDrawList) {
-        curveWriter.setMatrices(fShader->viewMatrix(), pathMatrix);
+        curveWriter.setMatrices(cullBounds, fShader->viewMatrix(), pathMatrix);
         for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) {
             switch (verb) {
                 case SkPathVerb::kQuad:
diff --git a/src/gpu/tessellate/PathCurveTessellator.h b/src/gpu/tessellate/PathCurveTessellator.h
index af0c71a..6e8ca5d 100644
--- a/src/gpu/tessellate/PathCurveTessellator.h
+++ b/src/gpu/tessellate/PathCurveTessellator.h
@@ -39,9 +39,10 @@
                                       const GrCaps&);
 
     void prepare(GrMeshDrawTarget* target,
+                 const SkRect& cullBounds,
                  const PathDrawList& pathDrawList,
                  int totalCombinedPathVerbCnt) override {
-        this->prepare(target, pathDrawList, totalCombinedPathVerbCnt, nullptr);
+        this->prepare(target, cullBounds, pathDrawList, totalCombinedPathVerbCnt, nullptr);
     }
 
     // Implements PathTessellator::prepare(), also sending an additional list of breadcrumb
@@ -50,6 +51,7 @@
     // ALSO NOTE: The breadcrumb triangles do not have a matrix. These need to be pre-transformed by
     // the caller if a CPU-side transformation is desired.
     void prepare(GrMeshDrawTarget*,
+                 const SkRect& cullBounds,
                  const PathDrawList&,
                  int totalCombinedPathVerbCnt,
                  const BreadcrumbTriangleList*);
diff --git a/src/gpu/tessellate/PathTessellator.h b/src/gpu/tessellate/PathTessellator.h
index fc009c1..fae9892 100644
--- a/src/gpu/tessellate/PathTessellator.h
+++ b/src/gpu/tessellate/PathTessellator.h
@@ -55,7 +55,10 @@
     //
     // Each path's fPathMatrix in the list is applied on the CPU while the geometry is being written
     // out. This is a tool for batching, and is applied in addition to the shader's on-GPU matrix.
-    virtual void prepare(GrMeshDrawTarget*, const PathDrawList&, int totalCombinedPathVerbCnt) = 0;
+    virtual void prepare(GrMeshDrawTarget*,
+                         const SkRect& cullBounds,
+                         const PathDrawList&,
+                         int totalCombinedPathVerbCnt) = 0;
 
 #if SK_GPU_V1
     // Issues draw calls for the tessellated geometry. The caller is responsible for binding its
diff --git a/src/gpu/tessellate/PathWedgeTessellator.cpp b/src/gpu/tessellate/PathWedgeTessellator.cpp
index 50a041f..3fbcc41 100644
--- a/src/gpu/tessellate/PathWedgeTessellator.cpp
+++ b/src/gpu/tessellate/PathWedgeTessellator.cpp
@@ -10,6 +10,7 @@
 #include "src/gpu/GrMeshDrawTarget.h"
 #include "src/gpu/GrResourceProvider.h"
 #include "src/gpu/geometry/GrPathUtils.h"
+#include "src/gpu/tessellate/CullTest.h"
 #include "src/gpu/tessellate/PathXform.h"
 #include "src/gpu/tessellate/Tessellation.h"
 #include "src/gpu/tessellate/WangsFormula.h"
@@ -129,10 +130,12 @@
             , fMaxSegments_pow4(fMaxSegments_pow2 * fMaxSegments_pow2) {
     }
 
-    void setMatrices(const SkMatrix& shaderMatrix,
+    void setMatrices(const SkRect& cullBounds,
+                     const SkMatrix& shaderMatrix,
                      const SkMatrix& pathMatrix) {
         SkMatrix totalMatrix;
         totalMatrix.setConcat(shaderMatrix, pathMatrix);
+        fCullTest.set(cullBounds, totalMatrix);
         fTotalVectorXform = totalMatrix;
         fPathXform = pathMatrix;
     }
@@ -219,8 +222,14 @@
                                      SkPoint midpoint) {
         SkPoint chops[5];
         SkChopQuadAtHalf(p, chops);
-        this->writeQuadraticWedge(shaderCaps, chops, midpoint);
-        this->writeQuadraticWedge(shaderCaps, chops + 2, midpoint);
+        for (int i = 0; i < 2; ++i) {
+            const SkPoint* q = chops + i*2;
+            if (fCullTest.areVisible3(q)) {
+                this->writeQuadraticWedge(shaderCaps, q, midpoint);
+            } else {
+                this->writeFlatWedge(shaderCaps, q[0], q[2], midpoint);
+            }
+        }
     }
 
     void chopAndWriteConicWedges(const GrShaderCaps& shaderCaps,
@@ -230,8 +239,13 @@
         if (!conic.chopAt(.5, chops)) {
             return;
         }
-        this->writeConicWedge(shaderCaps, chops[0].fPts, chops[0].fW, midpoint);
-        this->writeConicWedge(shaderCaps, chops[1].fPts, chops[1].fW, midpoint);
+        for (int i = 0; i < 2; ++i) {
+            if (fCullTest.areVisible3(chops[i].fPts)) {
+                this->writeConicWedge(shaderCaps, chops[i].fPts, chops[i].fW, midpoint);
+            } else {
+                this->writeFlatWedge(shaderCaps, chops[i].fPts[0], chops[i].fPts[2], midpoint);
+            }
+        }
     }
 
     void chopAndWriteCubicWedges(const GrShaderCaps& shaderCaps,
@@ -239,11 +253,18 @@
                                  SkPoint midpoint) {
         SkPoint chops[7];
         SkChopCubicAtHalf(p, chops);
-        this->writeCubicWedge(shaderCaps, chops, midpoint);
-        this->writeCubicWedge(shaderCaps, chops + 3, midpoint);
+        for (int i = 0; i < 2; ++i) {
+            const SkPoint* c = chops + i*3;
+            if (fCullTest.areVisible4(c)) {
+                this->writeCubicWedge(shaderCaps, c, midpoint);
+            } else {
+                this->writeFlatWedge(shaderCaps, c[0], c[3], midpoint);
+            }
+        }
     }
 
     GrVertexChunkBuilder fChunker;
+    CullTest fCullTest;
     wangs_formula::VectorXform fTotalVectorXform;
     PathXform fPathXform;
     const float fMaxSegments_pow2;
@@ -281,6 +302,7 @@
 GR_DECLARE_STATIC_UNIQUE_KEY(gFixedCountIndexBufferKey);
 
 void PathWedgeTessellator::prepare(GrMeshDrawTarget* target,
+                                   const SkRect& cullBounds,
                                    const PathDrawList& pathDrawList,
                                    int totalCombinedPathVerbCnt) {
     SkASSERT(fVertexChunkArray.empty());
@@ -305,7 +327,7 @@
 
     WedgeWriter wedgeWriter(target, &fVertexChunkArray, patchStride, wedgeAllocCount, maxSegments);
     for (auto [pathMatrix, path] : pathDrawList) {
-        wedgeWriter.setMatrices(fShader->viewMatrix(), pathMatrix);
+        wedgeWriter.setMatrices(cullBounds, fShader->viewMatrix(), pathMatrix);
         MidpointContourParser parser(path);
         while (parser.parseNextContour()) {
             SkPoint midpoint = wedgeWriter.pathXform().mapPoint(parser.currentMidpoint());
diff --git a/src/gpu/tessellate/PathWedgeTessellator.h b/src/gpu/tessellate/PathWedgeTessellator.h
index 38d9c6f..5e1320a 100644
--- a/src/gpu/tessellate/PathWedgeTessellator.h
+++ b/src/gpu/tessellate/PathWedgeTessellator.h
@@ -31,7 +31,10 @@
                                  const GrPipeline&,
                                  const GrCaps&);
 
-    void prepare(GrMeshDrawTarget*, const PathDrawList&, int totalCombinedPathVerbCnt) override;
+    void prepare(GrMeshDrawTarget*,
+                 const SkRect& cullBounds,
+                 const PathDrawList&,
+                 int totalCombinedPathVerbCnt) override;
 
 
 #if SK_GPU_V1
diff --git a/src/gpu/tessellate/StrokeFixedCountTessellator.cpp b/src/gpu/tessellate/StrokeFixedCountTessellator.cpp
index daadb94..5903029 100644
--- a/src/gpu/tessellate/StrokeFixedCountTessellator.cpp
+++ b/src/gpu/tessellate/StrokeFixedCountTessellator.cpp
@@ -11,6 +11,7 @@
 #include "src/gpu/GrMeshDrawTarget.h"
 #include "src/gpu/GrResourceProvider.h"
 #include "src/gpu/geometry/GrPathUtils.h"
+#include "src/gpu/tessellate/CullTest.h"
 #include "src/gpu/tessellate/StrokeIterator.h"
 #include "src/gpu/tessellate/WangsFormula.h"
 
@@ -32,16 +33,13 @@
 public:
     using ShaderFlags = StrokeTessellator::ShaderFlags;
 
-    InstanceWriter(const GrShaderCaps* shaderCaps,
-                   ShaderFlags shaderFlags,
-                   GrMeshDrawTarget* target,
-                   float matrixMaxScale,
-                   const SkMatrix& viewMatrix,
-                   GrVertexChunkArray* patchChunks,
-                   size_t instanceStride,
-                   int minInstancesPerChunk)
+    InstanceWriter(const GrShaderCaps* shaderCaps, ShaderFlags shaderFlags,
+                   GrMeshDrawTarget* target, float matrixMaxScale, const SkRect& strokeCullBounds,
+                   const SkMatrix& viewMatrix, GrVertexChunkArray* patchChunks,
+                   size_t instanceStride, int minInstancesPerChunk)
             : fShaderCaps(shaderCaps)
             , fShaderFlags(shaderFlags)
+            , fCullTest(strokeCullBounds, viewMatrix)
             , fChunkBuilder(target, patchChunks, instanceStride, minInstancesPerChunk)
             , fParametricPrecision(StrokeTolerances::CalcParametricPrecision(matrixMaxScale)) {
     }
@@ -151,8 +149,14 @@
     void chopQuadraticTo(const SkPoint p[3]) {
         SkPoint chops[5];
         SkChopQuadAtHalf(p, chops);
-        this->quadraticTo(chops);
-        this->quadraticTo(chops + 2);
+        for (int i = 0; i < 2; ++i) {
+            const SkPoint* q = chops + i*2;
+            if (fCullTest.areVisible3(q)) {
+                this->quadraticTo(q);
+            } else {
+                this->discardStroke(q, 3);
+            }
+        }
     }
 
     void chopConicTo(const SkConic& conic) {
@@ -160,15 +164,26 @@
         if (!conic.chopAt(.5f, chops)) {
             return;
         }
-        this->conicTo(chops[0].fPts, chops[0].fW);
-        this->conicTo(chops[1].fPts, chops[1].fW);
+        for (int i = 0; i < 2; ++i) {
+            if (fCullTest.areVisible3(chops[i].fPts)) {
+                this->conicTo(chops[i].fPts, chops[i].fW);
+            } else {
+                this->discardStroke(chops[i].fPts, 3);
+            }
+        }
     }
 
     void chopCubicConvex180To(const SkPoint p[4]) {
         SkPoint chops[7];
         SkChopCubicAtHalf(p, chops);
-        this->cubicConvex180To(chops);
-        this->cubicConvex180To(chops + 3);
+        for (int i = 0; i < 2; ++i) {
+            const SkPoint* c = chops + i*3;
+            if (fCullTest.areVisible4(c)) {
+                this->cubicConvex180To(c);
+            } else {
+                this->discardStroke(c, 4);
+            }
+        }
     }
 
     SK_ALWAYS_INLINE void writeStroke(const SkPoint p[4], SkPoint endControlPoint,
@@ -208,6 +223,7 @@
 
     const GrShaderCaps* fShaderCaps;
     const ShaderFlags fShaderFlags;
+    const CullTest fCullTest;
     GrVertexChunkBuilder fChunkBuilder;
     const float fParametricPrecision;
     float fMaxParametricSegments_pow4 = 1;
@@ -242,10 +258,11 @@
                                                          ShaderFlags shaderFlags,
                                                          const SkMatrix& viewMatrix,
                                                          PathStrokeList* pathStrokeList,
-                                                         std::array<float,2> matrixMinMaxScales)
+                                                         std::array<float,2> matrixMinMaxScales,
+                                                         const SkRect& strokeCullBounds)
         : StrokeTessellator(shaderCaps, GrStrokeTessellationShader::Mode::kFixedCount, shaderFlags,
                             kMaxParametricSegments_log2, viewMatrix, pathStrokeList,
-                            matrixMinMaxScales) {
+                            matrixMinMaxScales, strokeCullBounds) {
 }
 
 GR_DECLARE_STATIC_UNIQUE_KEY(gVertexIDFallbackBufferKey);
@@ -261,8 +278,8 @@
     int capPreallocCount = 8;
     int minInstancesPerChunk = strokePreallocCount + capPreallocCount;
     InstanceWriter instanceWriter(target->caps().shaderCaps(), fShader.flags(), target,
-                                  fMatrixMinMaxScales[1], fShader.viewMatrix(), &fInstanceChunks,
-                                  fShader.instanceStride(), minInstancesPerChunk);
+                                  fMatrixMinMaxScales[1], fStrokeCullBounds, fShader.viewMatrix(),
+                                  &fInstanceChunks, fShader.instanceStride(), minInstancesPerChunk);
 
     if (!fShader.hasDynamicStroke()) {
         // Strokes are static. Calculate tolerances once.
diff --git a/src/gpu/tessellate/StrokeFixedCountTessellator.h b/src/gpu/tessellate/StrokeFixedCountTessellator.h
index 0ef9ec3..982e2d4 100644
--- a/src/gpu/tessellate/StrokeFixedCountTessellator.h
+++ b/src/gpu/tessellate/StrokeFixedCountTessellator.h
@@ -21,7 +21,8 @@
                                 ShaderFlags,
                                 const SkMatrix&,
                                 PathStrokeList*,
-                                std::array<float, 2> matrixMinMaxScales);
+                                std::array<float, 2> matrixMinMaxScales,
+                                const SkRect& strokeCullBounds);
 
     void prepare(GrMeshDrawTarget*, int totalCombinedVerbCnt) override;
 #if SK_GPU_V1
diff --git a/src/gpu/tessellate/StrokeHardwareTessellator.cpp b/src/gpu/tessellate/StrokeHardwareTessellator.cpp
index 4c32270..fa6b563 100644
--- a/src/gpu/tessellate/StrokeHardwareTessellator.cpp
+++ b/src/gpu/tessellate/StrokeHardwareTessellator.cpp
@@ -12,6 +12,7 @@
 #include "src/gpu/GrMeshDrawTarget.h"
 #include "src/gpu/GrRecordingContextPriv.h"
 #include "src/gpu/geometry/GrPathUtils.h"
+#include "src/gpu/tessellate/CullTest.h"
 #include "src/gpu/tessellate/WangsFormula.h"
 
 #if SK_GPU_V1
@@ -57,14 +58,11 @@
         kBowtie = SkPaint::kLast_Join + 1  // Double sided round join.
     };
 
-    PatchWriter(ShaderFlags shaderFlags,
-                GrMeshDrawTarget* target,
-                const SkMatrix& viewMatrix,
-                float matrixMaxScale,
-                GrVertexChunkArray* patchChunks,
-                size_t patchStride,
-                int minPatchesPerChunk)
+    PatchWriter(ShaderFlags shaderFlags, GrMeshDrawTarget* target,
+                const SkRect& strokeCullBounds, const SkMatrix& viewMatrix, float matrixMaxScale,
+                GrVertexChunkArray* patchChunks, size_t patchStride, int minPatchesPerChunk)
             : fShaderFlags(shaderFlags)
+            , fCullTest(strokeCullBounds, viewMatrix)
             , fChunkBuilder(target, patchChunks, patchStride, minPatchesPerChunk)
             // Subtract 2 because the tessellation shader chops every cubic at two locations, and
             // each chop has the potential to introduce an extra segment.
@@ -358,6 +356,11 @@
     // tessellation patches.
     void internalConicPatchesTo(JoinType prevJoinType, const SkPoint p[3], float w,
                                 int maxDepth = -1) {
+        if (!fCullTest.areVisible3(p)) {
+            // The stroke is out of view. Discard it.
+            this->discardStroke(p, 3);
+            return;
+        }
         // Zero-length paths need special treatment because they are spec'd to behave differently.
         // If the control point is colocated on an endpoint then this might end up being the case.
         // Fall back on a lineTo and let it make the final check.
@@ -437,6 +440,11 @@
     // tessellation patches. The cubic must be convex and must not rotate more than 180 degrees.
     void internalCubicConvex180PatchesTo(JoinType prevJoinType, const SkPoint p[4],
                                          int maxDepth = -1) {
+        if (!fCullTest.areVisible4(p)) {
+            // The stroke is out of view. Discard it.
+            this->discardStroke(p, 4);
+            return;
+        }
         // The stroke tessellation shader assigns special meaning to p0==p1==p2 and p1==p2==p3. If
         // this is the case then we need to rewrite the cubic.
         if (p[1] == p[2] && (p[1] == p[0] || p[1] == p[3])) {
@@ -621,6 +629,7 @@
     }
 
     const ShaderFlags fShaderFlags;
+    const CullTest fCullTest;
     GrVertexChunkBuilder fChunkBuilder;
 
     // The maximum number of tessellation segments the hardware can emit for a single patch.
@@ -701,10 +710,11 @@
                                                      ShaderFlags shaderFlags,
                                                      const SkMatrix& viewMatrix,
                                                      PathStrokeList* pathStrokeList,
-                                                     std::array<float,2> matrixMinMaxScales)
+                                                     std::array<float,2> matrixMinMaxScales,
+                                                     const SkRect& strokeCullBounds)
         : StrokeTessellator(shaderCaps, GrStrokeTessellationShader::Mode::kHardwareTessellation,
                             shaderFlags, SkNextLog2(shaderCaps.maxTessellationSegments()),
-                            viewMatrix, pathStrokeList, matrixMinMaxScales) {
+                            viewMatrix, pathStrokeList, matrixMinMaxScales, strokeCullBounds) {
 }
 
 void StrokeHardwareTessellator::prepare(GrMeshDrawTarget* target, int totalCombinedVerbCnt) {
@@ -714,8 +724,9 @@
     int strokePreallocCount = totalCombinedVerbCnt * 5/4;
     int capPreallocCount = 8;
     int minPatchesPerChunk = strokePreallocCount + capPreallocCount;
-    PatchWriter patchWriter(fShader.flags(), target, fShader.viewMatrix(), fMatrixMinMaxScales[1],
-                            &fPatchChunks, fShader.vertexStride(), minPatchesPerChunk);
+    PatchWriter patchWriter(fShader.flags(), target, fStrokeCullBounds, fShader.viewMatrix(),
+                            fMatrixMinMaxScales[1], &fPatchChunks, fShader.vertexStride(),
+                            minPatchesPerChunk);
 
     if (!fShader.hasDynamicStroke()) {
         // Strokes are static. Calculate tolerances once.
diff --git a/src/gpu/tessellate/StrokeHardwareTessellator.h b/src/gpu/tessellate/StrokeHardwareTessellator.h
index 25bdc78..0adf291 100644
--- a/src/gpu/tessellate/StrokeHardwareTessellator.h
+++ b/src/gpu/tessellate/StrokeHardwareTessellator.h
@@ -22,7 +22,8 @@
                               ShaderFlags shaderFlags,
                               const SkMatrix& viewMatrix,
                               PathStrokeList* pathStrokeList,
-                              std::array<float,2> matrixMinMaxScales);
+                              std::array<float,2> matrixMinMaxScales,
+                              const SkRect& strokeCullBounds);
 
     void prepare(GrMeshDrawTarget*, int totalCombinedVerbCnt) override;
 #if SK_GPU_V1
diff --git a/src/gpu/tessellate/StrokeTessellator.h b/src/gpu/tessellate/StrokeTessellator.h
index cce45ee..ea8227e 100644
--- a/src/gpu/tessellate/StrokeTessellator.h
+++ b/src/gpu/tessellate/StrokeTessellator.h
@@ -36,11 +36,13 @@
                       int8_t maxParametricSegments_log2,
                       const SkMatrix& viewMatrix,
                       PathStrokeList* pathStrokeList,
-                      std::array<float, 2> matrixMinMaxScales)
+                      std::array<float, 2> matrixMinMaxScales,
+                      const SkRect& strokeCullBounds)
             : fShader(shaderCaps, shaderMode, shaderFlags, viewMatrix, pathStrokeList->fStroke,
                       pathStrokeList->fColor, maxParametricSegments_log2)
             , fPathStrokeList(pathStrokeList)
-            , fMatrixMinMaxScales(matrixMinMaxScales) {
+            , fMatrixMinMaxScales(matrixMinMaxScales)
+            , fStrokeCullBounds(strokeCullBounds) {
     }
 
     const GrTessellationShader* shader() const { return &fShader; }
@@ -60,6 +62,7 @@
     GrStrokeTessellationShader fShader;
     PathStrokeList* fPathStrokeList;
     const std::array<float,2> fMatrixMinMaxScales;
+    const SkRect fStrokeCullBounds;  // See SkStrokeRec::inflationRadius.
 };
 
 // These tolerances decide the number of parametric and radial segments the tessellator will
diff --git a/src/gpu/tessellate/Tessellation.cpp b/src/gpu/tessellate/Tessellation.cpp
index 324ccbb..cb8871e 100644
--- a/src/gpu/tessellate/Tessellation.cpp
+++ b/src/gpu/tessellate/Tessellation.cpp
@@ -8,120 +8,12 @@
 #include "src/gpu/tessellate/Tessellation.h"
 
 #include "include/core/SkPath.h"
-#include "src/core/SkGeometry.h"
 #include "src/core/SkPathPriv.h"
 #include "src/gpu/BufferWriter.h"
-#include "src/gpu/tessellate/CullTest.h"
 #include "src/gpu/tessellate/MiddleOutPolygonTriangulator.h"
-#include "src/gpu/tessellate/WangsFormula.h"
 
 namespace skgpu {
 
-namespace {
-
-// Writes a new path, chopping as necessary so no verbs require more segments than
-// kMaxTessellationSegmentsPerCurve. Curves completely outside the viewport are flattened into
-// lines.
-class PathChopper {
-public:
-    PathChopper(const SkMatrix& matrix, const SkRect& viewport)
-            : fCullTest(viewport, matrix)
-            , fVectorXform(matrix) {
-        fPath.setIsVolatile(true);
-    }
-
-    SkPath path() const { return fPath; }
-
-    void moveTo(SkPoint p) { fPath.moveTo(p); }
-    void lineTo(SkPoint p1) { fPath.lineTo(p1); }
-    void close() { fPath.close(); }
-
-    void quadTo(const SkPoint p[3]) {
-        if (!fCullTest.areVisible3(p)) {
-            this->lineTo(p[2]);
-            return;
-        }
-        float n = wangs_formula::quadratic_pow4(kTessellationPrecision, p, fVectorXform);
-        if (n > pow4(kMaxTessellationSegmentsPerCurve)) {
-            SkPoint chops[5];
-            SkChopQuadAtHalf(p, chops);
-            this->quadTo(chops);
-            this->quadTo(chops + 2);
-            return;
-        }
-        fPath.quadTo(p[1], p[2]);
-    }
-
-    void conicTo(const SkPoint p[3], float w) {
-        if (!fCullTest.areVisible3(p)) {
-            this->lineTo(p[2]);
-            return;
-        }
-        float n = wangs_formula::conic_pow2(kTessellationPrecision, p, w, fVectorXform);
-        if (n > pow2(kMaxTessellationSegmentsPerCurve)) {
-            SkConic chops[2];
-            if (!SkConic(p,w).chopAt(.5, chops)) {
-                this->lineTo(p[2]);
-                return;
-            }
-            this->conicTo(chops[0].fPts, chops[0].fW);
-            this->conicTo(chops[1].fPts, chops[1].fW);
-            return;
-        }
-        fPath.conicTo(p[1], p[2], w);
-    }
-
-    void cubicTo(const SkPoint p[4]) {
-        if (!fCullTest.areVisible4(p)) {
-            this->lineTo(p[3]);
-            return;
-        }
-        float n = wangs_formula::cubic_pow4(kTessellationPrecision, p, fVectorXform);
-        if (n > pow4(kMaxTessellationSegmentsPerCurve)) {
-            SkPoint chops[7];
-            SkChopCubicAtHalf(p, chops);
-            this->cubicTo(chops);
-            this->cubicTo(chops + 3);
-            return;
-        }
-        fPath.cubicTo(p[1], p[2], p[3]);
-    }
-
-private:
-    const CullTest fCullTest;
-    const wangs_formula::VectorXform fVectorXform;
-    SkPath fPath;
-};
-
-}  // namespace
-
-SkPath PreChopPathCurves(const SkPath& path, const SkMatrix& matrix, const SkRect& viewport) {
-    PathChopper chopper(matrix, viewport);
-    for (auto [verb, p, w] : SkPathPriv::Iterate(path)) {
-        switch (verb) {
-            case SkPathVerb::kMove:
-                chopper.moveTo(p[0]);
-                break;
-            case SkPathVerb::kLine:
-                chopper.lineTo(p[1]);
-                break;
-            case SkPathVerb::kQuad:
-                chopper.quadTo(p);
-                break;
-            case SkPathVerb::kConic:
-                chopper.conicTo(p, *w);
-                break;
-            case SkPathVerb::kCubic:
-                chopper.cubicTo(p);
-                break;
-            case SkPathVerb::kClose:
-                chopper.close();
-                break;
-        }
-    }
-    return chopper.path();
-}
-
 VertexWriter WritePathMiddleOutInnerFan(VertexWriter&& vertexWriter,
                                         int pad32Count,
                                         uint32_t pad32Value,
diff --git a/src/gpu/tessellate/Tessellation.h b/src/gpu/tessellate/Tessellation.h
index a16599e..85d2107 100644
--- a/src/gpu/tessellate/Tessellation.h
+++ b/src/gpu/tessellate/Tessellation.h
@@ -11,9 +11,8 @@
 #include "include/core/SkTypes.h"
 #include "include/private/SkVx.h"
 
-class SkMatrix;
 class SkPath;
-struct SkRect;
+class SkMatrix;
 
 namespace skgpu {
 
@@ -45,21 +44,9 @@
     return x[0] - x[1];
 }
 
-SK_MAYBE_UNUSED constexpr SK_ALWAYS_INLINE float pow2(float x) { return x*x; }
-SK_MAYBE_UNUSED constexpr SK_ALWAYS_INLINE float pow4(float x) { return pow2(x*x); }
-
-// Don't tessellate paths that might have an individual curve that requires more than 1024 segments.
-// (See wangs_formula::worst_case_cubic). If this is the case, call "PreChopPathCurves" first.
-constexpr static float kMaxTessellationSegmentsPerCurve SK_MAYBE_UNUSED = 1024;
-
-// Returns a new path, equivalent to 'path' within the given viewport, whose verbs can all be drawn
-// with 'maxSegments' tessellation segments or fewer. Curves and chops that fall completely outside
-// the viewport are flattened into lines.
-SkPath PreChopPathCurves(const SkPath&, const SkMatrix&, const SkRect& viewport);
-
-// Writes out the path's inner fan using a middle-out topology. Writes 3 points per triangle.
-// Additionally writes out "pad32Count" repetitions of "pad32Value" after each triangle. Set
-// pad32Count to 0 if the triangles are to be tightly packed.
+// Writes out the path's inner fan using a middle-out topology. Writes 3 SkPoints per triangle to
+// the VertexWriter. Additionally writes out "pad32Count" repetitions of "pad32Value" after each
+// triangle. Set pad32Count to 0 if the triangles are to be tightly packed.
 VertexWriter WritePathMiddleOutInnerFan(VertexWriter&&,
                                         int pad32Count,
                                         uint32_t pad32Value,
diff --git a/src/gpu/tessellate/WangsFormula.h b/src/gpu/tessellate/WangsFormula.h
index 4957a91..11d50f8 100644
--- a/src/gpu/tessellate/WangsFormula.h
+++ b/src/gpu/tessellate/WangsFormula.h
@@ -173,25 +173,19 @@
 }
 
 // Returns the maximum number of line segments a cubic with the given device-space bounding box size
-// would ever need to be divided into, raised to the 4th power. This is simply a special case of the
-// cubic formula where we maximize its value by placing control points on specific corners of the
-// bounding box.
-AI float worst_case_cubic_pow4(float precision, float devWidth, float devHeight) {
-    float kk = length_term_pow2<3>(precision);
-    return 4*kk * (devWidth * devWidth + devHeight * devHeight);
-}
-
-// Returns the maximum number of line segments a cubic with the given device-space bounding box size
-// would ever need to be divided into.
+// would ever need to be divided into. This is simply a special case of the cubic formula where we
+// maximize its value by placing control points on specific corners of the bounding box.
 AI float worst_case_cubic(float precision, float devWidth, float devHeight) {
-    return root4(worst_case_cubic_pow4(precision, devWidth, devHeight));
+    float k = length_term<3>(precision);
+    return sqrtf(2*k * SkVector::Length(devWidth, devHeight));
 }
 
 // Returns the maximum log2 number of line segments a cubic with the given device-space bounding box
 // size would ever need to be divided into.
 AI int worst_case_cubic_log2(float precision, float devWidth, float devHeight) {
+    float kk = length_term_pow2<3>(precision);
     // nextlog16(x) == ceil(log2(sqrt(sqrt(x))))
-    return nextlog16(worst_case_cubic_pow4(precision, devWidth, devHeight));
+    return nextlog16(4*kk * (devWidth * devWidth + devHeight * devHeight));
 }
 
 // Returns Wang's formula specialized for a conic curve, raised to the second power.
diff --git a/tests/WangsFormulaTest.cpp b/tests/WangsFormulaTest.cpp
index 86b31eb..9e55d3f 100644
--- a/tests/WangsFormulaTest.cpp
+++ b/tests/WangsFormulaTest.cpp
@@ -342,10 +342,6 @@
             check_worst_case_cubic(pts);
         });
     }
-    // Make sure overflow saturates at infinity (not NaN).
-    constexpr static float inf = std::numeric_limits<float>::infinity();
-    REPORTER_ASSERT(r, wangs_formula::worst_case_cubic_pow4(kPrecision, inf, inf) == inf);
-    REPORTER_ASSERT(r, wangs_formula::worst_case_cubic(kPrecision, inf, inf) == inf);
 }
 
 // Ensure Wang's formula for quads produces max error within tolerance.