Revert "Add a tessellation mode that uses indirect draws"

This reverts commit 02d7cf79beec7f9fe0af4d0c4ad764264556727b.

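Reason for revert: nanobench aborts with "unimplemented." while running the
tessellate_prepareTessellatedCubicWedges benchmark, because
prepareTessellatedCubicWedges() calls BenchmarkTarget::caps(). Failing bots:

  Perf-Debian10-Clang-GCE-CPU-AVX2-x86_64-Debug-All-ASAN
  Perf-Win2019-Clang-GCE-CPU-AVX2-x86_64-Debug-All-ASAN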

Running tessellate_prepareTessellatedCubicWedges	nonrendering
../../../../../../skia/bench/TessellatePathBench.cpp:79: fatal error: "unimplemented."

Signal 5 [Trace/breakpoint trap]:
    /mnt/pd0/s/w/ir/build/nanobench(backtrace+0x3d) [0x34851fd]
    /mnt/pd0/s/w/ir/build/nanobench() [0x3db3d49]
    /lib/x86_64-linux-gnu/libc.so.6(+0x37840) [0x7fdb4722d840]
    sk_abort_no_print()
    BenchmarkTarget::caps() const
    GrTessellatePathOp::prepareTessellatedCubicWedges(GrMeshDrawOp::Target*)
    GrTessellatePathOp::TestingOnly_Benchmark::prepareTessellatedCubicWedges::runBench(GrMeshDrawOp::Target*, GrTessellatePathOp*)
    GrTessellatePathOp::TestingOnly_Benchmark::onDraw(int, SkCanvas*)
    Benchmark::draw(int, SkCanvas*)
    /mnt/pd0/s/w/ir/build/nanobench() [0x3543b81]
    /mnt/pd0/s/w/ir/build/nanobench(main+0x1347) [0x3540a17]
    /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xeb) [0x7fdb4721a09b]
    /mnt/pd0/s/w/ir/build/nanobench(_start+0x2a) [0x346429a]
Command exited with code 5


Original change's description:
> Add a tessellation mode that uses indirect draws
> 
> This mode is oftentimes faster than tessellation, and other times it
> serves as a polyfill when tessellation just isn't supported.
> 
> Change-Id: I7b3d57fd0194c6869bfe28ee53ff0ff2e43df479
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/291036
> Commit-Queue: Chris Dalton <csmartdalton@google.com>
> Reviewed-by: Brian Osman <brianosman@google.com>

TBR=egdaniel@google.com,brianosman@google.com,csmartdalton@google.com

Change-Id: I4257d5ae5c0aafeebd61d8f1b78eca755ad977ff
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/291642
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/bench/TessellatePathBench.cpp b/bench/TessellatePathBench.cpp
index 9066659..610eadc 100644
--- a/bench/TessellatePathBench.cpp
+++ b/bench/TessellatePathBench.cpp
@@ -10,8 +10,6 @@
 #include "src/core/SkPathPriv.h"
 #include "src/gpu/GrContextPriv.h"
 #include "src/gpu/GrOpFlushState.h"
-#include "src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h"
-#include "src/gpu/tessellate/GrResolveLevelCounter.h"
 #include "src/gpu/tessellate/GrTessellatePathOp.h"
 #include "src/gpu/tessellate/GrWangsFormula.h"
 #include "tools/ToolUtils.h"
@@ -81,7 +79,7 @@
 #undef UNIMPL
 
 private:
-    SkPoint fStaticVertexData[(kNumCubicsInChalkboard + 2) * 8];
+    SkPoint fStaticVertexData[(kNumCubicsInChalkboard + 2) * 5];
     GrDrawIndexedIndirectCommand fStaticDrawIndexedIndirectData[32];
     SkSTArenaAlloc<1024 * 1024> fAllocator;
 };
@@ -97,15 +95,10 @@
     const char* onGetName() override { return fName.c_str(); }
     bool isSuitableFor(Backend backend) final { return backend == kNonRendering_Backend; }
 
-    class prepareMiddleOutStencilGeometry;
-    class prepareMiddleOutStencilGeometry_indirect;
-    class prepareIndirectOuterCubics;
-    class prepareTessellatedOuterCubics;
-    class prepareTessellatedCubicWedges;
-    class wangs_formula_cubic_log2;
-    class wangs_formula_cubic_log2_scale;
-    class wangs_formula_cubic_log2_affine;
-    class middle_out_triangulation;
+    class MiddleOutInnerTrianglesBench;
+    class OuterCubicsBench;
+    class CubicWedgesBench;
+    class WangsFormulaBench;
 
 private:
     void onDraw(int loops, SkCanvas*) final {
@@ -127,91 +120,83 @@
     SkString fName;
 };
 
-#define DEF_TESS_BENCH(NAME, PATH, MATRIX, TARGET, OP) \
-    class GrTessellatePathOp::TestingOnly_Benchmark::NAME \
-            : public GrTessellatePathOp::TestingOnly_Benchmark { \
-    public: \
-        NAME() : TestingOnly_Benchmark(#NAME, (PATH), (MATRIX)) {} \
-        void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override; \
-    }; \
-    DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::NAME(); ); \
-    void GrTessellatePathOp::TestingOnly_Benchmark::NAME::runBench( \
-            GrMeshDrawOp::Target* TARGET, GrTessellatePathOp* op)
+class GrTessellatePathOp::TestingOnly_Benchmark::MiddleOutInnerTrianglesBench
+        : public GrTessellatePathOp::TestingOnly_Benchmark {
+public:
+    MiddleOutInnerTrianglesBench()
+            : TestingOnly_Benchmark("prepareMiddleOutInnerTriangles",
+                                    ToolUtils::make_star(SkRect::MakeWH(100, 100),
+                                                         kNumCubicsInChalkboard),
+                                    SkMatrix::I()) {
+    }
+    void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override {
+        int numBeziers;
+        op->prepareMiddleOutInnerTriangles(target, &numBeziers);
+    }
+};
 
-DEF_TESS_BENCH(prepareMiddleOutStencilGeometry, make_cubic_path(), SkMatrix::I(), target, op) {
-    op->prepareMiddleOutTrianglesAndCubics(target);
-}
+DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::MiddleOutInnerTrianglesBench(); );
 
-DEF_TESS_BENCH(prepareMiddleOutStencilGeometry_indirect, make_cubic_path(), SkMatrix::I(), target,
-               op) {
-    GrResolveLevelCounter resolveLevelCounter;
-    op->prepareMiddleOutTrianglesAndCubics(target, &resolveLevelCounter, true);
-}
+class GrTessellatePathOp::TestingOnly_Benchmark::OuterCubicsBench
+        : public GrTessellatePathOp::TestingOnly_Benchmark {
+public:
+    OuterCubicsBench()
+            : TestingOnly_Benchmark("prepareOuterCubics", make_cubic_path(), SkMatrix::I()) {
+    }
+    void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override {
+        op->prepareOuterCubics(target, kNumCubicsInChalkboard,
+                               CubicDataAlignment::kVertexBoundary);
+    }
+};
 
-DEF_TESS_BENCH(prepareIndirectOuterCubics, make_cubic_path(), SkMatrix::I(), target, op) {
-    GrResolveLevelCounter resolveLevelCounter;
-    resolveLevelCounter.reset(op->fPath, SkMatrix::I(), 4);
-    op->prepareIndirectOuterCubics(target, resolveLevelCounter);
-}
+DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::OuterCubicsBench(); );
 
-DEF_TESS_BENCH(prepareTessellatedOuterCubics, make_cubic_path(), SkMatrix::I(), target, op) {
-    op->prepareTessellatedOuterCubics(target, kNumCubicsInChalkboard);
-}
+class GrTessellatePathOp::TestingOnly_Benchmark::CubicWedgesBench
+        : public GrTessellatePathOp::TestingOnly_Benchmark {
+public:
+    CubicWedgesBench()
+            : TestingOnly_Benchmark("prepareCubicWedges", make_cubic_path(), SkMatrix::I()) {
+    }
+    void runBench(GrMeshDrawOp::Target* target, GrTessellatePathOp* op) override {
+        op->prepareCubicWedges(target);
+    }
+};
 
-DEF_TESS_BENCH(prepareTessellatedCubicWedges, make_cubic_path(), SkMatrix::I(), target, op) {
-    op->prepareTessellatedCubicWedges(target);
-}
+DEF_BENCH( return new GrTessellatePathOp::TestingOnly_Benchmark::CubicWedgesBench(););
 
-static void benchmark_wangs_formula_cubic_log2(const SkMatrix& matrix, const SkPath& path) {
-    int sum = 0;
-    GrVectorXform xform(matrix);
-    for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) {
-        if (verb == SkPathVerb::kCubic) {
-            sum += GrWangsFormula::cubic_log2(4, pts, xform);
+class GrTessellatePathOp::TestingOnly_Benchmark::WangsFormulaBench
+        : public GrTessellatePathOp::TestingOnly_Benchmark {
+public:
+    WangsFormulaBench(const char* suffix, const SkMatrix& matrix)
+            : TestingOnly_Benchmark(SkStringPrintf("wangs_formula_cubic_log2%s", suffix).c_str(),
+                                    make_cubic_path(), SkMatrix::I())
+            , fMatrix(matrix) {
+    }
+    void runBench(GrMeshDrawOp::Target*, GrTessellatePathOp* op) override {
+        int sum = 0;
+        GrVectorXform xform(fMatrix);
+        for (auto [verb, pts, w] : SkPathPriv::Iterate(op->fPath)) {
+            if (verb == SkPathVerb::kCubic) {
+                sum += GrWangsFormula::cubic_log2(4, pts, xform);
+            }
+        }
+        // Don't let the compiler optimize away GrWangsFormula::cubic_log2.
+        if (sum <= 0) {
+            SK_ABORT("sum should be > 0.");
         }
     }
-    // Don't let the compiler optimize away GrWangsFormula::cubic_log2.
-    if (sum <= 0) {
-        SK_ABORT("sum should be > 0.");
-    }
-}
+private:
+    SkMatrix fMatrix;
+};
 
-DEF_TESS_BENCH(wangs_formula_cubic_log2, make_cubic_path(), SkMatrix::I(), target, op) {
-    benchmark_wangs_formula_cubic_log2(op->fViewMatrix, op->fPath);
-}
-
-DEF_TESS_BENCH(wangs_formula_cubic_log2_scale, make_cubic_path(), SkMatrix::Scale(1.1f, 0.9f),
-               target, op) {
-    benchmark_wangs_formula_cubic_log2(op->fViewMatrix, op->fPath);
-}
-
-DEF_TESS_BENCH(wangs_formula_cubic_log2_affine, make_cubic_path(),
-               SkMatrix::MakeAll(.9f,0.9f,0,  1.1f,1.1f,0, 0,0,1), target, op) {
-    benchmark_wangs_formula_cubic_log2(op->fViewMatrix, op->fPath);
-}
-
-DEF_TESS_BENCH(middle_out_triangulation,
-               ToolUtils::make_star(SkRect::MakeWH(500, 500), kNumCubicsInChalkboard),
-               SkMatrix::I(), target, op) {
-    auto vertexData = static_cast<SkPoint*>(target->makeVertexSpace(
-            sizeof(SkPoint), kNumCubicsInChalkboard, nullptr, nullptr));
-    GrMiddleOutPolygonTriangulator middleOut(vertexData, 3, kNumCubicsInChalkboard + 2);
-    for (auto [verb, pts, w] : SkPathPriv::Iterate(op->fPath)) {
-        switch (verb) {
-            case SkPathVerb::kMove:
-                middleOut.closeAndMove(pts[0]);
-                break;
-            case SkPathVerb::kLine:
-                middleOut.pushVertex(pts[1]);
-                break;
-            case SkPathVerb::kClose:
-                middleOut.close();
-                break;
-            case SkPathVerb::kQuad:
-            case SkPathVerb::kConic:
-            case SkPathVerb::kCubic:
-                SkUNREACHABLE;
-        }
-        middleOut.closeAndMove(pts[0]);
-    }
-}
+DEF_BENCH(
+    return new GrTessellatePathOp::TestingOnly_Benchmark::WangsFormulaBench("", SkMatrix::I());
+);
+DEF_BENCH(
+    return new GrTessellatePathOp::TestingOnly_Benchmark::WangsFormulaBench(
+            "_scale", SkMatrix::Scale(1.1f, 0.9f));
+);
+DEF_BENCH(
+    return new GrTessellatePathOp::TestingOnly_Benchmark::WangsFormulaBench(
+            "_affine", SkMatrix::MakeAll(.9f,0.9f,0,  1.1f,1.1f,0, 0,0,1));
+);
diff --git a/gn/gpu.gni b/gn/gpu.gni
index c13461a..bc5bcf1 100644
--- a/gn/gpu.gni
+++ b/gn/gpu.gni
@@ -444,7 +444,6 @@
   "$_src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h",
   "$_src/gpu/tessellate/GrMidpointContourParser.h",
   "$_src/gpu/tessellate/GrPathShader.h",
-  "$_src/gpu/tessellate/GrResolveLevelCounter.h",
   "$_src/gpu/tessellate/GrStencilPathShader.cpp",
   "$_src/gpu/tessellate/GrStencilPathShader.h",
   "$_src/gpu/tessellate/GrTessellatePathOp.cpp",
diff --git a/src/gpu/GrPathRendererChain.cpp b/src/gpu/GrPathRendererChain.cpp
index cf7e271..ca2d7e4 100644
--- a/src/gpu/GrPathRendererChain.cpp
+++ b/src/gpu/GrPathRendererChain.cpp
@@ -32,7 +32,7 @@
         fChain.push_back(sk_make_sp<GrDashLinePathRenderer>());
     }
     if (options.fGpuPathRenderers & GpuPathRenderers::kTessellation) {
-        if (caps.drawInstancedSupport()) {
+        if (caps.shaderCaps()->tessellationSupport() && caps.drawInstancedSupport()) {
             auto tess = sk_make_sp<GrTessellationPathRenderer>(caps);
             context->priv().addOnFlushCallbackObject(tess.get());
             fChain.push_back(std::move(tess));
diff --git a/src/gpu/GrProcessor.h b/src/gpu/GrProcessor.h
index 1a16595..cb4711c6 100644
--- a/src/gpu/GrProcessor.h
+++ b/src/gpu/GrProcessor.h
@@ -162,10 +162,9 @@
         kTessellate_GrFillBoundingBoxShader_ClassID,
         kTessellate_GrFillCubicHullShader_ClassID,
         kTessellate_GrFillTriangleShader_ClassID,
-        kTessellate_GrMiddleOutCubicShader_ClassID,
+        kTessellate_GrStencilCubicShader_ClassID,
         kTessellate_GrStencilTriangleShader_ClassID,
-        kTessellate_GrTessellateCubicShader_ClassID,
-        kTessellate_GrTessellateWedgeShader_ClassID,
+        kTessellate_GrStencilWedgeShader_ClassID,
         kTestFP_ClassID,
         kTestRectOp_ClassID,
         kFlatNormalsFP_ClassID,
diff --git a/src/gpu/tessellate/GrResolveLevelCounter.h b/src/gpu/tessellate/GrResolveLevelCounter.h
deleted file mode 100644
index c74f46c..0000000
--- a/src/gpu/tessellate/GrResolveLevelCounter.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright 2020 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef GrResolveLevelCounter_DEFINED
-#define GrResolveLevelCounter_DEFINED
-
-#include "src/core/SkPathPriv.h"
-#include "src/gpu/tessellate/GrStencilPathShader.h"
-#include "src/gpu/tessellate/GrWangsFormula.h"
-
-// This class helps bin cubics by log2 "resolveLevel" when we don't use hardware tessellation. It is
-// composed of simple counters that track how many cubics we intend to draw at each resolveLevel,
-// and how many resolveLevels there are that have at least one cubic.
-class GrResolveLevelCounter {
-public:
-    void reset() {
-        memset(fInstanceCounts, 0, sizeof(fInstanceCounts));
-        SkDEBUGCODE(fHasCalledReset = true;)
-    }
-
-    int reset(const SkPath& path, const SkMatrix& viewMatrix, float intolerance) {
-        this->reset();
-        GrVectorXform xform(viewMatrix);
-        for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) {
-            switch (verb) {
-                case SkPathVerb::kQuad:
-                    // Quadratics get converted to cubics before rendering.
-                    this->countCubic(GrWangsFormula::quadratic_log2(intolerance, pts, xform));
-                    break;
-                case SkPathVerb::kCubic:
-                    this->countCubic(GrWangsFormula::cubic_log2(intolerance, pts, xform));
-                    break;
-                default:
-                    break;
-            }
-        }
-        return fTotalCubicInstanceCount;
-    }
-
-    void countCubic(int resolveLevel) {
-        SkASSERT(fHasCalledReset);
-        SkASSERT(resolveLevel >= 0);
-        if (resolveLevel == 0) {
-            // Cubics with 2^0=1 segments are empty (zero area). We ignore them completely.
-            return;
-        }
-        resolveLevel = std::min(resolveLevel, GrMiddleOutCubicShader::kMaxResolveLevel);
-        if (!fInstanceCounts[resolveLevel]++) {
-            ++fTotalCubicIndirectDrawCount;
-        }
-        ++fTotalCubicInstanceCount;
-    }
-
-    int operator[](int resolveLevel) const {
-        SkASSERT(fHasCalledReset);
-        SkASSERT(resolveLevel > 0);  // Empty cubics with 2^0=1 segments do not need to be drawn.
-        SkASSERT(resolveLevel <= GrMiddleOutCubicShader::kMaxResolveLevel);
-        return fInstanceCounts[resolveLevel];
-    }
-    int totalCubicInstanceCount() const { return fTotalCubicInstanceCount; }
-    int totalCubicIndirectDrawCount() const { return fTotalCubicIndirectDrawCount; }
-
-private:
-    SkDEBUGCODE(bool fHasCalledReset = false;)
-    int fInstanceCounts[GrMiddleOutCubicShader::kMaxResolveLevel + 1];
-    int fTotalCubicInstanceCount = 0;
-    int fTotalCubicIndirectDrawCount = 0;
-};
-
-#endif
diff --git a/src/gpu/tessellate/GrStencilPathShader.cpp b/src/gpu/tessellate/GrStencilPathShader.cpp
index fadc5b1..9b36c2a 100644
--- a/src/gpu/tessellate/GrStencilPathShader.cpp
+++ b/src/gpu/tessellate/GrStencilPathShader.cpp
@@ -35,7 +35,6 @@
         })";
 
 class GrStencilPathShader::Impl : public GrGLSLGeometryProcessor {
-protected:
     void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
         const auto& shader = args.fGP.cast<GrStencilPathShader>();
         args.fVaryingHandler->emitAttributes(shader);
@@ -46,7 +45,7 @@
             fViewMatrixUniform = args.fUniformHandler->addUniform(
                     nullptr, kVertex_GrShaderFlag, kFloat3x3_GrSLType, "view_matrix", &viewMatrix);
             args.fVertBuilder->codeAppendf(
-                    "float2 vertexpos = (%s * float3(inputPoint, 1)).xy;", viewMatrix);
+                    "float2 vertexpos = (%s * float3(point, 1)).xy;", viewMatrix);
             vertexPos.set(kFloat2_GrSLType, "vertexpos");
         }
 
@@ -76,8 +75,8 @@
     return new Impl;
 }
 
-SkString GrTessellateCubicShader::getTessControlShaderGLSL(const char* versionAndExtensionDecls,
-                                                           const GrShaderCaps&) const {
+SkString GrStencilCubicShader::getTessControlShaderGLSL(const char* versionAndExtensionDecls,
+                                                        const GrShaderCaps&) const {
     SkString code(versionAndExtensionDecls);
     code.append(kWangsFormulaCubicFn);
     code.append(R"(
@@ -116,8 +115,8 @@
     return code;
 }
 
-SkString GrTessellateCubicShader::getTessEvaluationShaderGLSL(const char* versionAndExtensionDecls,
-                                                              const GrShaderCaps&) const {
+SkString GrStencilCubicShader::getTessEvaluationShaderGLSL(const char* versionAndExtensionDecls,
+                                                           const GrShaderCaps&) const {
     SkString code(versionAndExtensionDecls);
     code.append(kEvalCubicFn);
     code.append(R"(
@@ -148,8 +147,8 @@
     return code;
 }
 
-SkString GrTessellateWedgeShader::getTessControlShaderGLSL(const char* versionAndExtensionDecls,
-                                                           const GrShaderCaps&) const {
+SkString GrStencilWedgeShader::getTessControlShaderGLSL(const char* versionAndExtensionDecls,
+                                                        const GrShaderCaps&) const {
     SkString code(versionAndExtensionDecls);
     code.append(kWangsFormulaCubicFn);
     code.append(R"(
@@ -184,8 +183,8 @@
     return code;
 }
 
-SkString GrTessellateWedgeShader::getTessEvaluationShaderGLSL(const char* versionAndExtensionDecls,
-                                                              const GrShaderCaps&) const {
+SkString GrStencilWedgeShader::getTessEvaluationShaderGLSL(const char* versionAndExtensionDecls,
+                                                           const GrShaderCaps&) const {
     SkString code(versionAndExtensionDecls);
     code.append(kEvalCubicFn);
     code.append(R"(
@@ -219,109 +218,3 @@
 
     return code;
 }
-
-GR_DECLARE_STATIC_UNIQUE_KEY(gMiddleOutIndexBufferKey);
-
-sk_sp<const GrGpuBuffer> GrMiddleOutCubicShader::FindOrMakeMiddleOutIndexBuffer(
-        GrResourceProvider* resourceProvider) {
-    GR_DEFINE_STATIC_UNIQUE_KEY(gMiddleOutIndexBufferKey);
-    if (auto buffer = resourceProvider->findByUniqueKey<GrGpuBuffer>(gMiddleOutIndexBufferKey)) {
-        return std::move(buffer);
-    }
-
-    // One explicit triangle at index 0, and one middle-out cubic with kMaxResolveLevel line
-    // segments beginning at index 3.
-    constexpr static int indexCount = 3 + NumVerticesAtResolveLevel(kMaxResolveLevel);
-    auto buffer = resourceProvider->createBuffer(
-            indexCount * sizeof(uint16_t), GrGpuBufferType::kIndex, kStatic_GrAccessPattern);
-    if (!buffer) {
-        return nullptr;
-    }
-
-    // We shouldn't bin and/or cache static buffers.
-    SkASSERT(buffer->size() == indexCount * sizeof(uint16_t));
-    SkASSERT(!buffer->resourcePriv().getScratchKey().isValid());
-    auto indexData = static_cast<uint16_t*>(buffer->map());
-    SkAutoTMalloc<uint16_t> stagingBuffer;
-    if (!indexData) {
-        SkASSERT(!buffer->isMapped());
-        indexData = stagingBuffer.reset(indexCount);
-    }
-
-    // Indices 0,1,2 contain special values that emit points P0, P1, and P2 respectively. (When the
-    // vertex shader is fed an index value larger than (1 << kMaxResolveLevel), it emits
-    // P[index % 4].)
-    int i = 0;
-    indexData[i++] = (1 << kMaxResolveLevel) + 4;  // % 4 == 0
-    indexData[i++] = (1 << kMaxResolveLevel) + 5;  // % 4 == 1
-    indexData[i++] = (1 << kMaxResolveLevel) + 6;  // % 4 == 2
-
-    // Starting at index 3, we triangulate a cubic with 2^kMaxResolveLevel line segments. Each
-    // index value corresponds to parametric value T=(index / 2^kMaxResolveLevel). Since the
-    // triangles are arranged in "middle-out" order, we will be able to conveniently control the
-    // resolveLevel by changing only the indexCount.
-    for (uint16_t advance = 1 << (kMaxResolveLevel - 1); advance; advance >>= 1) {
-        uint16_t T = 0;
-        do {
-            indexData[i++] = T;
-            indexData[i++] = (T += advance);
-            indexData[i++] = (T += advance);
-        } while (T != (1 << kMaxResolveLevel));
-    }
-    SkASSERT(i == indexCount);
-
-    if (buffer->isMapped()) {
-        buffer->unmap();
-    } else {
-        buffer->updateData(stagingBuffer, indexCount * sizeof(uint16_t));
-    }
-    buffer->resourcePriv().setUniqueKey(gMiddleOutIndexBufferKey);
-    return std::move(buffer);
-}
-
-class GrMiddleOutCubicShader::Impl : public GrStencilPathShader::Impl {
-    void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override {
-        const auto& shader = args.fGP.cast<GrMiddleOutCubicShader>();
-        args.fVaryingHandler->emitAttributes(shader);
-        args.fVertBuilder->defineConstant("kMaxResolveLevel", kMaxResolveLevel);
-        args.fVertBuilder->codeAppend(R"(
-                float4x2 P = float4x2(inputPoints_0_1, inputPoints_2_3);
-                float2 point;
-                if (sk_VertexID > (1 << kMaxResolveLevel)) {
-                    // This is a special index value that wants us to emit a specific point.
-                    point = P[sk_VertexID & 3];
-                } else {)");
-        // Evaluate the cubic at T=(sk_VertexID / 2^kMaxResolveLevel).
-        if (args.fShaderCaps->fpManipulationSupport()) {
-            args.fVertBuilder->codeAppend(R"(
-                    float T = ldexp(sk_VertexID, -kMaxResolveLevel);)");
-        } else {
-            args.fVertBuilder->codeAppend(R"(
-                    float T = sk_VertexID / float(1 << kMaxResolveLevel);)");
-        }
-        args.fVertBuilder->codeAppend(R"(
-                    float2 ab = mix(P[0], P[1], T);
-                    float2 bc = mix(P[1], P[2], T);
-                    float2 cd = mix(P[2], P[3], T);
-                    float2 abc = mix(ab, bc, T);
-                    float2 bcd = mix(bc, cd, T);
-                    point = mix(abc, bcd, T);
-                })");
-
-        GrShaderVar vertexPos("point", kFloat2_GrSLType);
-        if (!shader.viewMatrix().isIdentity()) {
-            const char* viewMatrix;
-            fViewMatrixUniform = args.fUniformHandler->addUniform(
-                    nullptr, kVertex_GrShaderFlag, kFloat3x3_GrSLType, "view_matrix", &viewMatrix);
-            args.fVertBuilder->codeAppendf(R"(
-                    float2 transformedPoint = (%s * float3(point, 1)).xy;)", viewMatrix);
-            vertexPos.set(kFloat2_GrSLType, "transformedPoint");
-        }
-        gpArgs->fPositionVar = vertexPos;
-        // No fragment shader.
-    }
-};
-
-GrGLSLPrimitiveProcessor* GrMiddleOutCubicShader::createGLSLInstance(const GrShaderCaps&) const {
-    return new Impl;
-}
diff --git a/src/gpu/tessellate/GrStencilPathShader.h b/src/gpu/tessellate/GrStencilPathShader.h
index c331726..0aa0b59 100644
--- a/src/gpu/tessellate/GrStencilPathShader.h
+++ b/src/gpu/tessellate/GrStencilPathShader.h
@@ -17,15 +17,16 @@
     GrStencilPathShader(ClassID classID, const SkMatrix& viewMatrix, GrPrimitiveType primitiveType,
                         int tessellationPatchVertexCount = 0)
             : GrPathShader(classID, viewMatrix, primitiveType, tessellationPatchVertexCount) {
+        constexpr static Attribute kPointAttrib = {
+                "point", kFloat2_GrVertexAttribType, kFloat2_GrSLType};
+        this->setVertexAttributes(&kPointAttrib, 1);
     }
 
-protected:
-    constexpr static Attribute kSinglePointAttrib{"inputPoint", kFloat2_GrVertexAttribType,
-                                                  kFloat2_GrSLType};
-    void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder* b) const override {
+private:
+    void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder* b) const final {
         b->add32(this->viewMatrix().isIdentity());
     }
-    GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const override;
+    GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const final;
 
     class Impl;
 };
@@ -34,21 +35,18 @@
 class GrStencilTriangleShader : public GrStencilPathShader {
 public:
     GrStencilTriangleShader(const SkMatrix& viewMatrix) : GrStencilPathShader(
-            kTessellate_GrStencilTriangleShader_ClassID, viewMatrix, GrPrimitiveType::kTriangles) {
-        this->setVertexAttributes(&kSinglePointAttrib, 1);
-    }
+            kTessellate_GrStencilTriangleShader_ClassID, viewMatrix, GrPrimitiveType::kTriangles) {}
     const char* name() const override { return "tessellate_GrStencilTriangleShader"; }
 };
 
-// Uses GPU tessellation shaders to linearize, triangulate, and render standalone closed cubics.
+// Uses GPU tessellation shaders to linearize, triangulate, and render standalone cubics. Here, a
+// "cubic" is a standalone closed contour consisting of a single cubic bezier.
 // TODO: Eventually we want to use rational cubic wedges in order to support perspective and conics.
-class GrTessellateCubicShader : public GrStencilPathShader {
+class GrStencilCubicShader : public GrStencilPathShader {
 public:
-    GrTessellateCubicShader(const SkMatrix& viewMatrix) : GrStencilPathShader(
-            kTessellate_GrTessellateCubicShader_ClassID, viewMatrix, GrPrimitiveType::kPatches, 4) {
-        this->setVertexAttributes(&kSinglePointAttrib, 1);
-    }
-    const char* name() const override { return "tessellate_GrTessellateCubicShader"; }
+    GrStencilCubicShader(const SkMatrix& viewMatrix) : GrStencilPathShader(
+            kTessellate_GrStencilCubicShader_ClassID, viewMatrix, GrPrimitiveType::kPatches, 4) {}
+    const char* name() const override { return "tessellate_GrStencilCubicShader"; }
 
 private:
     SkString getTessControlShaderGLSL(const char* versionAndExtensionDecls,
@@ -61,13 +59,11 @@
 // wedge is a 5-point patch consisting of 4 cubic control points, plus an anchor point fanning from
 // the center of the curve's resident contour.
 // TODO: Eventually we want to use rational cubic wedges in order to support perspective and conics.
-class GrTessellateWedgeShader : public GrStencilPathShader {
+class GrStencilWedgeShader : public GrStencilPathShader {
 public:
-    GrTessellateWedgeShader(const SkMatrix& viewMatrix) : GrStencilPathShader(
-            kTessellate_GrTessellateWedgeShader_ClassID, viewMatrix, GrPrimitiveType::kPatches, 5) {
-        this->setVertexAttributes(&kSinglePointAttrib, 1);
-    }
-    const char* name() const override { return "tessellate_GrTessellateWedgeShader"; }
+    GrStencilWedgeShader(const SkMatrix& viewMatrix) : GrStencilPathShader(
+            kTessellate_GrStencilWedgeShader_ClassID, viewMatrix, GrPrimitiveType::kPatches, 5) {}
+    const char* name() const override { return "tessellate_GrStencilWedgeShader"; }
 
 private:
     SkString getTessControlShaderGLSL(const char* versionAndExtensionDecls,
@@ -76,69 +72,4 @@
                                          const GrShaderCaps&) const override;
 };
 
-// Uses indirect (instanced) draws to triangulate standalone closed cubics with a "middle-out"
-// topology. The caller must compute each cubic's resolveLevel on the CPU (i.e., the log2 number of
-// line segments it will be divided into; see GrWangsFormula::cubic_log2/quadratic_log2), and then
-// sort the instance buffer by resolveLevel for efficient batching of indirect draws.
-class GrMiddleOutCubicShader : public GrStencilPathShader {
-public:
-    // Each resolveLevel linearizes the curve into 2^resolveLevel line segments. The finest
-    // supported resolveLevel is therefore 2^12=4096 line segments.
-    constexpr static int kMaxResolveLevel = 12;
-
-    // How many vertices do we need to draw in order to triangulate a cubic with 2^resolveLevel
-    // line segments?
-    constexpr static int NumVerticesAtResolveLevel(int resolveLevel) {
-        // resolveLevel=0 -> 0 line segments -> 0 triangles -> 0 vertices
-        // resolveLevel=1 -> 2 line segments -> 1 triangle -> 3 vertices
-        // resolveLevel=2 -> 4 line segments -> 3 triangles -> 9 vertices
-        // resolveLevel=3 -> 8 line segments -> 7 triangles -> 21 vertices
-        // ...
-        return ((1 << resolveLevel) - 1) * 3;
-    }
-
-    // Configures an indirect draw to render cubic instances with 2^resolveLevel evenly-spaced (in
-    // the parametric sense) line segments.
-    static GrDrawIndexedIndirectCommand MakeDrawCubicsIndirectCmd(int resolveLevel,
-                                                                  uint32_t instanceCount,
-                                                                  uint32_t baseInstance) {
-        SkASSERT(resolveLevel > 0 && resolveLevel <= kMaxResolveLevel);
-        // Starting at baseIndex=3, the index buffer triangulates a cubic with 2^kMaxResolveLevel
-        // line segments. Each index value corresponds to a parametric T value on the curve. Since
-        // the triangles are arranged in "middle-out" order, we can conveniently control the
-        // resolveLevel by changing only the indexCount.
-        uint32_t indexCount = NumVerticesAtResolveLevel(resolveLevel);
-        return {indexCount, instanceCount, 3, 0, baseInstance};
-    }
-
-    // For performance reasons we can often express triangles as an indirect cubic draw and sneak
-    // them in alongside the other indirect draws. This method configures an indirect draw to emit
-    // the triangle [P0, P1, P2] from a 4-point instance.
-    static GrDrawIndexedIndirectCommand MakeDrawTrianglesIndirectCmd(uint32_t instanceCount,
-                                                                     uint32_t baseInstance) {
-        // Indices 0,1,2 have special index values that emit points P0, P1, and P2 respectively.
-        return {3, instanceCount, 0, 0, baseInstance};
-    }
-
-    // Returns the index buffer that should be bound when drawing with this shader.
-    // (Our vertex shader uses raw index values directly, so there is no vertex buffer.)
-    static sk_sp<const GrGpuBuffer> FindOrMakeMiddleOutIndexBuffer(GrResourceProvider*);
-
-    GrMiddleOutCubicShader(const SkMatrix& viewMatrix)
-            : GrStencilPathShader(kTessellate_GrMiddleOutCubicShader_ClassID, viewMatrix,
-                                  GrPrimitiveType::kTriangles) {
-        constexpr static Attribute kInputPtsAttribs[] = {
-                {"inputPoints_0_1", kFloat4_GrVertexAttribType, kFloat4_GrSLType},
-                {"inputPoints_2_3", kFloat4_GrVertexAttribType, kFloat4_GrSLType}};
-        this->setInstanceAttributes(kInputPtsAttribs, 2);
-    }
-
-    const char* name() const override { return "tessellate_GrMiddleOutCubicShader"; }
-
-private:
-    GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const override;
-
-    class Impl;
-};
-
 #endif
diff --git a/src/gpu/tessellate/GrTessellatePathOp.cpp b/src/gpu/tessellate/GrTessellatePathOp.cpp
index 6c57881..cf79daa 100644
--- a/src/gpu/tessellate/GrTessellatePathOp.cpp
+++ b/src/gpu/tessellate/GrTessellatePathOp.cpp
@@ -14,12 +14,8 @@
 #include "src/gpu/tessellate/GrFillPathShader.h"
 #include "src/gpu/tessellate/GrMiddleOutPolygonTriangulator.h"
 #include "src/gpu/tessellate/GrMidpointContourParser.h"
-#include "src/gpu/tessellate/GrResolveLevelCounter.h"
 #include "src/gpu/tessellate/GrStencilPathShader.h"
 
-constexpr static int kMaxResolveLevel = GrMiddleOutCubicShader::kMaxResolveLevel;
-constexpr static float kTessellationIntolerance = 4;  // 1/4 of a pixel.
-
 GrTessellatePathOp::FixedFunctionFlags GrTessellatePathOp::fixedFunctionFlags() const {
     auto flags = FixedFunctionFlags::kUsesStencil;
     if (GrAAType::kNone != fAAType) {
@@ -34,69 +30,50 @@
                                       const GrXferProcessor::DstProxyView&) {
 }
 
-void GrTessellatePathOp::onPrepare(GrOpFlushState* flushState) {
+void GrTessellatePathOp::onPrepare(GrOpFlushState* state) {
+    // First check if the path is large and/or simple enough that we can actually triangulate the
+    // inner polygon(s) on the CPU. This is our fastest approach. It allows us to stencil only the
+    // curves, and then fill the internal polygons directly to the final render target, thus filling
+    // in the majority of pixels in a single render pass.
+    SkScalar scales[2];
+    SkAssertResult(fViewMatrix.getMinMaxScales(scales));  // Will fail if perspective.
+    const SkRect& bounds = fPath.getBounds();
     int numVerbs = fPath.countVerbs();
     if (numVerbs <= 0) {
         return;
     }
-
-    // First check if the path is large and/or simple enough that we can actually triangulate the
-    // inner polygon(s) on the CPU. This is our fastest approach. It allows us to stencil only the
-    // curves, and then fill the internal polygons directly to the final render target, thus drawing
-    // the majority of pixels in a single render pass.
-    SkScalar scales[2];
-    SkAssertResult(fViewMatrix.getMinMaxScales(scales));  // Will fail if perspective.
-    const SkRect& bounds = fPath.getBounds();
     float gpuFragmentWork = bounds.height() * scales[0] * bounds.width() * scales[1];
     float cpuTessellationWork = (float)numVerbs * SkNextLog2(numVerbs);  // N log N.
     if (cpuTessellationWork * 500 + (256 * 256) < gpuFragmentWork) {  // Don't try below 256x256.
-        int numCountedCubics;
+        int numCountedCurves;
         // This will fail if the inner triangles do not form a simple polygon (e.g., self
         // intersection, double winding).
-        if (this->prepareNonOverlappingInnerTriangles(flushState, &numCountedCubics)) {
-            if (!numCountedCubics) {
-                return;
-            }
-            // Always use indirect draws for cubics instead of tessellation here. Our goal in this
-            // mode is to maximize GPU performance, and the middle-out topology used by our indirect
-            // draws is easier on the rasterizer than a tessellated fan. There also seems to be a
-            // small amount of fixed tessellation overhead that this avoids.
-            //
-            // NOTE: This will count fewer cubics than above if it discards any for resolveLevel=0.
-            GrResolveLevelCounter resolveLevelCounter;
-            numCountedCubics = resolveLevelCounter.reset(fPath, fViewMatrix,
-                                                         kTessellationIntolerance);
-            this->prepareIndirectOuterCubics(flushState, resolveLevelCounter);
+        if (this->prepareNonOverlappingInnerTriangles(state, &numCountedCurves)) {
+            // Prepare cubics on an instance boundary so we can use the buffer to fill local convex
+            // hulls as well.
+            this->prepareOuterCubics(state, numCountedCurves,
+                                     CubicDataAlignment::kInstanceBoundary);
             return;
         }
     }
 
-    // When there are only a few verbs, it seems to always be fastest to make a single indirect draw
-    // that contains both the inner triangles and the outer cubics, instead of using hardware
-    // tessellation. Also take this path if tessellation is not supported.
-    bool drawTrianglesAsIndirectCubicDraw = (numVerbs < 50);
-    if (drawTrianglesAsIndirectCubicDraw ||
-        !flushState->caps().shaderCaps()->tessellationSupport()) {
-        // Prepare outer cubics with indirect draws.
-        GrResolveLevelCounter resolveLevelCounter;
-        this->prepareMiddleOutTrianglesAndCubics(flushState, &resolveLevelCounter,
-                                                 drawTrianglesAsIndirectCubicDraw);
-        return;
-    }
-
-    // Next see if we can split up the inner triangles and outer cubics into two draw calls. This
-    // allows for a more efficient inner triangle topology that can reduce the rasterizer load by a
-    // large margin on complex paths, but also causes greater CPU overhead due to the extra shader
-    // switches and draw calls.
+    // Next see if we can split up inner polygon triangles and curves, and triangulate the inner
+    // polygon(s) more efficiently. This causes greater CPU overhead due to the extra shaders and
+    // draw calls, but the better triangulation can reduce the rasterizer load by a great deal on
+    // complex paths.
     // NOTE: Raster-edge work is 1-dimensional, so we sum height and width instead of multiplying.
     float rasterEdgeWork = (bounds.height() + bounds.width()) * scales[1] * fPath.countVerbs();
-    if (rasterEdgeWork > 300 * 300) {
-        this->prepareMiddleOutTrianglesAndCubics(flushState);
+    if (rasterEdgeWork > 1000 * 1000) {
+        int numCountedCurves = 0;  // Remains 0 if the call below returns early.
+        this->prepareMiddleOutInnerTriangles(state, &numCountedCurves);
+        // We will fill the path with a bounding box instead of local cubic convex hulls, so there
+        // is no need to prepare the cubics on an instance boundary.
+        this->prepareOuterCubics(state, numCountedCurves, CubicDataAlignment::kVertexBoundary);
         return;
     }
 
     // Fastest CPU approach: emit one cubic wedge per verb, fanning out from the center.
-    this->prepareTessellatedCubicWedges(flushState);
+    this->prepareCubicWedges(state);
 }
 
 bool GrTessellatePathOp::prepareNonOverlappingInnerTriangles(GrMeshDrawOp::Target* target,
@@ -129,74 +106,40 @@
     return true;
 }
 
-void GrTessellatePathOp::prepareMiddleOutTrianglesAndCubics(
-        GrMeshDrawOp::Target* target, GrResolveLevelCounter* resolveLevelCounter,
-        bool drawTrianglesAsIndirectCubicDraw) {
+void GrTessellatePathOp::prepareMiddleOutInnerTriangles(GrMeshDrawOp::Target* target,
+                                                        int* numCountedCurves) {
     SkASSERT(!fTriangleBuffer);
     SkASSERT(!fDoStencilTriangleBuffer);
     SkASSERT(!fDoFillTriangleBuffer);
-    SkASSERT(!fCubicBuffer);
-    SkASSERT(!fStencilCubicsShader);
-    SkASSERT(!fIndirectDrawBuffer);
 
     // No initial moveTo, plus an implicit close at the end; n-2 triangles fill an n-gon.
-    int maxInnerTriangles = fPath.countVerbs() - 1;
-    int maxCubics = fPath.countVerbs();
+    // Each triangle has 3 vertices.
+    int maxVertices = (fPath.countVerbs() - 1) * 3;
 
-    SkPoint* vertexData;
-    int vertexAdvancePerTriangle;
-    if (drawTrianglesAsIndirectCubicDraw) {
-        // Allocate the triangles as 4-point instances at the beginning of the cubic buffer.
-        SkASSERT(resolveLevelCounter);
-        vertexAdvancePerTriangle = 4;
-        int baseTriangleInstance;
-        vertexData = static_cast<SkPoint*>(target->makeVertexSpace(
-                sizeof(SkPoint) * 4, maxInnerTriangles + maxCubics, &fCubicBuffer,
-                &baseTriangleInstance));
-        fBaseCubicVertex = baseTriangleInstance * 4;
-    } else {
-        // Allocate the triangles as normal 3-point instances in the triangle buffer.
-        vertexAdvancePerTriangle = 3;
-        vertexData = static_cast<SkPoint*>(target->makeVertexSpace(
-                sizeof(SkPoint), maxInnerTriangles * 3, &fTriangleBuffer, &fBaseTriangleVertex));
-    }
+    GrEagerDynamicVertexAllocator vertexAlloc(target, &fTriangleBuffer, &fBaseTriangleVertex);
+    auto* vertexData = vertexAlloc.lock<SkPoint>(maxVertices);
     if (!vertexData) {
         return;
     }
 
-    GrVectorXform xform(fViewMatrix);
-    GrMiddleOutPolygonTriangulator middleOut(vertexData, vertexAdvancePerTriangle,
-                                             fPath.countVerbs());
-    if (resolveLevelCounter) {
-        resolveLevelCounter->reset();
-    }
-    int numCountedCurves = 0;
+    constexpr static int kNumVerticesPerTriangle = 3;
+    GrMiddleOutPolygonTriangulator middleOut(vertexData, kNumVerticesPerTriangle, maxVertices);
+    int localCurveCount = 0;
     for (auto [verb, pts, w] : SkPathPriv::Iterate(fPath)) {
         switch (verb) {
             case SkPathVerb::kMove:
-                middleOut.closeAndMove(pts[0]);
+                middleOut.closeAndMove(*pts++);
                 break;
             case SkPathVerb::kLine:
                 middleOut.pushVertex(pts[1]);
                 break;
             case SkPathVerb::kQuad:
                 middleOut.pushVertex(pts[2]);
-                if (resolveLevelCounter) {
-                    // Quadratics get converted to cubics before rendering.
-                    resolveLevelCounter->countCubic(GrWangsFormula::quadratic_log2(
-                            kTessellationIntolerance, pts, xform));
-                    break;
-                }
-                ++numCountedCurves;
+                ++localCurveCount;
                 break;
             case SkPathVerb::kCubic:
                 middleOut.pushVertex(pts[3]);
-                if (resolveLevelCounter) {
-                    resolveLevelCounter->countCubic(GrWangsFormula::cubic_log2(
-                            kTessellationIntolerance, pts, xform));
-                    break;
-                }
-                ++numCountedCurves;
+                ++localCurveCount;
                 break;
             case SkPathVerb::kClose:
                 middleOut.close();
@@ -205,31 +148,13 @@
                 SkUNREACHABLE;
         }
     }
-    int triangleCount = middleOut.close();
-    SkASSERT(triangleCount <= maxInnerTriangles);
+    fTriangleVertexCount = middleOut.close() * kNumVerticesPerTriangle;
+    *numCountedCurves = localCurveCount;
 
-    if (drawTrianglesAsIndirectCubicDraw) {
-        SkASSERT(resolveLevelCounter);
-        int totalInstanceCount = triangleCount + resolveLevelCounter->totalCubicInstanceCount();
-        SkASSERT(vertexAdvancePerTriangle == 4);
-        target->putBackVertices(maxInnerTriangles + maxCubics - totalInstanceCount,
-                                sizeof(SkPoint) * 4);
-        if (totalInstanceCount) {
-            this->prepareIndirectOuterCubicsAndTriangles(target, *resolveLevelCounter, vertexData,
-                                                         triangleCount);
-        }
-    } else {
-        SkASSERT(vertexAdvancePerTriangle == 3);
-        target->putBackVertices(maxInnerTriangles - triangleCount, sizeof(SkPoint) * 3);
-        fTriangleVertexCount = triangleCount * 3;
-        if (fTriangleVertexCount) {
-            fDoStencilTriangleBuffer = true;
-        }
-        if (resolveLevelCounter) {
-            this->prepareIndirectOuterCubics(target, *resolveLevelCounter);
-        } else {
-            this->prepareTessellatedOuterCubics(target, numCountedCurves);
-        }
+    vertexAlloc.unlock(fTriangleVertexCount);
+
+    if (fTriangleVertexCount) {
+        fDoStencilTriangleBuffer = true;
     }
 }
 
@@ -252,133 +177,8 @@
     out[3] = pts[2];
 }
 
-void GrTessellatePathOp::prepareIndirectOuterCubics(
-        GrMeshDrawOp::Target* target, const GrResolveLevelCounter& resolveLevelCounter) {
-    SkASSERT(resolveLevelCounter.totalCubicInstanceCount() >= 0);
-    if (resolveLevelCounter.totalCubicInstanceCount() == 0) {
-        return;
-    }
-    // Allocate a buffer to store the cubic data.
-    SkPoint* cubicData;
-    int baseInstance;
-    cubicData = static_cast<SkPoint*>(target->makeVertexSpace(
-            sizeof(SkPoint) * 4, resolveLevelCounter.totalCubicInstanceCount(), &fCubicBuffer,
-            &baseInstance));
-    if (!cubicData) {
-        return;
-    }
-    fBaseCubicVertex = baseInstance * 4;
-    this->prepareIndirectOuterCubicsAndTriangles(target, resolveLevelCounter, cubicData,
-                                                 /*numTrianglesAtBeginningOfData=*/0);
-}
-
-void GrTessellatePathOp::prepareIndirectOuterCubicsAndTriangles(
-        GrMeshDrawOp::Target* target, const GrResolveLevelCounter& resolveLevelCounter,
-        SkPoint* cubicData, int numTrianglesAtBeginningOfData) {
-    SkASSERT(numTrianglesAtBeginningOfData + resolveLevelCounter.totalCubicInstanceCount() > 0);
-    SkASSERT(!fStencilCubicsShader);
-    SkASSERT(cubicData);
-
-    // Here we treat fCubicBuffer as an instance buffer. It should have been prepared with the base
-    // vertex on an instance boundary in order to accommodate this.
-    SkASSERT(fBaseCubicVertex % 4 == 0);
-    int baseInstance = fBaseCubicVertex >> 2;
-
-    // Start preparing the indirect draw buffer.
-    fIndirectDrawCount = resolveLevelCounter.totalCubicIndirectDrawCount();
-    if (numTrianglesAtBeginningOfData) {
-        ++fIndirectDrawCount;  // Add an indirect draw for the triangles at the beginning.
-    }
-
-    // Allocate space for the GrDrawIndexedIndirectCommand structs.
-    GrDrawIndexedIndirectCommand* indirectData = target->makeDrawIndexedIndirectSpace(
-            fIndirectDrawCount, &fIndirectDrawBuffer, &fIndirectDrawOffset);
-    if (!indirectData) {
-        SkASSERT(!fIndirectDrawBuffer);
-        return;
-    }
-
-    // Fill out the GrDrawIndexedIndirectCommand structs and determine the starting instance data
-    // location at each resolve level.
-    SkPoint* instanceLocations[kMaxResolveLevel + 1];
-    int indirectIdx = 0;
-    int runningInstanceCount = 0;
-    if (numTrianglesAtBeginningOfData) {
-        // The caller has already packed "triangleInstanceCount" triangles into 4-point instances
-        // at the beginning of the instance buffer. Add a special-case indirect draw here that will
-        // emit the triangles [P0, P1, P2] from these 4-point instances.
-        indirectData[0] = GrMiddleOutCubicShader::MakeDrawTrianglesIndirectCmd(
-                numTrianglesAtBeginningOfData, baseInstance);
-        indirectIdx = 1;
-        runningInstanceCount = numTrianglesAtBeginningOfData;
-    }
-    for (int resolveLevel = 1; resolveLevel <= kMaxResolveLevel; ++resolveLevel) {
-        instanceLocations[resolveLevel] = cubicData + runningInstanceCount * 4;
-        if (int instanceCountAtCurrLevel = resolveLevelCounter[resolveLevel]) {
-            indirectData[indirectIdx++] = GrMiddleOutCubicShader::MakeDrawCubicsIndirectCmd(
-                    resolveLevel, instanceCountAtCurrLevel, baseInstance + runningInstanceCount);
-            runningInstanceCount += instanceCountAtCurrLevel;
-        }
-    }
-
-#ifdef SK_DEBUG
-    SkASSERT(indirectIdx == fIndirectDrawCount);
-    SkASSERT(runningInstanceCount == numTrianglesAtBeginningOfData +
-                                     resolveLevelCounter.totalCubicInstanceCount());
-    SkASSERT(fIndirectDrawCount > 0);
-
-    SkPoint* endLocations[kMaxResolveLevel + 1];
-    memcpy(endLocations, instanceLocations + 1, kMaxResolveLevel * sizeof(SkPoint*));
-    int totalInstanceCount = numTrianglesAtBeginningOfData +
-                             resolveLevelCounter.totalCubicInstanceCount();
-    endLocations[kMaxResolveLevel] = cubicData + totalInstanceCount * 4;
-#endif
-
-    fCubicVertexCount = numTrianglesAtBeginningOfData * 4;
-
-    if (resolveLevelCounter.totalCubicInstanceCount()) {
-        GrVectorXform xform(fViewMatrix);
-        for (auto [verb, pts, w] : SkPathPriv::Iterate(fPath)) {
-            int level;
-            switch (verb) {
-                default:
-                    continue;
-                case SkPathVerb::kQuad:
-                    level = GrWangsFormula::quadratic_log2(kTessellationIntolerance, pts, xform);
-                    if (level == 0) {
-                        continue;
-                    }
-                    level = std::min(level, kMaxResolveLevel);
-                    quad2cubic(pts, instanceLocations[level]);
-                    break;
-                case SkPathVerb::kCubic:
-                    level = GrWangsFormula::cubic_log2(kTessellationIntolerance, pts, xform);
-                    if (level == 0) {
-                        continue;
-                    }
-                    level = std::min(level, kMaxResolveLevel);
-                    memcpy(instanceLocations[level], pts, sizeof(SkPoint) * 4);
-                    break;
-            }
-            instanceLocations[level] += 4;
-            fCubicVertexCount += 4;
-        }
-    }
-
-#ifdef SK_DEBUG
-    for (int i = 1; i <= kMaxResolveLevel; ++i) {
-        SkASSERT(instanceLocations[i] == endLocations[i]);
-    }
-    SkASSERT(fCubicVertexCount == (numTrianglesAtBeginningOfData +
-                                   resolveLevelCounter.totalCubicInstanceCount()) * 4);
-#endif
-
-    fStencilCubicsShader = target->allocator()->make<GrMiddleOutCubicShader>(fViewMatrix);
-}
-
-void GrTessellatePathOp::prepareTessellatedOuterCubics(GrMeshDrawOp::Target* target,
-                                                       int numCountedCurves) {
-    SkASSERT(numCountedCurves >= 0);
+void GrTessellatePathOp::prepareOuterCubics(GrMeshDrawOp::Target* target, int numCountedCurves,
+                                            CubicDataAlignment alignment) {
     SkASSERT(!fCubicBuffer);
     SkASSERT(!fStencilCubicsShader);
 
@@ -386,37 +186,43 @@
         return;
     }
 
+    bool instanceAligned = (alignment == CubicDataAlignment::kInstanceBoundary);
+    int instanceOrVertexStride = (instanceAligned) ? sizeof(SkPoint) * 4 : sizeof(SkPoint);
+    int instanceOrVertexCount = (instanceAligned) ? numCountedCurves : numCountedCurves * 4;
+    int baseInstanceOrVertex;
+
     auto* vertexData = static_cast<SkPoint*>(target->makeVertexSpace(
-            sizeof(SkPoint), numCountedCurves * 4, &fCubicBuffer, &fBaseCubicVertex));
+            instanceOrVertexStride, instanceOrVertexCount, &fCubicBuffer, &baseInstanceOrVertex));
     if (!vertexData) {
         return;
     }
+    fBaseCubicVertex = (instanceAligned) ? baseInstanceOrVertex * 4 : baseInstanceOrVertex;
     fCubicVertexCount = 0;
 
     for (auto [verb, pts, w] : SkPathPriv::Iterate(fPath)) {
         switch (verb) {
-            default:
-                continue;
             case SkPathVerb::kQuad:
                 SkASSERT(fCubicVertexCount < numCountedCurves * 4);
                 quad2cubic(pts, vertexData + fCubicVertexCount);
+                fCubicVertexCount += 4;
                 break;
             case SkPathVerb::kCubic:
                 SkASSERT(fCubicVertexCount < numCountedCurves * 4);
                 memcpy(vertexData + fCubicVertexCount, pts, sizeof(SkPoint) * 4);
+                fCubicVertexCount += 4;
+                break;
+            default:
                 break;
         }
-        fCubicVertexCount += 4;
     }
     SkASSERT(fCubicVertexCount == numCountedCurves * 4);
 
-    fStencilCubicsShader = target->allocator()->make<GrTessellateCubicShader>(fViewMatrix);
+    fStencilCubicsShader = target->allocator()->make<GrStencilCubicShader>(fViewMatrix);
 }
 
-void GrTessellatePathOp::prepareTessellatedCubicWedges(GrMeshDrawOp::Target* target) {
+void GrTessellatePathOp::prepareCubicWedges(GrMeshDrawOp::Target* target) {
     SkASSERT(!fCubicBuffer);
     SkASSERT(!fStencilCubicsShader);
-    SkASSERT(target->caps().shaderCaps()->tessellationSupport());
 
     // No initial moveTo, one wedge per verb, plus an implicit close at the end.
     // Each wedge has 5 vertices.
@@ -469,18 +275,18 @@
     vertexAlloc.unlock(fCubicVertexCount);
 
     if (fCubicVertexCount) {
-        fStencilCubicsShader = target->allocator()->make<GrTessellateWedgeShader>(fViewMatrix);
+        fStencilCubicsShader = target->allocator()->make<GrStencilWedgeShader>(fViewMatrix);
     }
 }
 
-void GrTessellatePathOp::onExecute(GrOpFlushState* flushState, const SkRect& chainBounds) {
-    this->drawStencilPass(flushState);
+void GrTessellatePathOp::onExecute(GrOpFlushState* state, const SkRect& chainBounds) {
+    this->drawStencilPass(state);
     if (!(Flags::kStencilOnly & fFlags)) {
-        this->drawCoverPass(flushState);
+        this->drawCoverPass(state);
     }
 }
 
-void GrTessellatePathOp::drawStencilPass(GrOpFlushState* flushState) {
+void GrTessellatePathOp::drawStencilPass(GrOpFlushState* state) {
     // Increments clockwise triangles and decrements counterclockwise. Used for "winding" fill.
     constexpr static GrUserStencilSettings kIncrDecrStencil(
         GrUserStencilSettings::StaticInitSeparate<
@@ -505,49 +311,41 @@
     if (GrAAType::kNone != fAAType) {
         initArgs.fInputFlags |= GrPipeline::InputFlags::kHWAntialias;
     }
-    if (flushState->caps().wireframeSupport() && (Flags::kWireframe & fFlags)) {
+    if (state->caps().wireframeSupport() && (Flags::kWireframe & fFlags)) {
         initArgs.fInputFlags |= GrPipeline::InputFlags::kWireframe;
     }
     SkASSERT(SkPathFillType::kWinding == fPath.getFillType() ||
              SkPathFillType::kEvenOdd == fPath.getFillType());
     initArgs.fUserStencil = (SkPathFillType::kWinding == fPath.getFillType()) ?
             &kIncrDecrStencil : &kInvertStencil;
-    initArgs.fCaps = &flushState->caps();
+    initArgs.fCaps = &state->caps();
     GrPipeline pipeline(initArgs, GrDisableColorXPFactory::MakeXferProcessor(),
-                        flushState->appliedHardClip());
+                        state->appliedHardClip());
 
     if (fDoStencilTriangleBuffer) {
         SkASSERT(fTriangleBuffer);
         GrStencilTriangleShader stencilTriangleShader(fViewMatrix);
-        GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline,
+        GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline,
                                               &stencilTriangleShader);
-        flushState->bindPipelineAndScissorClip(programInfo, this->bounds());
-        flushState->bindBuffers(nullptr, nullptr, fTriangleBuffer.get());
-        flushState->draw(fTriangleVertexCount, fBaseTriangleVertex);
+        state->bindPipelineAndScissorClip(programInfo, this->bounds());
+        state->bindBuffers(nullptr, nullptr, fTriangleBuffer.get());
+        state->draw(fTriangleVertexCount, fBaseTriangleVertex);
     }
 
     if (fStencilCubicsShader) {
-        SkASSERT(fCubicBuffer);
-        GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline,
-                                              fStencilCubicsShader);
-        flushState->bindPipelineAndScissorClip(programInfo, this->bounds());
-        if (fIndirectDrawBuffer) {
-            auto indexBuffer = GrMiddleOutCubicShader::FindOrMakeMiddleOutIndexBuffer(
-                    flushState->resourceProvider());
-            flushState->bindBuffers(indexBuffer.get(), fCubicBuffer.get(), nullptr);
-            flushState->drawIndexedIndirect(fIndirectDrawBuffer.get(), fIndirectDrawOffset,
-                                            fIndirectDrawCount);
-        } else {
-            flushState->bindBuffers(nullptr, nullptr, fCubicBuffer.get());
-            flushState->draw(fCubicVertexCount, fBaseCubicVertex);
-            if (flushState->caps().requiresManualFBBarrierAfterTessellatedStencilDraw()) {
-                flushState->gpu()->insertManualFramebufferBarrier();  // http://skbug.com/9739
-            }
-        }
+        GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline, fStencilCubicsShader);
+        state->bindPipelineAndScissorClip(programInfo, this->bounds());
+        state->bindBuffers(nullptr, nullptr, fCubicBuffer.get());
+        state->draw(fCubicVertexCount, fBaseCubicVertex);
+    }
+
+    // http://skbug.com/9739
+    if (state->caps().requiresManualFBBarrierAfterTessellatedStencilDraw()) {
+        state->gpu()->insertManualFramebufferBarrier();
     }
 }
 
-void GrTessellatePathOp::drawCoverPass(GrOpFlushState* flushState) {
+void GrTessellatePathOp::drawCoverPass(GrOpFlushState* state) {
     // Allows non-zero stencil values to pass and write a color, and resets the stencil value back
     // to zero; discards immediately on stencil values of zero.
     // NOTE: It's ok to not check the clip here because the previous stencil pass only wrote to
@@ -564,7 +362,7 @@
     GrPipeline::InitArgs initArgs;
     if (GrAAType::kNone != fAAType) {
         initArgs.fInputFlags |= GrPipeline::InputFlags::kHWAntialias;
-        if (1 == flushState->proxy()->numSamples()) {
+        if (1 == state->proxy()->numSamples()) {
             SkASSERT(GrAAType::kCoverage == fAAType);
             // We are mixed sampled. Use conservative raster to make the sample coverage mask 100%
             // at every fragment. This way we will still get a double hit on shared edges, but
@@ -573,10 +371,10 @@
             initArgs.fInputFlags |= GrPipeline::InputFlags::kConservativeRaster;
         }
     }
-    initArgs.fCaps = &flushState->caps();
-    initArgs.fDstProxyView = flushState->drawOpArgs().dstProxyView();
-    initArgs.fWriteSwizzle = flushState->drawOpArgs().writeSwizzle();
-    GrPipeline pipeline(initArgs, std::move(fProcessors), flushState->detachAppliedClip());
+    initArgs.fCaps = &state->caps();
+    initArgs.fDstProxyView = state->drawOpArgs().dstProxyView();
+    initArgs.fWriteSwizzle = state->drawOpArgs().writeSwizzle();
+    GrPipeline pipeline(initArgs, std::move(fProcessors), state->detachAppliedClip());
 
     if (fDoFillTriangleBuffer) {
         SkASSERT(fTriangleBuffer);
@@ -623,32 +421,29 @@
         }
 
         GrFillTriangleShader fillTriangleShader(fViewMatrix, fColor);
-        GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline,
-                                              &fillTriangleShader);
-        flushState->bindPipelineAndScissorClip(programInfo, this->bounds());
-        flushState->bindTextures(fillTriangleShader, nullptr, pipeline);
-        flushState->bindBuffers(nullptr, nullptr, fTriangleBuffer.get());
-        flushState->draw(fTriangleVertexCount, fBaseTriangleVertex);
+        GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline, &fillTriangleShader);
+        state->bindPipelineAndScissorClip(programInfo, this->bounds());
+        state->bindTextures(fillTriangleShader, nullptr, pipeline);
+        state->bindBuffers(nullptr, nullptr, fTriangleBuffer.get());
+        state->draw(fTriangleVertexCount, fBaseTriangleVertex);
 
         if (fStencilCubicsShader) {
-            SkASSERT(fCubicBuffer);
-
             // At this point, every pixel is filled in except the ones touched by curves. Issue a
             // final cover pass over the curves by drawing their convex hulls. This will fill in any
             // remaining samples and reset the stencil buffer.
             pipeline.setUserStencil(&kTestAndResetStencil);
             GrFillCubicHullShader fillCubicHullShader(fViewMatrix, fColor);
-            GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline,
+            GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline,
                                                   &fillCubicHullShader);
-            flushState->bindPipelineAndScissorClip(programInfo, this->bounds());
-            flushState->bindTextures(fillCubicHullShader, nullptr, pipeline);
+            state->bindPipelineAndScissorClip(programInfo, this->bounds());
+            state->bindTextures(fillCubicHullShader, nullptr, pipeline);
 
             // Here we treat fCubicBuffer as an instance buffer. It should have been prepared with
             // the base vertex on an instance boundary in order to accommodate this.
             SkASSERT((fCubicVertexCount % 4) == 0);
             SkASSERT((fBaseCubicVertex % 4) == 0);
-            flushState->bindBuffers(nullptr, fCubicBuffer.get(), nullptr);
-            flushState->drawInstanced(fCubicVertexCount >> 2, fBaseCubicVertex >> 2, 4, 0);
+            state->bindBuffers(nullptr, fCubicBuffer.get(), nullptr);
+            state->drawInstanced(fCubicVertexCount >> 2, fBaseCubicVertex >> 2, 4, 0);
         }
         return;
     }
@@ -656,10 +451,9 @@
     // There are no triangles to fill. Just draw a bounding box.
     pipeline.setUserStencil(&kTestAndResetStencil);
     GrFillBoundingBoxShader fillBoundingBoxShader(fViewMatrix, fColor, fPath.getBounds());
-    GrPathShader::ProgramInfo programInfo(flushState->writeView(), &pipeline,
-                                          &fillBoundingBoxShader);
-    flushState->bindPipelineAndScissorClip(programInfo, this->bounds());
-    flushState->bindTextures(fillBoundingBoxShader, nullptr, pipeline);
-    flushState->bindBuffers(nullptr, nullptr, nullptr);
-    flushState->draw(4, 0);
+    GrPathShader::ProgramInfo programInfo(state->writeView(), &pipeline, &fillBoundingBoxShader);
+    state->bindPipelineAndScissorClip(programInfo, this->bounds());
+    state->bindTextures(fillBoundingBoxShader, nullptr, pipeline);
+    state->bindBuffers(nullptr, nullptr, nullptr);
+    state->draw(4, 0);
 }
diff --git a/src/gpu/tessellate/GrTessellatePathOp.h b/src/gpu/tessellate/GrTessellatePathOp.h
index 89c9f7f..8ee6d21 100644
--- a/src/gpu/tessellate/GrTessellatePathOp.h
+++ b/src/gpu/tessellate/GrTessellatePathOp.h
@@ -12,11 +12,10 @@
 
 class GrAppliedHardClip;
 class GrStencilPathShader;
-class GrResolveLevelCounter;
 
-// Renders paths using a hybrid "Red Book" (stencil, then cover) method. Curves get linearized by
-// either GPU tessellation shaders or indirect draws. This Op doesn't apply analytic AA, so it
-// requires a render target that supports either MSAA or mixed samples if AA is desired.
+// Renders paths using a hybrid Red Book "stencil, then cover" method. Curves get linearized by
+// GPU tessellation shaders. This Op doesn't apply analytic AA, so it requires a render target that
+// supports either MSAA or mixed samples if AA is desired.
 class GrTessellatePathOp : public GrDrawOp {
 public:
     enum class Flags {
@@ -67,41 +66,25 @@
     // and this is not an option as it would introduce T-junctions with the outer cubics.
     bool prepareNonOverlappingInnerTriangles(GrMeshDrawOp::Target*, int* numCountedCurves);
 
-    // Produces a "Red Book" style triangulation of the SkPath's inner polygon(s) using a
-    // "middle-out" topology (See GrMiddleOutPolygonTriangulator), and then prepares outer cubics in
-    // the cubic buffer. The inner triangles and outer cubics stencilled together define the
-    // complete path.
-    //
-    // If a resolveLevel counter is provided, this method resets it and uses it to count and
-    // prepares the outer cubics as indirect draws. Otherwise they are prepared as hardware
-    // tessellation patches.
-    //
-    // If drawTrianglesAsIndirectCubicDraw is true, then the resolveLevel counter must be non-null,
-    // and we express the inner triangles as an indirect cubic draw and sneak them in alongside the
-    // other cubic draws.
-    void prepareMiddleOutTrianglesAndCubics(GrMeshDrawOp::Target*, GrResolveLevelCounter* = nullptr,
-                                            bool drawTrianglesAsIndirectCubicDraw = false);
-
-    // Prepares a list of indirect draw commands and instance data for the path's "outer cubics",
-    // converting any quadratics to cubics. An outer cubic is an independent, 4-point closed contour
-    // consisting of a single cubic curve. Stencilled together with the inner triangles, these
+    // Produces a "Red Book" style triangulation of the SkPath's inner polygon(s). The inner
+    // polygons connect the endpoints of each verb. (i.e., they are the path that would result from
+    // collapsing all curves to single lines.) Stencilled together with the outer cubics, these
     // define the complete path.
-    void prepareIndirectOuterCubics(GrMeshDrawOp::Target*, const GrResolveLevelCounter&);
+    //
+    // This method emits the inner triangles with a "middle-out" topology. Middle-out can reduce
+    // the load on the rasterizer by a great deal as compared to a linear triangle strip or fan.
+    // See GrMiddleOutPolygonTriangulator.
+    void prepareMiddleOutInnerTriangles(GrMeshDrawOp::Target*, int* numCountedCurves);
 
-    // For performance reasons we can often express triangles as an indirect cubic draw and sneak
-    // them in alongside the other indirect draws. This prepareIndirectOuterCubics variant allows
-    // the caller to provide a mapped cubic buffer with triangles already written into 4-point
-    // instances at the beginning. If numTrianglesAtBeginningOfData is nonzero, we add an extra
-    // indirect draw that renders these triangles.
-    void prepareIndirectOuterCubicsAndTriangles(GrMeshDrawOp::Target*, const GrResolveLevelCounter&,
-                                                SkPoint* cubicData,
-                                                int numTrianglesAtBeginningOfData);
+    enum class CubicDataAlignment : bool {
+        kVertexBoundary,
+        kInstanceBoundary
+    };
 
-    // Writes an array of "outer cubic" tessellation patches from each bezier in the SkPath,
-    // converting any quadratics to cubics. An outer cubic is an independent, 4-point closed contour
-    // consisting of a single cubic curve. Stencilled together with the inner triangles, these
-    // define the complete path.
-    void prepareTessellatedOuterCubics(GrMeshDrawOp::Target*, int numCountedCurves);
+    // Writes an array of "outer" cubics from each bezier in the SkPath, converting any quadratics
+    // to cubics. An outer cubic is an independent, 4-point closed contour consisting of a single
+    // cubic curve. Stencilled together with the inner triangles, these define the complete path.
+    void prepareOuterCubics(GrMeshDrawOp::Target*, int numCountedCurves, CubicDataAlignment);
 
     // Writes an array of cubic "wedges" from the SkPath, converting any lines or quadratics to
     // cubics. A wedge is an independent, 5-point closed contour consisting of 4 cubic control
@@ -109,7 +92,7 @@
     // stencilled, these wedges alone define the complete path.
     //
     // TODO: Eventually we want to use rational cubic wedges in order to support conics.
-    void prepareTessellatedCubicWedges(GrMeshDrawOp::Target*);
+    void prepareCubicWedges(GrMeshDrawOp::Target*);
 
     void onExecute(GrOpFlushState*, const SkRect& chainBounds) override;
     void drawStencilPass(GrOpFlushState*);
@@ -154,13 +137,6 @@
     int fCubicVertexCount;
     GrStencilPathShader* fStencilCubicsShader = nullptr;
 
-    // If fIndirectDrawBuffer is non-null, then we issue an indexed-indirect draw instead of using
-    // hardware tessellation. This is oftentimes faster than tessellation, and other times it serves
-    // as a polyfill when tessellation just isn't supported.
-    sk_sp<const GrBuffer> fIndirectDrawBuffer;
-    size_t fIndirectDrawOffset;
-    int fIndirectDrawCount;
-
     friend class GrOpMemoryPool;  // For ctor.
 
 public:
diff --git a/src/gpu/tessellate/GrTessellationPathRenderer.cpp b/src/gpu/tessellate/GrTessellationPathRenderer.cpp
index ab21f28..85fa43e 100644
--- a/src/gpu/tessellate/GrTessellationPathRenderer.cpp
+++ b/src/gpu/tessellate/GrTessellationPathRenderer.cpp
@@ -37,6 +37,8 @@
 
 GrPathRenderer::CanDrawPath GrTessellationPathRenderer::onCanDrawPath(
         const CanDrawPathArgs& args) const {
+    // This class should not have been added to the chain without tessellation support.
+    SkASSERT(args.fCaps->shaderCaps()->tessellationSupport());
     if (!args.fShape->style().isSimpleFill() || args.fShape->inverseFilled() ||
         args.fViewMatrix->hasPerspective()) {
         return CanDrawPath::kNo;