Update GrPathTessellator::prepare() to take a list of paths

This allows us to batch multiple paths together in a single
tessellation. The first user will be the atlas, but the convex
renderer is also a good candidate.

Bug: skia:12258
Change-Id: I4d415d32bbf423cfd9b7ddf2543c21371936da90
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/433776
Commit-Queue: Chris Dalton <csmartdalton@google.com>
Reviewed-by: Brian Salomon <bsalomon@google.com>
diff --git a/bench/TessellateBench.cpp b/bench/TessellateBench.cpp
index 25468aa..b646cfe 100644
--- a/bench/TessellateBench.cpp
+++ b/bench/TessellateBench.cpp
@@ -123,7 +123,8 @@
                                              GrPathCurveTessellator::DrawInnerFan::kNo,
                                              fTarget->caps().minPathVerbsForHwTessellation(),
                                              noVaryingsPipeline, fTarget->caps());
-    tess->prepare(fTarget.get(), SkRectPriv::MakeLargest(), gAlmostIdentity, fPath, nullptr);
+    tess->prepare(fTarget.get(), SkRectPriv::MakeLargest(), {gAlmostIdentity, fPath},
+                  fPath.countVerbs());
 }
 
 DEF_PATH_TESS_BENCH(GrPathWedgeTessellator, make_cubic_path(8), SkMatrix::I()) {
@@ -133,7 +134,8 @@
     auto tess = GrPathWedgeTessellator::Make(&arena, fMatrix, SK_PMColor4fTRANSPARENT,
                                              fTarget->caps().minPathVerbsForHwTessellation(),
                                              noVaryingsPipeline, fTarget->caps());
-    tess->prepare(fTarget.get(), SkRectPriv::MakeLargest(), gAlmostIdentity, fPath, nullptr);
+    tess->prepare(fTarget.get(), SkRectPriv::MakeLargest(), {gAlmostIdentity, fPath},
+                  fPath.countVerbs());
 }
 
 static void benchmark_wangs_formula_cubic_log2(const SkMatrix& matrix, const SkPath& path) {
diff --git a/samplecode/SamplePathTessellators.cpp b/samplecode/SamplePathTessellators.cpp
index b355592..b71aad6 100644
--- a/samplecode/SamplePathTessellators.cpp
+++ b/samplecode/SamplePathTessellators.cpp
@@ -103,7 +103,7 @@
                                                             caps);
                 break;
         }
-        fTessellator->prepare(flushState, this->bounds(), pathMatrix, fPath);
+        fTessellator->prepare(flushState, this->bounds(), {pathMatrix, fPath}, fPath.countVerbs());
         fProgram = GrTessellationShader::MakeProgram({alloc, flushState->writeView(),
                                                      &flushState->dstProxyView(),
                                                      flushState->renderPassBarriers(),
diff --git a/src/gpu/tessellate/GrCullTest.h b/src/gpu/tessellate/GrCullTest.h
index 971b1c2..37431a6 100644
--- a/src/gpu/tessellate/GrCullTest.h
+++ b/src/gpu/tessellate/GrCullTest.h
@@ -21,18 +21,24 @@
 // translation element. Instead we unapply the translation to the cull bounds ahead of time.
 class GrCullTest {
 public:
-    GrCullTest(const SkRect& devCullBounds, const SkMatrix& m)
-            // [fMatX, fMatY] maps path coordinates to the float4 [x, y, -x, -y] in device space.
-            : fMatX{m.getScaleX(), m.getSkewY(), -m.getScaleX(), -m.getSkewY()}
-            , fMatY{m.getSkewX(), m.getScaleY(), -m.getSkewX(), -m.getScaleY()}
-            // Store the cull bounds as [l, t, -r, -b] for faster math.
-            // Also subtract the matrix translate from the cull bounds ahead of time, rather than
-            // adding it to every point every time we test.
-            , fCullBounds{devCullBounds.fLeft - m.getTranslateX(),
-                          devCullBounds.fTop - m.getTranslateY(),
-                          m.getTranslateX() - devCullBounds.fRight,
-                          m.getTranslateY() - devCullBounds.fBottom} {
+    GrCullTest() = default;
+
+    GrCullTest(const SkRect& devCullBounds, const SkMatrix& m) {
+        this->set(devCullBounds, m);
+    }
+
+    void set(const SkRect& devCullBounds, const SkMatrix& m) {
         SkASSERT(!m.hasPerspective());
+        // [fMatX, fMatY] maps path coordinates to the float4 [x, y, -x, -y] in device space.
+        fMatX = {m.getScaleX(), m.getSkewY(), -m.getScaleX(), -m.getSkewY()};
+        fMatY = {m.getSkewX(), m.getScaleY(), -m.getSkewX(), -m.getScaleY()};
+        // Store the cull bounds as [l, t, -r, -b] for faster math.
+        // Also subtract the matrix translate from the cull bounds ahead of time, rather than adding
+        // it to every point every time we test.
+        fCullBounds = {devCullBounds.fLeft - m.getTranslateX(),
+                       devCullBounds.fTop - m.getTranslateY(),
+                       m.getTranslateX() - devCullBounds.fRight,
+                       m.getTranslateY() - devCullBounds.fBottom};
     }
 
     // Returns whether M*p will be in the viewport.
diff --git a/src/gpu/tessellate/GrPathCurveTessellator.cpp b/src/gpu/tessellate/GrPathCurveTessellator.cpp
index e1c48ce..3d56746 100644
--- a/src/gpu/tessellate/GrPathCurveTessellator.cpp
+++ b/src/gpu/tessellate/GrPathCurveTessellator.cpp
@@ -24,18 +24,20 @@
 // supported by the hardware.
 class CurveWriter {
 public:
-    CurveWriter(const SkRect& cullBounds,
-                const SkMatrix& totalMatrix,  // shaderMatrix * pathMatrix
-                const SkMatrix& pathMatrix,
-                int maxSegments)
-            : fCullTest(cullBounds, totalMatrix)
-            , fTotalVectorXform(totalMatrix)
-            , fPathXform(pathMatrix)
-            , fMaxSegments_pow2(maxSegments * maxSegments)
+    CurveWriter(int maxSegments)
+            : fMaxSegments_pow2(maxSegments * maxSegments)
             , fMaxSegments_pow4(fMaxSegments_pow2 * fMaxSegments_pow2) {
     }
 
-    const GrPathXform& pathXform() const { return fPathXform; }
+    void setMatrices(const SkRect& cullBounds,
+                     const SkMatrix& shaderMatrix,
+                     const SkMatrix& pathMatrix) {
+        SkMatrix totalMatrix;
+        totalMatrix.setConcat(shaderMatrix, pathMatrix);
+        fCullTest.set(cullBounds, totalMatrix);
+        fTotalVectorXform = totalMatrix;
+        fPathXform = pathMatrix;
+    }
 
     SK_ALWAYS_INLINE void writeQuadratic(const GrShaderCaps& shaderCaps,
                                          GrVertexChunkBuilder* chunker, const SkPoint p[3]) {
@@ -191,8 +193,8 @@
 
 void GrPathCurveTessellator::prepare(GrMeshDrawTarget* target,
                                      const SkRect& cullBounds,
-                                     const SkMatrix& pathMatrix,
-                                     const SkPath& path,
+                                     const PathDrawList& pathDrawList,
+                                     int totalCombinedPathVerbCnt,
                                      const BreadcrumbTriangleList* breadcrumbTriangleList) {
     SkASSERT(fVertexChunkArray.empty());
 
@@ -201,16 +203,17 @@
     // Determine how many triangles to allocate.
     int maxTriangles = 0;
     if (fDrawInnerFan) {
-        // An n-sided polygon is fanned by n-2 triangles.
-        int maxEdgesInFan = GrPathTessellator::MaxSegmentsInPath(path);
-        int maxTrianglesInFan = std::max(maxEdgesInFan - 2, 0);
-        maxTriangles += maxTrianglesInFan;
+        int maxCombinedFanEdges = MaxCombinedFanEdgesInPathDrawList(totalCombinedPathVerbCnt);
+        // A single n-sided polygon is fanned by n-2 triangles. Multiple polygons with a combined
+        // edge count of n are fanned by strictly fewer triangles.
+        int maxTrianglesInFans = std::max(maxCombinedFanEdges - 2, 0);
+        maxTriangles += maxTrianglesInFans;
     }
     if (breadcrumbTriangleList) {
         maxTriangles += breadcrumbTriangleList->count();
     }
     // Over-allocate enough curves for 1 in 4 to chop.
-    int curveAllocCount = (path.countVerbs() * 5 + 3) / 4;  // i.e., ceil(numVerbs * 5/4)
+    int curveAllocCount = (totalCombinedPathVerbCnt * 5 + 3) / 4;  // i.e., ceil(numVerbs * 5/4)
     int patchAllocCount = maxTriangles + curveAllocCount;
     if (!patchAllocCount) {
         return;
@@ -219,21 +222,6 @@
                                                                : fShader->instanceStride();
     GrVertexChunkBuilder chunker(target, &fVertexChunkArray, patchStride, patchAllocCount);
 
-    int maxSegments;
-    if (fShader->willUseTessellationShaders()) {
-        // The curve shader tessellates T=0..(1/2) on the first side of the canonical triangle and
-        // T=(1/2)..1 on the second side. This means we get double the max tessellation segments
-        // for the range T=0..1.
-        maxSegments = shaderCaps.maxTessellationSegments() * 2;
-    } else {
-        maxSegments = GrPathTessellationShader::kMaxFixedCountSegments;
-    }
-
-    CurveWriter curveWriter(cullBounds,
-                            SkMatrix::Concat(fShader->viewMatrix(), pathMatrix),
-                            pathMatrix,
-                            maxSegments);
-
     // Write out the triangles.
     if (maxTriangles) {
         GrVertexWriter vertexWriter = chunker.appendVertices(maxTriangles);
@@ -249,19 +237,30 @@
             uint32_t pad32Value = shaderCaps.infinitySupport()
                     ? GrVertexWriter::kIEEE_32_infinity
                     : sk_bit_cast<uint32_t>(GrTessellationShader::kTriangularConicCurveType);
-            int numTrianglesWritten;
-            vertexWriter = GrMiddleOutPolygonTriangulator::WritePathInnerFan(
-                    std::move(vertexWriter),
-                    pad32Count,
-                    pad32Value,
-                    curveWriter.pathXform(),
-                    path,
-                    &numTrianglesWritten);
-            numRemainingTriangles -= numTrianglesWritten;
+            for (auto [pathMatrix, path] : pathDrawList) {
+                int numTrianglesWritten;
+                vertexWriter = GrMiddleOutPolygonTriangulator::WritePathInnerFan(
+                        std::move(vertexWriter),
+                        pad32Count,
+                        pad32Value,
+                        pathMatrix,
+                        path,
+                        &numTrianglesWritten);
+                numRemainingTriangles -= numTrianglesWritten;
+            }
         }
         if (breadcrumbTriangleList) {
             int numWritten = 0;
             SkDEBUGCODE(int count = 0;)
+#ifdef SK_DEBUG
+            for (auto [pathMatrix, path] : pathDrawList) {
+                // This assert isn't actually necessary, but we currently only use breadcrumb
+                // triangles with an identity pathMatrix. If that ever changes, this assert will
+                // serve as a gentle reminder to make sure the breadcrumb triangles are also
+                // transformed on the CPU.
+                SkASSERT(pathMatrix.isIdentity());
+            }
+#endif
             for (const auto* tri = breadcrumbTriangleList->head(); tri; tri = tri->fNext) {
                 SkDEBUGCODE(++count;)
                 auto p0 = grvx::float2::Load(tri->fPts);
@@ -275,7 +274,7 @@
                     // introduce T-junctions.
                     continue;
                 }
-                curveWriter.pathXform().map3Points(&vertexWriter, tri->fPts);
+                vertexWriter.writeArray(tri->fPts, 3);
                 // Mark this instance as a triangle by setting it to a conic with w=Inf.
                 vertexWriter.fill(GrVertexWriter::kIEEE_32_infinity, 2);
                 vertexWriter.write(
@@ -289,19 +288,33 @@
         chunker.popVertices(numRemainingTriangles);
     }
 
-    for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) {
-        switch (verb) {
-            case SkPathVerb::kQuad:
-                curveWriter.writeQuadratic(shaderCaps, &chunker, pts);
-                break;
-            case SkPathVerb::kConic:
-                curveWriter.writeConic(shaderCaps, &chunker, pts, *w);
-                break;
-            case SkPathVerb::kCubic:
-                curveWriter.writeCubic(shaderCaps, &chunker, pts);
-                break;
-            default:
-                break;
+    int maxSegments;
+    if (fShader->willUseTessellationShaders()) {
+        // The curve shader tessellates T=0..(1/2) on the first side of the canonical triangle and
+        // T=(1/2)..1 on the second side. This means we get double the max tessellation segments
+        // for the range T=0..1.
+        maxSegments = shaderCaps.maxTessellationSegments() * 2;
+    } else {
+        maxSegments = GrPathTessellationShader::kMaxFixedCountSegments;
+    }
+
+    CurveWriter curveWriter(maxSegments);
+    for (auto [pathMatrix, path] : pathDrawList) {
+        curveWriter.setMatrices(cullBounds, fShader->viewMatrix(), pathMatrix);
+        for (auto [verb, pts, w] : SkPathPriv::Iterate(path)) {
+            switch (verb) {
+                case SkPathVerb::kQuad:
+                    curveWriter.writeQuadratic(shaderCaps, &chunker, pts);
+                    break;
+                case SkPathVerb::kConic:
+                    curveWriter.writeConic(shaderCaps, &chunker, pts, *w);
+                    break;
+                case SkPathVerb::kCubic:
+                    curveWriter.writeCubic(shaderCaps, &chunker, pts);
+                    break;
+                default:
+                    break;
+            }
         }
     }
 
diff --git a/src/gpu/tessellate/GrPathCurveTessellator.h b/src/gpu/tessellate/GrPathCurveTessellator.h
index 7d2601c..a7b77bc 100644
--- a/src/gpu/tessellate/GrPathCurveTessellator.h
+++ b/src/gpu/tessellate/GrPathCurveTessellator.h
@@ -32,12 +32,23 @@
                                         const SkPMColor4f&, DrawInnerFan, int numPathVerbs,
                                         const GrPipeline&, const GrCaps&);
 
+    void prepare(GrMeshDrawTarget* target,
+                 const SkRect& cullBounds,
+                 const PathDrawList& pathDrawList,
+                 int totalCombinedPathVerbCnt) override {
+        this->prepare(target, cullBounds, pathDrawList, totalCombinedPathVerbCnt, nullptr);
+    }
 
+    // Implements GrPathTessellator::prepare(), also sending an additional list of breadcrumb
+    // triangles to the GPU. The breadcrumb triangles are implemented as conics with w=Infinity.
+    //
+    // ALSO NOTE: The breadcrumb triangles do not have a matrix. These need to be pre-transformed by
+    // the caller if a CPU-side transformation is desired.
     void prepare(GrMeshDrawTarget*,
                  const SkRect& cullBounds,
-                 const SkMatrix& pathMatrix,
-                 const SkPath&,
-                 const BreadcrumbTriangleList*) override;
+                 const PathDrawList&,
+                 int totalCombinedPathVerbCnt,
+                 const BreadcrumbTriangleList*);
 
     void draw(GrOpFlushState*) const override;
 
diff --git a/src/gpu/tessellate/GrPathInnerTriangulateOp.cpp b/src/gpu/tessellate/GrPathInnerTriangulateOp.cpp
index 0322191..635e122 100644
--- a/src/gpu/tessellate/GrPathInnerTriangulateOp.cpp
+++ b/src/gpu/tessellate/GrPathInnerTriangulateOp.cpp
@@ -400,7 +400,8 @@
 
     if (fTessellator) {
         // Must be called after polysToTriangles() in order for fFanBreadcrumbs to be complete.
-        fTessellator->prepare(flushState, this->bounds(), SkMatrix::I(), fPath, &fFanBreadcrumbs);
+        fTessellator->prepare(flushState, this->bounds(), {SkMatrix::I(), fPath},
+                              fPath.countVerbs(), &fFanBreadcrumbs);
     }
 
     if (!flushState->caps().shaderCaps()->vertexIDSupport()) {
diff --git a/src/gpu/tessellate/GrPathStencilCoverOp.cpp b/src/gpu/tessellate/GrPathStencilCoverOp.cpp
index e7bf11e..f864bce 100644
--- a/src/gpu/tessellate/GrPathStencilCoverOp.cpp
+++ b/src/gpu/tessellate/GrPathStencilCoverOp.cpp
@@ -239,7 +239,7 @@
         vertexAlloc.unlock(fFanVertexCount);
     }
 
-    fTessellator->prepare(flushState, this->bounds(), pathMatrix, fPath);
+    fTessellator->prepare(flushState, this->bounds(), {pathMatrix, fPath}, fPath.countVerbs());
 
     if (fCoverBBoxProgram) {
         size_t instanceStride = fCoverBBoxProgram->geomProc().instanceStride();
diff --git a/src/gpu/tessellate/GrPathTessellateOp.cpp b/src/gpu/tessellate/GrPathTessellateOp.cpp
index 173e39f..653a779 100644
--- a/src/gpu/tessellate/GrPathTessellateOp.cpp
+++ b/src/gpu/tessellate/GrPathTessellateOp.cpp
@@ -57,7 +57,7 @@
                                   flushState->detachAppliedClip());
         SkASSERT(fTessellator);
     }
-    fTessellator->prepare(flushState, this->bounds(), SkMatrix::I(), fPath);
+    fTessellator->prepare(flushState, this->bounds(), {SkMatrix::I(), fPath}, fPath.countVerbs());
 }
 
 void GrPathTessellateOp::onExecute(GrOpFlushState* flushState, const SkRect& chainBounds) {
diff --git a/src/gpu/tessellate/GrPathTessellator.h b/src/gpu/tessellate/GrPathTessellator.h
index 2e0a566..6dc72dd 100644
--- a/src/gpu/tessellate/GrPathTessellator.h
+++ b/src/gpu/tessellate/GrPathTessellator.h
@@ -25,34 +25,51 @@
 public:
     using BreadcrumbTriangleList = GrInnerFanTriangulator::BreadcrumbTriangleList;
 
+    struct PathDrawList {
+        PathDrawList(const SkMatrix& pathMatrix, const SkPath& path)
+                : fPathMatrix(pathMatrix), fPath(path) {}
+
+        SkMatrix fPathMatrix;
+        SkPath fPath;
+        PathDrawList* fNext = nullptr;
+
+        struct Iter {
+            void operator++() { fHead = fHead->fNext; }
+            bool operator!=(const Iter& b) const { return fHead != b.fHead; }
+            std::tuple<const SkMatrix&, const SkPath&> operator*() const {
+                return {fHead->fPathMatrix, fHead->fPath};
+            }
+            const PathDrawList* fHead;
+        };
+        Iter begin() const { return {this}; }
+        Iter end() const { return {nullptr}; }
+    };
+
+    virtual ~GrPathTessellator() {}
+
     const GrPathTessellationShader* shader() const { return fShader; }
 
     // Called before draw(). Prepares GPU buffers containing the geometry to tessellate.
     //
-    // 'pathMatrix' is applied on the CPU while the geometry is being written out. This is a tool
-    // for batching, and is applied in addition to the shader's on-GPU matrix.
-    //
-    // If the given BreadcrumbTriangleList is non-null, then we also emit geometry for the
-    // breadcrumb triangles.
+    // Each path's fPathMatrix in the list is applied on the CPU while the geometry is being written
+    // out. This is a tool for batching, and is applied in addition to the shader's on-GPU matrix.
     virtual void prepare(GrMeshDrawTarget*,
                          const SkRect& cullBounds,
-                         const SkMatrix& pathMatrix,
-                         const SkPath&,
-                         const BreadcrumbTriangleList* = nullptr) = 0;
+                         const PathDrawList&,
+                         int totalCombinedPathVerbCnt) = 0;
 
     // Issues draw calls for the tessellated geometry. The caller is responsible for binding its
     // desired pipeline ahead of time.
     virtual void draw(GrOpFlushState*) const = 0;
 
-    virtual ~GrPathTessellator() {}
-
-    // Returns an upper bound on the number of segments (lineTo, quadTo, conicTo, cubicTo) in a
-    // path, also accounting for any implicit lineTos from closing contours.
-    static int MaxSegmentsInPath(const SkPath& path) {
-        // There might be an implicit kClose at the end, but the path always begins with kMove. So
-        // the max number of segments in the path is equal to the number of verbs.
-        SkASSERT(path.countVerbs() == 0 || SkPathPriv::VerbData(path)[0] == SkPath::kMove_Verb);
-        return path.countVerbs();
+    // Returns an upper bound on the number of combined edges there might be from all inner fans in
+    // a PathDrawList.
+    static int MaxCombinedFanEdgesInPathDrawList(int totalCombinedPathVerbCnt) {
+        // Path fans might have an extra edge from an implicit kClose at the end, but they also
+        // always begin with kMove. So the max possible number of edges in a single path is equal to
+        // the number of verbs. Therefore, the max number of combined fan edges in a PathDrawList is
+        // the number of combined path verbs in that PathDrawList.
+        return totalCombinedPathVerbCnt;
     }
 
 protected:
diff --git a/src/gpu/tessellate/GrPathWedgeTessellator.cpp b/src/gpu/tessellate/GrPathWedgeTessellator.cpp
index 869fc33..e215605 100644
--- a/src/gpu/tessellate/GrPathWedgeTessellator.cpp
+++ b/src/gpu/tessellate/GrPathWedgeTessellator.cpp
@@ -111,23 +111,33 @@
 // by the hardware.
 class WedgeWriter {
 public:
-    WedgeWriter(const SkRect& cullBounds,
-                const SkMatrix& totalMatrix,  // shaderMatrix * pathMatrix
-                const SkMatrix& pathMatrix,
+    WedgeWriter(GrMeshDrawTarget* target,
+                GrVertexChunkArray* vertexChunkArray,
+                size_t patchStride,
+                int initialPatchAllocCount,
                 int maxSegments)
-            : fCullTest(cullBounds, totalMatrix)
-            , fTotalVectorXform(totalMatrix)
-            , fPathXform(pathMatrix)
+            : fChunker(target, vertexChunkArray, patchStride, initialPatchAllocCount)
             , fMaxSegments_pow2(maxSegments * maxSegments)
             , fMaxSegments_pow4(fMaxSegments_pow2 * fMaxSegments_pow2) {
     }
 
+    void setMatrices(const SkRect& cullBounds,
+                     const SkMatrix& shaderMatrix,
+                     const SkMatrix& pathMatrix) {
+        SkMatrix totalMatrix;
+        totalMatrix.setConcat(shaderMatrix, pathMatrix);
+        fCullTest.set(cullBounds, totalMatrix);
+        fTotalVectorXform = totalMatrix;
+        fPathXform = pathMatrix;
+    }
+
     const GrPathXform& pathXform() const { return fPathXform; }
 
     SK_ALWAYS_INLINE void writeFlatWedge(const GrShaderCaps& shaderCaps,
-                                         GrVertexChunkBuilder* chunker, SkPoint p0, SkPoint p1,
+                                         SkPoint p0,
+                                         SkPoint p1,
                                          SkPoint midpoint) {
-        if (GrVertexWriter vertexWriter = chunker->appendVertex()) {
+        if (GrVertexWriter vertexWriter = fChunker.appendVertex()) {
             fPathXform.mapLineToCubic(&vertexWriter, p0, p1);
             vertexWriter.write(midpoint);
             vertexWriter.write(GrVertexWriter::If(!shaderCaps.infinitySupport(),
@@ -136,14 +146,14 @@
     }
 
     SK_ALWAYS_INLINE void writeQuadraticWedge(const GrShaderCaps& shaderCaps,
-                                              GrVertexChunkBuilder* chunker, const SkPoint p[3],
+                                              const SkPoint p[3],
                                               SkPoint midpoint) {
         float numSegments_pow4 = GrWangsFormula::quadratic_pow4(kPrecision, p, fTotalVectorXform);
         if (numSegments_pow4 > fMaxSegments_pow4) {
-            this->chopAndWriteQuadraticWedges(shaderCaps, chunker, p, midpoint);
+            this->chopAndWriteQuadraticWedges(shaderCaps, p, midpoint);
             return;
         }
-        if (GrVertexWriter vertexWriter = chunker->appendVertex()) {
+        if (GrVertexWriter vertexWriter = fChunker.appendVertex()) {
             fPathXform.mapQuadToCubic(&vertexWriter, p);
             vertexWriter.write(midpoint);
             vertexWriter.write(GrVertexWriter::If(!shaderCaps.infinitySupport(),
@@ -153,14 +163,15 @@
     }
 
     SK_ALWAYS_INLINE void writeConicWedge(const GrShaderCaps& shaderCaps,
-                                          GrVertexChunkBuilder* chunker, const SkPoint p[3],
-                                          float w, SkPoint midpoint) {
+                                          const SkPoint p[3],
+                                          float w,
+                                          SkPoint midpoint) {
         float numSegments_pow2 = GrWangsFormula::conic_pow2(kPrecision, p, w, fTotalVectorXform);
         if (numSegments_pow2 > fMaxSegments_pow2) {
-            this->chopAndWriteConicWedges(shaderCaps, chunker, {p, w}, midpoint);
+            this->chopAndWriteConicWedges(shaderCaps, {p, w}, midpoint);
             return;
         }
-        if (GrVertexWriter vertexWriter = chunker->appendVertex()) {
+        if (GrVertexWriter vertexWriter = fChunker.appendVertex()) {
             fPathXform.mapConicToPatch(&vertexWriter, p, w);
             vertexWriter.write(midpoint);
             vertexWriter.write(GrVertexWriter::If(!shaderCaps.infinitySupport(),
@@ -171,14 +182,14 @@
     }
 
     SK_ALWAYS_INLINE void writeCubicWedge(const GrShaderCaps& shaderCaps,
-                                          GrVertexChunkBuilder* chunker, const SkPoint p[4],
+                                          const SkPoint p[4],
                                           SkPoint midpoint) {
         float numSegments_pow4 = GrWangsFormula::cubic_pow4(kPrecision, p, fTotalVectorXform);
         if (numSegments_pow4 > fMaxSegments_pow4) {
-            this->chopAndWriteCubicWedges(shaderCaps, chunker, p, midpoint);
+            this->chopAndWriteCubicWedges(shaderCaps, p, midpoint);
             return;
         }
-        if (GrVertexWriter vertexWriter = chunker->appendVertex()) {
+        if (GrVertexWriter vertexWriter = fChunker.appendVertex()) {
             fPathXform.map4Points(&vertexWriter, p);
             vertexWriter.write(midpoint);
             vertexWriter.write(GrVertexWriter::If(!shaderCaps.infinitySupport(),
@@ -190,50 +201,53 @@
     int numFixedSegments_pow4() const { return fNumFixedSegments_pow4; }
 
 private:
-    void chopAndWriteQuadraticWedges(const GrShaderCaps& shaderCaps, GrVertexChunkBuilder* chunker,
-                                     const SkPoint p[3], SkPoint midpoint) {
+    void chopAndWriteQuadraticWedges(const GrShaderCaps& shaderCaps,
+                                     const SkPoint p[3],
+                                     SkPoint midpoint) {
         SkPoint chops[5];
         SkChopQuadAtHalf(p, chops);
         for (int i = 0; i < 2; ++i) {
             const SkPoint* q = chops + i*2;
             if (fCullTest.areVisible3(q)) {
-                this->writeQuadraticWedge(shaderCaps, chunker, q, midpoint);
+                this->writeQuadraticWedge(shaderCaps, q, midpoint);
             } else {
-                this->writeFlatWedge(shaderCaps, chunker, q[0], q[2], midpoint);
+                this->writeFlatWedge(shaderCaps, q[0], q[2], midpoint);
             }
         }
     }
 
-    void chopAndWriteConicWedges(const GrShaderCaps& shaderCaps, GrVertexChunkBuilder* chunker,
-                                 const SkConic& conic, SkPoint midpoint) {
+    void chopAndWriteConicWedges(const GrShaderCaps& shaderCaps,
+                                 const SkConic& conic,
+                                 SkPoint midpoint) {
         SkConic chops[2];
         if (!conic.chopAt(.5, chops)) {
             return;
         }
         for (int i = 0; i < 2; ++i) {
             if (fCullTest.areVisible3(chops[i].fPts)) {
-                this->writeConicWedge(shaderCaps, chunker, chops[i].fPts, chops[i].fW, midpoint);
+                this->writeConicWedge(shaderCaps, chops[i].fPts, chops[i].fW, midpoint);
             } else {
-                this->writeFlatWedge(shaderCaps, chunker, chops[i].fPts[0], chops[i].fPts[2],
-                                     midpoint);
+                this->writeFlatWedge(shaderCaps, chops[i].fPts[0], chops[i].fPts[2], midpoint);
             }
         }
     }
 
-    void chopAndWriteCubicWedges(const GrShaderCaps& shaderCaps, GrVertexChunkBuilder* chunker,
-                                 const SkPoint p[4], SkPoint midpoint) {
+    void chopAndWriteCubicWedges(const GrShaderCaps& shaderCaps,
+                                 const SkPoint p[4],
+                                 SkPoint midpoint) {
         SkPoint chops[7];
         SkChopCubicAtHalf(p, chops);
         for (int i = 0; i < 2; ++i) {
             const SkPoint* c = chops + i*3;
             if (fCullTest.areVisible4(c)) {
-                this->writeCubicWedge(shaderCaps, chunker, c, midpoint);
+                this->writeCubicWedge(shaderCaps, c, midpoint);
             } else {
-                this->writeFlatWedge(shaderCaps, chunker, c[0], c[3], midpoint);
+                this->writeFlatWedge(shaderCaps, c[0], c[3], midpoint);
             }
         }
     }
 
+    GrVertexChunkBuilder fChunker;
     GrCullTest fCullTest;
     GrVectorXform fTotalVectorXform;
     GrPathXform fPathXform;
@@ -273,23 +287,20 @@
 
 void GrPathWedgeTessellator::prepare(GrMeshDrawTarget* target,
                                      const SkRect& cullBounds,
-                                     const SkMatrix& pathMatrix,
-                                     const SkPath& path,
-                                     const BreadcrumbTriangleList* breadcrumbTriangleList) {
-    SkASSERT(!breadcrumbTriangleList);
+                                     const PathDrawList& pathDrawList,
+                                     int totalCombinedPathVerbCnt) {
     SkASSERT(fVertexChunkArray.empty());
 
     const GrShaderCaps& shaderCaps = *target->caps().shaderCaps();
 
     // Over-allocate enough wedges for 1 in 4 to chop.
-    int maxWedges = GrPathTessellator::MaxSegmentsInPath(path);
+    int maxWedges = MaxCombinedFanEdgesInPathDrawList(totalCombinedPathVerbCnt);
     int wedgeAllocCount = (maxWedges * 5 + 3) / 4;  // i.e., ceil(maxWedges * 5/4)
     if (!wedgeAllocCount) {
         return;
     }
     size_t patchStride = fShader->willUseTessellationShaders() ? fShader->vertexStride() * 5
                                                                : fShader->instanceStride();
-    GrVertexChunkBuilder chunker(target, &fVertexChunkArray, patchStride, wedgeAllocCount);
 
     int maxSegments;
     if (fShader->willUseTessellationShaders()) {
@@ -298,43 +309,42 @@
         maxSegments = GrPathTessellationShader::kMaxFixedCountSegments;
     }
 
-    WedgeWriter wedgeWriter(cullBounds,
-                            SkMatrix::Concat(fShader->viewMatrix(), pathMatrix),
-                            pathMatrix,
-                            maxSegments);
-
-    MidpointContourParser parser(path);
-    while (parser.parseNextContour()) {
-        SkPoint midpoint = wedgeWriter.pathXform().mapPoint(parser.currentMidpoint());
-        SkPoint startPoint = {0, 0};
-        SkPoint lastPoint = startPoint;
-        for (auto [verb, pts, w] : parser.currentContour()) {
-            switch (verb) {
-                case SkPathVerb::kMove:
-                    startPoint = lastPoint = pts[0];
-                    break;
-                case SkPathVerb::kClose:
-                    break;  // Ignore. We can assume an implicit close at the end.
-                case SkPathVerb::kLine:
-                    wedgeWriter.writeFlatWedge(shaderCaps, &chunker, pts[0], pts[1], midpoint);
-                    lastPoint = pts[1];
-                    break;
-                case SkPathVerb::kQuad:
-                    wedgeWriter.writeQuadraticWedge(shaderCaps, &chunker, pts, midpoint);
-                    lastPoint = pts[2];
-                    break;
-                case SkPathVerb::kConic:
-                    wedgeWriter.writeConicWedge(shaderCaps, &chunker, pts, *w, midpoint);
-                    lastPoint = pts[2];
-                    break;
-                case SkPathVerb::kCubic:
-                    wedgeWriter.writeCubicWedge(shaderCaps, &chunker, pts, midpoint);
-                    lastPoint = pts[3];
-                    break;
+    WedgeWriter wedgeWriter(target, &fVertexChunkArray, patchStride, wedgeAllocCount, maxSegments);
+    for (auto [pathMatrix, path] : pathDrawList) {
+        wedgeWriter.setMatrices(cullBounds, fShader->viewMatrix(), pathMatrix);
+        MidpointContourParser parser(path);
+        while (parser.parseNextContour()) {
+            SkPoint midpoint = wedgeWriter.pathXform().mapPoint(parser.currentMidpoint());
+            SkPoint startPoint = {0, 0};
+            SkPoint lastPoint = startPoint;
+            for (auto [verb, pts, w] : parser.currentContour()) {
+                switch (verb) {
+                    case SkPathVerb::kMove:
+                        startPoint = lastPoint = pts[0];
+                        break;
+                    case SkPathVerb::kClose:
+                        break;  // Ignore. We can assume an implicit close at the end.
+                    case SkPathVerb::kLine:
+                        wedgeWriter.writeFlatWedge(shaderCaps, pts[0], pts[1], midpoint);
+                        lastPoint = pts[1];
+                        break;
+                    case SkPathVerb::kQuad:
+                        wedgeWriter.writeQuadraticWedge(shaderCaps, pts, midpoint);
+                        lastPoint = pts[2];
+                        break;
+                    case SkPathVerb::kConic:
+                        wedgeWriter.writeConicWedge(shaderCaps, pts, *w, midpoint);
+                        lastPoint = pts[2];
+                        break;
+                    case SkPathVerb::kCubic:
+                        wedgeWriter.writeCubicWedge(shaderCaps, pts, midpoint);
+                        lastPoint = pts[3];
+                        break;
+                }
             }
-        }
-        if (lastPoint != startPoint) {
-            wedgeWriter.writeFlatWedge(shaderCaps, &chunker, lastPoint, startPoint, midpoint);
+            if (lastPoint != startPoint) {
+                wedgeWriter.writeFlatWedge(shaderCaps, lastPoint, startPoint, midpoint);
+            }
         }
     }
 
diff --git a/src/gpu/tessellate/GrPathWedgeTessellator.h b/src/gpu/tessellate/GrPathWedgeTessellator.h
index 50490e4..3ec575a 100644
--- a/src/gpu/tessellate/GrPathWedgeTessellator.h
+++ b/src/gpu/tessellate/GrPathWedgeTessellator.h
@@ -27,9 +27,8 @@
 
     void prepare(GrMeshDrawTarget*,
                  const SkRect& cullBounds,
-                 const SkMatrix& pathMatrix,
-                 const SkPath&,
-                 const BreadcrumbTriangleList*) override;
+                 const PathDrawList&,
+                 int totalCombinedPathVerbCnt) override;
 
     void draw(GrOpFlushState*) const override;
 
diff --git a/src/gpu/tessellate/GrPathXform.h b/src/gpu/tessellate/GrPathXform.h
index 2203df1..c4f3aac 100644
--- a/src/gpu/tessellate/GrPathXform.h
+++ b/src/gpu/tessellate/GrPathXform.h
@@ -23,11 +23,16 @@
     using float4 = grvx::float4;
 
 public:
-    GrPathXform(const SkMatrix& m)
-            : fScale{m.getScaleX(), m.getScaleY(), m.getScaleX(), m.getScaleY()}
-            , fSkew{m.getSkewX(), m.getSkewY(), m.getSkewX(), m.getSkewY()}
-            , fTrans{m.getTranslateX(), m.getTranslateY(), m.getTranslateX(), m.getTranslateY()} {
+    GrPathXform() = default;
+    GrPathXform(const SkMatrix& m) { *this = m; }
+
+    GrPathXform& operator=(const SkMatrix& m) {
         SkASSERT(!m.hasPerspective());
+        // Store each x/y term in both float4.lo and float4.hi so one op can map two points at once.
+        fScale = {m.getScaleX(), m.getScaleY(), m.getScaleX(), m.getScaleY()};
+        fSkew = {m.getSkewX(), m.getSkewY(), m.getSkewX(), m.getSkewY()};
+        fTrans = {m.getTranslateX(), m.getTranslateY(), m.getTranslateX(), m.getTranslateY()};
+        return *this;
     }
 
     SK_ALWAYS_INLINE float2 mapPoint(float2 p) const {
diff --git a/src/gpu/tessellate/GrVectorXform.h b/src/gpu/tessellate/GrVectorXform.h
index 5a47a8b..d80f28e 100644
--- a/src/gpu/tessellate/GrVectorXform.h
+++ b/src/gpu/tessellate/GrVectorXform.h
@@ -20,7 +20,8 @@
     using float2 = skvx::Vec<2, float>;
     using float4 = skvx::Vec<4, float>;
     explicit GrVectorXform() : fType(Type::kIdentity) {}
-    explicit GrVectorXform(const SkMatrix& m) {
+    explicit GrVectorXform(const SkMatrix& m) { *this = m; }
+    GrVectorXform& operator=(const SkMatrix& m) {
         SkASSERT(!m.hasPerspective());
         if (m.getType() & SkMatrix::kAffine_Mask) {
             fType = Type::kAffine;
@@ -36,6 +37,7 @@
             SkASSERT(!(m.getType() & ~SkMatrix::kTranslate_Mask));
             fType = Type::kIdentity;
         }
+        return *this;
     }
     float2 operator()(float2 vector) const {
         switch (fType) {