Use specialized quad lists in rectangle ops

Hopefully reduces memory footprint of GrFillRectOp and GrTextureOp

The original rect code (GrAAFillRectOp) stored 2 SkMatrices (18 floats), 2
SkRects (8 floats), an SkPMColor4f (4 floats), and a flag (1 int) for a total
of 124 bytes per quad that was stored in the op.

The first pass at the rectangle consolidation switched to storing device and
local quads as GrPerspQuads (32 floats), an SkPMColor4f (4 floats) and a flag
(1 int) for a total of 148 bytes per quad. After landing, several memory
regressions appeared in Chrome and our perf monitor.

Several intertwined approaches are taken here. First, GrPerspQuad no longer
caches 1/w, which makes a quad 12 floats instead of 16. Second, a specialized
list type is defined that allows storing the x, y, and extra metadata together
for quads, but keeps the w components separate. When the quad type isn't
perspective, w is not stored at all since it is implicitly 1 and can be
reconstituted at tessellation time. This brings the total per quad to either
84 or 116 bytes, depending on whether the op list needs perspective information.

Bug: chromium:915025
Bug: chromium:917242
Change-Id: If37ee122847b0c32604bb45dc2a1326b544f9cf6
Reviewed-on: https://skia-review.googlesource.com/c/180644
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Robert Phillips <robertphillips@google.com>
diff --git a/gn/tests.gni b/gn/tests.gni
index 351efae..27a4448 100644
--- a/gn/tests.gni
+++ b/gn/tests.gni
@@ -101,6 +101,7 @@
   "$_tests/GrOpListFlushTest.cpp",
   "$_tests/GrPipelineDynamicStateTest.cpp",
   "$_tests/GrPorterDuffTest.cpp",
+  "$_tests/GrQuadListTest.cpp",
   "$_tests/GrShapeTest.cpp",
   "$_tests/GrSKSLPrettyPrintTest.cpp",
   "$_tests/GrSurfaceTest.cpp",
diff --git a/src/gpu/GrQuad.cpp b/src/gpu/GrQuad.cpp
index 5193d5c..454779b 100644
--- a/src/gpu/GrQuad.cpp
+++ b/src/gpu/GrQuad.cpp
@@ -202,7 +202,6 @@
         SkNx_shuffle<0, 0, 2, 2>(r).store(fX);
         SkNx_shuffle<1, 3, 1, 3>(r).store(fY);
         fW[0] = fW[1] = fW[2] = fW[3] = 1.f;
-        fIW[0] = fIW[1] = fIW[2] = fIW[3] = 1.f;
     } else {
         Sk4f rx(rect.fLeft, rect.fLeft, rect.fRight, rect.fRight);
         Sk4f ry(rect.fTop, rect.fBottom, rect.fTop, rect.fBottom);
@@ -220,14 +219,19 @@
             Sk4f w2(m.get(SkMatrix::kMPersp2));
             auto w = SkNx_fma(w0, rx, SkNx_fma(w1, ry, w2));
             w.store(fW);
-            w.invert().store(fIW);
         } else {
             fW[0] = fW[1] = fW[2] = fW[3] = 1.f;
-            fIW[0] = fIW[1] = fIW[2] = fIW[3] = 1.f;
         }
     }
 }
 
+// Private constructor used by GrQuadList to quickly fill in a quad's values from the channel arrays
+GrPerspQuad::GrPerspQuad(const float* xs, const float* ys, const float* ws) {
+    memcpy(&fX, xs, 4 * sizeof(float));
+    memcpy(&fY, ys, 4 * sizeof(float));
+    memcpy(&fW, ws, 4 * sizeof(float));
+}
+
 bool GrPerspQuad::aaHasEffectOnRect() const {
     SkASSERT(this->quadType() == GrQuadType::kRect);
     // If rect, ws must all be 1s so no need to divide
diff --git a/src/gpu/GrQuad.h b/src/gpu/GrQuad.h
index 824131b..edb1bb2 100644
--- a/src/gpu/GrQuad.h
+++ b/src/gpu/GrQuad.h
@@ -12,6 +12,7 @@
 #include "SkNx.h"
 #include "SkPoint.h"
 #include "SkPoint3.h"
+#include "SkTArray.h"
 
 enum class GrAAType : unsigned;
 enum class GrQuadAAFlags;
@@ -90,6 +91,9 @@
 #endif
 
 private:
+    template<typename T>
+    friend class GrQuadListBase;
+
     float fX[4];
     float fY[4];
 };
@@ -105,20 +109,21 @@
     SkPoint3 point(int i) const { return {fX[i], fY[i], fW[i]}; }
 
     SkRect bounds() const {
-        auto x = this->x4f() * this->iw4f();
-        auto y = this->y4f() * this->iw4f();
+        auto iw = this->iw4f();
+        auto x = this->x4f() * iw;
+        auto y = this->y4f() * iw;
         return {x.min(), y.min(), x.max(), y.max()};
     }
 
     float x(int i) const { return fX[i]; }
     float y(int i) const { return fY[i]; }
     float w(int i) const { return fW[i]; }
-    float iw(int i) const { return fIW[i]; }
+    float iw(int i) const { return sk_ieee_float_divide(1.f, fW[i]); }
 
     Sk4f x4f() const { return Sk4f::Load(fX); }
     Sk4f y4f() const { return Sk4f::Load(fY); }
     Sk4f w4f() const { return Sk4f::Load(fW); }
-    Sk4f iw4f() const { return Sk4f::Load(fIW); }
+    Sk4f iw4f() const { return this->w4f().invert(); }
 
     bool hasPerspective() const { return (w4f() != Sk4f(1.f)).anyTrue(); }
 
@@ -130,10 +135,197 @@
 #endif
 
 private:
+    template<typename T>
+    friend class GrQuadListBase;
+
+    // Copy 4 values from each of the arrays into the quad's components
+    GrPerspQuad(const float xs[4], const float ys[4], const float ws[4]);
+
     float fX[4];
     float fY[4];
     float fW[4];
-    float fIW[4];  // 1/w
+};
+
+// Underlying data used by GrQuadListBase. It is defined outside of GrQuadListBase due to compiler
+// issues related to specializing member types.
+template<typename T>
+struct QuadData {
+    float fX[4];
+    float fY[4];
+    T fMetadata;
+};
+
+template<>
+struct QuadData<void> {
+    float fX[4];
+    float fY[4];
+};
+
+// A dynamic list of (possibly) perspective quads that tracks the most general quad type of all
+// added quads. It avoids storing the 3rd component if the quad type never becomes perspective.
+// Use GrQuadList subclass when only storing quads. Use GrTQuadList subclass when storing quads
+// and per-quad templated metadata (such as color or domain).
+template<typename T>
+class GrQuadListBase {
+public:
+
+    int count() const { return fXYs.count(); }
+
+    GrQuadType quadType() const { return fType; }
+
+    void reserve(int count, GrQuadType forType) {
+        fXYs.reserve(count);
+        if (forType == GrQuadType::kPerspective || fType == GrQuadType::kPerspective) {
+            fWs.reserve(4 * count);
+        }
+    }
+
+    GrPerspQuad operator[] (int i) const {
+        SkASSERT(i < this->count());
+        SkASSERT(i >= 0);
+
+        const QuadData<T>& item = fXYs[i];
+        if (fType == GrQuadType::kPerspective) {
+            // Read the explicit ws
+            return GrPerspQuad(item.fX, item.fY, fWs.begin() + 4 * i);
+        } else {
+            // Ws are implicitly 1s.
+            static constexpr float kNoPerspectiveWs[4] = {1.f, 1.f, 1.f, 1.f};
+            return GrPerspQuad(item.fX, item.fY, kNoPerspectiveWs);
+        }
+    }
+
+    // Subclasses expose push_back(const GrQuad|GrPerspQuad&, GrQuadType, [const T&]), where
+    // the metadata argument is only present in GrTQuadList's push_back definition.
+
+protected:
+    GrQuadListBase() : fType(GrQuadType::kRect) {}
+
+    void concatImpl(const GrQuadListBase<T>& that) {
+        this->upgradeType(that.fType);
+        fXYs.push_back_n(that.fXYs.count(), that.fXYs.begin());
+        if (fType == GrQuadType::kPerspective) {
+            if (that.fType == GrQuadType::kPerspective) {
+                // Copy the other's ws into the end of this list's data
+                fWs.push_back_n(that.fWs.count(), that.fWs.begin());
+            } else {
+                // This list stores ws but the appended list had implicit 1s, so add explicit 1s to
+                // fill out the total list
+                fWs.push_back_n(4 * that.count(), 1.f);
+            }
+        }
+    }
+
+    // Returns the added item data so that its metadata can be initialized if T is not void
+    QuadData<T>& pushBackImpl(const GrQuad& quad, GrQuadType type) {
+        SkASSERT(quad.quadType() <= type);
+
+        this->upgradeType(type);
+        QuadData<T>& item = fXYs.push_back();
+        memcpy(&item.fX, quad.fX, 4 * sizeof(float));
+        memcpy(&item.fY, quad.fY, 4 * sizeof(float));
+        if (fType == GrQuadType::kPerspective) {
+            fWs.push_back_n(4, 1.f);
+        }
+        return item;
+    }
+
+    QuadData<T>& pushBackImpl(const GrPerspQuad& quad, GrQuadType type) {
+        SkASSERT(quad.quadType() <= type);
+
+        this->upgradeType(type);
+        QuadData<T>& item = fXYs.push_back();
+        memcpy(&item.fX, quad.fX, 4 * sizeof(float));
+        memcpy(&item.fY, quad.fY, 4 * sizeof(float));
+        if (fType == GrQuadType::kPerspective) {
+            fWs.push_back_n(4, quad.fW);
+        }
+        return item;
+    }
+
+    const QuadData<T>& item(int i) const {
+        return fXYs[i];
+    }
+
+    QuadData<T>& item(int i) {
+        return fXYs[i];
+    }
+
+private:
+    void upgradeType(GrQuadType type) {
+        // Possibly upgrade the overall type tracked by the list
+        if (type > fType) {
+            fType = type;
+            if (type == GrQuadType::kPerspective) {
+                // All existing quads were 2D, so the ws array just needs to be filled with 1s
+                fWs.push_back_n(4 * this->count(), 1.f);
+            }
+        }
+    }
+
+    // Interleaves xs, ys, and per-quad metadata so that all data for a single quad is together
+    // (barring ws, which can be dropped entirely if the quad type allows it).
+    SkSTArray<1, QuadData<T>, true> fXYs;
+    // The w channel is kept separate so that it can remain empty when only dealing with 2D quads.
+    SkTArray<float, true> fWs;
+
+    GrQuadType fType;
+};
+
+// This list only stores the quad data itself.
+class GrQuadList : public GrQuadListBase<void> {
+public:
+    GrQuadList() : INHERITED() {}
+
+    void concat(const GrQuadList& that) {
+        this->concatImpl(that);
+    }
+
+    void push_back(const GrQuad& quad, GrQuadType type) {
+        this->pushBackImpl(quad, type);
+    }
+
+    void push_back(const GrPerspQuad& quad, GrQuadType type) {
+        this->pushBackImpl(quad, type);
+    }
+
+private:
+    typedef GrQuadListBase<void> INHERITED;
+};
+
+// This variant of the list allows simple metadata to be stored per quad as well, such as color
+// or texture domain.
+template<typename T>
+class GrTQuadList : public GrQuadListBase<T> {
+public:
+    GrTQuadList() : INHERITED() {}
+
+    void concat(const GrTQuadList<T>& that) {
+        this->concatImpl(that);
+    }
+
+    // Adding to the list requires metadata
+    void push_back(const GrQuad& quad, GrQuadType type, T&& metadata) {
+        QuadData<T>& item = this->pushBackImpl(quad, type);
+        item.fMetadata = std::move(metadata);
+    }
+
+    void push_back(const GrPerspQuad& quad, GrQuadType type, T&& metadata) {
+        QuadData<T>& item = this->pushBackImpl(quad, type);
+        item.fMetadata = std::move(metadata);
+    }
+
+    // And provide access to the metadata per quad
+    const T& metadata(int i) const {
+        return this->item(i).fMetadata;
+    }
+
+    T& metadata(int i) {
+        return this->item(i).fMetadata;
+    }
+
+private:
+    typedef GrQuadListBase<T> INHERITED;
 };
 
 #endif
diff --git a/src/gpu/ops/GrFillRectOp.cpp b/src/gpu/ops/GrFillRectOp.cpp
index 8efbcbd..0f3f6dc 100644
--- a/src/gpu/ops/GrFillRectOp.cpp
+++ b/src/gpu/ops/GrFillRectOp.cpp
@@ -24,54 +24,32 @@
 using VertexSpec = GrQuadPerEdgeAA::VertexSpec;
 using ColorType = GrQuadPerEdgeAA::ColorType;
 
-// NOTE: This info structure is intentionally modeled after GrTextureOps' Quad so that they can
-// more easily be integrated together in the future.
-class TransformedQuad {
-public:
-    TransformedQuad(const GrPerspQuad& deviceQuad, const GrPerspQuad& localQuad,
-                    const SkPMColor4f& color, GrQuadAAFlags aaFlags)
-            : fDeviceQuad(deviceQuad)
-            , fLocalQuad(localQuad)
-            , fColor(color)
-            , fAAFlags(aaFlags) {}
-
-    const GrPerspQuad& deviceQuad() const { return fDeviceQuad; }
-    const GrPerspQuad& localQuad() const { return fLocalQuad; }
-    const SkPMColor4f& color() const { return fColor; }
-    GrQuadAAFlags aaFlags() const { return fAAFlags; }
-
-    void setColor(const SkPMColor4f& color) { fColor = color; }
-
-    SkString dumpInfo(int index) const {
-        SkString str;
-        str.appendf("%d: Color: [%.2f, %.2f, %.2f, %.2f], Edge AA: l%u_t%u_r%u_b%u, \n"
-                    "  device quad: [(%.2f, %2.f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), "
-                    "(%.2f, %.2f, %.2f)],\n"
-                    "  local quad: [(%.2f, %2.f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), "
-                    "(%.2f, %.2f, %.2f)]\n",
-                    index, fColor.fR, fColor.fG, fColor.fB, fColor.fA,
-                    (uint32_t) (fAAFlags & GrQuadAAFlags::kLeft),
-                    (uint32_t) (fAAFlags & GrQuadAAFlags::kTop),
-                    (uint32_t) (fAAFlags & GrQuadAAFlags::kRight),
-                    (uint32_t) (fAAFlags & GrQuadAAFlags::kBottom),
-                    fDeviceQuad.x(0), fDeviceQuad.y(0), fDeviceQuad.w(0),
-                    fDeviceQuad.x(1), fDeviceQuad.y(1), fDeviceQuad.w(1),
-                    fDeviceQuad.x(2), fDeviceQuad.y(2), fDeviceQuad.w(2),
-                    fDeviceQuad.x(3), fDeviceQuad.y(3), fDeviceQuad.w(3),
-                    fLocalQuad.x(0), fLocalQuad.y(0), fLocalQuad.w(0),
-                    fLocalQuad.x(1), fLocalQuad.y(1), fLocalQuad.w(1),
-                    fLocalQuad.x(2), fLocalQuad.y(2), fLocalQuad.w(2),
-                    fLocalQuad.x(3), fLocalQuad.y(3), fLocalQuad.w(3));
-        return str;
-    }
-private:
-    // NOTE: The TransformedQuad does not store the types for device and local. The owning op tracks
-    // the most general type for device and local across all of its merged quads.
-    GrPerspQuad fDeviceQuad; // In device space, allowing rects to be combined across view matrices
-    GrPerspQuad fLocalQuad; // Original rect transformed by its local matrix
-    SkPMColor4f fColor;
-    GrQuadAAFlags fAAFlags;
-};
+#ifdef SK_DEBUG
+static SkString dump_quad_info(int index, const GrPerspQuad& deviceQuad,
+                               const GrPerspQuad& localQuad, const SkPMColor4f& color,
+                               GrQuadAAFlags aaFlags) {
+    SkString str;
+    str.appendf("%d: Color: [%.2f, %.2f, %.2f, %.2f], Edge AA: l%u_t%u_r%u_b%u, \n"
+                "  device quad: [(%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), "
+                "(%.2f, %.2f, %.2f)],\n"
+                "  local quad: [(%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), "
+                "(%.2f, %.2f, %.2f)]\n",
+                index, color.fR, color.fG, color.fB, color.fA,
+                (uint32_t) (aaFlags & GrQuadAAFlags::kLeft),
+                (uint32_t) (aaFlags & GrQuadAAFlags::kTop),
+                (uint32_t) (aaFlags & GrQuadAAFlags::kRight),
+                (uint32_t) (aaFlags & GrQuadAAFlags::kBottom),
+                deviceQuad.x(0), deviceQuad.y(0), deviceQuad.w(0),
+                deviceQuad.x(1), deviceQuad.y(1), deviceQuad.w(1),
+                deviceQuad.x(2), deviceQuad.y(2), deviceQuad.w(2),
+                deviceQuad.x(3), deviceQuad.y(3), deviceQuad.w(3),
+                localQuad.x(0), localQuad.y(0), localQuad.w(0),
+                localQuad.x(1), localQuad.y(1), localQuad.w(1),
+                localQuad.x(2), localQuad.y(2), localQuad.w(2),
+                localQuad.x(3), localQuad.y(3), localQuad.w(3));
+    return str;
+}
+#endif
 
 class FillRectOp final : public GrMeshDrawOp {
 private:
@@ -113,9 +91,7 @@
                const GrPerspQuad& deviceQuad, GrQuadType deviceQuadType,
                const GrPerspQuad& localQuad, GrQuadType localQuadType)
             : INHERITED(ClassID())
-            , fHelper(args, aaType, stencil)
-            , fDeviceQuadType(static_cast<unsigned>(deviceQuadType))
-            , fLocalQuadType(static_cast<unsigned>(localQuadType)) {
+            , fHelper(args, aaType, stencil) {
         if (constBlendColor) {
             // The GrPaint is compatible with clearing, and the constant blend color overrides the
             // paint color (although in most cases they are probably the same)
@@ -133,7 +109,8 @@
 
         // The color stored with the quad is the clear color if a scissor-clear is decided upon
         // when executing the op.
-        fQuads.emplace_back(deviceQuad, localQuad, paintColor, edgeFlags);
+        fDeviceQuads.push_back(deviceQuad, deviceQuadType, { paintColor, edgeFlags });
+        fLocalQuads.push_back(localQuad, localQuadType);
         this->setBounds(deviceQuad.bounds(), HasAABloat(aaType == GrAAType::kCoverage),
                         IsZeroArea::kNo);
     }
@@ -147,14 +124,16 @@
 #ifdef SK_DEBUG
     SkString dumpInfo() const override {
         SkString str;
-        str.appendf("# draws: %d\n", fQuads.count());
-        str.appendf("Clear compatible: %u\n", static_cast<bool>(fClearCompatible));
+        str.appendf("# draws: %u\n", this->quadCount());
         str.appendf("Device quad type: %u, local quad type: %u\n",
-                    fDeviceQuadType, fLocalQuadType);
+                    (uint32_t) fDeviceQuads.quadType(), (uint32_t) fLocalQuads.quadType());
         str += fHelper.dumpInfo();
-        for (int i = 0; i < fQuads.count(); i++) {
-            str += fQuads[i].dumpInfo(i);
-
+        GrPerspQuad device, local;
+        for (int i = 0; i < this->quadCount(); i++) {
+            device = fDeviceQuads[i];
+            const ColorAndAA& info = fDeviceQuads.metadata(i);
+            local = fLocalQuads[i];
+            str += dump_quad_info(i, device, local, info.fColor, info.fAAFlags);
         }
         str += INHERITED::dumpInfo();
         return str;
@@ -163,11 +142,12 @@
 
     RequiresDstTexture finalize(const GrCaps& caps, const GrAppliedClip* clip) override {
         // Initialize aggregate color analysis with the first quad's color (which always exists)
-        SkASSERT(fQuads.count() > 0);
-        GrProcessorAnalysisColor quadColors(fQuads[0].color());
+        SkASSERT(this->quadCount() > 0);
+        GrProcessorAnalysisColor quadColors(fDeviceQuads.metadata(0).fColor);
         // Then combine the colors of any additional quads (e.g. from MakeSet)
-        for (int i = 1; i < fQuads.count(); ++i) {
-            quadColors = GrProcessorAnalysisColor::Combine(quadColors, fQuads[i].color());
+        for (int i = 1; i < this->quadCount(); ++i) {
+            quadColors = GrProcessorAnalysisColor::Combine(quadColors,
+                                                           fDeviceQuads.metadata(i).fColor);
             if (quadColors.isUnknown()) {
                 // No point in accumulating additional starting colors, combining cannot make it
                 // less unknown.
@@ -185,8 +165,8 @@
         // to the same color (even if they started out with different colors).
         SkPMColor4f colorOverride;
         if (quadColors.isConstant(&colorOverride)) {
-            for (int i = 0; i < fQuads.count(); ++i) {
-                fQuads[i].setColor(colorOverride);
+            for (int i = 0; i < this->quadCount(); ++i) {
+                fDeviceQuads.metadata(i).fColor = colorOverride;
             }
         }
 
@@ -203,21 +183,20 @@
 
 private:
     // For GrFillRectOp::MakeSet's use of addQuad
-    // FIXME(reviewer): better to just make addQuad public?
     friend std::unique_ptr<GrDrawOp> GrFillRectOp::MakeSet(GrContext* context, GrPaint&& paint,
             GrAAType aaType, const SkMatrix& viewMatrix,
             const GrRenderTargetContext::QuadSetEntry quads[], int quadCount,
             const GrUserStencilSettings* stencilSettings);
 
-   void onPrepareDraws(Target* target) override {
+    void onPrepareDraws(Target* target) override {
         TRACE_EVENT0("skia", TRACE_FUNC);
 
         using Domain = GrQuadPerEdgeAA::Domain;
         static constexpr SkRect kEmptyDomain = SkRect::MakeEmpty();
 
-        VertexSpec vertexSpec(this->deviceQuadType(),
+        VertexSpec vertexSpec(fDeviceQuads.quadType(),
                               fWideColor ? ColorType::kHalf : ColorType::kByte,
-                              this->localQuadType(), fHelper.usesLocalCoords(), Domain::kNo,
+                              fLocalQuads.quadType(), fHelper.usesLocalCoords(), Domain::kNo,
                               fHelper.aaType(), fHelper.compatibleWithAlphaAsCoverage());
 
         sk_sp<GrGeometryProcessor> gp = GrQuadPerEdgeAA::MakeProcessor(vertexSpec);
@@ -228,7 +207,7 @@
 
         // Fill the allocated vertex data
         void* vdata = target->makeVertexSpace(
-                vertexSize, fQuads.count() * vertexSpec.verticesPerQuad(),
+                vertexSize, this->quadCount() * vertexSpec.verticesPerQuad(),
                 &vbuffer, &vertexOffsetInBuffer);
         if (!vdata) {
             SkDebugf("Could not allocate vertices\n");
@@ -237,15 +216,18 @@
 
         // vertices pointer advances through vdata based on Tessellate's return value
         void* vertices = vdata;
-        for (int i = 0; i < fQuads.count(); ++i) {
-            const auto& q = fQuads[i];
-            vertices = GrQuadPerEdgeAA::Tessellate(vertices, vertexSpec, q.deviceQuad(), q.color(),
-                                                   q.localQuad(), kEmptyDomain, q.aaFlags());
+        for (int i = 0; i < this->quadCount(); ++i) {
+            const GrPerspQuad& device = fDeviceQuads[i];
+            const ColorAndAA& info = fDeviceQuads.metadata(i);
+            const GrPerspQuad& local = fLocalQuads[i];
+
+            vertices = GrQuadPerEdgeAA::Tessellate(vertices, vertexSpec, device, info.fColor, local,
+                                                   kEmptyDomain, info.fAAFlags);
         }
 
         // Configure the mesh for the vertex data
         GrMesh* mesh = target->allocMeshes(1);
-        if (!GrQuadPerEdgeAA::ConfigureMeshIndices(target, mesh, vertexSpec, fQuads.count())) {
+        if (!GrQuadPerEdgeAA::ConfigureMeshIndices(target, mesh, vertexSpec, this->quadCount())) {
             SkDebugf("Could not allocate indices\n");
             return;
         }
@@ -261,13 +243,13 @@
 
         if ((fHelper.aaType() == GrAAType::kCoverage ||
              that->fHelper.aaType() == GrAAType::kCoverage) &&
-            fQuads.count() + that->fQuads.count() > GrQuadPerEdgeAA::kNumAAQuadsInIndexBuffer) {
+            this->quadCount() + that->quadCount() > GrQuadPerEdgeAA::kNumAAQuadsInIndexBuffer) {
             // This limit on batch size seems to help on Adreno devices
             return CombineResult::kCannotCombine;
         }
 
-        // Unlike most users of the draw op helper, this op can merge none-aa and coverage-aa
-        // draw ops together, so pass true as the last argument.
+        // Unlike most users of the draw op helper, this op can merge none-aa and coverage-aa draw
+        // ops together, so pass true as the last argument.
         if (!fHelper.isCompatible(that->fHelper, caps, this->bounds(), that->bounds(), true)) {
             return CombineResult::kCannotCombine;
         }
@@ -275,13 +257,6 @@
         // If the processor sets are compatible, the two ops are always compatible; it just needs
         // to adjust the state of the op to be the more general quad and aa types of the two ops.
 
-        // The GrQuadType enum is ordered such that higher values are more general quad types
-        if (that->fDeviceQuadType > fDeviceQuadType) {
-            fDeviceQuadType = that->fDeviceQuadType;
-        }
-        if (that->fLocalQuadType > fLocalQuadType) {
-            fLocalQuadType = that->fLocalQuadType;
-        }
         fClearCompatible &= that->fClearCompatible;
         fWideColor |= that->fWideColor;
 
@@ -292,7 +267,8 @@
             fHelper.setAAType(GrAAType::kCoverage);
         }
 
-        fQuads.push_back_n(that->fQuads.count(), that->fQuads.begin());
+        fDeviceQuads.concat(that->fDeviceQuads);
+        fLocalQuads.concat(that->fLocalQuads);
         return CombineResult::kMerged;
     }
 
@@ -300,8 +276,10 @@
     // But since it's avoiding the op list management, it must update the op's bounds. This is only
     // used with quad sets, which uses the same view matrix for each quad so this assumes that the
     // device quad type of the new quad is the same as the op's.
-    void addQuad(TransformedQuad&& quad, GrQuadType localQuadType, GrAAType aaType) {
-        SkASSERT(quad.deviceQuad().quadType() <= this->deviceQuadType());
+    void addQuad(const GrPerspQuad& deviceQuad, const GrPerspQuad& localQuad,
+                 GrQuadType localQuadType, const SkPMColor4f& color, GrQuadAAFlags edgeAA,
+                 GrAAType aaType) {
+        SkASSERT(deviceQuad.quadType() <= fDeviceQuads.quadType());
 
         // The new quad's aa type should be the same as the first quad's or none, except when the
         // first quad's aa type was already downgraded to none, in which case the stored type must
@@ -316,33 +294,35 @@
             // reset the op's accumulated aa type.
         }
 
-        // The new quad's local coordinates could differ
-        if (localQuadType > this->localQuadType()) {
-            fLocalQuadType = static_cast<unsigned>(localQuadType);
-        }
-
         // clear compatible won't need to be updated, since device quad type and paint is the same,
         // but this quad has a new color, so maybe update wide color
-        fWideColor |= !SkPMColor4fFitsInBytes(quad.color());
+        fWideColor |= !SkPMColor4fFitsInBytes(color);
 
         // Update the bounds and add the quad to this op's storage
         SkRect newBounds = this->bounds();
-        newBounds.joinPossiblyEmptyRect(quad.deviceQuad().bounds());
+        newBounds.joinPossiblyEmptyRect(deviceQuad.bounds());
         this->setBounds(newBounds, HasAABloat(fHelper.aaType() == GrAAType::kCoverage),
                         IsZeroArea::kNo);
-        fQuads.push_back(std::move(quad));
+        fDeviceQuads.push_back(deviceQuad, fDeviceQuads.quadType(), { color, edgeAA });
+        fLocalQuads.push_back(localQuad, localQuadType);
     }
 
-    GrQuadType deviceQuadType() const { return static_cast<GrQuadType>(fDeviceQuadType); }
-    GrQuadType localQuadType() const { return static_cast<GrQuadType>(fLocalQuadType); }
+    int quadCount() const {
+        // Sanity check that the parallel arrays for quad properties all have the same size
+        SkASSERT(fDeviceQuads.count() == fLocalQuads.count());
+        return fDeviceQuads.count();
+    }
+
+    struct ColorAndAA {
+        SkPMColor4f fColor;
+        GrQuadAAFlags fAAFlags;
+    };
 
     Helper fHelper;
-    SkSTArray<1, TransformedQuad, true> fQuads;
+    GrTQuadList<ColorAndAA> fDeviceQuads;
+    // No metadata attached to the local quads
+    GrQuadList fLocalQuads;
 
-    // While we always store full GrPerspQuads in memory, if the type is known to be simpler we can
-    // optimize our geometry generation.
-    unsigned fDeviceQuadType: 2;
-    unsigned fLocalQuadType: 2;
     unsigned fWideColor: 1;
 
     // True if fQuad produced by a rectangle-preserving view matrix, is pixel aligned or non-AA,
@@ -422,9 +402,9 @@
         GrResolveAATypeForQuad(aaType, quads[i].fAAFlags, deviceQuad, deviceQuadType,
                                &resolvedAA, &resolvedEdgeFlags);
 
-        fillRects->addQuad({ deviceQuad, GrPerspQuad(quads[i].fRect, quads[i].fLocalMatrix),
-                             quads[i].fColor, resolvedEdgeFlags },
-                           GrQuadTypeForTransformedRect(quads[i].fLocalMatrix), resolvedAA);
+        fillRects->addQuad(deviceQuad, GrPerspQuad(quads[i].fRect, quads[i].fLocalMatrix),
+                           GrQuadTypeForTransformedRect(quads[i].fLocalMatrix), quads[i].fColor,
+                           resolvedEdgeFlags,resolvedAA);
     }
 
     return op;
diff --git a/src/gpu/ops/GrTextureOp.cpp b/src/gpu/ops/GrTextureOp.cpp
index 7b2acc6..a494e1b 100644
--- a/src/gpu/ops/GrTextureOp.cpp
+++ b/src/gpu/ops/GrTextureOp.cpp
@@ -169,15 +169,16 @@
             str.appendf("Proxy ID: %d, Filter: %d\n", fProxies[p].fProxy->uniqueID().asUInt(),
                         static_cast<int>(fFilter));
             for (int i = 0; i < fProxies[p].fQuadCnt; ++i, ++q) {
-                const Quad& quad = fQuads[q];
+                GrPerspQuad quad = fQuads[q];
+                const ColorDomainAndAA& info = fQuads.metadata(q);
                 str.appendf(
                         "%d: Color: 0x%08x, TexRect [L: %.2f, T: %.2f, R: %.2f, B: %.2f] "
                         "Quad [(%.2f, %.2f), (%.2f, %.2f), (%.2f, %.2f), (%.2f, %.2f)]\n",
-                        i, quad.color().toBytes_RGBA(), quad.srcRect().fLeft, quad.srcRect().fTop,
-                        quad.srcRect().fRight, quad.srcRect().fBottom, quad.quad().point(0).fX,
-                        quad.quad().point(0).fY, quad.quad().point(1).fX, quad.quad().point(1).fY,
-                        quad.quad().point(2).fX, quad.quad().point(2).fY, quad.quad().point(3).fX,
-                        quad.quad().point(3).fY);
+                        i, info.fColor.toBytes_RGBA(), info.fSrcRect.fLeft, info.fSrcRect.fTop,
+                        info.fSrcRect.fRight, info.fSrcRect.fBottom, quad.point(0).fX,
+                        quad.point(0).fY, quad.point(1).fX, quad.point(1).fY,
+                        quad.point(2).fX, quad.point(2).fY, quad.point(3).fX,
+                        quad.point(3).fY);
             }
         }
         str += INHERITED::dumpInfo();
@@ -221,7 +222,6 @@
         GrResolveAATypeForQuad(aaType, aaFlags, quad, quadType, &aaType, &aaFlags);
         fAAType = static_cast<unsigned>(aaType);
 
-        fQuadType = static_cast<unsigned>(quadType);
         // We expect our caller to have already caught this optimization.
         SkASSERT(!srcRect.contains(proxy->getWorstCaseBoundsRect()) ||
                  constraint == SkCanvas::kFast_SrcRectConstraint);
@@ -241,12 +241,15 @@
             aaType != GrAAType::kCoverage) {
             constraint = SkCanvas::kFast_SrcRectConstraint;
         }
-        const auto& draw = fQuads.emplace_back(srcRect, quad, aaFlags, constraint, color);
+
+        Domain domain = constraint == SkCanvas::kStrict_SrcRectConstraint ? Domain::kYes
+                                                                          : Domain::kNo;
+        fQuads.push_back(quad, quadType, {color, srcRect, domain, aaFlags});
         fProxyCnt = 1;
         fProxies[0] = {proxy.release(), 1};
         auto bounds = quad.bounds();
         this->setBounds(bounds, HasAABloat(aaType == GrAAType::kCoverage), IsZeroArea::kNo);
-        fDomain = static_cast<unsigned>(draw.domain());
+        fDomain = static_cast<unsigned>(domain);
         fWideColor = !SkPMColor4fFitsInBytes(color);
         fCanSkipAllocatorGather =
                 static_cast<unsigned>(fProxies[0].fProxy->canSkipResourceAllocator());
@@ -258,7 +261,6 @@
             , fTextureColorSpaceXform(std::move(textureColorSpaceXform))
             , fFilter(static_cast<unsigned>(filter))
             , fFinalized(0) {
-        fQuads.reserve(cnt);
         fProxyCnt = SkToUInt(cnt);
         SkRect bounds = SkRectPriv::MakeLargestInverted();
         GrAAType overallAAType = GrAAType::kNone; // aa type maximally compatible with all dst rects
@@ -266,6 +268,8 @@
         fCanSkipAllocatorGather = static_cast<unsigned>(true);
         // All dst rects are transformed by the same view matrix, so their quad types are identical
         GrQuadType quadType = GrQuadTypeForTransformedRect(viewMatrix);
+        fQuads.reserve(cnt, quadType);
+
         for (unsigned p = 0; p < fProxyCnt; ++p) {
             fProxies[p].fProxy = SkRef(set[p].fProxy.get());
             fProxies[p].fQuadCnt = 1;
@@ -291,15 +295,13 @@
             }
             float alpha = SkTPin(set[p].fAlpha, 0.f, 1.f);
             SkPMColor4f color{alpha, alpha, alpha, alpha};
-            fQuads.emplace_back(set[p].fSrcRect, quad, aaFlags, SkCanvas::kFast_SrcRectConstraint,
-                                color);
+            fQuads.push_back(quad, quadType, {color, set[p].fSrcRect, Domain::kNo, aaFlags});
         }
         fAAType = static_cast<unsigned>(overallAAType);
         if (!mustFilter) {
             fFilter = static_cast<unsigned>(GrSamplerState::Filter::kNearest);
         }
         this->setBounds(bounds, HasAABloat(this->aaType() == GrAAType::kCoverage), IsZeroArea::kNo);
-        fQuadType = static_cast<unsigned>(quadType);
         fDomain = static_cast<unsigned>(false);
         fWideColor = static_cast<unsigned>(false);
     }
@@ -320,12 +322,14 @@
         }
 
         for (int i = start; i < start + cnt; ++i) {
-            const auto q = fQuads[i];
-            GrPerspQuad srcQuad = compute_src_quad(origin, q.srcRect(), iw, ih, h);
+            const GrPerspQuad& device = fQuads[i];
+            const ColorDomainAndAA& info = fQuads.metadata(i);
+
+            GrPerspQuad srcQuad = compute_src_quad(origin, info.fSrcRect, iw, ih, h);
             SkRect domain =
-                    compute_domain(q.domain(), this->filter(), origin, q.srcRect(), iw, ih, h);
-            v = GrQuadPerEdgeAA::Tessellate(v, spec, q.quad(), q.color(), srcQuad, domain,
-                                            q.aaFlags());
+                    compute_domain(info.domain(), this->filter(), origin, info.fSrcRect, iw, ih, h);
+            v = GrQuadPerEdgeAA::Tessellate(v, spec, device, info.fColor, srcQuad, domain,
+                                            info.aaFlags());
         }
     }
 
@@ -340,8 +344,8 @@
         auto config = fProxies[0].fProxy->config();
         GrAAType aaType = this->aaType();
         for (const auto& op : ChainRange<TextureOp>(this)) {
-            if (op.quadType() > quadType) {
-                quadType = op.quadType();
+            if (op.fQuads.quadType() > quadType) {
+                quadType = op.fQuads.quadType();
             }
             if (op.fDomain) {
                 domain = Domain::kYes;
@@ -485,10 +489,7 @@
             return CombineResult::kCannotCombine;
         }
         fProxies[0].fQuadCnt += that->fQuads.count();
-        fQuads.push_back_n(that->fQuads.count(), that->fQuads.begin());
-        if (that->fQuadType > fQuadType) {
-            fQuadType = that->fQuadType;
-        }
+        fQuads.concat(that->fQuads);
         fDomain |= that->fDomain;
         fWideColor |= that->fWideColor;
         if (upgradeToCoverageAAOnMerge) {
@@ -499,47 +500,44 @@
 
     GrAAType aaType() const { return static_cast<GrAAType>(fAAType); }
     GrSamplerState::Filter filter() const { return static_cast<GrSamplerState::Filter>(fFilter); }
-    GrQuadType quadType() const { return static_cast<GrQuadType>(fQuadType); }
 
-    class Quad {
-    public:
-        Quad(const SkRect& srcRect, const GrPerspQuad& quad, GrQuadAAFlags aaFlags,
-             SkCanvas::SrcRectConstraint constraint, const SkPMColor4f& color)
-                : fSrcRect(srcRect)
-                , fQuad(quad)
-                , fColor(color)
-                , fHasDomain(constraint == SkCanvas::kStrict_SrcRectConstraint)
+    struct ColorDomainAndAA {
+        // Convenience constructor that converts the enums into the packed bitfields. Declaring it
+        // leaves the implicit copy/move constructors intact but suppresses the implicit default
+        // constructor, so that one is explicitly defaulted below for use by GrTQuadList.
+        ColorDomainAndAA(const SkPMColor4f& color, const SkRect& srcRect,
+                         Domain hasDomain, GrQuadAAFlags aaFlags)
+                : fColor(color)
+                , fSrcRect(srcRect)
+                , fHasDomain(static_cast<unsigned>(hasDomain))
                 , fAAFlags(static_cast<unsigned>(aaFlags)) {
+            SkASSERT(fHasDomain == static_cast<unsigned>(hasDomain));
             SkASSERT(fAAFlags == static_cast<unsigned>(aaFlags));
         }
-        const GrPerspQuad& quad() const { return fQuad; }
-        const SkRect& srcRect() const { return fSrcRect; }
-        SkPMColor4f color() const { return fColor; }
-        Domain domain() const { return Domain(fHasDomain); }
-        GrQuadAAFlags aaFlags() const { return static_cast<GrQuadAAFlags>(fAAFlags); }
+        ColorDomainAndAA() = default;
 
-    private:
-        SkRect fSrcRect;
-        GrPerspQuad fQuad;
         SkPMColor4f fColor;
+        SkRect fSrcRect;
         unsigned fHasDomain : 1;
         unsigned fAAFlags : 4;
+
+        Domain domain() const { return Domain(fHasDomain); }
+        GrQuadAAFlags aaFlags() const { return static_cast<GrQuadAAFlags>(fAAFlags); }
     };
     struct Proxy {
         GrTextureProxy* fProxy;
         int fQuadCnt;
     };
-    SkSTArray<1, Quad, true> fQuads;
+    GrTQuadList<ColorDomainAndAA> fQuads;
     sk_sp<GrColorSpaceXform> fTextureColorSpaceXform;
     unsigned fFilter : 2;
     unsigned fAAType : 2;
-    unsigned fQuadType : 2; // Device quad, src quad is always trivial
     unsigned fDomain : 1;
     unsigned fWideColor : 1;
     // Used to track whether fProxy is ref'ed or has a pending IO after finalize() is called.
     unsigned fFinalized : 1;
     unsigned fCanSkipAllocatorGather : 1;
-    unsigned fProxyCnt : 32 - 10;
+    unsigned fProxyCnt : 32 - 8;
     Proxy fProxies[1];
 
     static_assert(kGrQuadTypeCount <= 4, "GrQuadType does not fit in 2 bits");
diff --git a/tests/GrQuadListTest.cpp b/tests/GrQuadListTest.cpp
new file mode 100644
index 0000000..fd88dde
--- /dev/null
+++ b/tests/GrQuadListTest.cpp
@@ -0,0 +1,282 @@
+/*
+ * Copyright 2019 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "Test.h"
+
+#include "GrQuad.h"
+
+#define ASSERT(cond) REPORTER_ASSERT(r, cond)  // `r` is the reporter name TEST() hands DEF_TEST
+#define ASSERTF(cond, ...) REPORTER_ASSERT(r, cond, __VA_ARGS__)  // ASSERT plus message args
+#define TEST(name) DEF_TEST(GrQuadList##name, r)  // test named GrQuadList<name>, reporter `r`
+
+struct TestData {  // Metadata payload stored per quad by the GrTQuadList<TestData> tests
+    int fItem1;    // Integer field; tests write a distinct value per quad and read it back
+    float fItem2;  // Float field; tests write a distinct value per quad and read it back
+};
+
+// Placeholder quad built from the rect LTRB(1, 2, 3, 4); tests add it as GrQuadType::kRect.
+static GrQuad make_2d_quad() {
+    const SkRect bounds = SkRect::MakeLTRB(1.f, 2.f, 3.f, 4.f);
+    return GrQuad(bounds);
+}
+static bool is_2d_quad(const GrPerspQuad& quad) {
+    auto at = [&](int i, float x, float y) { return quad.x(i) == x && quad.y(i) == y; };
+    return at(0, 1.f, 2.f) && at(1, 1.f, 4.f) && at(2, 3.f, 2.f) && at(3, 3.f, 4.f) &&
+           quad.w(0) == 1.f && quad.w(1) == 1.f && quad.w(2) == 1.f && quad.w(3) == 1.f;
+}
+
+static GrPerspQuad make_2d_persp_quad() {  // GrPerspQuad whose w is all 1 (see is_2d_persp_quad)
+    return GrPerspQuad(SkRect::MakeLTRB(5.f, 6.f, 7.f, 8.f), SkMatrix::I());
+}
+static bool is_2d_persp_quad(const GrPerspQuad& quad) {
+    auto at = [&](int i, float x, float y) { return quad.x(i) == x && quad.y(i) == y; };
+    return at(0, 5.f, 6.f) && at(1, 5.f, 8.f) && at(2, 7.f, 6.f) && at(3, 7.f, 8.f) &&
+           quad.w(0) == 1.f && quad.w(1) == 1.f && quad.w(2) == 1.f && quad.w(3) == 1.f;
+}
+
+static GrPerspQuad make_3d_persp_quad() {
+    const SkRect rect = SkRect::MakeLTRB(9.f, 10.f, 11.f, 12.f);
+    SkMatrix p = SkMatrix::I();
+    p[SkMatrix::kMPersp2] = 13.f; // Only persp2 differs from identity: x, y kept, w becomes 13
+    SkASSERT(p.hasPerspective()); // Sanity check
+    return GrPerspQuad(rect, p);
+}
+static bool is_3d_persp_quad(const GrPerspQuad& quad) {
+    auto at = [&](int i, float x, float y) { return quad.x(i) == x && quad.y(i) == y; };
+    return at(0, 9.f, 10.f) && at(1, 9.f, 12.f) && at(2, 11.f, 10.f) && at(3, 11.f, 12.f) &&
+           quad.w(0) == 13.f && quad.w(1) == 13.f && quad.w(2) == 13.f && quad.w(3) == 13.f;
+}
+
+TEST(Add2D) {
+    GrQuadList list2D;
+    // Start with two quads that need no perspective: a plain quad and a GrPerspQuad built with
+    // SkMatrix::I(). The 3D perspective quad is only added once the 2D state has been checked.
+    list2D.push_back(make_2d_quad(), GrQuadType::kRect);
+    list2D.push_back(make_2d_persp_quad(), GrQuadType::kRect);
+
+    // Check 2D state of the list. The enum is cast to int so the argument matches "%d".
+    ASSERTF(list2D.count() == 2, "Unexpected count: %d", list2D.count());
+    ASSERTF(list2D.quadType() == GrQuadType::kRect, "Unexpected quad type: %d",
+            static_cast<int>(list2D.quadType()));
+    ASSERTF(is_2d_quad(list2D[0]), "Incorrect quad at i=0");
+    ASSERTF(is_2d_persp_quad(list2D[1]), "Incorrect quad at i=1");
+
+    // Force the 2D quads to be updated to store ws by adding a perspective quad
+    list2D.push_back(make_3d_persp_quad(), GrQuadType::kPerspective);
+    ASSERTF(list2D.quadType() == GrQuadType::kPerspective,
+            "Expected 2D list to be upgraded to perspective");
+
+    // Re-check full state of list after type upgrade: earlier quads must read back unchanged
+    ASSERTF(list2D.count() == 3, "Unexpected count: %d", list2D.count());
+    ASSERTF(is_2d_quad(list2D[0]), "Incorrect quad at i=0 after upgrade");
+    ASSERTF(is_2d_persp_quad(list2D[1]), "Incorrect quad at i=1 after upgrade");
+    ASSERTF(is_3d_persp_quad(list2D[2]), "Incorrect quad at i=2");
+}
+
+TEST(Add3D) {
+    // Push the 3D perspective quad first, then two quads tagged kRect, and verify that all
+    // three read back with the coordinates (w included) that their factories produced.
+    GrQuadList list3D;
+    list3D.push_back(make_3d_persp_quad(), GrQuadType::kPerspective);
+    list3D.push_back(make_2d_persp_quad(), GrQuadType::kRect);
+    list3D.push_back(make_2d_quad(), GrQuadType::kRect);
+
+    ASSERTF(list3D.count() == 3, "Unexpected count: %d", list3D.count());
+    ASSERTF(is_3d_persp_quad(list3D[0]), "Incorrect quad at i=0");
+    ASSERTF(is_2d_persp_quad(list3D[1]), "Incorrect quad at i=1");
+    ASSERTF(is_2d_quad(list3D[2]), "Incorrect quad at i=2");
+}
+
+TEST(AddWithMetadata2D) {
+    // Same sequence as the metadata-free 2D test, with a TestData payload attached to each quad
+    GrTQuadList<TestData> list2D;
+    // Start with two quads that need no perspective, each tagged with its own metadata. The 3D
+    // perspective quad is only added once the 2D state has been checked.
+    list2D.push_back(make_2d_quad(), GrQuadType::kRect, {1, 1.f});
+    list2D.push_back(make_2d_persp_quad(), GrQuadType::kRect, {2, 2.f});
+
+    // Check 2D state of the list. The enum is cast to int so the argument matches "%d".
+    ASSERTF(list2D.count() == 2, "Unexpected count: %d", list2D.count());
+    ASSERTF(list2D.quadType() == GrQuadType::kRect, "Unexpected quad type: %d",
+            static_cast<int>(list2D.quadType()));
+    ASSERTF(is_2d_quad(list2D[0]), "Incorrect quad at i=0");
+    ASSERTF(list2D.metadata(0).fItem1 == 1 && list2D.metadata(0).fItem2 == 1.f,
+            "Incorrect metadata at i=0");
+    ASSERTF(is_2d_persp_quad(list2D[1]), "Incorrect quad at i=1");
+    ASSERTF(list2D.metadata(1).fItem1 == 2 && list2D.metadata(1).fItem2 == 2.f,
+            "Incorrect metadata at i=1");
+
+    // Force the 2D quads to be updated to store ws by adding a perspective quad
+    list2D.push_back(make_3d_persp_quad(), GrQuadType::kPerspective, {3, 3.f});
+    ASSERTF(list2D.quadType() == GrQuadType::kPerspective,
+            "Expected 2D list to be upgraded to perspective");
+
+    // Re-check full state after the upgrade; messages say so to tell them from the checks above
+    ASSERTF(list2D.count() == 3, "Unexpected count: %d", list2D.count());
+    ASSERTF(is_2d_quad(list2D[0]), "Incorrect quad at i=0 after upgrade");
+    ASSERTF(list2D.metadata(0).fItem1 == 1 && list2D.metadata(0).fItem2 == 1.f,
+            "Incorrect metadata at i=0 after upgrade");
+    ASSERTF(is_2d_persp_quad(list2D[1]), "Incorrect quad at i=1 after upgrade");
+    ASSERTF(list2D.metadata(1).fItem1 == 2 && list2D.metadata(1).fItem2 == 2.f,
+            "Incorrect metadata at i=1 after upgrade");
+    ASSERTF(is_3d_persp_quad(list2D[2]), "Incorrect quad at i=2");
+    ASSERTF(list2D.metadata(2).fItem1 == 3 && list2D.metadata(2).fItem2 == 3.f,
+            "Incorrect metadata at i=2");
+}
+
+TEST(AddWithMetadata3D) {
+    // Push the 3D perspective quad first, then two quads tagged kRect, each with its own
+    // TestData; verify every quad and its metadata read back as they were pushed.
+    GrTQuadList<TestData> list3D;
+    list3D.push_back(make_3d_persp_quad(), GrQuadType::kPerspective, {3, 3.f});
+    list3D.push_back(make_2d_persp_quad(), GrQuadType::kRect, {2, 2.f});
+    list3D.push_back(make_2d_quad(), GrQuadType::kRect, {1, 1.f});
+
+    ASSERTF(list3D.count() == 3, "Unexpected count: %d", list3D.count());
+    ASSERTF(is_3d_persp_quad(list3D[0]), "Incorrect quad at i=0");
+    ASSERTF(list3D.metadata(0).fItem1 == 3 && list3D.metadata(0).fItem2 == 3.f,
+            "Incorrect metadata at i=0");
+    ASSERTF(is_2d_persp_quad(list3D[1]), "Incorrect quad at i=1");
+    ASSERTF(list3D.metadata(1).fItem1 == 2 && list3D.metadata(1).fItem2 == 2.f,
+            "Incorrect metadata at i=1");
+    ASSERTF(is_2d_quad(list3D[2]), "Incorrect quad at i=2");
+    ASSERTF(list3D.metadata(2).fItem1 == 1 && list3D.metadata(2).fItem2 == 1.f,
+            "Incorrect metadata at i=2");
+}
+
+TEST(Concat2DWith2D) {
+    GrQuadList a2D, b2D;
+    a2D.push_back(make_2d_quad(), GrQuadType::kRect);
+    b2D.push_back(make_2d_persp_quad(), GrQuadType::kRect);
+
+    // Both lists hold only kRect quads; concat must append b2D's quad after a2D's
+    a2D.concat(b2D);
+
+    ASSERTF(a2D.count() == 2, "Unexpected count: %d", a2D.count());
+    ASSERTF(is_2d_quad(a2D[0]), "Incorrect quad at i=0");
+    ASSERTF(is_2d_persp_quad(a2D[1]), "Incorrect quad at i=1");
+}
+
+TEST(Concat2DWith3D) {
+    GrQuadList a2D, b3D;
+    a2D.push_back(make_2d_quad(), GrQuadType::kRect);
+    b3D.push_back(make_3d_persp_quad(), GrQuadType::kPerspective);
+
+    // A kRect list receives a perspective quad; both quads must read back intact afterwards
+    a2D.concat(b3D);
+
+    ASSERTF(a2D.count() == 2, "Unexpected count: %d", a2D.count());
+    ASSERTF(is_2d_quad(a2D[0]), "Incorrect quad at i=0");
+    ASSERTF(is_3d_persp_quad(a2D[1]), "Incorrect quad at i=1");
+}
+
+TEST(Concat3DWith2D) {
+    GrQuadList a3D, b2D;
+    a3D.push_back(make_3d_persp_quad(), GrQuadType::kPerspective);
+    b2D.push_back(make_2d_quad(), GrQuadType::kRect);
+
+    // A perspective list receives a kRect quad; both quads must read back intact afterwards
+    a3D.concat(b2D);
+
+    ASSERTF(a3D.count() == 2, "Unexpected count: %d", a3D.count());
+    ASSERTF(is_3d_persp_quad(a3D[0]), "Incorrect quad at i=0");
+    ASSERTF(is_2d_quad(a3D[1]), "Incorrect quad at i=1");
+}
+
+TEST(Concat3DWith3D) {
+    GrQuadList a3D, b3D;
+    a3D.push_back(make_3d_persp_quad(), GrQuadType::kPerspective);
+    b3D.push_back(make_3d_persp_quad(), GrQuadType::kPerspective);
+
+    // Both lists hold a perspective quad; concat must append b3D's quad after a3D's
+    a3D.concat(b3D);
+
+    ASSERTF(a3D.count() == 2, "Unexpected count: %d", a3D.count());
+    ASSERTF(is_3d_persp_quad(a3D[0]), "Incorrect quad at i=0");
+    ASSERTF(is_3d_persp_quad(a3D[1]), "Incorrect quad at i=1");
+}
+
+TEST(Concat2DWith2DMetadata) {
+    GrTQuadList<TestData> a2D, b2D;
+    a2D.push_back(make_2d_quad(), GrQuadType::kRect, {1, 1.f});
+    b2D.push_back(make_2d_persp_quad(), GrQuadType::kRect, {2, 2.f});
+
+    // Both lists hold only kRect quads; b2D's quad and metadata must land after a2D's
+    a2D.concat(b2D);
+
+    ASSERTF(a2D.count() == 2, "Unexpected count: %d", a2D.count());
+    ASSERTF(is_2d_quad(a2D[0]), "Incorrect quad at i=0");
+    ASSERTF(a2D.metadata(0).fItem1 == 1 && a2D.metadata(0).fItem2 == 1.f,
+            "Incorrect metadata at i=0");
+    ASSERTF(is_2d_persp_quad(a2D[1]), "Incorrect quad at i=1");
+    ASSERTF(a2D.metadata(1).fItem1 == 2 && a2D.metadata(1).fItem2 == 2.f,
+            "Incorrect metadata at i=1");
+}
+
+TEST(Concat2DWith3DMetadata) {
+    GrTQuadList<TestData> a2D, b3D;
+    a2D.push_back(make_2d_quad(), GrQuadType::kRect, {1, 1.f});
+    b3D.push_back(make_3d_persp_quad(), GrQuadType::kPerspective, {2, 2.f});
+
+    // A kRect list receives a perspective quad; quads and metadata must both survive intact
+    a2D.concat(b3D);
+
+    ASSERTF(a2D.count() == 2, "Unexpected count: %d", a2D.count());
+    ASSERTF(is_2d_quad(a2D[0]), "Incorrect quad at i=0");
+    ASSERTF(a2D.metadata(0).fItem1 == 1 && a2D.metadata(0).fItem2 == 1.f,
+            "Incorrect metadata at i=0");
+    ASSERTF(is_3d_persp_quad(a2D[1]), "Incorrect quad at i=1");
+    ASSERTF(a2D.metadata(1).fItem1 == 2 && a2D.metadata(1).fItem2 == 2.f,
+            "Incorrect metadata at i=1");
+}
+
+TEST(Concat3DWith2DMetadata) {
+    GrTQuadList<TestData> a3D, b2D;
+    a3D.push_back(make_3d_persp_quad(), GrQuadType::kPerspective, {1, 1.f});
+    b2D.push_back(make_2d_quad(), GrQuadType::kRect, {2, 2.f});
+
+    // A perspective list receives a kRect quad; quads and metadata must both survive intact
+    a3D.concat(b2D);
+
+    ASSERTF(a3D.count() == 2, "Unexpected count: %d", a3D.count());
+    ASSERTF(is_3d_persp_quad(a3D[0]), "Incorrect quad at i=0");
+    ASSERTF(a3D.metadata(0).fItem1 == 1 && a3D.metadata(0).fItem2 == 1.f,
+            "Incorrect metadata at i=0");
+    ASSERTF(is_2d_quad(a3D[1]), "Incorrect quad at i=1");
+    ASSERTF(a3D.metadata(1).fItem1 == 2 && a3D.metadata(1).fItem2 == 2.f,
+            "Incorrect metadata at i=1");
+}
+
+TEST(Concat3DWith3DMetadata) {
+    GrTQuadList<TestData> a3D, b3D;
+    a3D.push_back(make_3d_persp_quad(), GrQuadType::kPerspective, {1, 1.f});
+    b3D.push_back(make_3d_persp_quad(), GrQuadType::kPerspective, {2, 2.f});
+
+    // Both lists hold a perspective quad; b3D's quad and metadata must land after a3D's
+    a3D.concat(b3D);
+
+    ASSERTF(a3D.count() == 2, "Unexpected count: %d", a3D.count());
+    ASSERTF(is_3d_persp_quad(a3D[0]), "Incorrect quad at i=0");
+    ASSERTF(a3D.metadata(0).fItem1 == 1 && a3D.metadata(0).fItem2 == 1.f,
+            "Incorrect metadata at i=0");
+    ASSERTF(is_3d_persp_quad(a3D[1]), "Incorrect quad at i=1");
+    ASSERTF(a3D.metadata(1).fItem1 == 2 && a3D.metadata(1).fItem2 == 2.f,
+            "Incorrect metadata at i=1");
+}
+
+TEST(WriteMetadata) {
+    GrTQuadList<TestData> list;
+    list.push_back(make_2d_quad(), GrQuadType::kRect, {1, 1.f});
+    ASSERTF(list.metadata(0).fItem1 == 1 && list.metadata(0).fItem2 == 1.f,
+            "Incorrect metadata at i=0"); // Confirms the pushed values before they are edited
+
+    // Assign through metadata(0) in place, then read back to confirm the list kept the edit
+    list.metadata(0).fItem1 = 2;
+    list.metadata(0).fItem2 = 2.f;
+    ASSERTF(list.metadata(0).fItem1 == 2 && list.metadata(0).fItem2 == 2.f,
+            "Incorrect metadata at i=0 after edit");
+}