[graphite] Add RenderStep to GraphicsPipelineDesc, provides vertex MSL

Bug: skia:12466
Change-Id: I84d017eda7964a19ddd36570cd46e8d98ba2f50f
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/474936
Reviewed-by: Jim Van Verth <jvanverth@google.com>
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
diff --git a/experimental/graphite/src/GraphicsPipelineDesc.h b/experimental/graphite/src/GraphicsPipelineDesc.h
index 6bd3b23..b2ca5cc 100644
--- a/experimental/graphite/src/GraphicsPipelineDesc.h
+++ b/experimental/graphite/src/GraphicsPipelineDesc.h
@@ -16,6 +16,8 @@
 
 namespace skgpu {
 
+class RenderStep;
+
 /**
  * GraphicsPipelineDesc represents the state needed to create a backend specific GraphicsPipeline,
  * minus the target-specific properties that can be inferred from the DrawPass and RenderPassTask.
@@ -24,74 +26,6 @@
 public:
     GraphicsPipelineDesc();
 
-    // TODO: Iter and AttributeSet go away when the RenderStep is part of the GraphicsPipelineDesc.
-    class Iter {
-    public:
-        Iter() : fCurr(nullptr), fRemaining(0) {}
-        Iter(const Iter& iter) : fCurr(iter.fCurr), fRemaining(iter.fRemaining) {}
-        Iter& operator= (const Iter& iter) {
-            fCurr = iter.fCurr;
-            fRemaining = iter.fRemaining;
-            return *this;
-        }
-        Iter(const Attribute* attrs, int count) : fCurr(attrs), fRemaining(count) {
-            this->skipUninitialized();
-        }
-
-        bool operator!=(const Iter& that) const { return fCurr != that.fCurr; }
-        const Attribute& operator*() const { return *fCurr; }
-        void operator++() {
-            if (fRemaining) {
-                fRemaining--;
-                fCurr++;
-                this->skipUninitialized();
-            }
-        }
-
-    private:
-        void skipUninitialized() {
-            if (!fRemaining) {
-                fCurr = nullptr;
-            } else {
-                while (!fCurr->isInitialized()) {
-                    ++fCurr;
-                }
-            }
-        }
-
-        const Attribute* fCurr;
-        int fRemaining;
-    };
-
-    class AttributeSet {
-    public:
-        Iter begin() const { return Iter(fAttributes, fCount); }
-        Iter end() const { return Iter(); }
-
-        int count() const { return fCount; }
-        size_t stride() const { return fStride; }
-
-    private:
-        friend class GraphicsPipelineDesc;
-        void init(const Attribute* attrs, int count) {
-            fAttributes = attrs;
-            fRawCount = count;
-            fCount = 0;
-            fStride = 0;
-            for (int i = 0; i < count; ++i) {
-                if (attrs[i].isInitialized()) {
-                    fCount++;
-                    fStride += attrs[i].sizeAlign4();
-                }
-            }
-        }
-
-        const Attribute* fAttributes = nullptr;
-        int              fRawCount = 0;
-        int              fCount = 0;
-        size_t           fStride = 0;
-    };
-
     // Returns this as a uint32_t array to be used as a key in the pipeline cache.
     // TODO: Do we want to do anything here with a tuple or an SkSpan?
     const uint32_t* asKey() const {
@@ -111,54 +45,39 @@
         return !(*this == other);
     }
 
-    // TODO: remove this once we have something real working
-    void setTestingOnlyShaderIndex(int index) {
-        fTestingOnlyShaderIndex = index;
-        if (fKey.count() >= 1) {
-            fKey[0] = index;
-        } else {
-            fKey.push_back(index);
+    const RenderStep* renderStep() const { return fRenderStep; }
+    void setRenderStep(const RenderStep* step) {
+        SkASSERT(step);
+        fRenderStep = step;
+
+        static constexpr int kWords = sizeof(uintptr_t) / sizeof(uint32_t);
+        static_assert(sizeof(uintptr_t) % sizeof(uint32_t) == 0);
+
+        if (fKey.count() < kWords) {
+            fKey.push_back_n(kWords - fKey.count());
         }
-    }
-    int testingOnlyShaderIndex() const {
-        return fTestingOnlyShaderIndex;
-    }
 
-    void setVertexAttributes(const Attribute* attrs, int attrCount) {
-        fVertexAttributes.init(attrs, attrCount);
+        uintptr_t addr = reinterpret_cast<uintptr_t>(fRenderStep);
+        memcpy(fKey.data(), &addr, sizeof(uintptr_t));
     }
-    void setInstanceAttributes(const Attribute* attrs, int attrCount) {
-        SkASSERT(attrCount >= 0);
-        fInstanceAttributes.init(attrs, attrCount);
-    }
-
-    int numVertexAttributes() const { return fVertexAttributes.fCount; }
-    const AttributeSet& vertexAttributes() const { return fVertexAttributes; }
-    int numInstanceAttributes() const { return fInstanceAttributes.fCount; }
-    const AttributeSet& instanceAttributes() const { return fInstanceAttributes; }
-
-    bool hasVertexAttributes() const { return SkToBool(fVertexAttributes.fCount); }
-    bool hasInstanceAttributes() const { return SkToBool(fInstanceAttributes.fCount); }
-
-    /**
-     * A common practice is to populate the the vertex/instance's memory using an implicit array of
-     * structs. In this case, it is best to assert that:
-     *     stride == sizeof(struct)
-     */
-    size_t vertexStride() const { return fVertexAttributes.fStride; }
-    size_t instanceStride() const { return fInstanceAttributes.fStride; }
 
 private:
     // Estimate of max expected key size
     // TODO: flesh this out
     inline static constexpr int kPreAllocSize = 1;
 
+    // TODO: I wonder if we could expose the "key" as just a char[] union over the renderstep and
+    // paint combination? That would avoid extra size, but definitely locks GraphicsPipelineDesc
+    // keys to the current process, which is probably okay since we can have something a with a more
+    // stable hash used for the pre-compilation combos.
     SkSTArray<kPreAllocSize, uint32_t, true> fKey;
 
-    int fTestingOnlyShaderIndex;
-
-    AttributeSet fVertexAttributes;
-    AttributeSet fInstanceAttributes;
+    // Each RenderStep defines a fixed set of attributes and rasterization state, as well as the
+    // shader fragments that control the geometry and coverage calculations. The RenderStep's shader
+    // is combined with the rest of the shader generated from the PaintParams. Because each
+    // RenderStep is fixed, its pointer can be used as a proxy for everything that it specifies in
+    // the GraphicsPipeline.
+    const RenderStep* fRenderStep = nullptr;
 };
 
 } // namespace skgpu
diff --git a/experimental/graphite/src/Renderer.h b/experimental/graphite/src/Renderer.h
index 52cfc09..fc27c9a 100644
--- a/experimental/graphite/src/Renderer.h
+++ b/experimental/graphite/src/Renderer.h
@@ -52,6 +52,23 @@
 
     virtual const char* name()      const = 0;
 
+    // TODO: This is only temporary. Eventually the RenderStep will define its logic in SkSL and
+    // be able to have code operate in both the vertex and fragment shaders. Ideally the RenderStep
+    // will provide two functions that fit some ABI for integrating with the common and paint SkSL,
+    // although we could go as far as allowing RenderStep to handle composing the final SkSL if
+    // given the paint combination's SkSL.
+
+    // Returns the body of a vertex function, which must include writing to a float4 "out.position".
+    // It has access to a "vtx" variable equivalent to the struct defined by
+    // vertexAttributes() and instanceAttributes() joined together, and a "uniforms" instance
+    // equivalent to the struct defined by uniforms(). If these structs would be empty, the
+    // variables are unavailable. Additionally "vertexID" and "instanceID" are always available.
+    //
+    // NOTE: The above contract is mainly so that the entire MSL program can be created by just str
+    // concatenating struct definitions generated from the RenderStep and paint Combination, some
+    // hardcoded MSL prefixes and suffices, and then including the function bodies returned here.
+    virtual const char* vertexMSL() const = 0;
+
     bool          requiresStencil() const { return fFlags & Flags::kRequiresStencil; }
     bool          requiresMSAA()    const { return fFlags & Flags::kRequiresMSAA;    }
     bool          performsShading() const { return fFlags & Flags::kPerformsShading; }
@@ -72,8 +89,6 @@
 
     // TODO: Actual API to do things
     // 1. Provide stencil settings
-    // 2. Provide shader key or MSL(?) for the vertex stage
-    // 4. Write uniform data given a Shape/Transform/Stroke info
     // 6. Some Renderers benefit from being able to share vertices between RenderSteps. Must find a
     //    way to support that. It may mean that RenderSteps get state per draw.
     //    - Does Renderer make RenderStepFactories that create steps for each DrawList::Draw?
diff --git a/experimental/graphite/src/mtl/MtlGraphicsPipeline.mm b/experimental/graphite/src/mtl/MtlGraphicsPipeline.mm
index 7ba662e..e9a7ffc 100644
--- a/experimental/graphite/src/mtl/MtlGraphicsPipeline.mm
+++ b/experimental/graphite/src/mtl/MtlGraphicsPipeline.mm
@@ -8,134 +8,163 @@
 #include "experimental/graphite/src/mtl/MtlGraphicsPipeline.h"
 
 #include "experimental/graphite/src/GraphicsPipelineDesc.h"
+#include "experimental/graphite/src/Renderer.h"
 #include "experimental/graphite/src/mtl/MtlGpu.h"
 #include "experimental/graphite/src/mtl/MtlUtils.h"
+#include "include/core/SkSpan.h"
 #include "include/private/SkSLString.h"
 
 namespace skgpu::mtl {
 
-static const char* kTestingOnlyShaders[] = {
-    // clear viewport to blue
-    "#include <metal_stdlib>\n"
-    "#include <simd/simd.h>\n"
-    "using namespace metal;\n"
-    "\n"
-    "typedef struct {\n"
-    "    float4 position [[position]];\n"
-    "} VertexOutput;\n"
-    "\n"
-    "vertex VertexOutput vertexMain(uint vertexID [[vertex_id]]) {\n"
-    "    VertexOutput out;\n"
-    "    float2 position = float2(float(vertexID >> 1), float(vertexID & 1));\n"
-    "    out.position.xy = position * 2 - 1;\n"
-    "    out.position.zw = float2(0.0, 1.0);\n"
-    "    return out;\n"
-    "}\n"
-    "\n"
-    "fragment float4 fragmentMain(VertexOutput in [[stage_in]]) {\n"
-    "    return float4(0.0, 0.0, 1.0, 1.0);\n"
-    "}",
+namespace {
 
-    // clear subarea to given color, using uniform buffer
-    "#include <metal_stdlib>\n"
-    "#include <simd/simd.h>\n"
-    "using namespace metal;\n"
-    "\n"
-    "typedef struct {\n"
-    "    float4 position [[position]];\n"
-    "} VertexOutput;\n"
-    "\n"
-    "typedef struct {\n"
-    "    float4 uPosXform;\n"
-    "    float4 uColor;\n"
-    "} UniformData;\n"
-    "\n"
-    "vertex VertexOutput vertexMain(constant UniformData& uniforms [[buffer(0)]],\n"
-    "                               uint vertexID [[vertex_id]]) {\n"
-    "    VertexOutput out;\n"
-    "    float2 position = float2(float(vertexID >> 1), float(vertexID & 1));\n"
-    "    out.position.xy = position * uniforms.uPosXform.xy + uniforms.uPosXform.zw;\n"
-    "    out.position.zw = float2(0.0, 1.0);\n"
-    "    return out;\n"
-    "}\n"
-    "\n"
-    "fragment float4 fragmentMain(constant UniformData& uniforms [[buffer(0)]],\n"
-    "                             VertexOutput in [[stage_in]]) {\n"
-    "    return uniforms.uColor;\n"
-    "}",
+SkSL::String emit_MSL_uniform_struct(const char* structName, SkSpan<const Uniform> uniforms) {
+    SkSL::String result;
 
-    // draw triangles with given color, using uniform buffer and vertex data
-    "#include <metal_stdlib>\n"
-    "#include <simd/simd.h>\n"
-    "using namespace metal;\n"
-    "\n"
-    "typedef struct {\n"
-    "    float2 position [[attribute(0)]];\n"
-    "} VertexInput;\n"
-    "\n"
-    "typedef struct {\n"
-    "    float4 position [[position]];\n"
-    "} VertexOutput;\n"
-    "\n"
-    "typedef struct {\n"
-    "    float4 uPosXform;\n"
-    "    float4 uColor;\n"
-    "} UniformData;\n"
-    "\n"
-    "vertex VertexOutput vertexMain(VertexInput in [[stage_in]],\n"
-    "                               constant UniformData& uniforms [[buffer(0)]],\n"
-    "                               uint vertexID [[vertex_id]]) {\n"
-    "    VertexOutput out;\n"
-    "    float2 position = in.position;\n"
-    "    out.position.xy = position * uniforms.uPosXform.xy + uniforms.uPosXform.zw;\n"
-    "    out.position.zw = float2(0.0, 1.0);\n"
-    "    return out;\n"
-    "}\n"
-    "\n"
-    "fragment float4 fragmentMain(constant UniformData& uniforms [[buffer(0)]],\n"
-    "                             VertexOutput in [[stage_in]]) {\n"
-    "    return uniforms.uColor;\n"
-    "}",
+    result.appendf("struct %s {\n", structName);
+    for (auto u : uniforms) {
+        // TODO: this is sufficient for the sprint but should be changed to use SkSL's
+        // machinery
+        result.append("    ");
+        switch (u.type()) {
+            case SLType::kFloat4:
+                result.append("float4");
+                break;
+            case SLType::kFloat2:
+                result.append("float2");
+                break;
+            case SLType::kFloat:
+                result.append("float");
+                break;
+            case SLType::kHalf4:
+                result.append("half4");
+                break;
+            default:
+                SkASSERT(0);
+        }
 
-    // draw triangles with vertex ID and instance buffer
-    "#include <metal_stdlib>\n"
-    "#include <simd/simd.h>\n"
-    "using namespace metal;\n"
-    "\n"
-    "typedef struct {\n"
-    "    float2 position [[attribute(0)]];\n"
-    "    float2 dims [[attribute(1)]];\n"
-    "    float4 color [[attribute(2)]];\n"
-    "} InstanceInput;\n"
-    "\n"
-    "typedef struct {\n"
-    "    float4 position [[position]];\n"
-    "    float4 color;\n"
-    "} VertexOutput;\n"
-    "\n"
-    "vertex VertexOutput vertexMain(InstanceInput in [[stage_in]],\n"
-    "                               uint vertexID [[vertex_id]]) {\n"
-    "    VertexOutput out;\n"
-    "    float2 position = float2(float(vertexID >> 1), float(vertexID & 1));\n"
-    "    out.position.xy = position * in.dims + in.position;\n"
-    "    out.position.zw = float2(0.0, 1.0);\n"
-    "    out.color = in.color;"
-    "    return out;\n"
-    "}\n"
-    "\n"
-    "fragment float4 fragmentMain(VertexOutput in [[stage_in]]) {\n"
-    "    return in.color;\n"
-    "}",
-};
+        result.append(" ");
+        result.append(u.name());
+        if (u.count()) {
+            result.append("[");
+            result.append(std::to_string(u.count()));
+            result.append("]");
+        }
+        result.append(";\n");
+    }
+    result.append("};\n\n");
+    return result;
+}
 
-static constexpr NSString* kTestingOnlyShaderLabels[]  = {
-    @"Clear viewport to blue",
-    @"Clear rect with uniforms",
-    @"Draw triangles with uniform color",
-    @"Draw triangles with instance buffer"
-};
+SkSL::String emit_MSL_vertex_struct(const char* structName,
+                                    SkSpan<const Attribute> vertexAttrs,
+                                    SkSpan<const Attribute> instanceAttrs) {
+    SkSL::String result;
 
-static inline MTLVertexFormat attribute_type_to_mtlformat(VertexAttribType type) {
+    int attr = 0;
+    auto add_attrs = [&](SkSpan<const Attribute> attrs) {
+        for (auto a : attrs) {
+            // TODO: this is sufficient for the sprint but should be changed to use SkSL's
+            // machinery
+            result.append("    ");
+            switch (a.gpuType()) {
+                case SLType::kFloat4:
+                    result.append("float4");
+                    break;
+                case SLType::kFloat2:
+                    result.append("float2");
+                    break;
+                case SLType::kFloat:
+                    result.append("float");
+                    break;
+                case SLType::kHalf4:
+                    result.append("half4");
+                    break;
+                default:
+                    SkASSERT(0);
+            }
+
+            result.appendf(" %s [[attribute(%d)]];\n", a.name(), attr++);
+        }
+    };
+
+    result.appendf("struct %s {\n", structName);
+    if (!vertexAttrs.empty()) {
+        result.append("    // vertex attrs\n");
+        add_attrs(vertexAttrs);
+    }
+    if (!instanceAttrs.empty()) {
+        result.append("    // instance attrs\n");
+        add_attrs(instanceAttrs);
+    }
+
+    result.append("};\n\n");
+    return result;
+}
+
+SkSL::String get_msl(const GraphicsPipelineDesc& desc) {
+    const RenderStep* step = desc.renderStep();
+    // TODO: To more completely support end-to-end rendering, this will need to be updated so that
+    // the RenderStep shader snippet can produce a device coord, a local coord, and depth.
+    // If the paint combination doesn't need the local coord it can be ignored, otherwise we need
+    // a varying for it. The fragment function's output will need to be updated to have a color and
+    // the depth, or when there's no combination, just the depth. Lastly, we also should add the
+    // static/intrinsic uniform binding point so that we can handle normalizing the device position
+    // produced by the RenderStep automatically.
+
+    // Fixed program header
+    SkSL::String msl =
+            "#include <metal_stdlib>\n"
+            "#include <simd/simd.h>\n"
+            "using namespace metal;\n"
+            "\n"
+            "typedef struct {\n"
+            "    float4 position [[position]];\n"
+            "} VertexOutput;\n"
+            "\n";
+
+    // Typedefs needed by RenderStep
+    if (step->numUniforms() > 0) {
+        msl += emit_MSL_uniform_struct("StepUniforms", step->uniforms());
+    }
+    if (step->numVertexAttributes() > 0 || step->numInstanceAttributes() > 0) {
+        msl += emit_MSL_vertex_struct("VertexAttrs",
+                                      step->vertexAttributes(),
+                                      step->instanceAttributes());
+    }
+
+    // Vertex shader function declaration
+    msl += "vertex VertexOutput vertexMain(uint vertexID [[vertex_id]],\n"
+           "                               uint instanceID [[instance_id]]";
+    if (step->numVertexAttributes() > 0 || step->numInstanceAttributes() > 0) {
+        msl += ",\n                        VertexAttrs vtx [[stage_in]]";
+    }
+    if (step->numUniforms() > 0) {
+        msl += ",\n                        constant StepUniforms& uniforms [[buffer(0)]]";
+    }
+    msl += ") {\n";
+
+    // Vertex shader body
+    msl += "    VertexOutput out;\n";
+    msl += step->vertexMSL();
+    msl += "    return out;\n"
+           "}\n";
+
+    // Typedefs needed for painting
+    // TODO: Right now hardcoded float4 color uniform but will come from Combination once that is
+    // stored on the GraphicsPipelineDesc.
+    msl += "struct PaintUniforms {\n"
+           "    float4 color;\n"
+           "};\n";
+    msl += "fragment float4 fragmentMain(VertexOutput in [[stage_in]],\n"
+           "                             constant PaintUniforms& uniforms [[buffer(1)]]) {\n"
+           "    return uniforms.color;\n"
+           "}\n";
+
+    return msl;
+}
+
+inline MTLVertexFormat attribute_type_to_mtlformat(VertexAttribType type) {
     switch (type) {
         case VertexAttribType::kFloat:
             return MTLVertexFormatFloat;
@@ -213,13 +242,13 @@
     SK_ABORT("Unknown vertex attribute type");
 }
 
-static MTLVertexDescriptor* create_vertex_descriptor(const GraphicsPipelineDesc& desc) {
+MTLVertexDescriptor* create_vertex_descriptor(const RenderStep* step) {
     auto vertexDescriptor = [[MTLVertexDescriptor alloc] init];
     int attributeIndex = 0;
 
-    int vertexAttributeCount = desc.numVertexAttributes();
+    int vertexAttributeCount = step->numVertexAttributes();
     size_t vertexAttributeOffset = 0;
-    for (const auto& attribute : desc.vertexAttributes()) {
+    for (const auto& attribute : step->vertexAttributes()) {
         MTLVertexAttributeDescriptor* mtlAttribute = vertexDescriptor.attributes[attributeIndex];
         MTLVertexFormat format = attribute_type_to_mtlformat(attribute.cpuType());
         SkASSERT(MTLVertexFormatInvalid != format);
@@ -230,7 +259,7 @@
         vertexAttributeOffset += attribute.sizeAlign4();
         attributeIndex++;
     }
-    SkASSERT(vertexAttributeOffset == desc.vertexStride());
+    SkASSERT(vertexAttributeOffset == step->vertexStride());
 
     if (vertexAttributeCount) {
         MTLVertexBufferLayoutDescriptor* vertexBufferLayout =
@@ -240,9 +269,9 @@
         vertexBufferLayout.stride = vertexAttributeOffset;
     }
 
-    int instanceAttributeCount = desc.numInstanceAttributes();
+    int instanceAttributeCount = step->numInstanceAttributes();
     size_t instanceAttributeOffset = 0;
-    for (const auto& attribute : desc.instanceAttributes()) {
+    for (const auto& attribute : step->instanceAttributes()) {
         MTLVertexAttributeDescriptor* mtlAttribute = vertexDescriptor.attributes[attributeIndex];
         MTLVertexFormat format = attribute_type_to_mtlformat(attribute.cpuType());
         SkASSERT(MTLVertexFormatInvalid != format);
@@ -253,7 +282,7 @@
         instanceAttributeOffset += attribute.sizeAlign4();
         attributeIndex++;
     }
-    SkASSERT(instanceAttributeOffset == desc.instanceStride());
+    SkASSERT(instanceAttributeOffset == step->instanceStride());
 
     if (instanceAttributeCount) {
         MTLVertexBufferLayoutDescriptor* instanceBufferLayout =
@@ -265,21 +294,18 @@
     return vertexDescriptor;
 }
 
+} // anonymous namespace
+
 sk_sp<GraphicsPipeline> GraphicsPipeline::Make(const Gpu* gpu,
                                                const skgpu::GraphicsPipelineDesc& desc) {
     sk_cfp<MTLRenderPipelineDescriptor*> psoDescriptor([[MTLRenderPipelineDescriptor alloc] init]);
 
-    // Temp pipeline for now that just fills the viewport with blue
-    int shaderIndex = desc.testingOnlyShaderIndex();
-    SkSL::String shaderText;
-    shaderText.append(kTestingOnlyShaders[shaderIndex]);
-
-    auto metallib = CompileShaderLibrary(gpu, shaderText);
+    auto metallib = CompileShaderLibrary(gpu, get_msl(desc));
     if (!metallib) {
         return nullptr;
     }
 
-    (*psoDescriptor).label = kTestingOnlyShaderLabels[shaderIndex];
+    (*psoDescriptor).label = @(desc.renderStep()->name());
 
     (*psoDescriptor).vertexFunction =
             [*metallib newFunctionWithName: @"vertexMain"];
@@ -287,7 +313,7 @@
             [*metallib newFunctionWithName: @"fragmentMain"];
 
     // TODO: I *think* this gets cleaned up by the pipelineDescriptor?
-    (*psoDescriptor).vertexDescriptor = create_vertex_descriptor(desc);
+    (*psoDescriptor).vertexDescriptor = create_vertex_descriptor(desc.renderStep());
 
     // TODO: I *think* this gets cleaned up by the pipelineDescriptor as well?
     auto mtlColorAttachment = [[MTLRenderPipelineColorAttachmentDescriptor alloc] init];
@@ -308,8 +334,9 @@
         SkDebugf("Errors:\n%s", error.debugDescription.UTF8String);
         return nullptr;
     }
-    return sk_sp<GraphicsPipeline>(new GraphicsPipeline(std::move(pso), desc.vertexStride(),
-                                                        desc.instanceStride()));
+    return sk_sp<GraphicsPipeline>(new GraphicsPipeline(std::move(pso),
+                                                        desc.renderStep()->vertexStride(),
+                                                        desc.renderStep()->instanceStride()));
 }
 
 } // namespace skgpu::mtl
diff --git a/experimental/graphite/src/render/StencilAndFillPathRenderer.cpp b/experimental/graphite/src/render/StencilAndFillPathRenderer.cpp
index a855804..d32e86b 100644
--- a/experimental/graphite/src/render/StencilAndFillPathRenderer.cpp
+++ b/experimental/graphite/src/render/StencilAndFillPathRenderer.cpp
@@ -70,6 +70,14 @@
 
     const char* name() const override { return "fill-bounds"; }
 
+    const char* vertexMSL() const override {
+        // TODO: apply transform matrix from uniform data
+        // TODO: RenderSteps should not worry about RTAdjust, but currently the mtl pipeline does
+        // account for it, so this geometry won't be in the right coordinate system yet.
+        return "out.position.xy = vtx.position;\n"
+               "out.position.zw = float2(0.0, 1.0);\n";
+    }
+
     void writeVertices(DrawWriter* writer, const Shape& shape) const override {
         // TODO: Need to account for the transform eventually, but that requires more plumbing
         writer->appendVertices(4)
diff --git a/tests/graphite/CommandBufferTest.cpp b/tests/graphite/CommandBufferTest.cpp
index 1027d27..9ec6b06 100644
--- a/tests/graphite/CommandBufferTest.cpp
+++ b/tests/graphite/CommandBufferTest.cpp
@@ -13,13 +13,17 @@
 #include "experimental/graphite/include/mtl/MtlTypes.h"
 #include "experimental/graphite/src/Buffer.h"
 #include "experimental/graphite/src/CommandBuffer.h"
+#include "experimental/graphite/src/ContextUtils.h"
 #include "experimental/graphite/src/DrawBufferManager.h"
 #include "experimental/graphite/src/DrawWriter.h"
 #include "experimental/graphite/src/Gpu.h"
 #include "experimental/graphite/src/GraphicsPipeline.h"
+#include "experimental/graphite/src/Renderer.h"
 #include "experimental/graphite/src/ResourceProvider.h"
 #include "experimental/graphite/src/Texture.h"
 #include "experimental/graphite/src/TextureProxy.h"
+#include "experimental/graphite/src/UniformManager.h"
+#include "experimental/graphite/src/geom/Shape.h"
 
 #if GRAPHITE_TEST_UTILS
 // set to 1 if you want to do GPU capture of the commandBuffer
@@ -28,6 +32,188 @@
 
 using namespace skgpu;
 
+namespace {
+
+class FullscreenRectDraw final : public RenderStep {
+public:
+    ~FullscreenRectDraw() override {}
+
+    static const RenderStep* Singleton() {
+        static const FullscreenRectDraw kSingleton;
+        return &kSingleton;
+    }
+
+    const char* name() const override { return "fullscreen-rect"; }
+
+    const char* vertexMSL() const override {
+        return "float2 position = float2(float(vertexID >> 1), float(vertexID & 1));\n"
+               "out.position.xy = position * 2 - 1;\n"
+               "out.position.zw = float2(0.0, 1.0);\n";
+    }
+
+    void writeVertices(DrawWriter* writer, const Shape&) const override {
+        // This RenderStep ignores shape and just needs to draw 4 data-less vertices
+        writer->draw({}, {}, 4);
+    }
+
+    sk_sp<UniformData> writeUniforms(Layout, const Shape&) const override {
+        return nullptr;
+    }
+
+private:
+    FullscreenRectDraw() : RenderStep(Flags::kPerformsShading,
+                                      /*uniforms=*/{},
+                                      PrimitiveType::kTriangleStrip,
+                                      /*vertexAttrs=*/{},
+                                      /*instanceAttrs=*/{}) {}
+};
+
+class UniformRectDraw final : public RenderStep {
+public:
+    ~UniformRectDraw() override {}
+
+    static const RenderStep* Singleton() {
+        static const UniformRectDraw kSingleton;
+        return &kSingleton;
+    }
+
+    const char* name() const override { return "uniform-rect"; }
+
+    const char* vertexMSL() const override {
+        return "float2 position = float2(float(vertexID >> 1), float(vertexID & 1));\n"
+               "out.position.xy = position * uniforms.scale + uniforms.translate;\n"
+               "out.position.zw = float2(0.0, 1.0);\n";
+    }
+
+    void writeVertices(DrawWriter* writer, const Shape&) const override {
+        // The shape is upload via uniforms, so this just needs to record 4 data-less vertices
+        writer->draw({}, {}, 4);
+    }
+
+    sk_sp<UniformData> writeUniforms(Layout layout, const Shape& shape) const override {
+        SkASSERT(shape.isRect());
+        // TODO: A << API for uniforms would be nice, particularly if it could take pre-computed
+        // offsets for each uniform.
+        auto uniforms = UniformData::Make(this->numUniforms(), this->uniforms().data(),
+                                          sizeof(float) * 4);
+        float2 scale = shape.rect().size();
+        float2 translate = shape.rect().topLeft();
+        memcpy(uniforms->data(), &scale, sizeof(float2));
+        memcpy(uniforms->data() + sizeof(float2), &translate, sizeof(float2));
+        return uniforms;
+    }
+
+private:
+    UniformRectDraw() : RenderStep(Flags::kPerformsShading,
+                                   /*uniforms=*/{{"scale",     SLType::kFloat2},
+                                                 {"translate", SLType::kFloat2}},
+                                   PrimitiveType::kTriangleStrip,
+                                   /*vertexAttrs=*/{},
+                                   /*instanceAttrs=*/{}) {}
+};
+
+class TriangleRectDraw final : public RenderStep {
+public:
+    ~TriangleRectDraw() override {}
+
+    static const RenderStep* Singleton() {
+        static const TriangleRectDraw kSingleton;
+        return &kSingleton;
+    }
+
+    const char* name() const override { return "triangle-rect"; }
+
+    const char* vertexMSL() const override {
+        return "float2 position = vtx.position;\n"
+               "out.position.xy = position * uniforms.scale + uniforms.translate;\n"
+               "out.position.zw = float2(0.0, 1.0);\n";
+    }
+
+    void writeVertices(DrawWriter* writer, const Shape& shape) const override {
+        DrawBufferManager* bufferMgr = writer->bufferManager();
+        auto [vertexWriter, vertices] = bufferMgr->getVertexWriter(4 * this->vertexStride());
+        vertexWriter << 0.5f * (shape.rect().left() + 1.f)  << 0.5f * (shape.rect().top() + 1.f)
+                     << 0.5f * (shape.rect().left() + 1.f)  << 0.5f * (shape.rect().bot() + 1.f)
+                     << 0.5f * (shape.rect().right() + 1.f) << 0.5f * (shape.rect().top() + 1.f)
+                     << 0.5f * (shape.rect().right() + 1.f) << 0.5f * (shape.rect().bot() + 1.f);
+
+        // TODO: Would be nice to re-use this
+        auto [indexWriter, indices] = bufferMgr->getIndexWriter(6 * sizeof(uint16_t));
+        indexWriter << 0 << 1 << 2
+                    << 2 << 1 << 3;
+
+        writer->draw(vertices, indices, 6);
+    }
+
+    sk_sp<UniformData> writeUniforms(Layout layout, const Shape&) const override {
+        auto uniforms = UniformData::Make(this->numUniforms(), this->uniforms().data(),
+                                          sizeof(float) * 4);
+        float data[4] = {2.f, 2.f, -1.f, -1.f};
+        memcpy(uniforms->data(), data, 4 * sizeof(float));
+        return uniforms;
+    }
+
+private:
+    TriangleRectDraw()
+            : RenderStep(Flags::kPerformsShading,
+                         /*uniforms=*/{{"scale",     SLType::kFloat2},
+                                       {"translate", SLType::kFloat2}},
+                         PrimitiveType::kTriangles,
+                         /*vertexAttrs=*/{{"position", VertexAttribType::kFloat2, SLType::kFloat2}},
+                         /*instanceAttrs=*/{}) {}
+};
+
+class InstanceRectDraw final : public RenderStep {
+public:
+    ~InstanceRectDraw() override {}
+
+    static const RenderStep* Singleton() {
+        static const InstanceRectDraw kSingleton;
+        return &kSingleton;
+    }
+
+    const char* name() const override { return "instance-rect"; }
+
+    const char* vertexMSL() const override {
+        return "float2 position = float2(float(vertexID >> 1), float(vertexID & 1));\n"
+               "out.position.xy = position * vtx.dims + vtx.position;\n"
+               "out.position.zw = float2(0.0, 1.0);\n";
+    }
+
+    void writeVertices(DrawWriter* writer, const Shape& shape) const override {
+        SkASSERT(shape.isRect());
+
+        DrawBufferManager* bufferMgr = writer->bufferManager();
+
+        // TODO: To truly test draw merging, this index buffer needs to remembered across
+        // writeVertices calls
+        auto [indexWriter, indices] = bufferMgr->getIndexWriter(6 * sizeof(uint16_t));
+        indexWriter << 0 << 1 << 2
+                    << 2 << 1 << 3;
+
+        writer->setInstanceTemplate({}, indices, 6);
+        auto instanceWriter = writer->appendInstances(1);
+        instanceWriter << shape.rect().topLeft() << shape.rect().size();
+    }
+
+    sk_sp<UniformData> writeUniforms(Layout, const Shape&) const override {
+        return nullptr;
+    }
+
+private:
+    InstanceRectDraw()
+            : RenderStep(Flags::kPerformsShading,
+                         /*uniforms=*/{},
+                         PrimitiveType::kTriangles,
+                         /*vertexAttrs=*/{},
+                         /*instanceAttrs=*/ {
+                                { "position", VertexAttribType::kFloat2, SLType::kFloat2 },
+                                { "dims",     VertexAttribType::kFloat2, SLType::kFloat2 }
+                         }) {}
+};
+
+} // anonymous namespace
+
 /*
  * This is to test the various pieces of the CommandBuffer interface.
  */
@@ -71,95 +257,62 @@
     DrawBufferManager bufferMgr(gpu->resourceProvider(), 4);
 
     commandBuffer->beginRenderPass(renderPassDesc);
-
     DrawWriter drawWriter(commandBuffer->asDrawDispatcher(), &bufferMgr);
 
-    // Shared uniform buffer
-    struct UniformData {
-        SkPoint fScale;
-        SkPoint fTranslate;
+    struct RectAndColor {
+        SkRect    fRect;
         SkColor4f fColor;
     };
-    sk_sp<Buffer> uniformBuffer = gpu->resourceProvider()->findOrCreateBuffer(
-            2*sizeof(UniformData), BufferType::kUniform, PrioritizeGpuReads::kNo);
-    size_t uniformOffset = 0;
+
+    auto draw = [&](const RenderStep* step, std::vector<RectAndColor> draws) {
+        GraphicsPipelineDesc pipelineDesc;
+        pipelineDesc.setRenderStep(step);
+        drawWriter.newPipelineState(step->primitiveType(),
+                                    step->vertexStride(),
+                                    step->instanceStride());
+        auto pipeline = gpu->resourceProvider()->findOrCreateGraphicsPipeline(pipelineDesc);
+        commandBuffer->bindGraphicsPipeline(std::move(pipeline));
+
+        for (auto d : draws) {
+            drawWriter.newDynamicState();
+            Shape shape(d.fRect);
+
+            auto renderStepUniforms = step->writeUniforms(Layout::kMetal, shape);
+            if (renderStepUniforms) {
+                auto [writer, bindInfo] =
+                        bufferMgr.getUniformWriter(renderStepUniforms->dataSize());
+                writer.write(renderStepUniforms->data(), renderStepUniforms->dataSize());
+                commandBuffer->bindUniformBuffer(UniformSlot::kRenderStep,
+                                                    sk_ref_sp(bindInfo.fBuffer),
+                                                    bindInfo.fOffset);
+            }
+
+            // TODO: Hard-coded solid color uniform for the fragment shader is always combined
+            // with the RenderStep's vertex shader.
+            auto [writer, bindInfo] = bufferMgr.getUniformWriter(sizeof(SkColor4f));
+            writer.write(&d.fColor, sizeof(SkColor4f));
+            commandBuffer->bindUniformBuffer(UniformSlot::kPaint,
+                                                sk_ref_sp(bindInfo.fBuffer),
+                                                bindInfo.fOffset);
+
+            step->writeVertices(&drawWriter, shape);
+        }
+    };
 
     // Draw blue rectangle over entire rendertarget (which was red)
-    GraphicsPipelineDesc pipelineDesc;
-    pipelineDesc.setTestingOnlyShaderIndex(0);
-    auto graphicsPipeline = gpu->resourceProvider()->findOrCreateGraphicsPipeline(pipelineDesc);
-    drawWriter.newPipelineState(PrimitiveType::kTriangleStrip,
-                                pipelineDesc.vertexStride(),
-                                pipelineDesc.instanceStride());
-    commandBuffer->bindGraphicsPipeline(std::move(graphicsPipeline));
-    drawWriter.draw({}, {}, 4);
+    draw(FullscreenRectDraw::Singleton(), {{SkRect::MakeEmpty(), SkColors::kBlue}});
 
     // Draw inset yellow rectangle using uniforms
-    pipelineDesc.setTestingOnlyShaderIndex(1);
-    graphicsPipeline = gpu->resourceProvider()->findOrCreateGraphicsPipeline(pipelineDesc);
-    drawWriter.newPipelineState(PrimitiveType::kTriangleStrip,
-                                pipelineDesc.vertexStride(),
-                                pipelineDesc.instanceStride());
-    commandBuffer->bindGraphicsPipeline(std::move(graphicsPipeline));
-    UniformData* uniforms = (UniformData*)uniformBuffer->map();
-    uniforms->fScale = SkPoint({1.8, 1.8});
-    uniforms->fTranslate = SkPoint({-0.9, -0.9});
-    uniforms->fColor = SkColors::kYellow;
-    commandBuffer->bindUniformBuffer(UniformSlot::kRenderStep, uniformBuffer, uniformOffset);
-    drawWriter.draw({}, {}, 4);
-    uniformOffset += sizeof(UniformData);
-    ++uniforms;
+    draw(UniformRectDraw::Singleton(), {{{-0.9f, -0.9f, 0.9f, 0.9f}, SkColors::kYellow}});
 
     // Draw inset magenta rectangle with triangles in vertex buffer
-    pipelineDesc.setTestingOnlyShaderIndex(2);
-    Attribute vertexAttributes[1] = {
-        { "position", VertexAttribType::kFloat2, SLType::kFloat2 }
-    };
-    pipelineDesc.setVertexAttributes(vertexAttributes, 1);
-    graphicsPipeline = gpu->resourceProvider()->findOrCreateGraphicsPipeline(pipelineDesc);
-    drawWriter.newPipelineState(PrimitiveType::kTriangles,
-                                pipelineDesc.vertexStride(),
-                                pipelineDesc.instanceStride());
-    commandBuffer->bindGraphicsPipeline(std::move(graphicsPipeline));
+    draw(TriangleRectDraw::Singleton(), {{{-.5f, -.5f, .5f, .5f}, SkColors::kMagenta}});
 
-    auto [vertexWriter, vertices] = bufferMgr.getVertexWriter(4 * pipelineDesc.vertexStride());
-    vertexWriter << SkPoint{0.25f, 0.25f}
-                 << SkPoint{0.25f, 0.75f}
-                 << SkPoint{0.75f, 0.25f}
-                 << SkPoint{0.75f, 0.75f};
-    auto [indexWriter, indices] = bufferMgr.getIndexWriter(6 * sizeof(uint16_t));
-    indexWriter << 0 << 1 << 2
-                << 2 << 1 << 3;
-    uniforms->fScale = SkPoint({2, 2});
-    uniforms->fTranslate = SkPoint({-1, -1});
-    uniforms->fColor = SkColors::kMagenta;
-    commandBuffer->bindUniformBuffer(UniformSlot::kRenderStep, uniformBuffer, uniformOffset);
-
-    drawWriter.draw(vertices, indices, 6);
-
-    // draw rects using instance buffer
-    pipelineDesc.setTestingOnlyShaderIndex(3);
-    Attribute instanceAttributes[3] = {
-        { "position", VertexAttribType::kFloat2, SLType::kFloat2 },
-        { "dims", VertexAttribType::kFloat2, SLType::kFloat2 },
-        { "color", VertexAttribType::kFloat4, SLType::kFloat4 }
-    };
-    pipelineDesc.setVertexAttributes(nullptr, 0);
-    pipelineDesc.setInstanceAttributes(instanceAttributes, 3);
-    graphicsPipeline = gpu->resourceProvider()->findOrCreateGraphicsPipeline(pipelineDesc);
-    drawWriter.newPipelineState(PrimitiveType::kTriangles,
-                                pipelineDesc.vertexStride(),
-                                pipelineDesc.instanceStride());
-    commandBuffer->bindGraphicsPipeline(std::move(graphicsPipeline));
-
-    drawWriter.setInstanceTemplate({}, indices, 6);
-    auto instanceWriter = drawWriter.appendInstances(2);
-    instanceWriter << SkPoint{-0.4f, -0.4f}  << SkPoint{0.4f, 0.4f}   << SkColors::kGreen
-                   << SkPoint{0.f, 0.f}      << SkPoint{0.25f, 0.25f} << SkColors::kCyan;
+    // Draw green and cyan rects using instance buffer
+    draw(InstanceRectDraw::Singleton(), { {{0.4f, -0.4f, 0.4f, 0.4f}, SkColors::kGreen},
+                                            {{0.f, 0.f, 0.25f, 0.25f},  SkColors::kCyan} });
 
     drawWriter.flush();
-    uniformBuffer->unmap();
-
     bufferMgr.transferToCommandBuffer(commandBuffer.get());
     commandBuffer->endRenderPass();