Reland "Reland "[graphite] Extracts early in drawGeometry""

This reverts commit 5aafacb1358d9dbc00f6b30a056e016e0091aa04.

Original change's description:
> Revert "Reland "[graphite] Extracts early in drawGeometry""
>
> This reverts commit 81de4113e3e7cfe8ec91413fbbe51101dcb354e3.
>
> Reason for revert: Now breaking chromium roll.
>
> Original change's description:
> > Reland "[graphite] Extracts early in drawGeometry"
> >
> > * Reintroduce notify image in use and flush in snapDrawTask.
> >
> > * Fixes an issue where multi-draw dependencies were not correctly tracked.
> >
> > This reverts commit 1b271fd02a65ba97e12bcaa32f67afa50b5d9b52.
> >
> >
> > Original change's description:
> > > Revert "[graphite] Extracts early in drawGeometry"
> > >
> > > This reverts commit 25f00cb247f23b4a8cbe7a1245bdf609fa0be846.
> > >
> > > Reason for revert: Breaks android roll
> > >
> > > Original change's description:
> > > > [graphite] Extracts early in drawGeometry
> > > >
> > > > * Moves the creation of UniquePaintIDs from DrawPass::Snap to PaintParams::toKey, which is called in Device::drawGeometry
> > > >
> > > > * Moves blend mode calculations into PaintParams, and adds an enum DstUsage to DrawTypes.
> > > >
> > > > * Moves the creation of a draw pass from DrawPass::Make to DrawList::snapDrawPass.
> > > >
> > > > * Texture and uniform trackers commensurately moved to DrawList.
> > > >
> > > > Change-Id: Ie843db44bfad0cd51773ffa7e42050fdbd7c22e3
> > > > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1045336
> > > > Commit-Queue: Thomas Smith <thomsmit@google.com>
> > > > Reviewed-by: Michael Ludwig <michaelludwig@google.com>
> > >
> > > No-Presubmit: true
> > > No-Tree-Checks: true
> > > No-Try: true
> > > Change-Id: I19ad73d77051295e37ac9adaae77f228e4934834
> > > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1052396
> > > Commit-Queue: Thomas Smith <thomsmit@google.com>
> > > Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
> >
> > Change-Id: Ib8b9aa5b3ed998bdecd3b56a03ca13f189518178
> > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1052657
> > Reviewed-by: Michael Ludwig <michaelludwig@google.com>
> > Commit-Queue: Thomas Smith <thomsmit@google.com>
>
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Change-Id: I0132ab1e71955f6a8b35b3107afe9ae48f5654aa
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1059636
> Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
> Commit-Queue: Thomas Smith <thomsmit@google.com>

Change-Id: I945aeeb179ca3d8ac1fcab857cdd8e641ba25c7a
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/1059899
Commit-Queue: Thomas Smith <thomsmit@google.com>
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
diff --git a/fuzz/FuzzPrecompile.cpp b/fuzz/FuzzPrecompile.cpp
index 81d3006..db5a758 100644
--- a/fuzz/FuzzPrecompile.cpp
+++ b/fuzz/FuzzPrecompile.cpp
@@ -25,6 +25,7 @@
 #include "src/core/SkBlenderBase.h"
 #include "src/gpu/graphite/ContextPriv.h"
 #include "src/gpu/graphite/ContextUtils.h"
+#include "src/gpu/graphite/DrawContext.h"
 #include "src/gpu/graphite/KeyContext.h"
 #include "src/gpu/graphite/PaintParams.h"
 #include "src/gpu/graphite/PaintParamsKey.h"
@@ -300,7 +301,34 @@
     SkASSERT_RELEASE(before == after);
 }
 
+sk_sp<DrawContext> get_precompile_draw_context(
+            const skgpu::graphite::Caps* caps, Context* context) {
+    std::unique_ptr<Recorder> drawRecorder = context->makeRecorder();
+    ResourceProvider* resourceProvider = drawRecorder->priv().resourceProvider();
+    constexpr SkISize drawSize = {128, 128};
+    const SkColorInfo colorInfo = SkColorInfo(kRGBA_8888_SkColorType,
+                                              kPremul_SkAlphaType,
+                                              SkColorSpace::MakeSRGB());
+    TextureInfo texInfo = caps->getDefaultSampledTextureInfo(colorInfo.colorType(),
+                                                             skgpu::Mipmapped::kNo,
+                                                             skgpu::Protected::kNo,
+                                                             skgpu::Renderable::kYes);
+    sk_sp<TextureProxy> target = TextureProxy::Make(caps,
+                                                    resourceProvider,
+                                                    drawSize,
+                                                    texInfo,
+                                                    "PrecompileTarget",
+                                                    skgpu::Budgeted::kYes);
+    sk_sp<DrawContext> precompileDrawContext = DrawContext::Make(caps,
+                                                                 std::move(target),
+                                                                 drawSize,
+                                                                 colorInfo,
+                                                                 {});
+    return precompileDrawContext;
+}
+
 void fuzz_graphite(Fuzz* fuzz, Context* context, int depth = 9) {
+    const skgpu::graphite::Caps* caps = context->priv().caps();
     std::unique_ptr<PrecompileContext> precompileContext = context->makePrecompileContext();
     std::unique_ptr<Recorder> recorder = context->makeRecorder();
     ShaderCodeDictionary* dict = context->priv().shaderCodeDictionary();
@@ -310,12 +338,15 @@
     Layout layout = context->backend() == skgpu::BackendApi::kMetal ? Layout::kMetal
                                                                     : Layout::kStd140;
 
+    // Currently, we just use this as a valid parameter for keyContext (will hit asserts otherwise)
+    sk_sp<DrawContext> drawContext = get_precompile_draw_context(caps, context);
+
     FloatStorageManager floatStorageManager;
     PaintParamsKeyBuilder builder(dict);
     PipelineDataGatherer gatherer(layout);
     sk_sp<RuntimeEffectDictionary> rtDict = sk_make_sp<RuntimeEffectDictionary>();
-    KeyContext precompileKeyContext(recorder->priv().caps(), &floatStorageManager,
-                                    &builder, &gatherer, dict, rtDict, ci);
+    KeyContext precompileKeyContext(caps, &floatStorageManager, &builder, &gatherer, dict, rtDict,
+                                    ci);
 
     DrawTypeFlags kDrawType = DrawTypeFlags::kSimpleShape;
     SkPath path = make_path();
@@ -328,7 +359,8 @@
     fuzz->next(&temp);
     Coverage coverage = coverageOptions[temp % 3];
 
-    PaintParams paintParams = PaintParams(paint,
+    PaintParams paintParams = PaintParams(recorder->priv().caps(),
+                                          paint,
                                           /* primitiveBlender= */ nullptr,
                                           /* nonMSAAClip= */ {},
                                           /* clipShader= */ nullptr,
@@ -338,6 +370,7 @@
     SkDEBUGCODE(builder.checkReset());
     SkDEBUGCODE(gatherer.checkReset());
     KeyContext keyContext(recorder.get(),
+                          drawContext.get(),
                           &floatStorageManager,
                           &builder,
                           &gatherer,
diff --git a/src/gpu/graphite/Device.cpp b/src/gpu/graphite/Device.cpp
index 0046acf..875d4fa 100644
--- a/src/gpu/graphite/Device.cpp
+++ b/src/gpu/graphite/Device.cpp
@@ -71,6 +71,7 @@
 #include "src/gpu/graphite/DrawTypes.h"
 #include "src/gpu/graphite/Image_Base_Graphite.h"
 #include "src/gpu/graphite/Image_Graphite.h"
+#include "src/gpu/graphite/KeyContext.h"
 #include "src/gpu/graphite/Log.h"
 #include "src/gpu/graphite/PaintParams.h"
 #include "src/gpu/graphite/PathAtlas.h"
@@ -141,49 +142,10 @@
     return kFillStyle;
 }
 
-bool paint_depends_on_dst(const PaintParams& paintParams) {
-    std::optional<SkBlendMode> bm = paintParams.asFinalBlendMode();
-    if (!bm.has_value()) {
-        return true; // Runtime blenders always depend on the dst
-    }
-
-    if (bm == SkBlendMode::kClear || bm == SkBlendMode::kSrc) {
-        // src and clear blending never depend on dst
-        return false;
-    } else if (bm != SkBlendMode::kSrcOver && bm != SkBlendMode::kDstOut) {
-        // any other blend mode besides src-over and dst-out use dst in some way
-        return true;
-    }
-
-    // At this point, we depend on the dst if source alpha != 1, so analyze the paint to
-    // see if it's opaque.
-    bool srcIsTransparent = !paintParams.color().isOpaque() ||
-                                  (paintParams.shader() && !paintParams.shader()->isOpaque()) ||
-                                  (paintParams.colorFilter() &&
-                                        !paintParams.colorFilter()->isAlphaUnchanged());
-
-    if (paintParams.primitiveBlender()) {
-        std::optional<SkBlendMode> primBlend = as_BB(paintParams.primitiveBlender())->asBlendMode();
-        // The primitive blender does not blend against the dst color, but it might change whether
-        // or not the src is transparent.
-        if (primBlend && !srcIsTransparent) {
-            // Since dst might be transparent, we can only preserve opacity for cases where the
-            // src coefficient is one and the dst coefficient is zero (when src alpha = 1).
-            srcIsTransparent = primBlend != SkBlendMode::kSrcOver && primBlend != SkBlendMode::kSrc;
-        } else {
-            // Runtime blender or complex blend modifies the final src color so assume it has alpha
-            srcIsTransparent = true;
-        }
-    }
-    return srcIsTransparent;
-}
-
 /** If the paint can be reduced to a solid flood-fill, determine the correct color to fill with. */
 std::optional<SkColor4f> extract_paint_color(const PaintParams& paint,
                                              const SkColorInfo& dstColorInfo) {
-    SkASSERT(!paint_depends_on_dst(paint));
-
-    std::optional<SkBlendMode> bm = paint.asFinalBlendMode();
+    std::optional<SkBlendMode> bm = paint.finalBlendMode();
     // Since we don't depend on the dst, a dst-out blend mode implies source is
     // opaque, which causes dst-out to behave like clear.
     if (bm == SkBlendMode::kClear || bm == SkBlendMode::kDstOut) {
@@ -358,6 +320,23 @@
            strategy == PathRendererStrategy::kDefault;
 }
 
+class AutoResetForDraw {
+public:
+    explicit AutoResetForDraw(PipelineDataGatherer* gatherer) : fDataGatherer(gatherer) {}
+
+    ~AutoResetForDraw() {
+        if (fDataGatherer) {
+            fDataGatherer->resetForDraw();
+        }
+    }
+
+    AutoResetForDraw(const AutoResetForDraw&) = delete;
+    AutoResetForDraw& operator=(const AutoResetForDraw&) = delete;
+
+private:
+    PipelineDataGatherer* fDataGatherer;
+};
+
 } // anonymous namespace
 
 /**
@@ -518,6 +497,12 @@
                     fRecorder->priv().caps()->defaultMSAASamplesCount());
         }
     }
+
+    const bool useStorageBuffers = fRecorder->priv().caps()->storageBufferSupport();
+    const auto& bindingReq = fRecorder->priv().caps()->resourceBindingRequirements();
+    fDataGatherer = std::make_unique<PipelineDataGatherer>(
+            useStorageBuffers ? bindingReq.fStorageBufferLayout : bindingReq.fUniformBufferLayout);
+    fKeyBuilder = std::make_unique<PaintParamsKeyBuilder>(fRecorder->priv().shaderCodeDictionary());
 }
 
 Device::~Device() {
@@ -1383,7 +1368,7 @@
                           sk_sp<SkBlender> primitiveBlender,
                           bool skipColorXform) {
     ASSERT_SINGLE_OWNER
-
+    AutoResetForDraw autoReset(fDataGatherer.get());
     if (!localToDevice.valid()) {
         // If the transform is not invertible or not finite then drawing isn't well defined.
         SKGPU_LOG_W("Skipping draw with non-invertible/non-finite transform.");
@@ -1488,47 +1473,93 @@
         clip.outsetBoundsForAA();
     }
 
-    // Figure out what dst color requirements we have, if any.
-    const SkBlenderBase* blender = as_BB(paint.getBlender());
-    const std::optional<SkBlendMode> blendMode = blender ? blender->asBlendMode()
-                                                         : SkBlendMode::kSrcOver;
-
-    // A primitive blender should be ignored if there is no primitive color to blend against.
-    // Additionally, if a renderer emits a primitive color, then a null primitive blender should
-    // be interpreted as SrcOver blending mode.
     if (!renderer || !renderer->emitsPrimitiveColor()) {
+        // Ignore primitive blender if the renderer doesn't support it
         primitiveBlender = nullptr;
     } else if (!SkToBool(primitiveBlender)) {
+        // A null blender is normally equivalent to SrcOver; coerce it to non-null so that nullity
+        // can be used by PaintParamsKeyBuilder to know when to add primitive blending blocks.
         primitiveBlender = SkBlender::Mode(SkBlendMode::kSrcOver);
     }
 
-    Coverage rendererCoverage = renderer ? renderer->coverage()
-                                         : Coverage::kSingleChannel;
-    TextureFormat targetFormat = TextureInfoPriv::ViewFormat(fDC->target()->textureInfo());
-    PaintParams shading{paint,
+    PaintParams shading{fRecorder->priv().caps(),
+                        paint,
                         std::move(primitiveBlender),
                         clip.nonMSAAClip(),
                         sk_ref_sp(clip.shader()),
-                        rendererCoverage,
-                        targetFormat,
+                        renderer ? renderer->coverage() : Coverage::kSingleChannel,
+                        TextureInfoPriv::ViewFormat(fDC->target()->textureInfo()),
                         skipColorXform};
-    if (clip.needsCoverage() && rendererCoverage == Coverage::kNone) {
-        // Must upgrade to single channel coverage if the clip requires coverage;
-        // but preserve LCD coverage if the Renderer uses that.
-        rendererCoverage = Coverage::kSingleChannel;
+
+    // Some shapes and styles combine multiple draws so the total render step count is split between
+    // the main renderer and possibly a secondaryRenderer. As we can't be sure whether a secondary
+    // renderer is required prior to getting the dstUsage from shading.toKey(), we pessimistically
+    // assume it's required for needsFlushBeforeDraw().
+    int numNewRenderSteps = 1;
+    SkStrokeRec::Style styleType = style.getStyle();
+    if (renderer) {
+        numNewRenderSteps = renderer->numRenderSteps();
+        if (styleType == SkStrokeRec::kStrokeAndFill_Style) {
+            numNewRenderSteps +=
+                fRecorder->priv().rendererProvider()->tessellatedStrokes()->numRenderSteps();
+        } else if (style.isFillStyle() && renderer->useNonAAInnerFill()) {
+            numNewRenderSteps +=
+                fRecorder->priv().rendererProvider()->nonAABounds()->numRenderSteps();
+        }
     }
-    bool dstReadRequired = !CanUseHardwareBlending(fRecorder->priv().caps(),
-                                                   targetFormat,
-                                                   blendMode,
-                                                   rendererCoverage);
-    const bool dependsOnDst = paint_depends_on_dst(shading) ||
-                              clip.shader() || !clip.nonMSAAClip().isEmpty();
+
+    // Decide if we have any reason to flush pending work. A flush may be necessary for two reasons:
+    //      1) A flush is required before updating the clip state or making any permanent changes to
+    //         a path atlas, since otherwise clip operations and/or atlas entries for the current
+    //         draw will be flushed.
+    //      2) A flush is required before shading.toKey() is called so that child tasks required by
+    //         this draw are associated with the DrawContext after the flush instead of being
+    //         added as a child of the current draw. See "Layer" tests in NotifyInUseTest.cpp.
+    DstReadStrategy dstReadStrategy = shading.dstReadRequired() ?
+                                      fDC->dstReadStrategy() : DstReadStrategy::kNoneRequired;
+    const bool needsFlush = this->needsFlushBeforeDraw(numNewRenderSteps, dstReadStrategy);
+    if (needsFlush) {
+        if (pathAtlas != nullptr) {
+            // We need to flush work for all devices associated with the current Recorder.
+            // Otherwise we may end up with outstanding draws that depend on past atlas state.
+            fRecorder->priv().flushTrackedDevices(
+                    SK_DUMP_TASKS_CODE("Device::drawGeometry Flush Before Draw"));
+        } else {
+            this->flushPendingWork(/*drawContext=*/nullptr);
+        }
+    }
+
+    // Determine the paint ID and collect the paint uniforms now before anything has been recorded.
+    // The paint may reference an SkPicture or a Graphite-backed dynamic SkImage that can trigger
+    // a flush of the Recorder.
+    KeyContext keyContext{fRecorder,
+                          fDC.get(),
+                          fRecorder->priv().refFloatStorageManager().get(),
+                          fKeyBuilder.get(),
+                          fDataGatherer.get(),
+                          localToDevice.matrix(),
+                          fDC->colorInfo(),
+                          geometry.isShape() || geometry.isEdgeAAQuad()
+                                ? KeyGenFlags::kDefault
+                                : KeyGenFlags::kDisableSamplingOptimization,
+                          paint.getColor4f()};
+    SkDEBUGCODE(fDataGatherer->checkReset());
+    SkDEBUGCODE(fKeyBuilder->checkReset());
+
+    auto keyResult = shading.toKey(keyContext);
+    if (!keyResult) {
+        // Converting the SkPaint to a pipeline and set of uniform values + sampled textures failed.
+        SKGPU_LOG_W("Key context creation failed in Device::drawGeometry, draw dropped!");
+        return;
+    }
+
+    auto [paintID, dstUsage] = *keyResult;
 
     // If we are unclipped, do not depend on the dst, and cover the target, then we can adjust
     // load ops of the renderpass to more optimally handle the draw (and avoid redundant clears).
     // NOTE: We skip this for fully-lazy render targets because the load ops may impact a larger
     // area than the Device's theoretical bounds.
-    const bool overwritesAllPixels = !dependsOnDst &&
+    const bool overwritesAllPixels = dstUsage == DstUsage::kNone &&
                                      geometry.isShape() &&
                                      geometry.shape().isFloodFill() &&
                                      !fDC->target()->isFullyLazy() &&
@@ -1546,7 +1577,7 @@
             // discarded dst can still be accessed. For non-floating point formats, that is fine,
             // but float formats can have NaNs after a discard that cause blending to fail. To
             // avoid that scenario, we clear to a known value instead.
-            if (shading.asFinalBlendMode() == SkBlendMode::kSrcOver &&
+            if (shading.finalBlendMode() == SkBlendMode::kSrcOver &&
                 TextureFormatIsFloatingPoint(
                         TextureInfoPriv::ViewFormat(fDC->target()->textureInfo()))) {
                 fDC->clear(SkColors::kMagenta); // This color doesn't matter
@@ -1557,43 +1588,6 @@
         }
     }
 
-    // Some shapes and styles combine multiple draws so the total render step count is split between
-    // the main renderer and possibly a secondaryRenderer.
-    SkStrokeRec::Style styleType = style.getStyle();
-    const Renderer* secondaryRenderer = nullptr;
-    Rect innerFillBounds = Rect::InfiniteInverted();
-    if (renderer) {
-        if (styleType == SkStrokeRec::kStrokeAndFill_Style) {
-            // `renderer` covers the fill, `secondaryRenderer` covers the stroke
-            secondaryRenderer = fRecorder->priv().rendererProvider()->tessellatedStrokes();
-        } else if (style.isFillStyle() && renderer->useNonAAInnerFill() && !dependsOnDst) {
-            // `renderer` opts into drawing a non-AA inner fill
-            innerFillBounds = get_inner_bounds(geometry, localToDevice);
-            if (!innerFillBounds.isEmptyNegativeOrNaN()) {
-                secondaryRenderer = fRecorder->priv().rendererProvider()->nonAABounds();
-            }
-        }
-    }
-    const int numNewRenderSteps = (renderer ? renderer->numRenderSteps() : 1) +
-                                  (secondaryRenderer ? secondaryRenderer->numRenderSteps() : 0);
-
-    // Decide if we have any reason to flush pending work. We want to flush before updating the clip
-    // state or making any permanent changes to a path atlas, since otherwise clip operations and/or
-    // atlas entries for the current draw will be flushed.
-    DstReadStrategy dstReadStrategy =
-            dstReadRequired ? fDC->dstReadStrategy() : DstReadStrategy::kNoneRequired;
-    const bool needsFlush = this->needsFlushBeforeDraw(numNewRenderSteps, dstReadStrategy);
-    if (needsFlush) {
-        if (pathAtlas != nullptr) {
-            // We need to flush work for all devices associated with the current Recorder.
-            // Otherwise we may end up with outstanding draws that depend on past atlas state.
-            fRecorder->priv().flushTrackedDevices(
-                    SK_DUMP_TASKS_CODE("Device::drawGeometry Flush Before Draw"));
-        } else {
-            this->flushPendingWork(/*drawContext=*/nullptr);
-        }
-    }
-
     // If an atlas path renderer was chosen we need to insert the shape into the atlas and schedule
     // it to be drawn.
     std::optional<PathAtlas::MaskAndOrigin> atlasMask;  // only used if `pathAtlas != nullptr`
@@ -1665,7 +1659,7 @@
     order.dependsOnPaintersOrder(clipOrder);
     // If a draw is not opaque, it must be drawn after the most recent draw it intersects with in
     // order to blend correctly.
-    if (rendererCoverage != Coverage::kNone || dependsOnDst) {
+    if (shading.rendererCoverage() != Coverage::kNone || dstUsage != DstUsage::kNone) {
         CompressedPaintersOrder prevDraw =
             fColorDepthBoundsManager->getMostRecentDraw(clip.drawBounds());
         order.dependsOnPaintersOrder(prevDraw);
@@ -1678,7 +1672,8 @@
         DisjointStencilIndex setIndex = fDisjointStencilSet->add(order.paintOrder(),
                                                                  clip.drawBounds());
         order.dependsOnStencil(setIndex);
-    } else if (!dependsOnDst && renderer->coverage() == Coverage::kNone && style.isFillStyle() &&
+    } else if (dstUsage == DstUsage::kNone && renderer->coverage() == Coverage::kNone &&
+               style.isFillStyle() &&
                ((geometry.isEdgeAAQuad() && geometry.edgeAAQuad().isRect()) ||
                 (geometry.isShape() && geometry.shape().isRect()))) {
         // Sort this draw front to back since it will not blend against what came before it.
@@ -1688,10 +1683,6 @@
         order.reverseDepthAsStencil();
     }
 
-    // TODO(b/330864257): This is an extra traversal of all paint effects, that can be avoided when
-    // the paint key itself is determined inside this function.
-    shading.notifyImagesInUse(fRecorder, fDC.get());
-
     // If an atlas path renderer was chosen, then record a single CoverageMaskShape draw.
     // The shape will be scheduled to be rendered or uploaded into the atlas during the
     // next invocation of flushPendingWork().
@@ -1699,8 +1690,8 @@
         // Record the draw as a fill since stroking is handled by the atlas render/upload.
         SkASSERT(atlasMask.has_value());
         auto [mask, origin] = *atlasMask;
-        fDC->recordDraw(renderer, Transform::Translate(origin.fX, origin.fY), Geometry(mask),
-                        clip, order, &shading, nullptr, dependsOnDst, dstReadRequired);
+        fDC->recordDraw(renderer, Transform::Translate(origin.fX, origin.fY), Geometry(mask), clip,
+                        order, paintID, dstUsage, fDataGatherer.get(), nullptr);
     } else {
         if (styleType == SkStrokeRec::kStroke_Style ||
             styleType == SkStrokeRec::kHairline_Style ||
@@ -1711,30 +1702,33 @@
             fDC->recordDraw(styleType == SkStrokeRec::kStrokeAndFill_Style
                                    ? fRecorder->priv().rendererProvider()->tessellatedStrokes()
                                    : renderer,
-                            localToDevice, geometry, clip, order, &shading, &stroke, dependsOnDst,
-                            dstReadRequired);
+                            localToDevice, geometry, clip, order, paintID, dstUsage,
+                            fDataGatherer.get(), &stroke);
         }
         if (styleType == SkStrokeRec::kFill_Style ||
             styleType == SkStrokeRec::kStrokeAndFill_Style) {
             // Possibly record an additional draw using the non-AA bounds renderer to fill the
             // interior with a renderer that can disable blending entirely.
+            Rect innerFillBounds = renderer->useNonAAInnerFill() && dstUsage == DstUsage::kNone &&
+                styleType != SkStrokeRec::kStrokeAndFill_Style
+                            ? get_inner_bounds(geometry, localToDevice)
+                            : Rect::InfiniteInverted();
             if (!innerFillBounds.isEmptyNegativeOrNaN()) {
-                SkASSERT(!dependsOnDst && renderer->useNonAAInnerFill());
+                SkASSERT(dstUsage == DstUsage::kNone && renderer->useNonAAInnerFill());
                 DrawOrder orderWithoutCoverage{order.depth()};
                 orderWithoutCoverage.dependsOnPaintersOrder(clipOrder);
                 // The regular draw has analytic coverage, so isn't being sorted front to back, but
                 // we do want to sort the inner fill to maximize overdraw reduction
                 orderWithoutCoverage.reverseDepthAsStencil();
-
                 fDC->recordDraw(fRecorder->priv().rendererProvider()->nonAABounds(), localToDevice,
                                 Geometry(Shape(innerFillBounds)), clip, orderWithoutCoverage,
-                                &shading, nullptr,  dependsOnDst, dstReadRequired);
+                                paintID, dstUsage, fDataGatherer.get(), nullptr);
                 // Force the coverage draw to come after the non-AA draw in order to benefit from
                 // early depth testing.
                 order.dependsOnPaintersOrder(orderWithoutCoverage.paintOrder());
             }
-            fDC->recordDraw(renderer, localToDevice, geometry, clip, order, &shading, nullptr,
-                            dependsOnDst, dstReadRequired);
+            fDC->recordDraw(renderer, localToDevice, geometry, clip, order, paintID, dstUsage,
+                            fDataGatherer.get(), nullptr);
         }
     }
 
@@ -1752,6 +1746,8 @@
                            const Shape& shape,
                            const Clip& clip,
                            DrawOrder order) {
+    AutoResetForDraw autoReset(fDataGatherer.get());
+
     // A clip draw's state is almost fully defined by the ClipStack. The only thing we need
     // to account for is selecting a Renderer and tracking the stencil buffer usage.
     Geometry geometry{shape};
@@ -1778,16 +1774,17 @@
     SkASSERT(renderer->coverage() == Coverage::kNone && renderer->requiresMSAA());
     SkASSERT(pathAtlas == nullptr);
 
-    // Clips draws are depth-only (null PaintParams), and filled (null StrokeStyle).
-    // TODO: Remove this CPU-transform once perspective is supported for all path renderers
+    // Clip draws are depth-only (invalid UniquePaintParamsID), and filled (null StrokeStyle).
+    // The data gatherer must be reset so that the DrawList can use it for any RenderStep data.
     if (localToDevice.type() == Transform::Type::kPerspective) {
         SkPath devicePath = geometry.shape().asPath().makeTransform(localToDevice.matrix().asM33());
         fDC->recordDraw(renderer, Transform::Identity(), Geometry(Shape(devicePath)), clip, order,
-                        /*paint*/nullptr, /*stroke*/nullptr, /*dependsOnDst*/false,
-                        /*dstReadReq*/false);
+                        UniquePaintParamsID::Invalid(), DstUsage::kNone, fDataGatherer.get(),
+                        /*stroke=*/nullptr);
     } else {
-        fDC->recordDraw(renderer, localToDevice, geometry, clip, order, /*paint*/nullptr,
-                        /*stroke*/nullptr, /*dependsOnDst*/false,  /*dstReadReq*/false);
+        fDC->recordDraw(renderer, localToDevice, geometry, clip, order,
+                        UniquePaintParamsID::Invalid(), DstUsage::kNone, fDataGatherer.get(),
+                        /*stroke=*/nullptr);
     }
     // This ensures that draws recorded after this clip shape has been popped off the stack will
     // be unaffected by the Z value the clip shape wrote to the depth attachment.
@@ -2023,7 +2020,7 @@
     }
 
     this->internalFlush();
-    sk_sp<Task> drawTask = fDC->snapDrawTask(fRecorder);
+    sk_sp<Task> drawTask = fDC->snapDrawTask();
     if (drawContext) {
         drawContext->recordDependency(std::move(drawTask));
     } else {
@@ -2072,7 +2069,7 @@
     fCurrentDepth = DrawOrder::kClearDepth;
     fAtlasedPathCount = 0;
 
-     // Any cleanup in the AtlasProvider
+    // Any cleanup in the AtlasProvider
     fRecorder->priv().atlasProvider()->compact();
 }
 
diff --git a/src/gpu/graphite/Device.h b/src/gpu/graphite/Device.h
index ca7451f..2919523 100644
--- a/src/gpu/graphite/Device.h
+++ b/src/gpu/graphite/Device.h
@@ -76,6 +76,8 @@
 class DrawContext;
 class Geometry;
 class Image;
+class PaintParamsKeyBuilder;
+class PipelineDataGatherer;
 class PathAtlas;
 class Renderer;
 class Shape;
@@ -341,6 +343,9 @@
     // some other task chain that makes it to the root list.
     sk_sp<Task> fLastTask;
 
+    std::unique_ptr<PaintParamsKeyBuilder> fKeyBuilder;
+    std::unique_ptr<PipelineDataGatherer> fDataGatherer;
+
     ClipStack fClip;
 
     // Tracks accumulated intersections for ordering dependent use of the color and depth attachment
diff --git a/src/gpu/graphite/DrawContext.cpp b/src/gpu/graphite/DrawContext.cpp
index fb7a295..ae4443f 100644
--- a/src/gpu/graphite/DrawContext.cpp
+++ b/src/gpu/graphite/DrawContext.cpp
@@ -86,6 +86,9 @@
         , fImageInfo(ii)
         , fSurfaceProps(props)
         , fDstReadStrategy(caps->getDstReadStrategy())
+        , fSupportsHardwareAdvancedBlend(caps->supportsHardwareAdvancedBlending())
+        , fAdvancedBlendsRequireBarrier(caps->blendEquationSupport() ==
+                                            Caps::BlendEquationSupport::kAdvancedNoncoherent)
         , fCurrentDrawTask(sk_make_sp<DrawTask>(fTarget))
         , fPendingDraws(std::make_unique<DrawList>())
         , fPendingUploads(std::make_unique<UploadList>()) {
@@ -105,37 +108,28 @@
 DrawContext::~DrawContext() = default;
 
 void DrawContext::clear(const SkColor4f& clearColor) {
-    this->discard();
-
-    fPendingLoadOp = LoadOp::kClear;
-    SkPMColor4f pmColor = clearColor.premul();
-    fPendingClearColor = pmColor.array();
+    this->resetForClearOrDiscard();
+    fPendingDraws->reset(LoadOp::kClear, clearColor);
 }
 
 void DrawContext::discard() {
+    this->resetForClearOrDiscard();
+    fPendingDraws->reset(LoadOp::kDiscard);
+}
+
+void DrawContext::resetForClearOrDiscard() {
     // Non-loading operations on a fully lazy target can corrupt data beyond the DrawContext's
     // region so should be avoided.
     SkASSERT(!fTarget->isFullyLazy());
 
-    // A fullscreen clear or discard will overwrite anything that came before, so clear the DrawList
     // NOTE: Eventually the current DrawTask should be reset, once there are no longer implicit
     // dependencies on atlas tasks between DrawContexts. When that's resolved, the only tasks in the
     // current DrawTask are those that directly impact the target, which becomes irrelevant with the
     // clear op overwriting it. For now, preserve the previous tasks that might include atlas
     // uploads that are not explicitly shared between DrawContexts.
-    if (fPendingDraws->renderStepCount() > 0) {
-        fPendingDraws = std::make_unique<DrawList>();
-    }
     if (fComputePathAtlas) {
         fComputePathAtlas->reset();
     }
-
-    // NOTE: Historically, we would switch to a clear load op on floating point render targets
-    // because analytic coverage would turn on blending for kSrc draws that filled the target. When
-    // this happened, the discard could introduce NaNs into the dst color values that would cause
-    // pixels to drop. Now we should only be calling discard() in situations that won't trigger
-    // analytic coverage, so we can still benefit from the kDiscard performance.
-    fPendingLoadOp = LoadOp::kDiscard;
 }
 
 void DrawContext::recordDraw(const Renderer* renderer,
@@ -143,17 +137,31 @@
                              const Geometry& geometry,
                              const Clip& clip,
                              DrawOrder ordering,
-                             const PaintParams* paint,
-                             const StrokeStyle* stroke,
-                             bool dependsOnDst,
-                             bool dstReadReq) {
+                             UniquePaintParamsID paintID,
+                             SkEnumBitMask<DstUsage> dstUsage,
+                             PipelineDataGatherer* gatherer,
+                             const StrokeStyle* stroke) {
     SkASSERTF(SkIRect::MakeSize(this->imageInfo().dimensions()).contains(clip.scissor()),
               "Image %dx%d, scissor %d,%d,%d,%d",
               this->imageInfo().width(), this->imageInfo().height(),
               clip.scissor().left(), clip.scissor().top(),
               clip.scissor().right(), clip.scissor().bottom());
-    fPendingDraws->recordDraw(renderer, localToDevice, geometry, clip, ordering, paint, stroke,
-                              dependsOnDst, dstReadReq);
+
+    // Determine whether a draw requires a barrier
+    BarrierType barrierBeforeDraws = BarrierType::kNone;
+    if (fDstReadStrategy == DstReadStrategy::kReadFromInput &&
+        (dstUsage & DstUsage::kDstReadRequired)) {
+        barrierBeforeDraws = BarrierType::kReadDstFromInput;
+    }
+    if ((dstUsage & DstUsage::kAdvancedBlend) &&
+        fSupportsHardwareAdvancedBlend && fAdvancedBlendsRequireBarrier) {
+        // A draw should only read from the dst OR use hardware for advanced blend modes.
+        SkASSERT(!(dstUsage & DstUsage::kDstReadRequired));
+        barrierBeforeDraws = BarrierType::kAdvancedNoncoherentBlend;
+    }
+
+    fPendingDraws->recordDraw(renderer, localToDevice, geometry, clip, ordering, paintID, dstUsage,
+                              barrierBeforeDraws, gatherer, stroke);
 }
 
 bool DrawContext::recordUpload(Recorder* recorder,
@@ -217,7 +225,7 @@
         fComputePathAtlas->reset();
     } // else platform doesn't support compute or atlas was never initialized.
 
-    if (fPendingDraws->renderStepCount() == 0 && fPendingLoadOp != LoadOp::kClear) {
+    if (!fPendingDraws->modifiesTarget()) {
         // Nothing will be rasterized to the target that warrants a RenderPassTask, but we preserve
         // any added uploads or compute tasks since those could also affect the target w/o
         // rasterizing anything directly.
@@ -239,18 +247,11 @@
     // TODO: At this point, there's only ever one DrawPass in a RenderPassTask to a target. When
     // subpasses are implemented, they will either be collected alongside fPendingDraws or added
     // to the RenderPassTask separately.
-    std::unique_ptr<DrawPass> pass = DrawPass::Make(recorder,
-                                                    std::move(fPendingDraws),
-                                                    fTarget,
-                                                    this->imageInfo(),
-                                                    std::make_pair(fPendingLoadOp, fPendingStoreOp),
-                                                    fPendingClearColor,
-                                                    drawPassDstReadStrategy);
-    fPendingDraws = std::make_unique<DrawList>();
-    // Now that there is content drawn to the target, that content must be loaded on any subsequent
-    // render pass.
-    fPendingLoadOp = LoadOp::kLoad;
-    fPendingStoreOp = StoreOp::kStore;
+    std::unique_ptr<DrawPass> pass = fPendingDraws->snapDrawPass(recorder,
+                                                                 fTarget,
+                                                                 this->imageInfo(),
+                                                                 drawPassDstReadStrategy);
+    SkASSERT(!fPendingDraws->modifiesTarget()); // Should be drained into `pass`.
 
     if (pass) {
         SkASSERT(fTarget.get() == pass->target());
@@ -308,11 +309,7 @@
     // DrawContexts now implicitly depend on.
 }
 
-sk_sp<Task> DrawContext::snapDrawTask(Recorder* recorder) {
-    // If flush() was explicitly called earlier and no new work was recorded, this call to flush()
-    // is a no-op and shouldn't hurt performance.
-    this->flush(recorder);
-
+sk_sp<Task> DrawContext::snapDrawTask() {
     if (!fCurrentDrawTask->hasTasks()) {
         return nullptr;
     }
diff --git a/src/gpu/graphite/DrawContext.h b/src/gpu/graphite/DrawContext.h
index 9b68fd5..75dd1ed 100644
--- a/src/gpu/graphite/DrawContext.h
+++ b/src/gpu/graphite/DrawContext.h
@@ -13,6 +13,7 @@
 #include "include/core/SkRefCnt.h"
 #include "include/core/SkSurfaceProps.h"
 #include "src/gpu/graphite/DrawList.h"
+#include "src/gpu/graphite/PaintParams.h"
 #include "src/gpu/graphite/ResourceTypes.h"
 #include "src/gpu/graphite/TextureProxy.h"
 #include "src/gpu/graphite/TextureProxyView.h"
@@ -57,11 +58,11 @@
 
     ~DrawContext() override;
 
-    const SkImageInfo& imageInfo() const { return fImageInfo;    }
-    const SkColorInfo& colorInfo() const { return fImageInfo.colorInfo(); }
-    TextureProxy* target()                { return fTarget.get(); }
-    const TextureProxy* target()    const { return fTarget.get(); }
-    sk_sp<TextureProxy> refTarget() const { return fTarget; }
+    const SkImageInfo& imageInfo() const  { return fImageInfo;             }
+    const SkColorInfo& colorInfo() const  { return fImageInfo.colorInfo(); }
+    TextureProxy* target()                { return fTarget.get();          }
+    const TextureProxy* target()    const { return fTarget.get();          }
+    sk_sp<TextureProxy> refTarget() const { return fTarget;                }
 
     // May be null if the target is not texturable.
     const TextureProxyView& readSurfaceView() const { return fReadView; }
@@ -78,10 +79,10 @@
                     const Geometry& geometry,
                     const Clip& clip,
                     DrawOrder ordering,
-                    const PaintParams* paint,
-                    const StrokeStyle* stroke,
-                    bool dependsOnDst,
-                    bool dstReadReq);
+                    UniquePaintParamsID paintID,
+                    SkEnumBitMask<DstUsage> dstUsage,
+                    PipelineDataGatherer* gatherer,
+                    const StrokeStyle* stroke);
 
     bool recordUpload(Recorder* recorder,
                       sk_sp<TextureProxy> targetProxy,
@@ -104,9 +105,10 @@
     // dependent tasks into the DrawTask currently being built.
     void flush(Recorder*);
 
-    // Flushes (if needed) and completes the current DrawTask, returning it to the caller.
-    // Subsequent recorded operations will be added to a new DrawTask.
-    sk_sp<Task> snapDrawTask(Recorder*);
+    // Returns the current DrawTask to the caller without flushing. Any draws and uploads still
+    // pending (i.e. flush() was not called immediately prior), along with all subsequently
+    // recorded draws and uploads, will go into a new DrawTask.
+    sk_sp<Task> snapDrawTask();
 
     // Returns the dst read strategy to use when/if a paint requires a dst read
     DstReadStrategy dstReadStrategy() const { return fDstReadStrategy; }
@@ -114,6 +116,8 @@
 private:
     DrawContext(const Caps*, sk_sp<TextureProxy>, const SkImageInfo&, const SkSurfaceProps&);
 
+    void resetForClearOrDiscard();
+
     sk_sp<TextureProxy> fTarget;
     TextureProxyView fReadView;
     SkImageInfo fImageInfo;
@@ -122,6 +126,8 @@
     // Does *not* reflect whether a dst read is needed by the DrawLists - simply specifies the
     // strategies to use should any encountered paint require it.
     const DstReadStrategy fDstReadStrategy;
+    const bool fSupportsHardwareAdvancedBlend;
+    const bool fAdvancedBlendsRequireBarrier;
 
     // The in-progress DrawTask that will be snapped and returned when some external requirement
     // must depend on the contents of this DrawContext's target. As higher-level Skia operations
@@ -135,10 +141,6 @@
     // flushing.
     std::unique_ptr<DrawList> fPendingDraws;
     std::unique_ptr<UploadList> fPendingUploads;
-    // Load and store information for the current pending draws.
-    LoadOp fPendingLoadOp = LoadOp::kLoad;
-    StoreOp fPendingStoreOp = StoreOp::kStore;
-    std::array<float, 4> fPendingClearColor = { 0, 0, 0, 0 };
 
     // Accumulates atlas coverage masks generated by compute dispatches that are required by one or
     // more entries in `fPendingDraws`. When pending draws are snapped into a new DrawPass, a
diff --git a/src/gpu/graphite/DrawList.cpp b/src/gpu/graphite/DrawList.cpp
index 1d24a9b..365e747 100644
--- a/src/gpu/graphite/DrawList.cpp
+++ b/src/gpu/graphite/DrawList.cpp
@@ -7,11 +7,143 @@
 #include "src/gpu/graphite/DrawList.h"
 
 #include "include/core/SkTypes.h"
+#include "include/gpu/graphite/Recorder.h"
+#include "src/core/SkTraceEvent.h"
+#include "src/gpu/graphite/DrawPass.h"
+#include "src/gpu/graphite/DrawWriter.h"
+#include "src/gpu/graphite/KeyContext.h"
+#include "src/gpu/graphite/RecorderPriv.h"
 #include "src/gpu/graphite/Renderer.h"
 #include "src/gpu/graphite/geom/Geometry.h"
 
 namespace skgpu::graphite {
 
+namespace {
+
+// Writes uniform data either to uniform buffers or to shared storage buffers, and tracks when
+// bindings need to change between draws.
+class UniformTracker {
+public:
+    UniformTracker(bool useStorageBuffers) : fUseStorageBuffers(useStorageBuffers) {}
+
+    bool writeUniforms(UniformDataCache& uniformCache,
+                       DrawBufferManager* bufferMgr,
+                       UniformDataCache::Index index) {
+        if (index >= UniformDataCache::kInvalidIndex) {
+            return false;
+        }
+
+        if (index == fLastIndex) {
+            return false;
+        }
+        fLastIndex = index;
+
+        UniformDataCache::Entry& uniformData = uniformCache.lookup(index);
+        const size_t uniformDataSize = uniformData.fCpuData.size();
+
+        // Upload the uniform data if we haven't already.
+        // Alternatively, re-upload the uniform data to avoid a rebind if we're using storage
+        // buffers. This will result in more data uploaded, but the tradeoff seems worthwhile.
+        if (!uniformData.fBufferBinding.fBuffer ||
+            (fUseStorageBuffers && uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer)) {
+            UniformWriter writer;
+            std::tie(writer, uniformData.fBufferBinding) =
+                    fUseStorageBuffers ? bufferMgr->getAlignedSsboWriter(1, uniformDataSize)
+                                       : bufferMgr->getUniformWriter(1, uniformDataSize);
+
+            // Early out if buffer mapping failed.
+            if (!writer) {
+                return {};
+            }
+
+            writer.write(uniformData.fCpuData.data(), uniformDataSize);
+
+            if (fUseStorageBuffers) {
+                // When using storage buffers, store the SSBO index in the binding's offset field
+                // and always use the entire buffer's size in the size field.
+                SkASSERT(uniformData.fBufferBinding.fOffset % uniformDataSize == 0);
+                uniformData.fBufferBinding.fOffset /= uniformDataSize;
+                uniformData.fBufferBinding.fSize = uniformData.fBufferBinding.fBuffer->size();
+            }
+        }
+
+        const bool needsRebind =
+                uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer ||
+                (!fUseStorageBuffers && uniformData.fBufferBinding.fOffset != fLastBinding.fOffset);
+
+        fLastBinding = uniformData.fBufferBinding;
+
+        return needsRebind;
+    }
+
+    void bindUniforms(UniformSlot slot, DrawPassCommands::List* commandList) {
+        BindBufferInfo binding = fLastBinding;
+        if (fUseStorageBuffers) {
+            // Track the SSBO index in fLastBinding, but set offset = 0 in the actual used binding.
+            binding.fOffset = 0;
+        }
+        commandList->bindUniformBuffer(binding, slot);
+    }
+
+    uint32_t ssboIndex() const {
+        // The SSBO index for the last-bound storage buffer is stored in the binding's offset field.
+        return fLastBinding.fOffset;
+    }
+
+private:
+    // Internally track the last binding returned, so that we know whether new uploads or rebindings
+    // are necessary. If we're using SSBOs, this is treated specially -- the fOffset field holds the
+    // index in the storage buffer of the last-written uniforms, and the offsets used for actual
+    // bindings are always zero.
+    BindBufferInfo fLastBinding;
+
+    // This keeps track of the last index used for writing uniforms from a provided uniform cache.
+    // If a provided index matches the last index, the uniforms are assumed to already be written
+    // and no additional uploading is performed. This assumes a UniformTracker will always be
+    // provided with the same uniform cache.
+    UniformDataCache::Index fLastIndex = UniformDataCache::kInvalidIndex;
+
+    const bool fUseStorageBuffers;
+};
+
+// Tracks when to issue BindTexturesAndSamplers commands to a command list and converts
+// TextureDataBlocks to that representation as needed.
+class TextureTracker {
+public:
+    TextureTracker(TextureDataCache* textureCache)
+            : fTextureCache(textureCache) {}
+
+    bool setCurrentTextureBindings(TextureDataCache::Index bindingIndex) {
+        if (bindingIndex < TextureDataCache::kInvalidIndex && fLastIndex != bindingIndex) {
+            fLastIndex = bindingIndex;
+            return true;
+        }
+        // No binding change
+        return false;
+    }
+
+    void bindTextures(DrawPassCommands::List* commandList) {
+        SkASSERT(fLastIndex < TextureDataCache::kInvalidIndex);
+        TextureDataBlock binding = fTextureCache->lookup(fLastIndex);
+
+        auto [textures, samplers] =
+                commandList->bindDeferredTexturesAndSamplers(binding.numTextures());
+
+        for (int i = 0; i < binding.numTextures(); ++i) {
+            auto [t, s] = binding.texture(i);
+            textures[i] = t.get();
+            samplers[i] = s;
+        }
+    }
+
+private:
+    TextureDataCache::Index fLastIndex = TextureDataCache::kInvalidIndex;
+
+    TextureDataCache* const fTextureCache;
+};
+
+} // anonymous namespace
+
 const Transform& DrawList::deduplicateTransform(const Transform& localToDevice) {
     // TODO: This is a pretty simple deduplication strategy and doesn't take advantage of the stack
     // knowledge that Device has.
@@ -26,10 +158,11 @@
                           const Geometry& geometry,
                           const Clip& clip,
                           DrawOrder ordering,
-                          const PaintParams* paint,
-                          const StrokeStyle* stroke,
-                          bool dependsOnDst,
-                          bool dstReadReq) {
+                          UniquePaintParamsID paintID,
+                          SkEnumBitMask<DstUsage> dstUsage,
+                          BarrierType barrierBeforeDraws,
+                          PipelineDataGatherer* gatherer,
+                          const StrokeStyle* stroke) {
     SkASSERT(localToDevice.valid());
     SkASSERT(!geometry.isEmpty() && !clip.drawBounds().isEmptyNegativeOrNaN());
     SkASSERT(!(renderer->depthStencilFlags() & DepthStencilFlags::kStencil) ||
@@ -37,14 +170,57 @@
 
     // TODO: Add validation that the renderer's expected shape type and stroke params match provided
 
-    fDraws.emplace_back(renderer, this->deduplicateTransform(localToDevice), geometry, clip,
-                        ordering, paint, stroke, dependsOnDst, dstReadReq);
+    // Create a sort key for every render step in this draw, extracting out any
+    // RenderStep-specific data.
+    UniformDataCache::Index shadingUniformIndex = UniformDataCache::kInvalidIndex;
+    if (paintID.isValid()) {
+        UniformDataBlock paintUniforms = gatherer->endPaintData();
+        if (paintUniforms) {
+            shadingUniformIndex = fShadingUniformDataCache.insert(paintUniforms);
+        }
+    }
 
-    // Accumulate renderer information for each draw added to this list
+    const Draw& draw = fDraws.emplace_back(renderer,
+                                           this->deduplicateTransform(localToDevice),
+                                           geometry,
+                                           clip,
+                                           ordering,
+                                           barrierBeforeDraws,
+                                           stroke);
+
     fRenderStepCount += renderer->numRenderSteps();
+
+    gatherer->setRenderStepManagerActive();
+    for (int stepIndex = 0; stepIndex < draw.renderer()->numRenderSteps(); ++stepIndex) {
+        gatherer->rewindForRenderStep();
+
+        const RenderStep* const step = draw.renderer()->steps()[stepIndex];
+        const bool performsShading = step->performsShading();
+
+        GraphicsPipelineCache::Index pipelineIndex = fPipelineCache.insert(
+                { step->renderStepID(),
+                  performsShading ? paintID : UniquePaintParamsID::Invalid() });
+
+        step->writeUniformsAndTextures(draw.drawParams(), gatherer);
+        auto [stepUniforms, combinedTextures] = gatherer->endRenderStepData(performsShading);
+
+        UniformDataCache::Index geomUniformIndex = stepUniforms ?
+                fGeometryUniformDataCache.insert(stepUniforms) : UniformDataCache::kInvalidIndex;
+        TextureDataCache::Index textureBindingIndex = combinedTextures ?
+                fTextureDataCache.insert(combinedTextures) : TextureDataCache::kInvalidIndex;
+
+        fSortKeys.push_back({&draw,
+                             stepIndex,
+                             pipelineIndex,
+                             geomUniformIndex,
+                             performsShading ? shadingUniformIndex : UniformDataCache::kInvalidIndex,
+                             textureBindingIndex});
+    }
+
+    fPassBounds.join(clip.drawBounds());
     fRequiresMSAA |= renderer->requiresMSAA();
     fDepthStencilFlags |= renderer->depthStencilFlags();
-    if (paint && dstReadReq) {
+    if (dstUsage & DstUsage::kDstReadRequired) {
         // For paints that read from the dst, update the bounds. It may later be determined that the
         // DstReadStrategy does not require them, but they are inexpensive to track.
         fDstReadBounds.join(clip.drawBounds());
@@ -56,4 +232,200 @@
     }
 #endif
 }
+
+std::unique_ptr<DrawPass> DrawList::snapDrawPass(Recorder* recorder,
+                                                 sk_sp<TextureProxy> target,
+                                                 const SkImageInfo& targetInfo,
+                                                 const DstReadStrategy dstReadStrategy) {
+    // NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change to
+    // its size should be done with care and good reason. The performance of sorting the keys is
+    // heavily tied to the total size.
+    //
+    // At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just
+    // 16 bytes. There are several ways this could be done if necessary:
+    //  - Restricting the max draw count to 16k (14-bits) and only using a single index to refer to
+    //    the uniform data => 8 bytes of key, 8 bytes of pointer.
+    //  - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal the
+    //    4 low bits from the Draw* pointer since it's 16 byte aligned.
+    //  - Compact the Draw* to an index into the original collection, although that has extra
+    //    indirection and does not work as well with SkTBlockList.
+    // In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower
+    // than an 8 byte key and unmodified pointer.
+    static_assert(sizeof(SortKey) == SkAlignTo(16 + sizeof(void*), alignof(SortKey)));
+
+    // TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so
+    // algorithms that approach O(n) in that condition may be favorable. Alternatively, could
+    // explore radix sort that is always O(n). Brief testing suggested std::sort was faster than
+    // std::stable_sort and SkTQSort on my [ml]'s Windows desktop. Also worth considering in-place
+    // vs. algorithms that require an extra O(n) storage.
+    // TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding
+    // bugs in the DrawOrder determination code?
+    std::sort(fSortKeys.begin(), fSortKeys.end());
+
+    TRACE_EVENT1("skia.gpu", TRACE_FUNC, "draw count", fDraws.count());
+
+    // The DrawList is converted directly into the DrawPass' data structures, but once the DrawPass
+    // is returned from snapDrawPass(), it is considered immutable.
+    std::unique_ptr<DrawPass> drawPass(new DrawPass(target, {fLoadOp, StoreOp::kStore}, fClearColor,
+                                                    recorder->priv().refFloatStorageManager()));
+
+    DrawBufferManager* bufferMgr = recorder->priv().drawBufferManager();
+    DrawWriter drawWriter(&drawPass->fCommandList, bufferMgr);
+    GraphicsPipelineCache::Index lastPipeline = GraphicsPipelineCache::kInvalidIndex;
+    const SkIRect targetBounds = SkIRect::MakeSize(targetInfo.dimensions());
+    SkIRect lastScissor = targetBounds;
+
+    SkASSERT(drawPass->fTarget->isFullyLazy() ||
+             SkIRect::MakeSize(drawPass->fTarget->dimensions()).contains(lastScissor));
+    drawPass->fCommandList.setScissor(lastScissor);
+
+    const Caps* caps = recorder->priv().caps();
+    const bool useStorageBuffers = caps->storageBufferSupport();
+    UniformTracker geometryUniformTracker(useStorageBuffers);
+    UniformTracker shadingUniformTracker(useStorageBuffers);
+
+    // TODO(b/372953722): Remove this forced binding command behavior once dst copies are always
+    // bound separately from the rest of the textures.
+    const bool rebindTexturesOnPipelineChange = dstReadStrategy == DstReadStrategy::kTextureCopy;
+    // Keep track of the prior draw's PaintOrder. If the current draw requires barriers and there
+    // is no pipeline or state change, then we must compare the current and prior draw's PaintOrders
+    // to determine if the draws overlap. If they do, we must inject a flush between them such that
+    // the barrier addition and draw commands are ordered correctly.
+    CompressedPaintersOrder priorDrawPaintOrder {};
+
+#if defined(SK_TRACE_GRAPHITE_PIPELINE_USE)
+    // Accumulate rough pixel area touched by each pipeline as we iterate the SortKeys
+    drawPass->fPipelineDrawAreas.push_back_n(fPipelineCache.count(), 0.f);
+#endif
+
+    TextureTracker textureBindingTracker(&fTextureDataCache);
+    for (const DrawList::SortKey& key : fSortKeys) {
+        const DrawList::Draw& draw = key.draw();
+        const RenderStep& renderStep = key.renderStep();
+
+        const bool pipelineChange = key.pipelineIndex() != lastPipeline;
+#if defined(SK_TRACE_GRAPHITE_PIPELINE_USE)
+        drawPass->fPipelineDrawAreas[key.pipelineIndex()] +=
+                draw.drawParams().clip().drawBounds().area();
+#endif
+
+        const bool geomBindingChange = geometryUniformTracker.writeUniforms(
+                fGeometryUniformDataCache, bufferMgr, key.geometryUniformIndex());
+        const bool shadingBindingChange = shadingUniformTracker.writeUniforms(
+                fShadingUniformDataCache, bufferMgr, key.shadingUniformIndex());
+
+        // TODO(b/372953722): The Dawn and Vulkan CommandBuffer implementations currently append any
+        // dst copy to the texture bind group/descriptor set automatically when processing a
+        // BindTexturesAndSamplers call because they use a single group to contain all textures.
+        // However, from the DrawPass POV, we can run into the scenario where two pipelines have the
+        // same textures+samplers except one requires a dst-copy and the other does not. In this
+        // case we wouldn't necessarily insert a new command when the pipeline changed and then
+        // end up with layout validation errors.
+        const bool textureBindingsChange = textureBindingTracker.setCurrentTextureBindings(
+                key.textureBindingIndex()) ||
+                (rebindTexturesOnPipelineChange && pipelineChange &&
+                 key.textureBindingIndex() != TextureDataCache::kInvalidIndex);
+
+        std::optional<SkIRect> newScissor =
+                renderStep.getScissor(draw.drawParams(), lastScissor, targetBounds);
+
+        const bool stateChange = geomBindingChange     ||
+                                 shadingBindingChange  ||
+                                 textureBindingsChange ||
+                                 newScissor.has_value();
+
+        // Update DrawWriter *before* we actually change any state so that accumulated draws from
+        // the previous state use the proper state.
+        if (pipelineChange) {
+            drawWriter.newPipelineState(renderStep.primitiveType(),
+                                        renderStep.staticDataStride(),
+                                        renderStep.appendDataStride(),
+                                        renderStep.getRenderStateFlags(),
+                                        draw.barrierBeforeDraws());
+        } else if (stateChange) {
+            drawWriter.newDynamicState();
+        } else if (draw.barrierBeforeDraws() != BarrierType::kNone &&
+                   priorDrawPaintOrder != draw.drawParams().order().paintOrder()) {
+            // Even if there is no pipeline or state change, we must consider whether a
+            // DrawPassCommand to add barriers must be inserted before any draw commands. If so,
+            // then determine if the current and prior draws overlap (ie, their PaintOrders are
+            // unequal). If so, perform a flush() to make sure the draw and add barrier commands are
+            // appended to the command list in the proper order.
+            drawWriter.flush();
+        }
+
+        // Make state changes before accumulating new draw data
+        if (pipelineChange) {
+            drawPass->fCommandList.bindGraphicsPipeline(key.pipelineIndex());
+            lastPipeline = key.pipelineIndex();
+        }
+        if (stateChange) {
+            if (geomBindingChange) {
+                geometryUniformTracker.bindUniforms(UniformSlot::kRenderStep,
+                                                    &drawPass->fCommandList);
+            }
+            if (shadingBindingChange) {
+                shadingUniformTracker.bindUniforms(UniformSlot::kPaint, &drawPass->fCommandList);
+            }
+            if (textureBindingsChange) {
+                textureBindingTracker.bindTextures(&drawPass->fCommandList);
+            }
+            if (newScissor.has_value()) {
+                drawPass->fCommandList.setScissor(*newScissor);
+                lastScissor = *newScissor;
+            }
+        }
+
+        uint32_t geometrySsboIndex = useStorageBuffers ? geometryUniformTracker.ssboIndex() : 0;
+        uint32_t shadingSsboIndex = useStorageBuffers ? shadingUniformTracker.ssboIndex() : 0;
+        skvx::uint2 ssboIndices = {geometrySsboIndex, shadingSsboIndex};
+        renderStep.writeVertices(&drawWriter, draw.drawParams(), ssboIndices);
+
+        if (bufferMgr->hasMappingFailed()) {
+            SKGPU_LOG_W("Failed to write necessary vertex/instance data for DrawPass, dropping!");
+            this->reset(LoadOp::kLoad); // Drain the list even though no pass is returned.
+            return nullptr;
+        }
+        // Update priorDrawPaintOrder value before iterating to analyze the next draw.
+        priorDrawPaintOrder = draw.drawParams().order().paintOrder();
+    }
+    // Finish recording draw calls for any collected data still pending at end of the loop
+    drawWriter.flush();
+
+    drawPass->fBounds = fPassBounds.roundOut().asSkIRect();
+    drawPass->fPipelineDescs   = fPipelineCache.detach();
+    drawPass->fSampledTextures = fTextureDataCache.detachTextures();
+
+    TRACE_COUNTER1("skia.gpu", "# pipelines", drawPass->fPipelineDescs.size());
+    TRACE_COUNTER1("skia.gpu", "# textures", drawPass->fSampledTextures.size());
+    TRACE_COUNTER1("skia.gpu", "# commands", drawPass->fCommandList.count());
+
+    this->reset(LoadOp::kLoad);
+
+    return drawPass;
+}
+
+void DrawList::reset(LoadOp loadOp, SkColor4f color) {
+    fLoadOp = loadOp;
+    fClearColor = color.premul().array();
+
+    fSortKeys.clear();
+    fDraws.reset();
+    fTransforms.reset();
+
+    // Reset the renderer information accumulated from the draws previously added to this list
+    fRenderStepCount = 0;
+    fRequiresMSAA = false;
+    fDepthStencilFlags = DepthStencilFlags::kNone;
+    SkDEBUGCODE(fCoverageMaskShapeDrawCount = 0);
+
+    fDstReadBounds = Rect::InfiniteInverted();
+    fPassBounds = Rect::InfiniteInverted();
+
+    fGeometryUniformDataCache.reset();
+    fShadingUniformDataCache.reset();
+    fTextureDataCache.reset();
+    fPipelineCache.reset();
+}
+
 } // namespace skgpu::graphite
diff --git a/src/gpu/graphite/DrawList.h b/src/gpu/graphite/DrawList.h
index c704bc7..e5247df 100644
--- a/src/gpu/graphite/DrawList.h
+++ b/src/gpu/graphite/DrawList.h
@@ -13,9 +13,12 @@
 #include "src/base/SkBlockAllocator.h"
 #include "src/base/SkEnumBitMask.h"
 #include "src/base/SkTBlockList.h"
+#include "src/gpu/graphite/ContextUtils.h"
+#include "src/gpu/graphite/DrawCommands.h"
 #include "src/gpu/graphite/DrawOrder.h"
 #include "src/gpu/graphite/DrawParams.h"
 #include "src/gpu/graphite/PaintParams.h"
+#include "src/gpu/graphite/PipelineData.h"
 #include "src/gpu/graphite/geom/Rect.h"
 #include "src/gpu/graphite/geom/Transform.h"
 
@@ -25,6 +28,7 @@
 
 namespace skgpu::graphite {
 
+class DrawPass;
 class Geometry;
 class Renderer;
 
@@ -84,21 +88,35 @@
                     const Geometry& geometry,
                     const Clip& clip,
                     DrawOrder ordering,
-                    const PaintParams* paint,
-                    const StrokeStyle* stroke,
-                    bool dependsOnDst,
-                    bool dstReadReq);
+                    UniquePaintParamsID paintID,
+                    SkEnumBitMask<DstUsage> dstUsage,
+                    BarrierType barrierBeforeDraws,
+                    PipelineDataGatherer* gatherer,
+                    const StrokeStyle* stroke);
+
+    std::unique_ptr<DrawPass> snapDrawPass(Recorder* recorder,
+                                           sk_sp<TextureProxy> target,
+                                           const SkImageInfo& targetInfo,
+                                           const DstReadStrategy dstReadStrategy);
 
     int renderStepCount() const { return fRenderStepCount; }
 
+    bool modifiesTarget() const {
+        return this->renderStepCount() > 0 || fLoadOp == LoadOp::kClear;
+    }
+
+    // Discard all previously recorded draws and set to the requested load op (with optional clear
+    // color).
+    void reset(LoadOp op, SkColor4f clearColor = {0.f, 0.f, 0.f, 0.f});
+
     // Bounds for a dst read required by this DrawList. These bounds are only valid if drawsReadDst
     // returns true.
     const Rect& dstReadBounds() const { return fDstReadBounds; }
+    const Rect& passBounds() const { return fPassBounds; }
     bool drawsReadDst() const { return !fDstReadBounds.isEmptyNegativeOrNaN(); }
     bool drawsRequireMSAA() const { return fRequiresMSAA; }
     SkEnumBitMask<DepthStencilFlags> depthStencilFlags() const { return fDepthStencilFlags; }
 
-
     SkDEBUGCODE(bool hasCoverageMaskDraws() const { return fCoverageMaskShapeDrawCount > 0; })
 
 private:
@@ -107,27 +125,133 @@
     struct Draw {
     public:
         Draw(const Renderer* renderer, const Transform& transform, const Geometry& geometry,
-             const Clip& clip, DrawOrder order, const PaintParams* paint, const StrokeStyle* stroke,
-             bool dependsOnDst, bool dstReadReq)
+             const Clip& clip, DrawOrder order, BarrierType barrierBeforeDraws,
+             const StrokeStyle* stroke)
                 : fRenderer(renderer)
                 , fDrawParams(transform, geometry, clip, order, stroke)
-                , fPaintParams(paint ? std::optional<PaintParams>(*paint) : std::nullopt)
-                , fDependsOnDst(dependsOnDst)
-                , fDstReadReq(dstReadReq) {}
-        const Renderer* renderer()                      const { return fRenderer;     }
-        const DrawParams& drawParams()                  const { return fDrawParams;   }
-        const std::optional<PaintParams>& paintParams() const { return fPaintParams;  }
-        bool dependsOnDst()                             const { return fDependsOnDst; }
-        bool dstReadReq()                               const { return fDstReadReq;   }
+                , fBarrierBeforeDraws(barrierBeforeDraws) {}
+
+        const Renderer* renderer()                             const { return fRenderer;           }
+        const DrawParams& drawParams()                         const { return fDrawParams;         }
+        const BarrierType& barrierBeforeDraws()                const { return fBarrierBeforeDraws; }
 
     private:
         const Renderer* fRenderer; // Owned by SharedContext of Recorder that recorded the draw
         DrawParams fDrawParams; // The DrawParam's transform is owned by fTransforms of the DrawList
-        std::optional<PaintParams> fPaintParams;
-        bool fDependsOnDst;
-        bool fDstReadReq;
+        BarrierType fBarrierBeforeDraws;
     };
 
+    template <uint64_t Bits, uint64_t Offset>
+    struct Bitfield {  // Helper to manage packed fields within a uint64_t
+        static constexpr uint64_t kMask = ((uint64_t) 1 << Bits) - 1;  // low `Bits` bits set
+        static constexpr uint64_t kOffset = Offset;  // bit position of the field's LSB
+        static constexpr uint64_t kBits = Bits;  // field width in bits
+
+        static uint32_t get(uint64_t v) { return static_cast<uint32_t>((v >> kOffset) & kMask); }
+        static uint64_t set(uint32_t v) { return (v & kMask) << kOffset; }
+    };
+
+    /**
+     * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's
+     * Renderer), which can be sorted independently. Each (step, draw) pair produces its own
+     * SortKey.
+     *
+     * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic
+     * binds within a pipeline, while still respecting the overall painter's order. This decreases
+     * the number of low-level draw commands in a command buffer and increases the size of those,
+     * allowing the GPU to operate more efficiently and have fewer bubbles within its own
+     * instruction stream.
+     *
+     * The Draw's CompressedPaintersOrder and DisjointStencilIndex represent the most significant
+     * bits of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline
+     * description is encoded in two steps:
+     *  1. The index of the RenderStep packed in the high bits to ensure each step for a draw is
+     *     ordered correctly.
+     *  2. An index into a cache of pipeline descriptions is used to encode the identity of the
+     *     pipeline (SortKeys that differ in the bits from #1 necessarily would have different
+     *     descriptions, but then the specific ordering of the RenderSteps isn't enforced).
+     * Last, the SortKey encodes an index into the set of uniform bindings accumulated for a
+     * DrawPass. This allows the SortKey to cluster draw steps that have both a compatible pipeline
+     * and do not require rebinding uniform data or other state (e.g. scissor). Since the uniform
+     * data index and the pipeline description index are packed into indices and not actual
+     * pointers, a given SortKey is only valid for a specific DrawList->DrawPass conversion.
+     */
+    class SortKey {
+    public:
+        SortKey(const DrawList::Draw* draw,
+                int renderStep,
+                GraphicsPipelineCache::Index pipelineIndex,
+                UniformDataCache::Index geomUniformIndex,
+                UniformDataCache::Index shadingUniformIndex,
+                TextureDataCache::Index textureBindingIndex)
+                : fPipelineKey(
+                          ColorDepthOrderField::set(draw->drawParams().order().paintOrder().bits())
+                          | StencilIndexField::set(draw->drawParams().order().stencilIndex().bits())
+                          | RenderStepField::set(static_cast<uint32_t>(renderStep))
+                          | PipelineField::set(pipelineIndex))
+                , fUniformKey(GeometryUniformField::set(geomUniformIndex)   |
+                              ShadingUniformField::set(shadingUniformIndex) |
+                              TextureBindingsField::set(textureBindingIndex))
+                , fDraw(draw) {
+            SkASSERT(pipelineIndex < GraphicsPipelineCache::kInvalidIndex);
+            SkASSERT(renderStep <= draw->renderer()->numRenderSteps());
+        }
+
+        bool operator<(const SortKey& k) const {
+            return fPipelineKey < k.fPipelineKey ||
+                (fPipelineKey == k.fPipelineKey && fUniformKey < k.fUniformKey);
+        }
+
+        const RenderStep& renderStep() const {
+            return fDraw->renderer()->step(RenderStepField::get(fPipelineKey));
+        }
+
+        const DrawList::Draw& draw() const { return *fDraw; }
+
+        GraphicsPipelineCache::Index pipelineIndex() const {
+            return PipelineField::get(fPipelineKey);
+        }
+        UniformDataCache::Index geometryUniformIndex() const {
+            return GeometryUniformField::get(fUniformKey);
+        }
+        UniformDataCache::Index shadingUniformIndex() const {
+            return ShadingUniformField::get(fUniformKey);
+        }
+        TextureDataCache::Index textureBindingIndex() const {
+            return TextureBindingsField::get(fUniformKey);
+        }
+
+    private:
+        // Fields are ordered from most-significant to least when sorting by 128-bit value.
+        // NOTE: We don't use C++ bit fields because field ordering is implementation defined and we
+        // need to sort consistently.
+        using ColorDepthOrderField = Bitfield<16, 48>; // sizeof(CompressedPaintersOrder)
+        using StencilIndexField    = Bitfield<16, 32>; // sizeof(DisjointStencilIndex)
+        using RenderStepField      = Bitfield<2,  30>; // bits >= log2(Renderer::kMaxRenderSteps)
+        using PipelineField        = Bitfield<30, 0>;  // bits >= log2(max total steps in draw list)
+        uint64_t fPipelineKey;
+
+        // The uniform/texture index fields need 1 extra bit to encode "no-data". Values that are
+        // greater than or equal to 2^(bits-1) represent "no-data", while values between
+        // [0, 2^(bits-1)-1] can access data arrays without extra logic.
+        using GeometryUniformField = Bitfield<17, 47>; // bits >= 1+log2(max total steps)
+        using ShadingUniformField  = Bitfield<17, 30>; // bits >= 1+log2(max total steps)
+        using TextureBindingsField = Bitfield<30, 0>;  // bits >= 1+log2(max total steps)
+        uint64_t fUniformKey;
+
+        // Backpointer to the draw that produced the sort key
+        const DrawList::Draw* fDraw;
+
+        static_assert(ColorDepthOrderField::kBits >= sizeof(CompressedPaintersOrder));
+        static_assert(StencilIndexField::kBits    >= sizeof(DisjointStencilIndex));
+        static_assert(RenderStepField::kBits      >= SkNextLog2_portable(Renderer::kMaxRenderSteps));
+        static_assert(PipelineField::kBits        >= SkNextLog2_portable(DrawList::kMaxRenderSteps));
+        static_assert(GeometryUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
+        static_assert(ShadingUniformField::kBits  >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
+        static_assert(TextureBindingsField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
+    };
+
+
     // The returned Transform reference remains valid for the lifetime of the DrawList.
     const Transform& deduplicateTransform(const Transform&);
 
@@ -145,9 +269,20 @@
     // Tracked for all paints that read from the dst. If it is later determined that the
     // DstReadStrategy is not kTextureCopy, this value can simply be ignored.
     Rect fDstReadBounds = Rect::InfiniteInverted();
+    Rect fPassBounds = Rect::InfiniteInverted();
     // Other properties of draws contained within this DrawList
     bool fRequiresMSAA = false;
     SkEnumBitMask<DepthStencilFlags> fDepthStencilFlags = DepthStencilFlags::kNone;
+
+    std::vector<SortKey> fSortKeys;
+
+    UniformDataCache fGeometryUniformDataCache;
+    UniformDataCache fShadingUniformDataCache;
+    TextureDataCache fTextureDataCache;
+    GraphicsPipelineCache fPipelineCache;
+
+    LoadOp fLoadOp = LoadOp::kLoad;
+    std::array<float, 4> fClearColor = {0.f, 0.f, 0.f, 0.f};
 };
 
 } // namespace skgpu::graphite
diff --git a/src/gpu/graphite/DrawPass.cpp b/src/gpu/graphite/DrawPass.cpp
index 403a5d4..254cfb6 100644
--- a/src/gpu/graphite/DrawPass.cpp
+++ b/src/gpu/graphite/DrawPass.cpp
@@ -6,325 +6,27 @@
  */
 #include "src/gpu/graphite/DrawPass.h"
 
-#include "include/core/SkBlendMode.h"
-#include "include/core/SkImageInfo.h"
-#include "include/gpu/graphite/Recorder.h"
-#include "include/gpu/graphite/TextureInfo.h"
-#include "include/private/base/SkAlign.h"
-#include "include/private/base/SkAssert.h"
-#include "include/private/base/SkSpan_impl.h"
-#include "src/base/SkMathPriv.h"
-#include "src/base/SkTBlockList.h"
-#include "src/base/SkVx.h"
 #include "src/core/SkTraceEvent.h"
-#include "src/gpu/BufferWriter.h"
-#include "src/gpu/graphite/Buffer.h"
-#include "src/gpu/graphite/BufferManager.h"
-#include "src/gpu/graphite/Caps.h"
-#include "src/gpu/graphite/CommandBuffer.h"
-#include "src/gpu/graphite/ContextUtils.h"
-#include "src/gpu/graphite/DrawList.h"
-#include "src/gpu/graphite/DrawOrder.h"
-#include "src/gpu/graphite/DrawParams.h"
-#include "src/gpu/graphite/DrawTypes.h"
-#include "src/gpu/graphite/DrawWriter.h"
-#include "src/gpu/graphite/GraphicsPipeline.h"
-#include "src/gpu/graphite/GraphicsPipelineDesc.h"
-#include "src/gpu/graphite/KeyContext.h"
 #include "src/gpu/graphite/Log.h"
 #include "src/gpu/graphite/PaintParams.h"
 #include "src/gpu/graphite/PaintParamsKey.h"
 #include "src/gpu/graphite/PipelineCreationTask.h"
 #include "src/gpu/graphite/PipelineData.h"
-#include "src/gpu/graphite/RecorderPriv.h"
-#include "src/gpu/graphite/Renderer.h"
-#include "src/gpu/graphite/Resource.h"
 #include "src/gpu/graphite/Resource.h"  // IWYU pragma: keep
 #include "src/gpu/graphite/ResourceProvider.h"
 #include "src/gpu/graphite/ResourceTypes.h"
 #include "src/gpu/graphite/RuntimeEffectDictionary.h"
 #include "src/gpu/graphite/Texture.h"  // IWYU pragma: keep
 #include "src/gpu/graphite/TextureProxy.h"
-#include "src/gpu/graphite/UniquePaintParamsID.h"
-#include "src/gpu/graphite/geom/Rect.h"
-#include "src/gpu/graphite/geom/Transform.h"
 
 #if defined(SK_TRACE_GRAPHITE_PIPELINE_USE)
 #include "src/gpu/graphite/RenderPassDesc.h"
 #endif
 
-#include <algorithm>
-#include <cstdint>
-#include <optional>
-#include <tuple>
-#include <vector>
-
 using namespace skia_private;
 
 namespace skgpu::graphite {
 
-class ShaderCodeDictionary;
-
-namespace {
-
-// Helper to manage packed fields within a uint64_t
-template <uint64_t Bits, uint64_t Offset>
-struct Bitfield {
-    static constexpr uint64_t kMask = ((uint64_t) 1 << Bits) - 1;
-    static constexpr uint64_t kOffset = Offset;
-    static constexpr uint64_t kBits = Bits;
-
-    static uint32_t get(uint64_t v) { return static_cast<uint32_t>((v >> kOffset) & kMask); }
-    static uint64_t set(uint32_t v) { return (v & kMask) << kOffset; }
-};
-
-// NOTE: TextureBinding's use as a key type in DenseBiMap relies on the fact that the underlying
-// data has been de-duplicated by a PipelineDataCache earlier, so that the bit identity of the data
-// blocks (e.g. address+size) is equivalent to the content equality of the texture lists.
-
-// Tracks the combination of textures from the paint and from the RenderStep to describe the full
-// binding that needs to be in the command list.
-struct TextureBinding {
-    TextureDataBlock fPaintTextures;
-    TextureDataBlock fStepTextures;
-
-    bool operator==(const TextureBinding& other) const {
-        return fPaintTextures == other.fPaintTextures &&
-               fStepTextures == other.fStepTextures;
-    }
-    bool operator!=(const TextureBinding& other) const { return !(*this == other); }
-
-    int numTextures() const {
-        return (fPaintTextures ? fPaintTextures.numTextures() : 0) +
-               (fStepTextures ? fStepTextures.numTextures() : 0);
-    }
-};
-
-// Writes uniform data either to uniform buffers or to shared storage buffers, and tracks when
-// bindings need to change between draws.
-class UniformTracker {
-public:
-    UniformTracker(bool useStorageBuffers) : fUseStorageBuffers(useStorageBuffers) {}
-
-    bool writeUniforms(UniformDataCache& uniformCache,
-                       DrawBufferManager* bufferMgr,
-                       UniformDataCache::Index index) {
-        if (index >= UniformDataCache::kInvalidIndex) {
-            return false;
-        }
-
-        if (index == fLastIndex) {
-            return false;
-        }
-        fLastIndex = index;
-
-        UniformDataCache::Entry& uniformData = uniformCache.lookup(index);
-        const size_t uniformDataSize = uniformData.fCpuData.size();
-
-        // Upload the uniform data if we haven't already.
-        // Alternatively, re-upload the uniform data to avoid a rebind if we're using storage
-        // buffers. This will result in more data uploaded, but the tradeoff seems worthwhile.
-        if (!uniformData.fBufferBinding.fBuffer ||
-            (fUseStorageBuffers && uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer)) {
-            UniformWriter writer;
-            std::tie(writer, uniformData.fBufferBinding) =
-                    fUseStorageBuffers ? bufferMgr->getAlignedSsboWriter(1, uniformDataSize)
-                                       : bufferMgr->getUniformWriter(1, uniformDataSize);
-
-            // Early out if buffer mapping failed.
-            if (!writer) {
-                return {};
-            }
-
-            writer.write(uniformData.fCpuData.data(), uniformDataSize);
-
-            if (fUseStorageBuffers) {
-                // When using storage buffers, store the SSBO index in the binding's offset field
-                // and always use the entire buffer's size in the size field.
-                SkASSERT(uniformData.fBufferBinding.fOffset % uniformDataSize == 0);
-                uniformData.fBufferBinding.fOffset /= uniformDataSize;
-                uniformData.fBufferBinding.fSize = uniformData.fBufferBinding.fBuffer->size();
-            }
-        }
-
-        const bool needsRebind =
-                uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer ||
-                (!fUseStorageBuffers && uniformData.fBufferBinding.fOffset != fLastBinding.fOffset);
-
-        fLastBinding = uniformData.fBufferBinding;
-
-        return needsRebind;
-    }
-
-    void bindUniforms(UniformSlot slot, DrawPassCommands::List* commandList) {
-        BindBufferInfo binding = fLastBinding;
-        if (fUseStorageBuffers) {
-            // Track the SSBO index in fLastBinding, but set offset = 0 in the actual used binding.
-            binding.fOffset = 0;
-        }
-        commandList->bindUniformBuffer(binding, slot);
-    }
-
-    uint32_t ssboIndex() const {
-        // The SSBO index for the last-bound storage buffer is stored in the binding's offset field.
-        return fLastBinding.fOffset;
-    }
-
-private:
-    // Internally track the last binding returned, so that we know whether new uploads or rebindings
-    // are necessary. If we're using SSBOs, this is treated specially -- the fOffset field holds the
-    // index in the storage buffer of the last-written uniforms, and the offsets used for actual
-    // bindings are always zero.
-    BindBufferInfo fLastBinding;
-
-    // This keeps track of the last index used for writing uniforms from a provided uniform cache.
-    // If a provided index matches the last index, the uniforms are assumed to already be written
-    // and no additional uploading is performed. This assumes a UniformTracker will always be
-    // provided with the same uniform cache.
-    UniformDataCache::Index fLastIndex = UniformDataCache::kInvalidIndex;
-
-    const bool fUseStorageBuffers;
-};
-
-// Tracks when to issue BindTexturesAndSamplers commands to a command list and converts
-// TextureDataBlocks to that representation as needed.
-class TextureTracker {
-public:
-    TextureTracker(TextureDataCache* textureCache)
-            : fTextureCache(textureCache) {}
-
-    bool setCurrentTextureBindings(TextureDataCache::Index bindingIndex) {
-        if (bindingIndex < TextureDataCache::kInvalidIndex && fLastIndex != bindingIndex) {
-            fLastIndex = bindingIndex;
-            return true;
-        }
-        // No binding change
-        return false;
-    }
-
-    void bindTextures(DrawPassCommands::List* commandList) {
-        SkASSERT(fLastIndex < TextureDataCache::kInvalidIndex);
-        TextureDataBlock binding = fTextureCache->lookup(fLastIndex);
-
-        auto [textures, samplers] =
-                commandList->bindDeferredTexturesAndSamplers(binding.numTextures());
-
-        for (int i = 0; i < binding.numTextures(); ++i) {
-            auto [t, s] = binding.texture(i);
-            textures[i] = t.get();
-            samplers[i] = s;
-        }
-    }
-
-private:
-    TextureDataCache::Index fLastIndex = TextureDataCache::kInvalidIndex;
-
-    TextureDataCache* const fTextureCache;
-};
-
-} // namespace
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
-/**
- * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's
- * Renderer), which can be sorted independently. Each (step, draw) pair produces its own SortKey.
- *
- * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic binds
- * within a pipeline, while still respecting the overall painter's order. This decreases the number
- * of low-level draw commands in a command buffer and increases the size of those, allowing the GPU
- * to operate more efficiently and have fewer bubbles within its own instruction stream.
- *
- * The Draw's CompresssedPaintersOrder and DisjointStencilIndex represent the most significant bits
- * of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline
- * description is encoded in two steps:
- *  1. The index of the RenderStep packed in the high bits to ensure each step for a draw is
- *     ordered correctly.
- *  2. An index into a cache of pipeline descriptions is used to encode the identity of the
- *     pipeline (SortKeys that differ in the bits from #1 necessarily would have different
- *     descriptions, but then the specific ordering of the RenderSteps isn't enforced).
- * Last, the SortKey encodes an index into the set of uniform bindings accumulated for a DrawPass.
- * This allows the SortKey to cluster draw steps that have both a compatible pipeline and do not
- * require rebinding uniform data or other state (e.g. scissor). Since the uniform data index and
- * the pipeline description index are packed into indices and not actual pointers, a given SortKey
- * is only valid for the a specific DrawList->DrawPass conversion.
- */
-class DrawPass::SortKey {
-public:
-    SortKey(const DrawList::Draw* draw,
-            int renderStep,
-            GraphicsPipelineCache::Index pipelineIndex,
-            UniformDataCache::Index geomUniformIndex,
-            UniformDataCache::Index shadingUniformIndex,
-            TextureDataCache::Index textureBindingIndex)
-        : fPipelineKey(ColorDepthOrderField::set(draw->drawParams().order().paintOrder().bits()) |
-                       StencilIndexField::set(draw->drawParams().order().stencilIndex().bits())  |
-                       RenderStepField::set(static_cast<uint32_t>(renderStep))                  |
-                       PipelineField::set(pipelineIndex))
-        , fUniformKey(GeometryUniformField::set(geomUniformIndex)   |
-                      ShadingUniformField::set(shadingUniformIndex) |
-                      TextureBindingsField::set(textureBindingIndex))
-        , fDraw(draw) {
-        SkASSERT(pipelineIndex < GraphicsPipelineCache::kInvalidIndex);
-        SkASSERT(renderStep <= draw->renderer()->numRenderSteps());
-    }
-
-    bool operator<(const SortKey& k) const {
-        return fPipelineKey < k.fPipelineKey ||
-               (fPipelineKey == k.fPipelineKey && fUniformKey < k.fUniformKey);
-    }
-
-    const RenderStep& renderStep() const {
-        return fDraw->renderer()->step(RenderStepField::get(fPipelineKey));
-    }
-
-    const DrawList::Draw& draw() const { return *fDraw; }
-
-    GraphicsPipelineCache::Index pipelineIndex() const {
-        return PipelineField::get(fPipelineKey);
-    }
-    UniformDataCache::Index geometryUniformIndex() const {
-        return GeometryUniformField::get(fUniformKey);
-    }
-    UniformDataCache::Index shadingUniformIndex() const {
-        return ShadingUniformField::get(fUniformKey);
-    }
-    TextureDataCache::Index textureBindingIndex() const {
-        return TextureBindingsField::get(fUniformKey);
-    }
-
-private:
-    // Fields are ordered from most-significant to least when sorting by 128-bit value.
-    // NOTE: We don't use C++ bit fields because field ordering is implementation defined and we
-    // need to sort consistently.
-    using ColorDepthOrderField = Bitfield<16, 48>; // sizeof(CompressedPaintersOrder)
-    using StencilIndexField    = Bitfield<16, 32>; // sizeof(DisjointStencilIndex)
-    using RenderStepField      = Bitfield<2,  30>; // bits >= log2(Renderer::kMaxRenderSteps)
-    using PipelineField        = Bitfield<30, 0>;  // bits >= log2(max total steps in draw list)
-    uint64_t fPipelineKey;
-
-    // The uniform/texture index fields need 1 extra bit to encode "no-data". Values that are
-    // greater than or equal to 2^(bits-1) represent "no-data", while values between
-    // [0, 2^(bits-1)-1] can access data arrays without extra logic.
-    using GeometryUniformField = Bitfield<17, 47>; // bits >= 1+log2(max total steps)
-    using ShadingUniformField  = Bitfield<17, 30>; // bits >= 1+log2(max total steps)
-    using TextureBindingsField = Bitfield<30, 0>;  // bits >= 1+log2(max total steps)
-    uint64_t fUniformKey;
-
-    // Backpointer to the draw that produced the sort key
-    const DrawList::Draw* fDraw;
-
-    static_assert(ColorDepthOrderField::kBits >= sizeof(CompressedPaintersOrder));
-    static_assert(StencilIndexField::kBits    >= sizeof(DisjointStencilIndex));
-    static_assert(RenderStepField::kBits      >= SkNextLog2_portable(Renderer::kMaxRenderSteps));
-    static_assert(PipelineField::kBits        >= SkNextLog2_portable(DrawList::kMaxRenderSteps));
-    static_assert(GeometryUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
-    static_assert(ShadingUniformField::kBits  >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
-    static_assert(TextureBindingsField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
-};
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-
 DrawPass::DrawPass(sk_sp<TextureProxy> target,
                    std::pair<LoadOp, StoreOp> ops,
                    std::array<float, 4> clearColor,
@@ -337,306 +39,6 @@
 
 DrawPass::~DrawPass() = default;
 
-namespace {
-bool paint_uses_advanced_blend_equation(std::optional<PaintParams> drawPaintParams) {
-    if (!drawPaintParams.has_value() || !drawPaintParams.value().asFinalBlendMode().has_value()) {
-        return false;
-    }
-
-    return (int)drawPaintParams.value().asFinalBlendMode().value() >
-           (int)SkBlendMode::kLastCoeffMode;
-}
-} // anonymous
-
-std::unique_ptr<DrawPass> DrawPass::Make(Recorder* recorder,
-                                         std::unique_ptr<DrawList> draws,
-                                         sk_sp<TextureProxy> target,
-                                         const SkImageInfo& targetInfo,
-                                         std::pair<LoadOp, StoreOp> ops,
-                                         std::array<float, 4> clearColor,
-                                         const DstReadStrategy dstReadStrategy) {
-    // NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change to
-    // its size should be done with care and good reason. The performance of sorting the keys is
-    // heavily tied to the total size.
-    //
-    // At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just
-    // 16 bytes. There are several ways this could be done if necessary:
-    //  - Restricting the max draw count to 16k (14-bits) and only using a single index to refer to
-    //    the uniform data => 8 bytes of key, 8 bytes of pointer.
-    //  - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal the
-    //    4 low bits from the Draw* pointer since it's 16 byte aligned.
-    //  - Compact the Draw* to an index into the original collection, although that has extra
-    //    indirection and does not work as well with SkTBlockList.
-    // In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower
-    // than an 8 byte key and unmodified pointer.
-    static_assert(sizeof(DrawPass::SortKey) ==
-                  SkAlignTo(16 + sizeof(void*), alignof(DrawPass::SortKey)));
-
-    TRACE_EVENT1("skia.gpu", TRACE_FUNC, "draw count", draws->fDraws.count());
-
-    // The DrawList is converted directly into the DrawPass' data structures, but once the DrawPass
-    // is returned from Make(), it is considered immutable.
-    std::unique_ptr<DrawPass> drawPass(new DrawPass(target, ops, clearColor,
-                                                    recorder->priv().refFloatStorageManager()));
-
-    Rect passBounds = Rect::InfiniteInverted();
-
-    UniformDataCache geometryUniformDataCache;
-    UniformDataCache shadingUniformDataCache;
-    TextureDataCache textureDataCache;
-    DrawBufferManager* bufferMgr = recorder->priv().drawBufferManager();
-    if (bufferMgr->hasMappingFailed()) {
-        SKGPU_LOG_W("Buffer mapping has already failed; dropping draw pass!");
-        return nullptr;
-    }
-
-    GraphicsPipelineCache pipelineCache;
-
-    // Geometry uniforms are currently always UBO-backed.
-    const Caps* caps = recorder->priv().caps();
-    const bool useStorageBuffers = caps->storageBufferSupport();
-    const ResourceBindingRequirements& bindingReqs = caps->resourceBindingRequirements();
-    Layout uniformLayout =
-            useStorageBuffers ? bindingReqs.fStorageBufferLayout : bindingReqs.fUniformBufferLayout;
-
-    ShaderCodeDictionary* dict = recorder->priv().shaderCodeDictionary();
-    PaintParamsKeyBuilder builder(dict);
-
-    // The initial layout we pass here is not important as it will be re-assigned when writing
-    // shading and geometry uniforms below.
-    PipelineDataGatherer gatherer(uniformLayout);
-    std::vector<SortKey> keys;
-    keys.reserve(draws->renderStepCount());
-
-    for (const DrawList::Draw& draw : draws->fDraws.items()) {
-        gatherer.resetForDraw();
-
-        UniquePaintParamsID shaderID = UniquePaintParamsID::Invalid();
-        UniformDataCache::Index shadingUniformIndex = UniformDataCache::kInvalidIndex;
-
-        if (draw.paintParams().has_value()) {
-            SkDEBUGCODE(builder.checkReset());
-            SkDEBUGCODE(gatherer.checkReset());
-
-            auto& geometry = draw.drawParams().geometry();
-            KeyContext keyContext(recorder,
-                                  drawPass->floatStorageManager(),
-                                  &builder,
-                                  &gatherer,
-                                  draw.drawParams().transform(),
-                                  targetInfo.colorInfo(),
-                                  geometry.isShape() || geometry.isEdgeAAQuad()
-                                    ? KeyGenFlags::kDefault
-                                    : KeyGenFlags::kDisableSamplingOptimization,
-                                  draw.paintParams().value().color());
-#if defined(SK_DEBUG)
-            auto result = draw.paintParams().value().toKey(keyContext);
-            auto [dependsOnDst, dstReadReq, usesAdvancedBlend] = *result;
-#else
-            draw.paintParams().value().toKey(keyContext);
-#endif
-            SkASSERT(dependsOnDst == draw.dependsOnDst());
-            SkASSERT(dstReadReq == draw.dstReadReq());
-            SkASSERT(usesAdvancedBlend == paint_uses_advanced_blend_equation(draw.paintParams()));
-
-            shaderID = recorder->priv().shaderCodeDictionary()->findOrCreate(&builder);
-            if (shaderID.isValid()) {
-                UniformDataBlock paintUniforms = gatherer.endPaintData();
-                if (paintUniforms) {
-                    shadingUniformIndex = shadingUniformDataCache.insert(paintUniforms);
-                }
-            }
-        } // else depth-only, no paint data
-
-        // Create a sort key for every render step in this draw, extracting out any
-        // RenderStep-specific data.
-        gatherer.setRenderStepManagerActive();
-        for (int stepIndex = 0; stepIndex < draw.renderer()->numRenderSteps(); ++stepIndex) {
-            gatherer.rewindForRenderStep();
-
-            const RenderStep* const step = draw.renderer()->steps()[stepIndex];
-            const bool performsShading = draw.paintParams().has_value() && step->performsShading();
-
-            GraphicsPipelineCache::Index pipelineIndex = pipelineCache.insert(
-                    { step->renderStepID(),
-                    performsShading ? shaderID : UniquePaintParamsID::Invalid() });
-
-            step->writeUniformsAndTextures(draw.drawParams(), &gatherer);
-            auto [stepUniforms, combinedTextures] = gatherer.endRenderStepData(performsShading);
-
-            UniformDataCache::Index geomUniformIndex = stepUniforms ?
-                    geometryUniformDataCache.insert(stepUniforms) : UniformDataCache::kInvalidIndex;
-            TextureDataCache::Index textureBindingIndex = combinedTextures ?
-                    textureDataCache.insert(combinedTextures) : TextureDataCache::kInvalidIndex;
-
-            keys.push_back({&draw, stepIndex, pipelineIndex,
-                            geomUniformIndex, shadingUniformIndex, textureBindingIndex});
-        }
-
-        passBounds.join(draw.drawParams().clip().drawBounds());
-    }
-
-    // TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so
-    // algorithms that approach O(n) in that condition may be favorable. Alternatively, could
-    // explore radix sort that is always O(n). Brief testing suggested std::sort was faster than
-    // std::stable_sort and SkTQSort on my [ml]'s Windows desktop. Also worth considering in-place
-    // vs. algorithms that require an extra O(n) storage.
-    // TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding
-    // bugs in the DrawOrder determination code?
-    std::sort(keys.begin(), keys.end());
-    DrawWriter drawWriter(&drawPass->fCommandList, bufferMgr);
-    GraphicsPipelineCache::Index lastPipeline = GraphicsPipelineCache::kInvalidIndex;
-    const SkIRect targetBounds = SkIRect::MakeSize(targetInfo.dimensions());
-    SkIRect lastScissor = targetBounds;
-
-    SkASSERT(drawPass->fTarget->isFullyLazy() ||
-             SkIRect::MakeSize(drawPass->fTarget->dimensions()).contains(lastScissor));
-    drawPass->fCommandList.setScissor(lastScissor);
-
-    UniformTracker geometryUniformTracker(useStorageBuffers);
-    UniformTracker shadingUniformTracker(useStorageBuffers);
-
-    // TODO(b/372953722): Remove this forced binding command behavior once dst copies are always
-    // bound separately from the rest of the textures.
-    const bool rebindTexturesOnPipelineChange = dstReadStrategy == DstReadStrategy::kTextureCopy;
-    // Keep track of the prior draw's PaintOrder. If the current draw requires barriers and there
-    // is no pipeline or state change, then we must compare the current and prior draw's PaintOrders
-    // to determine if the draws overlap. If they do, we must inject a flush between them such that
-    // the barrier addition and draw commands are ordered correctly.
-    CompressedPaintersOrder priorDrawPaintOrder {};
-
-    // If a draw uses an advanced blend mode and the device supports this via noncoherent blending,
-    // then we must insert the appropriate barrier and ensure that the draws do not overlap.
-    const bool advancedBlendsRequireBarrier =
-            caps->blendEquationSupport() == Caps::BlendEquationSupport::kAdvancedNoncoherent;
-
-#if defined(SK_TRACE_GRAPHITE_PIPELINE_USE)
-    // Accumulate rough pixel area touched by each pipeline as we iterate the SortKeys
-    drawPass->fPipelineDrawAreas.push_back_n(pipelineCache.count(), 0.f);
-#endif
-
-    TextureTracker textureBindingTracker(&textureDataCache);
-    for (const SortKey& key : keys) {
-        const DrawList::Draw& draw = key.draw();
-        const RenderStep& renderStep = key.renderStep();
-
-        const bool pipelineChange = key.pipelineIndex() != lastPipeline;
-#if defined(SK_TRACE_GRAPHITE_PIPELINE_USE)
-        drawPass->fPipelineDrawAreas[key.pipelineIndex()] +=
-                draw.drawParams().clip().drawBounds().area();
-#endif
-
-        const bool geomBindingChange = geometryUniformTracker.writeUniforms(
-                geometryUniformDataCache, bufferMgr, key.geometryUniformIndex());
-        const bool shadingBindingChange = shadingUniformTracker.writeUniforms(
-                shadingUniformDataCache, bufferMgr, key.shadingUniformIndex());
-
-        // TODO(b/372953722): The Dawn and Vulkan CommandBuffer implementations currently append any
-        // dst copy to the texture bind group/descriptor set automatically when processing a
-        // BindTexturesAndSamplers call because they use a single group to contain all textures.
-        // However, from the DrawPass POV, we can run into the scenario where two pipelines have the
-        // same textures+samplers except one requires a dst-copy and the other does not. In this
-        // case we wouldn't necessarily insert a new command when the pipeline changed and then
-        // end up with layout validation errors.
-        const bool textureBindingsChange = textureBindingTracker.setCurrentTextureBindings(
-                key.textureBindingIndex()) ||
-                (rebindTexturesOnPipelineChange && pipelineChange &&
-                 key.textureBindingIndex() != TextureDataCache::kInvalidIndex);
-
-        std::optional<SkIRect> newScissor =
-                renderStep.getScissor(draw.drawParams(), lastScissor, targetBounds);
-
-        // Determine + analyze draw properties to inform whether we need to issue barriers before
-        // issuing draw calls.
-        bool drawsOverlap = priorDrawPaintOrder != draw.drawParams().order().paintOrder();
-        bool drawUsesAdvancedBlendMode = paint_uses_advanced_blend_equation(draw.paintParams());
-
-        std::optional<BarrierType> barrierToAddBeforeDraws = std::nullopt;
-        if (dstReadStrategy == DstReadStrategy::kReadFromInput && draw.dstReadReq()) {
-            barrierToAddBeforeDraws = BarrierType::kReadDstFromInput;
-        }
-        if (drawUsesAdvancedBlendMode &&
-            caps->supportsHardwareAdvancedBlending() &&
-            advancedBlendsRequireBarrier) {
-            // A draw should only read from the dst OR use hardware for advanced blend modes.
-            SkASSERT(!draw.dstReadReq());
-
-            barrierToAddBeforeDraws = BarrierType::kAdvancedNoncoherentBlend;
-        }
-
-        const bool stateChange = geomBindingChange ||
-                                 shadingBindingChange ||
-                                 textureBindingsChange ||
-                                 newScissor.has_value();
-
-        // Update DrawWriter *before* we actually change any state so that accumulated draws from
-        // the previous state use the proper state.
-        if (pipelineChange) {
-            drawWriter.newPipelineState(renderStep.primitiveType(),
-                                        renderStep.staticDataStride(),
-                                        renderStep.appendDataStride(),
-                                        renderStep.getRenderStateFlags(),
-                                        barrierToAddBeforeDraws);
-        } else if (stateChange) {
-            drawWriter.newDynamicState();
-        } else if (barrierToAddBeforeDraws.has_value() && drawsOverlap) {
-            // Even if there is no pipeline or state change, we must consider whether a
-            // DrawPassCommand to add barriers must be inserted before any draw commands. If so,
-            // then determine if the current and prior draws overlap (ie, their PaintOrders are
-            // unequal). If so, perform a flush() to make sure the draw and add barrier commands are
-            // appended to the command list in the proper order.
-            drawWriter.flush();
-        }
-
-        // Make state changes before accumulating new draw data
-        if (pipelineChange) {
-            drawPass->fCommandList.bindGraphicsPipeline(key.pipelineIndex());
-            lastPipeline = key.pipelineIndex();
-        }
-        if (stateChange) {
-            if (geomBindingChange) {
-                geometryUniformTracker.bindUniforms(UniformSlot::kRenderStep,
-                                                    &drawPass->fCommandList);
-            }
-            if (shadingBindingChange) {
-                shadingUniformTracker.bindUniforms(UniformSlot::kPaint, &drawPass->fCommandList);
-            }
-            if (textureBindingsChange) {
-                textureBindingTracker.bindTextures(&drawPass->fCommandList);
-            }
-            if (newScissor.has_value()) {
-                drawPass->fCommandList.setScissor(*newScissor);
-                lastScissor = *newScissor;
-            }
-        }
-
-        uint32_t geometrySsboIndex = useStorageBuffers ? geometryUniformTracker.ssboIndex() : 0;
-        uint32_t shadingSsboIndex = useStorageBuffers ? shadingUniformTracker.ssboIndex() : 0;
-        skvx::uint2 ssboIndices = {geometrySsboIndex, shadingSsboIndex};
-        renderStep.writeVertices(&drawWriter, draw.drawParams(), ssboIndices);
-
-        if (bufferMgr->hasMappingFailed()) {
-            SKGPU_LOG_W("Failed to write necessary vertex/instance data for DrawPass, dropping!");
-            return nullptr;
-        }
-
-        // Update priorDrawPaintOrder value before iterating to analyze the next draw.
-        priorDrawPaintOrder = draw.drawParams().order().paintOrder();
-    }
-    // Finish recording draw calls for any collected data still pending at end of the loop
-    drawWriter.flush();
-
-    drawPass->fBounds = passBounds.roundOut().asSkIRect();
-    drawPass->fPipelineDescs   = pipelineCache.detach();
-    drawPass->fSampledTextures = textureDataCache.detachTextures();
-
-    TRACE_COUNTER1("skia.gpu", "# pipelines", drawPass->fPipelineDescs.size());
-    TRACE_COUNTER1("skia.gpu", "# textures", drawPass->fSampledTextures.size());
-    TRACE_COUNTER1("skia.gpu", "# commands", drawPass->fCommandList.count());
-
-    return drawPass;
-}
-
 bool DrawPass::prepareResources(ResourceProvider* resourceProvider,
                                 sk_sp<const RuntimeEffectDictionary> runtimeDict,
                                 const RenderPassDesc& renderPassDesc) {
diff --git a/src/gpu/graphite/DrawPass.h b/src/gpu/graphite/DrawPass.h
index 8738756..71fa368 100644
--- a/src/gpu/graphite/DrawPass.h
+++ b/src/gpu/graphite/DrawPass.h
@@ -15,12 +15,6 @@
 #include "src/gpu/graphite/GraphicsPipelineDesc.h"
 #include "src/gpu/graphite/GraphicsPipelineHandle.h"
 
-#include <array>
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <utility>
-
 struct SkImageInfo;
 
 namespace skgpu::graphite {
@@ -29,7 +23,6 @@
 class DrawList;
 class FloatStorageManager;
 class GraphicsPipeline;
-class Recorder;
 struct RenderPassDesc;
 class ResourceProvider;
 class RuntimeEffectDictionary;
@@ -55,16 +48,6 @@
 public:
     ~DrawPass();
 
-    // Create a DrawPass that renders the DrawList into `target` with the given load/store ops and
-    // clear color.
-    static std::unique_ptr<DrawPass> Make(Recorder*,
-                                          std::unique_ptr<DrawList>,
-                                          sk_sp<TextureProxy> target,
-                                          const SkImageInfo& targetInfo,
-                                          std::pair<LoadOp, StoreOp>,
-                                          std::array<float, 4> clearColor,
-                                          const DstReadStrategy dstReadStrategy);
-
     // Defined relative to the top-left corner of the surface the DrawPass renders to, and is
     // contained within its dimensions.
     const SkIRect&      bounds() const { return fBounds;       }
@@ -101,7 +84,7 @@
     [[nodiscard]] bool addResourceRefs(ResourceProvider*, CommandBuffer*);
 
 private:
-    class SortKey;
+    friend class DrawList; // For the constructor
 
     DrawPass(sk_sp<TextureProxy> target,
              std::pair<LoadOp, StoreOp> ops,
diff --git a/src/gpu/graphite/DrawTypes.h b/src/gpu/graphite/DrawTypes.h
index 3da8167..972f6af 100644
--- a/src/gpu/graphite/DrawTypes.h
+++ b/src/gpu/graphite/DrawTypes.h
@@ -175,10 +175,19 @@
 // These barrier types are not utilized by all backends, but we define them at this level anyhow
 // since it impacts the logic used to group & sort draws.
 enum class BarrierType : uint8_t {
+    kNone,
     kAdvancedNoncoherentBlend,
     kReadDstFromInput,
 };
 
+enum class DstUsage : uint8_t {
+    kNone            = 0,
+    kDependsOnDst    = 0b001,
+    kDstReadRequired = 0b010,
+    kAdvancedBlend   = 0b100,
+};
+SK_MAKE_BITMASK_OPS(DstUsage)
+
 enum class RenderStateFlags : unsigned {
     kNone                   = 0b0000,
     kFixed                  = 0b0001,   // Uses explicit DrawWriter::draw functions
diff --git a/src/gpu/graphite/DrawWriter.cpp b/src/gpu/graphite/DrawWriter.cpp
index f35680b..753bd63 100644
--- a/src/gpu/graphite/DrawWriter.cpp
+++ b/src/gpu/graphite/DrawWriter.cpp
@@ -92,8 +92,8 @@
 
     // Before any draw commands are added, check if the DrawWriter has an assigned barrier type
     // to issue prior to draw calls.
-    if (fBarrierToIssueBeforeDraws.has_value()) {
-        fCommandList->addBarrier(fBarrierToIssueBeforeDraws.value());
+    if (fBarrierToIssueBeforeDraws != BarrierType::kNone) {
+        fCommandList->addBarrier(fBarrierToIssueBeforeDraws);
     }
 
     // Issue the appropriate draw call (instanced vs. non-instanced) based on the current
diff --git a/src/gpu/graphite/DrawWriter.h b/src/gpu/graphite/DrawWriter.h
index 9475782..213eeb2 100644
--- a/src/gpu/graphite/DrawWriter.h
+++ b/src/gpu/graphite/DrawWriter.h
@@ -99,7 +99,7 @@
                           size_t staticStride,
                           size_t appendStride,
                           SkEnumBitMask<RenderStateFlags> newRenderState,
-                          std::optional<BarrierType> barrierType) {
+                          BarrierType barrierType) {
         this->flush();
 
         // Once flushed, any pending data must have been drawn.
@@ -114,7 +114,7 @@
         // aligned, regardless of the previous render state.
         fShouldAlign4 = SkToBool(newRenderState & RenderStateFlags::kAppendVertices);
 
-        // Assign the (optional) barrier type. If a valid value, then the DrawWriter will append
+        // Assign the barrier type. If it is not BarrierType::kNone, the DrawWriter will append
         // AddBarrier commands of the indicated type prior to appending any draw commands used with
         // this pipeline.
         fBarrierToIssueBeforeDraws = barrierType;
@@ -271,7 +271,7 @@
     // to let the next reserve() call know that we need a 4 count aligned offset.
     bool fShouldAlign4;
 
-    std::optional<BarrierType> fBarrierToIssueBeforeDraws = std::nullopt;
+    BarrierType fBarrierToIssueBeforeDraws = BarrierType::kNone;
 
     void flushInternal();
 
diff --git a/src/gpu/graphite/KeyContext.cpp b/src/gpu/graphite/KeyContext.cpp
index d6cb80f..83fe12c 100644
--- a/src/gpu/graphite/KeyContext.cpp
+++ b/src/gpu/graphite/KeyContext.cpp
@@ -9,6 +9,7 @@
 
 #include "include/effects/SkRuntimeEffect.h"
 #include "src/core/SkRuntimeEffectPriv.h"
+#include "src/gpu/graphite/DrawContext.h"
 #include "src/gpu/graphite/PaintParams.h"
 #include "src/gpu/graphite/RecorderPriv.h"
 #include "src/gpu/graphite/RuntimeEffectDictionary.h"
@@ -31,6 +32,7 @@
             , fCaps(caps) {}
 
 KeyContext::KeyContext(skgpu::graphite::Recorder* recorder,
+                       DrawContext* drawContext,
                        FloatStorageManager* floatStorageManager,
                        PaintParamsKeyBuilder* paintParamsKeyBuilder,
                        PipelineDataGatherer* pipelineDataGatherer,
@@ -39,6 +41,7 @@
                        SkEnumBitMask<KeyGenFlags> initialFlags,
                        const SkColor4f& paintColor)
         : fRecorder(recorder)
+        , fDC(drawContext)
         , fFloatStorageManager(floatStorageManager)
         , fPaintParamsKeyBuilder(paintParamsKeyBuilder)
         , fPipelineDataGatherer(pipelineDataGatherer)
@@ -55,6 +58,7 @@
 
 KeyContext::KeyContext(const KeyContext& other)
         : fRecorder(other.fRecorder)
+        , fDC(other.fDC)
         , fFloatStorageManager(other.fFloatStorageManager)
         , fPaintParamsKeyBuilder(other.fPaintParamsKeyBuilder)
         , fPipelineDataGatherer(other.fPipelineDataGatherer)
diff --git a/src/gpu/graphite/KeyContext.h b/src/gpu/graphite/KeyContext.h
index 6bb76e9..9b6126e 100644
--- a/src/gpu/graphite/KeyContext.h
+++ b/src/gpu/graphite/KeyContext.h
@@ -21,6 +21,7 @@
 namespace skgpu::graphite {
 
 class Caps;
+class DrawContext;
 enum class DstReadStrategy : uint8_t;
 class FloatStorageManager;
 class PaintParamsKeyBuilder;
@@ -62,6 +63,7 @@
 
     // Constructor for the ExtractPaintData code path (i.e., with a Recorder)
     KeyContext(Recorder*,
+               DrawContext*,
                FloatStorageManager*,
                PaintParamsKeyBuilder*,
                PipelineDataGatherer*,
@@ -74,6 +76,7 @@
     ~KeyContext();
 
     Recorder* recorder() const { return fRecorder; }
+    DrawContext* drawContext() const { return fDC; }
 
     const Caps* caps() const { return fCaps; }
 
@@ -95,6 +98,7 @@
 
 protected:
     Recorder* fRecorder = nullptr;
+    DrawContext* fDC = nullptr;
     FloatStorageManager* fFloatStorageManager;
     PaintParamsKeyBuilder* fPaintParamsKeyBuilder;
     PipelineDataGatherer* fPipelineDataGatherer;
diff --git a/src/gpu/graphite/KeyHelpers.cpp b/src/gpu/graphite/KeyHelpers.cpp
index ebc02aa..b4a4e5e 100644
--- a/src/gpu/graphite/KeyHelpers.cpp
+++ b/src/gpu/graphite/KeyHelpers.cpp
@@ -1503,26 +1503,6 @@
     keyContext.paintParamsKeyBuilder()->endBlock();
 }
 
-void notify_in_use(Recorder* recorder,
-                   DrawContext* drawContext,
-                   SkSpan<const SkRuntimeEffect::ChildPtr> children) {
-    for (const auto& child : children) {
-        if (child.type().has_value()) {
-            switch (*child.type()) {
-                case SkRuntimeEffect::ChildType::kShader:
-                    NotifyImagesInUse(recorder, drawContext, child.shader());
-                    break;
-                case SkRuntimeEffect::ChildType::kColorFilter:
-                    NotifyImagesInUse(recorder, drawContext, child.colorFilter());
-                    break;
-                case SkRuntimeEffect::ChildType::kBlender:
-                    NotifyImagesInUse(recorder, drawContext, child.blender());
-                    break;
-            }
-        } // else a null child is a no-op, so cannot sample an image
-    }
-}
-
 } // anonymous namespace
 
 void AddToKey(const KeyContext& keyContext, const SkBlender* blender) {
@@ -1546,16 +1526,6 @@
     SkUNREACHABLE;
 }
 
-void NotifyImagesInUse(Recorder* recorder, DrawContext* drawContext, const SkBlender* blender) {
-    if (!blender) {
-        return;
-    }
-    if (as_BB(blender)->type() == SkBlenderBase::BlenderType::kRuntime) {
-        const auto* rbb = static_cast<const SkRuntimeBlender*>(blender);
-        notify_in_use(recorder, drawContext, rbb->children());
-    } // else blend mode doesn't reference images
-}
-
 //--------------------------------------------------------------------------------------------------
 //--------------------------------------------------------------------------------------------------
 static SkPMColor4f map_color(const SkColor4f& c,
@@ -1713,26 +1683,6 @@
     SkUNREACHABLE;
 }
 
-void NotifyImagesInUse(Recorder* recorder, DrawContext* drawContext, const SkColorFilter* filter) {
-    if (!filter) {
-        return;
-    }
-    if (as_CFB(filter)->type() == SkColorFilterBase::Type::kCompose) {
-        // Recurse to two children
-        const auto* cf = static_cast<const SkComposeColorFilter*>(filter);
-        NotifyImagesInUse(recorder, drawContext, cf->inner().get());
-        NotifyImagesInUse(recorder, drawContext, cf->outer().get());
-    } else if (as_CFB(filter)->type() == SkColorFilterBase::Type::kWorkingFormat) {
-        // Recurse to one child
-        const auto* wfcf = static_cast<const SkWorkingFormatColorFilter*>(filter);
-        NotifyImagesInUse(recorder, drawContext, wfcf->child().get());
-    } else if (as_CFB(filter)->type() == SkColorFilterBase::Type::kRuntime) {
-        // Recurse to all children
-        const auto* rcf = static_cast<const SkRuntimeColorFilter*>(filter);
-        notify_in_use(recorder, drawContext, rcf->children());
-    } // else other color filters do not rely on SkImages
-}
-
 // ==================================================================
 
 static void add_to_key(const KeyContext& keyContext, const SkBlendShader* shader) {
@@ -1749,13 +1699,6 @@
                 AddToKey(keyContext, shader->dst().get());
             });
 }
-static void notify_in_use(Recorder* recorder,
-                          DrawContext* drawContext,
-                          const SkBlendShader* shader) {
-    // SkBlendShader uses a fixed blend mode, so there's no blender to recurse through
-    NotifyImagesInUse(recorder, drawContext, shader->src().get());
-    NotifyImagesInUse(recorder, drawContext, shader->dst().get());
-}
 
 static SkMatrix matrix_invert_or_identity(const SkMatrix& matrix) {
     SkMatrix inverseMatrix;
@@ -1781,9 +1724,6 @@
 
     keyContext.paintParamsKeyBuilder()->endBlock();
 }
-static void notify_in_use(Recorder* recorder, DrawContext* drawContext, const SkCTMShader* shader) {
-    NotifyImagesInUse(recorder, drawContext, shader->proxyShader().get());
-}
 
 static void add_to_key(const KeyContext& keyContext, const SkColorShader* shader) {
     SkASSERT(shader);
@@ -1794,9 +1734,6 @@
 
     SolidColorShaderBlock::AddBlock(keyContext, color);
 }
-static void notify_in_use(Recorder*, DrawContext*, const SkColorShader*) {
-    // No-op
-}
 
 static void add_to_key(const KeyContext& keyContext, const SkColorFilterShader* shader) {
     SkASSERT(shader);
@@ -1809,12 +1746,6 @@
                 AddToKey(keyContext, shader->filter().get());
             });
 }
-static void notify_in_use(Recorder* recorder,
-                          DrawContext* drawContext,
-                          const SkColorFilterShader* shader) {
-    NotifyImagesInUse(recorder, drawContext, shader->shader().get());
-    NotifyImagesInUse(recorder, drawContext, shader->filter().get());
-}
 
 static void add_to_key(const KeyContext& keyContext, const SkCoordClampShader* shader) {
     SkASSERT(shader);
@@ -1826,18 +1757,10 @@
     AddToKey(childContext, shader->shader().get());
     keyContext.paintParamsKeyBuilder()->endBlock();
 }
-static void notify_in_use(Recorder* recorder,
-                          DrawContext* drawContext,
-                          const SkCoordClampShader* shader) {
-    NotifyImagesInUse(recorder, drawContext, shader->shader().get());
-}
 
 static void add_to_key(const KeyContext& keyContext, const SkEmptyShader*) {
     keyContext.paintParamsKeyBuilder()->addBlock(BuiltInCodeSnippetID::kPriorOutput);
 }
-static void notify_in_use(Recorder*, DrawContext*, const SkEmptyShader*) {
-    // No-op
-}
 
 static void add_yuv_image_to_key(const KeyContext& keyContext,
                                  const SkImageShader* origShader,
@@ -2000,23 +1923,20 @@
         keyContext.paintParamsKeyBuilder()->addBlock(BuiltInCodeSnippetID::kError);
         return;
     }
-    if (!as_IB(shader->image())->isGraphiteBacked()) {
-        // GetGraphiteBacked() created a new image (or fetched a cached image) from the client
-        // image provider. This image was not available when NotifyInUse() visited the shader tree,
-        // so call notify again. These images shouldn't really be producing new tasks since it's
-        // unlikely that a client will be fulfilling with a dynamic image that wraps a long-lived
-        // SkSurface. However, the images can be linked to a surface that rendered the initial
-        // content and not calling notifyInUse() prevents unlinking the image from the Device.
-        // If the client image provider then holds on to many of these images, the leaked Device and
-        // DrawContext memory can be surprisingly high. b/338453542.
-        // TODO (b/330864257): Once paint keys are extracted at draw time, AddToKey() will be
-        // fully responsible for notifyInUse() calls and then we can simply always call this on
-        // `imageToDraw`. The DrawContext that samples the image will also be available to AddToKey
-        // so we won't have to pass in nullptr.
-        SkASSERT(as_IB(imageToDraw)->isGraphiteBacked());
-        static_cast<Image_Base*>(imageToDraw.get())->notifyInUse(keyContext.recorder(),
-                                                                 /*drawContext=*/nullptr);
-    }
+
+    // We must call notifyInUse() here to link the final, Graphite-backed 'imageToDraw'
+    // to the DrawContext that will sample it.
+    //
+    // This is necessary for two primary cases:
+    // 1. The original image was not Graphite-backed.
+    // 2. The original image was already Graphite-backed, but produced through Image::Copy, possibly
+    //    from a different DrawContext.
+    //
+    // Failing to call this can lead to leaked Device and DrawContext memory (b/338453542).
+    SkASSERT(as_IB(imageToDraw)->isGraphiteBacked());
+    SkASSERT(keyContext.drawContext());
+    static_cast<Image_Base*>(imageToDraw.get())->notifyInUse(keyContext.recorder(),
+                                                             keyContext.drawContext());
     if (as_IB(imageToDraw)->isYUVA()) {
         return add_yuv_image_to_key(keyContext,
                                     shader,
@@ -2102,17 +2022,6 @@
                 ColorSpaceTransformBlock::AddBlock(keyContext, colorXformData);
             });
 }
-static void notify_in_use(Recorder* recorder,
-                          DrawContext* drawContext,
-                          const SkImageShader* shader) {
-    auto image = as_IB(shader->image());
-    if (!image->isGraphiteBacked()) {
-        // If it's not graphite-backed, there's no pending graphite work.
-        return;
-    }
-
-    static_cast<Image_Base*>(image)->notifyInUse(recorder, drawContext);
-}
 
 static void add_to_key(const KeyContext& keyContext, const SkLocalMatrixShader* shader) {
     SkASSERT(shader);
@@ -2187,12 +2096,6 @@
     keyContext.paintParamsKeyBuilder()->endBlock();
 }
 
-static void notify_in_use(Recorder* recorder,
-                          DrawContext* drawContext,
-                          const SkLocalMatrixShader* shader) {
-    NotifyImagesInUse(recorder, drawContext, shader->wrappedShader().get());
-}
-
 // If either of these change then the corresponding change must also be made in the SkSL
 // perlin_noise_shader function.
 static_assert((int)SkPerlinNoiseShaderType::kFractalNoise ==
@@ -2233,9 +2136,6 @@
 
     PerlinNoiseShaderBlock::AddBlock(keyContext, perlinData);
 }
-static void notify_in_use(Recorder*, DrawContext*, const SkPerlinNoiseShader*) {
-    // No-op, perlin noise has no children.
-}
 
 static void add_to_key(const KeyContext& keyContext,
                        const SkPictureShader* shader) {
@@ -2309,10 +2209,6 @@
 
     AddToKey(keyContext, imgShader.get());
 }
-static void notify_in_use(Recorder*, DrawContext*, const SkPictureShader*) {
-    // While the SkPicture the shader points to, may have Graphite-backed shaders that need to be
-    // notified, that will happen when the picture is rendered into an image in add_to_key
-}
 
 static void add_to_key(const KeyContext& keyContext,
                        const SkRuntimeShader* shader) {
@@ -2333,29 +2229,18 @@
 
     keyContext.paintParamsKeyBuilder()->endBlock();
 }
-static void notify_in_use(Recorder* recorder,
-                          DrawContext* drawContext,
-                          const SkRuntimeShader* shader) {
-    notify_in_use(recorder, drawContext, shader->children());
-}
 
 static void add_to_key(const KeyContext& keyContext,
                        const SkTransformShader* shader) {
     SKGPU_LOG_W("Raster-only SkShader (SkTransformShader) encountered");
     keyContext.paintParamsKeyBuilder()->addBlock(BuiltInCodeSnippetID::kError);
 }
-static void notify_in_use(Recorder*, DrawContext*, const SkTransformShader*) {
-    // no-op
-}
 
 static void add_to_key(const KeyContext& keyContext,
                        const SkTriColorShader* shader) {
     SKGPU_LOG_W("Raster-only SkShader (SkTriColorShader) encountered");
     keyContext.paintParamsKeyBuilder()->addBlock(BuiltInCodeSnippetID::kError);
 }
-static void notify_in_use(Recorder*, DrawContext*, const SkTriColorShader*) {
-    // no-op
-}
 
 static void add_to_key(const KeyContext& keyContext,
                        const SkWorkingColorSpaceShader* shader) {
@@ -2388,11 +2273,6 @@
             ColorSpaceTransformBlock::AddBlock(keyContext, data);
         });
 }
-static void notify_in_use(Recorder* recorder,
-                          DrawContext* drawContext,
-                          const SkWorkingColorSpaceShader* shader) {
-    NotifyImagesInUse(recorder, drawContext, shader->shader().get());
-}
 
 static SkBitmap create_color_and_offset_bitmap(int numStops,
                                                const SkPMColor4f* colors,
@@ -2622,9 +2502,6 @@
     }
     SkUNREACHABLE;
 }
-static void notify_in_use(Recorder*, DrawContext*, const SkGradientBaseShader*) {
-    // Gradients do not have children, so no images to notify
-}
 
 void AddToKey(const KeyContext& keyContext, const SkShader* shader) {
     if (!shader) {
@@ -2647,24 +2524,4 @@
     SkUNREACHABLE;
 }
 
-void NotifyImagesInUse(Recorder* recorder,
-                       DrawContext* drawContext,
-                       const SkShader* shader) {
-    if (!shader) {
-        return;
-    }
-    switch (as_SB(shader)->type()) {
-#define M(type)                                                      \
-    case SkShaderBase::ShaderType::k##type:                          \
-        notify_in_use(recorder,                                      \
-                      drawContext,                                   \
-                      static_cast<const Sk##type##Shader*>(shader)); \
-        return;
-        SK_ALL_SHADERS(M)
-#undef M
-    }
-    SkUNREACHABLE;
-}
-
-
 } // namespace skgpu::graphite
diff --git a/src/gpu/graphite/KeyHelpers.h b/src/gpu/graphite/KeyHelpers.h
index 154e6d5..1281303 100644
--- a/src/gpu/graphite/KeyHelpers.h
+++ b/src/gpu/graphite/KeyHelpers.h
@@ -418,14 +418,6 @@
  */
 void AddToKey(const KeyContext& keyContext, const SkShader* shader);
 
-// TODO(b/330864257) These visitation functions are redundant with AddToKey, except that they are
-// executed in the Device::drawGeometry() stack frame, whereas the keys are currently deferred until
-// DrawPass::Make. Image use needs to be detected in the draw frame to split tasks to match client
-// actions. Once paint keys are extracted in the draw frame, this can go away entirely.
-void NotifyImagesInUse(Recorder*, DrawContext*, const SkBlender*);
-void NotifyImagesInUse(Recorder*, DrawContext*, const SkColorFilter*);
-void NotifyImagesInUse(Recorder*, DrawContext*, const SkShader*);
-
 template <typename AddBlendToKeyT, typename AddSrcToKeyT, typename AddDstToKeyT>
 void Blend(const KeyContext& keyContext,
            AddBlendToKeyT addBlendToKey,
diff --git a/src/gpu/graphite/PaintParams.cpp b/src/gpu/graphite/PaintParams.cpp
index 4c73fc4..d137d9b 100644
--- a/src/gpu/graphite/PaintParams.cpp
+++ b/src/gpu/graphite/PaintParams.cpp
@@ -18,12 +18,9 @@
 #include "src/gpu/graphite/ContextUtils.h"
 #include "src/gpu/graphite/KeyContext.h"
 #include "src/gpu/graphite/KeyHelpers.h"
-#include "src/gpu/graphite/Log.h"
 #include "src/gpu/graphite/PaintParamsKey.h"
 #include "src/gpu/graphite/PipelineData.h"
 #include "src/gpu/graphite/RecorderPriv.h"
-#include "src/gpu/graphite/Uniform.h"
-#include "src/shaders/SkShaderBase.h"
 
 namespace skgpu::graphite {
 
@@ -64,9 +61,35 @@
     return true;
 }
 
+std::optional<SkBlendMode> get_final_blendmode(SkBlender* blender) {
+    return blender ? as_BB(blender)->asBlendMode() : SkBlendMode::kSrcOver;
+}
+
+Coverage get_renderer_coverage(Coverage coverage,
+                               SkShader* clipShader,
+                               const NonMSAAClip& nonMSAAClip) {
+    return (clipShader || !nonMSAAClip.isEmpty()) && coverage == Coverage::kNone ?
+            Coverage::kSingleChannel : coverage;
+}
+
+SkEnumBitMask<DstUsage> get_dst_usage(const Caps* caps,
+                                      TextureFormat targetFormat,
+                                      std::optional<SkBlendMode> finalBlendMode,
+                                      Coverage rendererCoverage) {
+    SkEnumBitMask<DstUsage> dstUsage =
+            CanUseHardwareBlending(caps, targetFormat, finalBlendMode, rendererCoverage)
+                            ? DstUsage::kNone
+                            : DstUsage::kDstReadRequired;
+    if (finalBlendMode.has_value() && finalBlendMode.value() > SkBlendMode::kLastCoeffMode) {
+        dstUsage |= DstUsage::kAdvancedBlend;
+    }
+    return dstUsage;
+}
+
 } // anonymous namespace
 
-PaintParams::PaintParams(const SkPaint& paint,
+PaintParams::PaintParams(const Caps* caps,
+                         const SkPaint& paint,
                          sk_sp<SkBlender> primitiveBlender,
                          const NonMSAAClip& nonMSAAClip,
                          sk_sp<SkShader> clipShader,
@@ -75,15 +98,17 @@
                          bool skipColorXform)
         : fColor(paint.getColor4f())
         , fFinalBlender(paint.refBlender())
+        , fFinalBlendMode(get_final_blendmode(fFinalBlender.get()))
         , fShader(paint.refShader())
         , fColorFilter(paint.refColorFilter())
         , fPrimitiveBlender(std::move(primitiveBlender))
         , fNonMSAAClip(nonMSAAClip)
         , fClipShader(std::move(clipShader))
-        , fRendererCoverage(coverage)
+        , fRendererCoverage(get_renderer_coverage(coverage, fClipShader.get(), fNonMSAAClip))
         , fTargetFormat(targetFormat)
         , fSkipColorXform(skipColorXform)
-        , fDither(paint.isDither()) {
+        , fDither(paint.isDither())
+        , fDstUsage(get_dst_usage(caps, fTargetFormat, fFinalBlendMode, fRendererCoverage)) {
     if (!fPrimitiveBlender) {
         SkColor4f constantColor;   // if filled in, will be un-premul sRGB
         // fColor is un-premul sRGB
@@ -106,11 +131,6 @@
 PaintParams::~PaintParams() = default;
 PaintParams& PaintParams::operator=(const PaintParams& other) = default;
 
-std::optional<SkBlendMode> PaintParams::asFinalBlendMode() const {
-    return fFinalBlender ? as_BB(fFinalBlender)->asBlendMode()
-                         : SkBlendMode::kSrcOver;
-}
-
 sk_sp<SkBlender> PaintParams::refFinalBlender() const { return fFinalBlender; }
 
 sk_sp<SkShader> PaintParams::refShader() const { return fShader; }
@@ -356,36 +376,22 @@
     bool isOpaque = this->handleDithering(keyContext);
 
     // Root Node 1 is the final blender
-    std::optional<SkBlendMode> finalBlendMode = this->asFinalBlendMode();
-    bool usesAdvancedBlend = finalBlendMode.has_value() &&
-                             (int)finalBlendMode.value() > (int)SkBlendMode::kLastCoeffMode;
-
-    Coverage finalCoverage = fRendererCoverage;
-    if ((fClipShader || !fNonMSAAClip.isEmpty()) && fRendererCoverage == Coverage::kNone) {
-        finalCoverage = Coverage::kSingleChannel;
-    }
-
-    bool dependsOnDst = fClipShader || !fNonMSAAClip.isEmpty();
-    bool dstReadReq = !CanUseHardwareBlending(keyContext.recorder()->priv().caps(),
-                                              fTargetFormat,
-                                              finalBlendMode,
-                                              finalCoverage);
-
-    if (finalBlendMode.has_value()) {
-        if (!dstReadReq) {
+    bool dependsOnDst = fRendererCoverage != Coverage::kNone;
+    if (fFinalBlendMode.has_value()) {
+        if (!(fDstUsage & DstUsage::kDstReadRequired)) {
             // With no shader blending, be as explicit as possible about the final blend
-            AddFixedBlendMode(keyContext, finalBlendMode.value());
+            AddFixedBlendMode(keyContext, fFinalBlendMode.value());
         } else {
             // With shader blending, use AddBlendMode() to select the more universal blend functions
             // when possible. Technically we could always use a fixed blend mode but would then
             // over-generate when encountering certain classes of blends. This is most problematic
             // on devices that wouldn't support dual-source blending, so help them out by at least
             // not requiring lots of pipelines.
-            AddBlendMode(keyContext, finalBlendMode.value());
+            AddBlendMode(keyContext, fFinalBlendMode.value());
         }
 
         // Blend modes can be analyzed to determine if specific src colors still depend on the dst.
-        dependsOnDst |= blendmode_depends_on_dst(finalBlendMode.value(), isOpaque);
+        dependsOnDst |= blendmode_depends_on_dst(fFinalBlendMode.value(), isOpaque);
     } else {
         AddToKey(keyContext, fFinalBlender.get());
         // Cannot inspect runtime blenders to pessimistically assume they will always use the dst.
@@ -395,26 +401,15 @@
     // Optional Root Node 2 is the clip
     this->handleClipping(keyContext);
 
-    return Result{dependsOnDst, dstReadReq, usesAdvancedBlend};
-}
+    UniquePaintParamsID paintID =
+            keyContext.recorder()->priv().shaderCodeDictionary()->findOrCreate(
+                    keyContext.paintParamsKeyBuilder());
 
-// TODO(b/330864257): Can be deleted once keys are determined by the Device draw.
-void PaintParams::notifyImagesInUse(Recorder* recorder,
-                                    DrawContext* drawContext) const {
-    if (fShader) {
-        NotifyImagesInUse(recorder, drawContext, fShader.get());
-    }
-    if (fPrimitiveBlender) {
-        NotifyImagesInUse(recorder, drawContext, fPrimitiveBlender.get());
-    }
-    if (fColorFilter) {
-        NotifyImagesInUse(recorder, drawContext, fColorFilter.get());
-    }
-    if (fFinalBlender) {
-        NotifyImagesInUse(recorder, drawContext, fFinalBlender.get());
-    }
-    if (fClipShader) {
-        NotifyImagesInUse(recorder, drawContext, fClipShader.get());
+    if (!paintID.isValid()) {
+        return {};
+    } else {
+        return Result{paintID,
+                      fDstUsage | (dependsOnDst ? DstUsage::kDependsOnDst : DstUsage::kNone)};
     }
 }
 
diff --git a/src/gpu/graphite/PaintParams.h b/src/gpu/graphite/PaintParams.h
index 9983d9d..f958428 100644
--- a/src/gpu/graphite/PaintParams.h
+++ b/src/gpu/graphite/PaintParams.h
@@ -10,6 +10,7 @@
 
 #include "include/core/SkColor.h"
 #include "include/core/SkPaint.h"
+#include "src/base/SkEnumBitMask.h"
 #include "src/gpu/graphite/Caps.h"
 #include "src/gpu/graphite/Renderer.h"
 #include "src/gpu/graphite/geom/NonMSAAClip.h"
@@ -26,6 +27,7 @@
 class PipelineDataGatherer;
 class Recorder;
 class TextureProxy;
+class UniquePaintParamsID;
 
 // TBD: If occlusion culling is eliminated as a phase, we can easily move the paint conversion
 // back to Device when the command is recorded (similar to SkPaint -> GrPaint), and then
@@ -35,7 +37,8 @@
 // assumed to be anti-aliased.
 class PaintParams {
 public:
-    explicit PaintParams(const SkPaint&,
+    explicit PaintParams(const Caps* caps,
+                         const SkPaint&,
                          sk_sp<SkBlender> primitiveBlender,
                          const NonMSAAClip& nonMSAAClip,
                          sk_sp<SkShader> clipShader,
@@ -50,7 +53,7 @@
 
     SkColor4f color() const { return fColor; }
 
-    std::optional<SkBlendMode> asFinalBlendMode() const;
+    std::optional<SkBlendMode> finalBlendMode() const { return fFinalBlendMode; }
     SkBlender* finalBlender() const { return fFinalBlender.get(); }
     sk_sp<SkBlender> refFinalBlender() const;
 
@@ -63,19 +66,18 @@
     SkBlender* primitiveBlender() const { return fPrimitiveBlender.get(); }
     sk_sp<SkBlender> refPrimitiveBlender() const;
 
-    TextureFormat targetFormat() const { return fTargetFormat;   }
-    bool skipColorXform()        const { return fSkipColorXform; }
-    bool dither()                const { return fDither;         }
+    Coverage rendererCoverage()  const { return fRendererCoverage; }
+    bool skipColorXform()        const { return fSkipColorXform;   }
+    bool dither()                const { return fDither;           }
 
     /** Converts an SkColor4f to the destination color space. */
     static SkColor4f Color4fPrepForDst(SkColor4f srgb, const SkColorInfo& dstColorInfo);
 
-    using Result = std::tuple</*dependsOnDst*/bool, /*dstReadRequired*/bool,
-                              /*usesAdvancedBlend*/bool>;
+    using Result = std::tuple<UniquePaintParamsID, SkEnumBitMask<DstUsage>>;
     std::optional<Result> toKey(const KeyContext&) const;
 
-    void notifyImagesInUse(Recorder*, DrawContext*) const;
-
+    bool dstReadRequired() const { return (fDstUsage & DstUsage::kDstReadRequired) ==
+                                          DstUsage::kDstReadRequired; }
 private:
     bool addPaintColorToKey(const KeyContext&) const;
     bool handlePrimitiveColor(const KeyContext&) const;
@@ -85,20 +87,22 @@
     bool handleDstRead(const KeyContext&) const;
     void handleClipping(const KeyContext&) const;
 
-    SkColor4f            fColor;
-    sk_sp<SkBlender>     fFinalBlender; // A nullptr here means SrcOver blending
-    sk_sp<SkShader>      fShader;
-    sk_sp<SkColorFilter> fColorFilter;
+    SkColor4f                  fColor;
+    sk_sp<SkBlender>           fFinalBlender;   // A nullptr here means SrcOver blending
+    std::optional<SkBlendMode> fFinalBlendMode; // A nullopt here means we have a runtime blender
+    sk_sp<SkShader>            fShader;
+    sk_sp<SkColorFilter>       fColorFilter;
     // A nullptr fPrimitiveBlender means there's no primitive color blending and it is skipped.
     // In the case where there is primitive blending, the primitive color is the source color and
     // the dest is the paint's color (or the paint's shader's computed color).
-    sk_sp<SkBlender>     fPrimitiveBlender;
-    NonMSAAClip          fNonMSAAClip;
-    sk_sp<SkShader>      fClipShader;
-    Coverage             fRendererCoverage;
-    TextureFormat        fTargetFormat;
-    bool                 fSkipColorXform;
-    bool                 fDither;
+    sk_sp<SkBlender>           fPrimitiveBlender;
+    NonMSAAClip                fNonMSAAClip;
+    sk_sp<SkShader>            fClipShader;
+    Coverage                   fRendererCoverage;
+    TextureFormat              fTargetFormat;
+    bool                       fSkipColorXform;
+    bool                       fDither;
+    SkEnumBitMask<DstUsage>    fDstUsage;
 };
 
 // Add a fixed blend mode node for a specific SkBlendMode.
diff --git a/src/gpu/graphite/PipelineData.h b/src/gpu/graphite/PipelineData.h
index 55e7dd3..d8083fe 100644
--- a/src/gpu/graphite/PipelineData.h
+++ b/src/gpu/graphite/PipelineData.h
@@ -19,8 +19,8 @@
 #include "src/core/SkColorData.h"
 #include "src/core/SkTHash.h"
 #include "src/gpu/graphite/BufferManager.h"
-#include "src/gpu/graphite/DrawList.h"
 #include "src/gpu/graphite/DrawTypes.h"
+#include "src/gpu/graphite/GraphicsPipelineDesc.h"
 #include "src/gpu/graphite/TextureProxy.h"
 #include "src/gpu/graphite/UniformManager.h"
 #include "src/shaders/gradients/SkGradientBaseShader.h"
diff --git a/src/gpu/graphite/compute/VelloRenderer.cpp b/src/gpu/graphite/compute/VelloRenderer.cpp
index afbbf07..d8137f7 100644
--- a/src/gpu/graphite/compute/VelloRenderer.cpp
+++ b/src/gpu/graphite/compute/VelloRenderer.cpp
@@ -23,6 +23,7 @@
 #include "src/gpu/graphite/TextureUtils.h"
 #include "src/gpu/graphite/UniformManager.h"
 #include "src/gpu/graphite/compute/DispatchGroup.h"
+#include "src/gpu/graphite/geom/Transform.h"
 
 #include <algorithm>
 
diff --git a/tests/graphite/VerticesPaddingTest.cpp b/tests/graphite/VerticesPaddingTest.cpp
index 5d239ae..87f3107 100644
--- a/tests/graphite/VerticesPaddingTest.cpp
+++ b/tests/graphite/VerticesPaddingTest.cpp
@@ -248,7 +248,7 @@
 
     auto vertsNewPipeline = [&]() {
         dw->newPipelineState(/*type=*/{}, kStride, kStride, RenderStateFlags::kAppendVertices,
-                             std::nullopt);
+                             BarrierType::kNone);
         return;
     };
 
diff --git a/tests/graphite/precompile/PaintParamsKeyTest.cpp b/tests/graphite/precompile/PaintParamsKeyTest.cpp
index 850db73..a356ee2 100644
--- a/tests/graphite/precompile/PaintParamsKeyTest.cpp
+++ b/tests/graphite/precompile/PaintParamsKeyTest.cpp
@@ -49,6 +49,7 @@
 #include "src/core/SkRuntimeEffectPriv.h"
 #include "src/gpu/graphite/ContextPriv.h"
 #include "src/gpu/graphite/ContextUtils.h"
+#include "src/gpu/graphite/DrawContext.h"
 #include "src/gpu/graphite/GraphicsPipelineDesc.h"
 #include "src/gpu/graphite/KeyContext.h"
 #include "src/gpu/graphite/KeyHelpers.h"
@@ -101,6 +102,34 @@
 
 //--------------------------------------------------------------------------------------------------
 //--------------------------------------------------------------------------------------------------
+sk_sp<DrawContext> get_precompile_draw_context(
+            const skgpu::graphite::Caps* caps, Context* context) {
+    std::unique_ptr<Recorder> drawRecorder = context->makeRecorder();
+    ResourceProvider* resourceProvider = drawRecorder->priv().resourceProvider();
+    constexpr SkISize drawSize = {128, 128};
+    const SkColorInfo colorInfo = SkColorInfo(kRGBA_8888_SkColorType,
+                                              kPremul_SkAlphaType,
+                                              SkColorSpace::MakeSRGB());
+    TextureInfo texInfo = caps->getDefaultSampledTextureInfo(colorInfo.colorType(),
+                                                             skgpu::Mipmapped::kNo,
+                                                             skgpu::Protected::kNo,
+                                                             skgpu::Renderable::kYes);
+    sk_sp<TextureProxy> target = TextureProxy::Make(caps,
+                                                    resourceProvider,
+                                                    drawSize,
+                                                    texInfo,
+                                                    "PrecompileTarget",
+                                                    skgpu::Budgeted::kYes);
+    sk_sp<DrawContext> precompileDrawContext = DrawContext::Make(caps,
+                                                                 std::move(target),
+                                                                 drawSize,
+                                                                 colorInfo,
+                                                                 {});
+    return precompileDrawContext;
+}
+
+//--------------------------------------------------------------------------------------------------
+//--------------------------------------------------------------------------------------------------
 //    M(Empty)
 #define SK_ALL_TEST_SHADERS(M) \
     M(Blend)              \
@@ -1927,6 +1956,7 @@
 [[maybe_unused]]
 void extract_vs_build_subtest(skiatest::Reporter* reporter,
                               Context* context,
+                              DrawContext* drawContext,
                               skiatest::graphite::GraphiteTestContext* /* testContext */,
                               const KeyContext& precompileKeyContext,
                               Recorder* recorder,
@@ -1981,7 +2011,8 @@
             clipData.fAnalyticClip.fRadius = 5;
         }
 
-        PaintParams paintParams = PaintParams(paint,
+        PaintParams paintParams = PaintParams(recorder->priv().caps(),
+                                              paint,
                                               primitiveBlender,
                                               clipData,
                                               std::move(modifiedClipShader),
@@ -1990,6 +2021,7 @@
                                               /* skipColorXform= */ false);
         paramsGatherer.resetForDraw();
         KeyContext keyContext(recorder,
+                              drawContext,
                               precompileKeyContext.floatStorageManager(),
                               precompileKeyContext.paintParamsKeyBuilder(),
                               &paramsGatherer,
@@ -1997,10 +2029,9 @@
                               precompileKeyContext.dstColorInfo(),
                               KeyGenFlags::kDisableSamplingOptimization,
                               paintParams.color());
-        paintParams.toKey(keyContext);
-        UniquePaintParamsID paintID =
-                recorder->priv().shaderCodeDictionary()->findOrCreate(
-                        precompileKeyContext.paintParamsKeyBuilder());
+        auto keyResult = paintParams.toKey(keyContext);
+        UniquePaintParamsID paintID = keyResult.has_value() ? std::get<0>(*keyResult)
+                                                            : UniquePaintParamsID::Invalid();
 
         RenderPassDesc unusedRenderPassDesc;
         std::vector<UniquePaintParamsID> precompileIDs;
@@ -2137,6 +2168,7 @@
 
 void run_test(skiatest::Reporter* reporter,
               Context* context,
+              DrawContext* drawContext,
               PrecompileContext* precompileContext,
               skiatest::graphite::GraphiteTestContext* testContext,
               const KeyContext& precompileKeyContext,
@@ -2171,7 +2203,7 @@
     // a SkCanvas::clipShader call).
     paintOptions.priv().setClipShaders({clipShaderOption});
 
-    extract_vs_build_subtest(reporter, context, testContext, precompileKeyContext,
+    extract_vs_build_subtest(reporter, context, drawContext, testContext, precompileKeyContext,
                              recorder.get(), paint, paintOptions, s, bm, cf, mf, imageFilter,
                              clipType, clipShader, dt, seed, &rand, verbose);
     precompile_vs_real_draws_subtest(reporter, context, precompileContext,
@@ -2187,14 +2219,17 @@
                                                testContext,
                                                true,
                                                CtsEnforcement::kNever) {
+    const skgpu::graphite::Caps* caps = context->priv().caps();
     std::unique_ptr<PrecompileContext> precompileContext = context->makePrecompileContext();
-    sk_sp<RuntimeEffectDictionary> rtDict = sk_make_sp<RuntimeEffectDictionary>();
+    // Currently, we just use this as a valid parameter for keyContext (will hit asserts otherwise)
+    sk_sp<DrawContext> precompileDrawContext = get_precompile_draw_context(caps, context);
 
     FloatStorageManager floatStorageManager;
     ShaderCodeDictionary* dict = context->priv().shaderCodeDictionary();
     PaintParamsKeyBuilder builder(dict);
     PipelineDataGatherer gatherer(Layout::kMetal);
-    KeyContext keyContext(context->priv().caps(),
+    sk_sp<RuntimeEffectDictionary> rtDict = sk_make_sp<RuntimeEffectDictionary>();
+    KeyContext keyContext(caps,
                           &floatStorageManager,
                           &builder,
                           &gatherer,
@@ -2237,6 +2272,7 @@
 
     run_test(reporter,
              context,
+             precompileDrawContext.get(),
              precompileContext.get(),
              testContext,
              keyContext,
@@ -2264,14 +2300,17 @@
                                                testContext,
                                                true,
                                                CtsEnforcement::kNever) {
+    const skgpu::graphite::Caps* caps = context->priv().caps();
     std::unique_ptr<PrecompileContext> precompileContext = context->makePrecompileContext();
-    sk_sp<RuntimeEffectDictionary> rtDict = sk_make_sp<RuntimeEffectDictionary>();
+    // Currently, we just use this as a valid parameter for keyContext (will hit asserts otherwise)
+    sk_sp<DrawContext> precompileDrawContext = get_precompile_draw_context(caps, context);
 
     FloatStorageManager floatStorageManager;
     ShaderCodeDictionary* dict = context->priv().shaderCodeDictionary();
     PaintParamsKeyBuilder builder(dict);
     PipelineDataGatherer gatherer(Layout::kMetal);
-    KeyContext precompileKeyContext(context->priv().caps(),
+    sk_sp<RuntimeEffectDictionary> rtDict = sk_make_sp<RuntimeEffectDictionary>();
+    KeyContext precompileKeyContext(caps,
                                     &floatStorageManager,
                                     &builder,
                                     &gatherer,
@@ -2399,7 +2438,8 @@
                                 ++current;
 #endif
 
-                                run_test(reporter, context, precompileContext.get(),
+                                run_test(reporter, context, precompileDrawContext.get(),
+                                         precompileContext.get(),
                                          testContext, precompileKeyContext,
                                          shader, blender, cf, mf, imageFilter, clip, dt,
                                          kDefaultSeed, /* verbose= */ false);