diff --git a/gn/core.gni b/gn/core.gni
index df6d672..55b5cea 100644
--- a/gn/core.gni
+++ b/gn/core.gni
@@ -406,6 +406,7 @@
   "$_src/core/SkVM.cpp",
   "$_src/core/SkVM.h",
   "$_src/core/SkVMBlitter.cpp",
+  "$_src/core/SkVMBlitter.h",
   "$_src/core/SkVM_fwd.h",
   "$_src/core/SkValidationUtils.h",
   "$_src/core/SkVertState.cpp",
diff --git a/src/core/SkBlitter.cpp b/src/core/SkBlitter.cpp
index 2ce47e4..f77f9cf 100644
--- a/src/core/SkBlitter.cpp
+++ b/src/core/SkBlitter.cpp
@@ -22,6 +22,7 @@
 #include "src/core/SkRegionPriv.h"
 #include "src/core/SkTLazy.h"
 #include "src/core/SkUtils.h"
+#include "src/core/SkVMBlitter.h"
 #include "src/core/SkWriteBuffer.h"
 #include "src/core/SkXfermodeInterpretation.h"
 #include "src/shaders/SkShaderBase.h"
@@ -748,8 +749,8 @@
     }
 
     if (gUseSkVMBlitter) {
-        if (auto blitter = SkCreateSkVMBlitter(device, *paint, matrixProvider,
-                                               alloc, clipShader)) {
+        if (auto blitter = SkVMBlitter::Make(device, *paint, matrixProvider,
+                                             alloc, clipShader)) {
             return blitter;
         }
     }
@@ -761,8 +762,8 @@
                                                          alloc, clipShader)) {
             return blitter;
         }
-        if (auto blitter = SkCreateSkVMBlitter(device, *paint, matrixProvider,
-                                               alloc, clipShader)) {
+        if (auto blitter = SkVMBlitter::Make(device, *paint, matrixProvider,
+                                             alloc, clipShader)) {
             return blitter;
         }
         return alloc->make<SkNullBlitter>();
diff --git a/src/core/SkBlitter_Sprite.cpp b/src/core/SkBlitter_Sprite.cpp
index 3f35d42..8c1b2ef 100644
--- a/src/core/SkBlitter_Sprite.cpp
+++ b/src/core/SkBlitter_Sprite.cpp
@@ -13,6 +13,7 @@
 #include "src/core/SkOpts.h"
 #include "src/core/SkRasterPipeline.h"
 #include "src/core/SkSpriteBlitter.h"
+#include "src/core/SkVMBlitter.h"
 
 extern bool gUseSkVMBlitter;
 
@@ -187,7 +188,7 @@
     SkASSERT(alloc != nullptr);
 
     if (gUseSkVMBlitter) {
-        return SkCreateSkVMSpriteBlitter(dst, paint, source,left,top, alloc, std::move(clipShader));
+        return SkVMBlitter::Make(dst, paint, source,left,top, alloc, std::move(clipShader));
     }
 
     // TODO: in principle SkRasterPipelineSpriteBlitter could be made to handle this.
@@ -225,5 +226,5 @@
         return blitter;
     }
 
-    return SkCreateSkVMSpriteBlitter(dst, paint, source,left,top, alloc, std::move(clipShader));
+    return SkVMBlitter::Make(dst, paint, source,left,top, alloc, std::move(clipShader));
 }
diff --git a/src/core/SkCoreBlitters.h b/src/core/SkCoreBlitters.h
index 51e73a8..92e4924 100644
--- a/src/core/SkCoreBlitters.h
+++ b/src/core/SkCoreBlitters.h
@@ -173,17 +173,4 @@
                                          bool shader_is_opaque,
                                          SkArenaAlloc*, sk_sp<SkShader> clipShader);
 
-SkBlitter* SkCreateSkVMBlitter(const SkPixmap& dst,
-                               const SkPaint&,
-                               const SkMatrixProvider&,
-                               SkArenaAlloc*,
-                               sk_sp<SkShader> clipShader);
-
-SkBlitter* SkCreateSkVMSpriteBlitter(const SkPixmap& dst,
-                                     const SkPaint&,
-                                     const SkPixmap& sprite,
-                                     int left, int top,
-                                     SkArenaAlloc*,
-                                     sk_sp<SkShader> clipShader);
-
 #endif
diff --git a/src/core/SkDraw_vertices.cpp b/src/core/SkDraw_vertices.cpp
index 14a5e88..d2d2191 100644
--- a/src/core/SkDraw_vertices.cpp
+++ b/src/core/SkDraw_vertices.cpp
@@ -17,6 +17,7 @@
 #include "src/core/SkRasterPipeline.h"
 #include "src/core/SkScan.h"
 #include "src/core/SkVM.h"
+#include "src/core/SkVMBlitter.h"
 #include "src/core/SkVertState.h"
 #include "src/core/SkVerticesPriv.h"
 #include "src/shaders/SkComposeShader.h"
@@ -308,7 +309,7 @@
     // No colors are changing and no texture coordinates are changing, so no updates between
     // triangles are needed. Use SkVM to blit the triangles.
     if (!colors && (!texCoords || texCoords == positions)) {
-        if (auto blitter = SkCreateSkVMBlitter(
+        if (auto blitter = SkVMBlitter::Make(
                 fDst, paint, *fMatrixProvider, outerAlloc, this->fRC->clipShader())) {
             while (vertProc(&state)) {
                 fill_triangle(state, blitter, *fRC, dev2, dev3);
diff --git a/src/core/SkVMBlitter.cpp b/src/core/SkVMBlitter.cpp
index 4821358..c388dbe 100644
--- a/src/core/SkVMBlitter.cpp
+++ b/src/core/SkVMBlitter.cpp
@@ -19,6 +19,7 @@
 #include "src/core/SkOpts.h"
 #include "src/core/SkPaintPriv.h"
 #include "src/core/SkVM.h"
+#include "src/core/SkVMBlitter.h"
 #include "src/shaders/SkColorFilterShader.h"
 
 #include <cinttypes>
@@ -34,83 +35,6 @@
     static_assert(SkIsAlign4(sizeof(BlitterUniforms)), "");
     static constexpr int kBlitterUniformsCount = sizeof(BlitterUniforms) / 4;
 
-    enum class Coverage { Full, UniformF, MaskA8, MaskLCD16, Mask3D };
-
-    struct Params {
-        sk_sp<SkShader>         shader;
-        sk_sp<SkShader>         clip;
-        sk_sp<SkBlender>        blender;    // never null
-        SkColorInfo             dst;
-        Coverage                coverage;
-        SkColor4f               paint;
-        const SkMatrixProvider& matrices;
-
-        Params withCoverage(Coverage c) const {
-            Params p = *this;
-            p.coverage = c;
-            return p;
-        }
-    };
-
-    SK_BEGIN_REQUIRE_DENSE;
-    struct Key {
-        uint64_t shader,
-                 clip,
-                 blender,
-                 colorSpace;
-        uint8_t  colorType,
-                 alphaType,
-                 coverage;
-        uint8_t  padding8{0};
-        uint32_t padding{0};
-        // Params::{paint,quality,matrices} are only passed to {shader,clip}->program(),
-        // not used here by the blitter itself.  No need to include them in the key;
-        // they'll be folded into the shader key if used.
-
-        bool operator==(const Key& that) const {
-            return this->shader      == that.shader
-                && this->clip        == that.clip
-                && this->blender     == that.blender
-                && this->colorSpace  == that.colorSpace
-                && this->colorType   == that.colorType
-                && this->alphaType   == that.alphaType
-                && this->coverage    == that.coverage;
-        }
-
-        Key withCoverage(Coverage c) const {
-            Key k = *this;
-            k.coverage = SkToU8(c);
-            return k;
-        }
-    };
-    SK_END_REQUIRE_DENSE;
-
-    static SkString debug_name(const Key& key) {
-        return SkStringPrintf("Shader-%" PRIx64 "_Clip-%" PRIx64 "_Blender-%" PRIx64
-                              "_CS-%" PRIx64 "_CT-%d_AT-%d_Cov-%d",
-                              key.shader,
-                              key.clip,
-                              key.blender,
-                              key.colorSpace,
-                              key.colorType,
-                              key.alphaType,
-                              key.coverage);
-    }
-
-    static SkLRUCache<Key, skvm::Program>* try_acquire_program_cache() {
-    #if 1 && defined(SKVM_JIT)
-        thread_local static SkLRUCache<Key, skvm::Program> cache{64};
-        return &cache;
-    #else
-        // iOS in particular does not support thread_local until iOS 9.0.
-        // On the other hand, we'll never be able to JIT there anyway.
-        // It's probably fine to not cache any interpreted programs, anywhere.
-        return nullptr;
-    #endif
-    }
-
-    static void release_program_cache() { }
-
     static skvm::Coord device_coord(skvm::Builder* p, skvm::Uniforms* uniforms) {
         skvm::I32 dx = p->uniform32(uniforms->base, offsetof(BlitterUniforms, right))
                      - p->index(),
@@ -121,267 +45,6 @@
         };
     }
 
-    static skvm::Color dst_color(skvm::Builder* p, const Params& params) {
-        skvm::PixelFormat dstFormat = skvm::SkColorType_to_PixelFormat(params.dst.colorType());
-        skvm::Ptr dst_ptr = p->arg(SkColorTypeBytesPerPixel(params.dst.colorType()));
-        return p->load(dstFormat, dst_ptr);
-    }
-
-    // If build_program() can't build this program, cache_key() sets *ok to false.
-    static Key cache_key(const Params& params,
-                         skvm::Uniforms* uniforms, SkArenaAlloc* alloc, bool* ok) {
-        // Take care to match build_program()'s reuse of the paint color uniforms.
-        skvm::Uniform r = uniforms->pushF(params.paint.fR),
-                      g = uniforms->pushF(params.paint.fG),
-                      b = uniforms->pushF(params.paint.fB),
-                      a = uniforms->pushF(params.paint.fA);
-
-        auto hash_shader = [&](skvm::Builder& p, const sk_sp<SkShader>& shader,
-                               skvm::Color* outColor) {
-            const SkShaderBase* sb = as_SB(shader);
-
-            skvm::Coord device = device_coord(&p, uniforms);
-            skvm::Color paint = {
-                p.uniformF(r),
-                p.uniformF(g),
-                p.uniformF(b),
-                p.uniformF(a),
-            };
-
-            uint64_t hash = 0;
-            *outColor = sb->program(&p, device, /*local=*/device, paint, params.matrices,
-                                    /*localM=*/nullptr, params.dst, uniforms, alloc);
-            if (*outColor) {
-                hash = p.hash();
-                // p.hash() folds in all instructions to produce r,g,b,a but does not know
-                // precisely which value we'll treat as which channel.  Imagine the shader
-                // called std::swap(*r,*b)... it draws differently, but p.hash() is unchanged.
-                // We'll fold the hash of their IDs in order to disambiguate.
-                const skvm::Val outputs[] = {
-                    outColor->r.id,
-                    outColor->g.id,
-                    outColor->b.id,
-                    outColor->a.id
-                };
-                hash ^= SkOpts::hash(outputs, sizeof(outputs));
-            } else {
-                *ok = false;
-            }
-            return hash;
-        };
-
-        // Use this builder for shader, clip and blender, so that color objects that pass
-        // from one to the other all 'make sense' -- i.e. have the same builder and/or have
-        // meaningful values for the hash.
-        //
-        // Question: better if we just pass in mock uniform colors, so we don't need to
-        //           explicitly use the output color from one stage as input to another?
-        //
-        skvm::Builder p;
-
-        // Calculate a hash for the color shader.
-        SkASSERT(params.shader);
-        skvm::Color src;
-        uint64_t shaderHash = hash_shader(p, params.shader, &src);
-
-        // Calculate a hash for the clip shader, if one exists.
-        uint64_t clipHash = 0;
-        if (params.clip) {
-            skvm::Color cov;
-            clipHash = hash_shader(p, params.clip, &cov);
-            if (clipHash == 0) {
-                clipHash = 1;
-            }
-        }
-
-        // Calculate a hash for the blender.
-        uint64_t blendHash = 0;
-        if (auto bm = as_BB(params.blender)->asBlendMode()) {
-            blendHash = static_cast<uint8_t>(bm.value());
-        } else if (*ok) {
-            const SkBlenderBase* blender = as_BB(params.blender);
-
-            skvm::Color dst = dst_color(&p, params);
-            skvm::Color outColor = blender->program(&p, src, dst, params.dst, uniforms, alloc);
-            if (outColor) {
-                blendHash = p.hash();
-                // Like in `hash_shader` above, we must fold the color component IDs into our hash.
-                const skvm::Val outputs[] = {
-                    outColor.r.id,
-                    outColor.g.id,
-                    outColor.b.id,
-                    outColor.a.id
-                };
-                blendHash ^= SkOpts::hash(outputs, sizeof(outputs));
-            } else {
-                *ok = false;
-            }
-            if (blendHash == 0) {
-                blendHash = 1;
-            }
-        }
-
-        return {
-            shaderHash,
-              clipHash,
-             blendHash,
-            params.dst.colorSpace() ? params.dst.colorSpace()->hash() : 0,
-            SkToU8(params.dst.colorType()),
-            SkToU8(params.dst.alphaType()),
-            SkToU8(params.coverage),
-        };
-    }
-
-    static void build_program(skvm::Builder* p, const Params& params,
-                              skvm::Uniforms* uniforms, SkArenaAlloc* alloc) {
-        // First two arguments are always uniforms and the destination buffer.
-        uniforms->base    = p->uniform();
-        skvm::Ptr dst_ptr = p->arg(SkColorTypeBytesPerPixel(params.dst.colorType()));
-        // A SpriteShader (in this file) may next use one argument as its varying source.
-        // Subsequent arguments depend on params.coverage:
-        //    - Full:      (no more arguments)
-        //    - Mask3D:    mul varying, add varying, 8-bit coverage varying
-        //    - MaskA8:    8-bit coverage varying
-        //    - MaskLCD16: 565 coverage varying
-        //    - UniformF:  float coverage uniform
-
-        skvm::Coord device = device_coord(p, uniforms);
-        skvm::Color paint = p->uniformColor(params.paint, uniforms);
-
-        // See note about arguments above: a SpriteShader will call p->arg() once during program().
-        skvm::Color src = as_SB(params.shader)->program(p, device, /*local=*/device, paint,
-                                                        params.matrices, /*localM=*/nullptr,
-                                                        params.dst, uniforms, alloc);
-        SkASSERT(src);
-        if (params.coverage == Coverage::Mask3D) {
-            skvm::F32 M = from_unorm(8, p->load8(p->varying<uint8_t>())),
-                      A = from_unorm(8, p->load8(p->varying<uint8_t>()));
-
-            src.r = min(src.r * M + A, src.a);
-            src.g = min(src.g * M + A, src.a);
-            src.b = min(src.b * M + A, src.a);
-        }
-
-        // If we can determine this we can skip a fair bit of clamping!
-        bool src_in_gamut = false;
-
-        // Normalized premul formats can surprisingly represent some out-of-gamut
-        // values (e.g. r=0xff, a=0xee fits in unorm8 but r = 1.07), but most code
-        // working with normalized premul colors is not prepared to handle r,g,b > a.
-        // So we clamp the shader to gamut here before blending and coverage.
-        //
-        // In addition, GL clamps all its color channels to limits of the format just
-        // before the blend step (~here).  To match that auto-clamp, we clamp alpha to
-        // [0,1] too, just in case someone gave us a crazy alpha.
-        if (!src_in_gamut
-                && params.dst.alphaType() == kPremul_SkAlphaType
-                && SkColorTypeIsNormalized(params.dst.colorType())) {
-            src.a = clamp(src.a, 0.0f,  1.0f);
-            src.r = clamp(src.r, 0.0f, src.a);
-            src.g = clamp(src.g, 0.0f, src.a);
-            src.b = clamp(src.b, 0.0f, src.a);
-            src_in_gamut = true;
-        }
-
-        // Load the destination color.
-        skvm::PixelFormat dstFormat = skvm::SkColorType_to_PixelFormat(params.dst.colorType());
-        skvm::Color dst = p->load(dstFormat, dst_ptr);
-        if (params.dst.isOpaque()) {
-            // When a destination is known opaque, we may assume it both starts and stays fully
-            // opaque, ignoring any math that disagrees.  This sometimes trims a little work.
-            dst.a = p->splat(1.0f);
-        } else if (params.dst.alphaType() == kUnpremul_SkAlphaType) {
-            // All our blending works in terms of premul.
-            dst = premul(dst);
-        }
-
-        // Load coverage.
-        skvm::Color cov;
-        switch (params.coverage) {
-            case Coverage::Full:
-                cov.r = cov.g = cov.b = cov.a = p->splat(1.0f);
-                break;
-
-            case Coverage::UniformF:
-                cov.r = cov.g = cov.b = cov.a = p->uniformF(p->uniform(), 0);
-                break;
-
-            case Coverage::Mask3D:
-            case Coverage::MaskA8:
-                cov.r = cov.g = cov.b = cov.a = from_unorm(8, p->load8(p->varying<uint8_t>()));
-                break;
-
-            case Coverage::MaskLCD16: {
-                skvm::PixelFormat fmt = skvm::SkColorType_to_PixelFormat(kRGB_565_SkColorType);
-                cov = p->load(fmt, p->varying<uint16_t>());
-                cov.a = select(src.a < dst.a, min(cov.r, min(cov.g, cov.b))
-                                            , max(cov.r, max(cov.g, cov.b)));
-            } break;
-        }
-        if (params.clip) {
-            skvm::Color clip = as_SB(params.clip)->program(p, device, /*local=*/device, paint,
-                                                           params.matrices, /*localM=*/nullptr,
-                                                           params.dst, uniforms, alloc);
-            SkAssertResult(clip);
-            cov.r *= clip.a;  // We use the alpha channel of clip for all four.
-            cov.g *= clip.a;
-            cov.b *= clip.a;
-            cov.a *= clip.a;
-        }
-
-        const SkBlenderBase* blender = as_BB(params.blender);
-        const auto as_blendmode = blender->asBlendMode();
-
-        // The math for some blend modes lets us fold coverage into src before the blend, which is
-        // simpler than the canonical post-blend lerp().
-        bool applyPostBlendCoverage = true;
-        if (as_blendmode &&
-            SkBlendMode_ShouldPreScaleCoverage(as_blendmode.value(),
-                                               params.coverage == Coverage::MaskLCD16)) {
-            applyPostBlendCoverage = false;
-            src.r *= cov.r;
-            src.g *= cov.g;
-            src.b *= cov.b;
-            src.a *= cov.a;
-        }
-
-        // Apply our blend function to the computed color.
-        src = blender->program(p, src, dst, params.dst, uniforms, alloc);
-
-        if (applyPostBlendCoverage) {
-            src.r = lerp(dst.r, src.r, cov.r);
-            src.g = lerp(dst.g, src.g, cov.g);
-            src.b = lerp(dst.b, src.b, cov.b);
-            src.a = lerp(dst.a, src.a, cov.a);
-        }
-
-        if (params.dst.isOpaque()) {
-            // (See the note above when loading the destination color.)
-            src.a = p->splat(1.0f);
-        } else if (params.dst.alphaType() == kUnpremul_SkAlphaType) {
-            src = unpremul(src);
-        }
-
-        // Clamp to fit destination color format if needed.
-        if (as_blendmode && src_in_gamut) {
-            // An in-gamut src blended with an in-gamut dst should stay in gamut.
-            // Being in-gamut implies all channels are in [0,1], so no need to clamp.
-            // We allow one ulp error above 1.0f, and about that much (~1.2e-7) below 0.
-            skvm::F32 lo = pun_to_F32(p->splat(0xb400'0000)),
-                      hi = pun_to_F32(p->splat(0x3f80'0001));
-            assert_true(src.r == clamp(src.r, lo, hi), src.r);
-            assert_true(src.g == clamp(src.g, lo, hi), src.g);
-            assert_true(src.b == clamp(src.b, lo, hi), src.b);
-            assert_true(src.a == clamp(src.a, lo, hi), src.a);
-        } else if (SkColorTypeIsNormalized(params.dst.colorType())) {
-            src = clamp01(src);
-        }
-
-        // Write it out!
-        store(dstFormat, dst_ptr, src);
-    }
-
-
     struct NoopColorFilter : public SkColorFilterBase {
         skvm::Color onProgram(skvm::Builder*, skvm::Color c,
                               const SkColorInfo&, skvm::Uniforms*, SkArenaAlloc*) const override {
@@ -528,302 +191,592 @@
             return premul(paint);
         }
     };
-
-    static Params effective_params(const SkPixmap& device,
-                                   const SkPixmap* sprite,
-                                   SkPaint paint,
-                                   const SkMatrixProvider& matrices,
-                                   sk_sp<SkShader> clip) {
-        // Sprites take priority over any shader.  (There's rarely one set, and it's meaningless.)
-        if (sprite) {
-            paint.setShader(sk_make_sp<SpriteShader>(*sprite));
-        }
-
-        // Normal blitters will have already folded color filters into their shader,
-        // but we may still need to do that here for SpriteShaders.
-        if (paint.getColorFilter()) {
-            SkPaintPriv::RemoveColorFilter(&paint, device.colorSpace());
-        }
-        SkASSERT(!paint.getColorFilter());
-
-        // If there's no explicit shader, the paint color is the shader,
-        // but if there is a shader, it's modulated by the paint alpha.
-        sk_sp<SkShader> shader = paint.refShader();
-        if (!shader) {
-            shader = sk_make_sp<PaintColorShader>(paint.getColor4f().isOpaque());
-        } else if (paint.getAlphaf() < 1.0f) {
-            shader = sk_make_sp<SkColorFilterShader>(std::move(shader),
-                                                     paint.getAlphaf(),
-                                                     sk_make_sp<NoopColorFilter>());
-        }
-
-        // Add dither to the end of the shader pipeline if requested and needed.
-        if (paint.isDither() && !as_SB(shader)->isConstant()) {
-            shader = sk_make_sp<DitherShader>(std::move(shader));
-        }
-
-        // Add the blender.
-        sk_sp<SkBlender> blender = paint.refBlender();
-        if (!blender) {
-            blender = SkBlender::Mode(SkBlendMode::kSrcOver);
-        }
-
-        // The most common blend mode is SrcOver, and it can be strength-reduced
-        // _greatly_ to Src mode when the shader is opaque.
-        //
-        // In general all the information we use to make decisions here need to
-        // be reflected in Params and Key to make program caching sound, and it
-        // might appear that shader->isOpaque() is a property of the shader's
-        // uniforms than its fundamental program structure and so unsafe to use.
-        //
-        // Opacity is such a powerful property that SkShaderBase::program()
-        // forces opacity for any shader subclass that claims isOpaque(), so
-        // the opaque bit is strongly guaranteed to be part of the program and
-        // not just a property of the uniforms.  The shader program hash includes
-        // this information, making it safe to use anywhere in the blitter codegen.
-        if (as_BB(blender)->asBlendMode() == SkBlendMode::kSrcOver && shader->isOpaque()) {
-            blender = SkBlender::Mode(SkBlendMode::kSrc);
-        }
-
-        SkColor4f paintColor = paint.getColor4f();
-        SkColorSpaceXformSteps{sk_srgb_singleton(), kUnpremul_SkAlphaType,
-                               device.colorSpace(), kUnpremul_SkAlphaType}
-            .apply(paintColor.vec());
-
-        return {
-            std::move(shader),
-            std::move(clip),
-            std::move(blender),
-            { device.colorType(), device.alphaType(), device.refColorSpace() },
-            Coverage::Full,  // Placeholder... withCoverage() will change as needed.
-            paintColor,
-            matrices,
-        };
-    }
-
-    class Blitter final : public SkBlitter {
-    public:
-        Blitter(const SkPixmap&         device,
-                const SkPaint&          paint,
-                const SkPixmap*         sprite,
-                SkIPoint                spriteOffset,
-                const SkMatrixProvider& matrices,
-                sk_sp<SkShader>         clip,
-                bool* ok)
-            : fDevice(device)
-            , fSprite(sprite ? *sprite : SkPixmap{})
-            , fSpriteOffset(spriteOffset)
-            , fUniforms(skvm::Ptr{0}, kBlitterUniformsCount)
-            , fParams(effective_params(device, sprite, paint, matrices, std::move(clip)))
-            , fKey(cache_key(fParams, &fUniforms, &fAlloc, ok))
-        {}
-
-        ~Blitter() override {
-            if (SkLRUCache<Key, skvm::Program>* cache = try_acquire_program_cache()) {
-                auto cache_program = [&](skvm::Program&& program, Coverage coverage) {
-                    if (!program.empty()) {
-                        cache->insert_or_update(fKey.withCoverage(coverage), std::move(program));
-                    }
-                };
-                cache_program(std::move(fBlitH),         Coverage::Full);
-                cache_program(std::move(fBlitAntiH),     Coverage::UniformF);
-                cache_program(std::move(fBlitMaskA8),    Coverage::MaskA8);
-                cache_program(std::move(fBlitMask3D),    Coverage::Mask3D);
-                cache_program(std::move(fBlitMaskLCD16), Coverage::MaskLCD16);
-
-                release_program_cache();
-            }
-        }
-
-    private:
-        SkPixmap        fDevice;
-        const SkPixmap  fSprite;                  // See isSprite().
-        const SkIPoint  fSpriteOffset;
-        skvm::Uniforms  fUniforms;                // Most data is copied directly into fUniforms,
-        SkArenaAlloc    fAlloc{2*sizeof(void*)};  // but a few effects need to ref large content.
-        const Params    fParams;
-        const Key       fKey;
-        skvm::Program   fBlitH,
-                        fBlitAntiH,
-                        fBlitMaskA8,
-                        fBlitMask3D,
-                        fBlitMaskLCD16;
-
-        skvm::Program buildProgram(Coverage coverage) {
-            Key key = fKey.withCoverage(coverage);
-            {
-                skvm::Program p;
-                if (SkLRUCache<Key, skvm::Program>* cache = try_acquire_program_cache()) {
-                    if (skvm::Program* found = cache->find(key)) {
-                        p = std::move(*found);
-                    }
-                    release_program_cache();
-                }
-                if (!p.empty()) {
-                    return p;
-                }
-            }
-            // We don't really _need_ to rebuild fUniforms here.
-            // It's just more natural to have effects unconditionally emit them,
-            // and more natural to rebuild fUniforms than to emit them into a temporary buffer.
-            // fUniforms should reuse the exact same memory, so this is very cheap.
-            SkDEBUGCODE(size_t prev = fUniforms.buf.size();)
-            fUniforms.buf.resize(kBlitterUniformsCount);
-            skvm::Builder builder;
-            build_program(&builder, fParams.withCoverage(coverage), &fUniforms, &fAlloc);
-            SkASSERTF(fUniforms.buf.size() == prev,
-                      "%zu, prev was %zu", fUniforms.buf.size(), prev);
-
-            skvm::Program program = builder.done(debug_name(key).c_str());
-            if (false) {
-                static std::atomic<int> missed{0},
-                                         total{0};
-                if (!program.hasJIT()) {
-                    SkDebugf("\ncouldn't JIT %s\n", debug_name(key).c_str());
-                    builder.dump();
-                    program.dump();
-
-                    missed++;
-                }
-                if (0 == total++) {
-                    atexit([]{ SkDebugf("SkVMBlitter compiled %d programs, %d without JIT.\n",
-                                        total.load(), missed.load()); });
-                }
-            }
-            return program;
-        }
-
-        void updateUniforms(int right, int y) {
-            BlitterUniforms uniforms{right, y};
-            memcpy(fUniforms.buf.data(), &uniforms, sizeof(BlitterUniforms));
-        }
-
-        const void* isSprite(int x, int y) const {
-            if (fSprite.colorType() != kUnknown_SkColorType) {
-                return fSprite.addr(x - fSpriteOffset.x(),
-                                    y - fSpriteOffset.y());
-            }
-            return nullptr;
-        }
-
-        void blitH(int x, int y, int w) override {
-            if (fBlitH.empty()) {
-                fBlitH = this->buildProgram(Coverage::Full);
-            }
-            this->updateUniforms(x+w, y);
-            if (const void* sprite = this->isSprite(x,y)) {
-                fBlitH.eval(w, fUniforms.buf.data(), fDevice.addr(x,y), sprite);
-            } else {
-                fBlitH.eval(w, fUniforms.buf.data(), fDevice.addr(x,y));
-            }
-        }
-
-        void blitAntiH(int x, int y, const SkAlpha cov[], const int16_t runs[]) override {
-            if (fBlitAntiH.empty()) {
-                fBlitAntiH = this->buildProgram(Coverage::UniformF);
-            }
-            for (int16_t run = *runs; run > 0; run = *runs) {
-                this->updateUniforms(x+run, y);
-                const float covF = *cov * (1/255.0f);
-                if (const void* sprite = this->isSprite(x,y)) {
-                    fBlitAntiH.eval(run, fUniforms.buf.data(), fDevice.addr(x,y), sprite, &covF);
-                } else {
-                    fBlitAntiH.eval(run, fUniforms.buf.data(), fDevice.addr(x,y), &covF);
-                }
-                x    += run;
-                runs += run;
-                cov  += run;
-            }
-        }
-
-        void blitMask(const SkMask& mask, const SkIRect& clip) override {
-            if (mask.fFormat == SkMask::kBW_Format) {
-                return SkBlitter::blitMask(mask, clip);
-            }
-
-            const skvm::Program* program = nullptr;
-            switch (mask.fFormat) {
-                default: SkUNREACHABLE;     // ARGB and SDF masks shouldn't make it here.
-
-                case SkMask::k3D_Format:
-                    if (fBlitMask3D.empty()) {
-                        fBlitMask3D = this->buildProgram(Coverage::Mask3D);
-                    }
-                    program = &fBlitMask3D;
-                    break;
-
-                case SkMask::kA8_Format:
-                    if (fBlitMaskA8.empty()) {
-                        fBlitMaskA8 = this->buildProgram(Coverage::MaskA8);
-                    }
-                    program = &fBlitMaskA8;
-                    break;
-
-                case SkMask::kLCD16_Format:
-                    if (fBlitMaskLCD16.empty()) {
-                        fBlitMaskLCD16 = this->buildProgram(Coverage::MaskLCD16);
-                    }
-                    program = &fBlitMaskLCD16;
-                    break;
-            }
-
-            SkASSERT(program);
-            if (program) {
-                for (int y = clip.top(); y < clip.bottom(); y++) {
-                    int x = clip.left(),
-                        w = clip.width();
-                    void* dptr =        fDevice.writable_addr(x,y);
-                    auto  mptr = (const uint8_t*)mask.getAddr(x,y);
-                    this->updateUniforms(x+w,y);
-
-                    if (program == &fBlitMask3D) {
-                        size_t plane = mask.computeImageSize();
-                        if (const void* sprite = this->isSprite(x,y)) {
-                            program->eval(w, fUniforms.buf.data(), dptr, sprite, mptr + 1*plane
-                                                                               , mptr + 2*plane
-                                                                               , mptr + 0*plane);
-                        } else {
-                            program->eval(w, fUniforms.buf.data(), dptr, mptr + 1*plane
-                                                                       , mptr + 2*plane
-                                                                       , mptr + 0*plane);
-                        }
-                    } else {
-                        if (const void* sprite = this->isSprite(x,y)) {
-                            program->eval(w, fUniforms.buf.data(), dptr, sprite, mptr);
-                        } else {
-                            program->eval(w, fUniforms.buf.data(), dptr, mptr);
-                        }
-                    }
-                }
-            }
-        }
-    };
-
 }  // namespace
 
-SkBlitter* SkCreateSkVMBlitter(const SkPixmap& device,
+bool SkVMBlitter::Key::operator==(const Key& that) const {  // Field-by-field equality.
+    return this->shader      == that.shader
+        && this->clip        == that.clip
+        && this->blender     == that.blender
+        && this->colorSpace  == that.colorSpace
+        && this->colorType   == that.colorType
+        && this->alphaType   == that.alphaType
+        && this->coverage    == that.coverage;  // padding8/padding are not compared.
+}
+
+SkVMBlitter::Key SkVMBlitter::Key::withCoverage(Coverage c) const {
+    Key k = *this;           // Copy every field...
+    k.coverage = SkToU8(c);  // ...then swap in the requested coverage, stored as a uint8_t.
+    return k;
+}
+
+SkVMBlitter::Params SkVMBlitter::Params::withCoverage(Coverage c) const {
+    Params p = *this;  // Copies the sk_sp members and rebinds the same `matrices` reference.
+    p.coverage = c;
+    return p;
+}
+
+SkVMBlitter::Params SkVMBlitter::EffectiveParams(const SkPixmap& device,
+                                                 const SkPixmap* sprite,
+                                                 SkPaint paint,  // By value: edited below.
+                                                 const SkMatrixProvider& matrices,
+                                                 sk_sp<SkShader> clip) {
+    // Sprites take priority over any shader.  (There's rarely one set, and it's meaningless.)
+    if (sprite) {
+        paint.setShader(sk_make_sp<SpriteShader>(*sprite));
+    }
+
+    // Normal blitters will have already folded color filters into their shader,
+    // but we may still need to do that here for SpriteShaders.
+    if (paint.getColorFilter()) {
+        SkPaintPriv::RemoveColorFilter(&paint, device.colorSpace());
+    }
+    SkASSERT(!paint.getColorFilter());
+
+    // If there's no explicit shader, the paint color is the shader,
+    // but if there is a shader, it's modulated by the paint alpha.
+    sk_sp<SkShader> shader = paint.refShader();
+    if (!shader) {
+        shader = sk_make_sp<PaintColorShader>(paint.getColor4f().isOpaque());
+    } else if (paint.getAlphaf() < 1.0f) {
+        shader = sk_make_sp<SkColorFilterShader>(std::move(shader),
+                                                 paint.getAlphaf(),
+                                                 sk_make_sp<NoopColorFilter>());
+    }
+
+    // Add dither to the end of the shader pipeline if requested and needed.
+    if (paint.isDither() && !as_SB(shader)->isConstant()) {
+        shader = sk_make_sp<DitherShader>(std::move(shader));
+    }
+
+    // Add the blender.
+    sk_sp<SkBlender> blender = paint.refBlender();
+    if (!blender) {
+        blender = SkBlender::Mode(SkBlendMode::kSrcOver);
+    }
+
+    // The most common blend mode is SrcOver, and it can be strength-reduced
+    // _greatly_ to Src mode when the shader is opaque.
+    //
+    // In general all the information we use to make decisions here need to
+    // be reflected in Params and Key to make program caching sound, and it
+    // might appear that shader->isOpaque() is a property of the shader's
+    // uniforms rather than its fundamental program structure, and so unsafe to use.
+    //
+    // Opacity is such a powerful property that SkShaderBase::program()
+    // forces opacity for any shader subclass that claims isOpaque(), so
+    // the opaque bit is strongly guaranteed to be part of the program and
+    // not just a property of the uniforms.  The shader program hash includes
+    // this information, making it safe to use anywhere in the blitter codegen.
+    if (as_BB(blender)->asBlendMode() == SkBlendMode::kSrcOver && shader->isOpaque()) {
+        blender = SkBlender::Mode(SkBlendMode::kSrc);
+    }
+
+    SkColor4f paintColor = paint.getColor4f();  // Converted below from sRGB to the device space.
+    SkColorSpaceXformSteps{sk_srgb_singleton(), kUnpremul_SkAlphaType,
+                           device.colorSpace(), kUnpremul_SkAlphaType}
+            .apply(paintColor.vec());
+
+    return {  // Fields are listed in Params declaration order.
+        std::move(shader),
+        std::move(clip),
+        std::move(blender),
+        { device.colorType(), device.alphaType(), device.refColorSpace() },
+        Coverage::Full,  // Placeholder... withCoverage() will change as needed.
+        paintColor,
+        matrices,  // Stored as a reference: the provider must outlive the Params.
+    };
+}
+
+skvm::Color SkVMBlitter::DstColor(skvm::Builder* p, const Params& params) {
+    skvm::PixelFormat dstFormat = skvm::SkColorType_to_PixelFormat(params.dst.colorType());
+    skvm::Ptr dst_ptr = p->arg(SkColorTypeBytesPerPixel(params.dst.colorType()));
+    return p->load(dstFormat, dst_ptr);  // Loaded as stored: no premul or opaque fix-ups here.
+}
+
+void SkVMBlitter::BuildProgram(skvm::Builder* p, const Params& params,
+                               skvm::Uniforms* uniforms, SkArenaAlloc* alloc) {
+    // First two arguments are always uniforms and the destination buffer.
+    uniforms->base    = p->uniform();
+    skvm::Ptr dst_ptr = p->arg(SkColorTypeBytesPerPixel(params.dst.colorType()));
+    // A SpriteShader (in this file) may next use one argument as its varying source.
+    // Subsequent arguments depend on params.coverage:
+    //    - Full:      (no more arguments)
+    //    - Mask3D:    mul varying, add varying, 8-bit coverage varying
+    //    - MaskA8:    8-bit coverage varying
+    //    - MaskLCD16: 565 coverage varying
+    //    - UniformF:  float coverage uniform
+
+    skvm::Coord device = device_coord(p, uniforms);
+    skvm::Color paint = p->uniformColor(params.paint, uniforms);
+
+    // See note about arguments above: a SpriteShader will call p->arg() once during program().
+    skvm::Color src = as_SB(params.shader)->program(p, device, /*local=*/device, paint,
+                                                    params.matrices, /*localM=*/nullptr,
+                                                    params.dst, uniforms, alloc);
+    SkASSERT(src);
+    if (params.coverage == Coverage::Mask3D) {  // Mul and add varyings precede coverage.
+        skvm::F32 M = from_unorm(8, p->load8(p->varying<uint8_t>())),
+                  A = from_unorm(8, p->load8(p->varying<uint8_t>()));
+
+        src.r = min(src.r * M + A, src.a);
+        src.g = min(src.g * M + A, src.a);
+        src.b = min(src.b * M + A, src.a);
+    }
+
+    // If we can determine this we can skip a fair bit of clamping!
+    bool src_in_gamut = false;
+
+    // Normalized premul formats can surprisingly represent some out-of-gamut
+    // values (e.g. r=0xff, a=0xee fits in unorm8 but r = 1.07), but most code
+    // working with normalized premul colors is not prepared to handle r,g,b > a.
+    // So we clamp the shader to gamut here before blending and coverage.
+    //
+    // In addition, GL clamps all its color channels to limits of the format just
+    // before the blend step (~here).  To match that auto-clamp, we clamp alpha to
+    // [0,1] too, just in case someone gave us an out of range alpha.
+    if (!src_in_gamut
+            && params.dst.alphaType() == kPremul_SkAlphaType
+            && SkColorTypeIsNormalized(params.dst.colorType())) {
+        src.a = clamp(src.a, 0.0f,  1.0f);
+        src.r = clamp(src.r, 0.0f, src.a);
+        src.g = clamp(src.g, 0.0f, src.a);
+        src.b = clamp(src.b, 0.0f, src.a);
+        src_in_gamut = true;
+    }
+
+    // Load the destination color.
+    skvm::PixelFormat dstFormat = skvm::SkColorType_to_PixelFormat(params.dst.colorType());
+    skvm::Color dst = p->load(dstFormat, dst_ptr);
+    if (params.dst.isOpaque()) {
+        // When a destination is known opaque, we may assume it both starts and stays fully
+        // opaque, ignoring any math that disagrees.  This sometimes trims a little work.
+        dst.a = p->splat(1.0f);
+    } else if (params.dst.alphaType() == kUnpremul_SkAlphaType) {
+        // All our blending works in terms of premul.
+        dst = premul(dst);
+    }
+
+    // Load coverage.
+    skvm::Color cov;
+    switch (params.coverage) {
+        case Coverage::Full:
+            cov.r = cov.g = cov.b = cov.a = p->splat(1.0f);
+            break;
+
+        case Coverage::UniformF:
+            cov.r = cov.g = cov.b = cov.a = p->uniformF(p->uniform(), 0);
+            break;
+
+        case Coverage::Mask3D:
+        case Coverage::MaskA8:
+            cov.r = cov.g = cov.b = cov.a = from_unorm(8, p->load8(p->varying<uint8_t>()));
+            break;
+
+        case Coverage::MaskLCD16: {  // Per-channel coverage; alpha coverage is derived below.
+            skvm::PixelFormat fmt = skvm::SkColorType_to_PixelFormat(kRGB_565_SkColorType);
+            cov = p->load(fmt, p->varying<uint16_t>());
+            cov.a = select(src.a < dst.a, min(cov.r, min(cov.g, cov.b)),
+                           max(cov.r, max(cov.g, cov.b)));
+        } break;
+    }
+    if (params.clip) {
+        skvm::Color clip = as_SB(params.clip)->program(p, device, /*local=*/device, paint,
+                                                       params.matrices, /*localM=*/nullptr,
+                                                       params.dst, uniforms, alloc);
+        SkAssertResult(clip);
+        cov.r *= clip.a;  // We use the alpha channel of clip for all four.
+        cov.g *= clip.a;
+        cov.b *= clip.a;
+        cov.a *= clip.a;
+    }
+
+    const SkBlenderBase* blender = as_BB(params.blender);
+    const auto as_blendmode = blender->asBlendMode();
+
+    // The math for some blend modes lets us fold coverage into src before the blend, which is
+    // simpler than the canonical post-blend lerp().
+    bool applyPostBlendCoverage = true;
+    if (as_blendmode &&
+        SkBlendMode_ShouldPreScaleCoverage(as_blendmode.value(),
+                                           params.coverage == Coverage::MaskLCD16)) {
+        applyPostBlendCoverage = false;
+        src.r *= cov.r;
+        src.g *= cov.g;
+        src.b *= cov.b;
+        src.a *= cov.a;
+    }
+
+    // Apply our blend function to the computed color.
+    src = blender->program(p, src, dst, params.dst, uniforms, alloc);
+
+    if (applyPostBlendCoverage) {
+        src.r = lerp(dst.r, src.r, cov.r);
+        src.g = lerp(dst.g, src.g, cov.g);
+        src.b = lerp(dst.b, src.b, cov.b);
+        src.a = lerp(dst.a, src.a, cov.a);
+    }
+
+    if (params.dst.isOpaque()) {
+        // (See the note above when loading the destination color.)
+        src.a = p->splat(1.0f);
+    } else if (params.dst.alphaType() == kUnpremul_SkAlphaType) {
+        src = unpremul(src);
+    }
+
+    // Clamp to fit destination color format if needed.
+    if (as_blendmode && src_in_gamut) {
+        // An in-gamut src blended with an in-gamut dst should stay in gamut.
+        // Being in-gamut implies all channels are in [0,1], so no need to clamp.
+        // We allow one ulp error above 1.0f, and about that much (~1.2e-7) below 0.
+        skvm::F32 lo = pun_to_F32(p->splat(0xb400'0000)),
+                  hi = pun_to_F32(p->splat(0x3f80'0001));
+        assert_true(src.r == clamp(src.r, lo, hi), src.r);
+        assert_true(src.g == clamp(src.g, lo, hi), src.g);
+        assert_true(src.b == clamp(src.b, lo, hi), src.b);
+        assert_true(src.a == clamp(src.a, lo, hi), src.a);
+    } else if (SkColorTypeIsNormalized(params.dst.colorType())) {
+        src = clamp01(src);
+    }
+
+    // Write it out!
+    store(dstFormat, dst_ptr, src);
+}
+
+// If BuildProgram() can't build this program, CacheKey() sets *ok to false.
+SkVMBlitter::Key SkVMBlitter::CacheKey(
+        const Params& params, skvm::Uniforms* uniforms, SkArenaAlloc* alloc, bool* ok) {
+    // Take care to match BuildProgram()'s reuse of the paint color uniforms.
+    skvm::Uniform r = uniforms->pushF(params.paint.fR),
+                  g = uniforms->pushF(params.paint.fG),
+                  b = uniforms->pushF(params.paint.fB),
+                  a = uniforms->pushF(params.paint.fA);
+
+    auto hash_shader = [&](skvm::Builder& p, const sk_sp<SkShader>& shader,
+                           skvm::Color* outColor) {
+        const SkShaderBase* sb = as_SB(shader);
+
+        skvm::Coord device = device_coord(&p, uniforms);
+        skvm::Color paint = {
+            p.uniformF(r),
+            p.uniformF(g),
+            p.uniformF(b),
+            p.uniformF(a),
+        };
+
+        uint64_t hash = 0;
+        *outColor = sb->program(&p, device, /*local=*/device, paint, params.matrices,
+                /*localM=*/nullptr, params.dst, uniforms, alloc);
+        if (*outColor) {
+            hash = p.hash();
+            // p.hash() folds in all instructions to produce r,g,b,a but does not know
+            // precisely which value we'll treat as which channel.  Imagine the shader
+            // called std::swap(*r,*b)... it draws differently, but p.hash() is unchanged.
+            // We'll fold the hash of their IDs in order to disambiguate.
+            const skvm::Val outputs[] = {
+                outColor->r.id,
+                outColor->g.id,
+                outColor->b.id,
+                outColor->a.id
+            };
+            hash ^= SkOpts::hash(outputs, sizeof(outputs));
+        } else {
+            *ok = false;
+        }
+        return hash;
+    };
+
+    // Use this builder for shader, clip and blender, so that color objects that pass
+    // from one to the other all 'make sense' -- i.e. have the same builder and/or have
+    // meaningful values for the hash.
+    //
+    // Question: better if we just pass in mock uniform colors, so we don't need to
+    //           explicitly use the output color from one stage as input to another?
+    //
+    skvm::Builder p;
+
+    // Calculate a hash for the color shader.
+    SkASSERT(params.shader);
+    skvm::Color src;
+    uint64_t shaderHash = hash_shader(p, params.shader, &src);
+
+    // Calculate a hash for the clip shader, if one exists.
+    uint64_t clipHash = 0;
+    if (params.clip) {
+        skvm::Color cov;
+        clipHash = hash_shader(p, params.clip, &cov);
+        if (clipHash == 0) {  // Keep 0 reserved for "no clip shader".
+            clipHash = 1;
+        }
+    }
+
+    // Calculate a hash for the blender.
+    uint64_t blendHash = 0;
+    if (auto bm = as_BB(params.blender)->asBlendMode()) {
+        blendHash = static_cast<uint8_t>(bm.value());
+    } else if (*ok) {  // Custom blender: hash its program, unless something already failed.
+        const SkBlenderBase* blender = as_BB(params.blender);
+
+        skvm::Color dst = DstColor(&p, params);
+        skvm::Color outColor = blender->program(&p, src, dst, params.dst, uniforms, alloc);
+        if (outColor) {
+            blendHash = p.hash();
+            // Like in `hash_shader` above, we must fold the color component IDs into our hash.
+            const skvm::Val outputs[] = {
+                outColor.r.id,
+                outColor.g.id,
+                outColor.b.id,
+                outColor.a.id
+            };
+            blendHash ^= SkOpts::hash(outputs, sizeof(outputs));
+        } else {
+            *ok = false;
+        }
+        if (blendHash == 0) {  // A custom blender's key component is never left at 0.
+            blendHash = 1;
+        }
+    }
+
+    return {
+        shaderHash,
+        clipHash,
+        blendHash,
+        params.dst.colorSpace() ? params.dst.colorSpace()->hash() : 0,
+        SkToU8(params.dst.colorType()),
+        SkToU8(params.dst.alphaType()),
+        SkToU8(params.coverage),
+    };
+}
+
+SkVMBlitter::SkVMBlitter(const SkPixmap& device,
+                         const SkPaint& paint,
+                         const SkPixmap* sprite,  // May be null: no sprite.
+                         SkIPoint spriteOffset,
+                         const SkMatrixProvider& matrices,  // Kept by reference in fParams.
+                         sk_sp<SkShader> clip,
+                         bool* ok)  // Only ever set to false (by CacheKey()).
+        : fDevice(device), fSprite(sprite ? *sprite : SkPixmap{})
+        , fSpriteOffset(spriteOffset)
+        , fUniforms(skvm::Ptr{0}
+        , kBlitterUniformsCount)
+        , fParams(EffectiveParams(device, sprite, paint, matrices, std::move(clip)))
+        , fKey(CacheKey(fParams, &fUniforms, &fAlloc, ok)) {}  // Uses the members above.
+
+SkVMBlitter::~SkVMBlitter() {  // Hands every program this blitter holds to the cache.
+    if (SkLRUCache<Key, skvm::Program>* cache = TryAcquireProgramCache()) {
+        auto cache_program = [&](skvm::Program&& program, Coverage coverage) {
+            if (!program.empty()) {  // Programs that were never built are skipped.
+                cache->insert_or_update(fKey.withCoverage(coverage), std::move(program));
+            }
+        };
+        cache_program(std::move(fBlitH),         Coverage::Full);
+        cache_program(std::move(fBlitAntiH),     Coverage::UniformF);
+        cache_program(std::move(fBlitMaskA8),    Coverage::MaskA8);
+        cache_program(std::move(fBlitMask3D),    Coverage::Mask3D);
+        cache_program(std::move(fBlitMaskLCD16), Coverage::MaskLCD16);
+
+        ReleaseProgramCache();
+    }
+}
+
+SkLRUCache<SkVMBlitter::Key, skvm::Program>* SkVMBlitter::TryAcquireProgramCache() {
+#if 1 && defined(SKVM_JIT)
+    thread_local static SkLRUCache<Key, skvm::Program> cache{64};  // One cache per thread.
+    return &cache;
+#else
+    // iOS in particular does not support thread_local until iOS 9.0.
+    // On the other hand, we'll never be able to JIT there anyway.
+    // It's probably fine to not cache any interpreted programs, anywhere.
+        return nullptr;
+#endif
+}
+
+SkString SkVMBlitter::DebugName(const Key& key) {  // Label built from every hashed Key field.
+    return SkStringPrintf("Shader-%" PRIx64 "_Clip-%" PRIx64 "_Blender-%" PRIx64
+                          "_CS-%" PRIx64 "_CT-%d_AT-%d_Cov-%d",
+                          key.shader,
+                          key.clip,
+                          key.blender,
+                          key.colorSpace,
+                          key.colorType,
+                          key.alphaType,
+                          key.coverage);
+}
+
+void SkVMBlitter::ReleaseProgramCache() {}  // No-op; pairs with TryAcquireProgramCache().
+
+skvm::Program SkVMBlitter::buildProgram(Coverage coverage) {  // Cached program, else a new one.
+    Key key = fKey.withCoverage(coverage);
+    {
+        skvm::Program p;
+        if (SkLRUCache<Key, skvm::Program>* cache = TryAcquireProgramCache()) {
+            if (skvm::Program* found = cache->find(key)) {
+                p = std::move(*found);  // Moved out of the cache; the destructor re-inserts it.
+            }
+            ReleaseProgramCache();
+        }
+        if (!p.empty()) {  // Cache hit.
+            return p;
+        }
+    }
+    // We don't really _need_ to rebuild fUniforms here.
+    // It's just more natural to have effects unconditionally emit them,
+    // and more natural to rebuild fUniforms than to emit them into a temporary buffer.
+    // fUniforms should reuse the exact same memory, so this is very cheap.
+    SkDEBUGCODE(size_t prev = fUniforms.buf.size();)
+    fUniforms.buf.resize(kBlitterUniformsCount);
+    skvm::Builder builder;
+    BuildProgram(&builder, fParams.withCoverage(coverage), &fUniforms, &fAlloc);
+    SkASSERTF(fUniforms.buf.size() == prev,
+              "%zu, prev was %zu", fUniforms.buf.size(), prev);
+
+    skvm::Program program = builder.done(DebugName(key).c_str());
+    if (false) {  // Debugging aid: flip to true to report programs that could not be JITed.
+        static std::atomic<int> missed{0},
+                total{0};
+        if (!program.hasJIT()) {
+            SkDebugf("\ncouldn't JIT %s\n", DebugName(key).c_str());
+            builder.dump();
+            program.dump();
+
+            missed++;
+        }
+        if (0 == total++) {  // Register the summary printout once, on the first program.
+            atexit([]{ SkDebugf("SkVMBlitter compiled %d programs, %d without JIT.\n",
+                                total.load(), missed.load()); });
+        }
+    }
+    return program;
+}
+
+void SkVMBlitter::updateUniforms(int right, int y) {
+    BlitterUniforms uniforms{right, y};
+    memcpy(fUniforms.buf.data(), &uniforms, sizeof(BlitterUniforms));  // Overwrites the buffer's head.
+}
+
+const void* SkVMBlitter::isSprite(int x, int y) const {  // Sprite address for (x,y), or nullptr.
+    if (fSprite.colorType() != kUnknown_SkColorType) {  // fSprite is SkPixmap{} without a sprite.
+        return fSprite.addr(x - fSpriteOffset.x(),
+                            y - fSpriteOffset.y());
+    }
+    return nullptr;
+}
+
+void SkVMBlitter::blitH(int x, int y, int w) {
+    if (fBlitH.empty()) {  // Built (or fetched from the cache) on first use.
+        fBlitH = this->buildProgram(Coverage::Full);
+    }
+    this->updateUniforms(x+w, y);  // The uniforms carry the span's right edge and row.
+    if (const void* sprite = this->isSprite(x,y)) {
+        fBlitH.eval(w, fUniforms.buf.data(), fDevice.addr(x,y), sprite);
+    } else {
+        fBlitH.eval(w, fUniforms.buf.data(), fDevice.addr(x,y));
+    }
+}
+
+void SkVMBlitter::blitAntiH(int x, int y, const SkAlpha cov[], const int16_t runs[]) {
+    if (fBlitAntiH.empty()) {  // Built (or fetched from the cache) on first use.
+        fBlitAntiH = this->buildProgram(Coverage::UniformF);
+    }
+    for (int16_t run = *runs; run > 0; run = *runs) {  // A non-positive run ends the list.
+        this->updateUniforms(x+run, y);
+        const float covF = *cov * (1/255.0f);  // One coverage value per run, scaled by 1/255.
+        if (const void* sprite = this->isSprite(x,y)) {
+            fBlitAntiH.eval(run, fUniforms.buf.data(), fDevice.addr(x,y), sprite, &covF);
+        } else {
+            fBlitAntiH.eval(run, fUniforms.buf.data(), fDevice.addr(x,y), &covF);
+        }
+        x    += run;
+        runs += run;
+        cov  += run;
+    }
+}
+
+void SkVMBlitter::blitMask(const SkMask& mask, const SkIRect& clip) {
+    if (mask.fFormat == SkMask::kBW_Format) {  // BW masks are left to the base class.
+        return SkBlitter::blitMask(mask, clip);
+    }
+
+    const skvm::Program* program = nullptr;
+    switch (mask.fFormat) {
+        default: SkUNREACHABLE;     // ARGB and SDF masks shouldn't make it here.
+
+        case SkMask::k3D_Format:
+            if (fBlitMask3D.empty()) {
+                fBlitMask3D = this->buildProgram(Coverage::Mask3D);
+            }
+            program = &fBlitMask3D;
+            break;
+
+        case SkMask::kA8_Format:
+            if (fBlitMaskA8.empty()) {
+                fBlitMaskA8 = this->buildProgram(Coverage::MaskA8);
+            }
+            program = &fBlitMaskA8;
+            break;
+
+        case SkMask::kLCD16_Format:
+            if (fBlitMaskLCD16.empty()) {
+                fBlitMaskLCD16 = this->buildProgram(Coverage::MaskLCD16);
+            }
+            program = &fBlitMaskLCD16;
+            break;
+    }
+
+    SkASSERT(program);
+    if (program) {
+        for (int y = clip.top(); y < clip.bottom(); y++) {  // One eval() per clipped row.
+             int x = clip.left(),
+                 w = clip.width();
+            void* dptr =        fDevice.writable_addr(x,y);
+            auto  mptr = (const uint8_t*)mask.getAddr(x,y);
+            this->updateUniforms(x+w,y);
+
+            if (program == &fBlitMask3D) {  // Passes planes 1 and 2 first, then plane 0.
+                size_t plane = mask.computeImageSize();  // Used as the offset between planes.
+                if (const void* sprite = this->isSprite(x,y)) {
+                    program->eval(w, fUniforms.buf.data(), dptr, sprite, mptr + 1*plane
+                                                                       , mptr + 2*plane
+                                                                       , mptr + 0*plane);
+                } else {
+                    program->eval(w, fUniforms.buf.data(), dptr, mptr + 1*plane
+                                                               , mptr + 2*plane
+                                                               , mptr + 0*plane);
+                }
+            } else {
+                if (const void* sprite = this->isSprite(x,y)) {
+                    program->eval(w, fUniforms.buf.data(), dptr, sprite, mptr);
+                } else {
+                    program->eval(w, fUniforms.buf.data(), dptr, mptr);
+                }
+            }
+        }
+    }
+}
+
+SkVMBlitter* SkVMBlitter::Make(const SkPixmap& device,  // Non-sprite factory; nullptr on failure.
                                const SkPaint& paint,
                                const SkMatrixProvider& matrices,
                                SkArenaAlloc* alloc,
                                sk_sp<SkShader> clip) {
     bool ok = true;
-    auto blitter = alloc->make<Blitter>(device, paint, /*sprite=*/nullptr, SkIPoint{0,0},
-                                        matrices, std::move(clip), &ok);
+    SkVMBlitter* blitter = alloc->make<SkVMBlitter>(  // `matrices` must outlive the blitter.
+            device, paint, /*sprite=*/nullptr, SkIPoint{0,0}, matrices, std::move(clip), &ok);
     return ok ? blitter : nullptr;
 }
 
-SkBlitter* SkCreateSkVMSpriteBlitter(const SkPixmap& device,
-                                     const SkPaint& paint,
-                                     const SkPixmap& sprite,
-                                     int left, int top,
-                                     SkArenaAlloc* alloc,
-                                     sk_sp<SkShader> clip) {
+SkVMBlitter* SkVMBlitter::Make(const SkPixmap& device,  // Sprite factory; nullptr on failure.
+                               const SkPaint& paint,    // Paints with a mask filter are rejected.
+                               const SkPixmap& sprite,
+                               int left, int top,       // Device position of the sprite's origin.
+                               SkArenaAlloc* alloc,     // Owns the blitter and its matrix provider.
+                               sk_sp<SkShader> clip) {
     if (paint.getMaskFilter()) {
         // TODO: SkVM support for mask filters?  definitely possible!
         return nullptr;
     }
     bool ok = true;
-    auto blitter = alloc->make<Blitter>(device, paint, &sprite, SkIPoint{left,top},
-                                        SkSimpleMatrixProvider{SkMatrix{}}, std::move(clip), &ok);
+    // The blitter stores its provider by reference, so a temporary here would dangle: arena it.
+    auto blitter = alloc->make<SkVMBlitter>(device, paint, &sprite, SkIPoint{left,top},
+            *alloc->make<SkSimpleMatrixProvider>(SkMatrix{}), std::move(clip), &ok);
     return ok ? blitter : nullptr;
 }
diff --git a/src/core/SkVMBlitter.h b/src/core/SkVMBlitter.h
new file mode 100644
index 0000000..940ae77
--- /dev/null
+++ b/src/core/SkVMBlitter.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkVMBlitter_DEFINED
+#define SkVMBlitter_DEFINED
+
+#include "src/core/SkLRUCache.h"
+#include "src/core/SkVM.h"
+
+class SkVMBlitter final : public SkBlitter {  // Blits by running skvm::Programs.
+public:
+    static SkVMBlitter* Make(const SkPixmap& dst,  // No sprite; nullptr on failure.
+                             const SkPaint&,
+                             const SkMatrixProvider&,
+                             SkArenaAlloc*,
+                             sk_sp<SkShader> clipShader);
+
+    static SkVMBlitter* Make(const SkPixmap& dst,  // Sprite at (left,top); nullptr on failure.
+                             const SkPaint&,
+                             const SkPixmap& sprite,
+                             int left, int top,
+                             SkArenaAlloc*,
+                             sk_sp<SkShader> clipShader);
+
+    SkVMBlitter(const SkPixmap& device,
+                const SkPaint& paint,
+                const SkPixmap* sprite,
+                SkIPoint spriteOffset,
+                const SkMatrixProvider& matrices,
+                sk_sp<SkShader> clip,
+                bool* ok);  // *ok is set to false if the program cannot be built.
+
+    ~SkVMBlitter() override;
+
+private:
+    enum class Coverage { Full, UniformF, MaskA8, MaskLCD16, Mask3D };  // Coverage source.
+    struct Key {
+        uint64_t shader,
+                 clip,
+                 blender,
+                 colorSpace;
+        uint8_t  colorType,
+                 alphaType,
+                 coverage;
+        uint8_t  padding8{0};  // Explicit zeroed padding; presumably so raw-byte hashing is stable.
+        uint32_t padding{0};
+        // Params::{paint,matrices} are only passed to {shader,clip}->program(),
+        // not used here by the blitter itself.  No need to include them in the key;
+        // they'll be folded into the shader key if used.
+
+        bool operator==(const Key& that) const;
+        Key withCoverage(Coverage c) const;
+    };
+
+    struct Params {
+        sk_sp<SkShader>         shader;
+        sk_sp<SkShader>         clip;
+        sk_sp<SkBlender>        blender;    // never null
+        SkColorInfo             dst;
+        Coverage                coverage;
+        SkColor4f               paint;
+        const SkMatrixProvider& matrices;   // Not owned: the provider must outlive this Params.
+
+        Params withCoverage(Coverage c) const;
+    };
+
+    static Params EffectiveParams(const SkPixmap& device,
+                                  const SkPixmap* sprite,
+                                  SkPaint paint,
+                                  const SkMatrixProvider& matrices,
+                                  sk_sp<SkShader> clip);
+    static skvm::Color DstColor(skvm::Builder* p, const Params& params);
+    static void BuildProgram(skvm::Builder* p, const Params& params,
+                             skvm::Uniforms* uniforms, SkArenaAlloc* alloc);
+    static Key CacheKey(const Params& params,
+                        skvm::Uniforms* uniforms, SkArenaAlloc* alloc, bool* ok);
+    static SkLRUCache<Key, skvm::Program>* TryAcquireProgramCache();  // May return nullptr.
+    static SkString DebugName(const Key& key);
+    static void ReleaseProgramCache();
+
+    skvm::Program buildProgram(Coverage coverage);
+    void updateUniforms(int right, int y);
+    const void* isSprite(int x, int y) const;
+
+    void blitH(int x, int y, int w) override;
+    void blitAntiH(int x, int y, const SkAlpha cov[], const int16_t runs[]) override;
+    void blitMask(const SkMask& mask, const SkIRect& clip) override;
+
+    SkPixmap        fDevice;
+    const SkPixmap  fSprite;                  // See isSprite().
+    const SkIPoint  fSpriteOffset;
+    skvm::Uniforms  fUniforms;                // Most data is copied directly into fUniforms,
+    SkArenaAlloc    fAlloc{2*sizeof(void*)};  // but a few effects need to ref large content.
+    const Params    fParams;
+    const Key       fKey;                     // Coverage here is a placeholder; see withCoverage().
+    skvm::Program   fBlitH,                   // One lazily built program per Coverage mode.
+                    fBlitAntiH,
+                    fBlitMaskA8,
+                    fBlitMask3D,
+                    fBlitMaskLCD16;
+};
+#endif  // SkVMBlitter_DEFINED
