Take the dest color as an input to SkBlenderBase::asFragmentProcessor.

Previously, the dest color was implicitly the color of the surface that
we were drawing onto. When setting the blender on an SkPaint, this is
still the case. For other uses, e.g. an SkBlendImageFilter, the caller
may want to supply an arbitrary fragment processor (FP) as the dest color.

Bug: skia:12085, skia:12205
Change-Id: I254be006a0f374711fa64f49e0ba339578d8d83a
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/430059
Commit-Queue: John Stiles <johnstiles@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
diff --git a/src/core/SkBlendModeBlender.cpp b/src/core/SkBlendModeBlender.cpp
index 19dcc78..5b261e3 100644
--- a/src/core/SkBlendModeBlender.cpp
+++ b/src/core/SkBlendModeBlender.cpp
@@ -69,11 +69,16 @@
 
 #if SK_SUPPORT_GPU
 std::unique_ptr<GrFragmentProcessor> SkBlendModeBlender::asFragmentProcessor(
-        std::unique_ptr<GrFragmentProcessor> inputFP, const GrFPArgs& fpArgs) const {
-    // Using a SkBlendModeBlender on the GPU side isn't supported; we should use GrXferProcessor to
-    // perform this blend instead. The Xfer processor is able to perform coefficient-based blends
-    // without readback, so it's more efficient.
-    SkDEBUGFAIL("SkBlendModeBlender::asFragmentProcessor is not supported");
+        std::unique_ptr<GrFragmentProcessor> srcFP,
+        std::unique_ptr<GrFragmentProcessor> dstFP,
+        const GrFPArgs& fpArgs) const {
+    // Note that for the final blend onto the canvas, we should prefer to use the GrXferProcessor
+    // instead of a SkBlendModeBlender to perform the blend. The Xfer processor is able to perform
+    // coefficient-based blends directly, without readback. This will be much more efficient.
+
+    // TODO(skia:12085, skia:12205): We will be able to use Blenders in other contexts, such as
+    // in a SkBlendImageFilter; in that case we will need to supply a blend function.
+    SkDEBUGFAIL("SkBlendModeBlender::asFragmentProcessor is not yet implemented");
     return nullptr;
 }
 #endif
diff --git a/src/core/SkBlendModeBlender.h b/src/core/SkBlendModeBlender.h
index 39d8218..a518618 100644
--- a/src/core/SkBlendModeBlender.h
+++ b/src/core/SkBlendModeBlender.h
@@ -18,7 +18,9 @@
 
 #if SK_SUPPORT_GPU
     std::unique_ptr<GrFragmentProcessor> asFragmentProcessor(
-            std::unique_ptr<GrFragmentProcessor> inputFP, const GrFPArgs& fpArgs) const override;
+            std::unique_ptr<GrFragmentProcessor> srcFP,
+            std::unique_ptr<GrFragmentProcessor> dstFP,
+            const GrFPArgs& fpArgs) const override;
 #endif
 
     SK_FLATTENABLE_HOOKS(SkBlendModeBlender)
diff --git a/src/core/SkBlenderBase.h b/src/core/SkBlenderBase.h
index 431b26c..bc55139 100644
--- a/src/core/SkBlenderBase.h
+++ b/src/core/SkBlenderBase.h
@@ -42,10 +42,12 @@
 #if SK_SUPPORT_GPU
     /**
      * Returns a GrFragmentProcessor that implements this blend for the GPU backend.
-     * The GrFragmentProcessor expects a premultiplied input and returns a premultiplied output.
+     * The GrFragmentProcessor expects premultiplied inputs and returns a premultiplied output.
      */
     virtual std::unique_ptr<GrFragmentProcessor> asFragmentProcessor(
-            std::unique_ptr<GrFragmentProcessor> inputFP, const GrFPArgs& fpArgs) const = 0;
+            std::unique_ptr<GrFragmentProcessor> srcFP,
+            std::unique_ptr<GrFragmentProcessor> dstFP,
+            const GrFPArgs& fpArgs) const = 0;
 #endif
 
     virtual SkRuntimeEffect* asRuntimeEffect() const { return nullptr; }
diff --git a/src/core/SkRuntimeEffect.cpp b/src/core/SkRuntimeEffect.cpp
index 7e35ba7..03c7a63 100644
--- a/src/core/SkRuntimeEffect.cpp
+++ b/src/core/SkRuntimeEffect.cpp
@@ -694,8 +694,12 @@
             childFPs.push_back(nullptr);
         }
     }
-    auto fp = GrSkSLFP::MakeWithData(
-            std::move(effect), name, std::move(inputFP), std::move(uniforms), SkMakeSpan(childFPs));
+    auto fp = GrSkSLFP::MakeWithData(std::move(effect),
+                                     name,
+                                     std::move(inputFP),
+                                     /*destColorFP=*/nullptr,
+                                     std::move(uniforms),
+                                     SkMakeSpan(childFPs));
     SkASSERT(fp);
     return GrFPSuccess(std::move(fp));
 }
@@ -1066,13 +1070,19 @@
 
 #if SK_SUPPORT_GPU
     std::unique_ptr<GrFragmentProcessor> asFragmentProcessor(
-            std::unique_ptr<GrFragmentProcessor> inputFP, const GrFPArgs& args) const override {
+            std::unique_ptr<GrFragmentProcessor> srcFP,
+            std::unique_ptr<GrFragmentProcessor> dstFP,
+            const GrFPArgs& args) const override {
         sk_sp<SkData> uniforms = get_xformed_uniforms(fEffect.get(), fUniforms,
                                                       args.fDstColorInfo->colorSpace());
         SkASSERT(uniforms);
 
-        return GrSkSLFP::MakeWithData(fEffect, "runtime_blender", std::move(inputFP),
-                                      std::move(uniforms), /*childFPs=*/{});
+        return GrSkSLFP::MakeWithData(fEffect,
+                                      "runtime_blender",
+                                      std::move(srcFP),
+                                      std::move(dstFP),
+                                      std::move(uniforms),
+                                      /*childFPs=*/{});
     }
 #endif
 
@@ -1185,6 +1195,7 @@
         auto fp = GrSkSLFP::MakeWithData(sk_ref_sp(this),
                                          "runtime_image",
                                          /*inputFP=*/nullptr,
+                                         /*destColorFP=*/nullptr,
                                          std::move(uniforms),
                                          SkMakeSpan(childFPs));
 
diff --git a/src/gpu/SkGr.cpp b/src/gpu/SkGr.cpp
index e80361d..72b7d10 100644
--- a/src/gpu/SkGr.cpp
+++ b/src/gpu/SkGr.cpp
@@ -512,9 +512,12 @@
             grPaint->setXPFactory(SkBlendMode_AsXPFactory(bm.value()));
         }
     } else {
-        // Apply the custom blend, and force the XP to kSrc. We don't honor the SkBlendMode when a
-        // custom blend is applied.
-        paintFP = as_BB(skPaint.getBlender())->asFragmentProcessor(std::move(paintFP), fpArgs);
+        // Apply a custom blend against the destination color, and force the XP to kSrc so that the
+        // computed result is applied directly to the canvas while still honoring the alpha.
+        paintFP = as_BB(skPaint.getBlender())->asFragmentProcessor(
+                std::move(paintFP),
+                GrFragmentProcessor::DestColor(),
+                fpArgs);
         grPaint->setXPFactory(SkBlendMode_AsXPFactory(SkBlendMode::kSrc));
     }
 
diff --git a/src/gpu/effects/GrSkSLFP.cpp b/src/gpu/effects/GrSkSLFP.cpp
index 1bd7a3d..0166ce9 100644
--- a/src/gpu/effects/GrSkSLFP.cpp
+++ b/src/gpu/effects/GrSkSLFP.cpp
@@ -194,14 +194,15 @@
             args.fFragBuilder->codeAppendf("float2 %s = %s;\n", coords, args.fSampleCoord);
         }
 
-        // For blend effects, we need to copy the dest-color to a local variable as well.
+        // For runtime blends, the destination color is stored as a child FP.
+        // Invoke that FP here and store it in a local variable.
         const char* destColor = "half4(1)";
         SkString destColorVarName;
-        if (fp.willReadDstColor()) {
+        if (fp.fDestColorChildIndex >= 0) {
             destColorVarName = args.fFragBuilder->newTmpVarName("destColor");
             destColor = destColorVarName.c_str();
-            args.fFragBuilder->codeAppendf(
-                    "half4 %s = %s;\n", destColor, args.fFragBuilder->dstColor());
+            SkString childFP = this->invokeChild(fp.fDestColorChildIndex, args);
+            args.fFragBuilder->codeAppendf("half4 %s = %s;", destColor, childFP.c_str());
         }
 
         FPCallbacks callbacks(this,
@@ -257,6 +258,7 @@
         sk_sp<SkRuntimeEffect> effect,
         const char* name,
         std::unique_ptr<GrFragmentProcessor> inputFP,
+        std::unique_ptr<GrFragmentProcessor> destColorFP,
         sk_sp<SkData> uniforms,
         SkSpan<std::unique_ptr<GrFragmentProcessor>> childFPs) {
     if (uniforms->size() != effect->uniformSize()) {
@@ -273,6 +275,9 @@
     if (inputFP) {
         fp->setInput(std::move(inputFP));
     }
+    if (destColorFP) {
+        fp->setDestColorFP(std::move(destColorFP));
+    }
     return fp;
 }
 
@@ -289,10 +294,6 @@
     if (fEffect->usesSampleCoords()) {
         this->setUsesSampleCoordsDirectly();
     }
-
-    if (fEffect->allowBlender()) {
-        this->setWillReadDstColor();
-    }
 }
 
 GrSkSLFP::GrSkSLFP(const GrSkSLFP& other)
@@ -310,15 +311,12 @@
         this->setUsesSampleCoordsDirectly();
     }
 
-    if (fEffect->allowBlender()) {
-        this->setWillReadDstColor();
-    }
-
     this->cloneAndRegisterAllChildProcessors(other);
 }
 
 void GrSkSLFP::addChild(std::unique_ptr<GrFragmentProcessor> child, bool mergeOptFlags) {
     SkASSERTF(fInputChildIndex == -1, "all addChild calls must happen before setInput");
+    SkASSERTF(fDestColorChildIndex == -1, "all addChild calls must happen before setDestColorFP");
     int childIndex = this->numChildProcessors();
     SkASSERT((size_t)childIndex < fEffect->fSampleUsages.size());
     if (mergeOptFlags) {
@@ -330,11 +328,20 @@
 void GrSkSLFP::setInput(std::unique_ptr<GrFragmentProcessor> input) {
     SkASSERTF(fInputChildIndex == -1, "setInput should not be called more than once");
     fInputChildIndex = this->numChildProcessors();
-    SkASSERT((size_t)fInputChildIndex == fEffect->fSampleUsages.size());
+    SkASSERT((size_t)fInputChildIndex >= fEffect->fSampleUsages.size());
     this->mergeOptimizationFlags(ProcessorOptimizationFlags(input.get()));
     this->registerChild(std::move(input), SkSL::SampleUsage::PassThrough());
 }
 
+void GrSkSLFP::setDestColorFP(std::unique_ptr<GrFragmentProcessor> destColorFP) {
+    SkASSERTF(fEffect->allowBlender(), "dest colors are only used by blend effects");
+    SkASSERTF(fDestColorChildIndex == -1, "setDestColorFP should not be called more than once");
+    fDestColorChildIndex = this->numChildProcessors();
+    SkASSERT((size_t)fDestColorChildIndex >= fEffect->fSampleUsages.size());
+    this->mergeOptimizationFlags(ProcessorOptimizationFlags(destColorFP.get()));
+    this->registerChild(std::move(destColorFP), SkSL::SampleUsage::PassThrough());
+}
+
 std::unique_ptr<GrGLSLFragmentProcessor> GrSkSLFP::onMakeProgramImpl() const {
     return std::make_unique<GrGLSLSkSLFP>();
 }
diff --git a/src/gpu/effects/GrSkSLFP.h b/src/gpu/effects/GrSkSLFP.h
index 5448884..b7affa6 100644
--- a/src/gpu/effects/GrSkSLFP.h
+++ b/src/gpu/effects/GrSkSLFP.h
@@ -100,6 +100,7 @@
             sk_sp<SkRuntimeEffect> effect,
             const char* name,
             std::unique_ptr<GrFragmentProcessor> inputFP,
+            std::unique_ptr<GrFragmentProcessor> destColorFP,
             sk_sp<SkData> uniforms,
             SkSpan<std::unique_ptr<GrFragmentProcessor>> childFPs);
 
@@ -164,6 +165,7 @@
 
     void addChild(std::unique_ptr<GrFragmentProcessor> child, bool mergeOptFlags);
     void setInput(std::unique_ptr<GrFragmentProcessor> input);
+    void setDestColorFP(std::unique_ptr<GrFragmentProcessor> destColorFP);
 
     std::unique_ptr<GrGLSLFragmentProcessor> onMakeProgramImpl() const override;
 
@@ -365,6 +367,7 @@
     const char*            fName;
     uint32_t               fUniformSize;
     int                    fInputChildIndex = -1;
+    int                    fDestColorChildIndex = -1;
 
     GR_DECLARE_FRAGMENT_PROCESSOR_TEST