Support premul/unpremul of F16 during read/writePixels

Added PremulOutput and UnpremulOutput fragment processor (FP) helpers.
These are used (rather than GrConfigConversionEffect) when working with
F16 (half-float) textures, and will also be used for other configs whose
premul/unpremul conversion can't be round-tripped via rounding.

BUG=skia:5853

Change-Id: I101592c26c4f0b379d5e5a8678ef7b2f08e6ad56
Reviewed-on: https://skia-review.googlesource.com/9980
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/include/gpu/GrColor.h b/include/gpu/GrColor.h
index 64368b9..ad66523 100644
--- a/include/gpu/GrColor.h
+++ b/include/gpu/GrColor.h
@@ -259,6 +259,15 @@
         float a = fRGBA[3];
         return GrColor4f(fRGBA[0] * a, fRGBA[1] * a, fRGBA[2] * a, a);
     }
+
+    GrColor4f unpremul() const {
+        float a = fRGBA[3];
+        if (a <= 0.0f) {
+            return GrColor4f(0.0f, 0.0f, 0.0f, 0.0f);
+        }
+        float invAlpha = 1.0f / a;
+        return GrColor4f(fRGBA[0] * invAlpha, fRGBA[1] * invAlpha, fRGBA[2] * invAlpha, a);
+    }
 };
 
 /**
diff --git a/include/gpu/GrContext.h b/include/gpu/GrContext.h
index 57500cd..678add7 100644
--- a/include/gpu/GrContext.h
+++ b/include/gpu/GrContext.h
@@ -432,9 +432,9 @@
     /** Called before either of the above two functions to determine the appropriate fragment
         processors for conversions. */
     void testPMConversionsIfNecessary(uint32_t flags);
-    /** Returns true if we've already determined that createPMtoUPMEffect and createUPMToPMEffect
-        will fail. In such cases fall back to SW conversion. */
-    bool didFailPMUPMConversionTest() const;
+    /** Returns true if we've determined that createPMtoUPMEffect and createUPMToPMEffect will
+        succeed for the passed-in config. Otherwise we fall back to SW conversion. */
+    bool validPMUPMConversionExists(GrPixelConfig) const;
 
     /**
      * A callback similar to the above for use by the TextBlobCache
diff --git a/include/gpu/GrFragmentProcessor.h b/include/gpu/GrFragmentProcessor.h
index 55dc08f..faf17c1 100644
--- a/include/gpu/GrFragmentProcessor.h
+++ b/include/gpu/GrFragmentProcessor.h
@@ -56,6 +56,18 @@
     static sk_sp<GrFragmentProcessor> PremulInput(sk_sp<GrFragmentProcessor>);
 
     /**
+     *  Returns a fragment processor that calls the passed in fragment processor, and then premuls
+     *  the output.
+     */
+    static sk_sp<GrFragmentProcessor> PremulOutput(sk_sp<GrFragmentProcessor>);
+
+    /**
+     *  Returns a fragment processor that calls the passed in fragment processor, and then unpremuls
+     *  the output.
+     */
+    static sk_sp<GrFragmentProcessor> UnpremulOutput(sk_sp<GrFragmentProcessor>);
+
+    /**
      * Returns a fragment processor that runs the passed in array of fragment processors in a
      * series. The original input is passed to the first, the first's output is passed to the
      * second, etc. The output of the returned processor is the output of the last processor of the
diff --git a/src/gpu/GrContext.cpp b/src/gpu/GrContext.cpp
index bcb93b3..671376f 100644
--- a/src/gpu/GrContext.cpp
+++ b/src/gpu/GrContext.cpp
@@ -266,7 +266,7 @@
 
     bool applyPremulToSrc = false;
     if (kUnpremul_PixelOpsFlag & pixelOpsFlags) {
-        if (!GrPixelConfigIs8888Unorm(srcConfig)) {
+        if (!GrPixelConfigIs8888Unorm(srcConfig) && kRGBA_half_GrPixelConfig != srcConfig) {
             return false;
         }
         applyPremulToSrc = true;
@@ -279,7 +279,7 @@
     GrGpu::DrawPreference drawPreference = GrGpu::kNoDraw_DrawPreference;
     // Don't prefer to draw for the conversion (and thereby access a texture from the cache) when
     // we've already determined that there isn't a roundtrip preserving conversion processor pair.
-    if (applyPremulToSrc && !this->didFailPMUPMConversionTest()) {
+    if (applyPremulToSrc && this->validPMUPMConversionExists(srcConfig)) {
         drawPreference = GrGpu::kCallerPrefersDraw_DrawPreference;
     }
 
@@ -422,8 +422,8 @@
     }
 
     bool unpremul = SkToBool(kUnpremul_PixelOpsFlag & flags);
-    if (unpremul && !GrPixelConfigIs8888Unorm(dstConfig)) {
-        // The unpremul flag is only allowed for 8888 configs.
+    if (unpremul && !GrPixelConfigIs8888Unorm(dstConfig) && kRGBA_half_GrPixelConfig != dstConfig) {
+        // The unpremul flag is only allowed for 8888 and F16 configs.
         return false;
     }
     // We don't allow conversion between integer configs and float/fixed configs.
@@ -434,7 +434,7 @@
     GrGpu::DrawPreference drawPreference = GrGpu::kNoDraw_DrawPreference;
     // Don't prefer to draw for the conversion (and thereby access a texture from the cache) when
     // we've already determined that there isn't a roundtrip preserving conversion processor pair.
-    if (unpremul && !this->didFailPMUPMConversionTest()) {
+    if (unpremul && this->validPMUPMConversionExists(src->config())) {
         drawPreference = GrGpu::kCallerPrefersDraw_DrawPreference;
     }
 
@@ -865,12 +865,18 @@
     ASSERT_SINGLE_OWNER
     // We should have already called this->testPMConversionsIfNecessary().
     SkASSERT(fDidTestPMConversions);
-    GrConfigConversionEffect::PMConversion pmToUPM =
-        static_cast<GrConfigConversionEffect::PMConversion>(fPMToUPMConversion);
-    if (GrConfigConversionEffect::kNone_PMConversion != pmToUPM) {
-        return GrConfigConversionEffect::Make(texture, swizzle, pmToUPM, matrix);
+    if (kRGBA_half_GrPixelConfig == texture->config()) {
+        SkASSERT(swizzle == GrSwizzle::RGBA());
+        return GrFragmentProcessor::UnpremulOutput(
+                GrSimpleTextureEffect::Make(texture, nullptr, matrix));
     } else {
-        return nullptr;
+        GrConfigConversionEffect::PMConversion pmToUPM =
+            static_cast<GrConfigConversionEffect::PMConversion>(fPMToUPMConversion);
+        if (GrConfigConversionEffect::kNone_PMConversion != pmToUPM) {
+            return GrConfigConversionEffect::Make(texture, swizzle, pmToUPM, matrix);
+        } else {
+            return nullptr;
+        }
     }
 }
 
@@ -880,13 +886,20 @@
     ASSERT_SINGLE_OWNER
     // We should have already called this->testPMConversionsIfNecessary().
     SkASSERT(fDidTestPMConversions);
-    GrConfigConversionEffect::PMConversion pmToUPM =
-        static_cast<GrConfigConversionEffect::PMConversion>(fPMToUPMConversion);
-    if (GrConfigConversionEffect::kNone_PMConversion != pmToUPM) {
-        return GrConfigConversionEffect::Make(this->resourceProvider(),
-                                              proxy, swizzle, pmToUPM, matrix);
+    if (kRGBA_half_GrPixelConfig == proxy->config()) {
+        SkASSERT(swizzle == GrSwizzle::RGBA());
+        return GrFragmentProcessor::UnpremulOutput(
+                GrSimpleTextureEffect::Make(this->resourceProvider(), std::move(proxy),
+                                            nullptr, matrix));
     } else {
-        return nullptr;
+        GrConfigConversionEffect::PMConversion pmToUPM =
+            static_cast<GrConfigConversionEffect::PMConversion>(fPMToUPMConversion);
+        if (GrConfigConversionEffect::kNone_PMConversion != pmToUPM) {
+            return GrConfigConversionEffect::Make(this->resourceProvider(), std::move(proxy),
+                                                  swizzle, pmToUPM, matrix);
+        } else {
+            return nullptr;
+        }
     }
 }
 
@@ -896,22 +909,31 @@
     ASSERT_SINGLE_OWNER
     // We should have already called this->testPMConversionsIfNecessary().
     SkASSERT(fDidTestPMConversions);
-    GrConfigConversionEffect::PMConversion upmToPM =
-        static_cast<GrConfigConversionEffect::PMConversion>(fUPMToPMConversion);
-    if (GrConfigConversionEffect::kNone_PMConversion != upmToPM) {
-        return GrConfigConversionEffect::Make(this->resourceProvider(),
-                                              std::move(proxy), swizzle, upmToPM, matrix);
+    if (kRGBA_half_GrPixelConfig == proxy->config()) {
+        SkASSERT(swizzle == GrSwizzle::RGBA());
+        return GrFragmentProcessor::PremulOutput(
+                GrSimpleTextureEffect::Make(this->resourceProvider(), std::move(proxy),
+                                            nullptr, matrix));
     } else {
-        return nullptr;
+        GrConfigConversionEffect::PMConversion upmToPM =
+            static_cast<GrConfigConversionEffect::PMConversion>(fUPMToPMConversion);
+        if (GrConfigConversionEffect::kNone_PMConversion != upmToPM) {
+            return GrConfigConversionEffect::Make(this->resourceProvider(), std::move(proxy),
+                                                  swizzle, upmToPM, matrix);
+        } else {
+            return nullptr;
+        }
     }
 }
 
-bool GrContext::didFailPMUPMConversionTest() const {
+bool GrContext::validPMUPMConversionExists(GrPixelConfig config) const {
     ASSERT_SINGLE_OWNER
     // We should have already called this->testPMConversionsIfNecessary().
     SkASSERT(fDidTestPMConversions);
     // The PM<->UPM tests fail or succeed together so we only need to check one.
-    return GrConfigConversionEffect::kNone_PMConversion == fPMToUPMConversion;
+    // For F16, we always allow PM/UPM conversion on the GPU, even if it doesn't round-trip.
+    return GrConfigConversionEffect::kNone_PMConversion != fPMToUPMConversion ||
+           kRGBA_half_GrPixelConfig == config;
 }
 
 //////////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/GrFragmentProcessor.cpp b/src/gpu/GrFragmentProcessor.cpp
index 30b3061..27d0bcd 100644
--- a/src/gpu/GrFragmentProcessor.cpp
+++ b/src/gpu/GrFragmentProcessor.cpp
@@ -106,42 +106,84 @@
     return GrXfermodeFragmentProcessor::MakeFromDstProcessor(std::move(fp), SkBlendMode::kDstIn);
 }
 
+namespace {
+
+class PremulInputFragmentProcessor : public GrFragmentProcessor {
+public:
+    PremulInputFragmentProcessor()
+            : INHERITED(kPreservesOpaqueInput_OptimizationFlag |
+                        kConstantOutputForConstantInput_OptimizationFlag) {
+        this->initClassID<PremulInputFragmentProcessor>();
+    }
+
+    const char* name() const override { return "PremultiplyInput"; }
+
+private:
+    GrGLSLFragmentProcessor* onCreateGLSLInstance() const override {
+        class GLFP : public GrGLSLFragmentProcessor {
+        public:
+            void emitCode(EmitArgs& args) override {
+                GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
+
+                fragBuilder->codeAppendf("%s = %s;", args.fOutputColor, args.fInputColor);
+                fragBuilder->codeAppendf("%s.rgb *= %s.a;",
+                                            args.fOutputColor, args.fInputColor);
+            }
+        };
+        return new GLFP;
+    }
+
+    void onGetGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override {}
+
+    bool onIsEqual(const GrFragmentProcessor&) const override { return true; }
+
+    GrColor4f constantOutputForConstantInput(GrColor4f input) const override {
+        return input.premul();
+    }
+
+    typedef GrFragmentProcessor INHERITED;
+};
+
+class UnpremulInputFragmentProcessor : public GrFragmentProcessor {
+public:
+    UnpremulInputFragmentProcessor()
+            : INHERITED(kPreservesOpaqueInput_OptimizationFlag |
+                        kConstantOutputForConstantInput_OptimizationFlag) {
+        this->initClassID<UnpremulInputFragmentProcessor>();
+    }
+
+    const char* name() const override { return "UnpremultiplyInput"; }
+
+private:
+    GrGLSLFragmentProcessor* onCreateGLSLInstance() const override {
+        class GLFP : public GrGLSLFragmentProcessor {
+        public:
+            void emitCode(EmitArgs& args) override {
+                GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
+
+                fragBuilder->codeAppendf("%s = %s;", args.fOutputColor, args.fInputColor);
+                fragBuilder->codeAppendf("float invAlpha = %s.a <= 0.0 ? 0.0 : 1.0 / %s.a;",
+                                         args.fInputColor, args.fInputColor);
+                fragBuilder->codeAppendf("%s.rgb *= invAlpha;", args.fOutputColor);
+            }
+        };
+        return new GLFP;
+    }
+
+    void onGetGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override {}
+
+    bool onIsEqual(const GrFragmentProcessor&) const override { return true; }
+
+    GrColor4f constantOutputForConstantInput(GrColor4f input) const override {
+        return input.unpremul();
+    }
+
+    typedef GrFragmentProcessor INHERITED;
+};
+
+}
+
 sk_sp<GrFragmentProcessor> GrFragmentProcessor::PremulInput(sk_sp<GrFragmentProcessor> fp) {
-
-    class PremulInputFragmentProcessor : public GrFragmentProcessor {
-    public:
-        PremulInputFragmentProcessor()
-                : INHERITED(kPreservesOpaqueInput_OptimizationFlag |
-                            kConstantOutputForConstantInput_OptimizationFlag) {
-            this->initClassID<PremulInputFragmentProcessor>();
-        }
-
-        const char* name() const override { return "PremultiplyInput"; }
-    private:
-        GrGLSLFragmentProcessor* onCreateGLSLInstance() const override {
-            class GLFP : public GrGLSLFragmentProcessor {
-            public:
-                void emitCode(EmitArgs& args) override {
-                    GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
-
-                    fragBuilder->codeAppendf("%s = %s;", args.fOutputColor, args.fInputColor);
-                    fragBuilder->codeAppendf("%s.rgb *= %s.a;",
-                                             args.fOutputColor, args.fInputColor);
-                }
-            };
-            return new GLFP;
-        }
-
-        void onGetGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override {}
-
-        bool onIsEqual(const GrFragmentProcessor&) const override { return true; }
-
-        GrColor4f constantOutputForConstantInput(GrColor4f input) const override {
-            return input.premul();
-        }
-
-        typedef GrFragmentProcessor INHERITED;
-    };
     if (!fp) {
         return nullptr;
     }
@@ -149,6 +191,22 @@
     return GrFragmentProcessor::RunInSeries(fpPipeline, 2);
 }
 
+sk_sp<GrFragmentProcessor> GrFragmentProcessor::PremulOutput(sk_sp<GrFragmentProcessor> fp) {
+    if (!fp) {
+        return nullptr;
+    }
+    sk_sp<GrFragmentProcessor> fpPipeline[] = { fp, sk_make_sp<PremulInputFragmentProcessor>() };
+    return GrFragmentProcessor::RunInSeries(fpPipeline, 2);
+}
+
+sk_sp<GrFragmentProcessor> GrFragmentProcessor::UnpremulOutput(sk_sp<GrFragmentProcessor> fp) {
+    if (!fp) {
+        return nullptr;
+    }
+    sk_sp<GrFragmentProcessor> fpPipeline[] = { fp, sk_make_sp<UnpremulInputFragmentProcessor>() };
+    return GrFragmentProcessor::RunInSeries(fpPipeline, 2);
+}
+
 sk_sp<GrFragmentProcessor> GrFragmentProcessor::MakeInputPremulAndMulByOutput(
         sk_sp<GrFragmentProcessor> fp) {