clamp after color pipeline when dst is normalized

Most code working with unorm values doesn't expect to see out-of-gamut
colors, but unorms _can_ store some out-of-gamut colors when
premultiplied and alpha is <1.

Clamping after the color (= shader + color filter) pipeline prevents us
from creating those values.  This CL does appear to fix the problem we
were seeing with xfermodes2 in the narrow config.

I've kept scalepixels_unpremul working by using our old trick,
remembering that unpremul->unpremul involves the same steps as
premul->opaque.  We'll take the unpremul pixels, pun them to premul to
avoid data loss, scale them up, and then avoid unpremul on the way out
by punning the surface as opaque.  It all kind of just barely hangs
together.

Bug: chromium:867813
Change-Id: I677d00ecfe4692ba7799ae963d8b8d1ccd4b00ac
Reviewed-on: https://skia-review.googlesource.com/152200
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
Auto-Submit: Mike Klein <mtklein@google.com>
Reviewed-on: https://skia-review.googlesource.com/c/164624
diff --git a/src/core/SkPixmap.cpp b/src/core/SkPixmap.cpp
index bfbb7a4..a799e5f 100644
--- a/src/core/SkPixmap.cpp
+++ b/src/core/SkPixmap.cpp
@@ -295,16 +295,18 @@
         return src.readPixels(dst);
     }
 
-    // If src and dst are both unpremul, we'll fake them out to appear as if premul.
+    // If src and dst are both unpremul, we'll fake the source out to appear as if premul,
+    // and mark the destination as opaque.  This odd combination allows us to scale unpremul
+    // pixels without ever premultiplying them (which may lose information in the color channels).
+    // This is an idiosyncratic feature of scalePixels(), and is tested by scalepixels_unpremul GM.
     bool clampAsIfUnpremul = false;
     if (src.alphaType() == kUnpremul_SkAlphaType &&
         dst.alphaType() == kUnpremul_SkAlphaType) {
         src.reset(src.info().makeAlphaType(kPremul_SkAlphaType), src.addr(), src.rowBytes());
-        dst.reset(dst.info().makeAlphaType(kPremul_SkAlphaType), dst.addr(), dst.rowBytes());
+        dst.reset(dst.info().makeAlphaType(kOpaque_SkAlphaType), dst.addr(), dst.rowBytes());
 
-        // In turn, we'll need to tell the image shader to clamp to [0,1] instead
-        // of the usual [0,a] when using a bicubic scaling (kHigh_SkFilterQuality)
-        // or a gamut transformation.
+        // We'll need to tell the image shader to clamp to [0,1] instead of the
+        // usual [0,a] when using bicubic scaling (kHigh_SkFilterQuality).
         clampAsIfUnpremul = true;
     }
 
diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp
index 3e8399e..0ef37a8 100644
--- a/src/core/SkRasterPipelineBlitter.cpp
+++ b/src/core/SkRasterPipelineBlitter.cpp
@@ -50,8 +50,9 @@
     void blitV     (int x, int y, int height, SkAlpha alpha)        override;
 
 private:
-    void append_load_dst(SkRasterPipeline*) const;
-    void append_store   (SkRasterPipeline*) const;
+    void append_color_pipeline(SkRasterPipeline*) const;
+    void append_load_dst      (SkRasterPipeline*) const;
+    void append_store         (SkRasterPipeline*) const;
 
     // If we have an burst context, use it to fill our shader buffer.
     void burst_shade(int x, int y, int w);
@@ -239,6 +240,21 @@
     return blitter;
 }
 
+void SkRasterPipelineBlitter::append_color_pipeline(SkRasterPipeline* p) const {
+    p->extend(fColorPipeline);
+
+    // TODO: can we refine this condition further to avoid clamps when we're known in-gamut?
+    // When opaque we could _probably_ get away without a clamp, but for consistency we keep it.
+    if (fDst.info().colorType() != kRGBA_F16_SkColorType &&
+        fDst.info().colorType() != kRGBA_F32_SkColorType &&
+        fDst.info().alphaType() == kPremul_SkAlphaType)
+    {
+        // TODO: this will be common enough that we may want to fuse into ::clamp_premul.
+        p->append(SkRasterPipeline::clamp_0);
+        p->append(SkRasterPipeline::clamp_a);
+    }
+}
+
 void SkRasterPipelineBlitter::append_load_dst(SkRasterPipeline* p) const {
     const void* ctx = &fDstPtr;
     switch (fDst.info().colorType()) {
@@ -324,7 +340,7 @@
 
     if (!fBlitRect) {
         SkRasterPipeline p(fAlloc);
-        p.extend(fColorPipeline);
+        this->append_color_pipeline(&p);
         if (fBlend == SkBlendMode::kSrcOver
                 && (fDst.info().colorType() == kRGBA_8888_SkColorType ||
                     fDst.info().colorType() == kBGRA_8888_SkColorType)
@@ -360,7 +376,7 @@
 void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const int16_t runs[]) {
     if (!fBlitAntiH) {
         SkRasterPipeline p(fAlloc);
-        p.extend(fColorPipeline);
+        this->append_color_pipeline(&p);
         if (SkBlendMode_ShouldPreScaleCoverage(fBlend, /*rgb_coverage=*/false)) {
             p.append(SkRasterPipeline::scale_1_float, &fCurrentCoverage);
             this->append_load_dst(&p);
@@ -444,7 +460,7 @@
     // Lazily build whichever pipeline we need, specialized for each mask format.
     if (effectiveMaskFormat == SkMask::kA8_Format && !fBlitMaskA8) {
         SkRasterPipeline p(fAlloc);
-        p.extend(fColorPipeline);
+        this->append_color_pipeline(&p);
         if (SkBlendMode_ShouldPreScaleCoverage(fBlend, /*rgb_coverage=*/false)) {
             p.append(SkRasterPipeline::scale_u8, &fMaskPtr);
             this->append_load_dst(&p);
@@ -459,7 +475,7 @@
     }
     if (effectiveMaskFormat == SkMask::kLCD16_Format && !fBlitMaskLCD16) {
         SkRasterPipeline p(fAlloc);
-        p.extend(fColorPipeline);
+        this->append_color_pipeline(&p);
         if (SkBlendMode_ShouldPreScaleCoverage(fBlend, /*rgb_coverage=*/true)) {
             // Somewhat unusually, scale_565 needs dst loaded first.
             this->append_load_dst(&p);