Make CPU nearest image sampling round down and remove matrix tweak

Manipulating the "total matrix" is incompatible with the
idea that effects might insert non-linear transformations of the
local coords.

Instead, we modify the code that samples images to round down at
exact integers by subtracting one ULP before taking the floor. This
ensures that drawing an image with a 1:1 mapping of device pixels to
image pixels, but with a half-pixel offset, continues to select each
image pixel exactly once. However, a mirrored (-1:1) mapping with a
half-pixel offset will repeat one value and miss another.

The new behavior is guarded for now, pending a Chrome test rebaseline:
defining SK_LEGACY_NEAREST_SAMPLE_MATRIX_TWEAK keeps the old matrix tweak.

Bug: skia:13752
Change-Id: If86f6cf9821c0dd23bc7835d89a789d0d70e546a
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/585397
Commit-Queue: Brian Salomon <bsalomon@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
diff --git a/src/core/SkBitmapProcState.h b/src/core/SkBitmapProcState.h
index 39c9e41..85c3f42 100644
--- a/src/core/SkBitmapProcState.h
+++ b/src/core/SkBitmapProcState.h
@@ -160,17 +160,26 @@
                    SkIntToScalar(x) + SK_ScalarHalf,
                    SkIntToScalar(y) + SK_ScalarHalf, &pt);
 
-        SkFixed biasX, biasY;
+        SkFixed biasX = 0, biasY = 0;
         if (s.fBilerp) {
             biasX = s.fFilterOneX >> 1;
             biasY = s.fFilterOneY >> 1;
         } else {
+#if defined(SK_LEGACY_NEAREST_SAMPLE_MATRIX_TWEAK)
             // SkFixed epsilon bias to ensure inverse-mapped bitmap coordinates are rounded
             // consistently WRT geometry.  Note that we only need the bias for positive scales:
             // for negative scales, the rounding is intrinsically correct.
             // We scale it to persist SkFractionalInt -> SkFixed conversions.
             biasX = (s.fInvMatrix.getScaleX() > 0);
             biasY = (s.fInvMatrix.getScaleY() > 0);
+#else
+            // Our rasterizer biases upward: a rect from 0.5...1.5 fills pixel 1, not pixel 0.
+            // So that an image mapped 1:1 to device pixels at a half-pixel offset samples every
+            // src pixel exactly once, exact-integer sample coordinates round down rather than
+            // up. Note that a mirror mapping will not have this property.
+            biasX = 1;
+            biasY = 1;
+#endif
         }
 
         // punt to unsigned for defined underflow behavior
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index 655ce12..714aad3 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -144,8 +144,8 @@
     int         stride;
     float       width;
     float       height;
-
     float       weights[16];  // for bicubic and bicubic_clamp_8888
+    int         coordBiasInULPs = 0;
 };
 
 // State shared by save_xy, accumulate, and bilinear_* / bicubic_*.
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 9f0a1b49..01b244f 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -1427,9 +1427,8 @@
 // Used by gather_ stages to calculate the base pointer and a vector of indices to load.
 template <typename T>
 SI U32 ix_and_ptr(T** ptr, const SkRasterPipeline_GatherCtx* ctx, F x, F y) {
-    x = clamp(x, ctx->width);
-    y = clamp(y, ctx->height);
-
+    x = clamp(sk_bit_cast<F>(sk_bit_cast<U32>(x) + ctx->coordBiasInULPs), ctx->width );
+    y = clamp(sk_bit_cast<F>(sk_bit_cast<U32>(y) + ctx->coordBiasInULPs), ctx->height);
     *ptr = (const T*)ctx->pixels;
     return trunc_(y)*ctx->stride + trunc_(x);
 }
@@ -3567,8 +3566,8 @@
     const F w = sk_bit_cast<float>( sk_bit_cast<uint32_t>(ctx->width ) - 1),
             h = sk_bit_cast<float>( sk_bit_cast<uint32_t>(ctx->height) - 1);
 
-    x = min(max(0, x), w);
-    y = min(max(0, y), h);
+    x = min(max(0, sk_bit_cast<F>(sk_bit_cast<U32>(x) + ctx->coordBiasInULPs)), w);
+    y = min(max(0, sk_bit_cast<F>(sk_bit_cast<U32>(y) + ctx->coordBiasInULPs)), h);
 
     *ptr = (const T*)ctx->pixels;
     return trunc_(y)*ctx->stride + trunc_(x);
@@ -3576,6 +3575,7 @@
 
 template <typename T>
 SI U32 ix_and_ptr(T** ptr, const SkRasterPipeline_GatherCtx* ctx, I32 x, I32 y) {
+    SkASSERT(ctx->coordBiasInULPs == 0);
     // Exclusive -> inclusive.
     const I32 w =  ctx->width - 1,
               h = ctx->height - 1;
diff --git a/src/shaders/SkImageShader.cpp b/src/shaders/SkImageShader.cpp
index 3a6db42..7d53737 100755
--- a/src/shaders/SkImageShader.cpp
+++ b/src/shaders/SkImageShader.cpp
@@ -473,22 +473,24 @@
 }
 
 static SkMatrix tweak_inv_matrix(SkFilterMode filter, SkMatrix matrix) {
+#if defined(SK_LEGACY_NEAREST_SAMPLE_MATRIX_TWEAK)
     // See skia:4649 and the GM nearest_half_pixel_image.
     if (filter == SkFilterMode::kNearest) {
         if (matrix.getScaleX() >= 0) {
             matrix.setTranslateX(nextafterf(matrix.getTranslateX(),
-                                            floorf(matrix.getTranslateX())));
+                                           floorf(matrix.getTranslateX())));
         }
         if (matrix.getScaleY() >= 0) {
             matrix.setTranslateY(nextafterf(matrix.getTranslateY(),
                                             floorf(matrix.getTranslateY())));
         }
     }
+#endif
     return matrix;
 }
 
 bool SkImageShader::doStages(const SkStageRec& rec, TransformShader* updater) const {
-    SkASSERT(!needs_subset(fImage.get(), fSubset)); // TODO(skbug.com/12784)
+    SkASSERT(!needs_subset(fImage.get(), fSubset));  // TODO(skbug.com/12784)
     // We only support certain sampling options in stages so far
     auto sampling = fSampling;
     if (sampling.isAniso()) {
@@ -541,8 +543,18 @@
     auto gather = alloc->make<SkRasterPipeline_GatherCtx>();
     gather->pixels = pm.addr();
     gather->stride = pm.rowBytesAsPixels();
-    gather->width  = pm.width();
+    gather->width = pm.width();
     gather->height = pm.height();
+    // Our rasterizer biases upward: a rect from 0.5...1.5 fills pixel 1, not pixel 0. So that an
+    // image mapped 1:1 to device pixels at a half-pixel offset samples every src pixel exactly
+    // once, exact-integer sample coordinates round down rather than up. Note that a mirror
+    // mapping will not have this property.
+#if !defined(SK_LEGACY_NEAREST_SAMPLE_MATRIX_TWEAK)
+    if (!sampling.useCubic && sampling.filter == SkFilterMode::kNearest) {
+        gather->coordBiasInULPs = -1;
+    }
+#endif
+
     if (sampling.useCubic) {
         CubicResamplerMatrix(sampling.cubic.B, sampling.cubic.C).getColMajor(gather->weights);
     }
@@ -983,6 +995,14 @@
                         lerp(sample_texel(u, left,bottom), sample_texel(u, right,bottom), fx), fy);
         } else {
             SkASSERT(sampling.filter == SkFilterMode::kNearest);
+            // Our rasterizer biases upward: a rect from 0.5...1.5 fills pixel 1, not pixel 0.
+            // So that an image mapped 1:1 to device pixels at a half-pixel offset samples every
+            // src pixel exactly once, exact-integer sample coordinates round down rather than
+            // up. Note that a mirror mapping will not have this property.
+#if !defined(SK_LEGACY_NEAREST_SAMPLE_MATRIX_TWEAK)
+            local.x = skvm::pun_to_F32(skvm::pun_to_I32(local.x) - 1);
+            local.y = skvm::pun_to_F32(skvm::pun_to_I32(local.y) - 1);
+#endif
             return sample_texel(u, local.x,local.y);
         }
     };