Replace the per-channel RGBA lerp() with lerp_SWAR(), if only for code size

Change-Id: I55093bfb97ea8ca8868b77d6f964157a3356be23
Reviewed-on: https://skia-review.googlesource.com/c/192854
Reviewed-by: Mike Klein <mtklein@google.com>
diff --git a/src/eskia.cc b/src/eskia.cc
index 94dd0be..cfebb2c 100644
--- a/src/eskia.cc
+++ b/src/eskia.cc
@@ -84,20 +84,47 @@
     static_assert(lerp(42, 47, 128) == 42, "");
     static_assert(lerp(42, 47,  64) == 44, "");
 
-    static constexpr RGBA_8888 lerp(RGBA_8888 src, RGBA_8888 dst, int coverage) {
+    static constexpr ALWAYS_INLINE
+    RGBA_8888 lerp_SWAR(RGBA_8888 src, RGBA_8888 dst, uint8_t c) {
+        // See notes in srcover_SWAR() below for the general approach.
+        // Here we're just doing lerp()'s math instead.
+
+        auto  s_rb = implicit_cast<uint32_t>( (src.r << 0) | (src.b << 16) ),
+              s_ga = implicit_cast<uint32_t>( (src.g << 8) | (src.a << 24) ),
+              d_rb = implicit_cast<uint32_t>( (dst.r << 0) | (dst.b << 16) ),
+              d_ga = implicit_cast<uint32_t>( (dst.g << 8) | (dst.a << 24) );
+
+        uint32_t rgba = (( ( (s_rb>>0)*c + (d_rb>>0)*(128-c) ) & 0x7f807f80) >> 7)
+                      | (( ( (s_ga>>8)*c + (d_ga>>8)*(128-c) ) & 0x7f807f80) << 1);
+
         return {
-            lerp(src.r, dst.r, coverage),
-            lerp(src.g, dst.g, coverage),
-            lerp(src.b, dst.b, coverage),
-            lerp(src.a, dst.a, coverage),
+            implicit_cast<uint8_t>((rgba >>  0) & 0xff),
+            implicit_cast<uint8_t>((rgba >>  8) & 0xff),
+            implicit_cast<uint8_t>((rgba >> 16) & 0xff),
+            implicit_cast<uint8_t>((rgba >> 24) & 0xff),
         };
     }
-    // TODO(mtklein): lerp_SWAR like srcover_SWAR
 
     // The srcover blend mode, s + d*(1-sa), for 8-bit unorms.
     static constexpr uint8_t srcover(int s, int sa, int d) {
         return implicit_cast<uint8_t>(s + approx_div255(d * (255 - sa)));
     }
+    static constexpr bool test_srcover() {
+        for (int d = 0; d <= 255; d++) {
+            // Transparent src -> dst.
+            if (srcover(0, 0, d) != d) {
+                return false;
+            }
+            // Opaque src -> src.
+            for (int s = 0; s <= 255; s++) {
+                if (srcover(s, 255, d) != s) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+    static_assert(test_srcover(), "");
 
     // srcover() applied to all four channels at once,
     // using a SIMD-within-a-register approach to cut the number of multiplies in half.
@@ -139,27 +166,9 @@
             && x.b == y.b
             && x.a == y.a;
     }
-
     static_assert(srcover_SWAR({127,0,0,127}, {255,255,255,255}, 128)
                   == RGBA_8888{255,128,128,255}, "");
 
-    static constexpr bool test_srcover() {
-        for (int d = 0; d <= 255; d++) {
-            // Transparent src -> dst.
-            if (srcover(0, 0, d) != d) {
-                return false;
-            }
-            // Opaque src -> src.
-            for (int s = 0; s <= 255; s++) {
-                if (srcover(s, 255, d) != s) {
-                    return false;
-                }
-            }
-        }
-        return true;
-    }
-    static_assert(test_srcover(), "");
-
     static constexpr RGBA_8888 to_RGBA_8888(RGBA_8888 c) { return c; }
     static constexpr RGBA_8888 to_RGBA_8888(BGRA_8888 c) { return {c.r,c.g,c.b,c.a}; }
     static constexpr RGBA_8888 to_RGBA_8888(RGB_888   c) { return {c.r,c.g,c.b,255}; }
@@ -198,35 +207,35 @@
 
     void* src_RGB_565(void* dst, const RGBA_8888* src, uint8_t coverage, int n) {
         return blend(dst,src,n, [=](RGBA_8888 s, RGB_565 d) {
-            from_RGBA_8888(lerp(s, to_RGBA_8888(d), coverage), &d);
+            from_RGBA_8888(lerp_SWAR(s, to_RGBA_8888(d), coverage), &d);
             return d;
         });
     }
 
     void* src_BGR_565(void* dst, const RGBA_8888* src, uint8_t coverage, int n) {
         return blend(dst,src,n, [=](RGBA_8888 s, BGR_565 d) {
-            from_RGBA_8888(lerp(s, to_RGBA_8888(d), coverage), &d);
+            from_RGBA_8888(lerp_SWAR(s, to_RGBA_8888(d), coverage), &d);
             return d;
         });
     }
 
     void* src_RGB_888(void* dst, const RGBA_8888* src, uint8_t coverage, int n) {
         return blend(dst,src,n, [=](RGBA_8888 s, RGB_888 d) {
-            from_RGBA_8888(lerp(s, to_RGBA_8888(d), coverage), &d);
+            from_RGBA_8888(lerp_SWAR(s, to_RGBA_8888(d), coverage), &d);
             return d;
         });
     }
 
     void* src_RGBA_8888(void* dst, const RGBA_8888* src, uint8_t coverage, int n) {
         return blend(dst,src,n, [=](RGBA_8888 s, RGBA_8888 d) {
-            from_RGBA_8888(lerp(s, to_RGBA_8888(d), coverage), &d);
+            from_RGBA_8888(lerp_SWAR(s, to_RGBA_8888(d), coverage), &d);
             return d;
         });
     }
 
     void* src_BGRA_8888(void* dst, const RGBA_8888* src, uint8_t coverage, int n) {
         return blend(dst,src,n, [=](RGBA_8888 s, BGRA_8888 d) {
-            from_RGBA_8888(lerp(s, to_RGBA_8888(d), coverage), &d);
+            from_RGBA_8888(lerp_SWAR(s, to_RGBA_8888(d), coverage), &d);
             return d;
         });
     }
diff --git a/tools/bench/myriad.local b/tools/bench/myriad.local
index b7aa20b..0e425c0 100644
--- a/tools/bench/myriad.local
+++ b/tools/bench/myriad.local
@@ -1,4 +1,4 @@
-    rotate	    116.5µs
+    rotate	    115.2µs
 transforms	     34.2µs
- two_rects	     58.7µs
-rand_rects	  18714.6µs
+ two_rects	     58.9µs
+rand_rects	  18779.1µs
diff --git a/tools/size/myriad.local b/tools/size/myriad.local
index 4dd58b4..699da8a 100644
--- a/tools/size/myriad.local
+++ b/tools/size/myriad.local
@@ -1,2 +1,2 @@
 __TEXT	__DATA	__OBJC	others	dec	hex
-6353	0	0	60491	66844	1051c	/Users/mtklein/eskia/out/dev/libeskia.a(eskia.cc.o)
+6196	0	0	60431	66627	10443	/Users/mtklein/eskia/out/dev/libeskia.a(eskia.cc.o)