replace Sk4px with SkVx in SkBlitter_ARGB32 This is a 1:1 transliteration, with the skvx code more explicit about what's going on and the old Sk4px code more abstracted and specifically designed to work with unorm8 types. It's not as terse, but I think I've got it equally or more clear? I haven't changed anything here, but I think it should be pretty clear how to generalize this up to, say, 8 pixels at a time? Writing this CL has got me pretty strongly considering adding explicit bit-pun constructors to the skvx::Vec types, given how often I'm finding them to be useful, particularly between uint32 and 4x uint8. Change-Id: Ie62340e571258fc82176ae3d6605b1f96695f90e Reviewed-on: https://skia-review.googlesource.com/c/skia/+/207722 Commit-Queue: Mike Klein <mtklein@google.com> Reviewed-by: Michael Ludwig <michaelludwig@google.com> Auto-Submit: Mike Klein <mtklein@google.com>

commit: 3d6f8d0a46d57efda289d271647ef4d8b0f6f855 [log] [tgz]
author: Mike Klein <mtklein@google.com> Thu Apr 11 13:12:06 2019 -0500
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> Fri Apr 12 17:00:46 2019 +0000
tree: 76fbb68b288fbd30aa765dcadd62512871137c11
parent: 0b8bb88cbb2258bff5205c67db3d678a197addb2 [diff]
diff --git a/src/core/SkBlitter_ARGB32.cpp b/src/core/SkBlitter_ARGB32.cpp
index abd63b7..c41579f 100644
--- a/src/core/SkBlitter_ARGB32.cpp
+++ b/src/core/SkBlitter_ARGB32.cpp

@@ -5,12 +5,12 @@
  * found in the LICENSE file.
  */
 
-#include "Sk4px.h"
 #include "SkColorData.h"
 #include "SkCoreBlitters.h"
 #include "SkShader.h"
 #include "SkUtils.h"
 #include "SkXfermodePriv.h"
+#include "SkVx.h"
 
 static inline int upscale_31_to_32(int value) {
     SkASSERT((unsigned)value <= 31);
@@ -1152,36 +1152,68 @@
     }
 }
 
-static void blend_row_A8(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
-    auto mask = (const uint8_t*)vmask;
+#ifndef SK_SUPPORT_LEGACY_A8_MASKBLITTER
+using U32  = skvx::Vec< 4, uint32_t>;
+using U8x4 = skvx::Vec<16, uint8_t>;
+using U8   = skvx::Vec< 4, uint8_t>;
+
+static void drive(SkPMColor* dst, const SkPMColor* src, const uint8_t* cov, int n,
+                  U8x4 (*kernel)(U8x4,U8x4,U8x4)) {
+
+    auto apply = [kernel](U32 dst, U32 src, U8 cov) -> U32 {
+        U8x4 cov_splat = skvx::shuffle<0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3>(cov);
+        return skvx::bit_pun<U32>(kernel(skvx::bit_pun<U8x4>(dst),
+                                         skvx::bit_pun<U8x4>(src),
+                                         cov_splat));
+    };
+    while (n >= 4) {
+        apply(U32::Load(dst), U32::Load(src), U8::Load(cov)).store(dst);
+        dst += 4;
+        src += 4;
+        cov += 4;
+        n   -= 4;
+    }
+    while (n --> 0) {
+        *dst = apply(U32{*dst}, U32{*src}, U8{*cov})[0];
+        dst++;
+        src++;
+        cov++;
+    }
+}
+#endif
+
+static void blend_row_A8(SkPMColor* dst, const void* mask, const SkPMColor* src, int n) {
+    auto cov = (const uint8_t*)mask;
 
 #ifdef SK_SUPPORT_LEGACY_A8_MASKBLITTER
     for (int i = 0; i < n; ++i) {
-        if (mask[i]) {
-            dst[i] = SkBlendARGB32(src[i], dst[i], mask[i]);
+        if (cov[i]) {
+            dst[i] = SkBlendARGB32(src[i], dst[i], cov[i]);
         }
     }
 #else
-    Sk4px::MapDstSrcAlpha(n, dst, src, mask, [](const Sk4px& d, const Sk4px& s, const Sk4px& aa) {
-        const auto s_aa = s.approxMulDiv255(aa);
-        return s_aa + d.approxMulDiv255(s_aa.alphas().inv());
+    drive(dst, src, cov, n, [](U8x4 d, U8x4 s, U8x4 c) {
+        U8x4 s_aa  = skvx::approx_scale(s, c),
+             alpha = skvx::shuffle<3,3,3,3, 7,7,7,7, 11,11,11,11, 15,15,15,15>(s_aa);
+        return s_aa + skvx::approx_scale(d, 255 - alpha);
     });
 #endif
 }
 
-static void blend_row_A8_opaque(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
-    auto mask = (const uint8_t*)vmask;
+static void blend_row_A8_opaque(SkPMColor* dst, const void* mask, const SkPMColor* src, int n) {
+    auto cov = (const uint8_t*)mask;
 
 #ifdef SK_SUPPORT_LEGACY_A8_MASKBLITTER
     for (int i = 0; i < n; ++i) {
-        if (int m = mask[i]) {
-            m += (m >> 7);
-            dst[i] = SkAlphaMulQ(src[i], m) + SkAlphaMulQ(dst[i], 256 - m);
+        if (int c = cov[i]) {
+            c += (c >> 7);
+            dst[i] = SkAlphaMulQ(src[i], c) + SkAlphaMulQ(dst[i], 256 - c);
         }
     }
 #else
-    Sk4px::MapDstSrcAlpha(n, dst, src, mask, [](const Sk4px& d, const Sk4px& s, const Sk4px& aa) {
-        return (s * aa + d * aa.inv()).div255();
+    drive(dst, src, cov, n, [](U8x4 d, U8x4 s, U8x4 c) {
+        return skvx::div255( skvx::cast<uint16_t>(s) * skvx::cast<uint16_t>(  c  )
+                           + skvx::cast<uint16_t>(d) * skvx::cast<uint16_t>(255-c));
     });
 #endif
 }
commit	3d6f8d0a46d57efda289d271647ef4d8b0f6f855	[log] [tgz]
author	Mike Klein <mtklein@google.com>	Thu Apr 11 13:12:06 2019 -0500
committer	Skia Commit-Bot <skia-commit-bot@chromium.org>	Fri Apr 12 17:00:46 2019 +0000
tree	76fbb68b288fbd30aa765dcadd62512871137c11
parent	0b8bb88cbb2258bff5205c67db3d678a197addb2 [diff]