replace Sk4px with SkVx in SkBlitter_ARGB32
This is a 1:1 transliteration, with the skvx code more explicit about
what's going on, the old Sk4px code more abstracted and specificially
designed to work with unorm8 types. It's not as terse, but I think I've
got it equally or more clear?
I haven't changed anything here, but I think it should be pretty clear
how to generalize this up to say, 8 pixels at a time?
Writing this CL has got me pretty strongly considering adding explicit
bit-pun constructors to the skvx::Vec types, given how often I'm finding
them to be useful, particularly between uint32 and 4x uint8.
Change-Id: Ie62340e571258fc82176ae3d6605b1f96695f90e
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/207722
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
Auto-Submit: Mike Klein <mtklein@google.com>
diff --git a/src/core/SkBlitter_ARGB32.cpp b/src/core/SkBlitter_ARGB32.cpp
index abd63b7..c41579f 100644
--- a/src/core/SkBlitter_ARGB32.cpp
+++ b/src/core/SkBlitter_ARGB32.cpp
@@ -5,12 +5,12 @@
* found in the LICENSE file.
*/
-#include "Sk4px.h"
#include "SkColorData.h"
#include "SkCoreBlitters.h"
#include "SkShader.h"
#include "SkUtils.h"
#include "SkXfermodePriv.h"
+#include "SkVx.h"
static inline int upscale_31_to_32(int value) {
SkASSERT((unsigned)value <= 31);
@@ -1152,36 +1152,68 @@
}
}
-static void blend_row_A8(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
- auto mask = (const uint8_t*)vmask;
+#ifndef SK_SUPPORT_LEGACY_A8_MASKBLITTER
+using U32 = skvx::Vec< 4, uint32_t>;
+using U8x4 = skvx::Vec<16, uint8_t>;
+using U8 = skvx::Vec< 4, uint8_t>;
+
+static void drive(SkPMColor* dst, const SkPMColor* src, const uint8_t* cov, int n,
+ U8x4 (*kernel)(U8x4,U8x4,U8x4)) {
+
+ auto apply = [kernel](U32 dst, U32 src, U8 cov) -> U32 {
+ U8x4 cov_splat = skvx::shuffle<0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3>(cov);
+ return skvx::bit_pun<U32>(kernel(skvx::bit_pun<U8x4>(dst),
+ skvx::bit_pun<U8x4>(src),
+ cov_splat));
+ };
+ while (n >= 4) {
+ apply(U32::Load(dst), U32::Load(src), U8::Load(cov)).store(dst);
+ dst += 4;
+ src += 4;
+ cov += 4;
+ n -= 4;
+ }
+ while (n --> 0) {
+ *dst = apply(U32{*dst}, U32{*src}, U8{*cov})[0];
+ dst++;
+ src++;
+ cov++;
+ }
+}
+#endif
+
+static void blend_row_A8(SkPMColor* dst, const void* mask, const SkPMColor* src, int n) {
+ auto cov = (const uint8_t*)mask;
#ifdef SK_SUPPORT_LEGACY_A8_MASKBLITTER
for (int i = 0; i < n; ++i) {
- if (mask[i]) {
- dst[i] = SkBlendARGB32(src[i], dst[i], mask[i]);
+ if (cov[i]) {
+ dst[i] = SkBlendARGB32(src[i], dst[i], cov[i]);
}
}
#else
- Sk4px::MapDstSrcAlpha(n, dst, src, mask, [](const Sk4px& d, const Sk4px& s, const Sk4px& aa) {
- const auto s_aa = s.approxMulDiv255(aa);
- return s_aa + d.approxMulDiv255(s_aa.alphas().inv());
+ drive(dst, src, cov, n, [](U8x4 d, U8x4 s, U8x4 c) {
+ U8x4 s_aa = skvx::approx_scale(s, c),
+ alpha = skvx::shuffle<3,3,3,3, 7,7,7,7, 11,11,11,11, 15,15,15,15>(s_aa);
+ return s_aa + skvx::approx_scale(d, 255 - alpha);
});
#endif
}
-static void blend_row_A8_opaque(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
- auto mask = (const uint8_t*)vmask;
+static void blend_row_A8_opaque(SkPMColor* dst, const void* mask, const SkPMColor* src, int n) {
+ auto cov = (const uint8_t*)mask;
#ifdef SK_SUPPORT_LEGACY_A8_MASKBLITTER
for (int i = 0; i < n; ++i) {
- if (int m = mask[i]) {
- m += (m >> 7);
- dst[i] = SkAlphaMulQ(src[i], m) + SkAlphaMulQ(dst[i], 256 - m);
+ if (int c = cov[i]) {
+ c += (c >> 7);
+ dst[i] = SkAlphaMulQ(src[i], c) + SkAlphaMulQ(dst[i], 256 - c);
}
}
#else
- Sk4px::MapDstSrcAlpha(n, dst, src, mask, [](const Sk4px& d, const Sk4px& s, const Sk4px& aa) {
- return (s * aa + d * aa.inv()).div255();
+ drive(dst, src, cov, n, [](U8x4 d, U8x4 s, U8x4 c) {
+ return skvx::div255( skvx::cast<uint16_t>(s) * skvx::cast<uint16_t>( c )
+ + skvx::cast<uint16_t>(d) * skvx::cast<uint16_t>(255-c));
});
#endif
}