Improve SkARGB32_A8_BlitMask_SSE2
With clang, this instruction sequence:
- movzbl -3(%rbx), %edx
- pxor %xmm5, %xmm5
- pinsrw $0, %edx, %xmm5
- pinsrw $1, %edx, %xmm5
- movzbl -2(%rbx), %edx
- pinsrw $2, %edx, %xmm5
- pinsrw $3, %edx, %xmm5
- movzbl -1(%rbx), %edx
- pinsrw $4, %edx, %xmm5
- pinsrw $5, %edx, %xmm5
- movzbl (%rbx), %edx
- pinsrw $6, %edx, %xmm5
- pinsrw $7, %edx, %xmm5
becomes:
+ movd (%rbx), %xmm4
+ punpcklbw %xmm9, %xmm4
+ punpcklwd %xmm4, %xmm4
Note that clang already generates better code for this than MSVC 2013 does.
BUG=skia:
Review URL: https://codereview.chromium.org/609823003
diff --git a/AUTHORS b/AUTHORS
index c8827a5..ed4c657 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -15,6 +15,7 @@
ARM <*@arm.com>
Ehsan Akhgari <ehsan.akhgari@gmail.com>
George Wright <george@mozilla.com>
+Jeff Muizelaar <jmuizelaar@mozilla.com>
Google Inc. <*@google.com>
Igalia <*@igalia.com>
Intel <*@intel.com>
diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp
index 391b24c..363cdab 100644
--- a/src/opts/SkBlitRow_opts_SSE2.cpp
+++ b/src/opts/SkBlitRow_opts_SSE2.cpp
@@ -441,11 +441,10 @@
__m128i dst_pixel = _mm_load_si128(d);
//set the aphla value
- __m128i src_scale_wide = _mm_set_epi8(0, *(mask+3),\
- 0, *(mask+3),0, \
- *(mask+2),0, *(mask+2),\
- 0,*(mask+1), 0,*(mask+1),\
- 0, *mask,0,*mask);
+ __m128i src_scale_wide = _mm_cvtsi32_si128(*reinterpret_cast<const uint32_t*>(mask));
+ src_scale_wide = _mm_unpacklo_epi8(src_scale_wide,
+ _mm_setzero_si128());
+ src_scale_wide = _mm_unpacklo_epi16(src_scale_wide, src_scale_wide);
//call SkAlpha255To256()
src_scale_wide = _mm_add_epi16(src_scale_wide, c_1);