|  | /* | 
|  | * Copyright 2016 Google Inc. | 
|  | * | 
|  | * Use of this source code is governed by a BSD-style license that can be | 
|  | * found in the LICENSE file. | 
|  | */ | 
|  |  | 
|  | /* | 
|  | ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench  --samples 300 --nompd --match LinearSrcOver -q | 
|  | */ | 
|  |  | 
|  | #ifndef SkBlend_opts_DEFINED | 
|  | #define SkBlend_opts_DEFINED | 
|  |  | 
|  | #include "SkNx.h" | 
|  | #include "SkPM4fPriv.h" | 
|  |  | 
|  | #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 | 
|  | #include <immintrin.h> | 
|  | #endif | 
|  |  | 
|  | namespace SK_OPTS_NS { | 
|  |  | 
|  | static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { | 
|  | if (src >= 0xFF000000) { | 
|  | *dst = src; | 
|  | return; | 
|  | } | 
|  | auto d = Sk4f_fromS32(*dst), | 
|  | s = Sk4f_fromS32( src); | 
|  | *dst = Sk4f_toS32(s + d * (1.0f - s[3])); | 
|  | } | 
|  |  | 
|  | static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { | 
|  | srcover_srgb_srgb_1(dst++, *src++); | 
|  | srcover_srgb_srgb_1(dst++, *src++); | 
|  | srcover_srgb_srgb_1(dst++, *src++); | 
|  | srcover_srgb_srgb_1(dst  , *src  ); | 
|  | } | 
|  |  | 
|  | #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 | 
|  |  | 
// Reads four consecutive 32-bit values starting at p into one 128-bit vector.
// Uses the unaligned load intrinsic, so p needs no 16-byte alignment.
static inline __m128i load(const uint32_t* p) {
    const __m128i* vecPtr = reinterpret_cast<const __m128i*>(p);
    return _mm_loadu_si128(vecPtr);
}
|  |  | 
// Writes the 128-bit vector v to the four consecutive 32-bit slots starting
// at p. Uses the unaligned store intrinsic, so p needs no 16-byte alignment.
static inline void store(uint32_t* p, __m128i v) {
    __m128i* vecPtr = reinterpret_cast<__m128i*>(p);
    _mm_storeu_si128(vecPtr, v);
}
|  |  | 
|  | static void srcover_srgb_srgb( | 
|  | uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) { | 
|  | const __m128i alphaMask = _mm_set1_epi32(0xFF000000); | 
|  | while (ndst > 0) { | 
|  | int count = SkTMin(ndst, nsrc); | 
|  | ndst -= count; | 
|  | const uint32_t* src = srcStart; | 
|  | const uint32_t* end = dst + (count & ~3); | 
|  |  | 
|  | while (dst < end) { | 
|  | __m128i pixels = load(src); | 
|  |  | 
|  | if (_mm_testc_si128(pixels, alphaMask)) { | 
|  | store(dst, pixels); | 
|  | } else if (!_mm_testz_si128(pixels, alphaMask)) { | 
|  | srcover_srgb_srgb_4(dst, src); | 
|  | } | 
|  |  | 
|  | dst += 4; | 
|  | src += 4; | 
|  | } | 
|  |  | 
|  | count = count & 3; | 
|  | while (count-- > 0) { | 
|  | srcover_srgb_srgb_1(dst++, *src++); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | #else | 
|  |  | 
|  | static void srcover_srgb_srgb( | 
|  | uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | 
|  | while (ndst > 0) { | 
|  | int n = SkTMin(ndst, nsrc); | 
|  |  | 
|  | for (int i = 0; i < n; i++) { | 
|  | srcover_srgb_srgb_1(dst++, src[i]); | 
|  | } | 
|  | ndst -= n; | 
|  | } | 
|  | } | 
|  |  | 
|  | #endif | 
|  |  | 
|  | }  // namespace SK_OPTS_NS | 
|  |  | 
|  | #endif//SkBlend_opts_DEFINED |