blob: 44fe643276cb5139fca3ebfc8af14cbf03f369a3 [file] [log] [blame]
/*
* Copyright 2015 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkUtils_opts_DEFINED
#define SkUtils_opts_DEFINED
namespace SK_OPTS_NS {
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
// Stores val into dst[0..n) using SSE2.
//
// The bulk is written eight elements at a time with unaligned 128-bit stores;
// the remaining 0-7 elements are written with one 64-bit store, two scalar
// stores and one scalar store, selected by bits 2, 1 and 0 of the leftover count.
// A count of zero or less writes nothing.
static void memset16(uint16_t* dst, uint16_t val, int n) {
    // The tail tests individual bits of n, and those bits are also set for
    // negative values.  Without this guard a negative count would skip the
    // bulk loop yet still write up to seven elements.
    if (n <= 0) {
        return;
    }

    __m128i* dst8 = (__m128i*)dst;
    const __m128i val8 = _mm_set1_epi16((short)val);
    for ( ; n >= 8; n -= 8) {
        _mm_storeu_si128(dst8++, val8);
    }
    dst = (uint16_t*)dst8;

    // Here 0 <= n <= 7.
    if (n & 4) {
        // Low 64 bits of the vector == four copies of val.
        _mm_storel_epi64((__m128i*)dst, val8);
        dst += 4;
    }
    if (n & 2) {
        // Two ordinary 16-bit stores.  dst is only guaranteed to be 2-byte
        // aligned, so a single store through a uint32_t* would be both a
        // misaligned access and a strict-aliasing violation.
        dst[0] = val;
        dst[1] = val;
        dst += 2;
    }
    if (n & 1) {
        *dst = val;
    }
}
// Stores val into dst[0..n) using SSE2.
//
// The bulk is written four elements at a time with unaligned 128-bit stores;
// the remaining 0-3 elements are written with one 64-bit store and one scalar
// store, selected by bits 1 and 0 of the leftover count.
// A count of zero or less writes nothing.
static void memset32(uint32_t* dst, uint32_t val, int n) {
    // The tail tests individual bits of n, and those bits are also set for
    // negative values.  Without this guard a negative count would skip the
    // bulk loop yet still write up to three elements.
    if (n <= 0) {
        return;
    }

    __m128i* dst4 = (__m128i*)dst;
    const __m128i val4 = _mm_set1_epi32((int)val);
    for ( ; n >= 4; n -= 4) {
        _mm_storeu_si128(dst4++, val4);
    }
    dst = (uint32_t*)dst4;

    // Here 0 <= n <= 3.
    if (n & 2) {
        // Low 64 bits of the vector == two copies of val.
        _mm_storel_epi64((__m128i*)dst, val4);
        dst += 2;
    }
    if (n & 1) {
        *dst = val;
    }
}
#elif defined(SK_ARM_HAS_NEON)
// Stores value into dst[0..n) using NEON.
//
// Writes 32 elements per iteration while at least 32 remain, then up to three
// 8-element vectors, then an optional 4-element half vector, then up to three
// scalars.  A count of zero or less writes nothing.
static void memset16(uint16_t* dst, uint16_t value, int n) {
    // The tail tests the low bits of n, and those bits are also set for
    // negative values.  Without this guard a negative count would skip the
    // bulk loop yet still write up to seven elements.
    if (n <= 0) {
        return;
    }

    uint16x8_t v8 = vdupq_n_u16(value);
    uint16x8x4_t v32 = {{ v8, v8, v8, v8 }};

    while (n >= 32) {
        // vst4q interleaves its four input vectors; that is harmless here
        // because every lane of every vector holds the same value.
        vst4q_u16(dst, v32);
        dst += 32;
        n -= 32;
    }

    // Here 0 <= n <= 31.  Each case intentionally falls into the next so that
    // exactly n / 8 full vectors are stored.
    switch (n / 8) {
        case 3: vst1q_u16(dst, v8); dst += 8;  // fallthrough
        case 2: vst1q_u16(dst, v8); dst += 8;  // fallthrough
        case 1: vst1q_u16(dst, v8); dst += 8;  // fallthrough
        default: break;
    }
    if (n & 4) {
        vst1_u16(dst, vget_low_u16(v8));
        dst += 4;
    }
    // Same pattern for the last 0-3 scalars.
    switch (n & 3) {
        case 3: *dst++ = value;  // fallthrough
        case 2: *dst++ = value;  // fallthrough
        case 1: *dst   = value;  // fallthrough
        default: break;
    }
}
// Stores value into dst[0..n) using NEON.
//
// Writes 16 elements per iteration while at least 16 remain, then up to three
// 4-element vectors, then an optional 2-element half vector, then an optional
// single scalar.  A count of zero or less writes nothing.
static void memset32(uint32_t* dst, uint32_t value, int n) {
    // The tail tests the low bits of n, and those bits are also set for
    // negative values.  Without this guard a negative count would skip the
    // bulk loop yet still write up to three elements.
    if (n <= 0) {
        return;
    }

    uint32x4_t v4 = vdupq_n_u32(value);
    uint32x4x4_t v16 = {{ v4, v4, v4, v4 }};

    while (n >= 16) {
        // vst4q interleaves its four input vectors; that is harmless here
        // because every lane of every vector holds the same value.
        vst4q_u32(dst, v16);
        dst += 16;
        n -= 16;
    }

    // Here 0 <= n <= 15.  Each case intentionally falls into the next so that
    // exactly n / 4 full vectors are stored.
    switch (n / 4) {
        case 3: vst1q_u32(dst, v4); dst += 4;  // fallthrough
        case 2: vst1q_u32(dst, v4); dst += 4;  // fallthrough
        case 1: vst1q_u32(dst, v4); dst += 4;  // fallthrough
        default: break;
    }
    if (n & 2) {
        vst1_u32(dst, vget_low_u32(v4));
        dst += 2;
    }
    if (n & 1) {
        *dst = value;
    }
}
#else // Neither NEON nor SSE2.
// Portable scalar version: stores val into each of the first n elements of dst.
// Nothing is written when n is zero or negative.
static void memset16(uint16_t* dst, uint16_t val, int n) {
    for (int i = 0; i < n; i++) {
        dst[i] = val;
    }
}
// Portable scalar version: stores val into each of the first n elements of dst.
// Nothing is written when n is zero or negative.
static void memset32(uint32_t* dst, uint32_t val, int n) {
    if (n <= 0) {
        return;
    }
    uint32_t* const stop = dst + n;
    for (uint32_t* cur = dst; cur != stop; ++cur) {
        *cur = val;
    }
}
#endif
} // namespace SK_OPTS_NS
#endif//SkUtils_opts_DEFINED