Only use __builtin_reduce_add for integer types

Clang's language-extensions spec only defines this builtin for integer types:
https://clang.llvm.org/docs/LanguageExtensions.html

Fixes #4458

Diffs=
df91086ce Only __builtin_reduce_add for integer types
diff --git a/.rive_head b/.rive_head index ef506ed..26d733e 100644 --- a/.rive_head +++ b/.rive_head
@@ -1 +1 @@ -4e221ab4f7d90ef3c6611d59ed8f989075431f5b +df91086cea806be55fc82d13563318c2a0f4e4b1
diff --git a/include/rive/math/simd.hpp b/include/rive/math/simd.hpp index 84341b3..4ca983c 100644 --- a/include/rive/math/simd.hpp +++ b/include/rive/math/simd.hpp
@@ -308,21 +308,40 @@ template <typename T, int N> SIMD_ALWAYS_INLINE T dot(gvec<T, N> a, gvec<T, N> b) { - gvec<T, N> d = a * b; -#if __has_builtin(__builtin_reduce_add) - return __builtin_reduce_add(d); -#else -#pragma message("performance: __builtin_reduce_add() not supported. Consider updating clang.") - T s = d[0]; - for (int i = 1; i < N; ++i) - s += d[i]; - return s; -#endif + auto d = a * b; + if constexpr (N == 2) + return d.x + d.y; + else if constexpr (N == 3) + return d.x + d.y + d.z; + else if constexpr (N == 4) + return d.x + d.y + d.z + d.w; + else + { + T s = d[0]; + for (int i = 1; i < N; ++i) + s += d[i]; + return s; + } } +// We can use __builtin_reduce_add for integer types. +#if __has_builtin(__builtin_reduce_add) +template <int N> SIMD_ALWAYS_INLINE int32_t dot(gvec<int32_t, N> a, gvec<int32_t, N> b) +{ + auto d = a * b; + return __builtin_reduce_add(d); +} + +template <int N> SIMD_ALWAYS_INLINE uint32_t dot(gvec<uint32_t, N> a, gvec<uint32_t, N> b) +{ + auto d = a * b; + return __builtin_reduce_add(d); +} +#endif + SIMD_ALWAYS_INLINE float cross(gvec<float, 2> a, gvec<float, 2> b) { - gvec<float, 2> c = a * b.yx; + auto c = a * b.yx; return c.x - c.y; }
diff --git a/test/simd_test.cpp b/test/simd_test.cpp index ed937ac..6ec5a99 100644 --- a/test/simd_test.cpp +++ b/test/simd_test.cpp
@@ -351,13 +351,20 @@ TEST_CASE("dot", "[simd]") { CHECK(simd::dot(int2{0, 1}, int2{1, 0}) == 0); - CHECK(simd::dot(int2{1, 0}, int2{0, 1}) == 0); + CHECK(simd::dot(uint2{1, 0}, uint2{0, 1}) == 0); CHECK(simd::dot(int2{1, 1}, int2{1, -1}) == 0); - CHECK(simd::dot(int2{1, 1}, int2{1, 1}) == 2); + CHECK(simd::dot(uint2{1, 1}, uint2{1, 1}) == 2); CHECK(simd::dot(int2{1, 1}, int2{-1, -1}) == -2); - CHECK(simd::dot(simd::gvec<int, 3>{1, 2, 3}, simd::gvec<int, 3>{1, 2, 3}) == 14); + CHECK(simd::dot(ivec<3>{1, 2, -3}, ivec<3>{1, 2, 3}) == -4); + CHECK(simd::dot(uvec<3>{1, 2, 3}, uvec<3>{1, 2, 3}) == 14); CHECK(simd::dot(int4{1, 2, 3, 4}, int4{1, 2, 3, 4}) == 30); - CHECK(simd::dot(ivec<5>{1, 2, 3, 4, 5}, ivec<5>{1, 2, 3, 4, 5}) == 55); + CHECK(simd::dot(ivec<5>{1, 2, 3, 4, 5}, ivec<5>{1, 2, 3, 4, -5}) == 5); + CHECK(simd::dot(uvec<5>{1, 2, 3, 4, 5}, uvec<5>{1, 2, 3, 4, 5}) == 55); + + CHECK(simd::dot(float4{1, 2, 3, 4}, float4{4, 3, 2, 1}) == 20); + CHECK(simd::dot(vec<3>{1, 2, 3}, vec<3>{3, 2, 1}) == 10); + CHECK(simd::dot(float2{0, 1}, float2{1, 0}) == 0); + CHECK(simd::dot(vec<5>{1, 2, 3, 4, 5}, vec<5>{1, 2, 3, 4, 5}) == 55); } // Check simd::cross.