Add SIMD fallbacks for missing builtins
We can't expect every platform that uses Rive to have access to a recent clang. Add fallbacks with a performance warning when builtins aren't supported.
It's worth considering adding a gcc or older clang runner in order to test these fallbacks.
Diffs=
e98b93a61 Add SIMD fallbacks for missing builtins
diff --git a/.rive_head b/.rive_head
index ed79e9f..04fcdcd 100644
--- a/.rive_head
+++ b/.rive_head
@@ -1 +1 @@
-466f68e3a09af0e4d1a3d391eb5a1bcff64840b9
+e98b93a61cfa2a193ac2850b91f878be994fcdb4
diff --git a/include/rive/math/simd.hpp b/include/rive/math/simd.hpp
index 726a5c7..dfc38fb 100644
--- a/include/rive/math/simd.hpp
+++ b/include/rive/math/simd.hpp
@@ -32,41 +32,6 @@
template <typename T, int N>
using gvec = T __attribute__((ext_vector_type(N))) __attribute__((aligned(sizeof(T) * N)));
-////// Math //////
-
-// Similar to std::min(), with a noteworthy difference:
-// If a[i] or b[i] is NaN and the other is not, returns whichever is _not_ NaN.
-template <typename T, int N> SIMD_ALWAYS_INLINE gvec<T, N> min(gvec<T, N> a, gvec<T, N> b)
-{
- return __builtin_elementwise_min(a, b);
-}
-
-// Similar to std::max(), with a noteworthy difference:
-// If a[i] or b[i] is NaN and the other is not, returns whichever is _not_ NaN.
-template <typename T, int N> SIMD_ALWAYS_INLINE gvec<T, N> max(gvec<T, N> a, gvec<T, N> b)
-{
- return __builtin_elementwise_max(a, b);
-}
-
-// Unlike std::clamp(), simd::clamp() always returns a value between lo and hi.
-//
-// Returns lo if x == NaN, but std::clamp() returns NaN.
-// Returns hi if hi <= lo.
-// Ignores hi and/or lo if they are NaN.
-//
-template <typename T, int N>
-SIMD_ALWAYS_INLINE gvec<T, N> clamp(gvec<T, N> x, gvec<T, N> lo, gvec<T, N> hi)
-{
- return min(max(lo, x), hi);
-}
-
-// Returns the absolute value of x per element, with one exception:
-// If x[i] is an integer type and equal to the minimum representable value, returns x[i].
-template <typename T, int N> SIMD_ALWAYS_INLINE gvec<T, N> abs(gvec<T, N> x)
-{
- return __builtin_elementwise_abs(x);
-}
-
////// Boolean logic //////
//
// Vector booleans are of type int32_t, where true is ~0 and false is 0. Vector booleans can be
@@ -94,13 +59,79 @@
return !any(~x);
}
+template <typename T,
+ int N,
+ typename std::enable_if<std::is_floating_point<T>::value>::type* = nullptr>
+SIMD_ALWAYS_INLINE gvec<int32_t, N> isnan(gvec<T, N> x)
+{
+ return !(x == x);
+}
+
+template <typename T, int N, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
+constexpr gvec<int32_t, N> isnan(gvec<T, N>)
+{
+ return {}; // Integer types are never NaN.
+}
+
+////// Math //////
+
+// Similar to std::min(), with a noteworthy difference:
+// If a[i] or b[i] is NaN and the other is not, returns whichever is _not_ NaN.
+template <typename T, int N> SIMD_ALWAYS_INLINE gvec<T, N> min(gvec<T, N> a, gvec<T, N> b)
+{
+#if __has_builtin(__builtin_elementwise_min)
+ return __builtin_elementwise_min(a, b);
+#else
+#pragma message("performance: __builtin_elementwise_min() not supported. Consider updating clang.")
+ // Generate the same behavior for NaN as the SIMD builtins. (isnan() is a no-op for int types.)
+ return b < a || isnan(a) ? b : a;
+#endif
+}
+
+// Similar to std::max(), with a noteworthy difference:
+// If a[i] or b[i] is NaN and the other is not, returns whichever is _not_ NaN.
+template <typename T, int N> SIMD_ALWAYS_INLINE gvec<T, N> max(gvec<T, N> a, gvec<T, N> b)
+{
+#if __has_builtin(__builtin_elementwise_max)
+ return __builtin_elementwise_max(a, b);
+#else
+#pragma message("performance: __builtin_elementwise_max() not supported. Consider updating clang.")
+ // Generate the same behavior for NaN as the SIMD builtins. (isnan() is a no-op for int types.)
+ return a < b || isnan(a) ? b : a;
+#endif
+}
+
+// Unlike std::clamp(), simd::clamp() always returns a value between lo and hi.
+//
+// Returns lo if x == NaN, but std::clamp() returns NaN.
+// Returns hi if hi <= lo.
+// Ignores hi and/or lo if they are NaN.
+//
+template <typename T, int N>
+SIMD_ALWAYS_INLINE gvec<T, N> clamp(gvec<T, N> x, gvec<T, N> lo, gvec<T, N> hi)
+{
+ return min(max(lo, x), hi);
+}
+
+// Returns the absolute value of x per element, with one exception:
+// If x[i] is an integer type and equal to the minimum representable value, returns x[i].
+template <typename T, int N> SIMD_ALWAYS_INLINE gvec<T, N> abs(gvec<T, N> x)
+{
+#if __has_builtin(__builtin_elementwise_abs)
+ return __builtin_elementwise_abs(x);
+#else
+#pragma message("performance: __builtin_elementwise_abs() not supported. Consider updating clang.")
+ return x < 0 ? -x : x; // But the negation in the "true" side so we never negate NaN.
+#endif
+}
+
////// Loading and storing //////
template <typename T, int N> SIMD_ALWAYS_INLINE gvec<T, N> load(const void* ptr)
{
- gvec<T, N> vec;
- __builtin_memcpy(&vec, ptr, sizeof(vec));
- return vec;
+ gvec<T, N> ret;
+ __builtin_memcpy(&ret, ptr, sizeof(T) * N);
+ return ret;
}
SIMD_ALWAYS_INLINE gvec<float, 2> load2f(const void* ptr) { return load<float, 2>(ptr); }
SIMD_ALWAYS_INLINE gvec<float, 4> load4f(const void* ptr) { return load<float, 4>(ptr); }
@@ -111,7 +142,7 @@
template <typename T, int N> SIMD_ALWAYS_INLINE void store(void* ptr, gvec<T, N> vec)
{
- __builtin_memcpy(ptr, &vec, sizeof(vec));
+ __builtin_memcpy(ptr, &vec, sizeof(T) * N);
}
template <typename T, int M, int N>
diff --git a/test/simd_test.cpp b/test/simd_test.cpp
index fadef68..20ed98d 100644
--- a/test/simd_test.cpp
+++ b/test/simd_test.cpp
@@ -143,6 +143,7 @@
{
CHECK(simd::all(simd::abs(float4{-1, 2, -3, 4}) == float4{1, 2, 3, 4}));
CHECK(simd::all(simd::abs(float2{-5, 6}) == float2{5, 6}));
+ CHECK(simd::all(simd::abs(float2{-0, 0}) == float2{0, 0}));
CHECK(simd::all(float4{-std::numeric_limits<float>::epsilon(),
-std::numeric_limits<float>::denorm_min(),
-std::numeric_limits<float>::max(),
@@ -153,9 +154,9 @@
));
float2 nan2 = simd::abs(float2{kNaN, -kNaN});
- CHECK(std::isnan(nan2.x));
- CHECK(std::isnan(nan2.y));
+ CHECK(simd::all(simd::isnan(nan2)));
CHECK(simd::all(simd::abs(int4{7, -8, 9, -10}) == int4{7, 8, 9, 10}));
+ CHECK(simd::all(simd::abs(int2{0, -0}) == int2{0, 0}));
// abs(INT_MIN) returns INT_MIN.
CHECK(
simd::all(simd::abs(int2{-std::numeric_limits<int32_t>::max(),