| /* |
| * Copyright 2019 Google Inc. |
| * |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| * |
| * "fast_acos" test imported from skia:tests/SkVxTest.cpp |
| * |
| * Copyright 2022 Rive |
| */ |
| |
| // Ignore performance warnings in this file about having AVX disabled. We test vectors larger than 4 |
| // and aren't worried about performance. |
| // |
| // If we try to use large vectors in other parts of the code with AVX disabled, we definitely still |
| // want this warning. |
| #if defined(__clang__) || defined(__GNUC__) |
| #pragma GCC diagnostic ignored "-Wpsabi" |
| #endif |
| |
| #include <catch.hpp> |
| |
| #include "rive/math/math_types.hpp" |
| #include "rive/math/simd.hpp" |
| #include <limits> |
| |
| #define CHECK_ALL(B) CHECK(simd::all(B)) |
| #define CHECK_ANY(B) CHECK(simd::any(B)) |
| |
| namespace rive |
| { |
| constexpr float kInf = std::numeric_limits<float>::infinity(); |
| constexpr float kNaN = std::numeric_limits<float>::quiet_NaN(); |
| constexpr double kInf_double = std::numeric_limits<double>::infinity(); |
| constexpr double kNaN_double = std::numeric_limits<double>::quiet_NaN(); |
| |
| // Check simd::any. |
| TEST_CASE("any", "[simd]") |
| { |
| CHECK(!simd::any(int4{0, 0, 0, 0})); |
| CHECK_ANY((int4{-1, 0, 0, 0})); |
| CHECK_ANY((int4{0, -1, 0, 0})); |
| CHECK_ANY((int4{0, 0, -1, 0})); |
| CHECK_ANY((int4{0, 0, 0, -1})); |
| CHECK(!simd::any(ivec<3>{0, 0, 0})); |
| CHECK_ANY((ivec<3>{-1, 0, 0})); |
| CHECK_ANY((ivec<3>{0, -1, 0})); |
| CHECK_ANY((ivec<3>{0, 0, -1})); |
| CHECK(!simd::any(int2{0, 0})); |
| CHECK_ANY((int2{-1, 0})); |
| CHECK_ANY((int2{0, -1})); |
| CHECK(!simd::any(ivec<1>{0})); |
| CHECK_ANY((ivec<1>{-1})); |
| } |
| |
| // Check simd::all. |
| TEST_CASE("all", "[simd]") |
| { |
| CHECK_ALL((int4{-1, -1, -1, -1})); |
| CHECK(!simd::all(int4{0, -1, -1, -1})); |
| CHECK(!simd::all(int4{-1, 0, -1, -1})); |
| CHECK(!simd::all(int4{-1, -1, 0, -1})); |
| CHECK(!simd::all(int4{-1, -1, -1, 0})); |
| CHECK_ALL((ivec<3>{-1, -1, -1})); |
| CHECK(!simd::all(ivec<3>{0, -1, -1})); |
| CHECK(!simd::all(ivec<3>{-1, 0, -1})); |
| CHECK(!simd::all(ivec<3>{-1, -1, 0})); |
| CHECK_ALL((int2{-1, -1})); |
| CHECK(!simd::all(int2{0, -1})); |
| CHECK(!simd::all(int2{-1, 0})); |
| CHECK_ALL((ivec<1>{-1})); |
| CHECK(!simd::all(ivec<1>{0})); |
| } |
| |
| TEST_CASE("operators", "[simd]") |
| { |
| float4 a{1, 2, 3, 4}; |
| float4 b{5, 6, 7, 8}; |
| CHECK_ALL((a + b == float4{6, 8, 10, 12})); |
| CHECK_ALL((a - b == float4(-4))); |
| CHECK_ALL((a * b == float4{5, 12, 21, 32})); |
| CHECK_ALL((a / a == b / b)); |
| CHECK_ALL((a + 10.f == 10.f + a)); |
| CHECK_ALL((+(a - 10.f) == -(10.f - a))); |
| CHECK_ALL((a * 2.f == 2.f * a)); |
| CHECK_ALL((a / .5f == 2.f * a)); |
| |
| int4 i{1, 2, 4, 8}; |
| int4 j{0, 1, 3, 7}; |
| CHECK(!simd::any((i & j))); |
| CHECK_ALL(((i | j) == int4{1, 3, 7, 15})); |
| CHECK_ALL(((i | j) == (i ^ j))); |
| CHECK_ALL((~i + 1 == -i)); // Assume two's compliment for now... |
| } |
| |
| TEST_CASE("swizzles", "[simd]") |
| { |
| float2 v2{1, -2}; |
| CHECK(v2.x == 1); |
| CHECK(v2.y == -2); |
| CHECK(v2.yx[0] == v2[1]); |
| CHECK(v2.yx[1] == v2[0]); |
| CHECK_ALL((float2(v2.yx) == float2{-2, 1})); |
| CHECK_ALL((v2.yx == float2{-2, 1})); |
| CHECK_ALL((v2.xyxy == float4{1, -2, 1, -2})); |
| CHECK_ALL((v2.yxyx == float4{-2, 1, -2, 1})); |
| |
| float4 v4{1, -2, 3, -1}; |
| CHECK(v4.x == 1); |
| CHECK(v4.y == -2); |
| CHECK(v4.z == 3); |
| CHECK(v4.w == -1); |
| CHECK_ALL((v4.xy == float2{1, -2})); |
| CHECK_ALL((v4.yz == float2{-2, 3})); |
| CHECK_ALL((v4.zw == float2{3, -1})); |
| CHECK_ALL((v4.xyz == vec<3>{1, -2, 3})); |
| CHECK_ALL((v4.yzw == vec<3>{-2, 3, -1})); |
| CHECK_ALL((v4.yxwz == float4{-2, 1, -1, 3})); |
| CHECK_ALL((v4.zwxy == float4{3, -1, 1, -2})); |
| CHECK_ALL((v4.zyxw == float4{3, -2, 1, -1})); |
| CHECK_ALL((v4.xwzy == float4{1, -1, 3, -2})); |
| |
| v4.xy = v2.yx; |
| CHECK_ALL((v4 == float4{-2, 1, 3, -1})); |
| v4.zw = v4.yz; |
| CHECK_ALL((v4 == float4{-2, 1, 1, 3})); |
| v4.yz = -7.f; |
| CHECK_ALL((v4 == float4{-2, -7, -7, 3})); |
| v4.xyz = 0.f; |
| CHECK_ALL((v4 == float4{0, 0, 0, 3})); |
| v4.yzw = -9.f; |
| CHECK_ALL((v4 == float4{0, -9, -9, -9})); |
| v4.x = 1; |
| v4.y = 2; |
| v4.z = -8; |
| v4.w = .5f; |
| CHECK_ALL((v4 == float4{1, 2, -8, .5f})); |
| |
| v2.y = -9; |
| v2.x = 88; |
| CHECK_ALL((v2.yx == float2{-9, 88})); |
| |
| ivec<3> v3; |
| v3.x = 0; |
| v3.y = 9; |
| v3.z = -1; |
| CHECK_ALL((v3 == ivec<3>{0, 9, -1})); |
| CHECK(v3.x == 0); |
| CHECK(v3.y == 9); |
| CHECK(v3.z == -1); |
| |
| uvec<1> v1; |
| v1.x = 7; |
| CHECK(v1[0] == 7); |
| CHECK_ALL((v1 == uvec<1>{7})); |
| |
| float4 a = {0, 1, 12, 99.9f}; |
| float4 a_ = a.yxwz; |
| float4 b = {.1f, -1, -9, -20}; |
| float4 b_ = b.yxwz; |
| CHECK_ALL((simd::abs(b.yxwz) == simd::abs(b_))); |
| CHECK_ALL((simd::floor(a.yxwz) == simd::floor(a_))); |
| CHECK_ALL((simd::ceil(a.yxwz) == simd::ceil(a_))); |
| CHECK_ALL((simd::sqrt(a.yxwz) == simd::sqrt(a_))); |
| CHECK_ALL((simd::fast_acos(a.yxwz) == simd::fast_acos(a_))); |
| CHECK_ALL((simd::min(a.yxwz, b.yxwz) == simd::min(a_, b_))); |
| CHECK_ALL((simd::max(a.yxwz, b.yxwz) == simd::max(a_, b_))); |
| CHECK_ALL( |
| (simd::clamp(a.yxwz, float4(2), float4(10)) == simd::clamp(a_, float4(2), float4(10)))); |
| CHECK_ALL((simd::mix(a.yxwz, b.yxwz, float4(.5f)) == simd::mix(a_, b_, float4(.5f)))); |
| CHECK_ALL( |
| (simd::precise_mix(a.yxwz, b.yxwz, float4(.5f)) == simd::precise_mix(a_, b_, float4(.5f)))); |
| CHECK_ALL( |
| (simd::if_then_else(int4{~0}, a.yxwz, b.yxwz) == simd::if_then_else(int4{~0}, a_, b_))); |
| CHECK_ALL((simd::if_then_else(int4{~0}, a.yxwz, b.yxwz) == float4{a.y, b.x, b.w, b.z})); |
| CHECK_ALL((simd::if_then_else(~int4{~0}, a_, b_) == float4{b.y, a.x, a.w, a.z})); |
| CHECK_ALL( |
| (simd::if_then_else(int4{~0}, a.yxwz, b.yxwz) != simd::if_then_else(~int4{~0}, a_, b_))); |
| |
| float mem[4]{}; |
| simd::store(mem, a.yxwz); |
| CHECK(!memcmp(mem, &a_, 4 * 4)); |
| |
| // Unfortunate, but there's no way to block the default assignment operator and still be a POD |
| // type. |
| a.zwxy = b.zwxy; |
| CHECK_ALL((a == b)); |
| } |
| |
| // Verify the simd float types are IEEE 754 compliant for infinity and NaN. |
| template <typename T> void check_ieee_compliance() |
| { |
| using vec4 = simd::gvec<T, 4>; |
| using vec2 = simd::gvec<T, 2>; |
| constexpr T kTInf = std::numeric_limits<T>::infinity(); |
| |
| vec4 test = vec4{1, -kTInf, 1, 4} / vec4{0, 2, kTInf, 4}; |
| CHECK_ALL((test == vec4{kTInf, -kTInf, 0, 1})); |
| |
| // Inf * Inf == Inf |
| test = vec4{kTInf, -kTInf, kTInf, -kTInf} * vec4{kTInf, kTInf, -kTInf, -kTInf}; |
| CHECK_ALL((test == vec4{kTInf, -kTInf, -kTInf, kTInf})); |
| |
| // Inf/0 == Inf, 0/Inf == 0 |
| test = vec4{kTInf, -kTInf, 0, 0} / vec4{0, 0, kTInf, -kTInf}; |
| CHECK_ALL((test == vec4{kTInf, -kTInf, 0, 0})); |
| |
| // Inf/Inf, 0/0, 0 * Inf, Inf - Inf == NaN |
| test = {kTInf, 0, 0, kTInf}; |
| test.xy /= vec2{kTInf, 0}; |
| test.z *= kTInf; |
| test.w -= kTInf; |
| for (int i = 0; i < 4; ++i) |
| { |
| CHECK(std::isnan(test[i])); |
| } |
| // NaN always fails comparisons. |
| CHECK(!simd::any(test == test)); |
| CHECK_ALL((test != test)); |
| CHECK(!simd::any(test <= test)); |
| CHECK(!simd::any(test >= test)); |
| CHECK(!simd::any(test < test)); |
| CHECK(!simd::any(test > test)); |
| |
| // Inf + Inf == Inf, Inf + -Inf == NaN |
| test = vec4{kTInf, -kTInf, kTInf, -kTInf} + vec4{kTInf, -kTInf, -kTInf, kTInf}; |
| CHECK_ALL((test.xy == vec2{kTInf, -kTInf})); |
| CHECK(!simd::any(test.zw == test.zw)); // NaN |
| } |
| |
| TEST_CASE("ieee-compliance", "[simd]") |
| { |
| check_ieee_compliance<float>(); |
| check_ieee_compliance<double>(); |
| } |
| |
| // Check simd::if_then_else. |
| template <typename T> void check_if_then_else() |
| { |
| using vec4 = simd::gvec<T, 4>; |
| using vec2 = simd::gvec<T, 2>; |
| |
| // Vector condition. |
| vec4 f4 = simd::if_then_else(vec4{1, 2, 3, 4} < vec4{4, 3, 2, 1}, vec4(1), vec4(2)); |
| CHECK_ALL((f4 == vec4{1, 1, 2, 2})); |
| |
| // In vector, -1 is true, 0 is false. |
| vec2 u2 = simd::if_then_else(simd::gvec<typename simd::boolean_mask_type<T>::type, 2>{0, -1}, |
| vec2{1, 2}, |
| vec2{3, 4}); |
| CHECK_ALL((u2 == vec2{3, 2})); |
| |
| // Scalar condition. |
| f4 = u2.x == u2.y ? vec4{1, 2, 3, 4} : vec4{5, 6, 7, 8}; |
| CHECK_ALL((f4 == vec4{5, 6, 7, 8})); |
| } |
| |
| TEST_CASE("ternary-operator", "[simd]") |
| { |
| check_if_then_else<int8_t>(); |
| check_if_then_else<uint8_t>(); |
| check_if_then_else<int16_t>(); |
| check_if_then_else<uint16_t>(); |
| check_if_then_else<float>(); |
| check_if_then_else<int32_t>(); |
| check_if_then_else<uint32_t>(); |
| check_if_then_else<size_t>(); |
| check_if_then_else<double>(); |
| check_if_then_else<int64_t>(); |
| check_if_then_else<uint64_t>(); |
| } |
| |
| // Check simd::min/max compliance. |
| TEST_CASE("min-max", "[simd]") |
| { |
| float4 f4 = simd::min(float4{1, 2, 3, 4}, float4{4, 3, 2}); |
| CHECK_ALL((f4 == float4{1, 2, 2, 0})); |
| f4 = simd::max(float4{1, 2, 3, 4}, float4{4, 3, 2}); |
| CHECK_ALL((f4 == float4{4, 3, 3, 4})); |
| |
| int2 i2 = simd::max(int2(-1), int2{-2}); |
| CHECK_ALL((i2 == int2{-1, 0})); |
| i2 = simd::min(int2(-1), int2{-2}); |
| CHECK_ALL((i2 == int2{-2, -1})); |
| |
| // Infinity works as expected. |
| f4 = simd::min(float4{100, -kInf, -kInf, kInf}, float4{kInf, 100, kInf, -kInf}); |
| CHECK_ALL((f4 == float4{100, -kInf, -kInf, -kInf})); |
| f4 = simd::max(float4{100, -kInf, -kInf, kInf}, float4{kInf, 100, kInf, -kInf}); |
| CHECK_ALL((f4 == float4{kInf, 100, kInf, kInf})); |
| |
| // If a or b is NaN, min returns whichever is not NaN. |
| f4 = simd::min(float4{1, kNaN, 2, kNaN}, float4{kNaN, 1, 1, kNaN}); |
| CHECK_ALL((f4.xyz == 1.f)); |
| CHECK(std::isnan(f4.w)); |
| f4 = simd::max(float4{1, kNaN, 2, kNaN}, float4{kNaN, 1, 1, kNaN}); |
| CHECK_ALL((f4.xyz == vec<3>{1, 1, 2})); |
| CHECK(std::isnan(f4.w)); |
| |
| // fminf/fmaxf behaves the same as simd::min/max. |
| // simd::min/max differs from std::min/max when the first argument is NaN. |
| for (float f : {-2.f, -kInf, kInf}) |
| { |
| CHECK(simd::min<float, 1>(kNaN, f).x == f); |
| CHECK(fminf(kNaN, f) == f); |
| CHECK(std::isnan(std::min<float>(kNaN, f))); |
| CHECK(simd::max<float, 1>(kNaN, f).x == f); |
| CHECK(fmaxf(kNaN, f) == f); |
| CHECK(std::isnan(std::max<float>(kNaN, f))); |
| } |
| for (double d : {-1.0, -kInf_double, kInf_double}) |
| { |
| CHECK(simd::min<double, 1>(kNaN_double, d).x == d); |
| CHECK(fmin(kNaN_double, d) == d); |
| CHECK(std::isnan(std::min<double>(kNaN_double, d))); |
| CHECK(simd::max<double, 1>(kNaN_double, d).x == d); |
| CHECK(fmax(kNaN_double, d) == d); |
| CHECK(std::isnan(std::max<double>(kNaN_double, d))); |
| } |
| |
| // fminf/fmaxf/std::min/std::max/simd::min/stmd::max all behave the same when the second |
| // argument is NaN. |
| for (float f : {1.f, -kInf, kInf}) |
| { |
| CHECK(simd::min<float, 1>(f, kNaN).x == f); |
| CHECK(fminf(f, kNaN) == f); |
| CHECK(std::min<float>(f, kNaN) == f); |
| CHECK(simd::max<float, 1>(f, kNaN).x == f); |
| CHECK(fmaxf(f, kNaN) == f); |
| CHECK(std::max<float>(f, kNaN) == f); |
| } |
| for (double d : {2.0, -kInf_double, kInf_double}) |
| { |
| CHECK(simd::min<double, 1>(d, kNaN_double).x == d); |
| CHECK(fmin(d, kNaN_double) == d); |
| CHECK(std::min<double>(d, kNaN_double) == d); |
| CHECK(simd::max<double, 1>(d, kNaN_double).x == d); |
| CHECK(fmax(d, kNaN_double) == d); |
| CHECK(std::max<double>(d, kNaN_double) == d); |
| } |
| |
| // check non-32-bit types. |
| CHECK_ALL((simd::max(simd::gvec<double, 2>{3, 4}, simd::gvec<double, 2>{4, 3}) == |
| simd::gvec<double, 2>{4, 4})); |
| CHECK_ALL((simd::min(simd::gvec<uint64_t, 2>{3, 4}, simd::gvec<uint64_t, 2>{4, 3}) == |
| simd::gvec<uint64_t, 2>{3, 3})); |
| CHECK_ALL((simd::max(simd::gvec<size_t, 2>{3, 4}, simd::gvec<size_t, 2>{4, 3}) == |
| simd::gvec<size_t, 2>{4, 4})); |
| CHECK_ALL( |
| (simd::max(simd::gvec<uint8_t, 16>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, |
| simd::gvec<uint8_t, 16>{15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}) == |
| simd::gvec<uint8_t, 16>{15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15})); |
| } |
| |
| // Check simd::clamp. |
| TEST_CASE("clamp", "[simd]") |
| { |
| CHECK_ALL( |
| (simd::clamp(float4{1, 2, kInf, -kInf}, float4{2, 1, kInf, 0}, float4{3, 1, kInf, kInf}) == |
| float4{2, 1, kInf, 0})); |
| CHECK_ALL((simd::clamp(float4{1, kNaN, kInf, -kInf}, |
| float4{kNaN, 2, kNaN, 0}, |
| float4{kNaN, 3, kInf, kNaN}) == float4{1, 2, kInf, 0})); |
| float4 f4 = simd::clamp(float4{1, kNaN, kNaN, kNaN}, |
| float4{kNaN, 1, kNaN, kNaN}, |
| float4{kNaN, kNaN, 1, kNaN}); |
| CHECK_ALL((1.f == f4.xyz)); |
| CHECK(std::isnan(f4.w)); |
| |
| // Returns lo if x == NaN, but std::clamp() returns NaN. |
| CHECK(simd::clamp<float, 1>(kNaN, 1, 2).x == 1); |
| // Matches math::clamp(). |
| CHECK(simd::clamp<float, 1>(kNaN, 1, 2).x == math::clamp(kNaN, 1, 2)); |
| |
| // Returns hi if hi <= lo. |
| CHECK(simd::clamp<float, 1>(3, 2, 1).x == 1); |
| CHECK(simd::clamp<float, 1>(kNaN, 2, 1).x == 1); |
| CHECK(simd::clamp<float, 1>(kNaN, kNaN, 1).x == 1); |
| // Matches math::clamp(). |
| CHECK(simd::clamp<float, 1>(3, 2, 1).x == math::clamp(3, 2, 1)); |
| CHECK(simd::clamp<float, 1>(kNaN, 2, 1).x == math::clamp(kNaN, 2, 1)); |
| CHECK(simd::clamp<float, 1>(kNaN, kNaN, 1).x == math::clamp(kNaN, kNaN, 1)); |
| |
| // Ignores hi and/or lo if they are NaN. |
| CHECK(simd::clamp<float, 1>(3, 4, kNaN).x == 4); |
| CHECK(simd::clamp<float, 1>(3, 2, kNaN).x == 3); |
| CHECK(simd::clamp<float, 1>(3, kNaN, 2).x == 2); |
| CHECK(simd::clamp<float, 1>(3, kNaN, 4).x == 3); |
| CHECK(simd::clamp<float, 1>(3, kNaN, kNaN).x == 3); |
| // Matches math::clamp(). |
| CHECK(simd::clamp<float, 1>(3, 4, kNaN).x == math::clamp(3, 4, kNaN)); |
| CHECK(simd::clamp<float, 1>(3, 2, kNaN).x == math::clamp(3, 2, kNaN)); |
| CHECK(simd::clamp<float, 1>(3, kNaN, 2).x == math::clamp(3, kNaN, 2)); |
| CHECK(simd::clamp<float, 1>(3, kNaN, 4).x == math::clamp(3, kNaN, 4)); |
| CHECK(simd::clamp<float, 1>(3, kNaN, kNaN).x == math::clamp(3, kNaN, kNaN)); |
| } |
| |
| // Check simd::abs. |
| TEST_CASE("abs", "[simd]") |
| { |
| CHECK_ALL((simd::abs(float4{-1, 2, -3, 4}) == float4{1, 2, 3, 4})); |
| CHECK_ALL((simd::abs(float2{-5, 6}) == float2{5, 6})); |
| CHECK_ALL((simd::abs(float2{-0, 0}) == float2{0, 0})); |
| CHECK_ALL((float4{-std::numeric_limits<float>::epsilon(), |
| -std::numeric_limits<float>::denorm_min(), |
| -std::numeric_limits<float>::max(), |
| -kInf} == float4{-std::numeric_limits<float>::epsilon(), |
| -std::numeric_limits<float>::denorm_min(), |
| -std::numeric_limits<float>::max(), |
| -kInf} |
| |
| )); |
| float2 nan2 = simd::abs(float2{kNaN, -kNaN}); |
| CHECK_ALL((simd::isnan(nan2))); |
| CHECK_ALL((simd::abs(int4{7, -8, 9, -10}) == int4{7, 8, 9, 10})); |
| CHECK_ALL((simd::abs(int2{0, -0}) == int2{0, 0})); |
| // abs(INT_MIN) returns INT_MIN. |
| CHECK( |
| simd::all(simd::abs(int2{-std::numeric_limits<int32_t>::max(), |
| std::numeric_limits<int32_t>::min()}) == |
| int2{std::numeric_limits<int32_t>::max(), std::numeric_limits<int32_t>::min()})); |
| } |
| |
| // Check simd::reduce* methods. |
| TEST_CASE("reduce", "[simd]") |
| { |
| { |
| float4 v = {1, 2, 3, 4}; |
| CHECK(simd::reduce_add(v) == 10); |
| CHECK(simd::reduce_add(v.zwxy) == 10); |
| CHECK(simd::reduce_add(v.xyz) == 6); |
| CHECK(simd::reduce_add(v.yz) == 5); |
| CHECK(simd::reduce_add(v.xy.yxyx) == 6); |
| CHECK(simd::reduce_min(v) == 1); |
| CHECK(simd::reduce_min(v.zwxy) == 1); |
| CHECK(simd::reduce_min(v.xyz) == 1); |
| CHECK(simd::reduce_min(v.yz) == 2); |
| CHECK(simd::reduce_min(v.xy.yxyx) == 1); |
| CHECK(simd::reduce_max(v) == 4); |
| CHECK(simd::reduce_max(v.zwxy) == 4); |
| CHECK(simd::reduce_max(v.xyz) == 3); |
| CHECK(simd::reduce_max(v.yz) == 3); |
| CHECK(simd::reduce_max(v.xy.yxyx) == 2); |
| } |
| |
| { |
| int4 v = {1, 2, 3, 4}; |
| CHECK(simd::reduce_add(v) == 10); |
| CHECK(simd::reduce_add(v.zwxy) == 10); |
| CHECK(simd::reduce_add(v.xyz) == 6); |
| CHECK(simd::reduce_add(v.yz) == 5); |
| CHECK(simd::reduce_add(v.xy.yxyx) == 6); |
| CHECK(simd::reduce_min(v) == 1); |
| CHECK(simd::reduce_min(v.zwxy) == 1); |
| CHECK(simd::reduce_min(v.xyz) == 1); |
| CHECK(simd::reduce_min(v.yz) == 2); |
| CHECK(simd::reduce_min(v.xy.yxyx) == 1); |
| CHECK(simd::reduce_max(v) == 4); |
| CHECK(simd::reduce_max(v.zwxy) == 4); |
| CHECK(simd::reduce_max(v.xyz) == 3); |
| CHECK(simd::reduce_max(v.yz) == 3); |
| CHECK(simd::reduce_max(v.xy.yxyx) == 2); |
| CHECK(simd::reduce_and(v) == 0); |
| CHECK(simd::reduce_and(v.zwxy) == 0); |
| CHECK(simd::reduce_and(v.xyz) == 0); |
| CHECK(simd::reduce_and(v.yz) == 2); |
| CHECK(simd::reduce_and(v.xy.yxyx) == 0); |
| CHECK(simd::reduce_or(v) == 7); |
| CHECK(simd::reduce_or(v.zwxy) == 7); |
| CHECK(simd::reduce_or(v.xyz) == 3); |
| CHECK(simd::reduce_or(v.yz) == 3); |
| CHECK(simd::reduce_or(v.xy.yxyx) == 3); |
| } |
| |
| { |
| uint4 v = {1, 2, 3, 4}; |
| CHECK(simd::reduce_add(v) == 10); |
| CHECK(simd::reduce_add(v.zwxy) == 10); |
| CHECK(simd::reduce_add(v.xyz) == 6); |
| CHECK(simd::reduce_add(v.yz) == 5); |
| CHECK(simd::reduce_add(v.xy.yxyx) == 6); |
| CHECK(simd::reduce_min(v) == 1); |
| CHECK(simd::reduce_min(v.zwxy) == 1); |
| CHECK(simd::reduce_min(v.xyz) == 1); |
| CHECK(simd::reduce_min(v.yz) == 2); |
| CHECK(simd::reduce_min(v.xy.yxyx) == 1); |
| CHECK(simd::reduce_max(v) == 4); |
| CHECK(simd::reduce_max(v.zwxy) == 4); |
| CHECK(simd::reduce_max(v.xyz) == 3); |
| CHECK(simd::reduce_max(v.yz) == 3); |
| CHECK(simd::reduce_max(v.xy.yxyx) == 2); |
| CHECK(simd::reduce_and(v) == 0); |
| CHECK(simd::reduce_and(v.zwxy) == 0); |
| CHECK(simd::reduce_and(v.xyz) == 0); |
| CHECK(simd::reduce_and(v.yz) == 2); |
| CHECK(simd::reduce_and(v.xy.yxyx) == 0); |
| CHECK(simd::reduce_or(v) == 7); |
| CHECK(simd::reduce_or(v.zwxy) == 7); |
| CHECK(simd::reduce_or(v.xyz) == 3); |
| CHECK(simd::reduce_or(v.yz) == 3); |
| CHECK(simd::reduce_or(v.xy.yxyx) == 3); |
| } |
| } |
| |
| // Check simd::floor. |
| TEST_CASE("floor", "[simd]") |
| { |
| CHECK_ALL((simd::floor(float4{-1.9f, 1.9f, 2, -2}) == float4{-2, 1, 2, -2})); |
| CHECK_ALL((simd::floor(float2{kInf, -kInf}) == float2{kInf, -kInf})); |
| CHECK_ALL((simd::isnan(simd::floor(float2{kNaN, -kNaN})))); |
| } |
| |
| // Check simd::ceil. |
| TEST_CASE("ceil", "[simd]") |
| { |
| CHECK_ALL((simd::ceil(float4{-1.9f, 1.9f, 2, -2}) == float4{-1, 2, 2, -2})); |
| CHECK_ALL((simd::ceil(float2{kInf, -kInf}) == float2{kInf, -kInf})); |
| CHECK_ALL((simd::isnan(simd::ceil(float2{kNaN, -kNaN})))); |
| } |
| |
| // Check simd::sqrt. |
| TEST_CASE("sqrt", "[simd]") |
| { |
| CHECK_ALL((simd::sqrt(float4{1, 4, 9, 16}) == float4{1, 2, 3, 4})); |
| CHECK_ALL((simd::sqrt(float2{25, 36}) == float2{5, 6})); |
| CHECK_ALL((simd::sqrt(vec<1>{36}) == vec<1>{6})); |
| CHECK_ALL((simd::sqrt(vec<5>{49, 64, 81, 100, 121}) == vec<5>{7, 8, 9, 10, 11})); |
| CHECK_ALL((simd::isnan(simd::sqrt(float4{-1, -kInf, kNaN, -2})))); |
| CHECK_ALL((simd::sqrt(vec<3>{kInf, 0, 1}) == vec<3>{kInf, 0, 1})); |
| } |
| |
| static bool check_fast_acos(float x, float fast_acos_x) |
| { |
| float acosf_x = acosf(x); |
| float error = acosf_x - fast_acos_x; |
| if (!(fabsf(error) <= SIMD_FAST_ACOS_MAX_ERROR)) |
| { |
| auto rad2deg = [](float rad) { return rad * 180 / math::PI; }; |
| fprintf(stderr, |
| "Larger-than-expected error from skvx::fast_acos\n" |
| " x= %f\n" |
| " fast_acos_x= %f (%f degrees\n" |
| " acosf_x= %f (%f degrees\n" |
| " error= %f (%f degrees)\n" |
| " tolerance= %f (%f degrees)\n\n", |
| x, |
| fast_acos_x, |
| rad2deg(fast_acos_x), |
| acosf_x, |
| rad2deg(acosf_x), |
| error, |
| rad2deg(error), |
| SIMD_FAST_ACOS_MAX_ERROR, |
| rad2deg(SIMD_FAST_ACOS_MAX_ERROR)); |
| CHECK(false); |
| return false; |
| } |
| return true; |
| } |
| |
| TEST_CASE("fast_acos", "[simd]") |
| { |
| float4 boundaries = simd::fast_acos(float4{-1, 0, 1, 0}); |
| check_fast_acos(-1, boundaries[0]); |
| check_fast_acos(0, boundaries[1]); |
| check_fast_acos(+1, boundaries[2]); |
| |
| // Select a distribution of starting points around which to begin testing fast_acos. These |
| // fall roughly around the known minimum and maximum errors. No need to include -1, 0, or 1 |
| // since those were just tested above. (Those are tricky because 0 is an inflection and the |
| // derivative is infinite at 1 and -1.) |
| using float8 = vec<8>; |
| float8 x = {-.99f, -.8f, -.4f, -.2f, .2f, .4f, .8f, .99f}; |
| |
| // Converge at the various local minima and maxima of "fast_acos(x) - cosf(x)" and verify that |
| // fast_acos is always within "kTolerance" degrees of the expected answer. |
| float8 err_; |
| for (int iter = 0; iter < 10; ++iter) |
| { |
| // Run our approximate inverse cosine approximation. |
| auto fast_acos_x = simd::fast_acos(x); |
| |
| // Find d/dx(error) |
| // = d/dx(fast_acos(x) - acos(x)) |
| // = (f'g - fg')/gg + 1/sqrt(1 - x^2), [where f = bx^3 + ax, g = dx^4 + cx^2 + 1] |
| float8 xx = x * x; |
| float8 a = -0.939115566365855f; |
| float8 b = 0.9217841528914573f; |
| float8 c = -1.2845906244690837f; |
| float8 d = 0.295624144969963174f; |
| float8 f = (b * xx + a) * x; |
| float8 f_ = 3.f * b * xx + a; |
| float8 g = (d * xx + c) * xx + 1.f; |
| float8 g_ = (4.f * d * xx + 2.f * c) * x; |
| float8 gg = g * g; |
| float8 q = simd::sqrt(1.f - xx); |
| err_ = (f_ * g - f * g_) / gg + 1.f / q; |
| |
| // Find d^2/dx^2(error) |
| // = ((f''g - fg'')g^2 - (f'g - fg')2gg') / g^4 + x(1 - x^2)^(-3/2) |
| // = ((f''g - fg'')g - (f'g - fg')2g') / g^3 + x(1 - x^2)^(-3/2) |
| float8 f__ = 6.f * b * x; |
| float8 g__ = 12.f * d * xx + 2.f * c; |
| float8 err__ = ((f__ * g - f * g__) * g - (f_ * g - f * g_) * 2.f * g_) / (gg * g) + |
| x / ((1.f - xx) * q); |
| |
| #if 0 |
| SkDebugf("\n\niter %i\n", iter); |
| #endif |
| // Ensure each lane's approximation is within maximum error. |
| for (int j = 0; j < 8; ++j) |
| { |
| #if 0 |
| SkDebugf("x=%f err=%f err'=%f err''=%f\n", |
| x[j], rad2deg(skvx::fast_acos_x[j] - acosf(x[j])), |
| rad2deg(err_[j]), rad2deg(err__[j])); |
| #endif |
| if (!check_fast_acos(x[j], fast_acos_x[j])) |
| { |
| return; |
| } |
| } |
| |
| // Use Newton's method to update the x values to locations closer to their local minimum or |
| // maximum. (This is where d/dx(error) == 0.) |
| x -= err_ / err__; |
| x = simd::clamp<float, 8>(x, -.99f, .99f); |
| } |
| |
| // Verify each lane converged to a local minimum or maximum. |
| for (int j = 0; j < 8; ++j) |
| { |
| REQUIRE(math::nearly_zero(err_[j])); |
| } |
| |
| // Make sure we found all the actual known locations of local min/max error. |
| for (float knownRoot : {-0.983536f, -0.867381f, -0.410923f, 0.410923f, 0.867381f, 0.983536f}) |
| { |
| CHECK_ANY((simd::abs(x - knownRoot) < math::EPSILON)); |
| } |
| } |
| |
| TEST_CASE("cast", "[simd]") |
| { |
| float4 f4 = float4{-1.9f, -1.5f, 1.5f, 1.1f}; |
| CHECK(simd::all(simd::cast<int>(f4) == int4{-1, -1, 1, 1})); |
| CHECK(simd::all(simd::cast<int>(simd::floor(f4)) == int4{-2, -2, 1, 1})); |
| CHECK(simd::all(simd::cast<int>(simd::ceil(f4)) == int4{-1, -1, 2, 2})); |
| CHECK(simd::all(simd::cast<int>(simd::ceil(f4.zwxy)) == int4{2, 2, -1, -1})); |
| CHECK(simd::all(simd::cast<int>(simd::ceil(f4).zwxy) == int4{2, 2, -1, -1})); |
| } |
| |
| // Check simd::dot. |
| TEST_CASE("dot", "[simd]") |
| { |
| CHECK(simd::dot(int2{0, 1}, int2{1, 0}) == 0); |
| CHECK(simd::dot(uint2{1, 0}, uint2{0, 1}) == 0); |
| CHECK(simd::dot(int2{1, 1}, int2{1, -1}) == 0); |
| CHECK(simd::dot(uint2{1, 1}, uint2{1, 1}) == 2); |
| CHECK(simd::dot(int2{1, 1}, int2{-1, -1}) == -2); |
| CHECK(simd::dot(ivec<3>{1, 2, -3}, ivec<3>{1, 2, 3}) == -4); |
| CHECK(simd::dot(uvec<3>{1, 2, 3}, uvec<3>{1, 2, 3}) == 14); |
| CHECK(simd::dot(int4{1, 2, 3, 4}, int4{1, 2, 3, 4}) == 30); |
| CHECK(simd::dot(ivec<5>{1, 2, 3, 4, 5}, ivec<5>{1, 2, 3, 4, -5}) == 5); |
| CHECK(simd::dot(uvec<5>{1, 2, 3, 4, 5}, uvec<5>{1, 2, 3, 4, 5}) == 55); |
| |
| CHECK(simd::dot(float4{1, 2, 3, 4}, float4{4, 3, 2, 1}) == 20); |
| CHECK(simd::dot(vec<3>{1, 2, 3}, vec<3>{3, 2, 1}) == 10); |
| CHECK(simd::dot(float2{0, 1}, float2{1, 0}) == 0); |
| CHECK(simd::dot(vec<5>{1, 2, 3, 4, 5}, vec<5>{1, 2, 3, 4, 5}) == 55); |
| } |
| |
| // Check simd::cross. |
| TEST_CASE("cross", "[simd]") |
| { |
| CHECK(simd::cross({0, 1}, {0, 1}) == 0); |
| CHECK(simd::cross({1, 0}, {1, 0}) == 0); |
| CHECK(simd::cross({1, 1}, {1, 1}) == 0); |
| CHECK(simd::cross({1, 1}, {1, -1}) == -2); |
| CHECK(simd::cross({1, 1}, {-1, 1}) == 2); |
| } |
| |
| // Check simd::join |
| TEST_CASE("join", "[simd]") |
| { |
| CHECK_ALL((simd::join(int2{1, 2}, int4{3, 4, 5, 6}) == ivec<6>{1, 2, 3, 4, 5, 6})); |
| CHECK_ALL((simd::join(vec<1>{1}, vec<3>{2, 3, 4}) == float4{1, 2, 3, 4})); |
| CHECK_ALL((simd::join(vec<1>{1}, vec<2>{2, 3}, vec<3>{4, 5, 6}) == vec<6>{1, 2, 3, 4, 5, 6})); |
| CHECK_ALL((simd::join(vec<1>{1}, vec<2>{2, 3}, vec<3>{4, 5, 6}, float4{7, 8, 9, 10}) == |
| vec<10>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10})); |
| uint8x8 a = 3, b = 9, c = 3, d = 100; |
| CHECK_ALL((simd::join(a, b, c, d) == uint8x32{3, 3, 3, 3, 3, 3, 3, 3, 9, 9, 9, |
| 9, 9, 9, 9, 9, 3, 3, 3, 3, 3, 3, |
| 3, 3, 100, 100, 100, 100, 100, 100, 100, 100})); |
| } |
| |
| // Check simd::zip |
| TEST_CASE("zip", "[simd]") |
| { |
| CHECK_ALL((simd::zip(simd::gvec<char, 1>{'a'}, simd::gvec<char, 1>{'b'}) == |
| simd::gvec<char, 2>{'a', 'b'})); |
| CHECK_ALL((simd::zip(int2{1, 2}, int2{3, 4}) == int4{1, 3, 2, 4})); |
| CHECK_ALL((simd::zip(int4{1, 2, 3, 4}, int4{5, 6, 7, 8}) == ivec<8>{1, 5, 2, 6, 3, 7, 4, 8})); |
| CHECK_ALL((simd::zip(simd::gvec<uint8_t, 8>{1, 2, 3, 4, 5, 6, 7, 8}, |
| simd::gvec<uint8_t, 8>{9, 10, 11, 12, 13, 14, 15, 16}) == |
| simd::gvec<uint8_t, 16>{1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16})); |
| CHECK_ALL( |
| (simd::zip(float4{1, 2, 3, 4}, float4{5, 6, 7, 8}) == vec<8>{1, 5, 2, 6, 3, 7, 4, 8})); |
| } |
| |
| template <int N> static vec<N> mix_reference_impl(vec<N> a, vec<N> b, float t) |
| { |
| return a * (1.f - t) + b * t; |
| } |
| template <int N> static vec<N> mix_reference_impl(vec<N> a, vec<N> b, vec<N> t) |
| { |
| return a * (1.f - t) + b * t; |
| } |
| |
| template <typename T, int N> static bool fuzzy_equal(simd::gvec<T, N> a, simd::gvec<T, N> b) |
| { |
| return simd::all(b - a < 1e-4f); |
| } |
| |
| static float frand() |
| { |
| float kMaxBelow1 = math::bit_cast<float>(math::bit_cast<uint32_t>(1.f) - 1); |
| float f = static_cast<float>(rand()) / static_cast<float>(RAND_MAX); |
| return std::min(kMaxBelow1, f); |
| } |
| template <int N> vec<N> vrand() |
| { |
| vec<N> vrand{}; |
| for (int i = 0; i < N; ++i) |
| { |
| vrand[i] = frand(); |
| } |
| return vrand; |
| } |
| |
| template <int N> void check_mix() |
| { |
| vec<N> a = vrand<N>(); |
| vec<N> b = vrand<N>(); |
| float t = frand(); |
| CHECK(fuzzy_equal(simd::mix(a, b, vec<N>(t)), mix_reference_impl(a, b, t))); |
| CHECK(fuzzy_equal(simd::precise_mix(a, b, vec<N>(t)), mix_reference_impl(a, b, t))); |
| vec<N> tt = vrand<N>(); |
| CHECK(fuzzy_equal(simd::mix(a, b, tt), mix_reference_impl(a, b, tt))); |
| CHECK(fuzzy_equal(simd::precise_mix(a, b, tt), mix_reference_impl(a, b, tt))); |
| } |
| |
| // Check simd::mix |
| TEST_CASE("mix", "[simd]") |
| { |
| srand(0); |
| check_mix<1>(); |
| check_mix<2>(); |
| check_mix<3>(); |
| check_mix<4>(); |
| check_mix<5>(); |
| CHECK_ALL((simd::mix(float4{1, 2, 3, 4}, float4{5, 6, 7, 8}, float4(0)) == float4{1, 2, 3, 4})); |
| CHECK_ALL((simd::precise_mix(float4{-1, 2, 3, 4}, float4{5, 6, 7, 8}, float4(0)) == |
| float4{-1, 2, 3, 4})); |
| CHECK_ALL((simd::precise_mix(float4{1, 2, 3, 4}, float4{5, -6, 7, -8}, float4(1)) == |
| float4{5, -6, 7, -8})); |
| } |
| |
| // Check simd::load4x4f |
| TEST_CASE("load4x4f", "[simd]") |
| { |
| // Column major. |
| float m[16] = {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}; |
| auto c = simd::load4x4f(m); |
| CHECK(simd::all(std::get<0>(c) == float4{0, 1, 2, 3})); |
| CHECK(simd::all(std::get<1>(c) == float4{4, 5, 6, 7})); |
| CHECK(simd::all(std::get<2>(c) == float4{8, 9, 10, 11})); |
| CHECK(simd::all(std::get<3>(c) == float4{12, 13, 14, 15})); |
| } |
| |
| } // namespace rive |