// blob: 20660605a4d036982c1d652234827c6a9df33831
/*
* Copyright 2022 Rive
*/
// This header provides a fallback gvec<> implementation for when we don't have gcc/clang vector
// extensions. Swizzles are implemented as unions, which is arguably undefined behavior under the
// C++ "active member" rule for unions; however, since all of the members share the same
// underlying element type, this is a gray area. See:
//
// https://stackoverflow.com/questions/11373203/accessing-inactive-union-member-and-undefined-behavior
//
// This works in Visual Studio, which is the main reason for having this header.
#ifndef _RIVE_SIMD_GVEC_POLYFILL_HPP_
#define _RIVE_SIMD_GVEC_POLYFILL_HPP_
#include <algorithm>
#include <initializer_list>
#include <stdint.h>
namespace rive
{
namespace simd
{
// A Swizzle packs the description of a swizzled view into a single integer:
//
//   bits 0..2  : length of the source vector the view reads from
//   bits 3..4  : source index of component 0
//   bits 5..6  : source index of component 1
//   bits 7..8  : source index of component 2
//   bits 9..10 : source index of component 3
//
// The value 0 is the default for gvec<>'s Swizzle parameter and selects the plain, unswizzled
// vector.
using Swizzle = uint32_t;

// Encodes a 2-component swizzle reading elements i0 and i1 of a source vector with
// sourceVectorLength elements.
constexpr static Swizzle PackSwizzle2(uint32_t sourceVectorLength, uint32_t i0, uint32_t i1)
{
    return sourceVectorLength | (i0 << 3) | (i1 << 5);
}

// Encodes a 4-component swizzle; the low bits are laid out exactly as in PackSwizzle2.
constexpr static Swizzle
PackSwizzle4(uint32_t sourceVectorLength, uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3)
{
    return PackSwizzle2(sourceVectorLength, i0, i1) | (i2 << 7) | (i3 << 9);
}

// Extracts the source vector length (the low 3 bits) from a packed swizzle.
constexpr static uint32_t UnpackSwizzleSourceVectorLength(Swizzle swizzle)
{
    return swizzle & 0x7;
}

// Extracts the source index that component i of the swizzle maps to (2 bits per component,
// starting right after the 3 length bits).
constexpr static uint32_t UnpackSwizzleIdx(Swizzle swizzle, uint32_t i)
{
    return (swizzle >> (3 + 2 * i)) & 0x3;
}
// Primary gvec template: a swizzled view exposing N components of type T, where Z encodes which
// source element each component maps to. The unswizzled case (Z == 0) is handled by the
// gvec<T, N, 0> partial specialization elsewhere in this file.
template <typename T, int N, Swizzle Z = 0> struct gvec
{
    // Component i of the view is source element UnpackSwizzleIdx(Z, i).
    T operator[](size_t i) const { return data[UnpackSwizzleIdx(Z, i)]; }
    T& operator[](size_t i) { return data[UnpackSwizzleIdx(Z, i)]; }

    // Materializes the view as a plain (unswizzled) N-component vector.
    operator gvec<T, N>() const
    {
        gvec<T, N> unswizzled;
        for (int component = 0; component < N; ++component)
        {
            unswizzled[component] = (*this)[component];
        }
        return unswizzled;
    }

    // Storage is sized by the *source* vector length encoded in Z (not by N), so a view has the
    // same footprint as the vector it is overlaid on.
    T data[UnpackSwizzleSourceVectorLength(Z)];
};
// Generic storage for an N-component vector: a bare array of N elements with no named
// components. Specializations of this template add named components and swizzles for specific
// sizes.
template <typename T, int N> struct gvec_data
{
    T data[N];
};
// 1-component storage. The single element is reachable both as data[0] and as .x, which share
// the same memory through the anonymous union.
template <typename T> struct gvec_data<T, 1>
{
    union
    {
        T data[1];
        T x;
    };
};
// 2-component storage. Everything inside the anonymous union aliases the same two elements:
// the raw array, the named components, and the swizzled views.
template <typename T> struct gvec_data<T, 2>
{
    union
    {
        T data[2];
        struct
        {
            T x;
            T y;
        };
        // Swizzled views. Each is declared with a source length of 2, so its backing array is
        // T[2] regardless of how many components the view exposes.
        gvec<T, 2, PackSwizzle2(2, 1, 0)> yx;
        gvec<T, 4, PackSwizzle4(2, 0, 1, 0, 1)> xyxy;
        gvec<T, 4, PackSwizzle4(2, 1, 0, 1, 0)> yxyx;
    };
};
// 3-component storage. The raw array and the named components x/y/z alias the same three
// elements through the anonymous union.
template <typename T> struct gvec_data<T, 3>
{
    union
    {
        // Must hold all 3 elements: gvec<T, 3> indexes data[0..2] and sizes whole-vector
        // operations with sizeof(data).
        T data[3];
        struct
        {
            T x, y, z;
        };
    };
};
// 4-component storage. Every member of the outer anonymous union aliases the same four
// elements: the raw array, sub-vectors, named components, and swizzled views.
template <typename T> struct gvec_data<T, 4>
{
    union
    {
        T data[4];
        // First three elements viewed as a 3-vector.
        gvec<T, 3> xyz;
        // Low and high halves viewed as two consecutive 2-vectors.
        struct
        {
            gvec<T, 2> xy;
            gvec<T, 2> zw;
        };
        // x on its own, followed by the different ways of addressing the remaining elements.
        struct
        {
            T x;
            union
            {
                gvec<T, 3> yzw;
                gvec<T, 2> yz;
                struct
                {
                    T y;
                    T z;
                    T w;
                };
            };
        };
        // **WARNING**!! Only add swizzles that include ALL components of the vector. The swizzle
        // types are POD and rely on their default operator=, which is just a memcpy of the whole
        // backing array. So an assignment such as "a.xz = b.xz" would also assign y and w.
        gvec<T, 4, PackSwizzle4(4, 1, 0, 3, 2)> yxwz;
        gvec<T, 4, PackSwizzle4(4, 2, 3, 0, 1)> zwxy;
    };
};
// Plain (unswizzled) N-component vector. Storage and named components come from
// gvec_data<T, N>; this specialization adds construction and element access.
template <typename T, int N> struct gvec<T, N, 0> : public gvec_data<T, N>
{
    // Leaves the components uninitialized.
    gvec() = default;

    // Broadcast: sets every component to val. Intentionally implicit so a scalar can be used
    // where a vector is expected.
    gvec(T val)
    {
        for (int i = 0; i < N; ++i)
            gvec_data<T, N>::data[i] = val;
    }

    // Copies up to N values from vals, in order. Components without a corresponding value are
    // zero-initialized; values beyond the N-th are ignored.
    gvec(std::initializer_list<T> vals)
    {
        T* const elems = gvec_data<T, N>::data;
        const size_t count = std::min<size_t>(vals.size(), N);
        std::copy(vals.begin(), vals.begin() + count, elems);
        // Zero only the tail that was not supplied. std::fill comes from <algorithm>, which this
        // header already includes, and the fill is bounded by N rather than by sizeof(data).
        std::fill(elems + count, elems + N, T());
    }

    T operator[](size_t i) const { return gvec_data<T, N>::data[i]; }
    T& operator[](size_t i) { return gvec_data<T, N>::data[i]; }
};
// Layout checks: the union-based storage must not add any padding, for every vector size that
// has a dedicated gvec_data specialization.
static_assert(sizeof(gvec<float, 1>) == 4, "gvec<1> is expected to be tightly packed");
static_assert(sizeof(gvec<float, 2>) == 8, "gvec<2> is expected to be tightly packed");
static_assert(sizeof(gvec<float, 3>) == 12, "gvec<3> is expected to be tightly packed");
static_assert(sizeof(gvec<float, 4>) == 16, "gvec<4> is expected to be tightly packed");
// Component-wise unary operators (+x, -x, ~x). The operand may be a plain vector or a swizzled
// view; the result is always a plain gvec<T, N>.
#define DECL_UNARY_OP(_OP_) \
    template <typename T, int N, Swizzle Z> gvec<T, N> operator _OP_(gvec<T, N, Z> x) \
    { \
        gvec<T, N> result; \
        for (int lane = 0; lane < N; ++lane) \
        { \
            result[lane] = _OP_ x[lane]; \
        } \
        return result; \
    }
DECL_UNARY_OP(+)
DECL_UNARY_OP(-)
DECL_UNARY_OP(~)
#undef DECL_UNARY_OP
// Component-wise binary arithmetic/bitwise operators. For each operator OP this declares:
//
//   vec OP= vec, vec OP= scalar   (in place; the left side may itself be a swizzled view)
//   vec OP vec, vec OP scalar, scalar OP vec   (result is always a plain gvec<T, N>)
//
// Right-hand vector operands are taken by value, so the operand is copied before the left side
// is modified. "_OP_##=" token-pastes the operator with '=' to form the compound assignment.
#define DECL_ARITHMETIC_OP(_OP_) \
    template <typename T, int N, Swizzle Z0, Swizzle Z1> \
    gvec<T, N, Z0>& operator _OP_##=(gvec<T, N, Z0>& a, gvec<T, N, Z1> b) \
    { \
        for (int lane = 0; lane < N; ++lane) \
        { \
            a[lane] _OP_##= b[lane]; \
        } \
        return a; \
    } \
    template <typename T, int N, Swizzle Z> gvec<T, N, Z>& operator _OP_##=(gvec<T, N, Z>& a, T b) \
    { \
        for (int lane = 0; lane < N; ++lane) \
        { \
            a[lane] _OP_##= b; \
        } \
        return a; \
    } \
    template <typename T, int N, Swizzle Z0, Swizzle Z1> \
    gvec<T, N> operator _OP_(gvec<T, N, Z0> a, gvec<T, N, Z1> b) \
    { \
        gvec<T, N> result; \
        for (int lane = 0; lane < N; ++lane) \
        { \
            result[lane] = a[lane] _OP_ b[lane]; \
        } \
        return result; \
    } \
    template <typename T, int N, Swizzle Z> gvec<T, N> operator _OP_(gvec<T, N, Z> a, T b) \
    { \
        gvec<T, N> result; \
        for (int lane = 0; lane < N; ++lane) \
        { \
            result[lane] = a[lane] _OP_ b; \
        } \
        return result; \
    } \
    template <typename T, int N, Swizzle Z> gvec<T, N> operator _OP_(T a, gvec<T, N, Z> b) \
    { \
        gvec<T, N> result; \
        for (int lane = 0; lane < N; ++lane) \
        { \
            result[lane] = a _OP_ b[lane]; \
        } \
        return result; \
    }
// No trailing ';' after the invocations: each expansion already ends in a function body, so a
// ';' would only add an empty declaration (and is not used by the sibling DECL_* macros).
DECL_ARITHMETIC_OP(+)
DECL_ARITHMETIC_OP(-)
DECL_ARITHMETIC_OP(*)
DECL_ARITHMETIC_OP(/)
DECL_ARITHMETIC_OP(|)
DECL_ARITHMETIC_OP(&)
DECL_ARITHMETIC_OP(^)
DECL_ARITHMETIC_OP(<<)
DECL_ARITHMETIC_OP(>>)
#undef DECL_ARITHMETIC_OP
// Component-wise comparison and logical operators. Each produces a gvec<int32_t, N> mask whose
// lanes are ~0 (all bits set) where the predicate holds and 0 where it does not. Declared for
// vec OP vec, vec OP scalar and scalar OP vec; operands may be plain vectors or swizzled views.
#define DECL_BOOLEAN_OP(_OP_) \
    template <typename T, int N, Swizzle Z0, Swizzle Z1> \
    gvec<int32_t, N> operator _OP_(gvec<T, N, Z0> a, gvec<T, N, Z1> b) \
    { \
        gvec<int32_t, N> mask; \
        for (int lane = 0; lane < N; ++lane) \
        { \
            mask[lane] = (a[lane] _OP_ b[lane]) ? ~0 : 0; \
        } \
        return mask; \
    } \
    template <typename T, int N, Swizzle Z> gvec<int32_t, N> operator _OP_(gvec<T, N, Z> a, T b) \
    { \
        gvec<int32_t, N> mask; \
        for (int lane = 0; lane < N; ++lane) \
        { \
            mask[lane] = (a[lane] _OP_ b) ? ~0 : 0; \
        } \
        return mask; \
    } \
    template <typename T, int N, Swizzle Z> gvec<int32_t, N> operator _OP_(T a, gvec<T, N, Z> b) \
    { \
        gvec<int32_t, N> mask; \
        for (int lane = 0; lane < N; ++lane) \
        { \
            mask[lane] = (a _OP_ b[lane]) ? ~0 : 0; \
        } \
        return mask; \
    }
DECL_BOOLEAN_OP(==)
DECL_BOOLEAN_OP(!=)
DECL_BOOLEAN_OP(<)
DECL_BOOLEAN_OP(<=)
DECL_BOOLEAN_OP(>)
DECL_BOOLEAN_OP(>=)
DECL_BOOLEAN_OP(&&)
DECL_BOOLEAN_OP(||)
#undef DECL_BOOLEAN_OP
#define ENABLE_SWIZZLE1(F) \
template <typename T, int N, Swizzle Z0> gvec<T, N> F(gvec<T, N, Z0> x) \
{ \
return F((gvec<T, N>)x); \
}
#define ENABLE_SWIZZLE_REDUCE(F) \
template <typename T, int N, Swizzle Z0> T F(gvec<T, N, Z0> x) { return F((gvec<T, N>)x); }
#define ENABLE_SWIZZLE1F(F) \
template <int N, Swizzle Z0> gvec<float, N> F(gvec<float, N, Z0> x) \
{ \
return F((gvec<float, N>)x); \
}
#define ENABLE_SWIZZLE1B(F) \
template <int N, Swizzle Z0> bool F(gvec<int32_t, N, Z0> x) { return F((gvec<int32_t, N>)x); }
#define ENABLE_SWIZZLEUT(F) \
template <typename T, typename U, int N, Swizzle Z0> gvec<U, N> F(gvec<T, N, Z0> x) \
{ \
return F((gvec<T, N>)x); \
}
#define ENABLE_SWIZZLE2(F) \
template <typename T, int N, Swizzle Z0, Swizzle Z1> \
gvec<T, N> F(gvec<T, N, Z0> a, gvec<T, N, Z1> b) \
{ \
return F((gvec<T, N>)a, (gvec<T, N>)b); \
}
#define ENABLE_SWIZZLE3(F) \
template <typename T, int N, Swizzle Z0, Swizzle Z1, Swizzle Z2> \
gvec<T, N> F(gvec<T, N, Z0> a, gvec<T, N, Z1> b, gvec<T, N, Z2> c) \
{ \
return F((gvec<T, N>)a, (gvec<T, N>)b, (gvec<T, N>)c); \
}
#define ENABLE_SWIZZLE3F(F) \
template <int N, Swizzle Z0, Swizzle Z1, Swizzle Z2> \
gvec<float, N> F(gvec<float, N, Z0> a, gvec<float, N, Z1> b, gvec<float, N, Z2> c) \
{ \
return F((gvec<float, N>)a, (gvec<float, N>)b, (gvec<float, N>)c); \
}
#define ENABLE_SWIZZLE3IT(F) \
template <typename T, int N, Swizzle Z0, Swizzle Z1, Swizzle Z2> \
gvec<T, N> F(gvec<int32_t, N, Z0> a, gvec<T, N, Z1> b, gvec<T, N, Z2> c) \
{ \
return F((gvec<int32_t, N>)a, (gvec<T, N>)b, (gvec<T, N>)c); \
}
ENABLE_SWIZZLE1(abs)
ENABLE_SWIZZLE_REDUCE(sum)
ENABLE_SWIZZLE1F(floor)
ENABLE_SWIZZLE1F(ceil)
ENABLE_SWIZZLE1F(sqrt)
ENABLE_SWIZZLE1F(fast_acos)
ENABLE_SWIZZLE1B(any)
ENABLE_SWIZZLE1B(all)
ENABLE_SWIZZLE2(min)
ENABLE_SWIZZLE2(max)
ENABLE_SWIZZLE3(clamp)
ENABLE_SWIZZLE3F(mix)
ENABLE_SWIZZLE3IT(if_then_else)
template <typename T, int N, Swizzle Z> void store(void* dst, gvec<T, N, Z> vec)
{
store(dst, (gvec<T, N>)vec);
}
template <typename U, typename T, int N, Swizzle Z> gvec<U, N> cast(gvec<T, N, Z> x)
{
return cast<U>((gvec<T, N>)x);
}
#undef ENABLE_SWIZZLE1
#undef ENABLE_SWIZZLE_REDUCE
#undef ENABLE_SWIZZLE1F
#undef ENABLE_SWIZZLE1B
#undef ENABLE_SWIZZLEUT
#undef ENABLE_SWIZZLE2
#undef ENABLE_SWIZZLE3
#undef ENABLE_SWIZZLE3F
#undef ENABLE_SWIZZLE3IT
} // namespace simd
} // namespace rive
#endif