/*
 * Copyright 2022 Rive
 */

// This header provides a fallback gvec<> implementation for when we don't have gcc/clang vector
// extensions. Swizzles are implemented as unions, which is arguably undefined behavior because of
// the C++ "active member" restriction on unions. However, since the members all have the same
// underlying type, this is a gray area. See:
//
// https://stackoverflow.com/questions/11373203/accessing-inactive-union-member-and-undefined-behavior
//
// This works in Visual Studio, which is the main reason for having this header.

#ifndef _RIVE_SIMD_GVEC_POLYFILL_HPP_
#define _RIVE_SIMD_GVEC_POLYFILL_HPP_

#include <algorithm>
#include <initializer_list>
#include <stdint.h>

namespace rive
{
namespace simd
{
// A Swizzle describes how the components of a swizzled view map onto the vector it reads from,
// packed into one integer:
//
//   bits 0..2 : number of components in the source vector
//   bits 3..4 : source index read by component 0
//   bits 5..6 : source index read by component 1
//   bits 7..8 : source index read by component 2 (PackSwizzle4 only)
//   bits 9..10: source index read by component 3 (PackSwizzle4 only)
//
// The arguments are not masked, so each index must fit in 2 bits and the length in 3 bits.
using Swizzle = uint32_t;

// Packs a 2-component swizzle of a source vector with "sourceVectorLength" components.
constexpr static Swizzle PackSwizzle2(uint32_t sourceVectorLength, uint32_t i0, uint32_t i1)
{
    return sourceVectorLength | (i0 << 3) | (i1 << 5);
}

// Packs a 4-component swizzle; the two low components share PackSwizzle2's encoding.
constexpr static Swizzle
PackSwizzle4(uint32_t sourceVectorLength, uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3)
{
    return PackSwizzle2(sourceVectorLength, i0, i1) | (i2 << 7) | (i3 << 9);
}

// Extracts the source vector length (the low 3 bits) from a packed swizzle.
constexpr static uint32_t UnpackSwizzleSourceVectorLength(Swizzle swizzle)
{
    return swizzle & 0x7;
}

// Extracts the source index that component "i" of the swizzle reads from.
constexpr static uint32_t UnpackSwizzleIdx(Swizzle swizzle, uint32_t i)
{
    return (swizzle >> (3 + 2 * i)) & 0x3;
}

// Swizzled view of a vector. Z (see PackSwizzle2/PackSwizzle4) encodes how many components the
// underlying storage holds and which stored component each of this view's N components reads.
// The unswizzled case, Z == 0, is handled by a separate partial specialization.
template <typename T, int N, Swizzle Z = 0> struct gvec
{
    // Components of the source vector, in their stored (unswizzled) order.
    T data[UnpackSwizzleSourceVectorLength(Z)];

    // Component access: index i is remapped through the swizzle to an index into "data".
    T& operator[](size_t i) { return data[UnpackSwizzleIdx(Z, static_cast<uint32_t>(i))]; }
    T operator[](size_t i) const { return data[UnpackSwizzleIdx(Z, static_cast<uint32_t>(i))]; }

    // Implicit conversion to an unswizzled vector: gathers the N swizzled components in order.
    operator gvec<T, N>() const
    {
        gvec<T, N> unswizzled;
        for (int k = 0; k < N; ++k)
            unswizzled[k] = this->operator[](k);
        return unswizzled;
    }
};

// Backing storage for an unswizzled gvec<T, N>: N components of type T in a plain array. The
// specializations of this struct overlay named components (and, for some sizes, swizzles) on top
// of "data" via anonymous unions.
template <typename T, int N> struct gvec_data
{
    T data[N];
};

// 1-component storage: "x" shares storage with data[0].
template <typename T> struct gvec_data<T, 1>
{
    union
    {
        T data[1];
        T x;
    };
};

// 2-component storage. "x"/"y" overlay data[0]/data[1], and the swizzle members view the same two
// components in swapped order (yx) or repeated out to four components (xyxy, yxyx).
template <typename T> struct gvec_data<T, 2>
{
    union
    {
        T data[2];
        struct
        {
            T x, y;
        };
        // PackSwizzle*(2, ...): the leading 2 is the length of the source vector; the remaining
        // arguments are the source index read by each component of the swizzle.
        gvec<T, 2, PackSwizzle2(2, 1, 0)> yx;
        gvec<T, 4, PackSwizzle4(2, 0, 1, 0, 1)> xyxy;
        gvec<T, 4, PackSwizzle4(2, 1, 0, 1, 0)> yxyx;
    };
};

// 3-component storage: "x", "y" and "z" overlay data[0], data[1] and data[2]. No swizzle members
// are provided for this size.
template <typename T> struct gvec_data<T, 3>
{
    union
    {
        // The array must be declared with all three components: gvec<T, 3> reads and writes
        // components 0..2 through it, so a shorter bound would make index 2 out of range.
        T data[3];
        struct
        {
            T x, y, z;
        };
    };
};

// 4-component storage. Every union member overlays the same four components (assuming the tight
// packing that the static_asserts in this header check for float):
//   - data[0..3]: indexed access.
//   - xyz: the first three components as a 3-vector.
//   - xy / zw: the first and second pair of components as 2-vectors.
//   - x, followed by y / z / w, which can also be viewed as the vectors yzw or yz.
//   - yxwz / zwxy: reorderings that read all four components.
template <typename T> struct gvec_data<T, 4>
{
    union
    {
        T data[4];
        gvec<T, 3> xyz;
        struct
        {
            gvec<T, 2> xy, zw;
        };
        struct
        {
            T x;
            union
            {
                gvec<T, 3> yzw;
                gvec<T, 2> yz;
                struct
                {
                    T y, z, w;
                };
            };
        };
        // **WARNING**!! Only add swizzles that include ALL components of the vector. Since these
        // types are POD, it's not possible to override their default operator=, and their default
        // operator= is just a memcpy of the underlying storage. So an assignment through a partial
        // swizzle such as "a.xz = b.xz" would also assign y and w.
        gvec<T, 4, PackSwizzle4(4, 1, 0, 3, 2)> yxwz;
        gvec<T, 4, PackSwizzle4(4, 2, 3, 0, 1)> zwxy;
    };
};

// Unswizzled vector (Z == 0). The N components live in the gvec_data<T, N> base, which also
// supplies the named component and swizzle members for the sizes that define them.
template <typename T, int N> struct gvec<T, N, 0> : public gvec_data<T, N>
{
    // Defaulted: a default-initialized gvec has indeterminate components.
    gvec() = default;

    // Broadcast: sets every component to "val". Not explicit, so a scalar converts implicitly.
    gvec(T val)
    {
        for (int i = 0; i < N; ++i)
            gvec_data<T, N>::data[i] = val;
    }

    // Copies up to N values from "vals". Surplus values are ignored, and components not covered
    // by "vals" are set to T() (zero for arithmetic types).
    gvec(std::initializer_list<T> vals)
    {
        T* const first = gvec_data<T, N>::data;
        const size_t count = std::min<size_t>(vals.size(), N);
        // std::copy returns one past the last component written, i.e. the start of the tail that
        // still needs zeroing. Zero-filling with <algorithm> (already included by this header)
        // avoids calling memset(), whose header is not included here, and sizes the fill by N.
        T* const tail = std::copy(vals.begin(), vals.begin() + count, first);
        std::fill(tail, first + N, T());
    }

    // Direct (unswizzled) component access.
    T operator[](size_t i) const { return gvec_data<T, N>::data[i]; }
    T& operator[](size_t i) { return gvec_data<T, N>::data[i]; }
};

// Layout checks: an unswizzled float vector must be exactly N * 4 bytes, i.e. the unions in
// gvec_data<> must not make the struct any larger than its component array.
// NOTE(review): there is no corresponding check for gvec<float, 3> -- confirm whether that
// omission is intentional.
static_assert(sizeof(gvec<float, 1>) == 4, "gvec<1> is expected to be tightly packed");
static_assert(sizeof(gvec<float, 2>) == 8, "gvec<2> is expected to be tightly packed");
static_assert(sizeof(gvec<float, 4>) == 16, "gvec<4> is expected to be tightly packed");

// Component-wise unary operators (+, -, ~). Each overload accepts a swizzled or unswizzled vector
// by value and returns a new unswizzled gvec<T, N> holding the operator applied to every
// component.
#define DECL_UNARY_OP(_OP_)                                                                        \
    template <typename T, int N, Swizzle Z> gvec<T, N> operator _OP_(gvec<T, N, Z> x)              \
    {                                                                                              \
        gvec<T, N> out;                                                                            \
        for (int k = 0; k < N; ++k)                                                                \
            out[k] = _OP_ x[k];                                                                    \
        return out;                                                                                \
    }

DECL_UNARY_OP(+)
DECL_UNARY_OP(-)
DECL_UNARY_OP(~)

#undef DECL_UNARY_OP

// Component-wise binary arithmetic/bitwise operators. For each operator _OP_ this generates:
//   - "a _OP_= b" with a vector or a scalar right-hand side; modifies "a" in place through its
//     (possibly swizzled) operator[] and returns a reference to it.
//   - "a _OP_ b" for vector/vector, vector/scalar and scalar/vector operands; returns a new
//     unswizzled gvec<T, N>.
// The macro expands to complete function definitions, so the invocations below take no trailing
// semicolon (one would be a stray empty declaration at namespace scope).
#define DECL_ARITHMETIC_OP(_OP_)                                                                   \
    template <typename T, int N, Swizzle Z0, Swizzle Z1>                                           \
    gvec<T, N, Z0>& operator _OP_##=(gvec<T, N, Z0>& a, gvec<T, N, Z1> b)                          \
    {                                                                                              \
        for (int i = 0; i < N; ++i)                                                                \
            a[i] _OP_## = b[i];                                                                    \
        return a;                                                                                  \
    }                                                                                              \
    template <typename T, int N, Swizzle Z> gvec<T, N, Z>& operator _OP_##=(gvec<T, N, Z>& a, T b) \
    {                                                                                              \
        for (int i = 0; i < N; ++i)                                                                \
            a[i] _OP_## = b;                                                                       \
        return a;                                                                                  \
    }                                                                                              \
    template <typename T, int N, Swizzle Z0, Swizzle Z1>                                           \
    gvec<T, N> operator _OP_(gvec<T, N, Z0> a, gvec<T, N, Z1> b)                                   \
    {                                                                                              \
        gvec<T, N> ret;                                                                            \
        for (int i = 0; i < N; ++i)                                                                \
            ret[i] = a[i] _OP_ b[i];                                                               \
        return ret;                                                                                \
    }                                                                                              \
    template <typename T, int N, Swizzle Z> gvec<T, N> operator _OP_(gvec<T, N, Z> a, T b)         \
    {                                                                                              \
        gvec<T, N> ret;                                                                            \
        for (int i = 0; i < N; ++i)                                                                \
            ret[i] = a[i] _OP_ b;                                                                  \
        return ret;                                                                                \
    }                                                                                              \
    template <typename T, int N, Swizzle Z> gvec<T, N> operator _OP_(T a, gvec<T, N, Z> b)         \
    {                                                                                              \
        gvec<T, N> ret;                                                                            \
        for (int i = 0; i < N; ++i)                                                                \
            ret[i] = a _OP_ b[i];                                                                  \
        return ret;                                                                                \
    }

DECL_ARITHMETIC_OP(+)
DECL_ARITHMETIC_OP(-)
DECL_ARITHMETIC_OP(*)
DECL_ARITHMETIC_OP(/)
DECL_ARITHMETIC_OP(|)
DECL_ARITHMETIC_OP(&)
DECL_ARITHMETIC_OP(^)

#undef DECL_ARITHMETIC_OP

// Component-wise comparison and logical operators. Each overload (vector/vector, vector/scalar,
// scalar/vector) returns a gvec<int32_t, N> mask whose components are ~0 (all bits set) where the
// test holds and 0 where it does not.
#define DECL_BOOLEAN_OP(_OP_)                                                                      \
    template <typename T, int N, Swizzle Z0, Swizzle Z1>                                           \
    gvec<int32_t, N> operator _OP_(gvec<T, N, Z0> a, gvec<T, N, Z1> b)                             \
    {                                                                                              \
        gvec<int32_t, N> out;                                                                      \
        for (int k = 0; k < N; ++k)                                                                \
            out[k] = (a[k] _OP_ b[k]) ? ~0 : 0;                                                    \
        return out;                                                                                \
    }                                                                                              \
    template <typename T, int N, Swizzle Z> gvec<int32_t, N> operator _OP_(gvec<T, N, Z> a, T b)   \
    {                                                                                              \
        gvec<int32_t, N> out;                                                                      \
        for (int k = 0; k < N; ++k)                                                                \
            out[k] = (a[k] _OP_ b) ? ~0 : 0;                                                       \
        return out;                                                                                \
    }                                                                                              \
    template <typename T, int N, Swizzle Z> gvec<int32_t, N> operator _OP_(T a, gvec<T, N, Z> b)   \
    {                                                                                              \
        gvec<int32_t, N> out;                                                                      \
        for (int k = 0; k < N; ++k)                                                                \
            out[k] = (a _OP_ b[k]) ? ~0 : 0;                                                       \
        return out;                                                                                \
    }

DECL_BOOLEAN_OP(==)
DECL_BOOLEAN_OP(!=)
DECL_BOOLEAN_OP(<)
DECL_BOOLEAN_OP(<=)
DECL_BOOLEAN_OP(>)
DECL_BOOLEAN_OP(>=)
DECL_BOOLEAN_OP(&&)
DECL_BOOLEAN_OP(||)

#undef DECL_BOOLEAN_OP

// Forwarding overloads that let the named functions accept swizzled arguments. Each generated
// overload converts its arguments to unswizzled vectors and calls F again with those. The
// unswizzled overloads of F are not defined in this header -- presumably they are declared by the
// code that includes it.
//
// Suffixes: 1/2/3 = number of vector arguments, F = float-only, B = int32_t mask in / bool out,
// IT = int32_t mask as the first argument followed by two gvec<T, N>.
// NOTE(review): ENABLE_SWIZZLEUT is never invoked in this header, and its U parameter appears
// only in the return type, so it cannot be deduced from the argument.
#define ENABLE_SWIZZLE1(F)                                                                         \
    template <typename T, int N, Swizzle Z0> gvec<T, N> F(gvec<T, N, Z0> x)                        \
    {                                                                                              \
        using Plain = gvec<T, N>;                                                                  \
        return F(static_cast<Plain>(x));                                                           \
    }
#define ENABLE_SWIZZLE1F(F)                                                                        \
    template <int N, Swizzle Z0> gvec<float, N> F(gvec<float, N, Z0> x)                            \
    {                                                                                              \
        using Plain = gvec<float, N>;                                                              \
        return F(static_cast<Plain>(x));                                                           \
    }
#define ENABLE_SWIZZLE1B(F)                                                                        \
    template <int N, Swizzle Z0> bool F(gvec<int32_t, N, Z0> x)                                    \
    {                                                                                              \
        using Plain = gvec<int32_t, N>;                                                            \
        return F(static_cast<Plain>(x));                                                           \
    }
#define ENABLE_SWIZZLEUT(F)                                                                        \
    template <typename T, typename U, int N, Swizzle Z0> gvec<U, N> F(gvec<T, N, Z0> x)            \
    {                                                                                              \
        using Plain = gvec<T, N>;                                                                  \
        return F(static_cast<Plain>(x));                                                           \
    }
#define ENABLE_SWIZZLE2(F)                                                                         \
    template <typename T, int N, Swizzle Z0, Swizzle Z1>                                           \
    gvec<T, N> F(gvec<T, N, Z0> a, gvec<T, N, Z1> b)                                               \
    {                                                                                              \
        using Plain = gvec<T, N>;                                                                  \
        return F(static_cast<Plain>(a), static_cast<Plain>(b));                                    \
    }
#define ENABLE_SWIZZLE3(F)                                                                         \
    template <typename T, int N, Swizzle Z0, Swizzle Z1, Swizzle Z2>                               \
    gvec<T, N> F(gvec<T, N, Z0> a, gvec<T, N, Z1> b, gvec<T, N, Z2> c)                             \
    {                                                                                              \
        using Plain = gvec<T, N>;                                                                  \
        return F(static_cast<Plain>(a), static_cast<Plain>(b), static_cast<Plain>(c));             \
    }
#define ENABLE_SWIZZLE3F(F)                                                                        \
    template <int N, Swizzle Z0, Swizzle Z1, Swizzle Z2>                                           \
    gvec<float, N> F(gvec<float, N, Z0> a, gvec<float, N, Z1> b, gvec<float, N, Z2> c)             \
    {                                                                                              \
        using Plain = gvec<float, N>;                                                              \
        return F(static_cast<Plain>(a), static_cast<Plain>(b), static_cast<Plain>(c));             \
    }
#define ENABLE_SWIZZLE3IT(F)                                                                       \
    template <typename T, int N, Swizzle Z0, Swizzle Z1, Swizzle Z2>                               \
    gvec<T, N> F(gvec<int32_t, N, Z0> a, gvec<T, N, Z1> b, gvec<T, N, Z2> c)                       \
    {                                                                                              \
        using Mask = gvec<int32_t, N>;                                                             \
        using Plain = gvec<T, N>;                                                                  \
        return F(static_cast<Mask>(a), static_cast<Plain>(b), static_cast<Plain>(c));              \
    }

ENABLE_SWIZZLE1(abs)
ENABLE_SWIZZLE1F(floor)
ENABLE_SWIZZLE1F(ceil)
ENABLE_SWIZZLE1F(sqrt)
ENABLE_SWIZZLE1F(fast_acos)
ENABLE_SWIZZLE1B(any)
ENABLE_SWIZZLE1B(all)
ENABLE_SWIZZLE2(min)
ENABLE_SWIZZLE2(max)
ENABLE_SWIZZLE3(clamp)
ENABLE_SWIZZLE3F(mix)
ENABLE_SWIZZLE3IT(if_then_else)
template <typename T, int N, Swizzle Z> void store(void* dst, gvec<T, N, Z> vec)
{
    store(dst, (gvec<T, N>)vec);
}

// Undefine the ENABLE_SWIZZLE* helper macros so they do not remain defined for code that includes
// this header.
#undef ENABLE_SWIZZLE1
#undef ENABLE_SWIZZLE1F
#undef ENABLE_SWIZZLE1B
#undef ENABLE_SWIZZLEUT
#undef ENABLE_SWIZZLE2
#undef ENABLE_SWIZZLE3
#undef ENABLE_SWIZZLE3F
#undef ENABLE_SWIZZLE3IT
} // namespace simd
} // namespace rive

#endif
