feat(scripting): first-class Mat4 type with SIMD multiply (#12445) a076a8abde Co-authored-by: Luigi Rosso <luigi-rosso@users.noreply.github.com>
diff --git a/.rive_head b/.rive_head index 1fa9a0b..1bf2a3e 100644 --- a/.rive_head +++ b/.rive_head
@@ -1 +1 @@ -66b9d31e8ff505fe8c9800e865e933a452abc5b4 +a076a8abde3ac7c5af06e53cc55f0fedfb6b4bf4
diff --git a/include/rive/lua/rive_lua_libs.hpp b/include/rive/lua/rive_lua_libs.hpp index a79dc66..3527173 100644 --- a/include/rive/lua/rive_lua_libs.hpp +++ b/include/rive/lua/rive_lua_libs.hpp
@@ -10,6 +10,7 @@ #include "rive/math/raw_path.hpp" #include "rive/renderer.hpp" #include "rive/math/vec2d.hpp" +#include "rive/math/mat4.hpp" #include "rive/math/contour_measure.hpp" #include "rive/math/path_measure.hpp" #include "rive/shapes/paint/image_sampler.hpp" @@ -332,6 +333,12 @@ // Image decode decodeImage, + + // Mat4 + transpose, + transformPoint, + transformVec4, + writeToBuffer, }; struct ScriptedMat2D @@ -354,6 +361,21 @@ static_assert(std::is_trivially_destructible<ScriptedMat2D>::value, "ScriptedMat2D must be trivially destructible"); +struct ScriptedMat4 +{ + static constexpr uint8_t luaTag = LUA_T_COUNT + 62; + static constexpr const char* luaName = "Mat4"; + static constexpr bool hasMetatable = true; + + ScriptedMat4() {} + ScriptedMat4(const Mat4& mat) : value(mat) {} + + rive::Mat4 value; +}; + +static_assert(std::is_trivially_destructible<ScriptedMat4>::value, + "ScriptedMat4 must be trivially destructible"); + class ScriptedPathCommand { public:
diff --git a/include/rive/math/mat4.hpp b/include/rive/math/mat4.hpp new file mode 100644 index 0000000..3b0351d --- /dev/null +++ b/include/rive/math/mat4.hpp
@@ -0,0 +1,288 @@ +#ifndef _RIVE_MAT4_HPP_ +#define _RIVE_MAT4_HPP_ + +#include "rive/math/simd.hpp" +#include "rive/math/vec2d.hpp" +#include <array> +#include <cmath> +#include <cstddef> + +namespace rive +{ +// Column-major 4x4 single-precision matrix. The 64-byte storage can be +// uploaded directly to a GPU uniform buffer. +// +// Column 0 = m[0..3], Column 1 = m[4..7], Column 2 = m[8..11], Column 3 = +// m[12..15]. +class Mat4 +{ +public: + constexpr Mat4() : + m_buffer{{1.f, + 0.f, + 0.f, + 0.f, + 0.f, + 1.f, + 0.f, + 0.f, + 0.f, + 0.f, + 1.f, + 0.f, + 0.f, + 0.f, + 0.f, + 1.f}} + {} + + constexpr Mat4(float c0x, + float c0y, + float c0z, + float c0w, + float c1x, + float c1y, + float c1z, + float c1w, + float c2x, + float c2y, + float c2z, + float c2w, + float c3x, + float c3y, + float c3z, + float c3w) : + m_buffer{{c0x, + c0y, + c0z, + c0w, + c1x, + c1y, + c1z, + c1w, + c2x, + c2y, + c2z, + c2w, + c3x, + c3y, + c3z, + c3w}} + {} + + const float* values() const { return m_buffer.data(); } + float* values() { return m_buffer.data(); } + + float& operator[](size_t i) { return m_buffer[i]; } + float operator[](size_t i) const { return m_buffer[i]; } + + static Mat4 identity() { return Mat4(); } + + static Mat4 fromTranslation(float x, float y, float z) + { + Mat4 m; + m.m_buffer[12] = x; + m.m_buffer[13] = y; + m.m_buffer[14] = z; + return m; + } + + static Mat4 fromScale(float sx, float sy, float sz) + { + Mat4 m; + m.m_buffer[0] = sx; + m.m_buffer[5] = sy; + m.m_buffer[10] = sz; + return m; + } + + static Mat4 fromRotationX(float rad) + { + float c = std::cos(rad), s = std::sin(rad); + Mat4 m; + m.m_buffer[5] = c; + m.m_buffer[6] = s; + m.m_buffer[9] = -s; + m.m_buffer[10] = c; + return m; + } + + static Mat4 fromRotationY(float rad) + { + float c = std::cos(rad), s = std::sin(rad); + Mat4 m; + m.m_buffer[0] = c; + m.m_buffer[2] = -s; + m.m_buffer[8] = s; + m.m_buffer[10] = c; + return m; + } + + static Mat4 fromRotationZ(float rad) + { + float c = 
std::cos(rad), s = std::sin(rad); + Mat4 m; + m.m_buffer[0] = c; + m.m_buffer[1] = s; + m.m_buffer[4] = -s; + m.m_buffer[5] = c; + return m; + } + + // Right-handed perspective. Maps view-space z=[-near, -far] to NDC z in + // either [0, 1] (default, depthZeroToOne=true) or [-1, 1]. + static Mat4 perspective(float fovYRadians, + float aspect, + float near_, + float far_, + bool depthZeroToOne = true) + { + float f = 1.f / std::tan(fovYRadians * 0.5f); + float nf = 1.f / (near_ - far_); + Mat4 m{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + m.m_buffer[0] = f / aspect; + m.m_buffer[5] = f; + if (depthZeroToOne) + { + m.m_buffer[10] = far_ * nf; + m.m_buffer[14] = far_ * near_ * nf; + } + else + { + m.m_buffer[10] = (far_ + near_) * nf; + m.m_buffer[14] = 2.f * far_ * near_ * nf; + } + m.m_buffer[11] = -1.f; + return m; + } + + // SIMD: out = lhs * rhs. Both column-major. + static Mat4 multiply(const Mat4& lhs, const Mat4& rhs) + { + // Each output column j is a linear combination of lhs's columns + // weighted by rhs's column j. + const float* L = lhs.m_buffer.data(); + const float* R = rhs.m_buffer.data(); + float4 c0 = simd::load4f(L); + float4 c1 = simd::load4f(L + 4); + float4 c2 = simd::load4f(L + 8); + float4 c3 = simd::load4f(L + 12); + + Mat4 out; + for (int j = 0; j < 4; ++j) + { + const float* rcol = R + j * 4; + float4 result = + c0 * rcol[0] + c1 * rcol[1] + c2 * rcol[2] + c3 * rcol[3]; + simd::store(out.m_buffer.data() + j * 4, result); + } + return out; + } + + Mat4 operator*(const Mat4& rhs) const { return multiply(*this, rhs); } + + // SIMD: out = M * (x, y, z, w). Returns a 4-component vector (xyzw). 
+ void transformVec4(float out[4], float x, float y, float z, float w) const + { + float4 c0 = simd::load4f(m_buffer.data()); + float4 c1 = simd::load4f(m_buffer.data() + 4); + float4 c2 = simd::load4f(m_buffer.data() + 8); + float4 c3 = simd::load4f(m_buffer.data() + 12); + simd::store(out, c0 * x + c1 * y + c2 * z + c3 * w); + } + + Mat4 transposed() const + { + Mat4 t; + for (int r = 0; r < 4; ++r) + for (int c = 0; c < 4; ++c) + t.m_buffer[r * 4 + c] = m_buffer[c * 4 + r]; + return t; + } + + // Returns true and writes inverse if invertible. Otherwise returns false + // and `result` is unchanged. Cofactor method. + bool invert(Mat4* result) const + { + const float* m = m_buffer.data(); + float inv[16]; + + inv[0] = m[5] * m[10] * m[15] - m[5] * m[11] * m[14] - + m[9] * m[6] * m[15] + m[9] * m[7] * m[14] + + m[13] * m[6] * m[11] - m[13] * m[7] * m[10]; + inv[4] = -m[4] * m[10] * m[15] + m[4] * m[11] * m[14] + + m[8] * m[6] * m[15] - m[8] * m[7] * m[14] - + m[12] * m[6] * m[11] + m[12] * m[7] * m[10]; + inv[8] = m[4] * m[9] * m[15] - m[4] * m[11] * m[13] - + m[8] * m[5] * m[15] + m[8] * m[7] * m[13] + + m[12] * m[5] * m[11] - m[12] * m[7] * m[9]; + inv[12] = -m[4] * m[9] * m[14] + m[4] * m[10] * m[13] + + m[8] * m[5] * m[14] - m[8] * m[6] * m[13] - + m[12] * m[5] * m[10] + m[12] * m[6] * m[9]; + inv[1] = -m[1] * m[10] * m[15] + m[1] * m[11] * m[14] + + m[9] * m[2] * m[15] - m[9] * m[3] * m[14] - + m[13] * m[2] * m[11] + m[13] * m[3] * m[10]; + inv[5] = m[0] * m[10] * m[15] - m[0] * m[11] * m[14] - + m[8] * m[2] * m[15] + m[8] * m[3] * m[14] + + m[12] * m[2] * m[11] - m[12] * m[3] * m[10]; + inv[9] = -m[0] * m[9] * m[15] + m[0] * m[11] * m[13] + + m[8] * m[1] * m[15] - m[8] * m[3] * m[13] - + m[12] * m[1] * m[11] + m[12] * m[3] * m[9]; + inv[13] = m[0] * m[9] * m[14] - m[0] * m[10] * m[13] - + m[8] * m[1] * m[14] + m[8] * m[2] * m[13] + + m[12] * m[1] * m[10] - m[12] * m[2] * m[9]; + inv[2] = m[1] * m[6] * m[15] - m[1] * m[7] * m[14] - + m[5] * m[2] * m[15] + m[5] 
* m[3] * m[14] + + m[13] * m[2] * m[7] - m[13] * m[3] * m[6]; + inv[6] = -m[0] * m[6] * m[15] + m[0] * m[7] * m[14] + + m[4] * m[2] * m[15] - m[4] * m[3] * m[14] - + m[12] * m[2] * m[7] + m[12] * m[3] * m[6]; + inv[10] = m[0] * m[5] * m[15] - m[0] * m[7] * m[13] - + m[4] * m[1] * m[15] + m[4] * m[3] * m[13] + + m[12] * m[1] * m[7] - m[12] * m[3] * m[5]; + inv[14] = -m[0] * m[5] * m[14] + m[0] * m[6] * m[13] + + m[4] * m[1] * m[14] - m[4] * m[2] * m[13] - + m[12] * m[1] * m[6] + m[12] * m[2] * m[5]; + inv[3] = -m[1] * m[6] * m[11] + m[1] * m[7] * m[10] + + m[5] * m[2] * m[11] - m[5] * m[3] * m[10] - + m[9] * m[2] * m[7] + m[9] * m[3] * m[6]; + inv[7] = m[0] * m[6] * m[11] - m[0] * m[7] * m[10] - + m[4] * m[2] * m[11] + m[4] * m[3] * m[10] + + m[8] * m[2] * m[7] - m[8] * m[3] * m[6]; + inv[11] = -m[0] * m[5] * m[11] + m[0] * m[7] * m[9] + + m[4] * m[1] * m[11] - m[4] * m[3] * m[9] - + m[8] * m[1] * m[7] + m[8] * m[3] * m[5]; + inv[15] = m[0] * m[5] * m[10] - m[0] * m[6] * m[9] - + m[4] * m[1] * m[10] + m[4] * m[2] * m[9] + + m[8] * m[1] * m[6] - m[8] * m[2] * m[5]; + + float det = + m[0] * inv[0] + m[1] * inv[4] + m[2] * inv[8] + m[3] * inv[12]; + if (det == 0.f) + return false; + float invDet = 1.f / det; + for (int i = 0; i < 16; ++i) + (*result)[i] = inv[i] * invDet; + return true; + } + +private: + std::array<float, 16> m_buffer; +}; + +static_assert(std::is_trivially_destructible<Mat4>::value, + "Mat4 must be trivially destructible"); +static_assert(sizeof(Mat4) == 16 * sizeof(float), + "Mat4 must be 64 bytes (no padding)"); + +inline bool operator==(const Mat4& a, const Mat4& b) +{ + for (size_t i = 0; i < 16; ++i) + if (a[i] != b[i]) + return false; + return true; +} +inline bool operator!=(const Mat4& a, const Mat4& b) { return !(a == b); } + +} // namespace rive +#endif
diff --git a/src/lua/math/lua_mat4.cpp b/src/lua/math/lua_mat4.cpp new file mode 100644 index 0000000..402b64a --- /dev/null +++ b/src/lua/math/lua_mat4.cpp
#ifdef WITH_RIVE_SCRIPTING
#include "rive/math/mat4.hpp"
#include "rive/lua/rive_lua_libs.hpp"
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>

using namespace rive;

// Size in bytes of the matrix payload copied by Mat4:writeToBuffer.
static constexpr size_t kMat4ByteSize = 16 * sizeof(float);

// Pushes a new Mat4 userdata initialised from `mat` and returns it.
static ScriptedMat4* lua_pushmat4(lua_State* L, const Mat4& mat)
{
    return lua_newrive<ScriptedMat4>(L, mat);
}

// Pushes a new default-constructed (identity) Mat4 userdata and returns it.
static ScriptedMat4* lua_pushmat4(lua_State* L)
{
    return lua_newrive<ScriptedMat4>(L);
}

// Mat4.values(c0x, c0y, c0z, c0w, c1x, ..., c3w) — column-major.
static int mat4_values(lua_State* L)
{
    // Validate all 16 arguments before allocating the userdata. Pushing first
    // would put the new Mat4 in stack slot N+1 when only N arguments were
    // supplied, so the argument check would blame the Mat4 itself instead of
    // reporting a missing number.
    float components[16];
    for (int i = 0; i < 16; ++i)
    {
        components[i] = float(luaL_checknumber(L, 1 + i));
    }
    auto out = lua_pushmat4(L);
    std::memcpy(out->value.values(), components, sizeof(components));
    return 1;
}

// Mat4.identity() -> Mat4
static int mat4_identity(lua_State* L)
{
    lua_pushmat4(L, Mat4::identity());
    return 1;
}

// Mat4.fromTranslation(x, y, z) -> Mat4
static int mat4_fromTranslation(lua_State* L)
{
    float x = float(luaL_checknumber(L, 1));
    float y = float(luaL_checknumber(L, 2));
    float z = float(luaL_checknumber(L, 3));
    lua_pushmat4(L, Mat4::fromTranslation(x, y, z));
    return 1;
}

// Mat4.fromScale(sx [, sy [, sz]]) -> Mat4. Omitted (or non-numeric) sy/sz
// fall back to sx, so a single argument produces a uniform scale.
static int mat4_fromScale(lua_State* L)
{
    float sx = float(luaL_checknumber(L, 1));
    float sy = lua_isnumber(L, 2) ? float(luaL_checknumber(L, 2)) : sx;
    float sz = lua_isnumber(L, 3) ? float(luaL_checknumber(L, 3)) : sx;
    lua_pushmat4(L, Mat4::fromScale(sx, sy, sz));
    return 1;
}

// Mat4.fromRotationX(radians) -> Mat4
static int mat4_fromRotationX(lua_State* L)
{
    lua_pushmat4(L, Mat4::fromRotationX(float(luaL_checknumber(L, 1))));
    return 1;
}

// Mat4.fromRotationY(radians) -> Mat4
static int mat4_fromRotationY(lua_State* L)
{
    lua_pushmat4(L, Mat4::fromRotationY(float(luaL_checknumber(L, 1))));
    return 1;
}

// Mat4.fromRotationZ(radians) -> Mat4
static int mat4_fromRotationZ(lua_State* L)
{
    lua_pushmat4(L, Mat4::fromRotationZ(float(luaL_checknumber(L, 1))));
    return 1;
}

// Mat4.perspective(fovY, aspect, near, far) -> Mat4. Always uses the
// [0, 1] depth range.
static int mat4_perspective(lua_State* L)
{
    float fov = float(luaL_checknumber(L, 1));
    float aspect = float(luaL_checknumber(L, 2));
    float n = float(luaL_checknumber(L, 3));
    float f = float(luaL_checknumber(L, 4));
    lua_pushmat4(L, Mat4::perspective(fov, aspect, n, f, /*zeroToOne=*/true));
    return 1;
}

// In-place: Mat4.multiply(out, a, b) -> out = a * b. Returns out.
// Avoids per-call userdata allocation in tight loops.
static int mat4_static_multiply(lua_State* L)
{
    auto out = lua_torive<ScriptedMat4>(L, 1);
    auto a = lua_torive<ScriptedMat4>(L, 2);
    auto b = lua_torive<ScriptedMat4>(L, 3);
    // Mat4::multiply returns by value, so `out` may alias `a` or `b`.
    out->value = Mat4::multiply(a->value, b->value);
    lua_pushvalue(L, 1);
    return 1;
}

// In-place: Mat4.invert(out, m) -> boolean. Writes the inverse of `m` into
// `out` and returns true, or returns false leaving `out` untouched.
static int mat4_static_invert(lua_State* L)
{
    auto out = lua_torive<ScriptedMat4>(L, 1);
    auto in = lua_torive<ScriptedMat4>(L, 2);
    lua_pushboolean(L, in->value.invert(&out->value));
    return 1;
}

// Maps a field key to a 0-based offset into the column-major storage, or -1
// if the key is not recognised. Supports m11..m44 (row,col 1-indexed) and
// 1..16 (column-major linear index, 1-indexed).
static int mat4_field_offset(const char* name, size_t namelen)
{
    if (namelen == 3 && name[0] == 'm')
    {
        int row = name[1] - '0';
        int col = name[2] - '0';
        if (row >= 1 && row <= 4 && col >= 1 && col <= 4)
        {
            // m[row][col] 1-indexed; column-major storage means
            // index = (col-1)*4 + (row-1).
            return (col - 1) * 4 + (row - 1);
        }
        return -1;
    }
    if (namelen >= 1 && namelen <= 2)
    {
        // Accept decimal digits only. (strtol would also accept leading
        // whitespace and a sign, letting keys such as " 1" or "+1" through.)
        int n = 0;
        for (size_t i = 0; i < namelen; ++i)
        {
            if (name[i] < '0' || name[i] > '9')
            {
                return -1;
            }
            n = n * 10 + (name[i] - '0');
        }
        if (n >= 1 && n <= 16)
        {
            return n - 1;
        }
    }
    return -1;
}

// Pushes the element named by `name` and returns 1, or returns 0 (pushing
// nothing) if `name` is not a valid field.
static int mat4_index_field(lua_State* L,
                            ScriptedMat4* mat,
                            const char* name,
                            size_t namelen)
{
    int offset = mat4_field_offset(name, namelen);
    if (offset < 0)
    {
        return 0;
    }
    lua_pushnumber(L, mat->value[offset]);
    return 1;
}

// Stores `value` into the element named by `name` and returns 0, or returns
// -1 if `name` is not a valid field.
static int mat4_newindex_field(lua_State* L,
                               ScriptedMat4* mat,
                               const char* name,
                               size_t namelen,
                               float value)
{
    (void)L;
    int offset = mat4_field_offset(name, namelen);
    if (offset < 0)
    {
        return -1;
    }
    mat->value[offset] = value;
    return 0;
}

// __index: reads mat.mRC or mat[i]; raises a Lua error for any other key.
static int mat4_index(lua_State* L)
{
    auto mat = lua_torive<ScriptedMat4>(L, 1);
    size_t namelen = 0;
    const char* name = luaL_checklstring(L, 2, &namelen);
    if (mat4_index_field(L, mat, name, namelen) == 1)
        return 1;
    luaL_error(L,
               "'%s' is not a valid index of %s",
               name,
               ScriptedMat4::luaName);
    return 0;
}

// __newindex: writes mat.mRC or mat[i]; raises a Lua error for any other key.
static int mat4_newindex(lua_State* L)
{
    auto mat = lua_torive<ScriptedMat4>(L, 1);
    size_t namelen = 0;
    const char* name = luaL_checklstring(L, 2, &namelen);
    float value = float(luaL_checknumber(L, 3));
    if (mat4_newindex_field(L, mat, name, namelen, value) == 0)
        return 0;
    luaL_error(L,
               "'%s' is not a valid index of %s",
               name,
               ScriptedMat4::luaName);
    return 0;
}

// __mul: a * b -> new Mat4.
static int mat4_mul(lua_State* L)
{
    auto a = lua_torive<ScriptedMat4>(L, 1);
    auto b = lua_torive<ScriptedMat4>(L, 2);
    lua_pushmat4(L, Mat4::multiply(a->value, b->value));
    return 1;
}

// __eq: exact element-wise equality.
static int mat4_eq(lua_State* L)
{
    auto a = lua_torive<ScriptedMat4>(L, 1);
    auto b = lua_torive<ScriptedMat4>(L, 2);
    lua_pushboolean(L, a->value == b->value);
    return 1;
}

// mat:invert() -> new Mat4, or nil if the matrix is not invertible.
static int mat4_invert(lua_State* L)
{
    auto mat = lua_torive<ScriptedMat4>(L, 1);
    Mat4 result;
    if (mat->value.invert(&result))
    {
        lua_pushmat4(L, result);
        return 1;
    }
    lua_pushnil(L);
    return 1;
}

// mat:transpose() -> new Mat4.
static int mat4_transpose(lua_State* L)
{
    auto mat = lua_torive<ScriptedMat4>(L, 1);
    lua_pushmat4(L, mat->value.transposed());
    return 1;
}

// mat:transformPoint(x, y, z) -> vector(x', y', z') (w=1, perspective
// divide)
static int mat4_transformPoint(lua_State* L)
{
    auto mat = lua_torive<ScriptedMat4>(L, 1);
    float x = float(luaL_checknumber(L, 2));
    float y = float(luaL_checknumber(L, 3));
    float z = float(luaL_checknumber(L, 4));
    float out[4];
    mat->value.transformVec4(out, x, y, z, 1.f);
    // Skip the divide when w' is 1 (nothing to do) or 0 (would produce
    // inf/NaN); in both cases the undivided xyz is returned.
    if (out[3] != 0.f && out[3] != 1.f)
    {
        float inv = 1.f / out[3];
        lua_pushvector(L, out[0] * inv, out[1] * inv, out[2] * inv);
    }
    else
    {
        lua_pushvector(L, out[0], out[1], out[2]);
    }
    return 1;
}

// mat:transformVec4(x, y, z, w) -> x', y', z', w' (no perspective divide)
// Useful for clip-space transforms where the caller wants the homogeneous w
// preserved.
static int mat4_transformVec4(lua_State* L)
{
    auto mat = lua_torive<ScriptedMat4>(L, 1);
    float x = float(luaL_checknumber(L, 2));
    float y = float(luaL_checknumber(L, 3));
    float z = float(luaL_checknumber(L, 4));
    float w = float(luaL_checknumber(L, 5));
    float out[4];
    mat->value.transformVec4(out, x, y, z, w);
    lua_pushnumber(L, out[0]);
    lua_pushnumber(L, out[1]);
    lua_pushnumber(L, out[2]);
    lua_pushnumber(L, out[3]);
    return 4;
}

// mat:writeToBuffer(buf, byteOffset) — direct 64-byte memcpy of the
// column-major matrix into a Luau buffer (uniform-buffer-friendly).
// Raises a Lua error if the 64 bytes would not fit at `byteOffset`.
static int mat4_writeToBuffer(lua_State* L)
{
    auto mat = lua_torive<ScriptedMat4>(L, 1);
    size_t bufLen = 0;
    void* buf = luaL_checkbuffer(L, 2, &bufLen);
    int off = int(luaL_checkinteger(L, 3));
    if (off < 0 || size_t(off) + kMat4ByteSize > bufLen)
    {
        luaL_error(L, "Mat4:writeToBuffer offset out of range");
        return 0;
    }
    std::memcpy(static_cast<uint8_t*>(buf) + off,
                mat->value.values(),
                kMat4ByteSize);
    return 0;
}

// __namecall: dispatches mat:method(...) calls by atom.
static int mat4_namecall(lua_State* L)
{
    int atom;
    const char* method = lua_namecallatom(L, &atom);
    if (method != nullptr)
    {
        switch (atom)
        {
            case (int)LuaAtoms::invert:
                return mat4_invert(L);
            case (int)LuaAtoms::transpose:
                return mat4_transpose(L);
            case (int)LuaAtoms::transformPoint:
                return mat4_transformPoint(L);
            case (int)LuaAtoms::transformVec4:
                return mat4_transformVec4(L);
            case (int)LuaAtoms::writeToBuffer:
                return mat4_writeToBuffer(L);
        }
    }
    // Report the method name returned by lua_namecallatom. Stack slot 1 holds
    // the Mat4 receiver, not the name, so running a string check on it would
    // raise an unrelated type error instead of this message.
    luaL_error(L,
               "%s is not a valid method of %s",
               method != nullptr ? method : "?",
               ScriptedMat4::luaName);
    return 0;
}

static const luaL_Reg mat4StaticMethods[] = {
    {"identity", mat4_identity},
    {"values", mat4_values},
    {"fromTranslation", mat4_fromTranslation},
    {"fromScale", mat4_fromScale},
    {"fromRotationX", mat4_fromRotationX},
    {"fromRotationY", mat4_fromRotationY},
    {"fromRotationZ", mat4_fromRotationZ},
    {"perspective", mat4_perspective},
    {"multiply", mat4_static_multiply},
    {"invert", mat4_static_invert},
    {nullptr, nullptr}};

// Registers the `Mat4` static library table and installs the metamethods on
// the ScriptedMat4 metatable. Returns 1 (the value left on the stack after
// the metatable is popped).
int luaopen_rive_mat4(lua_State* L)
{
    luaL_register(L, ScriptedMat4::luaName, mat4StaticMethods);
    lua_register_rive<ScriptedMat4>(L);

    lua_pushcfunction(L, mat4_index, nullptr);
    lua_setfield(L, -2, "__index");

    lua_pushcfunction(L, mat4_newindex, nullptr);
    lua_setfield(L, -2, "__newindex");

    lua_pushcfunction(L, mat4_mul, nullptr);
    lua_setfield(L, -2, "__mul");

    lua_pushcfunction(L, mat4_eq, nullptr);
    lua_setfield(L, -2, "__eq");

    lua_pushcfunction(L, mat4_namecall, nullptr);
    lua_setfield(L, -2, "__namecall");

    lua_setreadonly(L, -1, true);
    lua_pop(L, 1); // pop metatable
    return 1;
}

#endif
diff --git a/src/lua/math/lua_math.cpp b/src/lua/math/lua_math.cpp index dbdf8f2..f7987c8 100644 --- a/src/lua/math/lua_math.cpp +++ b/src/lua/math/lua_math.cpp
@@ -3,10 +3,12 @@ int luaopen_rive_vector(lua_State* L); int luaopen_rive_mat2d(lua_State* L); +int luaopen_rive_mat4(lua_State* L); int luaopen_rive_color(lua_State* L); static const lua_CFunction mathTypes[] = {luaopen_rive_vector, luaopen_rive_mat2d, + luaopen_rive_mat4, luaopen_rive_color}; int luaopen_rive_math(lua_State* L)
diff --git a/src/lua/math/lua_vec2d.cpp b/src/lua/math/lua_vec2d.cpp index 4b4d73e..d9cdd01 100644 --- a/src/lua/math/lua_vec2d.cpp +++ b/src/lua/math/lua_vec2d.cpp
@@ -24,6 +24,13 @@ case '2': lua_pushnumber(L, vec[1]); return 1; + case 'z': + case '3': + // Luau's vector type stores 3 components; .z is reachable + // intrinsically through the VM's named-axis fastpath, but + // numeric `[3]` indexing routes through this metamethod. + lua_pushnumber(L, vec[2]); + return 1; default: break; }
diff --git a/src/lua/rive_lua_libs.cpp b/src/lua/rive_lua_libs.cpp index 8963773..fcce41b 100644 --- a/src/lua/rive_lua_libs.cpp +++ b/src/lua/rive_lua_libs.cpp
@@ -245,6 +245,11 @@ {"onCancel", (int16_t)LuaAtoms::onCancel}, {"getStatus", (int16_t)LuaAtoms::getStatus}, {"decodeImage", (int16_t)LuaAtoms::decodeImage}, + // Mat4 + {"transpose", (int16_t)LuaAtoms::transpose}, + {"transformPoint", (int16_t)LuaAtoms::transformPoint}, + {"transformVec4", (int16_t)LuaAtoms::transformVec4}, + {"writeToBuffer", (int16_t)LuaAtoms::writeToBuffer}, }; static const luaL_Reg lualibs[] = {
diff --git a/tests/unit_tests/runtime/scripting/scripting_mat4_test.cpp b/tests/unit_tests/runtime/scripting/scripting_mat4_test.cpp new file mode 100644 index 0000000..3562c01 --- /dev/null +++ b/tests/unit_tests/runtime/scripting/scripting_mat4_test.cpp
@@ -0,0 +1,287 @@ +// Tests for the Mat4 scripted type and a perf benchmark comparing the C++ +// SIMD-accelerated path against a pure-Luau buffer-based mat4 implementation +// modelled on examples/SpinningCube.luau. + +#include "catch.hpp" +#include "scripting_test_utilities.hpp" + +#include <chrono> +#include <climits> +#include <cstdio> + +using namespace rive; + +TEST_CASE("Mat4 identity has expected values", "[scripting]") +{ + CHECK(lua_tonumber(ScriptingTest("return Mat4.identity().m11").state(), + -1) == 1.0); + CHECK(lua_tonumber(ScriptingTest("return Mat4.identity().m22").state(), + -1) == 1.0); + CHECK(lua_tonumber(ScriptingTest("return Mat4.identity().m33").state(), + -1) == 1.0); + CHECK(lua_tonumber(ScriptingTest("return Mat4.identity().m44").state(), + -1) == 1.0); + CHECK(lua_tonumber(ScriptingTest("return Mat4.identity().m12").state(), + -1) == 0.0); + CHECK(lua_tonumber(ScriptingTest("return Mat4.identity()[1]").state(), + -1) == 1.0); + CHECK(lua_tonumber(ScriptingTest("return Mat4.identity()[6]").state(), + -1) == 1.0); +} + +TEST_CASE("Mat4.values stores column-major", "[scripting]") +{ + // Column-major: indices 1..4 are column 0, 5..8 column 1, ... 
+ const char* src = "local m = Mat4.values(\n" + " 1, 2, 3, 4,\n" // column 0 + " 5, 6, 7, 8,\n" // column 1 + " 9,10,11,12,\n" // column 2 + " 13,14,15,16)\n" + "return m.m11, m.m21, m.m31, m.m41, m.m14, m.m44\n"; + auto t = ScriptingTest(src, 6); + CHECK(lua_tonumber(t.state(), -6) == 1.0); // m11 = column 0, row 0 + CHECK(lua_tonumber(t.state(), -5) == 2.0); // m21 = column 0, row 1 + CHECK(lua_tonumber(t.state(), -4) == 3.0); + CHECK(lua_tonumber(t.state(), -3) == 4.0); + CHECK(lua_tonumber(t.state(), -2) == 13.0); // m14 = column 3, row 0 + CHECK(lua_tonumber(t.state(), -1) == 16.0); // m44 = column 3, row 3 +} + +TEST_CASE("Mat4 translation transforms a point", "[scripting]") +{ + const char* src = "local m = Mat4.fromTranslation(10, 20, 30)\n" + "local v = m:transformPoint(1, 2, 3)\n" + "return v.x, v.y, v.z\n"; + auto t = ScriptingTest(src, 3); + CHECK(lua_tonumber(t.state(), -3) == 11.0); + CHECK(lua_tonumber(t.state(), -2) == 22.0); + CHECK(lua_tonumber(t.state(), -1) == 33.0); +} + +TEST_CASE("Mat4 transformVec4 returns homogeneous components", "[scripting]") +{ + // No perspective divide: w is preserved as the final return value. + const char* src = "local m = Mat4.fromTranslation(10, 20, 30)\n" + "return m:transformVec4(1, 2, 3, 1)\n"; + auto t = ScriptingTest(src, 4); + CHECK(lua_tonumber(t.state(), -4) == 11.0); + CHECK(lua_tonumber(t.state(), -3) == 22.0); + CHECK(lua_tonumber(t.state(), -2) == 33.0); + CHECK(lua_tonumber(t.state(), -1) == 1.0); +} + +TEST_CASE("transformPoint result supports z and [3]", "[scripting]") +{ + // Pins down that the 3D Vector returned from a Mat4 transform is + // reachable via both .z (intrinsic VM fastpath) and v[3] (metamethod). 
+ const char* src = "local m = Mat4.fromTranslation(10, 20, 30)\n" + "local v = m:transformPoint(1, 2, 3)\n" + "return v[1], v[2], v[3]\n"; + auto t = ScriptingTest(src, 3); + CHECK(lua_tonumber(t.state(), -3) == 11.0); + CHECK(lua_tonumber(t.state(), -2) == 22.0); + CHECK(lua_tonumber(t.state(), -1) == 33.0); +} + +TEST_CASE("Mat4 multiply composes transforms", "[scripting]") +{ + const char* src = "local t = Mat4.fromTranslation(10, 0, 0)\n" + "local s = Mat4.fromScale(2, 2, 2)\n" + "local m = t * s\n" + // m * (1,1,1) = scale then translate => (2+10, 2, 2) + "local v = m:transformPoint(1, 1, 1)\n" + "return v.x, v.y, v.z\n"; + auto t = ScriptingTest(src, 3); + CHECK(lua_tonumber(t.state(), -3) == 12.0); + CHECK(lua_tonumber(t.state(), -2) == 2.0); + CHECK(lua_tonumber(t.state(), -1) == 2.0); +} + +TEST_CASE("Mat4 invert round-trips", "[scripting]") +{ + const char* src = + "local m = Mat4.fromTranslation(3, -4, 5) * Mat4.fromScale(2, 2, 2)\n" + "local inv = m:invert()\n" + "local r = m * inv\n" + "local id = Mat4.identity()\n" + // Compare diagonal — full equality may fail on FP rounding. + "return math.abs(r.m11 - 1) + math.abs(r.m22 - 1) + math.abs(r.m33 - 1) + math.abs(r.m44 - 1)\n"; + auto t = ScriptingTest(src); + CHECK(lua_tonumber(t.state(), -1) < 1e-5); +} + +TEST_CASE("Mat4.multiply writes in place", "[scripting]") +{ + // Verifies the alloc-free static API used in tight loops. 
+ const char* src = "local out = Mat4.identity()\n" + "local a = Mat4.fromTranslation(1, 2, 3)\n" + "local b = Mat4.fromScale(4, 4, 4)\n" + "Mat4.multiply(out, a, b)\n" + "return out.m14, out.m24, out.m34, out.m11\n"; + auto t = ScriptingTest(src, 4); + CHECK(lua_tonumber(t.state(), -4) == 1.0); + CHECK(lua_tonumber(t.state(), -3) == 2.0); + CHECK(lua_tonumber(t.state(), -2) == 3.0); + CHECK(lua_tonumber(t.state(), -1) == 4.0); +} + +TEST_CASE("Mat4:writeToBuffer stores 64 bytes column-major", "[scripting]") +{ + const char* src = + "local m = Mat4.values(\n" + " 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12, 13,14,15,16)\n" + "local buf = buffer.create(80)\n" + "m:writeToBuffer(buf, 16)\n" + "return buffer.readf32(buf, 16), buffer.readf32(buf, 16+4*4), buffer.readf32(buf, 16+15*4)\n"; + auto t = ScriptingTest(src, 3); + CHECK(lua_tonumber(t.state(), -3) == 1.0); + CHECK(lua_tonumber(t.state(), -2) == 5.0); + CHECK(lua_tonumber(t.state(), -1) == 16.0); +} + +namespace +{ +// Pure-Luau reference implementation of mat4 multiply on Luau buffers. +// Mirrors the m4mul() pattern used in examples/SpinningCube.luau. 
+const char* kLuauBufferMatMulPrelude = + R"( +local function m4get(buf: buffer, i: number): number + return buffer.readf32(buf, i * 4) +end +local function m4set(buf: buffer, i: number, v: number) + buffer.writef32(buf, i * 4, v) +end +local function m4identity(): buffer + local b = buffer.create(64) + m4set(b, 0, 1) + m4set(b, 5, 1) + m4set(b, 10, 1) + m4set(b, 15, 1) + return b +end +local function m4mul(out: buffer, a: buffer, b: buffer) + for col = 0, 3 do + for row = 0, 3 do + local sum: number = 0 + for k = 0, 3 do + sum += m4get(a, k * 4 + row) * m4get(b, col * 4 + k) + end + m4set(out, col * 4 + row, sum) + end + end +end +)"; +} // namespace + +TEST_CASE("Mat4 perf — C++ vs Luau-buffer matmul", "[scripting][benchmark]") +{ + const int N = 20000; + const int WARMUP = 1; + const int RUNS = 3; + + // Compile-and-run timing: ScriptingTest constructs a fresh VM, compiles, + // and runs the script. The compile/setup overhead is the same for every + // variant, so the cross-comparison is valid even though absolute numbers + // include startup time. 
+ auto bestRun = [&](const char* src) -> long long { + long long best = LLONG_MAX; + for (int run = 0; run < WARMUP + RUNS; ++run) + { + auto t0 = std::chrono::high_resolution_clock::now(); + ScriptingTest test(src, 0); + auto t1 = std::chrono::high_resolution_clock::now(); + auto us = + std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0) + .count(); + if (run >= WARMUP && us < best) + best = us; + } + return best; + }; + + char src[4096]; + + // Variant A: Mat4 * Mat4 (C++, allocates a Mat4 each iteration) + snprintf(src, + sizeof(src), + "local a = Mat4.fromTranslation(1, 2, 3)\n" + "local b = Mat4.fromRotationZ(0.1)\n" + "local m = Mat4.identity()\n" + "for i = 1, %d do m = a * b end\n", + N); + long long cppMul = bestRun(src); + + // Variant B: Mat4.multiply(out, a, b) (C++, in-place, no alloc) + snprintf(src, + sizeof(src), + "local a = Mat4.fromTranslation(1, 2, 3)\n" + "local b = Mat4.fromRotationZ(0.1)\n" + "local out = Mat4.identity()\n" + "for i = 1, %d do Mat4.multiply(out, a, b) end\n", + N); + long long cppInPlace = bestRun(src); + + // Variant C: pure-Luau buffer matmul (the SpinningCube approach) + char luauSrc[4096]; + snprintf( + luauSrc, + sizeof(luauSrc), + "%s\n" + "local a = m4identity()\n" + "local b = m4identity()\n" + // Plant a few non-zero entries so the inner loop does real work. + "m4set(a, 12, 1); m4set(a, 13, 2); m4set(a, 14, 3)\n" + "m4set(b, 0, 0.99); m4set(b, 1, 0.099); m4set(b, 4, -0.099); m4set(b, 5, 0.99)\n" + "local out = m4identity()\n" + "for i = 1, %d do m4mul(out, a, b) end\n", + kLuauBufferMatMulPrelude, + N); + long long luauBuf = bestRun(luauSrc); + + // Variant D: matmul on a Luau table of 16 numbers — no SIMD, no buffer + // reads, but every entry is a Lua TValue (8-byte tag + double). 
+ snprintf( + luauSrc, + sizeof(luauSrc), + "local function tnew()\n" + " return {1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1}\n" + "end\n" + "local function tmul(out, a, b)\n" + " for col = 0, 3 do\n" + " for row = 0, 3 do\n" + " local s = 0\n" + " for k = 0, 3 do\n" + " s += a[k*4 + row + 1] * b[col*4 + k + 1]\n" + " end\n" + " out[col*4 + row + 1] = s\n" + " end\n" + " end\n" + "end\n" + "local a = tnew(); a[13] = 1; a[14] = 2; a[15] = 3\n" + "local b = tnew(); b[1] = 0.99; b[2] = 0.099; b[5] = -0.099; b[6] = 0.99\n" + "local out = tnew()\n" + "for i = 1, %d do tmul(out, a, b) end\n", + N); + long long luauTable = bestRun(luauSrc); + + fprintf(stderr, + "\n" + "Mat4 matmul perf (%d iterations, best of %d, includes VM setup):\n" + " C++ a*b : %lld us\n" + " C++ multiply(out) : %lld us\n" + " Luau buffer mul : %lld us\n" + " Luau table mul : %lld us\n" + "\n", + N, + RUNS, + cppMul, + cppInPlace, + luauBuf, + luauTable); + + // Sanity: the in-place C++ path must be at least as fast as either Luau + // approach. If this ever fails we have a real perf regression worth + // investigating. + CHECK(cppInPlace <= luauBuf); + CHECK(cppInPlace <= luauTable); +}