feat(scripting): Mat4 affine fast paths + reverse-Z perspective (#12454) 072832aecc Co-authored-by: Luigi Rosso <luigi-rosso@users.noreply.github.com>
diff --git a/.rive_head b/.rive_head index d3f91ae..857b9de 100644 --- a/.rive_head +++ b/.rive_head
@@ -1 +1 @@ -7c539a46ff2f6ba167a5f4104f173b5861b5917f +072832aecc271e330aa5c04ff9c8d2834f3d369e
diff --git a/include/rive/lua/rive_lua_libs.hpp b/include/rive/lua/rive_lua_libs.hpp index 3527173..1a5674e 100644 --- a/include/rive/lua/rive_lua_libs.hpp +++ b/include/rive/lua/rive_lua_libs.hpp
@@ -339,6 +339,7 @@ transformPoint, transformVec4, writeToBuffer, + invertAffine, }; struct ScriptedMat2D
diff --git a/include/rive/math/mat4.hpp b/include/rive/math/mat4.hpp index 3b0351d..341ab2b 100644 --- a/include/rive/math/mat4.hpp +++ b/include/rive/math/mat4.hpp
@@ -156,6 +156,28 @@ return m; } + // Right-handed perspective with reverse-Z (near -> 1, far -> 0) and an + // infinite far plane. Combined with a float depth buffer this gives a + // near-uniform 1/z depth distribution across the entire frustum — the + // best precision an arbitrary scene can hope for. See Upchurch & Desbrun, + // "Tightening the Precision of Perspective Rendering" (2012). + // + // Caller's depth buffer must be cleared to 0 (not 1) and the depth test + // flipped (GREATER, not LESS). + static Mat4 perspectiveReverseZ(float fovYRadians, + float aspect, + float near_) + { + float f = 1.f / std::tan(fovYRadians * 0.5f); + Mat4 m{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + m.m_buffer[0] = f / aspect; + m.m_buffer[5] = f; + m.m_buffer[10] = 0.f; + m.m_buffer[11] = -1.f; + m.m_buffer[14] = near_; + return m; + } + // SIMD: out = lhs * rhs. Both column-major. static Mat4 multiply(const Mat4& lhs, const Mat4& rhs) { @@ -181,6 +203,38 @@ Mat4 operator*(const Mat4& rhs) const { return multiply(*this, rhs); } + // SIMD: out = lhs * rhs, assuming both are affine (bottom row + // [0, 0, 0, 1]). Skips the four FMAs that would multiply lhs's bottom- + // row zeros and the rhs[3]=0 entries of the first three columns. + // + // Result is always affine. Passing a non-affine input gives an + // incorrect result — callers must ensure the contract. + static Mat4 multiplyAffine(const Mat4& lhs, const Mat4& rhs) + { + const float* L = lhs.m_buffer.data(); + const float* R = rhs.m_buffer.data(); + float4 c0 = simd::load4f(L); // [_,_,_,0] + float4 c1 = simd::load4f(L + 4); // [_,_,_,0] + float4 c2 = simd::load4f(L + 8); // [_,_,_,0] + float4 c3 = simd::load4f(L + 12); // [_,_,_,1] + + Mat4 out; + // Cols 0..2: rhs[3] is 0, so the c3*rhs[3] term vanishes. + for (int j = 0; j < 3; ++j) + { + const float* rcol = R + j * 4; + float4 result = c0 * rcol[0] + c1 * rcol[1] + c2 * rcol[2]; + simd::store(out.m_buffer.data() + j * 4, result); + } + // Col 3: rhs[3] is 1, so c3 contributes directly. + { + const float* rcol = R + 12; + float4 result = c0 * rcol[0] + c1 * rcol[1] + c2 * rcol[2] + c3; + simd::store(out.m_buffer.data() + 12, result); + } + return out; + } + // SIMD: out = M * (x, y, z, w). Returns a 4-component vector (xyzw). void transformVec4(float out[4], float x, float y, float z, float w) const { @@ -266,6 +320,66 @@ return true; } + // Closed-form inverse for affine matrices (bottom row [0, 0, 0, 1]). + // Inverts the 3x3 linear part R via cofactors, then writes -R^-1 * t + // into the translation column. Much smaller and faster than the full + // 4x4 cofactor `invert`. + // + // Returns false (and leaves `result` unchanged) only if the linear part + // is singular. Caller must ensure the input is actually affine. + bool invertAffine(Mat4* result) const + { + const float* m = m_buffer.data(); + // The 3x3 linear part R has R[row][col] = m[col*4 + row]. + // Cofactors C[i][j] of column 0 of R, expanded for det along col 0. + float c00 = m[5] * m[10] - m[6] * m[9]; + float c10 = m[6] * m[8] - m[4] * m[10]; + float c20 = m[4] * m[9] - m[5] * m[8]; + float det = m[0] * c00 + m[1] * c10 + m[2] * c20; + if (det == 0.f) + return false; + float invDet = 1.f / det; + // Remaining 6 cofactors. + float c01 = m[2] * m[9] - m[1] * m[10]; + float c02 = m[1] * m[6] - m[2] * m[5]; + float c11 = m[0] * m[10] - m[2] * m[8]; + float c12 = m[2] * m[4] - m[0] * m[6]; + float c21 = m[1] * m[8] - m[0] * m[9]; + float c22 = m[0] * m[5] - m[1] * m[4]; + + // R^-1 = (cofactor matrix)^T / det, so Rinv[i][j] = C[j][i] / det. + // Naming below: ri_j = Rinv[i][j]. + float r0_0 = c00 * invDet, r0_1 = c10 * invDet, r0_2 = c20 * invDet; + float r1_0 = c01 * invDet, r1_1 = c11 * invDet, r1_2 = c21 * invDet; + float r2_0 = c02 * invDet, r2_1 = c12 * invDet, r2_2 = c22 * invDet; + + // Translation column: -R^-1 * t. + float tx = m[12], ty = m[13], tz = m[14]; + float ix = -(r0_0 * tx + r0_1 * ty + r0_2 * tz); + float iy = -(r1_0 * tx + r1_1 * ty + r1_2 * tz); + float iz = -(r2_0 * tx + r2_1 * ty + r2_2 * tz); + + // Store column-major: o[col*4 + row] = Rinv[row][col]. + float* o = result->m_buffer.data(); + o[0] = r0_0; + o[1] = r1_0; + o[2] = r2_0; + o[3] = 0.f; + o[4] = r0_1; + o[5] = r1_1; + o[6] = r2_1; + o[7] = 0.f; + o[8] = r0_2; + o[9] = r1_2; + o[10] = r2_2; + o[11] = 0.f; + o[12] = ix; + o[13] = iy; + o[14] = iz; + o[15] = 1.f; + return true; + } + private: std::array<float, 16> m_buffer; };
diff --git a/src/lua/math/lua_mat4.cpp b/src/lua/math/lua_mat4.cpp index 402b64a..1cb3344 100644 --- a/src/lua/math/lua_mat4.cpp +++ b/src/lua/math/lua_mat4.cpp
@@ -80,6 +80,16 @@ return 1; } +// Reverse-Z infinite-far perspective. See Mat4::perspectiveReverseZ. +static int mat4_perspectiveReverseZ(lua_State* L) +{ + float fov = float(luaL_checknumber(L, 1)); + float aspect = float(luaL_checknumber(L, 2)); + float n = float(luaL_checknumber(L, 3)); + lua_pushmat4(L, Mat4::perspectiveReverseZ(fov, aspect, n)); + return 1; +} + // In-place: Mat4.multiply(out, a, b) -> out = a * b. Returns out. // Avoids per-call userdata allocation in tight loops. static int mat4_static_multiply(lua_State* L) @@ -92,6 +102,19 @@ return 1; } +// In-place: Mat4.multiplyAffine(out, a, b) -> out = a * b, assuming both +// inputs are affine (bottom row [0,0,0,1]). Faster than `multiply` (skips +// the bottom-row work). +static int mat4_static_multiplyAffine(lua_State* L) +{ + auto out = lua_torive<ScriptedMat4>(L, 1); + auto a = lua_torive<ScriptedMat4>(L, 2); + auto b = lua_torive<ScriptedMat4>(L, 3); + out->value = Mat4::multiplyAffine(a->value, b->value); + lua_pushvalue(L, 1); + return 1; +} + static int mat4_static_invert(lua_State* L) { auto out = lua_torive<ScriptedMat4>(L, 1); @@ -100,6 +123,16 @@ return 1; } +// In-place: Mat4.invertAffine(out, in) — closed-form affine inverse. +// Returns true if invertible. Caller must ensure the input is affine. +static int mat4_static_invertAffine(lua_State* L) +{ + auto out = lua_torive<ScriptedMat4>(L, 1); + auto in = lua_torive<ScriptedMat4>(L, 2); + lua_pushboolean(L, in->value.invertAffine(&out->value)); + return 1; +} + // Field index lookup. Supports m11..m44 (row,col 1-indexed) and 1..16 // (column-major linear index, 1-indexed). static int mat4_index_field(lua_State* L, @@ -219,6 +252,19 @@ return 1; } +static int mat4_invertAffine(lua_State* L) +{ + auto mat = lua_torive<ScriptedMat4>(L, 1); + Mat4 result; + if (mat->value.invertAffine(&result)) + { + lua_pushmat4(L, result); + return 1; + } + lua_pushnil(L); + return 1; +} + static int mat4_transpose(lua_State* L) { auto mat = lua_torive<ScriptedMat4>(L, 1); @@ -293,6 +339,8 @@ { case (int)LuaAtoms::invert: return mat4_invert(L); + case (int)LuaAtoms::invertAffine: + return mat4_invertAffine(L); case (int)LuaAtoms::transpose: return mat4_transpose(L); case (int)LuaAtoms::transformPoint: @@ -319,8 +367,11 @@ {"fromRotationY", mat4_fromRotationY}, {"fromRotationZ", mat4_fromRotationZ}, {"perspective", mat4_perspective}, + {"perspectiveReverseZ", mat4_perspectiveReverseZ}, {"multiply", mat4_static_multiply}, + {"multiplyAffine", mat4_static_multiplyAffine}, {"invert", mat4_static_invert}, + {"invertAffine", mat4_static_invertAffine}, {nullptr, nullptr}}; int luaopen_rive_mat4(lua_State* L)
diff --git a/src/lua/rive_lua_libs.cpp b/src/lua/rive_lua_libs.cpp index fcce41b..7c9ac12 100644 --- a/src/lua/rive_lua_libs.cpp +++ b/src/lua/rive_lua_libs.cpp
@@ -250,6 +250,7 @@ {"transformPoint", (int16_t)LuaAtoms::transformPoint}, {"transformVec4", (int16_t)LuaAtoms::transformVec4}, {"writeToBuffer", (int16_t)LuaAtoms::writeToBuffer}, + {"invertAffine", (int16_t)LuaAtoms::invertAffine}, }; static const luaL_Reg lualibs[] = {
diff --git a/tests/unit_tests/runtime/scripting/scripting_mat4_test.cpp b/tests/unit_tests/runtime/scripting/scripting_mat4_test.cpp index 3562c01..0b4fe1f 100644 --- a/tests/unit_tests/runtime/scripting/scripting_mat4_test.cpp +++ b/tests/unit_tests/runtime/scripting/scripting_mat4_test.cpp
@@ -125,6 +125,83 @@ CHECK(lua_tonumber(t.state(), -1) == 4.0); } +TEST_CASE("Mat4.multiplyAffine matches multiply for affine inputs", + "[scripting]") +{ + // For two affine matrices the fast and slow paths must agree + // bit-exactly on every entry. + const char* src = + "local a = Mat4.fromTranslation(3, -1, 5) * Mat4.fromRotationY(0.7)\n" + "local b = Mat4.fromScale(2, 0.5, 1) * Mat4.fromRotationZ(-0.3)\n" + "local slow = Mat4.identity()\n" + "local fast = Mat4.identity()\n" + "Mat4.multiply(slow, a, b)\n" + "Mat4.multiplyAffine(fast, a, b)\n" + // Sum |slow[i] - fast[i]| over i=1..16; must be 0. + "local diff = 0\n" + "for i = 1, 16 do diff = diff + math.abs(slow[i] - fast[i]) end\n" + "return diff, fast.m41, fast.m42, fast.m43, fast.m44\n"; + auto t = ScriptingTest(src, 5); + CHECK(lua_tonumber(t.state(), -5) == 0.0); + // Bottom row stays [0, 0, 0, 1] (affine invariant). + CHECK(lua_tonumber(t.state(), -4) == 0.0); + CHECK(lua_tonumber(t.state(), -3) == 0.0); + CHECK(lua_tonumber(t.state(), -2) == 0.0); + CHECK(lua_tonumber(t.state(), -1) == 1.0); +} + +TEST_CASE("Mat4:invertAffine round-trips", "[scripting]") +{ + const char* src = + "local m = Mat4.fromTranslation(3, -4, 5) * Mat4.fromRotationY(0.4)" + " * Mat4.fromScale(2, 2, 2)\n" + "local inv = m:invertAffine()\n" + "assert(inv ~= nil)\n" + "local r = m * inv\n" + "return math.abs(r.m11 - 1) + math.abs(r.m22 - 1)" + " + math.abs(r.m33 - 1) + math.abs(r.m44 - 1)" + " + math.abs(r.m14) + math.abs(r.m24) + math.abs(r.m34)\n"; + auto t = ScriptingTest(src); + CHECK(lua_tonumber(t.state(), -1) < 1e-5); +} + +TEST_CASE("Mat4.invertAffine writes in place", "[scripting]") +{ + const char* src = "local m = Mat4.fromTranslation(10, 0, 0)\n" + "local out = Mat4.identity()\n" + "local ok = Mat4.invertAffine(out, m)\n" + "return ok, out.m14, out.m24, out.m34\n"; + auto t = ScriptingTest(src, 4); + CHECK(lua_toboolean(t.state(), -4) == 1); + CHECK(lua_tonumber(t.state(), -3) == -10.0); + CHECK(lua_tonumber(t.state(), -2) == 0.0); + CHECK(lua_tonumber(t.state(), -1) == 0.0); +} + +TEST_CASE("Mat4:invertAffine returns nil on singular linear part", + "[scripting]") +{ + // Zero scale on Y collapses the linear part — singular. + const char* src = "local m = Mat4.fromScale(2, 0, 1)\n" + "return m:invertAffine()\n"; + auto t = ScriptingTest(src); + CHECK(lua_isnil(t.state(), -1)); +} + +TEST_CASE("Mat4.perspectiveReverseZ has expected layout", "[scripting]") +{ + // For aspect=1, fovY=90deg: f = 1/tan(45deg) = 1. + // m11 = f/aspect = 1, m22 = f = 1, m33 = 0, m43 = -1, m34 = near. + const char* src = "local p = Mat4.perspectiveReverseZ(math.rad(90), 1, 5)\n" + "return p.m11, p.m22, p.m33, p.m43, p.m34\n"; + auto t = ScriptingTest(src, 5); + CHECK(lua_tonumber(t.state(), -5) == Approx(1.0).margin(1e-6)); + CHECK(lua_tonumber(t.state(), -4) == Approx(1.0).margin(1e-6)); + CHECK(lua_tonumber(t.state(), -3) == 0.0); + CHECK(lua_tonumber(t.state(), -2) == -1.0); + CHECK(lua_tonumber(t.state(), -1) == 5.0); +} + TEST_CASE("Mat4:writeToBuffer stores 64 bytes column-major", "[scripting]") { const char* src =