/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
#include "../skcms.h"
#include "GaussNewton.h"
#include "LinearAlgebra.h"
#include <assert.h>

bool skcms_gauss_newton_step(float (*     t)(float x, const void*), const void* t_ctx,
                             float (*     f)(float x, const float P[4]),
                             void  (*grad_f)(float x, float dfdP[4]),
                             float P[4],
                             float x0, float x1, int N,
                             void (*fixup)(skcms_Matrix4x4*, const float P[4])) {
    // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
    //
    // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
    //   where r(P) is the residual vector t(x) - f(x,P)
    //   and Jf is the Jacobian matrix of f(), ∂f/∂P.
    //
    // Let's review the shape of each of these expressions:
    //   r(P)   is [N x 1], a column vector with one entry per value of x tested
    //   Jf     is [N x 4], a matrix with an entry for each (x,P) pair
    //   Jf^T   is [4 x N], the transpose of Jf
    //
    //   Jf^T Jf   is [4 x N] * [N x 4] == [4 x 4], a 4x4 matrix,
    //             and so is its inverse (Jf^T Jf)^-1
    //   Jf^T r(P) is [4 x N] * [N x 1] == [4 x 1], a column vector with the same shape as P
    //
    // Our implementation strategy to get to the final ∆P is
    //   1) evaluate Jf^T Jf,   call that lhs
    //   2) evaluate Jf^T r(P), call that rhs
    //   3) invert lhs
    //   4) multiply inverse lhs by rhs
    //
    // This is a friendly implementation strategy because we don't need any buffers
    // that scale with N, and, equally nice, we don't have to perform any
    // variable-size matrix operations (the identity spelled out below shows why
    // per-sample accumulation is enough).
    //
    // Other implementation strategies could trade this off, e.g. evaluating the
    // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
    // the residuals.  That would probably require implementing singular value
    // decomposition, and would create a [4 x N] matrix to be multiplied by the
    // [N x 1] residual vector, but on the upside I think that'd eliminate the
    // possibility of this skcms_gauss_newton_step() function ever failing.
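    //
    // Concretely, the [4 x 4] and [4 x 1] entries we need are just sums over the
    // N sampled x_i:
    //   (Jf^T Jf )[r][c] == Σ_i dfdP_i[r] * dfdP_i[c]
    //   (Jf^T r(P))[r]   == Σ_i dfdP_i[r] * resid_i
    // so lhs and rhs can be accumulated one sample at a time in fixed-size storage,
    // which is exactly what the loop below does.
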
    // 0) start off with lhs and rhs safely zeroed.
    skcms_Matrix4x4 lhs = {{ {0,0,0,0}, {0,0,0,0}, {0,0,0,0}, {0,0,0,0} }};
    skcms_Vector4   rhs = { {0,0,0,0} };
    // 1,2) evaluate lhs and evaluate rhs
    //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
    //   so we'll have to update lhs and rhs at the same time.
    assert(N > 1);               // we need at least two samples to span [x0,x1]
    float dx = (x1-x0)/(N-1);
    for (int i = 0; i < N; i++) {
        float x = x0 + i*dx;

        float resid = t(x,t_ctx) - f(x,P);

        float dfdP[4] = {0,0,0,0};
        grad_f(x, dfdP);

        // TODO: allow a bias(x) function?
        // As-is, this bias(x) can be folded into t(x), f(x), and grad_f(x).
    #if 0
        float b = bias(x, bias_ctx);
        resid   *= b;
        dfdP[0] *= b;
        dfdP[1] *= b;
        dfdP[2] *= b;
        dfdP[3] *= b;
    #endif

        for (int r = 0; r < 4; r++) {
            for (int c = 0; c < 4; c++) {
                lhs.vals[r][c] += dfdP[r] * dfdP[c];
            }
            rhs.vals[r] += dfdP[r] * resid;
        }
    }
    // 3) let the caller fix up lhs if it wants to, then invert lhs
    if (fixup) {
        fixup(&lhs, P);
    }

    skcms_Matrix4x4 lhs_inv;
    if (!skcms_Matrix4x4_invert(&lhs, &lhs_inv)) {
        return false;
    }
    // 4) multiply inverse lhs by rhs
    skcms_Vector4 dP = skcms_Matrix4x4_Vector4_mul(&lhs_inv, &rhs);
    P[0] += dP.vals[0];
    P[1] += dP.vals[1];
    P[2] += dP.vals[2];
    P[3] += dP.vals[3];
    return true;
}
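
#if 0
// A sketch, not part of skcms, of how a caller might drive skcms_gauss_newton_step()
// to convergence.  fit_example(), kMaxIters, the 256-sample count, and the 1e-6f
// tolerance are all hypothetical, chosen only for illustration; real callers pick
// their own sample range, count, and stopping rule.
static bool fit_example(float (*     t)(float x, const void*), const void* t_ctx,
                        float (*     f)(float x, const float P[4]),
                        void  (*grad_f)(float x, float dfdP[4]),
                        float P[4]) {
    const int kMaxIters = 16;
    for (int i = 0; i < kMaxIters; i++) {
        float prev[4] = { P[0], P[1], P[2], P[3] };

        // One Gauss-Newton update over 256 evenly spaced samples of [0,1], no fixup.
        if (!skcms_gauss_newton_step(t, t_ctx, f, grad_f, P, 0.0f, 1.0f, 256, NULL)) {
            return false;   // lhs was singular; a caller could perturb P and retry
        }

        // Stop once the update ∆P is tiny.
        float d2 = 0;
        for (int j = 0; j < 4; j++) {
            float dj = P[j] - prev[j];
            d2 += dj*dj;
        }
        if (d2 < 1e-6f * 1e-6f) {
            return true;
        }
    }
    return true;   // out of iterations; accept the best P we've got
}
#endif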