/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
#include "../skcms.h"
#include "GaussNewton.h"
#include "LinearAlgebra.h"
#include <assert.h>

bool skcms_gauss_newton_step(float (*     t)(float x, const void*), const void* t_ctx,
                             float (*     f)(float x, const float P[4]),
                             void  (*grad_f)(float x, float dfdP[4]),
                             float P[4],
                             float x0, float x1, int N,
                             void (*fixup)(skcms_Matrix4x4*, const float P[4])) {
    // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
    //
    // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
    //   where r(P) is the residual vector t(x) - f(x,P)
    //   and Jf is the Jacobian matrix of f(), ∂f/∂P.
    //
    // Let's review the shape of each of these expressions:
    //   r(P)   is [N x 1], a column vector with one entry per value of x tested
    //   Jf     is [N x 4], a matrix with an entry for each (x,P) pair
    //   Jf^T   is [4 x N], the transpose of Jf
    //
    //   Jf^T Jf   is [4 x N] * [N x 4] == [4 x 4], a 4x4 matrix,
    //             and so is its inverse (Jf^T Jf)^-1
    //   Jf^T r(P) is [4 x N] * [N x 1] == [4 x 1], a column vector with the same shape as P
    //
    // Our implementation strategy to get to the final ∆P is
    //   1) evaluate Jf^T Jf,   call that lhs
    //   2) evaluate Jf^T r(P), call that rhs
    //   3) invert lhs
    //   4) multiply inverse lhs by rhs
    //
    // This is a friendly implementation strategy because we don't need any buffers
    // that scale with N, and, equally nice, we don't have to perform any
    // variable-size matrix operations (the identity spelled out below shows why
    // per-sample accumulation is enough).
    //
    // Other implementation strategies could trade this off, e.g. evaluating the
    // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
    // the residuals.  That would probably require implementing singular value
    // decomposition, and would create a [4 x N] matrix to be multiplied by the
    // [N x 1] residual vector, but on the upside I think that'd eliminate the
    // possibility of this skcms_gauss_newton_step() function ever failing.
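    //
    // Concretely, the [4 x 4] and [4 x 1] entries we need are just sums over the
    // N sampled x_i:
    //   (Jf^T Jf )[r][c] == Σ_i dfdP_i[r] * dfdP_i[c]
    //   (Jf^T r(P))[r]   == Σ_i dfdP_i[r] * resid_i
    // so lhs and rhs can be accumulated one sample at a time in fixed-size storage,
    // which is exactly what the loop below does.
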
    // 0) start off with lhs and rhs safely zeroed.
    skcms_Matrix4x4 lhs = {{ {0,0,0,0}, {0,0,0,0}, {0,0,0,0}, {0,0,0,0} }};
    skcms_Vector4   rhs = { {0,0,0,0} };
    // 1,2) evaluate lhs and evaluate rhs
    //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
    //   so we'll have to update lhs and rhs at the same time.
    assert(N > 1);               // we need at least two samples to span [x0,x1]
    float dx = (x1-x0)/(N-1);
    for (int i = 0; i < N; i++) {
        float x = x0 + i*dx;

        float resid = t(x,t_ctx) - f(x,P);

        float dfdP[4] = {0,0,0,0};
        grad_f(x, dfdP);

        // TODO: allow a bias(x) function?
        // As-is, this bias(x) can be folded into t(x), f(x), and grad_f(x).
    #if 0
        float b = bias(x, bias_ctx);
        resid   *= b;
        dfdP[0] *= b;
        dfdP[1] *= b;
        dfdP[2] *= b;
        dfdP[3] *= b;
    #endif

        for (int r = 0; r < 4; r++) {
            for (int c = 0; c < 4; c++) {
                lhs.vals[r][c] += dfdP[r] * dfdP[c];
            }
            rhs.vals[r] += dfdP[r] * resid;
        }
    }
    // 3) let the caller fix up lhs if it wants to, then invert lhs
    if (fixup) {
        fixup(&lhs, P);
    }

    skcms_Matrix4x4 lhs_inv;
    if (!skcms_Matrix4x4_invert(&lhs, &lhs_inv)) {
        return false;
    }
    // 4) multiply inverse lhs by rhs
    skcms_Vector4 dP = skcms_Matrix4x4_Vector4_mul(&lhs_inv, &rhs);
    P[0] += dP.vals[0];
    P[1] += dP.vals[1];
    P[2] += dP.vals[2];
    P[3] += dP.vals[3];
    return true;
}
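
#if 0
// A sketch, not part of skcms, of how a caller might drive skcms_gauss_newton_step()
// to convergence.  fit_example(), kMaxIters, the 256-sample count, and the 1e-6f
// tolerance are all hypothetical, chosen only for illustration; real callers pick
// their own sample range, count, and stopping rule.
static bool fit_example(float (*     t)(float x, const void*), const void* t_ctx,
                        float (*     f)(float x, const float P[4]),
                        void  (*grad_f)(float x, float dfdP[4]),
                        float P[4]) {
    const int kMaxIters = 16;
    for (int i = 0; i < kMaxIters; i++) {
        float prev[4] = { P[0], P[1], P[2], P[3] };

        // One Gauss-Newton update over 256 evenly spaced samples of [0,1], no fixup.
        if (!skcms_gauss_newton_step(t, t_ctx, f, grad_f, P, 0.0f, 1.0f, 256, NULL)) {
            return false;   // lhs was singular; a caller could perturb P and retry
        }

        // Stop once the update ∆P is tiny.
        float d2 = 0;
        for (int j = 0; j < 4; j++) {
            float dj = P[j] - prev[j];
            d2 += dj*dj;
        }
        if (d2 < 1e-6f * 1e-6f) {
            return true;
        }
    }
    return true;   // out of iterations; accept the best P we've got
}
#endif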