src/TransferFunction.c - skcms - Git at Google

 /*
  * Copyright 2018 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #include "../skcms.h"
 #include "LinearAlgebra.h"
 #include "Macros.h"
 #include "PortableMath.h"
 #include "TransferFunction.h"
 #include <assert.h>
 #include <string.h>

 // Enable to do thorough logging of the nonlinear regression to stderr
 #if 0
     #include <stdio.h>
     #define LOG(...) fprintf(stderr, __VA_ARGS__)
 #else
     #define LOG(...) do {} while(false)
 #endif

 #define LOG_TF(tf)                                              \
     LOG("[%.25g %.25g %.25g %.25g]\n",                          \
         tf->g, tf->a, tf->b, tf->e)

 #define LOG_VEC(v)                                              \
     LOG("[%.25g %.25g %.25g %.25g]\n",                          \
         v.vals[0], v.vals[1], v.vals[2], v.vals[3])

 #define LOG_MTX(m)                                              \
     LOG("| %.25g %.25g %.25g %.25g |\n"                         \
         "| %.25g %.25g %.25g %.25g |\n"                         \
         "| %.25g %.25g %.25g %.25g |\n"                         \
         "| %.25g %.25g %.25g %.25g |\n",                        \
         m.vals[0][0], m.vals[0][1], m.vals[0][2], m.vals[0][3], \
         m.vals[1][0], m.vals[1][1], m.vals[1][2], m.vals[1][3], \
         m.vals[2][0], m.vals[2][1], m.vals[2][2], m.vals[2][3], \
         m.vals[3][0], m.vals[3][1], m.vals[3][2], m.vals[3][3])

 float skcms_TransferFunction_eval(const skcms_TransferFunction* fn, float x) {
     float sign = x < 0 ? -1.0f : 1.0f;
     x *= sign;

     return sign * (x < fn->d ? fn->c * x + fn->f
                              : powf_(fn->a * x + fn->b, fn->g) + fn->e);
 }

 static float TF_Nonlinear_eval(const skcms_TransferFunction* fn, float x) {
     // We strive to never allow negative ax+b, but values can drift slightly. Guard against NaN.
     float base = fmaxf_(fn->a * x + fn->b, 0.0f);
     return powf_(base, fn->g) + fn->e;
 }

 // Evaluate the gradient of the nonlinear component of fn
 static void tf_eval_gradient_nonlinear(const skcms_TransferFunction* fn,
                                        float x,
                                        float* d_fn_d_A_at_x,
                                        float* d_fn_d_B_at_x,
                                        float* d_fn_d_E_at_x,
                                        float* d_fn_d_G_at_x) {
     float base = fn->a * x + fn->b;
     if (base > 0.0f) {
         *d_fn_d_A_at_x = fn->g * x * powf_(base, fn->g - 1.0f);
         *d_fn_d_B_at_x = fn->g * powf_(base, fn->g - 1.0f);
         *d_fn_d_E_at_x = 1.0f;
         // Scale by 1/log_2(e)
         *d_fn_d_G_at_x = powf_(base, fn->g) * log2f_(base) * 0.69314718f;
     } else {
         *d_fn_d_A_at_x = 0.0f;
         *d_fn_d_B_at_x = 0.0f;
         *d_fn_d_E_at_x = 0.0f;
         *d_fn_d_G_at_x = 0.0f;
     }
 }

 // Take one Gauss-Newton step updating A, B, E, and G, given D.
 static bool tf_gauss_newton_step_nonlinear(skcms_TableFunc* t, const void* ctx, int start, int n,
                                            skcms_TransferFunction* fn, float* error_Linfty_after) {
     LOG("tf_gauss_newton_step_nonlinear (%d, %d)\n", start, n);
     LOG("fn: "); LOG_TF(fn);

     // Let ne_lhs be the left hand side of the normal equations, and let ne_rhs
     // be the right hand side. Zero the diagonal [sic] of |ne_lhs| and all of |ne_rhs|.
     skcms_Matrix4x4 ne_lhs;
     skcms_Vector4 ne_rhs;
     for (int row = 0; row < 4; ++row) {
         for (int col = 0; col < 4; ++col) {
             ne_lhs.vals[row][col] = 0;
         }
         ne_rhs.vals[row] = 0;
     }

     // Add the contributions from each sample to the normal equations.
     for (int i = start; i < n; ++i) {
         float xi = i / (n - 1.0f);
         LOG("%d (%.25g)\n", i, xi);

         // Let J be the gradient of fn with respect to parameters A, B, E, and G,
         // evaulated at this point.
         skcms_Vector4 J;
         tf_eval_gradient_nonlinear(fn, xi, &J.vals[0], &J.vals[1], &J.vals[2], &J.vals[3]);
         LOG("J: "); LOG_VEC(J);

         // Let r be the residual at this point;
         float r = t(i, ctx) - TF_Nonlinear_eval(fn, xi);
         LOG("r: %.25g\n", r);

         if (i == start) {
             // Weight the D point much higher, so that the two pieces of the approximation line up
             float w = (n - start) * 0.5f;
             J.vals[0] *= w;
             J.vals[1] *= w;
             J.vals[2] *= w;
             J.vals[3] *= w;
             r *= w;
         }

         // Update the normal equations left hand side with the outer product of J
         // with itself.
         for (int row = 0; row < 4; ++row) {
             for (int col = 0; col < 4; ++col) {
                 ne_lhs.vals[row][col] += J.vals[row] * J.vals[col];
             }

             // Update the normal equations right hand side the product of J with the
             // residual
             ne_rhs.vals[row] += J.vals[row] * r;
         }
         LOG("LHS/RHS:\n"); LOG_MTX(ne_lhs); LOG_VEC(ne_rhs);
     }

     // Note that if G = 1, then the normal equations will be singular
     // (because when G = 1, B and E are equivalent parameters).
     // To avoid problems, fix E (row/column 3) in these circumstances.
     const float kEpsilonForG = 1.0f / 1024.0f;
     if (fabsf_(fn->g - 1.0f) < kEpsilonForG) {
         LOG("G ~= 1, pinning E\n");
         for (int row = 0; row < 4; ++row) {
             float value = (row == 2) ? 1.0f : 0.0f;
             ne_lhs.vals[row][2] = value;
             ne_lhs.vals[2][row] = value;
         }
         ne_rhs.vals[2] = 0.0f;
     }

     // Solve the normal equations.
     skcms_Matrix4x4 ne_lhs_inv;
     if (!skcms_Matrix4x4_invert(&ne_lhs, &ne_lhs_inv)) {
         return false;
     }
     LOG("LHS Inverse:\n"); LOG_MTX(ne_lhs_inv);

     skcms_Vector4 step = skcms_Matrix4x4_Vector4_mul(&ne_lhs_inv, &ne_rhs);
     LOG("step: "); LOG_VEC(step);

     // Update the transfer function.
     fn->a += step.vals[0];
     fn->b += step.vals[1];
     fn->e += step.vals[2];
     fn->g += step.vals[3];

     // A should always be positive.
     fn->a = fmaxf_(fn->a, 0.0f);

     // Ensure that fn be defined at D.
     if (fn->a * fn->d + fn->b < 0.0f) {
         LOG("AD+B = %.25g, ", fn->a * fn->d + fn->b);
         fn->b = -fn->a * fn->d;
         LOG("B -> %.25g\n", fn->b);
     }

     // Compute the Linfinity error.
     *error_Linfty_after = 0;
     for (int i = start; i < n; ++i) {
         float xi = i / (n - 1.0f);
         float error = fabsf_(t(i, ctx) - TF_Nonlinear_eval(fn, xi));
         *error_Linfty_after = fmaxf_(error, *error_Linfty_after);
     }

     return true;
 }

 // Solve for A, B, E, and G, given D. The initial value of |fn| is the
 // point from which iteration starts.
 static bool tf_solve_nonlinear(skcms_TableFunc* t, const void* ctx, int start, int n,
                                skcms_TransferFunction* fn) {
     // Take a maximum of 16 Gauss-Newton steps.
     enum { kNumSteps = 16 };

     // The L-infinity error after each step.
     float step_error[kNumSteps] = { 0 };
     int step = 0;
     for (;; ++step) {
         // If the normal equations are singular, we can't continue.
         if (!tf_gauss_newton_step_nonlinear(t, ctx, start, n, fn, &step_error[step])) {
             return false;
         }

         // If the error is inf or nan, we are clearly not converging.
         if (!isfinitef_(step_error[step])) {
             return false;
         }

         // Stop if our error is tiny.
         const float kEarlyOutTinyErrorThreshold = (1.0f / 16.0f) / 256.0f;
         if (step_error[step] < kEarlyOutTinyErrorThreshold) {
             break;
         }

         // Stop if our error is not changing, or changing in the wrong direction.
         if (step > 1) {
             // If our error is is huge for two iterations, we're probably not in the
             // region of convergence.
             if (step_error[step] > 1.0f && step_error[step - 1] > 1.0f) {
                 return false;
             }

             // If our error didn't change by ~1%, assume we've converged as much as we
             // are going to.
             const float kEarlyOutByPercentChangeThreshold = 32.0f / 256.0f;
             const float kMinimumPercentChange = 1.0f / 128.0f;
             float percent_change =
                 fabsf_(step_error[step] - step_error[step - 1]) / step_error[step];
             if (percent_change < kMinimumPercentChange &&
                 step_error[step] < kEarlyOutByPercentChangeThreshold) {
                 break;
             }
         }
         if (step == kNumSteps - 1) {
             break;
         }
     }

     // Declare failure if our error is obviously too high.
     const float kDidNotConvergeThreshold = 64.0f / 256.0f;
     if (step_error[step] > kDidNotConvergeThreshold) {
         return false;
     }

     return true;
 }

 // Returns the number of points that are approximated by the line, to within tol.
 static int tf_fit_linear(skcms_TableFunc* t, const void* ctx, int n, float tol,
                          skcms_TransferFunction* fn) {
     // Idea: We fit the first N points to the linear portion of the TF. We want the line to pass
     // through the first and last points exactly.
     //
     // We walk along the points, and find the minimum and maximum slope of the line before the
     // error would exceed our tolerance. Once the range [slope_min, slope_max] would be empty,
     // we definitely can't add any more points, so we're done.
     //
     // However, some points error intervals' may intersect the running interval, but not lie within
     // it. So we keep track of the last point we saw that is a valid candidate for being the end
     // point, and once the search is done, back up to build the line through *that* point.
     const float x_scale = 1.0f / (n - 1);

     int lin_points = 1;
     fn->f = t(0, ctx);
     float slope_min = -INFINITY_;
     float slope_max = INFINITY_;
     for (int i = 1; i < n; ++i) {
         float xi = i * x_scale;
         float yi = t(i, ctx);
         float slope_max_i = (yi + tol - fn->f) / xi;
         float slope_min_i = (yi - tol - fn->f) / xi;
         if (slope_max_i < slope_min || slope_max < slope_min_i) {
             // Slope intervals no longer overlap.
             break;
         }
         slope_max = fminf_(slope_max, slope_max_i);
         slope_min = fmaxf_(slope_min, slope_min_i);
         float cur_slope = (yi - fn->f) / xi;
         if (slope_min <= cur_slope && cur_slope <= slope_max) {
             lin_points = i + 1;
             fn->c = cur_slope;
         }
     }

     // Set D to the last point from above
     fn->d = (lin_points - 1) * x_scale;
     return lin_points;
 }

 static float tf_max_error(skcms_TableFunc* t, const void* ctx, int n,
                           const skcms_TransferFunction* fn) {
     const float x_scale = 1.0f / (n - 1);
     float max_error = 0;
     for (int i = 0; i < n; ++i) {
         float xi = i * x_scale;
         float fn_of_xi = skcms_TransferFunction_eval(fn, xi);
         float error_at_xi = fabsf_(t(i, ctx) - fn_of_xi);
         max_error = fmaxf_(max_error, error_at_xi);
     }
     return max_error;
 }

 bool skcms_TransferFunction_approximate(skcms_TableFunc* t, const void* ctx, int n,
                                         skcms_TransferFunction* fn, float* max_error) {
     if (n < 2) {
         return false;
     }

     const float x_scale = 1.0f / (n - 1);
     const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
     float min_error = INFINITY_;

     for (int tol = 0; tol < ARRAY_COUNT(kTolerances); ++tol) {
         skcms_TransferFunction tf;
         int lin_points = tf_fit_linear(t, ctx, n,kTolerances[tol], &tf);

         // If the entire data set was linear, move the coefficients to the nonlinear portion with
         // G == 1. This lets use a canonical representation with D == 0.
         if (lin_points == n) {
             tf.g = 1;
             tf.b = tf.f;
             tf.a = tf.c;
             tf.c = tf.d = tf.e = tf.f = 0;
         } else if (lin_points == n - 1) {
             // Degenerate case with only two points in the nonlinear segment. Solve directly.
             tf.g = 1;
             tf.a = (t(n - 1, ctx) - t(n - 2, ctx)) * (n - 1);
             tf.b = t(n - 2, ctx) - (tf.a * (n - 2) * x_scale);
             tf.e = 0;
         } else {
             // Do a nonlinear regression on the nonlinear segment. Include the 'D' point in the
             // nonlinear regression, so the two pieces are more likely to line up.
             int start = lin_points > 0 ? lin_points - 1 : 0;

             // We need G to be in right vicinity, or the regression may not converge. Solve exactly for
             // for midpoint of the nonlinear range, assuming B = E = 0 & A = 1.
             int mid = (start + n) / 2;
             float mid_x = mid / (n - 1.0f);
             float mid_y = t(mid, ctx);
             tf.g = log2f_(mid_y) / log2f_(mid_x);;
             tf.a = 1;
             tf.b = 0;
             tf.e = 0;

             if (!tf_solve_nonlinear(t, ctx, start, n, &tf)) {
                 continue;
             }
         }

         float err = tf_max_error(t, ctx, n, &tf);
         if (min_error > err) {
             min_error = err;
             *fn = tf;
         }
     }

     if (!isfinitef_(min_error)) {
         return false;
     }
     if (max_error) {
         *max_error = min_error;
     }

     return true;
 }

 // TODO: Adjust logic here? This still assumes that purely linear inputs will have D > 1, which
 // we never generate. It also emits inverted linear using the same formulation. Standardize on
 // G == 1 here, too?
 bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
     // Original equation is:       y = (ax + b)^g + e   for x >= d
     //                             y = cx + f           otherwise
     //
     // so 1st inverse is:          (y - e)^(1/g) = ax + b
     //                             x = ((y - e)^(1/g) - b) / a
     //
     // which can be re-written as: x = (1/a)(y - e)^(1/g) - b/a
     //                             x = ((1/a)^g)^(1/g) * (y - e)^(1/g) - b/a
     //                             x = ([(1/a)^g]y + [-((1/a)^g)e]) ^ [1/g] + [-b/a]
     //
     // and 2nd inverse is:         x = (y - f) / c
     // which can be re-written as: x = [1/c]y + [-f/c]
     //
     // and now both can be expressed in terms of the same parametric form as the
     // original - parameters are enclosed in square brackets.
     skcms_TransferFunction fn_inv = { 0, 0, 0, 0, 0, 0, 0 };

     // Reject obviously malformed inputs
     if (!isfinitef_(src->a + src->b + src->c + src->d + src->e + src->f + src->g)) {
         return false;
     }

     bool has_nonlinear = (src->d <= 1);
     bool has_linear = (src->d > 0);

     // Is the linear section decreasing or not invertible?
     if (has_linear && src->c <= 0) {
         return false;
     }

     // Is the nonlinear section decreasing or not invertible?
     if (has_nonlinear && (src->a <= 0 || src->g <= 0)) {
         return false;
     }

     // If both segments are present, they need to line up
     if (has_linear && has_nonlinear) {
         float l_at_d = src->c * src->d + src->f;
         float n_at_d = powf_(src->a * src->d + src->b, src->g) + src->e;
         if (fabsf_(l_at_d - n_at_d) > (1 / 512.0f)) {
             return false;
         }
     }

     // Invert linear segment
     if (has_linear) {
         fn_inv.c = 1.0f / src->c;
         fn_inv.f = -src->f / src->c;
     }

     // Invert nonlinear segment
     if (has_nonlinear) {
         fn_inv.g = 1.0f / src->g;
         fn_inv.a = powf_(1.0f / src->a, src->g);
         fn_inv.b = -fn_inv.a * src->e;
         fn_inv.e = -src->b / src->a;
     }

     if (!has_linear) {
         fn_inv.d = 0;
     } else if (!has_nonlinear) {
         // Any value larger than 1 works
         fn_inv.d = 2.0f;
     } else {
         fn_inv.d = src->c * src->d + src->f;
     }

     *dst = fn_inv;
     return true;
 }
	/*
	* Copyright 2018 Google Inc.
	*
	* Use of this source code is governed by a BSD-style license that can be
	* found in the LICENSE file.
	*/

	#include "../skcms.h"
	#include "LinearAlgebra.h"
	#include "Macros.h"
	#include "PortableMath.h"
	#include "TransferFunction.h"
	#include <assert.h>
	#include <string.h>

	// Enable to do thorough logging of the nonlinear regression to stderr
	#if 0
	#include <stdio.h>
	#define LOG(...) fprintf(stderr, __VA_ARGS__)
	#else
	#define LOG(...) do {} while(false)
	#endif

	#define LOG_TF(tf) \
	LOG("[%.25g %.25g %.25g %.25g]\n", \
	tf->g, tf->a, tf->b, tf->e)

	#define LOG_VEC(v) \
	LOG("[%.25g %.25g %.25g %.25g]\n", \
	v.vals[0], v.vals[1], v.vals[2], v.vals[3])

	#define LOG_MTX(m) \
	LOG("\| %.25g %.25g %.25g %.25g \|\n" \
	"\| %.25g %.25g %.25g %.25g \|\n" \
	"\| %.25g %.25g %.25g %.25g \|\n" \
	"\| %.25g %.25g %.25g %.25g \|\n", \
	m.vals[0][0], m.vals[0][1], m.vals[0][2], m.vals[0][3], \
	m.vals[1][0], m.vals[1][1], m.vals[1][2], m.vals[1][3], \
	m.vals[2][0], m.vals[2][1], m.vals[2][2], m.vals[2][3], \
	m.vals[3][0], m.vals[3][1], m.vals[3][2], m.vals[3][3])

	float skcms_TransferFunction_eval(const skcms_TransferFunction* fn, float x) {
	float sign = x < 0 ? -1.0f : 1.0f;
	x *= sign;

	return sign * (x < fn->d ? fn->c * x + fn->f
	: powf_(fn->a * x + fn->b, fn->g) + fn->e);
	}

	static float TF_Nonlinear_eval(const skcms_TransferFunction* fn, float x) {
	// We strive to never allow negative ax+b, but values can drift slightly. Guard against NaN.
	float base = fmaxf_(fn->a * x + fn->b, 0.0f);
	return powf_(base, fn->g) + fn->e;
	}

	// Evaluate the gradient of the nonlinear component of fn
	static void tf_eval_gradient_nonlinear(const skcms_TransferFunction* fn,
	float x,
	float* d_fn_d_A_at_x,
	float* d_fn_d_B_at_x,
	float* d_fn_d_E_at_x,
	float* d_fn_d_G_at_x) {
	float base = fn->a * x + fn->b;
	if (base > 0.0f) {
	d_fn_d_A_at_x = fn->g x * powf_(base, fn->g - 1.0f);
	d_fn_d_B_at_x = fn->g powf_(base, fn->g - 1.0f);
	*d_fn_d_E_at_x = 1.0f;
	// Scale by 1/log_2(e)
	d_fn_d_G_at_x = powf_(base, fn->g) log2f_(base) * 0.69314718f;
	} else {
	*d_fn_d_A_at_x = 0.0f;
	*d_fn_d_B_at_x = 0.0f;
	*d_fn_d_E_at_x = 0.0f;
	*d_fn_d_G_at_x = 0.0f;
	}
	}

	// Take one Gauss-Newton step updating A, B, E, and G, given D.
	static bool tf_gauss_newton_step_nonlinear(skcms_TableFunc* t, const void* ctx, int start, int n,
	skcms_TransferFunction* fn, float* error_Linfty_after) {
	LOG("tf_gauss_newton_step_nonlinear (%d, %d)\n", start, n);
	LOG("fn: "); LOG_TF(fn);

	// Let ne_lhs be the left hand side of the normal equations, and let ne_rhs
	// be the right hand side. Zero the diagonal [sic] of \|ne_lhs\| and all of \|ne_rhs\|.
	skcms_Matrix4x4 ne_lhs;
	skcms_Vector4 ne_rhs;
	for (int row = 0; row < 4; ++row) {
	for (int col = 0; col < 4; ++col) {
	ne_lhs.vals[row][col] = 0;
	}
	ne_rhs.vals[row] = 0;
	}

	// Add the contributions from each sample to the normal equations.
	for (int i = start; i < n; ++i) {
	float xi = i / (n - 1.0f);
	LOG("%d (%.25g)\n", i, xi);

	// Let J be the gradient of fn with respect to parameters A, B, E, and G,
	// evaulated at this point.
	skcms_Vector4 J;
	tf_eval_gradient_nonlinear(fn, xi, &J.vals[0], &J.vals[1], &J.vals[2], &J.vals[3]);
	LOG("J: "); LOG_VEC(J);

	// Let r be the residual at this point;
	float r = t(i, ctx) - TF_Nonlinear_eval(fn, xi);
	LOG("r: %.25g\n", r);

	if (i == start) {
	// Weight the D point much higher, so that the two pieces of the approximation line up
	float w = (n - start) * 0.5f;
	J.vals[0] *= w;
	J.vals[1] *= w;
	J.vals[2] *= w;
	J.vals[3] *= w;
	r *= w;
	}

	// Update the normal equations left hand side with the outer product of J
	// with itself.
	for (int row = 0; row < 4; ++row) {
	for (int col = 0; col < 4; ++col) {
	ne_lhs.vals[row][col] += J.vals[row] * J.vals[col];
	}

	// Update the normal equations right hand side the product of J with the
	// residual
	ne_rhs.vals[row] += J.vals[row] * r;
	}
	LOG("LHS/RHS:\n"); LOG_MTX(ne_lhs); LOG_VEC(ne_rhs);
	}

	// Note that if G = 1, then the normal equations will be singular
	// (because when G = 1, B and E are equivalent parameters).
	// To avoid problems, fix E (row/column 3) in these circumstances.
	const float kEpsilonForG = 1.0f / 1024.0f;
	if (fabsf_(fn->g - 1.0f) < kEpsilonForG) {
	LOG("G ~= 1, pinning E\n");
	for (int row = 0; row < 4; ++row) {
	float value = (row == 2) ? 1.0f : 0.0f;
	ne_lhs.vals[row][2] = value;
	ne_lhs.vals[2][row] = value;
	}
	ne_rhs.vals[2] = 0.0f;
	}

	// Solve the normal equations.
	skcms_Matrix4x4 ne_lhs_inv;
	if (!skcms_Matrix4x4_invert(&ne_lhs, &ne_lhs_inv)) {
	return false;
	}
	LOG("LHS Inverse:\n"); LOG_MTX(ne_lhs_inv);

	skcms_Vector4 step = skcms_Matrix4x4_Vector4_mul(&ne_lhs_inv, &ne_rhs);
	LOG("step: "); LOG_VEC(step);

	// Update the transfer function.
	fn->a += step.vals[0];
	fn->b += step.vals[1];
	fn->e += step.vals[2];
	fn->g += step.vals[3];

	// A should always be positive.
	fn->a = fmaxf_(fn->a, 0.0f);

	// Ensure that fn be defined at D.
	if (fn->a * fn->d + fn->b < 0.0f) {
	LOG("AD+B = %.25g, ", fn->a * fn->d + fn->b);
	fn->b = -fn->a * fn->d;
	LOG("B -> %.25g\n", fn->b);
	}

	// Compute the Linfinity error.
	*error_Linfty_after = 0;
	for (int i = start; i < n; ++i) {
	float xi = i / (n - 1.0f);
	float error = fabsf_(t(i, ctx) - TF_Nonlinear_eval(fn, xi));
	error_Linfty_after = fmaxf_(error, error_Linfty_after);
	}

	return true;
	}

	// Solve for A, B, E, and G, given D. The initial value of \|fn\| is the
	// point from which iteration starts.
	static bool tf_solve_nonlinear(skcms_TableFunc* t, const void* ctx, int start, int n,
	skcms_TransferFunction* fn) {
	// Take a maximum of 16 Gauss-Newton steps.
	enum { kNumSteps = 16 };

	// The L-infinity error after each step.
	float step_error[kNumSteps] = { 0 };
	int step = 0;
	for (;; ++step) {
	// If the normal equations are singular, we can't continue.
	if (!tf_gauss_newton_step_nonlinear(t, ctx, start, n, fn, &step_error[step])) {
	return false;
	}

	// If the error is inf or nan, we are clearly not converging.
	if (!isfinitef_(step_error[step])) {
	return false;
	}

	// Stop if our error is tiny.
	const float kEarlyOutTinyErrorThreshold = (1.0f / 16.0f) / 256.0f;
	if (step_error[step] < kEarlyOutTinyErrorThreshold) {
	break;
	}

	// Stop if our error is not changing, or changing in the wrong direction.
	if (step > 1) {
	// If our error is is huge for two iterations, we're probably not in the
	// region of convergence.
	if (step_error[step] > 1.0f && step_error[step - 1] > 1.0f) {
	return false;
	}

	// If our error didn't change by ~1%, assume we've converged as much as we
	// are going to.
	const float kEarlyOutByPercentChangeThreshold = 32.0f / 256.0f;
	const float kMinimumPercentChange = 1.0f / 128.0f;
	float percent_change =
	fabsf_(step_error[step] - step_error[step - 1]) / step_error[step];
	if (percent_change < kMinimumPercentChange &&
	step_error[step] < kEarlyOutByPercentChangeThreshold) {
	break;
	}
	}
	if (step == kNumSteps - 1) {
	break;
	}
	}

	// Declare failure if our error is obviously too high.
	const float kDidNotConvergeThreshold = 64.0f / 256.0f;
	if (step_error[step] > kDidNotConvergeThreshold) {
	return false;
	}

	return true;
	}

	// Returns the number of points that are approximated by the line, to within tol.
	static int tf_fit_linear(skcms_TableFunc* t, const void* ctx, int n, float tol,
	skcms_TransferFunction* fn) {
	// Idea: We fit the first N points to the linear portion of the TF. We want the line to pass
	// through the first and last points exactly.
	//
	// We walk along the points, and find the minimum and maximum slope of the line before the
	// error would exceed our tolerance. Once the range [slope_min, slope_max] would be empty,
	// we definitely can't add any more points, so we're done.
	//
	// However, some points error intervals' may intersect the running interval, but not lie within
	// it. So we keep track of the last point we saw that is a valid candidate for being the end
	// point, and once the search is done, back up to build the line through that point.
	const float x_scale = 1.0f / (n - 1);

	int lin_points = 1;
	fn->f = t(0, ctx);
	float slope_min = -INFINITY_;
	float slope_max = INFINITY_;
	for (int i = 1; i < n; ++i) {
	float xi = i * x_scale;
	float yi = t(i, ctx);
	float slope_max_i = (yi + tol - fn->f) / xi;
	float slope_min_i = (yi - tol - fn->f) / xi;
	if (slope_max_i < slope_min \|\| slope_max < slope_min_i) {
	// Slope intervals no longer overlap.
	break;
	}
	slope_max = fminf_(slope_max, slope_max_i);
	slope_min = fmaxf_(slope_min, slope_min_i);
	float cur_slope = (yi - fn->f) / xi;
	if (slope_min <= cur_slope && cur_slope <= slope_max) {
	lin_points = i + 1;
	fn->c = cur_slope;
	}
	}

	// Set D to the last point from above
	fn->d = (lin_points - 1) * x_scale;
	return lin_points;
	}

	static float tf_max_error(skcms_TableFunc* t, const void* ctx, int n,
	const skcms_TransferFunction* fn) {
	const float x_scale = 1.0f / (n - 1);
	float max_error = 0;
	for (int i = 0; i < n; ++i) {
	float xi = i * x_scale;
	float fn_of_xi = skcms_TransferFunction_eval(fn, xi);
	float error_at_xi = fabsf_(t(i, ctx) - fn_of_xi);
	max_error = fmaxf_(max_error, error_at_xi);
	}
	return max_error;
	}

	bool skcms_TransferFunction_approximate(skcms_TableFunc* t, const void* ctx, int n,
	skcms_TransferFunction* fn, float* max_error) {
	if (n < 2) {
	return false;
	}

	const float x_scale = 1.0f / (n - 1);
	const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
	float min_error = INFINITY_;

	for (int tol = 0; tol < ARRAY_COUNT(kTolerances); ++tol) {
	skcms_TransferFunction tf;
	int lin_points = tf_fit_linear(t, ctx, n,kTolerances[tol], &tf);

	// If the entire data set was linear, move the coefficients to the nonlinear portion with
	// G == 1. This lets use a canonical representation with D == 0.
	if (lin_points == n) {
	tf.g = 1;
	tf.b = tf.f;
	tf.a = tf.c;
	tf.c = tf.d = tf.e = tf.f = 0;
	} else if (lin_points == n - 1) {
	// Degenerate case with only two points in the nonlinear segment. Solve directly.
	tf.g = 1;
	tf.a = (t(n - 1, ctx) - t(n - 2, ctx)) * (n - 1);
	tf.b = t(n - 2, ctx) - (tf.a * (n - 2) * x_scale);
	tf.e = 0;
	} else {
	// Do a nonlinear regression on the nonlinear segment. Include the 'D' point in the
	// nonlinear regression, so the two pieces are more likely to line up.
	int start = lin_points > 0 ? lin_points - 1 : 0;

	// We need G to be in right vicinity, or the regression may not converge. Solve exactly for
	// for midpoint of the nonlinear range, assuming B = E = 0 & A = 1.
	int mid = (start + n) / 2;
	float mid_x = mid / (n - 1.0f);
	float mid_y = t(mid, ctx);
	tf.g = log2f_(mid_y) / log2f_(mid_x);;
	tf.a = 1;
	tf.b = 0;
	tf.e = 0;

	if (!tf_solve_nonlinear(t, ctx, start, n, &tf)) {
	continue;
	}
	}

	float err = tf_max_error(t, ctx, n, &tf);
	if (min_error > err) {
	min_error = err;
	*fn = tf;
	}
	}

	if (!isfinitef_(min_error)) {
	return false;
	}
	if (max_error) {
	*max_error = min_error;
	}

	return true;
	}

	// TODO: Adjust logic here? This still assumes that purely linear inputs will have D > 1, which
	// we never generate. It also emits inverted linear using the same formulation. Standardize on
	// G == 1 here, too?
	bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
	// Original equation is: y = (ax + b)^g + e for x >= d
	// y = cx + f otherwise
	//
	// so 1st inverse is: (y - e)^(1/g) = ax + b
	// x = ((y - e)^(1/g) - b) / a
	//
	// which can be re-written as: x = (1/a)(y - e)^(1/g) - b/a
	// x = ((1/a)^g)^(1/g) * (y - e)^(1/g) - b/a
	// x = ([(1/a)^g]y + [-((1/a)^g)e]) ^ [1/g] + [-b/a]
	//
	// and 2nd inverse is: x = (y - f) / c
	// which can be re-written as: x = [1/c]y + [-f/c]
	//
	// and now both can be expressed in terms of the same parametric form as the
	// original - parameters are enclosed in square brackets.
	skcms_TransferFunction fn_inv = { 0, 0, 0, 0, 0, 0, 0 };

	// Reject obviously malformed inputs
	if (!isfinitef_(src->a + src->b + src->c + src->d + src->e + src->f + src->g)) {
	return false;
	}

	bool has_nonlinear = (src->d <= 1);
	bool has_linear = (src->d > 0);

	// Is the linear section decreasing or not invertible?
	if (has_linear && src->c <= 0) {
	return false;
	}

	// Is the nonlinear section decreasing or not invertible?
	if (has_nonlinear && (src->a <= 0 \|\| src->g <= 0)) {
	return false;
	}

	// If both segments are present, they need to line up
	if (has_linear && has_nonlinear) {
	float l_at_d = src->c * src->d + src->f;
	float n_at_d = powf_(src->a * src->d + src->b, src->g) + src->e;
	if (fabsf_(l_at_d - n_at_d) > (1 / 512.0f)) {
	return false;
	}
	}

	// Invert linear segment
	if (has_linear) {
	fn_inv.c = 1.0f / src->c;
	fn_inv.f = -src->f / src->c;
	}

	// Invert nonlinear segment
	if (has_nonlinear) {
	fn_inv.g = 1.0f / src->g;
	fn_inv.a = powf_(1.0f / src->a, src->g);
	fn_inv.b = -fn_inv.a * src->e;
	fn_inv.e = -src->b / src->a;
	}

	if (!has_linear) {
	fn_inv.d = 0;
	} else if (!has_nonlinear) {
	// Any value larger than 1 works
	fn_inv.d = 2.0f;
	} else {
	fn_inv.d = src->c * src->d + src->f;
	}

	*dst = fn_inv;
	return true;
	}