use GaussNewton for 7-parameter approx

Deleted tests that were approximating skcms_TransferFunctions with
skcms_TransferFunctions, for now.

More tweaks to GaussNewton API to allow non-tuned parameters
when evaluating the approximating function and its gradient.

Change-Id: I27363e5960feaf03ec8176900693995563c7552c
Reviewed-on: https://skia-review.googlesource.com/119772
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
diff --git a/profiles/color.org/sRGB2014.icc.txt b/profiles/color.org/sRGB2014.icc.txt
index 35b736e..091018a 100644
--- a/profiles/color.org/sRGB2014.icc.txt
+++ b/profiles/color.org/sRGB2014.icc.txt
@@ -23,14 +23,14 @@
  'chad' : 'sf32' :     44 : 2980
 
 rTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 gTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 bTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
  XYZ : | 0.436065674 0.385147095 0.143066406 |
        | 0.222488403 0.716873169 0.060607910 |
        | 0.013916016 0.097076416 0.714096069 |
diff --git a/profiles/misc/Apple_Color_LCD.icc.txt b/profiles/misc/Apple_Color_LCD.icc.txt
index 3c6d918..fd877ee 100644
--- a/profiles/misc/Apple_Color_LCD.icc.txt
+++ b/profiles/misc/Apple_Color_LCD.icc.txt
@@ -24,14 +24,14 @@
  'aagg' : 'para' :     32 : 3652
 
 rTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 gTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 bTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
  XYZ : | 0.444335938 0.379440308 0.140411377 |
        | 0.224761963 0.726165771 0.049072266 |
        | 0.005477905 0.077972412 0.741455078 |
diff --git a/profiles/misc/Coated_FOGRA39_CMYK.icc.txt b/profiles/misc/Coated_FOGRA39_CMYK.icc.txt
index 366eb54..cd51005 100644
--- a/profiles/misc/Coated_FOGRA39_CMYK.icc.txt
+++ b/profiles/misc/Coated_FOGRA39_CMYK.icc.txt
@@ -23,13 +23,13 @@
  A2B : "A", CLUT, "B"
  "A" : 4 inputs
   A0 : 16-bit table with 256 entries
-  ~= : 0.968403, 1.06641, 0.0476338, 0.935616, 0.294118, -0.0979506, 0  (Max error: 0.012) (D-gap: -3.7e-05)
+  ~= : 1.04986, 1.01943, -0.00799555, 0.836576, 0.00784314, 0.00656138, 0  (Max error: 0.019) (D-gap: 0)
   ~= : -0.2772x^3 + 0.4481x^2 + 0.8291x (Max error: 0.005832)
   A1 : 16-bit table with 256 entries
-  ~= : 1.08106, 1.04424, -0.00819014, 0.766537, 0.00784314, 0.0741637, 0  (Max error: 0.11) (D-gap: 0.068)
+  ~= : 1.10368, 1.0238, -0.00473501, 0.766537, 0.00784314, 0.00419043, 0  (Max error: 0.025) (D-gap: 4.7e-10)
   ~= : -0.3874x^3 + 0.6655x^2 + 0.7219x (Max error: 0.01174)
   A2 : 16-bit table with 256 entries
-  ~= : 1.15888, 1.00891, 0.0094616, 0.708171, 0.00784314, -0.00356377, 0  (Max error: 0.018) (D-gap: 7.3e-06)
+  ~= : 1.16035, 1.00803, 0.0113021, 0.708171, 0.00784314, -0.00463676, 0  (Max error: 0.018) (D-gap: 0)
   ~= : -0.3333x^3 + 0.6616x^2 + 0.6717x (Max error: 0.00757)
   A3 : 16-bit table with 256 entries
   ~= : 0.1588x^3 + 0.08634x^2 + 0.7548x (Max error: 0.001332)
diff --git a/profiles/misc/ColorLogic_ISO_Coated_CMYK.icc.txt b/profiles/misc/ColorLogic_ISO_Coated_CMYK.icc.txt
index 5283a8b..d348c5d 100644
--- a/profiles/misc/ColorLogic_ISO_Coated_CMYK.icc.txt
+++ b/profiles/misc/ColorLogic_ISO_Coated_CMYK.icc.txt
@@ -23,15 +23,16 @@
  A2B : "A", CLUT, "B"
  "A" : 4 inputs
   A0 : 16-bit table with 256 entries
+  ~= : 0.209217, 25.4362, 8.21554, 1.20078, 0.0196078, -1.54935, 0  (Max error: 0.46) (D-gap: 0)
   ~= : 0.2321x^3 + -0.6549x^2 + 1.423x (Max error: 0.02083)
   A1 : 16-bit table with 256 entries
-  ~= : 0.741035, 0.963444, -0.0709743, 1.24477, 0.337255, 0.0336971, 0  (Max error: 0.047) (D-gap: -0.024)
+  ~= : -0.190365, 50.034, 15.4877, 1.20545, 0.0196078, -0.563011, 0  (Max error: 1.1) (D-gap: -3.7e-09)
   ~= : 0.2312x^3 + -0.6533x^2 + 1.422x (Max error: 0.02089)
   A2 : 16-bit table with 256 entries
-  ~= : 0.703794, 1.0482, -0.068751, 1.24155, 0.14902, 0.00502586, 0  (Max error: 0.017) (D-gap: -8.5e-06)
+  ~= : 0.791702, 0.910815, -0.135729, 1.24155, 0.14902, 0.185016, 0  (Max error: 0.026) (D-gap: 0)
   ~= : 0.232x^3 + -0.6542x^2 + 1.422x (Max error: 0.02094)
   A3 : 16-bit table with 256 entries
-  ~= : 0.673778, 1.10422, 0.073473, 1.18288, 0.0156863, -0.18047, 0  (Max error: 0.064) (D-gap: -0.00044)
+  ~= : 0.65406, 1.13213, 0.0862551, 1.18288, 0.0156863, -0.208999, 0  (Max error: 0.071) (D-gap: 0)
   ~= : 0.9762x^3 + -1.635x^2 + 1.659x (Max error: 0.03557)
 CLUT : 17 x 17 x 17 x 17 (16 bpp)
  "B" : 3 outputs
diff --git a/profiles/misc/DisplayCal_ASUS_NonMonotonic.icc.txt b/profiles/misc/DisplayCal_ASUS_NonMonotonic.icc.txt
index 335bb7e..d4211eb 100644
--- a/profiles/misc/DisplayCal_ASUS_NonMonotonic.icc.txt
+++ b/profiles/misc/DisplayCal_ASUS_NonMonotonic.icc.txt
@@ -31,13 +31,13 @@
  'meta' : 'dict' :   2312 : 738448
 
 rTRC : 16-bit table with 256 entries
-  ~= : 2.0009, 1.11115, -0.1145, 0.0357977, 0.117647, 0.00394287, 0  (Max error: 0.011) (D-gap: -6.4e-06)
+  ~= : 1.99944, 1.11172, -0.115069, 0.0357977, 0.117647, 0.00396373, 0  (Max error: 0.011) (D-gap: 0)
   ~= : 0.1266x^3 + 1.011x^2 + -0.1377x (Max error: 0.008777)
 gTRC : 16-bit table with 256 entries
-  ~= : 2.27405, 0.980809, 0.0314213, 0.0311284, 0.192157, -0.0259428, 0  (Max error: 0.0038) (D-gap: 5.7e-07)
+  ~= : 2.27403, 0.980813, 0.0314167, 0.0311284, 0.192157, -0.0259432, 0  (Max error: 0.0038) (D-gap: 0)
   ~= : 0.09834x^3 + 1.047x^2 + -0.1455x (Max error: 0.006362)
 bTRC : 16-bit table with 256 entries
-  ~= : 2.19998, 1.0228, -0.0143508, 0.00389105, 0.160784, -0.014793, 0  (Max error: 0.004) (D-gap: 4.3e-07)
+  ~= : 2.19997, 1.0228, -0.0143504, 0.00389105, 0.160784, -0.0147935, 0  (Max error: 0.004) (D-gap: -2.3e-10)
   ~= : 0.09596x^3 + 1.055x^2 + -0.1509x (Max error: 0.005556)
  XYZ : | 0.436737061 0.380325317 0.147140503 |
        | 0.217636108 0.729843140 0.052520752 |
@@ -45,13 +45,13 @@
  A2B : "A", CLUT, "B"
  "A" : 3 inputs
   A0 : 16-bit table with 2049 entries
-  ~= : 1.00376, 0.999203, 0.014153, 1.00002, 0.125, -0.0130375, 0  (Max error: 0.018) (D-gap: -1.2e-05)
+  ~= : 0.997553, 1.00181, -0.125226, 1.00002, 0.125, 0.125002, 0  (Max error: 0.019) (D-gap: 0)
   ~= : -0.01022x^3 + 0.01737x^2 + 0.9928x (Max error: 0.01741)
   A1 : 16-bit table with 2049 entries
-  ~= : 1.00313, 0.999434, -0.00321463, 1.00002, 0.125, 0.00408629, 0  (Max error: 0.016) (D-gap: -1e-06)
+  ~= : 0.994697, 1.0024, -0.1253, 1.00002, 0.125, 0.125002, 0  (Max error: 0.017) (D-gap: 0)
   ~= : -0.01251x^3 + 0.02103x^2 + 0.9915x (Max error: 0.01542)
   A2 : 16-bit table with 2049 entries
-  ~= : 1.00655, 0.995309, 0.359156, 1.00002, 0.03125, -0.356609, 0  (Max error: 0.019) (D-gap: 4.6e-06)
+  ~= : 0.997287, 1.00252, -0.0313289, 1.00002, 0.03125, 0.0312505, 0  (Max error: 0.02) (D-gap: 0)
   ~= : -0.01502x^3 + 0.02485x^2 + 0.9902x (Max error: 0.01886)
 CLUT : 33 x 33 x 33 (16 bpp)
  "B" : 3 outputs
diff --git a/profiles/misc/Dot_Gain_20_Grayscale.icc.txt b/profiles/misc/Dot_Gain_20_Grayscale.icc.txt
index f586103..407b58a 100644
--- a/profiles/misc/Dot_Gain_20_Grayscale.icc.txt
+++ b/profiles/misc/Dot_Gain_20_Grayscale.icc.txt
@@ -12,13 +12,13 @@
  'kTRC' : 'curv' :    524 : 388
 
 rTRC : 16-bit table with 256 entries
-  ~= : 1.73713, 0.999983, 6.48912e-05, 0.0629053, 0.0235294, -1.0356e-05, 0  (Max error: 8e-05) (D-gap: -9.4e-09)
+  ~= : 1.73715, 0.999979, 6.92596e-05, 0.0629053, 0.0235294, -1.07513e-05, 0  (Max error: 8e-05) (D-gap: 0)
   ~= : -0.1835x^3 + 1.081x^2 + 0.103x (Max error: 0.002633)
 gTRC : 16-bit table with 256 entries
-  ~= : 1.73713, 0.999983, 6.48912e-05, 0.0629053, 0.0235294, -1.0356e-05, 0  (Max error: 8e-05) (D-gap: -9.4e-09)
+  ~= : 1.73715, 0.999979, 6.92596e-05, 0.0629053, 0.0235294, -1.07513e-05, 0  (Max error: 8e-05) (D-gap: 0)
   ~= : -0.1835x^3 + 1.081x^2 + 0.103x (Max error: 0.002633)
 bTRC : 16-bit table with 256 entries
-  ~= : 1.73713, 0.999983, 6.48912e-05, 0.0629053, 0.0235294, -1.0356e-05, 0  (Max error: 8e-05) (D-gap: -9.4e-09)
+  ~= : 1.73715, 0.999979, 6.92596e-05, 0.0629053, 0.0235294, -1.07513e-05, 0  (Max error: 8e-05) (D-gap: 0)
   ~= : -0.1835x^3 + 1.081x^2 + 0.103x (Max error: 0.002633)
  XYZ : | 0.964202881 0.000000000 0.000000000 |
        | 0.000000000 1.000000000 0.000000000 |
diff --git a/profiles/misc/HD_709.icc.txt b/profiles/misc/HD_709.icc.txt
index b064335..11a095e 100644
--- a/profiles/misc/HD_709.icc.txt
+++ b/profiles/misc/HD_709.icc.txt
@@ -24,14 +24,14 @@
  'aagg' : 'para' :     32 : 2696
 
 rTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 gTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 bTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
  XYZ : | 0.358963013 0.446350098 0.158889771 |
        | 0.195922852 0.742843628 0.061233521 |
        | 0.009674072 0.043518066 0.771713257 |
diff --git a/profiles/misc/Japan_Color_2001_Coated.icc.txt b/profiles/misc/Japan_Color_2001_Coated.icc.txt
index fac184d..629d75f 100644
--- a/profiles/misc/Japan_Color_2001_Coated.icc.txt
+++ b/profiles/misc/Japan_Color_2001_Coated.icc.txt
@@ -19,16 +19,16 @@
  A2B : "A", CLUT, "B"
  "A" : 4 inputs
   A0 : 16-bit table with 256 entries
-  ~= : 1.2043, 0.854942, 0.610626, 0.525292, 0.00392157, -0.550543, 0  (Max error: 0.034) (D-gap: 0.0031)
+  ~= : 1.035, 1.04085, -0.0326543, 0.780156, 0.0313726, 0.0244755, 0  (Max error: 0.033) (D-gap: 0)
   ~= : -0.405x^3 + 0.6245x^2 + 0.7805x (Max error: 0.007027)
   A1 : 16-bit table with 256 entries
-  ~= : 1.10615, 1.02819, -0.00403214, 0.350195, 0.00392157, 0.0344724, 0  (Max error: 0.061) (D-gap: 0.033)
+  ~= : 1.11391, 1.02128, -0.0220004, 0.627015, 0.027451, 0.0138406, 0  (Max error: 0.013) (D-gap: 0)
   ~= : -0.3044x^3 + 0.6003x^2 + 0.704x (Max error: 0.003362)
   A2 : 16-bit table with 256 entries
-  ~= : 1.1484, 0.997667, 0.0234146, 0.619326, 0.0235294, -0.015198, 0  (Max error: 0.009) (D-gap: 4.4e-06)
+  ~= : 1.14818, 0.997828, 0.0230241, 0.619326, 0.0235294, -0.014941, 0  (Max error: 0.009) (D-gap: 0)
   ~= : -0.2304x^3 + 0.5108x^2 + 0.7196x (Max error: 0.004616)
   A3 : 16-bit table with 256 entries
-  ~= : 3.37872, 0.28468, 0.859486, 0.322957, 0.00392157, -0.559585, 0  (Max error: 0.043) (D-gap: 0.041)
+  ~= : 2.30032, 0.409413, 0.795021, 0.322957, 0.00392157, -0.591436, 0  (Max error: 0.065) (D-gap: -1.9e-08)
   ~= : 0.01829x^3 + 0.3262x^2 + 0.6555x (Max error: 0.01191)
 CLUT : 9 x 9 x 9 x 9 (16 bpp)
  "B" : 3 outputs
diff --git a/profiles/misc/Kodak_sRGB.icc.txt b/profiles/misc/Kodak_sRGB.icc.txt
index 08f1a96..4845a5d 100644
--- a/profiles/misc/Kodak_sRGB.icc.txt
+++ b/profiles/misc/Kodak_sRGB.icc.txt
@@ -21,13 +21,13 @@
  'bTRC' : 'curv' :    524 : 149844
 
 rTRC : 16-bit table with 256 entries
-  ~= : 2.41906, 0.940164, 0.0585121, 0.0696852, 0.0431373, 0.00320272, 0.00392157  (Max error: 0.00044) (D-gap: -1e-07)
+  ~= : 2.41905, 0.940167, 0.0585088, 0.0696852, 0.0431373, 0.00320302, 0.00392157  (Max error: 0.00044) (D-gap: 0)
   ~= : 0.3366x^3 + 0.6215x^2 + 0.04197x (Max error: 0.004218)
 gTRC : 16-bit table with 256 entries
-  ~= : 2.41906, 0.940164, 0.0585121, 0.0696852, 0.0431373, 0.00320272, 0.00392157  (Max error: 0.00044) (D-gap: -1e-07)
+  ~= : 2.41905, 0.940167, 0.0585088, 0.0696852, 0.0431373, 0.00320302, 0.00392157  (Max error: 0.00044) (D-gap: 0)
   ~= : 0.3366x^3 + 0.6215x^2 + 0.04197x (Max error: 0.004218)
 bTRC : 16-bit table with 256 entries
-  ~= : 2.41906, 0.940164, 0.0585121, 0.0696852, 0.0431373, 0.00320272, 0.00392157  (Max error: 0.00044) (D-gap: -1e-07)
+  ~= : 2.41905, 0.940167, 0.0585088, 0.0696852, 0.0431373, 0.00320302, 0.00392157  (Max error: 0.00044) (D-gap: 0)
   ~= : 0.3366x^3 + 0.6215x^2 + 0.04197x (Max error: 0.004218)
  XYZ : | 0.437637329 0.388412476 0.142410278 |
        | 0.214950562 0.712905884 0.072128296 |
@@ -35,21 +35,24 @@
  A2B : "A", CLUT, "B"
  "A" : 3 inputs
   A0 : 16-bit table with 256 entries
+  ~= : 0.708634, 1.48685, 0.435539, 0.745866, 0.0627451, -0.589851, 0  (Max error: 0.0039) (D-gap: 0)
   ~= : -0.1058x^3 + 0.03758x^2 + 1.068x (Max error: 0.02093)
   A1 : 16-bit table with 256 entries
+  ~= : 0.708634, 1.48685, 0.435539, 0.745866, 0.0627451, -0.589851, 0  (Max error: 0.0039) (D-gap: 0)
   ~= : -0.1058x^3 + 0.03758x^2 + 1.068x (Max error: 0.02093)
   A2 : 16-bit table with 256 entries
+  ~= : 0.708634, 1.48685, 0.435539, 0.745866, 0.0627451, -0.589851, 0  (Max error: 0.0039) (D-gap: 0)
   ~= : -0.1058x^3 + 0.03758x^2 + 1.068x (Max error: 0.02093)
 CLUT : 8 x 8 x 8 (16 bpp)
  "B" : 3 outputs
   B0 : 16-bit table with 4096 entries
-  ~= : 4.65661e-07, 6.0642, -6.04012, 1, 0.996337, -0.003668, 0  (Max error: 1.7e-05) (D-gap: 9.4e-06)
+  ~= : 5.99888, 0.75, 0, 0.998044, 0.998291, 0.82012, 0  (Max error: 0.0019) (D-gap: 0)
   ~= : 3.879e-06x^3 + -5.442e-06x^2 + 1x (Max error: 0.003662)
   B1 : 16-bit table with 4096 entries
-  ~= : 4.65661e-07, 6.0642, -6.04012, 1, 0.996337, -0.003668, 0  (Max error: 1.7e-05) (D-gap: 9.4e-06)
+  ~= : 5.99888, 0.75, 0, 0.998044, 0.998291, 0.82012, 0  (Max error: 0.0019) (D-gap: 0)
   ~= : 3.879e-06x^3 + -5.442e-06x^2 + 1x (Max error: 0.003662)
   B2 : 16-bit table with 4096 entries
-  ~= : 4.65661e-07, 6.0642, -6.04012, 1, 0.996337, -0.003668, 0  (Max error: 1.7e-05) (D-gap: 9.4e-06)
+  ~= : 5.99888, 0.75, 0, 0.998044, 0.998291, 0.82012, 0  (Max error: 0.0019) (D-gap: 0)
   ~= : 3.879e-06x^3 + -5.442e-06x^2 + 1x (Max error: 0.003662)
 252 random bytes transformed to linear XYZD50 bytes:
 	355632 a5d31d 4c6517 190f30 1e124a 5d4709 4e8727
diff --git a/profiles/misc/MartiMaria_browsertest_HARD.icc.txt b/profiles/misc/MartiMaria_browsertest_HARD.icc.txt
index b27b9e6..96d2bea 100644
--- a/profiles/misc/MartiMaria_browsertest_HARD.icc.txt
+++ b/profiles/misc/MartiMaria_browsertest_HARD.icc.txt
@@ -19,12 +19,12 @@
  'A2B2' : 'mft2' :  29554 : 2184
 
 rTRC : 16-bit table with 255 entries
-  ~= : 5.615x^3 + -10.81x^2 + 6.19x (Max error: 0.2712)
+  ~= : 5.627x^3 + -10.82x^2 + 6.198x (Max error: 0.2705)
 gTRC : 16-bit table with 255 entries
   ~= : 1, 0.00387579, 0, 0, 0, 0, 0  (Max error: 2.3e-10)
   ~= : 6.972x^3 + -7.968x^2 + 1.996x (Max error: 0.9961)
 bTRC : 16-bit table with 255 entries
-  ~= : 5.615x^3 + -10.81x^2 + 6.19x (Max error: 0.2712)
+  ~= : 5.627x^3 + -10.82x^2 + 6.198x (Max error: 0.2705)
  XYZ : | 0.964202881 0.000000000 0.964202881 |
        | 1.000000000 0.000000000 1.000000000 |
        | 0.824905396 0.000000000 0.824905396 |
diff --git a/profiles/misc/Phase_One_P25.icc.txt b/profiles/misc/Phase_One_P25.icc.txt
index 3628a5d..9158c6f 100644
--- a/profiles/misc/Phase_One_P25.icc.txt
+++ b/profiles/misc/Phase_One_P25.icc.txt
@@ -19,12 +19,13 @@
  'tech' : 'sig ' :     12 : 219352
 
 rTRC : 16-bit table with 256 entries
-  ~= : 0.557427, 0.823753, -0.0355345, 3.73293, 0.0431373, 0.142111, 0  (Max error: 0.019) (D-gap: -0.019)
+  ~= : 0.41882, 1.35722, 0.00130201, 3.84436, 0.00784314, -0.126417, 0  (Max error: 0.025) (D-gap: 1.9e-09)
   ~= : 1.474x^3 + -3.205x^2 + 2.731x (Max error: 0.0825)
 gTRC : 16-bit table with 256 entries
-  ~= : 0.400724, 1.03417, -0.022065, 5.60534, 0.027451, 0.0223994, 0  (Max error: 0.027) (D-gap: -1.7e-05)
+  ~= : 0.293277, 2.59732, 0.0114139, 5.75486, 0.00392157, -0.302166, 0  (Max error: 0.032) (D-gap: 1.3e-08)
   ~= : 1.873x^3 + -4.076x^2 + 3.203x (Max error: 0.1346)
 bTRC : 16-bit table with 256 entries
+  ~= : 0.630719, 0.929499, -0.0109353, 2.72114, 0.0117647, 0.0320134, 0  (Max error: 0.035) (D-gap: 0)
   ~= : 1.302x^3 + -2.583x^2 + 2.281x (Max error: 0.05044)
  XYZ : | 0.647903442 0.357360840 0.156417847 |
        | 0.382919312 1.109725952 0.000000000 |
diff --git a/profiles/misc/US_Web_Coated_SWOP_CMYK.icc.txt b/profiles/misc/US_Web_Coated_SWOP_CMYK.icc.txt
index 5c759dc..185c787 100644
--- a/profiles/misc/US_Web_Coated_SWOP_CMYK.icc.txt
+++ b/profiles/misc/US_Web_Coated_SWOP_CMYK.icc.txt
@@ -19,16 +19,15 @@
  A2B : "A", CLUT, "B"
  "A" : 4 inputs
   A0 : 16-bit table with 256 entries
-  ~= : 0.642081, 1.93472, 1.00137, 1.84533, 0.0156863, -0.991272, 0  (Max error: 0.0056) (D-gap: -2.3e-05)
+  ~= : 0.773486, 1.17185, 0.175413, 2.1323, 0.00392157, -0.257074, 0  (Max error: 0.0057) (D-gap: -8.4e-09)
   ~= : 0.1055x^3 + -0.3823x^2 + 1.277x (Max error: 0.01024)
   A1 : 16-bit table with 256 entries
-  ~= : 0.97251, 1.02769, 1.19953, 1.62257, 0.0156863, -1.18274, 0  (Max error: 0.0073) (D-gap: 0.00084)
+  ~= : 1.01301, 0.972757, 1.55681, 1.62257, 0.0156863, -1.55588, 0  (Max error: 0.011) (D-gap: -3e-08)
   ~= : 0.1635x^3 + -0.2655x^2 + 1.102x (Max error: 0.009072)
   A2 : 16-bit table with 256 entries
-  ~= : 1.03739, 0.967143, 0.737392, 2.01167, 0.00392157, -0.725022, 0  (Max error: 0.014) (D-gap: 1.8e-05)
+  ~= : 1.08355, 0.963537, 0.207506, 1.68677, 0.0156863, -0.169893, 0  (Max error: 0.017) (D-gap: -1.9e-09)
   ~= : -0.01858x^3 + -0.0008925x^2 + 1.019x (Max error: 0.01557)
   A3 : 16-bit table with 256 entries
-  ~= : 1.12285, 0.822891, 0.770402, 1.84047, 0.00392157, -0.742872, 0  (Max error: 0.056) (D-gap: -0.00049)
   ~= : 0.4144x^3 + -0.5098x^2 + 1.095x (Max error: 0.008042)
 CLUT : 9 x 9 x 9 x 9 (16 bpp)
  "B" : 3 outputs
diff --git a/profiles/misc/sRGB_HP.icc.txt b/profiles/misc/sRGB_HP.icc.txt
index be143c7..b8d915f 100644
--- a/profiles/misc/sRGB_HP.icc.txt
+++ b/profiles/misc/sRGB_HP.icc.txt
@@ -24,14 +24,14 @@
  'bTRC' : 'curv' :   2060 : 1084
 
 rTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 gTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 bTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
  XYZ : | 0.436065674 0.385147095 0.143066406 |
        | 0.222488403 0.716873169 0.060607910 |
        | 0.013916016 0.097076416 0.714096069 |
diff --git a/profiles/misc/sRGB_HP_2.icc.txt b/profiles/misc/sRGB_HP_2.icc.txt
index aa4bb0a..e4cb0d7 100644
--- a/profiles/misc/sRGB_HP_2.icc.txt
+++ b/profiles/misc/sRGB_HP_2.icc.txt
@@ -24,14 +24,14 @@
  'bTRC' : 'curv' :   2060 : 5201
 
 rTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 gTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 bTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
  XYZ : | 0.436065674 0.385147095 0.143066406 |
        | 0.222488403 0.716873169 0.060607910 |
        | 0.013916016 0.097076416 0.714096069 |
diff --git a/profiles/misc/sRGB_black_scaled.icc.txt b/profiles/misc/sRGB_black_scaled.icc.txt
index f807911..b060689 100644
--- a/profiles/misc/sRGB_black_scaled.icc.txt
+++ b/profiles/misc/sRGB_black_scaled.icc.txt
@@ -23,14 +23,14 @@
  'chad' : 'sf32' :     44 : 3004
 
 rTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 gTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 bTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
  XYZ : | 0.436065674 0.385147095 0.143066406 |
        | 0.222488403 0.716873169 0.060607910 |
        | 0.013916016 0.097076416 0.714096069 |
diff --git a/profiles/mobile/Display_P3_LUT.icc.txt b/profiles/mobile/Display_P3_LUT.icc.txt
index 8c0eb2b..8aeb549 100644
--- a/profiles/mobile/Display_P3_LUT.icc.txt
+++ b/profiles/mobile/Display_P3_LUT.icc.txt
@@ -18,14 +18,14 @@
  'gTRC' : 'curv' :   2060 : 508
 
 rTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 gTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 bTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
  XYZ : | 0.515121460 0.291976929 0.157104492 |
        | 0.241195679 0.692245483 0.066574097 |
        | -0.001037598 0.041885376 0.784072876 |
diff --git a/profiles/mobile/sRGB_LUT.icc.txt b/profiles/mobile/sRGB_LUT.icc.txt
index fb6e6b3..40d0588 100644
--- a/profiles/mobile/sRGB_LUT.icc.txt
+++ b/profiles/mobile/sRGB_LUT.icc.txt
@@ -18,14 +18,14 @@
  'gTRC' : 'curv' :   2060 : 520
 
 rTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 gTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
 bTRC : 16-bit table with 1024 entries
-  ~= : 2.39936, 0.948025, 0.0519888, 0.0777105, 0.0449658, 7.48783e-06, 0  (Max error: 5.7e-05) (D-gap: 4.4e-06)
-  ~= : 0.3053x^3 + 0.6822x^2 + 0.01249x (Max error: 0.001803)
+  ~= : 2.39978, 0.947934, 0.052081, 0.0777105, 0.0449658, -1.28639e-06, 0  (Max error: 5e-05) (D-gap: 0)
+  ~= : 0.3053x^3 + 0.6822x^2 + 0.01252x (Max error: 0.001676)
  XYZ : | 0.436035156 0.385116577 0.143051147 |
        | 0.222488403 0.716903687 0.060607910 |
        | 0.013916016 0.097061157 0.713912964 |
diff --git a/profiles/sRGB_Facebook.icc.txt b/profiles/sRGB_Facebook.icc.txt
index 4fa4d50..6c92b86 100644
--- a/profiles/sRGB_Facebook.icc.txt
+++ b/profiles/sRGB_Facebook.icc.txt
@@ -17,14 +17,14 @@
  'bTRC' : 'curv' :     64 : 460
 
 rTRC : 16-bit table with 26 entries
-  ~= : 2.39736, 0.949206, 0.050643, 0.0774395, 0.04, 9.82733e-05, 0  (Max error: 0.00025) (D-gap: -1.8e-06)
-  ~= : 0.492x^3 + 0.4458x^2 + 0.06218x (Max error: 0.06965)
+  ~= : 2.39737, 0.949209, 0.0506402, 0.0774395, 0.04, 0.000100383, 0  (Max error: 0.00025) (D-gap: 0)
+  ~= : 0.3071x^3 + 0.681x^2 + 0.01189x (Max error: 0.001513)
 gTRC : 16-bit table with 26 entries
-  ~= : 2.39736, 0.949206, 0.050643, 0.0774395, 0.04, 9.82733e-05, 0  (Max error: 0.00025) (D-gap: -1.8e-06)
-  ~= : 0.492x^3 + 0.4458x^2 + 0.06218x (Max error: 0.06965)
+  ~= : 2.39737, 0.949209, 0.0506402, 0.0774395, 0.04, 0.000100383, 0  (Max error: 0.00025) (D-gap: 0)
+  ~= : 0.3071x^3 + 0.681x^2 + 0.01189x (Max error: 0.001513)
 bTRC : 16-bit table with 26 entries
-  ~= : 2.39736, 0.949206, 0.050643, 0.0774395, 0.04, 9.82733e-05, 0  (Max error: 0.00025) (D-gap: -1.8e-06)
-  ~= : 0.492x^3 + 0.4458x^2 + 0.06218x (Max error: 0.06965)
+  ~= : 2.39737, 0.949209, 0.0506402, 0.0774395, 0.04, 0.000100383, 0  (Max error: 0.00025) (D-gap: 0)
+  ~= : 0.3071x^3 + 0.681x^2 + 0.01189x (Max error: 0.001513)
  XYZ : | 0.436065674 0.385147095 0.143066406 |
        | 0.222488403 0.716873169 0.060607910 |
        | 0.013916016 0.097076416 0.714096069 |
diff --git a/src/GaussNewton.c b/src/GaussNewton.c
index 1f7d168..842f7ed 100644
--- a/src/GaussNewton.c
+++ b/src/GaussNewton.c
@@ -8,11 +8,16 @@
 #include "../skcms.h"
 #include "GaussNewton.h"
 #include "LinearAlgebra.h"
+#include "TransferFunction.h"
 #include <assert.h>
+#include <string.h>
 
-bool skcms_gauss_newton_step(float (*     t)(float x, const void*), const void* t_ctx,
-                             float (*     f)(float x, const float P[4]),
-                             void  (*grad_f)(float x, const float P[4], float dfdP[4]),
+bool skcms_gauss_newton_step(float (*     t)(float x, const void*),
+                             const void* t_ctx,
+                             float (*     f)(float x, const void*, const float P[4]),
+                             const void* f_ctx,
+                             void  (*grad_f)(float x, const void*, const float P[4], float dfdP[4]),
+                             const void* g_ctx,
                              float P[4],
                              float x0, float x1, int N) {
     // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
@@ -58,10 +63,10 @@
     for (int i = 0; i < N; i++) {
         float x = x0 + i*dx;
 
-        float resid = t(x,t_ctx) - f(x,P);
+        float resid = t(x,t_ctx) - f(x,f_ctx,P);
 
         float dfdP[4] = {0,0,0,0};
-        grad_f(x,P, dfdP);
+        grad_f(x,g_ctx,P, dfdP);
 
         for (int r = 0; r < 4; r++) {
             for (int c = 0; c < 4; c++) {
@@ -94,3 +99,25 @@
     P[3] += dP.vals[3];
     return true;
 }
+
+float skcms_eval_curve(float x, const void* vctx) {
+    const skcms_Curve* curve = (const skcms_Curve*)vctx;
+
+    if (curve->table_entries == 0) {
+        return skcms_TransferFunction_eval(&curve->parametric, x);
+    }
+
+    // TODO: today we should always hit an entry exactly, but if that changes, lerp?
+    // (We add half to account for slight int -> float -> int round tripping issues.)
+    int ix = (int)( x*(curve->table_entries - 1) + 0.5f );
+
+    if (curve->table_8) {
+        return curve->table_8[ix] * (1/255.0f);
+    } else {
+        uint16_t be;
+        memcpy(&be, curve->table_16 + 2*ix, 2);
+
+        uint16_t le = ((be << 8) | (be >> 8)) & 0xffff;
+        return le * (1/65535.0f);
+    }
+}
diff --git a/src/GaussNewton.h b/src/GaussNewton.h
index 87d01c5..16ad80b 100644
--- a/src/GaussNewton.h
+++ b/src/GaussNewton.h
@@ -8,7 +8,6 @@
 #pragma once
 
 #include <stdbool.h>
-#include "LinearAlgebra.h"
 
 // One Gauss-Newton step, tuning up to 4 parameters P to minimize [ t(x,ctx) - f(x,P) ]^2.
 //
@@ -22,8 +21,14 @@
 // If you have fewer than 4 parameters, set the unused P to zero, don't touch their dfdP.
 //
 // Returns true and updates P on success, or returns false on failure.
-bool skcms_gauss_newton_step(float (*     t)(float x, const void*), const void* t_ctx,
-                             float (*     f)(float x, const float P[4]),
-                             void  (*grad_f)(float x, const float P[4], float dfdP[4]),
+bool skcms_gauss_newton_step(float (*     t)(float x, const void*),
+                             const void* t_ctx,
+                             float (*     f)(float x, const void*, const float P[4]),
+                             const void* f_ctx,
+                             void  (*grad_f)(float x, const void*, const float P[4], float dfdP[4]),
+                             const void* g_ctx,
                              float P[4],
                              float x0, float x1, int N);
+
+// A target function for skcms_Curve, passed as ctx.
+float skcms_eval_curve(float x, const void* ctx);
diff --git a/src/ICCProfile.c b/src/ICCProfile.c
index 5e532ac..9c504cd 100644
--- a/src/ICCProfile.c
+++ b/src/ICCProfile.c
@@ -259,33 +259,6 @@
     return false;
 }
 
-static float table_func_16(int i, const void* ctx) {
-    return read_big_u16((const uint8_t*)ctx + 2 * i) * (1 / 65535.0f);
-}
-
-static float table_func_8(int i, const void* ctx) {
-    return ((const uint8_t*)ctx)[i] * (1 / 255.0f);
-}
-
-bool skcms_ApproximateCurve(const skcms_Curve* curve, skcms_TransferFunction* approx,
-                            float* max_error) {
-    if (!curve || !curve->table_entries || !approx) {
-        return false;
-    }
-
-    if (curve->table_entries > (uint32_t)INT_MAX) {
-        return false;
-    }
-
-    if (curve->table_16) {
-        return skcms_TransferFunction_approximate(table_func_16, curve->table_16,
-                                                  (int)curve->table_entries, approx, max_error);
-    } else {
-        return skcms_TransferFunction_approximate(table_func_8, curve->table_8,
-                                                  (int)curve->table_entries, approx, max_error);
-    }
-}
-
 // mft1 and mft2 share a large chunk of data
 typedef struct {
     uint8_t type                 [ 4];
diff --git a/src/LinearAlgebra.c b/src/LinearAlgebra.c
index e7e335e..4fe539c 100644
--- a/src/LinearAlgebra.c
+++ b/src/LinearAlgebra.c
@@ -8,6 +8,7 @@
 #include "../skcms.h"
 #include "LinearAlgebra.h"
 #include "PortableMath.h"
+#include <float.h>
 
 bool skcms_Matrix4x4_invert(const skcms_Matrix4x4* src, skcms_Matrix4x4* dst) {
     double a00 = (double)src->vals[0][0],
@@ -52,7 +53,7 @@
     }
 
     double invdet = 1.0 / determinant;
-    if (!isfinitef_((float)invdet)) {
+    if (invdet > +(double)FLT_MAX || invdet < -(double)FLT_MAX || !isfinitef_((float)invdet)) {
         return false;
     }
 
diff --git a/src/TF13.c b/src/TF13.c
index 4209673..281718f 100644
--- a/src/TF13.c
+++ b/src/TF13.c
@@ -8,30 +8,7 @@
 #include "../skcms.h"
 #include "GaussNewton.h"
 #include "PortableMath.h"
-#include "TransferFunction.h"
 #include <limits.h>
-#include <string.h>
-
-static float eval_curve(float x, const void* vctx) {
-    const skcms_Curve* curve = (const skcms_Curve*)vctx;
-
-    if (curve->table_entries == 0) {
-        return skcms_TransferFunction_eval(&curve->parametric, x);
-    }
-
-    // TODO: today we should always hit an entry exactly, but if that changes, lerp?
-    int ix = (int)( x * (curve->table_entries - 1) );
-
-    if (curve->table_8) {
-        return curve->table_8[ix] * (1/255.0f);
-    } else {
-        uint16_t be;
-        memcpy(&be, curve->table_16 + 2*ix, 2);
-
-        uint16_t le = ((be << 8) | (be >> 8)) & 0xffff;
-        return le * (1/65535.0f);
-    }
-}
 
 // Evaluating skcms_TF13{A,B} at x:
 //   f(x) = Ax^3 + Bx^2 + (1-A-B)x
@@ -39,12 +16,14 @@
 //   ∂f/∂A = x^3 - x
 //   ∂f/∂B = x^2 - x
 
-static float eval_13(float x, const float P[4]) {
+static float eval_13(float x, const void* ctx, const float P[4]) {
+    (void)ctx;
     return P[0]*x*x*x
          + P[1]*x*x
          + (1 - P[0] - P[1])*x;
 }
-static void grad_13(float x, const float P[4], float dfdP[4]) {
+static void grad_13(float x, const void* ctx, const float P[4], float dfdP[4]) {
+    (void)ctx;
     (void)P;
     dfdP[0] = x*x*x - x;
     dfdP[1] = x*x   - x;
@@ -63,9 +42,11 @@
                                             : (int)curve->table_entries;
 
     for (int i = 0; i < 3/*TODO: Tune???*/; i++) {
-        if (!skcms_gauss_newton_step(eval_curve, curve,
-                                     eval_13, grad_13,
-                                     P, 0,1,N)) {
+        if (!skcms_gauss_newton_step(skcms_eval_curve, curve,
+                                     eval_13, NULL,
+                                     grad_13, NULL,
+                                     P,
+                                     0,1,N)) {
             return false;
         }
     }
@@ -74,7 +55,7 @@
     for (int i = 0; i < N; i++) {
         float x = i * (1.0f / (N-1));
 
-        float err = fabsf_( eval_curve(x, curve) - eval_13(x, P) );
+        float err = fabsf_( skcms_eval_curve(x, curve) - eval_13(x,NULL,P) );
         if (err > *max_error) {
             *max_error = err;
         }
diff --git a/src/TransferFunction.c b/src/TransferFunction.c
index c76d094..20742b2 100644
--- a/src/TransferFunction.c
+++ b/src/TransferFunction.c
@@ -6,359 +6,21 @@
  */
 
 #include "../skcms.h"
+#include "GaussNewton.h"
 #include "LinearAlgebra.h"
 #include "Macros.h"
 #include "PortableMath.h"
 #include "TransferFunction.h"
 #include <assert.h>
+#include <limits.h>
 #include <string.h>
 
-// Enable to do thorough logging of the nonlinear regression to stderr
-#if 0
-    #include <stdio.h>
-    #define LOG(...) fprintf(stderr, __VA_ARGS__)
-#else
-    #define LOG(...) do {} while(false)
-#endif
-
-#define LOG_TF(tf)                                              \
-    LOG("[%.25g %.25g %.25g %.25g]\n",                          \
-        tf->g, tf->a, tf->b, tf->e)
-
-#define LOG_VEC(v)                                              \
-    LOG("[%.25g %.25g %.25g %.25g]\n",                          \
-        v.vals[0], v.vals[1], v.vals[2], v.vals[3])
-
-#define LOG_MTX(m)                                              \
-    LOG("| %.25g %.25g %.25g %.25g |\n"                         \
-        "| %.25g %.25g %.25g %.25g |\n"                         \
-        "| %.25g %.25g %.25g %.25g |\n"                         \
-        "| %.25g %.25g %.25g %.25g |\n",                        \
-        m.vals[0][0], m.vals[0][1], m.vals[0][2], m.vals[0][3], \
-        m.vals[1][0], m.vals[1][1], m.vals[1][2], m.vals[1][3], \
-        m.vals[2][0], m.vals[2][1], m.vals[2][2], m.vals[2][3], \
-        m.vals[3][0], m.vals[3][1], m.vals[3][2], m.vals[3][3])
-
-float skcms_TransferFunction_eval(const skcms_TransferFunction* fn, float x) {
+float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
     float sign = x < 0 ? -1.0f : 1.0f;
     x *= sign;
 
-    return sign * (x < fn->d ? fn->c * x + fn->f
-                             : powf_(fn->a * x + fn->b, fn->g) + fn->e);
-}
-
-static float TF_Nonlinear_eval(const skcms_TransferFunction* fn, float x) {
-    // We strive to never allow negative ax+b, but values can drift slightly. Guard against NaN.
-    float base = fmaxf_(fn->a * x + fn->b, 0.0f);
-    return powf_(base, fn->g) + fn->e;
-}
-
-// Evaluate the gradient of the nonlinear component of fn
-static void tf_eval_gradient_nonlinear(const skcms_TransferFunction* fn,
-                                       float x,
-                                       float* d_fn_d_A_at_x,
-                                       float* d_fn_d_B_at_x,
-                                       float* d_fn_d_E_at_x,
-                                       float* d_fn_d_G_at_x) {
-    float base = fn->a * x + fn->b;
-    if (base > 0.0f) {
-        *d_fn_d_A_at_x = fn->g * x * powf_(base, fn->g - 1.0f);
-        *d_fn_d_B_at_x = fn->g * powf_(base, fn->g - 1.0f);
-        *d_fn_d_E_at_x = 1.0f;
-        // Scale by 1/log_2(e)
-        *d_fn_d_G_at_x = powf_(base, fn->g) * log2f_(base) * 0.69314718f;
-    } else {
-        *d_fn_d_A_at_x = 0.0f;
-        *d_fn_d_B_at_x = 0.0f;
-        *d_fn_d_E_at_x = 0.0f;
-        *d_fn_d_G_at_x = 0.0f;
-    }
-}
-
-// Take one Gauss-Newton step updating A, B, E, and G, given D.
-static bool tf_gauss_newton_step_nonlinear(skcms_TableFunc* t, const void* ctx, int start, int n,
-                                           skcms_TransferFunction* fn, float* error_Linfty_after) {
-    LOG("tf_gauss_newton_step_nonlinear (%d, %d)\n", start, n);
-    LOG("fn: "); LOG_TF(fn);
-
-    // Let ne_lhs be the left hand side of the normal equations, and let ne_rhs
-    // be the right hand side. Zero the diagonal [sic] of |ne_lhs| and all of |ne_rhs|.
-    skcms_Matrix4x4 ne_lhs;
-    skcms_Vector4 ne_rhs;
-    for (int row = 0; row < 4; ++row) {
-        for (int col = 0; col < 4; ++col) {
-            ne_lhs.vals[row][col] = 0;
-        }
-        ne_rhs.vals[row] = 0;
-    }
-
-    // Add the contributions from each sample to the normal equations.
-    for (int i = start; i < n; ++i) {
-        float xi = i / (n - 1.0f);
-        LOG("%d (%.25g)\n", i, xi);
-
-        // Let J be the gradient of fn with respect to parameters A, B, E, and G,
-        // evaulated at this point.
-        skcms_Vector4 J;
-        tf_eval_gradient_nonlinear(fn, xi, &J.vals[0], &J.vals[1], &J.vals[2], &J.vals[3]);
-        LOG("J: "); LOG_VEC(J);
-
-        // Let r be the residual at this point;
-        float r = t(i, ctx) - TF_Nonlinear_eval(fn, xi);
-        LOG("r: %.25g\n", r);
-
-        if (i == start) {
-            // Weight the D point much higher, so that the two pieces of the approximation line up
-            float w = (n - start) * 0.5f;
-            J.vals[0] *= w;
-            J.vals[1] *= w;
-            J.vals[2] *= w;
-            J.vals[3] *= w;
-            r *= w;
-        }
-
-        // Update the normal equations left hand side with the outer product of J
-        // with itself.
-        for (int row = 0; row < 4; ++row) {
-            for (int col = 0; col < 4; ++col) {
-                ne_lhs.vals[row][col] += J.vals[row] * J.vals[col];
-            }
-
-            // Update the normal equations right hand side the product of J with the
-            // residual
-            ne_rhs.vals[row] += J.vals[row] * r;
-        }
-        LOG("LHS/RHS:\n"); LOG_MTX(ne_lhs); LOG_VEC(ne_rhs);
-    }
-
-    // Note that if G = 1, then the normal equations will be singular
-    // (because when G = 1, B and E are equivalent parameters).
-    // To avoid problems, fix E (row/column 3) in these circumstances.
-    const float kEpsilonForG = 1.0f / 1024.0f;
-    if (fabsf_(fn->g - 1.0f) < kEpsilonForG) {
-        LOG("G ~= 1, pinning E\n");
-        for (int row = 0; row < 4; ++row) {
-            float value = (row == 2) ? 1.0f : 0.0f;
-            ne_lhs.vals[row][2] = value;
-            ne_lhs.vals[2][row] = value;
-        }
-        ne_rhs.vals[2] = 0.0f;
-    }
-
-    // Solve the normal equations.
-    skcms_Matrix4x4 ne_lhs_inv;
-    if (!skcms_Matrix4x4_invert(&ne_lhs, &ne_lhs_inv)) {
-        return false;
-    }
-    LOG("LHS Inverse:\n"); LOG_MTX(ne_lhs_inv);
-
-    skcms_Vector4 step = skcms_Matrix4x4_Vector4_mul(&ne_lhs_inv, &ne_rhs);
-    LOG("step: "); LOG_VEC(step);
-
-    // Update the transfer function.
-    fn->a += step.vals[0];
-    fn->b += step.vals[1];
-    fn->e += step.vals[2];
-    fn->g += step.vals[3];
-
-    // A should always be positive.
-    fn->a = fmaxf_(fn->a, 0.0f);
-
-    // Ensure that fn be defined at D.
-    if (fn->a * fn->d + fn->b < 0.0f) {
-        LOG("AD+B = %.25g, ", fn->a * fn->d + fn->b);
-        fn->b = -fn->a * fn->d;
-        LOG("B -> %.25g\n", fn->b);
-    }
-
-    // Compute the Linfinity error.
-    *error_Linfty_after = 0;
-    for (int i = start; i < n; ++i) {
-        float xi = i / (n - 1.0f);
-        float error = fabsf_(t(i, ctx) - TF_Nonlinear_eval(fn, xi));
-        *error_Linfty_after = fmaxf_(error, *error_Linfty_after);
-    }
-
-    return true;
-}
-
-// Solve for A, B, E, and G, given D. The initial value of |fn| is the
-// point from which iteration starts.
-static bool tf_solve_nonlinear(skcms_TableFunc* t, const void* ctx, int start, int n,
-                               skcms_TransferFunction* fn) {
-    // Take a maximum of 16 Gauss-Newton steps.
-    enum { kNumSteps = 16 };
-
-    // The L-infinity error after each step.
-    float step_error[kNumSteps] = { 0 };
-    int step = 0;
-    for (;; ++step) {
-        // If the normal equations are singular, we can't continue.
-        if (!tf_gauss_newton_step_nonlinear(t, ctx, start, n, fn, &step_error[step])) {
-            return false;
-        }
-
-        // If the error is inf or nan, we are clearly not converging.
-        if (!isfinitef_(step_error[step])) {
-            return false;
-        }
-
-        // Stop if our error is tiny.
-        const float kEarlyOutTinyErrorThreshold = (1.0f / 16.0f) / 256.0f;
-        if (step_error[step] < kEarlyOutTinyErrorThreshold) {
-            break;
-        }
-
-        // Stop if our error is not changing, or changing in the wrong direction.
-        if (step > 1) {
-            // If our error is is huge for two iterations, we're probably not in the
-            // region of convergence.
-            if (step_error[step] > 1.0f && step_error[step - 1] > 1.0f) {
-                return false;
-            }
-
-            // If our error didn't change by ~1%, assume we've converged as much as we
-            // are going to.
-            const float kEarlyOutByPercentChangeThreshold = 32.0f / 256.0f;
-            const float kMinimumPercentChange = 1.0f / 128.0f;
-            float percent_change =
-                fabsf_(step_error[step] - step_error[step - 1]) / step_error[step];
-            if (percent_change < kMinimumPercentChange &&
-                step_error[step] < kEarlyOutByPercentChangeThreshold) {
-                break;
-            }
-        }
-        if (step == kNumSteps - 1) {
-            break;
-        }
-    }
-
-    // Declare failure if our error is obviously too high.
-    const float kDidNotConvergeThreshold = 64.0f / 256.0f;
-    if (step_error[step] > kDidNotConvergeThreshold) {
-        return false;
-    }
-
-    return true;
-}
-
-// Returns the number of points that are approximated by the line, to within tol.
-static int tf_fit_linear(skcms_TableFunc* t, const void* ctx, int n, float tol,
-                         skcms_TransferFunction* fn) {
-    // Idea: We fit the first N points to the linear portion of the TF. We want the line to pass
-    // through the first and last points exactly.
-    //
-    // We walk along the points, and find the minimum and maximum slope of the line before the
-    // error would exceed our tolerance. Once the range [slope_min, slope_max] would be empty,
-    // we definitely can't add any more points, so we're done.
-    //
-    // However, some points error intervals' may intersect the running interval, but not lie within
-    // it. So we keep track of the last point we saw that is a valid candidate for being the end
-    // point, and once the search is done, back up to build the line through *that* point.
-    const float x_scale = 1.0f / (n - 1);
-
-    int lin_points = 1;
-    fn->f = t(0, ctx);
-    float slope_min = -INFINITY_;
-    float slope_max = INFINITY_;
-    for (int i = 1; i < n; ++i) {
-        float xi = i * x_scale;
-        float yi = t(i, ctx);
-        float slope_max_i = (yi + tol - fn->f) / xi;
-        float slope_min_i = (yi - tol - fn->f) / xi;
-        if (slope_max_i < slope_min || slope_max < slope_min_i) {
-            // Slope intervals no longer overlap.
-            break;
-        }
-        slope_max = fminf_(slope_max, slope_max_i);
-        slope_min = fmaxf_(slope_min, slope_min_i);
-        float cur_slope = (yi - fn->f) / xi;
-        if (slope_min <= cur_slope && cur_slope <= slope_max) {
-            lin_points = i + 1;
-            fn->c = cur_slope;
-        }
-    }
-
-    // Set D to the last point from above
-    fn->d = (lin_points - 1) * x_scale;
-    return lin_points;
-}
-
-static float tf_max_error(skcms_TableFunc* t, const void* ctx, int n,
-                          const skcms_TransferFunction* fn) {
-    const float x_scale = 1.0f / (n - 1);
-    float max_error = 0;
-    for (int i = 0; i < n; ++i) {
-        float xi = i * x_scale;
-        float fn_of_xi = skcms_TransferFunction_eval(fn, xi);
-        float error_at_xi = fabsf_(t(i, ctx) - fn_of_xi);
-        max_error = fmaxf_(max_error, error_at_xi);
-    }
-    return max_error;
-}
-
-bool skcms_TransferFunction_approximate(skcms_TableFunc* t, const void* ctx, int n,
-                                        skcms_TransferFunction* fn, float* max_error) {
-    if (n < 2) {
-        return false;
-    }
-
-    const float x_scale = 1.0f / (n - 1);
-    const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
-    float min_error = INFINITY_;
-
-    for (int tol = 0; tol < ARRAY_COUNT(kTolerances); ++tol) {
-        skcms_TransferFunction tf;
-        int lin_points = tf_fit_linear(t, ctx, n,kTolerances[tol], &tf);
-
-        // If the entire data set was linear, move the coefficients to the nonlinear portion with
-        // G == 1. This lets use a canonical representation with D == 0.
-        if (lin_points == n) {
-            tf.g = 1;
-            tf.b = tf.f;
-            tf.a = tf.c;
-            tf.c = tf.d = tf.e = tf.f = 0;
-        } else if (lin_points == n - 1) {
-            // Degenerate case with only two points in the nonlinear segment. Solve directly.
-            tf.g = 1;
-            tf.a = (t(n - 1, ctx) - t(n - 2, ctx)) * (n - 1);
-            tf.b = t(n - 2, ctx) - (tf.a * (n - 2) * x_scale);
-            tf.e = 0;
-        } else {
-            // Do a nonlinear regression on the nonlinear segment. Include the 'D' point in the
-            // nonlinear regression, so the two pieces are more likely to line up.
-            int start = lin_points > 0 ? lin_points - 1 : 0;
-
-            // We need G to be in right vicinity, or the regression may not converge. Solve exactly for
-            // for midpoint of the nonlinear range, assuming B = E = 0 & A = 1.
-            int mid = (start + n) / 2;
-            float mid_x = mid / (n - 1.0f);
-            float mid_y = t(mid, ctx);
-            tf.g = log2f_(mid_y) / log2f_(mid_x);;
-            tf.a = 1;
-            tf.b = 0;
-            tf.e = 0;
-
-            if (!tf_solve_nonlinear(t, ctx, start, n, &tf)) {
-                continue;
-            }
-        }
-
-        float err = tf_max_error(t, ctx, n, &tf);
-        if (min_error > err) {
-            min_error = err;
-            *fn = tf;
-        }
-    }
-
-    if (!isfinitef_(min_error)) {
-        return false;
-    }
-    if (max_error) {
-        *max_error = min_error;
-    }
-
-    return true;
+    return sign * (x < tf->d ? tf->c * x + tf->f
+                             : powf_(tf->a * x + tf->b, tf->g) + tf->e);
 }
 
 // TODO: Adjust logic here? This still assumes that purely linear inputs will have D > 1, which
@@ -380,7 +42,7 @@
     //
     // and now both can be expressed in terms of the same parametric form as the
     // original - parameters are enclosed in square brackets.
-    skcms_TransferFunction fn_inv = { 0, 0, 0, 0, 0, 0, 0 };
+    skcms_TransferFunction tf_inv = { 0, 0, 0, 0, 0, 0, 0 };
 
     // Reject obviously malformed inputs
     if (!isfinitef_(src->a + src->b + src->c + src->d + src->e + src->f + src->g)) {
@@ -411,27 +73,242 @@
 
     // Invert linear segment
     if (has_linear) {
-        fn_inv.c = 1.0f / src->c;
-        fn_inv.f = -src->f / src->c;
+        tf_inv.c = 1.0f / src->c;
+        tf_inv.f = -src->f / src->c;
     }
 
     // Invert nonlinear segment
     if (has_nonlinear) {
-        fn_inv.g = 1.0f / src->g;
-        fn_inv.a = powf_(1.0f / src->a, src->g);
-        fn_inv.b = -fn_inv.a * src->e;
-        fn_inv.e = -src->b / src->a;
+        tf_inv.g = 1.0f / src->g;
+        tf_inv.a = powf_(1.0f / src->a, src->g);
+        tf_inv.b = -tf_inv.a * src->e;
+        tf_inv.e = -src->b / src->a;
     }
 
     if (!has_linear) {
-        fn_inv.d = 0;
+        tf_inv.d = 0;
     } else if (!has_nonlinear) {
         // Any value larger than 1 works
-        fn_inv.d = 2.0f;
+        tf_inv.d = 2.0f;
     } else {
-        fn_inv.d = src->c * src->d + src->f;
+        tf_inv.d = src->c * src->d + src->f;
     }
 
-    *dst = fn_inv;
+    *dst = tf_inv;
     return true;
 }
+
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
+
+// From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
+//
+//   tf(x) =  cx + f          x < d
+//   tf(x) = (ax + b)^g + e   x ≥ d
+//
+// When fitting, we add the additional constraint that both pieces meet at d:
+//
+//   cd + f = (ad + b)^g + e
+//
+// Solving for e and folding it through gives an alternate formulation of the non-linear piece:
+//
+//   tf(x) =                           cx + f   x < d
+//   tf(x) = (ax + b)^g - (ad + b)^g + cd + f   x ≥ d
+//
+// Our overall strategy is then:
+//    For a couple tolerances,
+//       - fit_linear():     fit c,d,f iteratively to as many points as our tolerance allows
+//       - fit_nonlinear():  fit g,a,b using Gauss-Newton given c,d,f (and by constraint, e)
+//    Return the parameters with least maximum error.
+//
+// To run Gauss-Newton to find g,a,b, we'll also need the gradient of the non-linear piece:
+//    ∂tf/∂g = ln(ax + b)*(ax + b)^g
+//           - ln(ad + b)*(ad + b)^g
+//    ∂tf/∂a = xg(ax + b)^(g-1)
+//           - dg(ad + b)^(g-1)
+//    ∂tf/∂b =  g(ax + b)^(g-1)
+//           -  g(ad + b)^(g-1)
+
+static float eval_nonlinear(float x, const void* ctx, const float P[4]) {
+    const skcms_TransferFunction* tf = (const skcms_TransferFunction*)ctx;
+
+    const float g = P[0],  a = P[1],  b = P[2],
+                c = tf->c, d = tf->d, f = tf->f;
+
+    const float X = a*x+b,
+                D = a*d+b;
+    assert (X >= 0 && D >= 0);
+
+    // (Notice how a large amount of this work is independent of x.)
+    return powf_(X, g)
+         - powf_(D, g)
+         + c*d + f;
+}
+static void grad_nonlinear(float x, const void* ctx, const float P[4], float dfdP[4]) {
+    const skcms_TransferFunction* tf = (const skcms_TransferFunction*)ctx;
+
+    const float g = P[0], a = P[1], b = P[2],
+                d = tf->d;
+
+    const float X = a*x+b,
+                D = a*d+b;
+    assert (X >= 0 && D >= 0);
+
+    dfdP[0] = 0.69314718f*log2f_(X)*powf_(X, g)
+            - 0.69314718f*log2f_(D)*powf_(D, g);
+    dfdP[1] = x*g*powf_(X, g-1)
+            - d*g*powf_(D, g-1);
+    dfdP[2] =   g*powf_(X, g-1)
+            -   g*powf_(D, g-1);
+}
+
+// Returns the number of points that are approximated by the line, to within tol.
+static int fit_linear(const skcms_Curve* curve, int N, float tol, skcms_TransferFunction* tf) {
+    // We iteratively fit the first points to the TF's linear piece.
+    // We want the cx + f line to pass through the first and last points we fit exactly.
+    //
+    // As we walk along the points we find the minimum and maximum slope of the line before the
+    // error would exceed our tolerance.  We stop when the range [slope_min, slope_max] becomes
+    // emtpy, when we definitely can't add any more points.
+    //
+    // Some points' error intervals may intersect the running interval but not lie fully
+    // within it.  So we keep track of the last point we saw that is a valid end point candidate,
+    // and once the search is done, back up to build the line through *that* point.
+    const float x_scale = 1.0f / (N - 1);
+
+    int lin_points = 1;
+    tf->f = skcms_eval_curve(0, curve);
+
+    float slope_min = -INFINITY_;
+    float slope_max = +INFINITY_;
+    for (int i = 1; i < N; ++i) {
+        float x = i * x_scale;
+        float y = skcms_eval_curve(x, curve);
+
+        float slope_max_i = (y + tol - tf->f) / x,
+              slope_min_i = (y - tol - tf->f) / x;
+        if (slope_max_i < slope_min || slope_max < slope_min_i) {
+            // Slope intervals would no longer overlap.
+            break;
+        }
+        slope_max = fminf_(slope_max, slope_max_i);
+        slope_min = fmaxf_(slope_min, slope_min_i);
+
+        float cur_slope = (y - tf->f) / x;
+        if (slope_min <= cur_slope && cur_slope <= slope_max) {
+            lin_points = i + 1;
+            tf->c = cur_slope;
+        }
+    }
+
+    // Set D to the last point that met our tolerance.
+    tf->d = (lin_points - 1) * x_scale;
+    return lin_points;
+}
+
+// Fit the points in [start,N) to the non-linear piece of tf, or return false if we can't.
+static bool fit_nonlinear(const skcms_Curve* curve, int start, int N, skcms_TransferFunction* tf) {
+    float P[4] = { tf->g, tf->a, tf->b, 0 };
+
+    // No matter where we start, x_scale should always represent N even steps from 0 to 1.
+    const float x_scale = 1.0f / (N-1);
+
+    for (int j = 0; j < 3/*TODO: tune*/; j++) {
+        // These extra constraints a >= 0 and ad+b >= 0 are not modeled in the optimization.
+        assert (P[1] >= 0 &&
+                P[1] * tf->d + P[2] >= 0);
+
+        if (!skcms_gauss_newton_step(skcms_eval_curve, curve,
+                                     eval_nonlinear, tf,
+                                     grad_nonlinear, tf,
+                                     P,
+                                     start*x_scale, 1, N-start)) {
+            return false;
+        }
+
+        // We don't really know how to fix up a if it goes negative.
+        if (P[1] < 0) {
+            return false;
+        }
+        // If ad+b goes negative, we feel just barely not uneasy enough to tweak b so ad+b is zero.
+        if (P[1] * tf->d + P[2] < 0) {
+            P[2] = -P[1] * tf->d;
+        }
+    }
+
+    tf->g = P[0];
+    tf->a = P[1];
+    tf->b = P[2];
+    tf->e =   tf->c*tf->d + tf->f
+      - powf_(tf->a*tf->d + tf->b, tf->g);
+    return true;
+}
+
+bool skcms_ApproximateCurve(const skcms_Curve* curve,
+                            skcms_TransferFunction* approx,
+                            float* max_error) {
+    if (!curve || !approx || !max_error) {
+        return false;
+    }
+
+    if (curve->table_entries == 0) {
+        // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
+        return false;
+    }
+
+    if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
+        // We need at least two points, and must put some reasonable cap on the maximum number.
+        return false;
+    }
+
+    int N = (int)curve->table_entries;
+    const float x_scale = 1.0f / (N - 1);
+
+    *max_error = INFINITY_;
+    const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
+    for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
+        skcms_TransferFunction tf;
+        int L = fit_linear(curve, N, kTolerances[t], &tf);
+
+        if (L == N) {
+            // If the entire data set was linear, move the coefficients to the nonlinear portion
+            // with G == 1.  This lets use a canonical representation with d == 0.
+            tf.g = 1;
+            tf.a = tf.c;
+            tf.b = tf.f;
+            tf.c = tf.d = tf.e = tf.f = 0;
+        } else if (L == N - 1) {
+            // Degenerate case with only two points in the nonlinear segment. Solve directly.
+            tf.g = 1;
+            tf.a = (skcms_eval_curve((N-1)*x_scale, curve) - skcms_eval_curve((N-2)*x_scale, curve))
+                 * (N-1);
+            tf.b = skcms_eval_curve((N-1)*x_scale, curve)
+                 - tf.a * (N-2) * x_scale;
+            tf.e = 0;
+        } else {
+            // Start by guessing a gamma-only curve through the midpoint.
+            int mid = (L + N) / 2;
+            float mid_x = mid / (N - 1.0f);
+            float mid_y = skcms_eval_curve(mid_x, curve);
+            tf.g = log2f_(mid_y) / log2f_(mid_x);;
+            tf.a = 1;
+            tf.b = 0;
+
+            if (!fit_nonlinear(curve, L,N, &tf)) {
+                continue;
+            }
+        }
+
+        float err = 0;
+        for (int i = 0; i < N; i++) {
+            float x = i * x_scale;
+            float got  = skcms_TransferFunction_eval(&tf, x),
+                  want = skcms_eval_curve(x, curve);
+            err = fmaxf_(err, fabsf_( want - got ));
+        }
+        if (*max_error > err) {
+            *max_error = err;
+            *approx    = tf;
+        }
+    }
+    return isfinitef_(*max_error);
+}
diff --git a/src/TransferFunction.h b/src/TransferFunction.h
index 220292f..c98c78d 100644
--- a/src/TransferFunction.h
+++ b/src/TransferFunction.h
@@ -14,7 +14,3 @@
 float skcms_TransferFunction_eval(const skcms_TransferFunction*, float);
 
 bool skcms_TransferFunction_invert(const skcms_TransferFunction*, skcms_TransferFunction*);
-
-typedef float skcms_TableFunc(int, const void*);
-bool skcms_TransferFunction_approximate(skcms_TableFunc* t, const void* ctx, int n,
-                                        skcms_TransferFunction*, float* max_error);
diff --git a/tests.c b/tests.c
index 943064d..ec176e7 100644
--- a/tests.c
+++ b/tests.c
@@ -425,23 +425,6 @@
     { 2.4f, 1 / 1.055f, 0.055f / 1.055f, 1 / 12.92f, 0.04045f, 0.0f, 0.0f };
 static const skcms_TransferFunction kodak_transfer_fn =
     { 2.42f, 0.939f, 0.0595f, 0.0691f, 0.0392f, 0.00305f, 0.00392f };
-// static const skcms_TransferFunction gamma_1_8_transfer_fn =
-//     { 1.8f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
-static const skcms_TransferFunction gamma_2_2_transfer_fn =
-    { 2.2f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
-static const skcms_TransferFunction gamma_2_4_transfer_fn =
-    { 2.4f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
-static const skcms_TransferFunction gamma_2_8_transfer_fn =
-    { 2.8f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
-static const skcms_TransferFunction bt_709_transfer_fn =
-    { 1 / 0.45f, 1 / 1.099f, 0.099f / 1.099f, 1 / 4.5f, 0.081f, 0.0f, 0.0f };
-static const skcms_TransferFunction smpte_240m_transfer_fn =
-    { 1 / 0.45f, 1 / 1.1115f, 0.1115f / 1.1115f, 0.25f, 0.0913f, 0.0f, 0.0f };
-// These are two different ways to represent the linear (identity) transfer function
-static const skcms_TransferFunction gamma_1_transfer_fn =
-    { 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
-static const skcms_TransferFunction linear_transfer_fn =
-    { 0.0f, 1.0f, 0.0f, 1.0f, 2.0f, 0.0f, 0.0f };
 static const skcms_TransferFunction dot_gain_transfer_fn =
     { 1.737f, 1.0f, 0.0f, 0.0626f, 0.02353f, 0.0f, 0.0f };
 
@@ -635,54 +618,7 @@
     }
 }
 
-typedef struct {
-    const skcms_TransferFunction* tf;
-    int n;
-} TransferFunctionAndSampleCount;
-
-static float table_func_tf_eval(int i, const void* ctx) {
-    const TransferFunctionAndSampleCount* tfasc = (const TransferFunctionAndSampleCount*)ctx;
-    float xi = i / (tfasc->n - 1.0f);
-    return skcms_TransferFunction_eval(tfasc->tf, xi);
-}
-
-static void test_TransferFunction_approximate() {
-    const skcms_TransferFunction* transfer_fns[] = {
-//        &gamma_1_8_transfer_fn,
-        &gamma_2_2_transfer_fn,
-        &gamma_2_4_transfer_fn,
-        &gamma_2_8_transfer_fn,
-        &srgb_transfer_fn,
-        &kodak_transfer_fn,
-        &smpte_240m_transfer_fn,
-        &bt_709_transfer_fn,
-        &gamma_1_transfer_fn,
-        &linear_transfer_fn,
-    };
-    const int num_transfer_fns = ARRAY_COUNT(transfer_fns);
-
-    int table_sizes[] = { 512, 256, 128, 64, 16, 11, 8, 7, 6, 5 };
-    const int num_table_sizes = ARRAY_COUNT(table_sizes);
-    TransferFunctionAndSampleCount tfasc;
-
-    for (int ts = 0; ts < num_table_sizes; ++ts) {
-        for (int tf = 0; tf < num_transfer_fns; ++tf) {
-            skcms_TransferFunction fn_approx;
-            float max_error;
-            tfasc.tf = transfer_fns[tf];
-            tfasc.n = table_sizes[ts];
-            expect(skcms_TransferFunction_approximate(table_func_tf_eval, &tfasc, table_sizes[ts],
-                                                      &fn_approx, &max_error));
-            expect(max_error < 3.f / 256.f);
-        }
-    }
-}
-
-static float table_func_float(int i, const void* ctx) {
-    return ((const float*)ctx)[i];
-}
-
-static void test_TransferFunction_approximate_clamped() {
+static void test_ApproximateCurve_clamped() {
     // These data represent a transfer function that is clamped at the high
     // end of its domain. It comes from the color profile attached to
     // https://crbug.com/750459
@@ -732,9 +668,18 @@
         0.980240f, 0.989075f, 0.997955f, 1.000000f,
     };
 
-    skcms_TransferFunction fn_approx;
+    uint8_t table_8[ARRAY_COUNT(t)];
+    for (int i = 0; i < ARRAY_COUNT(t); i++) {
+        table_8[i] = (uint8_t)(t[i] * 255.0f + 0.5f);
+    }
+
+    skcms_Curve curve;
+    curve.table_entries = (uint32_t)ARRAY_COUNT(t);
+    curve.table_8       = table_8;
+
+    skcms_TransferFunction tf;
     float max_error;
-    expect(skcms_TransferFunction_approximate(table_func_float, t, 256, &fn_approx, &max_error));
+    expect(skcms_ApproximateCurve(&curve, &tf, &max_error));
 
     // The approximation isn't very good.
     expect(max_error < 1 / 128.0f);
@@ -1116,8 +1061,7 @@
     test_FormatConversions_half();
     test_FormatConversions_float();
     test_Parse(regenTestData);
-    test_TransferFunction_approximate();
-    test_TransferFunction_approximate_clamped();
+    test_ApproximateCurve_clamped();
     test_Matrix3x3_invert();
     test_SimpleRoundTrip();
     test_FloatRoundTrips();