fold together table ops

This cuts some code size and doesn't appear to affect performance.

Change-Id: Iac2aa7cc28dd923fe6e1264b87d2959bc0a932f5
Reviewed-on: https://skia-review.googlesource.com/c/162421
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
diff --git a/skcms.cc b/skcms.cc
index da031ad..fb42ecb 100644
--- a/skcms.cc
+++ b/skcms.cc
@@ -1781,15 +1781,10 @@
     Op_tf_b,
     Op_tf_a,
 
-    Op_table_8_r,
-    Op_table_8_g,
-    Op_table_8_b,
-    Op_table_8_a,
-
-    Op_table_16_r,
-    Op_table_16_g,
-    Op_table_16_b,
-    Op_table_16_a,
+    Op_table_r,
+    Op_table_g,
+    Op_table_b,
+    Op_table_a,
 
     Op_clut,
 
@@ -1964,11 +1959,11 @@
 } OpAndArg;
 
 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
-    static const struct { Op parametric, table_8, table_16; } ops[] = {
-        { Op_tf_r, Op_table_8_r, Op_table_16_r },
-        { Op_tf_g, Op_table_8_g, Op_table_16_g },
-        { Op_tf_b, Op_table_8_b, Op_table_16_b },
-        { Op_tf_a, Op_table_8_a, Op_table_16_a },
+    static const struct { Op parametric, table; } ops[] = {
+        { Op_tf_r, Op_table_r },
+        { Op_tf_g, Op_table_g },
+        { Op_tf_b, Op_table_b },
+        { Op_tf_a, Op_table_a },
     };
 
     const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
@@ -1977,14 +1972,9 @@
         return is_identity_tf(&curve->parametric)
             ? noop
             : OpAndArg{ ops[channel].parametric, &curve->parametric };
-    } else if (curve->table_8) {
-        return OpAndArg{ ops[channel].table_8,  curve };
-    } else if (curve->table_16) {
-        return OpAndArg{ ops[channel].table_16, curve };
     }
 
-    assert(false);
-    return noop;
+    return OpAndArg{ ops[channel].table, curve };
 }
 
 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
diff --git a/src/Transform_inl.h b/src/Transform_inl.h
index 09f4a6a..465fb27 100644
--- a/src/Transform_inl.h
+++ b/src/Transform_inl.h
@@ -476,7 +476,7 @@
     return bit_pun<F>( bit_pun<I32>(v) - 1 );
 }
 
-SI F table_8(const skcms_Curve* curve, F v) {
+SI F table(const skcms_Curve* curve, F v) {
     // Clamp the input to [0,1], then scale to a table index.
     F ix = max_(F0, min_(v, F1)) * (float)(curve->table_entries - 1);
 
@@ -489,23 +489,14 @@
     // the same as in 'l' or adjacent.  We have a rough idea that's it'd always be safe
     // to read adjacent entries and perhaps underflow the table by a byte or two
     // (it'd be junk, but always safe to read).  Not sure how to lerp yet.
-    F l = F_from_U8(gather_8(curve->table_8, lo)),
-      h = F_from_U8(gather_8(curve->table_8, hi));
-    return l + (h-l)*t;
-}
-
-SI F table_16(const skcms_Curve* curve, F v) {
-    // All just as in table_8() until the gathers.
-    F ix = max_(F0, min_(v, F1)) * (float)(curve->table_entries - 1);
-
-    I32 lo = cast<I32>(            ix      ),
-        hi = cast<I32>(minus_1_ulp(ix+1.0f));
-    F t = ix - cast<F>(lo);
-
-    // TODO: as above, load l and h simultaneously?
-    // Here we could even use AVX2-style 32-bit gathers.
-    F l = F_from_U16_BE(gather_16(curve->table_16, lo)),
-      h = F_from_U16_BE(gather_16(curve->table_16, hi));
+    F l,h;
+    if (curve->table_8) {
+        l = F_from_U8(gather_8(curve->table_8, lo));
+        h = F_from_U8(gather_8(curve->table_8, hi));
+    } else {
+        l = F_from_U16_BE(gather_16(curve->table_16, lo));
+        h = F_from_U16_BE(gather_16(curve->table_16, hi));
+    }
     return l + (h-l)*t;
 }
 
@@ -916,15 +907,10 @@
             case Op_tf_b:{ b = apply_tf((const skcms_TransferFunction*)*args++, b); } break;
             case Op_tf_a:{ a = apply_tf((const skcms_TransferFunction*)*args++, a); } break;
 
-            case Op_table_8_r: { r = table_8((const skcms_Curve*)*args++, r); } break;
-            case Op_table_8_g: { g = table_8((const skcms_Curve*)*args++, g); } break;
-            case Op_table_8_b: { b = table_8((const skcms_Curve*)*args++, b); } break;
-            case Op_table_8_a: { a = table_8((const skcms_Curve*)*args++, a); } break;
-
-            case Op_table_16_r:{ r = table_16((const skcms_Curve*)*args++, r); } break;
-            case Op_table_16_g:{ g = table_16((const skcms_Curve*)*args++, g); } break;
-            case Op_table_16_b:{ b = table_16((const skcms_Curve*)*args++, b); } break;
-            case Op_table_16_a:{ a = table_16((const skcms_Curve*)*args++, a); } break;
+            case Op_table_r: { r = table((const skcms_Curve*)*args++, r); } break;
+            case Op_table_g: { g = table((const skcms_Curve*)*args++, g); } break;
+            case Op_table_b: { b = table((const skcms_Curve*)*args++, b); } break;
+            case Op_table_a: { a = table((const skcms_Curve*)*args++, a); } break;
 
             case Op_clut: {
                 const skcms_A2B* a2b = (const skcms_A2B*) *args++;