fold together the 8-bit and 16-bit table ops
This cuts some code size and doesn't appear to affect performance.
Change-Id: Iac2aa7cc28dd923fe6e1264b87d2959bc0a932f5
Reviewed-on: https://skia-review.googlesource.com/c/162421
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
diff --git a/skcms.cc b/skcms.cc
index da031ad..fb42ecb 100644
--- a/skcms.cc
+++ b/skcms.cc
@@ -1781,15 +1781,10 @@
Op_tf_b,
Op_tf_a,
- Op_table_8_r,
- Op_table_8_g,
- Op_table_8_b,
- Op_table_8_a,
-
- Op_table_16_r,
- Op_table_16_g,
- Op_table_16_b,
- Op_table_16_a,
+ Op_table_r,
+ Op_table_g,
+ Op_table_b,
+ Op_table_a,
Op_clut,
@@ -1964,11 +1959,11 @@
} OpAndArg;
static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
- static const struct { Op parametric, table_8, table_16; } ops[] = {
- { Op_tf_r, Op_table_8_r, Op_table_16_r },
- { Op_tf_g, Op_table_8_g, Op_table_16_g },
- { Op_tf_b, Op_table_8_b, Op_table_16_b },
- { Op_tf_a, Op_table_8_a, Op_table_16_a },
+ static const struct { Op parametric, table; } ops[] = {
+ { Op_tf_r, Op_table_r },
+ { Op_tf_g, Op_table_g },
+ { Op_tf_b, Op_table_b },
+ { Op_tf_a, Op_table_a },
};
const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
@@ -1977,14 +1972,9 @@
return is_identity_tf(&curve->parametric)
? noop
: OpAndArg{ ops[channel].parametric, &curve->parametric };
- } else if (curve->table_8) {
- return OpAndArg{ ops[channel].table_8, curve };
- } else if (curve->table_16) {
- return OpAndArg{ ops[channel].table_16, curve };
}
- assert(false);
- return noop;
+ return OpAndArg{ ops[channel].table, curve };
}
static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
diff --git a/src/Transform_inl.h b/src/Transform_inl.h
index 09f4a6a..465fb27 100644
--- a/src/Transform_inl.h
+++ b/src/Transform_inl.h
@@ -476,7 +476,7 @@
return bit_pun<F>( bit_pun<I32>(v) - 1 );
}
-SI F table_8(const skcms_Curve* curve, F v) {
+SI F table(const skcms_Curve* curve, F v) {
// Clamp the input to [0,1], then scale to a table index.
F ix = max_(F0, min_(v, F1)) * (float)(curve->table_entries - 1);
@@ -489,23 +489,14 @@
// the same as in 'l' or adjacent. We have a rough idea that it'd always be safe
// to read adjacent entries and perhaps underflow the table by a byte or two
// (it'd be junk, but always safe to read). Not sure how to lerp yet.
- F l = F_from_U8(gather_8(curve->table_8, lo)),
- h = F_from_U8(gather_8(curve->table_8, hi));
- return l + (h-l)*t;
-}
-
-SI F table_16(const skcms_Curve* curve, F v) {
- // All just as in table_8() until the gathers.
- F ix = max_(F0, min_(v, F1)) * (float)(curve->table_entries - 1);
-
- I32 lo = cast<I32>( ix ),
- hi = cast<I32>(minus_1_ulp(ix+1.0f));
- F t = ix - cast<F>(lo);
-
- // TODO: as above, load l and h simultaneously?
- // Here we could even use AVX2-style 32-bit gathers.
- F l = F_from_U16_BE(gather_16(curve->table_16, lo)),
- h = F_from_U16_BE(gather_16(curve->table_16, hi));
+ F l,h;
+ if (curve->table_8) {
+ l = F_from_U8(gather_8(curve->table_8, lo));
+ h = F_from_U8(gather_8(curve->table_8, hi));
+ } else {
+ l = F_from_U16_BE(gather_16(curve->table_16, lo));
+ h = F_from_U16_BE(gather_16(curve->table_16, hi));
+ }
return l + (h-l)*t;
}
@@ -916,15 +907,10 @@
case Op_tf_b:{ b = apply_tf((const skcms_TransferFunction*)*args++, b); } break;
case Op_tf_a:{ a = apply_tf((const skcms_TransferFunction*)*args++, a); } break;
- case Op_table_8_r: { r = table_8((const skcms_Curve*)*args++, r); } break;
- case Op_table_8_g: { g = table_8((const skcms_Curve*)*args++, g); } break;
- case Op_table_8_b: { b = table_8((const skcms_Curve*)*args++, b); } break;
- case Op_table_8_a: { a = table_8((const skcms_Curve*)*args++, a); } break;
-
- case Op_table_16_r:{ r = table_16((const skcms_Curve*)*args++, r); } break;
- case Op_table_16_g:{ g = table_16((const skcms_Curve*)*args++, g); } break;
- case Op_table_16_b:{ b = table_16((const skcms_Curve*)*args++, b); } break;
- case Op_table_16_a:{ a = table_16((const skcms_Curve*)*args++, a); } break;
+ case Op_table_r: { r = table((const skcms_Curve*)*args++, r); } break;
+ case Op_table_g: { g = table((const skcms_Curve*)*args++, g); } break;
+ case Op_table_b: { b = table((const skcms_Curve*)*args++, b); } break;
+ case Op_table_a: { a = table((const skcms_Curve*)*args++, a); } break;
case Op_clut: {
const skcms_A2B* a2b = (const skcms_A2B*) *args++;