bottom out clut recursion at dim=1
This is a modest (~10%) speedup for a couple hundred extra bytes.
Change-Id: I80ffb0b13c8c6cfc5b03ff2b58d3a28bc876d769
Reviewed-on: https://skia-review.googlesource.com/c/162104
Auto-Submit: Mike Klein <mtklein@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
diff --git a/src/Transform_inl.h b/src/Transform_inl.h
index 978a157..5baf5ef 100644
--- a/src/Transform_inl.h
+++ b/src/Transform_inl.h
@@ -549,10 +549,7 @@
template <int kBitDepth>
MAYBE_NOINLINE
static void clut(const skcms_A2B* a2b, int dim, I32 ix, I32 stride, F* r, F* g, F* b, F a) {
- if (dim == 0) {
- sample_clut<kBitDepth>(a2b,ix, r,g,b);
- return;
- }
+ assert (0 < dim && dim <= 4);
I32 limit = cast<I32>(F0);
limit += a2b->grid_points[dim-1];
@@ -566,8 +563,14 @@
hi = cast<I32>(minus_1_ulp(x+1.0f));
F lr = *r, lg = *g, lb = *b,
hr = *r, hg = *g, hb = *b;
- clut<kBitDepth>(a2b, dim-1, stride*lo + ix, stride*limit, &lr,&lg,&lb,a);
- clut<kBitDepth>(a2b, dim-1, stride*hi + ix, stride*limit, &hr,&hg,&hb,a);
+
+ if (dim == 1) {
+ sample_clut<kBitDepth>(a2b, stride*lo + ix, &lr,&lg,&lb);
+ sample_clut<kBitDepth>(a2b, stride*hi + ix, &hr,&hg,&hb);
+ } else {
+ clut<kBitDepth>(a2b, dim-1, stride*lo + ix, stride*limit, &lr,&lg,&lb,a);
+ clut<kBitDepth>(a2b, dim-1, stride*hi + ix, stride*limit, &hr,&hg,&hb,a);
+ }
F t = x - cast<F>(lo);
*r = lr + (hr-lr)*t;