move clamp outside of clut

This is a tiny code size win and, unexpectedly, a tiny perf win.
I really just wanted to focus the code while I try to refactor it.

It's safe to do this because the CLUT overwrites its R,G,B inputs with its
outputs, and A will either be overwritten with 1 or pass through unchanged.
It's harmless to clamp a pass-through alpha to [0,1].

I almost did the same for table lookups, but that would be a bad idea.
Tables are sometimes used in mixed situations where we clamp some channels
as inputs to tables but leave others unclamped as inputs to parametrics.
Kind of rare, I guess, but possible.

Change-Id: Ifa777cb0af4dfa1991238468db38dc846a650251
Reviewed-on: https://skia-review.googlesource.com/c/162202
Auto-Submit: Mike Klein <mtklein@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
diff --git a/skcms.cc b/skcms.cc
index 55eb23b..b746879 100644
--- a/skcms.cc
+++ b/skcms.cc
@@ -2134,6 +2134,7 @@
                         *args++ = oa.arg;
                     }
                 }
+                *ops++ = Op_clamp;
                 switch (srcProfile->A2B.input_channels) {
                     case 1: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_1D_8 : Op_clut_1D_16; break;
                     case 2: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_2D_8 : Op_clut_2D_16; break;
diff --git a/src/Transform_inl.h b/src/Transform_inl.h
index db028d1..faaa372 100644
--- a/src/Transform_inl.h
+++ b/src/Transform_inl.h
@@ -556,7 +556,7 @@
     const F* srcs[] = { r,g,b,&a };
     F src = *srcs[dim-1];
 
-    F x = max_(F0, min_(src, F1)) * (float)(limit - 1);
+    F x = src * (float)(limit - 1);
 
     I32 lo = cast<I32>(            x      ),
         hi = cast<I32>(minus_1_ulp(x+1.0f));