pay some attention to apply_tf()

The sign handling in apply_tf() is still a first draft: it picks +1 or -1 with a compare-and-select, then multiplies by that value twice.

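This new version does the same thing with much cheaper bitwise instructions:
it masks off the sign bit, evaluates the curve on |x|, and ORs the sign bit
back onto the result.  It should be a small, free performance win.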

Change-Id: I858ae734b57a0595fbf3f72c97bbe5a5a6d3d6b6
Reviewed-on: https://skia-review.googlesource.com/144624
Auto-Submit: Mike Klein <mtklein@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/Transform_inl.h b/src/Transform_inl.h
index 49d324a..86d9c9b 100644
--- a/src/Transform_inl.h
+++ b/src/Transform_inl.h
@@ -236,13 +236,17 @@
 
 // Return tf(x).
 SI ATTR F apply_tf(const skcms_TransferFunction* tf, F x) {
-    F sign = if_then_else(x < 0, -F1, F1);
-    x *= sign;
+    // Peel off the sign bit and set x = |x|.
+    U32 bits = bit_pun<U32>(x),
+        sign = bits & 0x80000000;
+    x = bit_pun<F>(bits ^ sign);
 
-    F linear    =            tf->c*x + tf->f;
-    F nonlinear = approx_pow(tf->a*x + tf->b, tf->g) + tf->e;
+    // The transfer function is linear below d, and a power curve at d and after.
+    F v = if_then_else(x < tf->d,            tf->c*x + tf->f
+                                , approx_pow(tf->a*x + tf->b, tf->g) + tf->e);
 
-    return sign * if_then_else(x < tf->d, linear, nonlinear);
+    // Tack the sign bit back on.
+    return bit_pun<F>(sign | bit_pun<U32>(v));
 }