Implement basic RGB op fusing in skcms.

Change-Id: I8f5ce4cba60bfd644c61e46bca2303b0a846c1c3
Reviewed-on: https://skia-review.googlesource.com/c/skcms/+/771528
Commit-Queue: Brian Osman <brianosman@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
diff --git a/skcms.cc b/skcms.cc
index 7786e41..0e2770d 100644
--- a/skcms.cc
+++ b/skcms.cc
@@ -2350,26 +2350,31 @@
     M(gamma_g)            \
     M(gamma_b)            \
     M(gamma_a)            \
+    M(gamma_rgb)          \
                           \
     M(tf_r)               \
     M(tf_g)               \
     M(tf_b)               \
     M(tf_a)               \
+    M(tf_rgb)             \
                           \
     M(pq_r)               \
     M(pq_g)               \
     M(pq_b)               \
     M(pq_a)               \
+    M(pq_rgb)             \
                           \
     M(hlg_r)              \
     M(hlg_g)              \
     M(hlg_b)              \
     M(hlg_a)              \
+    M(hlg_rgb)            \
                           \
     M(hlginv_r)           \
     M(hlginv_g)           \
     M(hlginv_b)           \
     M(hlginv_a)           \
+    M(hlginv_rgb)         \
                           \
     M(table_r)            \
     M(table_g)            \
@@ -2616,7 +2621,35 @@
         if (ops[position].arg) {
             ++position;
         }
+
+        // Identify separate R/G/B functions which can be fused into a single op.
+        // (We do this check inside the loop in order to allow R+G+B+A to be fused into RGB+A.)
+        if (index == 2 && position == 3) {  // ops[0..2] are populated at this point.
+            struct FusableOps {
+                Op r, g, b, rgb;  // Three per-channel ops and the single op replacing them.
+            };
+            static constexpr FusableOps kFusableOps[] = {
+                {Op_gamma_r,  Op_gamma_g,  Op_gamma_b,  Op_gamma_rgb},
+                {Op_tf_r,     Op_tf_g,     Op_tf_b,     Op_tf_rgb},
+                {Op_pq_r,     Op_pq_g,     Op_pq_b,     Op_pq_rgb},
+                {Op_hlg_r,    Op_hlg_g,    Op_hlg_b,    Op_hlg_rgb},
+                {Op_hlginv_r, Op_hlginv_g, Op_hlginv_b, Op_hlginv_rgb},
+            };
+            for (const FusableOps& fusableOp : kFusableOps) {
+                if (ops[0].op == fusableOp.r &&
+                    ops[1].op == fusableOp.g &&
+                    ops[2].op == fusableOp.b &&
+                    (0 == memcmp(ops[0].arg, ops[1].arg, sizeof(skcms_TransferFunction))) &&
+                    (0 == memcmp(ops[0].arg, ops[2].arg, sizeof(skcms_TransferFunction)))) {
+                    // Same op family and byte-identical args on R, G and B: collapse to one op.
+                    ops[0].op = fusableOp.rgb;
+                    position = 1;  // Any later op (e.g. alpha) is written to ops[1].
+                    break;
+                }
+            }
+        }
     }
+
     return position;
 }
 
@@ -2712,9 +2745,14 @@
         *contexts++ = c;
     };
 
-    auto add_op_list = [&](const OpAndArg* oa, int count) {
-        for (const OpAndArg* end = oa + count; oa != end; ++oa) {
-            add_op_ctx(oa->op, oa->arg);
+    auto add_curve_ops = [&](const skcms_Curve* curves, int numChannels) {
+        OpAndArg oa[4];  // Scratch space for up to 4 ops.
+        assert(numChannels <= ARRAY_COUNT(oa));
+        // Pick the ops for these curves, then append each one together with its argument.
+        int numOps = select_curve_ops(curves, numChannels, oa);
+
+        for (int i = 0; i < numOps; ++i) {
+            add_op_ctx(oa[i].op, oa[i].arg);
         }
     };
 
@@ -2749,9 +2787,7 @@
 
         case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
             add_op(Op_load_8888);
-            add_op_ctx(Op_tf_r, skcms_sRGB_TransferFunction());
-            add_op_ctx(Op_tf_g, skcms_sRGB_TransferFunction());
-            add_op_ctx(Op_tf_b, skcms_sRGB_TransferFunction());
+            add_op_ctx(Op_tf_rgb, skcms_sRGB_TransferFunction());
             break;
     }
     if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
@@ -2796,21 +2832,14 @@
 
         if (srcProfile->has_A2B) {
             if (srcProfile->A2B.input_channels) {
-                OpAndArg oa[4];
-                assert(srcProfile->A2B.input_channels <= ARRAY_COUNT(oa));
-                int numOps = select_curve_ops(srcProfile->A2B.input_curves,
-                                              (int)srcProfile->A2B.input_channels,
-                                              oa);
-                add_op_list(oa, numOps);
-
+                add_curve_ops(srcProfile->A2B.input_curves,
+                              (int)srcProfile->A2B.input_channels);
                 add_op(Op_clamp);
                 add_op_ctx(Op_clut_A2B, &srcProfile->A2B);
             }
 
             if (srcProfile->A2B.matrix_channels == 3) {
-                OpAndArg oa[3];
-                int numOps = select_curve_ops(srcProfile->A2B.matrix_curves, /*numChannels=*/3, oa);
-                add_op_list(oa, numOps);
+                add_curve_ops(srcProfile->A2B.matrix_curves, /*numChannels=*/3);
 
                 static const skcms_Matrix3x4 I = {{
                     {1,0,0,0},
@@ -2823,9 +2852,7 @@
             }
 
             if (srcProfile->A2B.output_channels == 3) {
-                OpAndArg oa[3];
-                int numOps = select_curve_ops(srcProfile->A2B.output_curves, /*numChannels=*/3, oa);
-                add_op_list(oa, numOps);
+                add_curve_ops(srcProfile->A2B.output_curves, /*numChannels=*/3);
             }
 
             if (srcProfile->pcs == skcms_Signature_Lab) {
@@ -2833,9 +2860,7 @@
             }
 
         } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
-            OpAndArg oa[3];
-            int numOps = select_curve_ops(srcProfile->trc, /*numChannels=*/3, oa);
-            add_op_list(oa, numOps);
+            add_curve_ops(srcProfile->trc, /*numChannels=*/3);
         } else {
             return false;
         }
@@ -2854,9 +2879,7 @@
             }
 
             if (dstProfile->B2A.input_channels == 3) {
-                OpAndArg oa[3];
-                int numOps = select_curve_ops(dstProfile->B2A.input_curves, /*numChannels=*/3, oa);
-                add_op_list(oa, numOps);
+                add_curve_ops(dstProfile->B2A.input_curves, /*numChannels=*/3);
             }
 
             if (dstProfile->B2A.matrix_channels == 3) {
@@ -2869,21 +2892,15 @@
                     add_op_ctx(Op_matrix_3x4, &dstProfile->B2A.matrix);
                 }
 
-                OpAndArg oa[3];
-                int numOps = select_curve_ops(dstProfile->B2A.matrix_curves, /*numChannels=*/3, oa);
-                add_op_list(oa, numOps);
+                add_curve_ops(dstProfile->B2A.matrix_curves, /*numChannels=*/3);
             }
 
             if (dstProfile->B2A.output_channels) {
                 add_op(Op_clamp);
                 add_op_ctx(Op_clut_B2A, &dstProfile->B2A);
 
-                OpAndArg oa[4];
-                assert(dstProfile->B2A.output_channels <= ARRAY_COUNT(oa));
-                int numOps = select_curve_ops(dstProfile->B2A.output_curves,
-                                              (int)dstProfile->B2A.output_channels,
-                                              oa);
-                add_op_list(oa, numOps);
+                add_curve_ops(dstProfile->B2A.output_curves,
+                              (int)dstProfile->B2A.output_channels);
             }
         } else {
             // This is a TRC destination.
@@ -2913,8 +2930,8 @@
                        oa[index].op != Op_table_g &&
                        oa[index].op != Op_table_b &&
                        oa[index].op != Op_table_a);
+                add_op_ctx(oa[index].op, oa[index].arg);
             }
-            add_op_list(oa, numOps);
         }
     }
 
@@ -2966,9 +2983,7 @@
         case skcms_PixelFormat_RGBA_ffff       >> 1: add_op(Op_store_ffff);       break;
 
         case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
-            add_op_ctx(Op_tf_r, skcms_sRGB_Inverse_TransferFunction());
-            add_op_ctx(Op_tf_g, skcms_sRGB_Inverse_TransferFunction());
-            add_op_ctx(Op_tf_b, skcms_sRGB_Inverse_TransferFunction());
+            add_op_ctx(Op_tf_rgb, skcms_sRGB_Inverse_TransferFunction());
             add_op(Op_store_8888);
             break;
     }
diff --git a/src/Transform_inl.h b/src/Transform_inl.h
index a2b19f1..6f64b08 100644
--- a/src/Transform_inl.h
+++ b/src/Transform_inl.h
@@ -1131,26 +1131,56 @@
 STAGE(gamma_b, const skcms_TransferFunction* tf) { b = apply_gamma(tf, b); }
 STAGE(gamma_a, const skcms_TransferFunction* tf) { a = apply_gamma(tf, a); }
 
+STAGE(gamma_rgb, const skcms_TransferFunction* tf) {  // Fused gamma_r + gamma_g + gamma_b.
+    r = apply_gamma(tf, r);
+    g = apply_gamma(tf, g);
+    b = apply_gamma(tf, b);  // Same tf for all three; a is not assigned here.
+}
+
 STAGE(tf_r, const skcms_TransferFunction* tf) { r = apply_tf(tf, r); }
 STAGE(tf_g, const skcms_TransferFunction* tf) { g = apply_tf(tf, g); }
 STAGE(tf_b, const skcms_TransferFunction* tf) { b = apply_tf(tf, b); }
 STAGE(tf_a, const skcms_TransferFunction* tf) { a = apply_tf(tf, a); }
 
+STAGE(tf_rgb, const skcms_TransferFunction* tf) {  // Fused tf_r + tf_g + tf_b.
+    r = apply_tf(tf, r);
+    g = apply_tf(tf, g);
+    b = apply_tf(tf, b);  // Same tf for all three; a is not assigned here.
+}
+
 STAGE(pq_r, const skcms_TransferFunction* tf) { r = apply_pq(tf, r); }
 STAGE(pq_g, const skcms_TransferFunction* tf) { g = apply_pq(tf, g); }
 STAGE(pq_b, const skcms_TransferFunction* tf) { b = apply_pq(tf, b); }
 STAGE(pq_a, const skcms_TransferFunction* tf) { a = apply_pq(tf, a); }
 
+STAGE(pq_rgb, const skcms_TransferFunction* tf) {  // Fused pq_r + pq_g + pq_b.
+    r = apply_pq(tf, r);
+    g = apply_pq(tf, g);
+    b = apply_pq(tf, b);  // Same tf for all three; a is not assigned here.
+}
+
 STAGE(hlg_r, const skcms_TransferFunction* tf) { r = apply_hlg(tf, r); }
 STAGE(hlg_g, const skcms_TransferFunction* tf) { g = apply_hlg(tf, g); }
 STAGE(hlg_b, const skcms_TransferFunction* tf) { b = apply_hlg(tf, b); }
 STAGE(hlg_a, const skcms_TransferFunction* tf) { a = apply_hlg(tf, a); }
 
+STAGE(hlg_rgb, const skcms_TransferFunction* tf) {  // Fused hlg_r + hlg_g + hlg_b.
+    r = apply_hlg(tf, r);
+    g = apply_hlg(tf, g);
+    b = apply_hlg(tf, b);  // Same tf for all three; a is not assigned here.
+}
+
 STAGE(hlginv_r, const skcms_TransferFunction* tf) { r = apply_hlginv(tf, r); }
 STAGE(hlginv_g, const skcms_TransferFunction* tf) { g = apply_hlginv(tf, g); }
 STAGE(hlginv_b, const skcms_TransferFunction* tf) { b = apply_hlginv(tf, b); }
 STAGE(hlginv_a, const skcms_TransferFunction* tf) { a = apply_hlginv(tf, a); }
 
+STAGE(hlginv_rgb, const skcms_TransferFunction* tf) {  // Fused hlginv_r + hlginv_g + hlginv_b.
+    r = apply_hlginv(tf, r);
+    g = apply_hlginv(tf, g);
+    b = apply_hlginv(tf, b);  // Same tf for all three; a is not assigned here.
+}
+
 STAGE(table_r, const skcms_Curve* curve) { r = table(curve, r); }
 STAGE(table_g, const skcms_Curve* curve) { g = table(curve, g); }
 STAGE(table_b, const skcms_Curve* curve) { b = table(curve, b); }