Implement faceforward intrinsic in RP codegen.

This implementation uses bitwise logic to toggle the sign bit of N
based on the result of dot(I, NRef).

Change-Id: I1e788435d5943ea6e01a04da89289ee3a3f1fa74
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/658076
Reviewed-by: Brian Osman <brianosman@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
diff --git a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
index a8333db..5a0e7d1 100644
--- a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
+++ b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
@@ -2884,6 +2884,35 @@
             }
             return true;
 
+        case IntrinsicKind::k_faceforward_IntrinsicKind: {
+            // Implement faceforward as `N ^ ((0 <= dot(I, NRef)) & 0x80000000)`.
+            // In other words, flip the sign bit of N if `0 <= dot(I, NRef)`.
+            SkASSERT(arg0.type().matches(arg1.type()));
+            SkASSERT(arg0.type().matches(arg2.type()));
+            int slotCount = arg0.type().slotCount();
+
+            // Stack: N, 0, I, NRef
+            if (!this->pushExpression(arg0)) {
+                return unsupported();
+            }
+            fBuilder.push_literal_f(0.0);
+            if (!this->pushExpression(arg1) || !this->pushExpression(arg2)) {
+                return unsupported();
+            }
+            // Stack: N, 0, dot(I,NRef)
+            fBuilder.dot_floats(slotCount);
+            // Stack: N, (0 <= dot(I,NRef))
+            fBuilder.binary_op(BuilderOp::cmple_n_floats, 1);
+            // Stack: N, (0 <= dot(I,NRef)), 0x80000000
+            fBuilder.push_literal_i(0x80000000);
+            // Stack: N, ((0 <= dot(I,NRef)) & 0x80000000)
+            fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, 1);
+            // Stack: N, vec((0 <= dot(I,NRef)) & 0x80000000)
+            fBuilder.push_duplicates(slotCount - 1);
+            // Stack: N ^ vec((0 <= dot(I,NRef)) & 0x80000000)
+            fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, slotCount);
+            return true;
+        }
         case IntrinsicKind::k_mix_IntrinsicKind:
             // Note: our SkRP mix op takes the interpolation point first, not the interpolants.
             SkASSERT(arg0.type().matches(arg1.type()));
diff --git a/tests/sksl/intrinsics/FaceForward.skrp b/tests/sksl/intrinsics/FaceForward.skrp
index 3ef3d71..dbc8b9b 100644
--- a/tests/sksl/intrinsics/FaceForward.skrp
+++ b/tests/sksl/intrinsics/FaceForward.skrp
@@ -1,4 +1,97 @@
-### Compilation failed:
-
-error: code is not supported
-1 error
+    1. store_src_rg                   xy = src.rg
+    2. init_lane_masks                CondMask = LoopMask = RetMask = true
+    3. copy_constant                  expectedPos(0) = 0x3F800000 (1.0)
+    4. copy_constant                  expectedPos(1) = 0x40000000 (2.0)
+    5. copy_constant                  expectedPos(2) = 0x40400000 (3.0)
+    6. copy_constant                  expectedPos(3) = 0x40800000 (4.0)
+    7. copy_constant                  expectedNeg(0) = 0xBF800000 (-1.0)
+    8. copy_constant                  expectedNeg(1) = 0xC0000000 (-2.0)
+    9. copy_constant                  expectedNeg(2) = 0xC0400000 (-3.0)
+   10. copy_constant                  expectedNeg(3) = 0xC0800000 (-4.0)
+   11. copy_constant                  $0 = N(0)
+   12. zero_slot_unmasked             $1 = 0
+   13. copy_constant                  $2 = I(0)
+   14. copy_constant                  $3 = NRef(0)
+   15. mul_float                      $2 *= $3
+   16. cmple_float                    $1 = lessThanEqual($1, $2)
+   17. copy_constant                  $2 = 0x80000000 (-0.0)
+   18. bitwise_and_int                $1 &= $2
+   19. bitwise_xor_int                $0 ^= $1
+   20. copy_slot_unmasked             $1 = expectedNeg(0)
+   21. cmpeq_float                    $0 = equal($0, $1)
+   22. copy_2_constants               $1..2 = N(0..1)
+   23. zero_slot_unmasked             $3 = 0
+   24. copy_2_constants               $4..5 = I(0..1)
+   25. copy_2_constants               $6..7 = NRef(0..1)
+   26. dot_2_floats                   $4 = dot($4..5, $6..7)
+   27. cmple_float                    $3 = lessThanEqual($3, $4)
+   28. copy_constant                  $4 = 0x80000000 (-0.0)
+   29. bitwise_and_int                $3 &= $4
+   30. copy_slot_unmasked             $4 = $3
+   31. bitwise_xor_2_ints             $1..2 ^= $3..4
+   32. copy_2_slots_unmasked          $3..4 = expectedNeg(0..1)
+   33. cmpeq_2_floats                 $1..2 = equal($1..2, $3..4)
+   34. bitwise_and_int                $1 &= $2
+   35. bitwise_and_int                $0 &= $1
+   36. copy_3_constants               $1..3 = N(0..2)
+   37. zero_slot_unmasked             $4 = 0
+   38. copy_3_constants               $5..7 = I(0..2)
+   39. copy_3_constants               $8..10 = NRef(0..2)
+   40. dot_3_floats                   $5 = dot($5..7, $8..10)
+   41. cmple_float                    $4 = lessThanEqual($4, $5)
+   42. copy_constant                  $5 = 0x80000000 (-0.0)
+   43. bitwise_and_int                $4 &= $5
+   44. swizzle_3                      $4..6 = ($4..6).xxx
+   45. bitwise_xor_3_ints             $1..3 ^= $4..6
+   46. copy_3_slots_unmasked          $4..6 = expectedPos(0..2)
+   47. cmpeq_3_floats                 $1..3 = equal($1..3, $4..6)
+   48. bitwise_and_int                $2 &= $3
+   49. bitwise_and_int                $1 &= $2
+   50. bitwise_and_int                $0 &= $1
+   51. copy_4_constants               $1..4 = N
+   52. zero_slot_unmasked             $5 = 0
+   53. copy_4_constants               $6..9 = I
+   54. copy_4_constants               $10..13 = NRef
+   55. dot_4_floats                   $6 = dot($6..9, $10..13)
+   56. cmple_float                    $5 = lessThanEqual($5, $6)
+   57. copy_constant                  $6 = 0x80000000 (-0.0)
+   58. bitwise_and_int                $5 &= $6
+   59. swizzle_4                      $5..8 = ($5..8).xxxx
+   60. bitwise_xor_4_ints             $1..4 ^= $5..8
+   61. copy_4_slots_unmasked          $5..8 = expectedPos
+   62. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+   63. bitwise_and_2_ints             $1..2 &= $3..4
+   64. bitwise_and_int                $1 &= $2
+   65. bitwise_and_int                $0 &= $1
+   66. copy_constant                  $1 = 0xBF800000 (-1.0)
+   67. copy_slot_unmasked             $2 = expectedNeg(0)
+   68. cmpeq_float                    $1 = equal($1, $2)
+   69. bitwise_and_int                $0 &= $1
+   70. copy_constant                  $1 = 0xBF800000 (-1.0)
+   71. copy_constant                  $2 = 0xC0000000 (-2.0)
+   72. copy_2_slots_unmasked          $3..4 = expectedNeg(0..1)
+   73. cmpeq_2_floats                 $1..2 = equal($1..2, $3..4)
+   74. bitwise_and_int                $1 &= $2
+   75. bitwise_and_int                $0 &= $1
+   76. copy_constant                  $1 = 0x3F800000 (1.0)
+   77. copy_constant                  $2 = 0x40000000 (2.0)
+   78. copy_constant                  $3 = 0x40400000 (3.0)
+   79. copy_3_slots_unmasked          $4..6 = expectedPos(0..2)
+   80. cmpeq_3_floats                 $1..3 = equal($1..3, $4..6)
+   81. bitwise_and_int                $2 &= $3
+   82. bitwise_and_int                $1 &= $2
+   83. bitwise_and_int                $0 &= $1
+   84. copy_constant                  $1 = 0x3F800000 (1.0)
+   85. copy_constant                  $2 = 0x40000000 (2.0)
+   86. copy_constant                  $3 = 0x40400000 (3.0)
+   87. copy_constant                  $4 = 0x40800000 (4.0)
+   88. copy_4_slots_unmasked          $5..8 = expectedPos
+   89. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+   90. bitwise_and_2_ints             $1..2 &= $3..4
+   91. bitwise_and_int                $1 &= $2
+   92. bitwise_and_int                $0 &= $1
+   93. swizzle_4                      $0..3 = ($0..3).xxxx
+   94. copy_4_constants               $4..7 = colorRed
+   95. copy_4_constants               $8..11 = colorGreen
+   96. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+   97. load_src                       src.rgba = $0..3