Implement the reflect() intrinsic in Raster Pipeline (RP) codegen.

This is done by manually evaluating `I - (N * dot(I,N) * 2)` on the
stack. I've verified the behavior via the existing runtime-effect test
slide.

Change-Id: Ic9953bd7af2a16565d059188b857fd83e638c227
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/657999
Commit-Queue: John Stiles <johnstiles@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
diff --git a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
index a66178f..6df0eb4 100644
--- a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
+++ b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
@@ -2791,6 +2791,32 @@
             SkASSERT(arg0.type().matches(arg1.type()));
             return this->pushIntrinsic(BuilderOp::pow_n_floats, arg0, arg1);
 
+        case IntrinsicKind::k_reflect_IntrinsicKind: {
+            // Implement reflect as `I - (N * dot(I,N) * 2)`.
+            SkASSERT(arg0.type().matches(arg1.type()));
+            SkASSERT(arg0.type().slotCount() == arg1.type().slotCount());
+            SkASSERT(arg0.type().componentType().isFloat());
+            int slotCount = arg0.type().slotCount();
+
+            // Stack: I, N.
+            if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
+                return unsupported();
+            }
+            // Stack: I, N, I, N.
+            fBuilder.push_clone(2 * slotCount);
+            // Stack: I, N, dot(I,N)
+            fBuilder.dot_floats(slotCount);
+            // Stack: I, N, dot(I,N), 2
+            fBuilder.push_literal_f(2.0);
+            // Stack: I, N, dot(I,N) * 2
+            fBuilder.binary_op(BuilderOp::mul_n_floats, 1);
+            // Stack: I, N * dot(I,N) * 2
+            fBuilder.push_duplicates(slotCount - 1);
+            fBuilder.binary_op(BuilderOp::mul_n_floats, slotCount);
+            // Stack: I - (N * dot(I,N) * 2)
+            fBuilder.binary_op(BuilderOp::sub_n_floats, slotCount);
+            return true;
+        }
         case IntrinsicKind::k_step_IntrinsicKind: {
             // Compute step as `float(lessThan(edge, x))`. We convert from boolean 0/~0 to floating
             // point zero/one by using a bitwise-and against the bit-pattern of 1.0.
diff --git a/tests/sksl/intrinsics/Reflect.skrp b/tests/sksl/intrinsics/Reflect.skrp
index 3ef3d71..55e7aa2 100644
--- a/tests/sksl/intrinsics/Reflect.skrp
+++ b/tests/sksl/intrinsics/Reflect.skrp
@@ -1,4 +1,97 @@
-### Compilation failed:
-
-error: code is not supported
-1 error
+    1. store_src_rg                   xy = src.rg
+    2. init_lane_masks                CondMask = LoopMask = RetMask = true
+    3. copy_constant                  expectedX = 0xC2440000 (-49.0)
+    4. copy_constant                  expectedXY(0) = 0xC3290000 (-169.0)
+    5. copy_constant                  expectedXY(1) = 0x434A0000 (202.0)
+    6. copy_constant                  expectedXYZ(0) = 0xC3BD8000 (-379.0)
+    7. copy_constant                  expectedXYZ(1) = 0x43E30000 (454.0)
+    8. copy_constant                  expectedXYZ(2) = 0xC4044000 (-529.0)
+    9. copy_constant                  expectedXYZW(0) = 0xC42EC000 (-699.0)
+   10. copy_constant                  expectedXYZW(1) = 0x44518000 (838.0)
+   11. copy_constant                  expectedXYZW(2) = 0xC4744000 (-977.0)
+   12. copy_constant                  expectedXYZW(3) = 0x448B8000 (1116.0)
+   13. copy_constant                  $0 = I(0)
+   14. copy_constant                  $1 = N(0)
+   15. copy_2_slots_unmasked          $2..3 = $0..1
+   16. mul_float                      $2 *= $3
+   17. copy_constant                  $3 = 0x40000000 (2.0)
+   18. mul_float                      $2 *= $3
+   19. mul_float                      $1 *= $2
+   20. sub_float                      $0 -= $1
+   21. copy_slot_unmasked             $1 = expectedX
+   22. cmpeq_float                    $0 = equal($0, $1)
+   23. copy_2_constants               $1..2 = I(0..1)
+   24. copy_2_constants               $3..4 = N(0..1)
+   25. copy_4_slots_unmasked          $5..8 = $1..4
+   26. dot_2_floats                   $5 = dot($5..6, $7..8)
+   27. copy_constant                  $6 = 0x40000000 (2.0)
+   28. mul_float                      $5 *= $6
+   29. copy_slot_unmasked             $6 = $5
+   30. mul_2_floats                   $3..4 *= $5..6
+   31. sub_2_floats                   $1..2 -= $3..4
+   32. copy_2_slots_unmasked          $3..4 = expectedXY
+   33. cmpeq_2_floats                 $1..2 = equal($1..2, $3..4)
+   34. bitwise_and_int                $1 &= $2
+   35. bitwise_and_int                $0 &= $1
+   36. copy_3_constants               $1..3 = I(0..2)
+   37. copy_3_constants               $4..6 = N(0..2)
+   38. copy_4_slots_unmasked          $7..10 = $1..4
+   39. copy_2_slots_unmasked          $11..12 = $5..6
+   40. dot_3_floats                   $7 = dot($7..9, $10..12)
+   41. copy_constant                  $8 = 0x40000000 (2.0)
+   42. mul_float                      $7 *= $8
+   43. swizzle_3                      $7..9 = ($7..9).xxx
+   44. mul_3_floats                   $4..6 *= $7..9
+   45. sub_3_floats                   $1..3 -= $4..6
+   46. copy_3_slots_unmasked          $4..6 = expectedXYZ
+   47. cmpeq_3_floats                 $1..3 = equal($1..3, $4..6)
+   48. bitwise_and_int                $2 &= $3
+   49. bitwise_and_int                $1 &= $2
+   50. bitwise_and_int                $0 &= $1
+   51. copy_4_constants               $1..4 = I
+   52. copy_4_constants               $5..8 = N
+   53. copy_4_slots_unmasked          $9..12 = $1..4
+   54. copy_4_slots_unmasked          $13..16 = $5..8
+   55. dot_4_floats                   $9 = dot($9..12, $13..16)
+   56. copy_constant                  $10 = 0x40000000 (2.0)
+   57. mul_float                      $9 *= $10
+   58. swizzle_4                      $9..12 = ($9..12).xxxx
+   59. mul_4_floats                   $5..8 *= $9..12
+   60. sub_4_floats                   $1..4 -= $5..8
+   61. copy_4_slots_unmasked          $5..8 = expectedXYZW
+   62. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+   63. bitwise_and_2_ints             $1..2 &= $3..4
+   64. bitwise_and_int                $1 &= $2
+   65. bitwise_and_int                $0 &= $1
+   66. copy_constant                  $1 = 0xC2440000 (-49.0)
+   67. copy_slot_unmasked             $2 = expectedX
+   68. cmpeq_float                    $1 = equal($1, $2)
+   69. bitwise_and_int                $0 &= $1
+   70. copy_constant                  $1 = 0xC3290000 (-169.0)
+   71. copy_constant                  $2 = 0x434A0000 (202.0)
+   72. copy_2_slots_unmasked          $3..4 = expectedXY
+   73. cmpeq_2_floats                 $1..2 = equal($1..2, $3..4)
+   74. bitwise_and_int                $1 &= $2
+   75. bitwise_and_int                $0 &= $1
+   76. copy_constant                  $1 = 0xC3BD8000 (-379.0)
+   77. copy_constant                  $2 = 0x43E30000 (454.0)
+   78. copy_constant                  $3 = 0xC4044000 (-529.0)
+   79. copy_3_slots_unmasked          $4..6 = expectedXYZ
+   80. cmpeq_3_floats                 $1..3 = equal($1..3, $4..6)
+   81. bitwise_and_int                $2 &= $3
+   82. bitwise_and_int                $1 &= $2
+   83. bitwise_and_int                $0 &= $1
+   84. copy_constant                  $1 = 0xC42EC000 (-699.0)
+   85. copy_constant                  $2 = 0x44518000 (838.0)
+   86. copy_constant                  $3 = 0xC4744000 (-977.0)
+   87. copy_constant                  $4 = 0x448B8000 (1116.0)
+   88. copy_4_slots_unmasked          $5..8 = expectedXYZW
+   89. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+   90. bitwise_and_2_ints             $1..2 &= $3..4
+   91. bitwise_and_int                $1 &= $2
+   92. bitwise_and_int                $0 &= $1
+   93. swizzle_4                      $0..3 = ($0..3).xxxx
+   94. copy_4_constants               $4..7 = colorRed
+   95. copy_4_constants               $8..11 = colorGreen
+   96. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+   97. load_src                       src.rgba = $0..3