Implement the reflect intrinsic in Raster Pipeline (RP) codegen.
This is done by manually evaluating `I - (N * dot(I,N) * 2)` on the
stack. I've verified the behavior via the existing runtime-effect test
slide.
Change-Id: Ic9953bd7af2a16565d059188b857fd83e638c227
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/657999
Commit-Queue: John Stiles <johnstiles@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
diff --git a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
index a66178f..6df0eb4 100644
--- a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
+++ b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
@@ -2791,6 +2791,32 @@
SkASSERT(arg0.type().matches(arg1.type()));
return this->pushIntrinsic(BuilderOp::pow_n_floats, arg0, arg1);
+ case IntrinsicKind::k_reflect_IntrinsicKind: {
+ // Implement reflect as `I - (N * dot(I,N) * 2)`.
+ SkASSERT(arg0.type().matches(arg1.type()));
+ SkASSERT(arg0.type().slotCount() == arg1.type().slotCount());
+ SkASSERT(arg0.type().componentType().isFloat());
+ int slotCount = arg0.type().slotCount();
+
+ // Stack: I, N.
+ if (!this->pushExpression(arg0) || !this->pushExpression(arg1)) {
+ return unsupported();
+ }
+ // Stack: I, N, I, N.
+ fBuilder.push_clone(2 * slotCount);
+ // Stack: I, N, dot(I,N)
+ fBuilder.dot_floats(slotCount);
+ // Stack: I, N, dot(I,N), 2
+ fBuilder.push_literal_f(2.0);
+ // Stack: I, N, dot(I,N) * 2
+ fBuilder.binary_op(BuilderOp::mul_n_floats, 1);
+ // Stack: I, N * dot(I,N) * 2
+ fBuilder.push_duplicates(slotCount - 1);
+ fBuilder.binary_op(BuilderOp::mul_n_floats, slotCount);
+ // Stack: I - (N * dot(I,N) * 2)
+ fBuilder.binary_op(BuilderOp::sub_n_floats, slotCount);
+ return true;
+ }
case IntrinsicKind::k_step_IntrinsicKind: {
// Compute step as `float(lessThan(edge, x))`. We convert from boolean 0/~0 to floating
// point zero/one by using a bitwise-and against the bit-pattern of 1.0.
diff --git a/tests/sksl/intrinsics/Reflect.skrp b/tests/sksl/intrinsics/Reflect.skrp
index 3ef3d71..55e7aa2 100644
--- a/tests/sksl/intrinsics/Reflect.skrp
+++ b/tests/sksl/intrinsics/Reflect.skrp
@@ -1,4 +1,97 @@
-### Compilation failed:
-
-error: code is not supported
-1 error
+ 1. store_src_rg xy = src.rg
+ 2. init_lane_masks CondMask = LoopMask = RetMask = true
+ 3. copy_constant expectedX = 0xC2440000 (-49.0)
+ 4. copy_constant expectedXY(0) = 0xC3290000 (-169.0)
+ 5. copy_constant expectedXY(1) = 0x434A0000 (202.0)
+ 6. copy_constant expectedXYZ(0) = 0xC3BD8000 (-379.0)
+ 7. copy_constant expectedXYZ(1) = 0x43E30000 (454.0)
+ 8. copy_constant expectedXYZ(2) = 0xC4044000 (-529.0)
+ 9. copy_constant expectedXYZW(0) = 0xC42EC000 (-699.0)
+ 10. copy_constant expectedXYZW(1) = 0x44518000 (838.0)
+ 11. copy_constant expectedXYZW(2) = 0xC4744000 (-977.0)
+ 12. copy_constant expectedXYZW(3) = 0x448B8000 (1116.0)
+ 13. copy_constant $0 = I(0)
+ 14. copy_constant $1 = N(0)
+ 15. copy_2_slots_unmasked $2..3 = $0..1
+ 16. mul_float $2 *= $3
+ 17. copy_constant $3 = 0x40000000 (2.0)
+ 18. mul_float $2 *= $3
+ 19. mul_float $1 *= $2
+ 20. sub_float $0 -= $1
+ 21. copy_slot_unmasked $1 = expectedX
+ 22. cmpeq_float $0 = equal($0, $1)
+ 23. copy_2_constants $1..2 = I(0..1)
+ 24. copy_2_constants $3..4 = N(0..1)
+ 25. copy_4_slots_unmasked $5..8 = $1..4
+ 26. dot_2_floats $5 = dot($5..6, $7..8)
+ 27. copy_constant $6 = 0x40000000 (2.0)
+ 28. mul_float $5 *= $6
+ 29. copy_slot_unmasked $6 = $5
+ 30. mul_2_floats $3..4 *= $5..6
+ 31. sub_2_floats $1..2 -= $3..4
+ 32. copy_2_slots_unmasked $3..4 = expectedXY
+ 33. cmpeq_2_floats $1..2 = equal($1..2, $3..4)
+ 34. bitwise_and_int $1 &= $2
+ 35. bitwise_and_int $0 &= $1
+ 36. copy_3_constants $1..3 = I(0..2)
+ 37. copy_3_constants $4..6 = N(0..2)
+ 38. copy_4_slots_unmasked $7..10 = $1..4
+ 39. copy_2_slots_unmasked $11..12 = $5..6
+ 40. dot_3_floats $7 = dot($7..9, $10..12)
+ 41. copy_constant $8 = 0x40000000 (2.0)
+ 42. mul_float $7 *= $8
+ 43. swizzle_3 $7..9 = ($7..9).xxx
+ 44. mul_3_floats $4..6 *= $7..9
+ 45. sub_3_floats $1..3 -= $4..6
+ 46. copy_3_slots_unmasked $4..6 = expectedXYZ
+ 47. cmpeq_3_floats $1..3 = equal($1..3, $4..6)
+ 48. bitwise_and_int $2 &= $3
+ 49. bitwise_and_int $1 &= $2
+ 50. bitwise_and_int $0 &= $1
+ 51. copy_4_constants $1..4 = I
+ 52. copy_4_constants $5..8 = N
+ 53. copy_4_slots_unmasked $9..12 = $1..4
+ 54. copy_4_slots_unmasked $13..16 = $5..8
+ 55. dot_4_floats $9 = dot($9..12, $13..16)
+ 56. copy_constant $10 = 0x40000000 (2.0)
+ 57. mul_float $9 *= $10
+ 58. swizzle_4 $9..12 = ($9..12).xxxx
+ 59. mul_4_floats $5..8 *= $9..12
+ 60. sub_4_floats $1..4 -= $5..8
+ 61. copy_4_slots_unmasked $5..8 = expectedXYZW
+ 62. cmpeq_4_floats $1..4 = equal($1..4, $5..8)
+ 63. bitwise_and_2_ints $1..2 &= $3..4
+ 64. bitwise_and_int $1 &= $2
+ 65. bitwise_and_int $0 &= $1
+ 66. copy_constant $1 = 0xC2440000 (-49.0)
+ 67. copy_slot_unmasked $2 = expectedX
+ 68. cmpeq_float $1 = equal($1, $2)
+ 69. bitwise_and_int $0 &= $1
+ 70. copy_constant $1 = 0xC3290000 (-169.0)
+ 71. copy_constant $2 = 0x434A0000 (202.0)
+ 72. copy_2_slots_unmasked $3..4 = expectedXY
+ 73. cmpeq_2_floats $1..2 = equal($1..2, $3..4)
+ 74. bitwise_and_int $1 &= $2
+ 75. bitwise_and_int $0 &= $1
+ 76. copy_constant $1 = 0xC3BD8000 (-379.0)
+ 77. copy_constant $2 = 0x43E30000 (454.0)
+ 78. copy_constant $3 = 0xC4044000 (-529.0)
+ 79. copy_3_slots_unmasked $4..6 = expectedXYZ
+ 80. cmpeq_3_floats $1..3 = equal($1..3, $4..6)
+ 81. bitwise_and_int $2 &= $3
+ 82. bitwise_and_int $1 &= $2
+ 83. bitwise_and_int $0 &= $1
+ 84. copy_constant $1 = 0xC42EC000 (-699.0)
+ 85. copy_constant $2 = 0x44518000 (838.0)
+ 86. copy_constant $3 = 0xC4744000 (-977.0)
+ 87. copy_constant $4 = 0x448B8000 (1116.0)
+ 88. copy_4_slots_unmasked $5..8 = expectedXYZW
+ 89. cmpeq_4_floats $1..4 = equal($1..4, $5..8)
+ 90. bitwise_and_2_ints $1..2 &= $3..4
+ 91. bitwise_and_int $1 &= $2
+ 92. bitwise_and_int $0 &= $1
+ 93. swizzle_4 $0..3 = ($0..3).xxxx
+ 94. copy_4_constants $4..7 = colorRed
+ 95. copy_4_constants $8..11 = colorGreen
+ 96. mix_4_ints $0..3 = mix($4..7, $8..11, $0..3)
+ 97. load_src src.rgba = $0..3