Implement faceforward intrinsic in RP codegen.
This implementation uses bitwise logic to toggle the sign bit of N
based on the result of dot(I, NRef).
Change-Id: I1e788435d5943ea6e01a04da89289ee3a3f1fa74
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/658076
Reviewed-by: Brian Osman <brianosman@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
diff --git a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
index a8333db..5a0e7d1 100644
--- a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
+++ b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
@@ -2884,6 +2884,35 @@
}
return true;
+ case IntrinsicKind::k_faceforward_IntrinsicKind: {
+ // Implement faceforward(N, I, NRef) as `N ^ ((0 <= dot(I, NRef)) & 0x80000000)`.
+ // In other words, flip the sign bit of N if `0 <= dot(I, NRef)`; otherwise N is unchanged.
+ SkASSERT(arg0.type().matches(arg1.type()));
+ SkASSERT(arg0.type().matches(arg2.type()));
+ int slotCount = arg0.type().slotCount();
+
+ // Stack: N, 0, I, NRef (the 0 is pushed before I and NRef so it is the left operand of `<=`)
+ if (!this->pushExpression(arg0)) {
+ return unsupported();
+ }
+ fBuilder.push_literal_f(0.0);
+ if (!this->pushExpression(arg1) || !this->pushExpression(arg2)) {
+ return unsupported();
+ }
+ // Stack: N, 0, dot(I,NRef) (I and NRef collapse into a single scalar slot)
+ fBuilder.dot_floats(slotCount);
+ // Stack: N, (0 <= dot(I,NRef)) (single-slot comparison result, used as a bitmask below)
+ fBuilder.binary_op(BuilderOp::cmple_n_floats, 1);
+ // Stack: N, (0 <= dot(I,NRef)), 0x80000000 (0x80000000 is the float sign bit)
+ fBuilder.push_literal_i(0x80000000);
+ // Stack: N, ((0 <= dot(I,NRef)) & 0x80000000) (sign bit if the test passed, else zero)
+ fBuilder.binary_op(BuilderOp::bitwise_and_n_ints, 1);
+ // Stack: N, vec((0 <= dot(I,NRef)) & 0x80000000) (scalar mask widened to slotCount slots)
+ fBuilder.push_duplicates(slotCount - 1);
+ // Stack: N ^ vec((0 <= dot(I,NRef)) & 0x80000000) (XOR toggles the sign of every lane of N)
+ fBuilder.binary_op(BuilderOp::bitwise_xor_n_ints, slotCount);
+ return true;
+ }
case IntrinsicKind::k_mix_IntrinsicKind:
// Note: our SkRP mix op takes the interpolation point first, not the interpolants.
SkASSERT(arg0.type().matches(arg1.type()));
diff --git a/tests/sksl/intrinsics/FaceForward.skrp b/tests/sksl/intrinsics/FaceForward.skrp
index 3ef3d71..dbc8b9b 100644
--- a/tests/sksl/intrinsics/FaceForward.skrp
+++ b/tests/sksl/intrinsics/FaceForward.skrp
@@ -1,4 +1,97 @@
-### Compilation failed:
-
-error: code is not supported
-1 error
+ 1. store_src_rg xy = src.rg
+ 2. init_lane_masks CondMask = LoopMask = RetMask = true
+ 3. copy_constant expectedPos(0) = 0x3F800000 (1.0)
+ 4. copy_constant expectedPos(1) = 0x40000000 (2.0)
+ 5. copy_constant expectedPos(2) = 0x40400000 (3.0)
+ 6. copy_constant expectedPos(3) = 0x40800000 (4.0)
+ 7. copy_constant expectedNeg(0) = 0xBF800000 (-1.0)
+ 8. copy_constant expectedNeg(1) = 0xC0000000 (-2.0)
+ 9. copy_constant expectedNeg(2) = 0xC0400000 (-3.0)
+ 10. copy_constant expectedNeg(3) = 0xC0800000 (-4.0)
+ 11. copy_constant $0 = N(0)
+ 12. zero_slot_unmasked $1 = 0
+ 13. copy_constant $2 = I(0)
+ 14. copy_constant $3 = NRef(0)
+ 15. mul_float $2 *= $3
+ 16. cmple_float $1 = lessThanEqual($1, $2)
+ 17. copy_constant $2 = 0x80000000 (-0.0)
+ 18. bitwise_and_int $1 &= $2
+ 19. bitwise_xor_int $0 ^= $1
+ 20. copy_slot_unmasked $1 = expectedNeg(0)
+ 21. cmpeq_float $0 = equal($0, $1)
+ 22. copy_2_constants $1..2 = N(0..1)
+ 23. zero_slot_unmasked $3 = 0
+ 24. copy_2_constants $4..5 = I(0..1)
+ 25. copy_2_constants $6..7 = NRef(0..1)
+ 26. dot_2_floats $4 = dot($4..5, $6..7)
+ 27. cmple_float $3 = lessThanEqual($3, $4)
+ 28. copy_constant $4 = 0x80000000 (-0.0)
+ 29. bitwise_and_int $3 &= $4
+ 30. copy_slot_unmasked $4 = $3
+ 31. bitwise_xor_2_ints $1..2 ^= $3..4
+ 32. copy_2_slots_unmasked $3..4 = expectedNeg(0..1)
+ 33. cmpeq_2_floats $1..2 = equal($1..2, $3..4)
+ 34. bitwise_and_int $1 &= $2
+ 35. bitwise_and_int $0 &= $1
+ 36. copy_3_constants $1..3 = N(0..2)
+ 37. zero_slot_unmasked $4 = 0
+ 38. copy_3_constants $5..7 = I(0..2)
+ 39. copy_3_constants $8..10 = NRef(0..2)
+ 40. dot_3_floats $5 = dot($5..7, $8..10)
+ 41. cmple_float $4 = lessThanEqual($4, $5)
+ 42. copy_constant $5 = 0x80000000 (-0.0)
+ 43. bitwise_and_int $4 &= $5
+ 44. swizzle_3 $4..6 = ($4..6).xxx
+ 45. bitwise_xor_3_ints $1..3 ^= $4..6
+ 46. copy_3_slots_unmasked $4..6 = expectedPos(0..2)
+ 47. cmpeq_3_floats $1..3 = equal($1..3, $4..6)
+ 48. bitwise_and_int $2 &= $3
+ 49. bitwise_and_int $1 &= $2
+ 50. bitwise_and_int $0 &= $1
+ 51. copy_4_constants $1..4 = N
+ 52. zero_slot_unmasked $5 = 0
+ 53. copy_4_constants $6..9 = I
+ 54. copy_4_constants $10..13 = NRef
+ 55. dot_4_floats $6 = dot($6..9, $10..13)
+ 56. cmple_float $5 = lessThanEqual($5, $6)
+ 57. copy_constant $6 = 0x80000000 (-0.0)
+ 58. bitwise_and_int $5 &= $6
+ 59. swizzle_4 $5..8 = ($5..8).xxxx
+ 60. bitwise_xor_4_ints $1..4 ^= $5..8
+ 61. copy_4_slots_unmasked $5..8 = expectedPos
+ 62. cmpeq_4_floats $1..4 = equal($1..4, $5..8)
+ 63. bitwise_and_2_ints $1..2 &= $3..4
+ 64. bitwise_and_int $1 &= $2
+ 65. bitwise_and_int $0 &= $1
+ 66. copy_constant $1 = 0xBF800000 (-1.0)
+ 67. copy_slot_unmasked $2 = expectedNeg(0)
+ 68. cmpeq_float $1 = equal($1, $2)
+ 69. bitwise_and_int $0 &= $1
+ 70. copy_constant $1 = 0xBF800000 (-1.0)
+ 71. copy_constant $2 = 0xC0000000 (-2.0)
+ 72. copy_2_slots_unmasked $3..4 = expectedNeg(0..1)
+ 73. cmpeq_2_floats $1..2 = equal($1..2, $3..4)
+ 74. bitwise_and_int $1 &= $2
+ 75. bitwise_and_int $0 &= $1
+ 76. copy_constant $1 = 0x3F800000 (1.0)
+ 77. copy_constant $2 = 0x40000000 (2.0)
+ 78. copy_constant $3 = 0x40400000 (3.0)
+ 79. copy_3_slots_unmasked $4..6 = expectedPos(0..2)
+ 80. cmpeq_3_floats $1..3 = equal($1..3, $4..6)
+ 81. bitwise_and_int $2 &= $3
+ 82. bitwise_and_int $1 &= $2
+ 83. bitwise_and_int $0 &= $1
+ 84. copy_constant $1 = 0x3F800000 (1.0)
+ 85. copy_constant $2 = 0x40000000 (2.0)
+ 86. copy_constant $3 = 0x40400000 (3.0)
+ 87. copy_constant $4 = 0x40800000 (4.0)
+ 88. copy_4_slots_unmasked $5..8 = expectedPos
+ 89. cmpeq_4_floats $1..4 = equal($1..4, $5..8)
+ 90. bitwise_and_2_ints $1..2 &= $3..4
+ 91. bitwise_and_int $1 &= $2
+ 92. bitwise_and_int $0 &= $1
+ 93. swizzle_4 $0..3 = ($0..3).xxxx
+ 94. copy_4_constants $4..7 = colorRed
+ 95. copy_4_constants $8..11 = colorGreen
+ 96. mix_4_ints $0..3 = mix($4..7, $8..11, $0..3)
+ 97. load_src src.rgba = $0..3