Simplify do-while loops that do not use `continue`.
When the loop body never uses `continue`, we can omit the continue mask
entirely. This saves one temporary slot and two ops per loop iteration
(zeroing the continue mask and re-enabling the loop mask).
A similar simplification was applied to for loops at
http://review.skia.org/634359 .
Change-Id: I6c56a189c4cfeb601d9f738678a7dba257c47b74
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/635180
Reviewed-by: Brian Osman <brianosman@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
diff --git a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
index 63e256f..cb96fd9 100644
--- a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
+++ b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
@@ -840,18 +840,26 @@
fBuilder.push_loop_mask();
// Acquire a temporary slot for continue-mask storage.
- SlotRange previousContinueMask = fCurrentContinueMask;
- fCurrentContinueMask = fProgramSlots.createTemporarySlot(*fProgram.fContext->fTypes.fUInt);
+ Analysis::LoopControlFlowInfo loopInfo = Analysis::GetLoopControlFlowInfo(*d.statement());
+ SlotRange previousContinueMask;
+ if (loopInfo.fHasContinue) {
+ previousContinueMask = fCurrentContinueMask;
+ fCurrentContinueMask = fProgramSlots.createTemporarySlot(*fProgram.fContext->fTypes.fUInt);
+ }
// Write the do-loop body.
int labelID = fBuilder.nextLabelID();
fBuilder.label(labelID);
- fBuilder.zero_slots_unmasked(fCurrentContinueMask);
+ if (loopInfo.fHasContinue) {
+ fBuilder.zero_slots_unmasked(fCurrentContinueMask);
+ }
if (!this->writeStatement(*d.statement())) {
return false;
}
- fBuilder.reenable_loop_mask(fCurrentContinueMask);
+ if (loopInfo.fHasContinue) {
+ fBuilder.reenable_loop_mask(fCurrentContinueMask);
+ }
// Emit the test-expression, in order to combine it with the loop mask.
if (!this->pushExpression(*d.test())) {
@@ -869,8 +877,10 @@
// Restore the loop and continue masks.
fBuilder.pop_loop_mask();
fBuilder.disableExecutionMaskWrites();
- fProgramSlots.recycleTemporarySlot(fCurrentContinueMask);
- fCurrentContinueMask = previousContinueMask;
+ if (loopInfo.fHasContinue) {
+ fProgramSlots.recycleTemporarySlot(fCurrentContinueMask);
+ fCurrentContinueMask = previousContinueMask;
+ }
return true;
}
diff --git a/tests/sksl/shared/DeadReturnES3.skrp b/tests/sksl/shared/DeadReturnES3.skrp
index 6f96766..6be269d 100644
--- a/tests/sksl/shared/DeadReturnES3.skrp
+++ b/tests/sksl/shared/DeadReturnES3.skrp
@@ -8,169 +8,163 @@
8. store_condition_mask $31 = CondMask
9. store_condition_mask $8 = CondMask
10. store_condition_mask $35 = CondMask
- 11. branch_if_no_active_lanes branch_if_no_active_lanes +15 (#26)
+ 11. branch_if_no_active_lanes branch_if_no_active_lanes +13 (#24)
12. store_return_mask $36 = RetMask
13. store_loop_mask $37 = LoopMask
- 14. zero_slot_unmasked [temporary 0] = 0
- 15. copy_constant $38 = 0xFFFFFFFF
- 16. copy_slot_masked [test_return].result = Mask($38)
- 17. mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask)
- 18. reenable_loop_mask LoopMask |= [temporary 0]
- 19. zero_slot_unmasked $38 = 0
- 20. merge_loop_mask LoopMask &= $38
- 21. stack_rewind
- 22. branch_if_any_active_lanes branch_if_any_active_lanes -8 (#14)
- 23. load_loop_mask LoopMask = $37
- 24. load_return_mask RetMask = $36
- 25. copy_slot_unmasked $36 = [test_return].result
- 26. zero_slot_unmasked $9 = 0
- 27. merge_condition_mask CondMask = $35 & $36
- 28. branch_if_no_active_lanes branch_if_no_active_lanes +13 (#41)
- 29. store_loop_mask $10 = LoopMask
- 30. zero_slot_unmasked [temporary 0] = 0
- 31. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
- 32. reenable_loop_mask LoopMask |= [temporary 0]
- 33. zero_slot_unmasked $11 = 0
- 34. merge_loop_mask LoopMask &= $11
- 35. stack_rewind
- 36. branch_if_any_active_lanes branch_if_any_active_lanes -6 (#30)
- 37. load_loop_mask LoopMask = $10
- 38. copy_constant $10 = 0xFFFFFFFF
- 39. copy_slot_masked [test_break].result = Mask($10)
- 40. copy_slot_masked $9 = Mask($10)
- 41. load_condition_mask CondMask = $35
- 42. zero_slot_unmasked $32 = 0
- 43. merge_condition_mask CondMask = $8 & $9
- 44. branch_if_no_active_lanes branch_if_no_active_lanes +15 (#59)
- 45. store_loop_mask $33 = LoopMask
- 46. zero_slot_unmasked [temporary 0] = 0
- 47. copy_constant $34 = 0xFFFFFFFF
- 48. copy_slot_masked [temporary 0] = Mask($34)
- 49. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
- 50. reenable_loop_mask LoopMask |= [temporary 0]
- 51. zero_slot_unmasked $34 = 0
- 52. merge_loop_mask LoopMask &= $34
- 53. stack_rewind
- 54. branch_if_any_active_lanes branch_if_any_active_lanes -8 (#46)
- 55. load_loop_mask LoopMask = $33
- 56. copy_constant $33 = 0xFFFFFFFF
- 57. copy_slot_masked [test_continue].result = Mask($33)
- 58. copy_slot_masked $32 = Mask($33)
- 59. load_condition_mask CondMask = $8
- 60. zero_slot_unmasked $40 = 0
- 61. merge_condition_mask CondMask = $31 & $32
- 62. branch_if_no_active_lanes branch_if_no_active_lanes +27 (#89)
- 63. store_return_mask $41 = RetMask
- 64. store_loop_mask $42 = LoopMask
- 65. zero_slot_unmasked [temporary 0] = 0
- 66. zero_slot_unmasked $43 = 0
- 67. copy_constant $44 = colorGreen(1)
- 68. cmplt_float $43 = lessThan($43, $44)
- 69. branch_if_no_active_lanes_eq branch +5 (#74) if no lanes of $43 == 0xFFFFFFFF
- 70. copy_constant $44 = 0xFFFFFFFF
- 71. copy_slot_masked [test_if_return].result = Mask($44)
- 72. mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask)
- 73. jump jump +2 (#75)
- 74. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
- 75. copy_constant $43 = 0xFFFFFFFF
- 76. copy_slot_masked [temporary 0] = Mask($43)
- 77. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
- 78. reenable_loop_mask LoopMask |= [temporary 0]
- 79. zero_slot_unmasked $43 = 0
- 80. merge_loop_mask LoopMask &= $43
- 81. stack_rewind
- 82. branch_if_any_active_lanes branch_if_any_active_lanes -17 (#65)
- 83. load_loop_mask LoopMask = $42
- 84. zero_slot_unmasked $42 = 0
- 85. copy_slot_masked [test_if_return].result = Mask($42)
- 86. load_return_mask RetMask = $41
- 87. copy_slot_unmasked $41 = [test_if_return].result
- 88. copy_slot_masked $40 = Mask($41)
- 89. load_condition_mask CondMask = $31
- 90. zero_slot_unmasked $18 = 0
- 91. merge_condition_mask CondMask = $39 & $40
- 92. branch_if_no_active_lanes branch_if_no_active_lanes +21 (#113)
- 93. store_loop_mask $19 = LoopMask
- 94. zero_slot_unmasked [temporary 0] = 0
- 95. zero_slot_unmasked $20 = 0
- 96. copy_constant $21 = colorGreen(1)
- 97. cmplt_float $20 = lessThan($20, $21)
- 98. branch_if_no_active_lanes_eq branch +3 (#101) if no lanes of $20 == 0xFFFFFFFF
+ 14. copy_constant $38 = 0xFFFFFFFF
+ 15. copy_slot_masked [test_return].result = Mask($38)
+ 16. mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask)
+ 17. zero_slot_unmasked $38 = 0
+ 18. merge_loop_mask LoopMask &= $38
+ 19. stack_rewind
+ 20. branch_if_any_active_lanes branch_if_any_active_lanes -6 (#14)
+ 21. load_loop_mask LoopMask = $37
+ 22. load_return_mask RetMask = $36
+ 23. copy_slot_unmasked $36 = [test_return].result
+ 24. zero_slot_unmasked $9 = 0
+ 25. merge_condition_mask CondMask = $35 & $36
+ 26. branch_if_no_active_lanes branch_if_no_active_lanes +11 (#37)
+ 27. store_loop_mask $10 = LoopMask
+ 28. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
+ 29. zero_slot_unmasked $11 = 0
+ 30. merge_loop_mask LoopMask &= $11
+ 31. stack_rewind
+ 32. branch_if_any_active_lanes branch_if_any_active_lanes -4 (#28)
+ 33. load_loop_mask LoopMask = $10
+ 34. copy_constant $10 = 0xFFFFFFFF
+ 35. copy_slot_masked [test_break].result = Mask($10)
+ 36. copy_slot_masked $9 = Mask($10)
+ 37. load_condition_mask CondMask = $35
+ 38. zero_slot_unmasked $32 = 0
+ 39. merge_condition_mask CondMask = $8 & $9
+ 40. branch_if_no_active_lanes branch_if_no_active_lanes +15 (#55)
+ 41. store_loop_mask $33 = LoopMask
+ 42. zero_slot_unmasked [temporary 0] = 0
+ 43. copy_constant $34 = 0xFFFFFFFF
+ 44. copy_slot_masked [temporary 0] = Mask($34)
+ 45. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
+ 46. reenable_loop_mask LoopMask |= [temporary 0]
+ 47. zero_slot_unmasked $34 = 0
+ 48. merge_loop_mask LoopMask &= $34
+ 49. stack_rewind
+ 50. branch_if_any_active_lanes branch_if_any_active_lanes -8 (#42)
+ 51. load_loop_mask LoopMask = $33
+ 52. copy_constant $33 = 0xFFFFFFFF
+ 53. copy_slot_masked [test_continue].result = Mask($33)
+ 54. copy_slot_masked $32 = Mask($33)
+ 55. load_condition_mask CondMask = $8
+ 56. zero_slot_unmasked $40 = 0
+ 57. merge_condition_mask CondMask = $31 & $32
+ 58. branch_if_no_active_lanes branch_if_no_active_lanes +27 (#85)
+ 59. store_return_mask $41 = RetMask
+ 60. store_loop_mask $42 = LoopMask
+ 61. zero_slot_unmasked [temporary 0] = 0
+ 62. zero_slot_unmasked $43 = 0
+ 63. copy_constant $44 = colorGreen(1)
+ 64. cmplt_float $43 = lessThan($43, $44)
+ 65. branch_if_no_active_lanes_eq branch +5 (#70) if no lanes of $43 == 0xFFFFFFFF
+ 66. copy_constant $44 = 0xFFFFFFFF
+ 67. copy_slot_masked [test_if_return].result = Mask($44)
+ 68. mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask)
+ 69. jump jump +2 (#71)
+ 70. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
+ 71. copy_constant $43 = 0xFFFFFFFF
+ 72. copy_slot_masked [temporary 0] = Mask($43)
+ 73. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
+ 74. reenable_loop_mask LoopMask |= [temporary 0]
+ 75. zero_slot_unmasked $43 = 0
+ 76. merge_loop_mask LoopMask &= $43
+ 77. stack_rewind
+ 78. branch_if_any_active_lanes branch_if_any_active_lanes -17 (#61)
+ 79. load_loop_mask LoopMask = $42
+ 80. zero_slot_unmasked $42 = 0
+ 81. copy_slot_masked [test_if_return].result = Mask($42)
+ 82. load_return_mask RetMask = $41
+ 83. copy_slot_unmasked $41 = [test_if_return].result
+ 84. copy_slot_masked $40 = Mask($41)
+ 85. load_condition_mask CondMask = $31
+ 86. zero_slot_unmasked $18 = 0
+ 87. merge_condition_mask CondMask = $39 & $40
+ 88. branch_if_no_active_lanes branch_if_no_active_lanes +21 (#109)
+ 89. store_loop_mask $19 = LoopMask
+ 90. zero_slot_unmasked [temporary 0] = 0
+ 91. zero_slot_unmasked $20 = 0
+ 92. copy_constant $21 = colorGreen(1)
+ 93. cmplt_float $20 = lessThan($20, $21)
+ 94. branch_if_no_active_lanes_eq branch +3 (#97) if no lanes of $20 == 0xFFFFFFFF
+ 95. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
+ 96. jump jump +4 (#100)
+ 97. copy_constant $21 = 0xFFFFFFFF
+ 98. copy_slot_masked [temporary 0] = Mask($21)
99. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
- 100. jump jump +4 (#104)
- 101. copy_constant $21 = 0xFFFFFFFF
- 102. copy_slot_masked [temporary 0] = Mask($21)
- 103. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
- 104. reenable_loop_mask LoopMask |= [temporary 0]
- 105. zero_slot_unmasked $20 = 0
- 106. merge_loop_mask LoopMask &= $20
- 107. stack_rewind
- 108. branch_if_any_active_lanes branch_if_any_active_lanes -14 (#94)
- 109. load_loop_mask LoopMask = $19
- 110. copy_constant $19 = 0xFFFFFFFF
- 111. copy_slot_masked [test_if_break].result = Mask($19)
- 112. copy_slot_masked $18 = Mask($19)
- 113. load_condition_mask CondMask = $39
- 114. zero_slot_unmasked $26 = 0
- 115. merge_condition_mask CondMask = $17 & $18
- 116. branch_if_no_active_lanes branch_if_no_active_lanes +24 (#140)
- 117. store_return_mask $27 = RetMask
- 118. store_loop_mask $28 = LoopMask
- 119. zero_slot_unmasked [temporary 0] = 0
- 120. copy_constant $29 = colorGreen(1)
- 121. zero_slot_unmasked $30 = 0
- 122. cmpeq_float $29 = equal($29, $30)
- 123. branch_if_no_active_lanes_eq branch +5 (#128) if no lanes of $29 == 0xFFFFFFFF
- 124. zero_slot_unmasked $30 = 0
- 125. copy_slot_masked [test_else].result = Mask($30)
- 126. mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask)
- 127. jump jump +4 (#131)
- 128. copy_constant $30 = 0xFFFFFFFF
- 129. copy_slot_masked [test_else].result = Mask($30)
- 130. mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask)
- 131. reenable_loop_mask LoopMask |= [temporary 0]
- 132. zero_slot_unmasked $29 = 0
- 133. merge_loop_mask LoopMask &= $29
- 134. stack_rewind
- 135. branch_if_any_active_lanes branch_if_any_active_lanes -16 (#119)
- 136. load_loop_mask LoopMask = $28
- 137. load_return_mask RetMask = $27
- 138. copy_slot_unmasked $27 = [test_else].result
- 139. copy_slot_masked $26 = Mask($27)
- 140. load_condition_mask CondMask = $17
- 141. zero_slot_unmasked $23 = 0
- 142. merge_condition_mask CondMask = $25 & $26
- 143. branch_if_no_active_lanes branch_if_no_active_lanes +4 (#147)
- 144. copy_constant $24 = 0xFFFFFFFF
- 145. copy_slot_masked [test_loop_return].result = Mask($24)
- 146. copy_slot_masked $23 = Mask($24)
- 147. load_condition_mask CondMask = $25
- 148. zero_slot_unmasked $13 = 0
- 149. merge_condition_mask CondMask = $22 & $23
- 150. branch_if_no_active_lanes branch_if_no_active_lanes +19 (#169)
- 151. zero_slot_unmasked x = 0
- 152. store_loop_mask $14 = LoopMask
- 153. jump jump +6 (#159)
- 154. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
- 155. copy_slot_unmasked $15 = x
- 156. copy_constant $16 = 0x00000001 (1.401298e-45)
- 157. add_int $15 += $16
- 158. copy_slot_masked x = Mask($15)
- 159. copy_slot_unmasked $15 = x
- 160. copy_constant $16 = 0x00000001 (1.401298e-45)
- 161. cmple_int $15 = lessThanEqual($15, $16)
- 162. merge_loop_mask LoopMask &= $15
- 163. stack_rewind
- 164. branch_if_any_active_lanes branch_if_any_active_lanes -10 (#154)
- 165. load_loop_mask LoopMask = $14
- 166. copy_constant $14 = 0xFFFFFFFF
- 167. copy_slot_masked [test_loop_break].result = Mask($14)
- 168. copy_slot_masked $13 = Mask($14)
- 169. load_condition_mask CondMask = $22
- 170. copy_4_constants $0..3 = colorRed
- 171. merge_condition_mask CondMask = $12 & $13
- 172. copy_4_constants $4..7 = colorGreen
- 173. copy_4_slots_masked $0..3 = Mask($4..7)
- 174. load_condition_mask CondMask = $12
- 175. copy_4_slots_unmasked [main].result = $0..3
- 176. load_src src.rgba = [main].result
+ 100. reenable_loop_mask LoopMask |= [temporary 0]
+ 101. zero_slot_unmasked $20 = 0
+ 102. merge_loop_mask LoopMask &= $20
+ 103. stack_rewind
+ 104. branch_if_any_active_lanes branch_if_any_active_lanes -14 (#90)
+ 105. load_loop_mask LoopMask = $19
+ 106. copy_constant $19 = 0xFFFFFFFF
+ 107. copy_slot_masked [test_if_break].result = Mask($19)
+ 108. copy_slot_masked $18 = Mask($19)
+ 109. load_condition_mask CondMask = $39
+ 110. zero_slot_unmasked $26 = 0
+ 111. merge_condition_mask CondMask = $17 & $18
+ 112. branch_if_no_active_lanes branch_if_no_active_lanes +22 (#134)
+ 113. store_return_mask $27 = RetMask
+ 114. store_loop_mask $28 = LoopMask
+ 115. copy_constant $29 = colorGreen(1)
+ 116. zero_slot_unmasked $30 = 0
+ 117. cmpeq_float $29 = equal($29, $30)
+ 118. branch_if_no_active_lanes_eq branch +5 (#123) if no lanes of $29 == 0xFFFFFFFF
+ 119. zero_slot_unmasked $30 = 0
+ 120. copy_slot_masked [test_else].result = Mask($30)
+ 121. mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask)
+ 122. jump jump +4 (#126)
+ 123. copy_constant $30 = 0xFFFFFFFF
+ 124. copy_slot_masked [test_else].result = Mask($30)
+ 125. mask_off_return_mask RetMask &= ~(CondMask & LoopMask & RetMask)
+ 126. zero_slot_unmasked $29 = 0
+ 127. merge_loop_mask LoopMask &= $29
+ 128. stack_rewind
+ 129. branch_if_any_active_lanes branch_if_any_active_lanes -14 (#115)
+ 130. load_loop_mask LoopMask = $28
+ 131. load_return_mask RetMask = $27
+ 132. copy_slot_unmasked $27 = [test_else].result
+ 133. copy_slot_masked $26 = Mask($27)
+ 134. load_condition_mask CondMask = $17
+ 135. zero_slot_unmasked $23 = 0
+ 136. merge_condition_mask CondMask = $25 & $26
+ 137. branch_if_no_active_lanes branch_if_no_active_lanes +4 (#141)
+ 138. copy_constant $24 = 0xFFFFFFFF
+ 139. copy_slot_masked [test_loop_return].result = Mask($24)
+ 140. copy_slot_masked $23 = Mask($24)
+ 141. load_condition_mask CondMask = $25
+ 142. zero_slot_unmasked $13 = 0
+ 143. merge_condition_mask CondMask = $22 & $23
+ 144. branch_if_no_active_lanes branch_if_no_active_lanes +19 (#163)
+ 145. zero_slot_unmasked x = 0
+ 146. store_loop_mask $14 = LoopMask
+ 147. jump jump +6 (#153)
+ 148. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
+ 149. copy_slot_unmasked $15 = x
+ 150. copy_constant $16 = 0x00000001 (1.401298e-45)
+ 151. add_int $15 += $16
+ 152. copy_slot_masked x = Mask($15)
+ 153. copy_slot_unmasked $15 = x
+ 154. copy_constant $16 = 0x00000001 (1.401298e-45)
+ 155. cmple_int $15 = lessThanEqual($15, $16)
+ 156. merge_loop_mask LoopMask &= $15
+ 157. stack_rewind
+ 158. branch_if_any_active_lanes branch_if_any_active_lanes -10 (#148)
+ 159. load_loop_mask LoopMask = $14
+ 160. copy_constant $14 = 0xFFFFFFFF
+ 161. copy_slot_masked [test_loop_break].result = Mask($14)
+ 162. copy_slot_masked $13 = Mask($14)
+ 163. load_condition_mask CondMask = $22
+ 164. copy_4_constants $0..3 = colorRed
+ 165. merge_condition_mask CondMask = $12 & $13
+ 166. copy_4_constants $4..7 = colorGreen
+ 167. copy_4_slots_masked $0..3 = Mask($4..7)
+ 168. load_condition_mask CondMask = $12
+ 169. copy_4_slots_unmasked [main].result = $0..3
+ 170. load_src src.rgba = [main].result
diff --git a/tests/sksl/shared/DoWhileControlFlow.skrp b/tests/sksl/shared/DoWhileControlFlow.skrp
index 4a3a77f..90b6be8 100644
--- a/tests/sksl/shared/DoWhileControlFlow.skrp
+++ b/tests/sksl/shared/DoWhileControlFlow.skrp
@@ -5,51 +5,49 @@
5. copy_constant x(2) = 0x3F800000 (1.0)
6. copy_constant x(3) = 0x3F800000 (1.0)
7. store_loop_mask $0 = LoopMask
- 8. zero_slot_unmasked [temporary 0] = 0
- 9. copy_slot_unmasked $1 = x(0)
- 10. copy_constant $2 = 0x3E800000 (0.25)
- 11. sub_float $1 -= $2
- 12. copy_slot_masked x(0) = Mask($1)
- 13. store_condition_mask $1 = CondMask
- 14. copy_slot_unmasked $2 = x(0)
- 15. zero_slot_unmasked $3 = 0
- 16. cmple_float $2 = lessThanEqual($2, $3)
- 17. merge_condition_mask CondMask = $1 & $2
- 18. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
- 19. load_condition_mask CondMask = $1
- 20. reenable_loop_mask LoopMask |= [temporary 0]
- 21. copy_slot_unmasked $1 = x(3)
- 22. copy_constant $2 = 0x3F800000 (1.0)
- 23. cmpeq_float $1 = equal($1, $2)
- 24. merge_loop_mask LoopMask &= $1
- 25. stack_rewind
- 26. branch_if_any_active_lanes branch_if_any_active_lanes -18 (#8)
- 27. load_loop_mask LoopMask = $0
- 28. store_loop_mask $0 = LoopMask
- 29. zero_slot_unmasked [temporary 0] = 0
- 30. copy_slot_unmasked $1 = x(2)
- 31. copy_constant $2 = 0x3E800000 (0.25)
- 32. sub_float $1 -= $2
- 33. copy_slot_masked x(2) = Mask($1)
- 34. store_condition_mask $1 = CondMask
- 35. copy_slot_unmasked $2 = x(3)
- 36. copy_constant $3 = 0x3F800000 (1.0)
- 37. cmpeq_float $2 = equal($2, $3)
- 38. merge_condition_mask CondMask = $1 & $2
- 39. copy_constant $3 = 0xFFFFFFFF
- 40. copy_slot_masked [temporary 0] = Mask($3)
- 41. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
- 42. load_condition_mask CondMask = $1
- 43. zero_slot_unmasked $1 = 0
- 44. copy_slot_masked x(1) = Mask($1)
- 45. reenable_loop_mask LoopMask |= [temporary 0]
- 46. zero_slot_unmasked $1 = 0
- 47. copy_slot_unmasked $2 = x(2)
- 48. cmplt_float $1 = lessThan($1, $2)
- 49. merge_loop_mask LoopMask &= $1
- 50. stack_rewind
- 51. branch_if_any_active_lanes branch_if_any_active_lanes -22 (#29)
- 52. load_loop_mask LoopMask = $0
- 53. copy_4_slots_unmasked $0..3 = x
- 54. copy_4_slots_unmasked [main].result = $0..3
- 55. load_src src.rgba = [main].result
+ 8. copy_slot_unmasked $1 = x(0)
+ 9. copy_constant $2 = 0x3E800000 (0.25)
+ 10. sub_float $1 -= $2
+ 11. copy_slot_masked x(0) = Mask($1)
+ 12. store_condition_mask $1 = CondMask
+ 13. copy_slot_unmasked $2 = x(0)
+ 14. zero_slot_unmasked $3 = 0
+ 15. cmple_float $2 = lessThanEqual($2, $3)
+ 16. merge_condition_mask CondMask = $1 & $2
+ 17. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
+ 18. load_condition_mask CondMask = $1
+ 19. copy_slot_unmasked $1 = x(3)
+ 20. copy_constant $2 = 0x3F800000 (1.0)
+ 21. cmpeq_float $1 = equal($1, $2)
+ 22. merge_loop_mask LoopMask &= $1
+ 23. stack_rewind
+ 24. branch_if_any_active_lanes branch_if_any_active_lanes -16 (#8)
+ 25. load_loop_mask LoopMask = $0
+ 26. store_loop_mask $0 = LoopMask
+ 27. zero_slot_unmasked [temporary 0] = 0
+ 28. copy_slot_unmasked $1 = x(2)
+ 29. copy_constant $2 = 0x3E800000 (0.25)
+ 30. sub_float $1 -= $2
+ 31. copy_slot_masked x(2) = Mask($1)
+ 32. store_condition_mask $1 = CondMask
+ 33. copy_slot_unmasked $2 = x(3)
+ 34. copy_constant $3 = 0x3F800000 (1.0)
+ 35. cmpeq_float $2 = equal($2, $3)
+ 36. merge_condition_mask CondMask = $1 & $2
+ 37. copy_constant $3 = 0xFFFFFFFF
+ 38. copy_slot_masked [temporary 0] = Mask($3)
+ 39. mask_off_loop_mask LoopMask &= ~(CondMask & LoopMask & RetMask)
+ 40. load_condition_mask CondMask = $1
+ 41. zero_slot_unmasked $1 = 0
+ 42. copy_slot_masked x(1) = Mask($1)
+ 43. reenable_loop_mask LoopMask |= [temporary 0]
+ 44. zero_slot_unmasked $1 = 0
+ 45. copy_slot_unmasked $2 = x(2)
+ 46. cmplt_float $1 = lessThan($1, $2)
+ 47. merge_loop_mask LoopMask &= $1
+ 48. stack_rewind
+ 49. branch_if_any_active_lanes branch_if_any_active_lanes -22 (#27)
+ 50. load_loop_mask LoopMask = $0
+ 51. copy_4_slots_unmasked $0..3 = x
+ 52. copy_4_slots_unmasked [main].result = $0..3
+ 53. load_src src.rgba = [main].result
diff --git a/tests/sksl/shared/EmptyBlocksES3.skrp b/tests/sksl/shared/EmptyBlocksES3.skrp
index 5555ae3..6f2edbb 100644
--- a/tests/sksl/shared/EmptyBlocksES3.skrp
+++ b/tests/sksl/shared/EmptyBlocksES3.skrp
@@ -24,15 +24,13 @@
24. branch_if_any_active_lanes branch_if_any_active_lanes -5 (#19)
25. load_loop_mask LoopMask = $0
26. store_loop_mask $0 = LoopMask
- 27. zero_slot_unmasked [temporary 0] = 0
- 28. reenable_loop_mask LoopMask |= [temporary 0]
- 29. copy_constant $1 = colorWhite(0)
- 30. copy_constant $2 = 0x40000000 (2.0)
- 31. cmpeq_float $1 = equal($1, $2)
- 32. merge_loop_mask LoopMask &= $1
- 33. stack_rewind
- 34. branch_if_any_active_lanes branch_if_any_active_lanes -7 (#27)
- 35. load_loop_mask LoopMask = $0
- 36. copy_4_slots_unmasked $0..3 = color
- 37. copy_4_slots_unmasked [main].result = $0..3
- 38. load_src src.rgba = [main].result
+ 27. copy_constant $1 = colorWhite(0)
+ 28. copy_constant $2 = 0x40000000 (2.0)
+ 29. cmpeq_float $1 = equal($1, $2)
+ 30. merge_loop_mask LoopMask &= $1
+ 31. stack_rewind
+ 32. branch_if_any_active_lanes branch_if_any_active_lanes -5 (#27)
+ 33. load_loop_mask LoopMask = $0
+ 34. copy_4_slots_unmasked $0..3 = color
+ 35. copy_4_slots_unmasked [main].result = $0..3
+ 36. load_src src.rgba = [main].result