Simplify do-while loops that do not use `continue`.

We can omit the continue mask entirely when we know that `continue`
isn't used in the loop body. This saves a temporary slot, plus two ops
(`zero_slot_unmasked` and `reenable_loop_mask`) per loop iteration.

A similar simplification was applied to `for` loops in
http://review.skia.org/634359 .

Change-Id: I6c56a189c4cfeb601d9f738678a7dba257c47b74
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/635180
Reviewed-by: Brian Osman <brianosman@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
diff --git a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
index 63e256f..cb96fd9 100644
--- a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
+++ b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
@@ -840,18 +840,26 @@
     fBuilder.push_loop_mask();
 
     // Acquire a temporary slot for continue-mask storage.
-    SlotRange previousContinueMask = fCurrentContinueMask;
-    fCurrentContinueMask = fProgramSlots.createTemporarySlot(*fProgram.fContext->fTypes.fUInt);
+    Analysis::LoopControlFlowInfo loopInfo = Analysis::GetLoopControlFlowInfo(*d.statement());
+    SlotRange previousContinueMask;
+    if (loopInfo.fHasContinue) {
+        previousContinueMask = fCurrentContinueMask;
+        fCurrentContinueMask = fProgramSlots.createTemporarySlot(*fProgram.fContext->fTypes.fUInt);
+    }
 
     // Write the do-loop body.
     int labelID = fBuilder.nextLabelID();
     fBuilder.label(labelID);
 
-    fBuilder.zero_slots_unmasked(fCurrentContinueMask);
+    if (loopInfo.fHasContinue) {
+        fBuilder.zero_slots_unmasked(fCurrentContinueMask);
+    }
     if (!this->writeStatement(*d.statement())) {
         return false;
     }
-    fBuilder.reenable_loop_mask(fCurrentContinueMask);
+    if (loopInfo.fHasContinue) {
+        fBuilder.reenable_loop_mask(fCurrentContinueMask);
+    }
 
     // Emit the test-expression, in order to combine it with the loop mask.
     if (!this->pushExpression(*d.test())) {
@@ -869,8 +877,10 @@
     // Restore the loop and continue masks.
     fBuilder.pop_loop_mask();
     fBuilder.disableExecutionMaskWrites();
-    fProgramSlots.recycleTemporarySlot(fCurrentContinueMask);
-    fCurrentContinueMask = previousContinueMask;
+    if (loopInfo.fHasContinue) {
+        fProgramSlots.recycleTemporarySlot(fCurrentContinueMask);
+        fCurrentContinueMask = previousContinueMask;
+    }
 
     return true;
 }
diff --git a/tests/sksl/shared/DeadReturnES3.skrp b/tests/sksl/shared/DeadReturnES3.skrp
index 6f96766..6be269d 100644
--- a/tests/sksl/shared/DeadReturnES3.skrp
+++ b/tests/sksl/shared/DeadReturnES3.skrp
@@ -8,169 +8,163 @@
     8. store_condition_mask           $31 = CondMask
     9. store_condition_mask           $8 = CondMask
    10. store_condition_mask           $35 = CondMask
-   11. branch_if_no_active_lanes      branch_if_no_active_lanes +15 (#26)
+   11. branch_if_no_active_lanes      branch_if_no_active_lanes +13 (#24)
    12. store_return_mask              $36 = RetMask
    13. store_loop_mask                $37 = LoopMask
-   14. zero_slot_unmasked             [temporary 0] = 0
-   15. copy_constant                  $38 = 0xFFFFFFFF
-   16. copy_slot_masked               [test_return].result = Mask($38)
-   17. mask_off_return_mask           RetMask &= ~(CondMask & LoopMask & RetMask)
-   18. reenable_loop_mask             LoopMask |= [temporary 0]
-   19. zero_slot_unmasked             $38 = 0
-   20. merge_loop_mask                LoopMask &= $38
-   21. stack_rewind
-   22. branch_if_any_active_lanes     branch_if_any_active_lanes -8 (#14)
-   23. load_loop_mask                 LoopMask = $37
-   24. load_return_mask               RetMask = $36
-   25. copy_slot_unmasked             $36 = [test_return].result
-   26. zero_slot_unmasked             $9 = 0
-   27. merge_condition_mask           CondMask = $35 & $36
-   28. branch_if_no_active_lanes      branch_if_no_active_lanes +13 (#41)
-   29. store_loop_mask                $10 = LoopMask
-   30. zero_slot_unmasked             [temporary 0] = 0
-   31. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-   32. reenable_loop_mask             LoopMask |= [temporary 0]
-   33. zero_slot_unmasked             $11 = 0
-   34. merge_loop_mask                LoopMask &= $11
-   35. stack_rewind
-   36. branch_if_any_active_lanes     branch_if_any_active_lanes -6 (#30)
-   37. load_loop_mask                 LoopMask = $10
-   38. copy_constant                  $10 = 0xFFFFFFFF
-   39. copy_slot_masked               [test_break].result = Mask($10)
-   40. copy_slot_masked               $9 = Mask($10)
-   41. load_condition_mask            CondMask = $35
-   42. zero_slot_unmasked             $32 = 0
-   43. merge_condition_mask           CondMask = $8 & $9
-   44. branch_if_no_active_lanes      branch_if_no_active_lanes +15 (#59)
-   45. store_loop_mask                $33 = LoopMask
-   46. zero_slot_unmasked             [temporary 0] = 0
-   47. copy_constant                  $34 = 0xFFFFFFFF
-   48. copy_slot_masked               [temporary 0] = Mask($34)
-   49. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-   50. reenable_loop_mask             LoopMask |= [temporary 0]
-   51. zero_slot_unmasked             $34 = 0
-   52. merge_loop_mask                LoopMask &= $34
-   53. stack_rewind
-   54. branch_if_any_active_lanes     branch_if_any_active_lanes -8 (#46)
-   55. load_loop_mask                 LoopMask = $33
-   56. copy_constant                  $33 = 0xFFFFFFFF
-   57. copy_slot_masked               [test_continue].result = Mask($33)
-   58. copy_slot_masked               $32 = Mask($33)
-   59. load_condition_mask            CondMask = $8
-   60. zero_slot_unmasked             $40 = 0
-   61. merge_condition_mask           CondMask = $31 & $32
-   62. branch_if_no_active_lanes      branch_if_no_active_lanes +27 (#89)
-   63. store_return_mask              $41 = RetMask
-   64. store_loop_mask                $42 = LoopMask
-   65. zero_slot_unmasked             [temporary 0] = 0
-   66. zero_slot_unmasked             $43 = 0
-   67. copy_constant                  $44 = colorGreen(1)
-   68. cmplt_float                    $43 = lessThan($43, $44)
-   69. branch_if_no_active_lanes_eq   branch +5 (#74) if no lanes of $43 == 0xFFFFFFFF
-   70. copy_constant                  $44 = 0xFFFFFFFF
-   71. copy_slot_masked               [test_if_return].result = Mask($44)
-   72. mask_off_return_mask           RetMask &= ~(CondMask & LoopMask & RetMask)
-   73. jump                           jump +2 (#75)
-   74. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-   75. copy_constant                  $43 = 0xFFFFFFFF
-   76. copy_slot_masked               [temporary 0] = Mask($43)
-   77. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-   78. reenable_loop_mask             LoopMask |= [temporary 0]
-   79. zero_slot_unmasked             $43 = 0
-   80. merge_loop_mask                LoopMask &= $43
-   81. stack_rewind
-   82. branch_if_any_active_lanes     branch_if_any_active_lanes -17 (#65)
-   83. load_loop_mask                 LoopMask = $42
-   84. zero_slot_unmasked             $42 = 0
-   85. copy_slot_masked               [test_if_return].result = Mask($42)
-   86. load_return_mask               RetMask = $41
-   87. copy_slot_unmasked             $41 = [test_if_return].result
-   88. copy_slot_masked               $40 = Mask($41)
-   89. load_condition_mask            CondMask = $31
-   90. zero_slot_unmasked             $18 = 0
-   91. merge_condition_mask           CondMask = $39 & $40
-   92. branch_if_no_active_lanes      branch_if_no_active_lanes +21 (#113)
-   93. store_loop_mask                $19 = LoopMask
-   94. zero_slot_unmasked             [temporary 0] = 0
-   95. zero_slot_unmasked             $20 = 0
-   96. copy_constant                  $21 = colorGreen(1)
-   97. cmplt_float                    $20 = lessThan($20, $21)
-   98. branch_if_no_active_lanes_eq   branch +3 (#101) if no lanes of $20 == 0xFFFFFFFF
+   14. copy_constant                  $38 = 0xFFFFFFFF
+   15. copy_slot_masked               [test_return].result = Mask($38)
+   16. mask_off_return_mask           RetMask &= ~(CondMask & LoopMask & RetMask)
+   17. zero_slot_unmasked             $38 = 0
+   18. merge_loop_mask                LoopMask &= $38
+   19. stack_rewind
+   20. branch_if_any_active_lanes     branch_if_any_active_lanes -6 (#14)
+   21. load_loop_mask                 LoopMask = $37
+   22. load_return_mask               RetMask = $36
+   23. copy_slot_unmasked             $36 = [test_return].result
+   24. zero_slot_unmasked             $9 = 0
+   25. merge_condition_mask           CondMask = $35 & $36
+   26. branch_if_no_active_lanes      branch_if_no_active_lanes +11 (#37)
+   27. store_loop_mask                $10 = LoopMask
+   28. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+   29. zero_slot_unmasked             $11 = 0
+   30. merge_loop_mask                LoopMask &= $11
+   31. stack_rewind
+   32. branch_if_any_active_lanes     branch_if_any_active_lanes -4 (#28)
+   33. load_loop_mask                 LoopMask = $10
+   34. copy_constant                  $10 = 0xFFFFFFFF
+   35. copy_slot_masked               [test_break].result = Mask($10)
+   36. copy_slot_masked               $9 = Mask($10)
+   37. load_condition_mask            CondMask = $35
+   38. zero_slot_unmasked             $32 = 0
+   39. merge_condition_mask           CondMask = $8 & $9
+   40. branch_if_no_active_lanes      branch_if_no_active_lanes +15 (#55)
+   41. store_loop_mask                $33 = LoopMask
+   42. zero_slot_unmasked             [temporary 0] = 0
+   43. copy_constant                  $34 = 0xFFFFFFFF
+   44. copy_slot_masked               [temporary 0] = Mask($34)
+   45. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+   46. reenable_loop_mask             LoopMask |= [temporary 0]
+   47. zero_slot_unmasked             $34 = 0
+   48. merge_loop_mask                LoopMask &= $34
+   49. stack_rewind
+   50. branch_if_any_active_lanes     branch_if_any_active_lanes -8 (#42)
+   51. load_loop_mask                 LoopMask = $33
+   52. copy_constant                  $33 = 0xFFFFFFFF
+   53. copy_slot_masked               [test_continue].result = Mask($33)
+   54. copy_slot_masked               $32 = Mask($33)
+   55. load_condition_mask            CondMask = $8
+   56. zero_slot_unmasked             $40 = 0
+   57. merge_condition_mask           CondMask = $31 & $32
+   58. branch_if_no_active_lanes      branch_if_no_active_lanes +27 (#85)
+   59. store_return_mask              $41 = RetMask
+   60. store_loop_mask                $42 = LoopMask
+   61. zero_slot_unmasked             [temporary 0] = 0
+   62. zero_slot_unmasked             $43 = 0
+   63. copy_constant                  $44 = colorGreen(1)
+   64. cmplt_float                    $43 = lessThan($43, $44)
+   65. branch_if_no_active_lanes_eq   branch +5 (#70) if no lanes of $43 == 0xFFFFFFFF
+   66. copy_constant                  $44 = 0xFFFFFFFF
+   67. copy_slot_masked               [test_if_return].result = Mask($44)
+   68. mask_off_return_mask           RetMask &= ~(CondMask & LoopMask & RetMask)
+   69. jump                           jump +2 (#71)
+   70. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+   71. copy_constant                  $43 = 0xFFFFFFFF
+   72. copy_slot_masked               [temporary 0] = Mask($43)
+   73. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+   74. reenable_loop_mask             LoopMask |= [temporary 0]
+   75. zero_slot_unmasked             $43 = 0
+   76. merge_loop_mask                LoopMask &= $43
+   77. stack_rewind
+   78. branch_if_any_active_lanes     branch_if_any_active_lanes -17 (#61)
+   79. load_loop_mask                 LoopMask = $42
+   80. zero_slot_unmasked             $42 = 0
+   81. copy_slot_masked               [test_if_return].result = Mask($42)
+   82. load_return_mask               RetMask = $41
+   83. copy_slot_unmasked             $41 = [test_if_return].result
+   84. copy_slot_masked               $40 = Mask($41)
+   85. load_condition_mask            CondMask = $31
+   86. zero_slot_unmasked             $18 = 0
+   87. merge_condition_mask           CondMask = $39 & $40
+   88. branch_if_no_active_lanes      branch_if_no_active_lanes +21 (#109)
+   89. store_loop_mask                $19 = LoopMask
+   90. zero_slot_unmasked             [temporary 0] = 0
+   91. zero_slot_unmasked             $20 = 0
+   92. copy_constant                  $21 = colorGreen(1)
+   93. cmplt_float                    $20 = lessThan($20, $21)
+   94. branch_if_no_active_lanes_eq   branch +3 (#97) if no lanes of $20 == 0xFFFFFFFF
+   95. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+   96. jump                           jump +4 (#100)
+   97. copy_constant                  $21 = 0xFFFFFFFF
+   98. copy_slot_masked               [temporary 0] = Mask($21)
    99. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  100. jump                           jump +4 (#104)
-  101. copy_constant                  $21 = 0xFFFFFFFF
-  102. copy_slot_masked               [temporary 0] = Mask($21)
-  103. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  104. reenable_loop_mask             LoopMask |= [temporary 0]
-  105. zero_slot_unmasked             $20 = 0
-  106. merge_loop_mask                LoopMask &= $20
-  107. stack_rewind
-  108. branch_if_any_active_lanes     branch_if_any_active_lanes -14 (#94)
-  109. load_loop_mask                 LoopMask = $19
-  110. copy_constant                  $19 = 0xFFFFFFFF
-  111. copy_slot_masked               [test_if_break].result = Mask($19)
-  112. copy_slot_masked               $18 = Mask($19)
-  113. load_condition_mask            CondMask = $39
-  114. zero_slot_unmasked             $26 = 0
-  115. merge_condition_mask           CondMask = $17 & $18
-  116. branch_if_no_active_lanes      branch_if_no_active_lanes +24 (#140)
-  117. store_return_mask              $27 = RetMask
-  118. store_loop_mask                $28 = LoopMask
-  119. zero_slot_unmasked             [temporary 0] = 0
-  120. copy_constant                  $29 = colorGreen(1)
-  121. zero_slot_unmasked             $30 = 0
-  122. cmpeq_float                    $29 = equal($29, $30)
-  123. branch_if_no_active_lanes_eq   branch +5 (#128) if no lanes of $29 == 0xFFFFFFFF
-  124. zero_slot_unmasked             $30 = 0
-  125. copy_slot_masked               [test_else].result = Mask($30)
-  126. mask_off_return_mask           RetMask &= ~(CondMask & LoopMask & RetMask)
-  127. jump                           jump +4 (#131)
-  128. copy_constant                  $30 = 0xFFFFFFFF
-  129. copy_slot_masked               [test_else].result = Mask($30)
-  130. mask_off_return_mask           RetMask &= ~(CondMask & LoopMask & RetMask)
-  131. reenable_loop_mask             LoopMask |= [temporary 0]
-  132. zero_slot_unmasked             $29 = 0
-  133. merge_loop_mask                LoopMask &= $29
-  134. stack_rewind
-  135. branch_if_any_active_lanes     branch_if_any_active_lanes -16 (#119)
-  136. load_loop_mask                 LoopMask = $28
-  137. load_return_mask               RetMask = $27
-  138. copy_slot_unmasked             $27 = [test_else].result
-  139. copy_slot_masked               $26 = Mask($27)
-  140. load_condition_mask            CondMask = $17
-  141. zero_slot_unmasked             $23 = 0
-  142. merge_condition_mask           CondMask = $25 & $26
-  143. branch_if_no_active_lanes      branch_if_no_active_lanes +4 (#147)
-  144. copy_constant                  $24 = 0xFFFFFFFF
-  145. copy_slot_masked               [test_loop_return].result = Mask($24)
-  146. copy_slot_masked               $23 = Mask($24)
-  147. load_condition_mask            CondMask = $25
-  148. zero_slot_unmasked             $13 = 0
-  149. merge_condition_mask           CondMask = $22 & $23
-  150. branch_if_no_active_lanes      branch_if_no_active_lanes +19 (#169)
-  151. zero_slot_unmasked             x = 0
-  152. store_loop_mask                $14 = LoopMask
-  153. jump                           jump +6 (#159)
-  154. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  155. copy_slot_unmasked             $15 = x
-  156. copy_constant                  $16 = 0x00000001 (1.401298e-45)
-  157. add_int                        $15 += $16
-  158. copy_slot_masked               x = Mask($15)
-  159. copy_slot_unmasked             $15 = x
-  160. copy_constant                  $16 = 0x00000001 (1.401298e-45)
-  161. cmple_int                      $15 = lessThanEqual($15, $16)
-  162. merge_loop_mask                LoopMask &= $15
-  163. stack_rewind
-  164. branch_if_any_active_lanes     branch_if_any_active_lanes -10 (#154)
-  165. load_loop_mask                 LoopMask = $14
-  166. copy_constant                  $14 = 0xFFFFFFFF
-  167. copy_slot_masked               [test_loop_break].result = Mask($14)
-  168. copy_slot_masked               $13 = Mask($14)
-  169. load_condition_mask            CondMask = $22
-  170. copy_4_constants               $0..3 = colorRed
-  171. merge_condition_mask           CondMask = $12 & $13
-  172. copy_4_constants               $4..7 = colorGreen
-  173. copy_4_slots_masked            $0..3 = Mask($4..7)
-  174. load_condition_mask            CondMask = $12
-  175. copy_4_slots_unmasked          [main].result = $0..3
-  176. load_src                       src.rgba = [main].result
+  100. reenable_loop_mask             LoopMask |= [temporary 0]
+  101. zero_slot_unmasked             $20 = 0
+  102. merge_loop_mask                LoopMask &= $20
+  103. stack_rewind
+  104. branch_if_any_active_lanes     branch_if_any_active_lanes -14 (#90)
+  105. load_loop_mask                 LoopMask = $19
+  106. copy_constant                  $19 = 0xFFFFFFFF
+  107. copy_slot_masked               [test_if_break].result = Mask($19)
+  108. copy_slot_masked               $18 = Mask($19)
+  109. load_condition_mask            CondMask = $39
+  110. zero_slot_unmasked             $26 = 0
+  111. merge_condition_mask           CondMask = $17 & $18
+  112. branch_if_no_active_lanes      branch_if_no_active_lanes +22 (#134)
+  113. store_return_mask              $27 = RetMask
+  114. store_loop_mask                $28 = LoopMask
+  115. copy_constant                  $29 = colorGreen(1)
+  116. zero_slot_unmasked             $30 = 0
+  117. cmpeq_float                    $29 = equal($29, $30)
+  118. branch_if_no_active_lanes_eq   branch +5 (#123) if no lanes of $29 == 0xFFFFFFFF
+  119. zero_slot_unmasked             $30 = 0
+  120. copy_slot_masked               [test_else].result = Mask($30)
+  121. mask_off_return_mask           RetMask &= ~(CondMask & LoopMask & RetMask)
+  122. jump                           jump +4 (#126)
+  123. copy_constant                  $30 = 0xFFFFFFFF
+  124. copy_slot_masked               [test_else].result = Mask($30)
+  125. mask_off_return_mask           RetMask &= ~(CondMask & LoopMask & RetMask)
+  126. zero_slot_unmasked             $29 = 0
+  127. merge_loop_mask                LoopMask &= $29
+  128. stack_rewind
+  129. branch_if_any_active_lanes     branch_if_any_active_lanes -14 (#115)
+  130. load_loop_mask                 LoopMask = $28
+  131. load_return_mask               RetMask = $27
+  132. copy_slot_unmasked             $27 = [test_else].result
+  133. copy_slot_masked               $26 = Mask($27)
+  134. load_condition_mask            CondMask = $17
+  135. zero_slot_unmasked             $23 = 0
+  136. merge_condition_mask           CondMask = $25 & $26
+  137. branch_if_no_active_lanes      branch_if_no_active_lanes +4 (#141)
+  138. copy_constant                  $24 = 0xFFFFFFFF
+  139. copy_slot_masked               [test_loop_return].result = Mask($24)
+  140. copy_slot_masked               $23 = Mask($24)
+  141. load_condition_mask            CondMask = $25
+  142. zero_slot_unmasked             $13 = 0
+  143. merge_condition_mask           CondMask = $22 & $23
+  144. branch_if_no_active_lanes      branch_if_no_active_lanes +19 (#163)
+  145. zero_slot_unmasked             x = 0
+  146. store_loop_mask                $14 = LoopMask
+  147. jump                           jump +6 (#153)
+  148. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  149. copy_slot_unmasked             $15 = x
+  150. copy_constant                  $16 = 0x00000001 (1.401298e-45)
+  151. add_int                        $15 += $16
+  152. copy_slot_masked               x = Mask($15)
+  153. copy_slot_unmasked             $15 = x
+  154. copy_constant                  $16 = 0x00000001 (1.401298e-45)
+  155. cmple_int                      $15 = lessThanEqual($15, $16)
+  156. merge_loop_mask                LoopMask &= $15
+  157. stack_rewind
+  158. branch_if_any_active_lanes     branch_if_any_active_lanes -10 (#148)
+  159. load_loop_mask                 LoopMask = $14
+  160. copy_constant                  $14 = 0xFFFFFFFF
+  161. copy_slot_masked               [test_loop_break].result = Mask($14)
+  162. copy_slot_masked               $13 = Mask($14)
+  163. load_condition_mask            CondMask = $22
+  164. copy_4_constants               $0..3 = colorRed
+  165. merge_condition_mask           CondMask = $12 & $13
+  166. copy_4_constants               $4..7 = colorGreen
+  167. copy_4_slots_masked            $0..3 = Mask($4..7)
+  168. load_condition_mask            CondMask = $12
+  169. copy_4_slots_unmasked          [main].result = $0..3
+  170. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/DoWhileControlFlow.skrp b/tests/sksl/shared/DoWhileControlFlow.skrp
index 4a3a77f..90b6be8 100644
--- a/tests/sksl/shared/DoWhileControlFlow.skrp
+++ b/tests/sksl/shared/DoWhileControlFlow.skrp
@@ -5,51 +5,49 @@
     5. copy_constant                  x(2) = 0x3F800000 (1.0)
     6. copy_constant                  x(3) = 0x3F800000 (1.0)
     7. store_loop_mask                $0 = LoopMask
-    8. zero_slot_unmasked             [temporary 0] = 0
-    9. copy_slot_unmasked             $1 = x(0)
-   10. copy_constant                  $2 = 0x3E800000 (0.25)
-   11. sub_float                      $1 -= $2
-   12. copy_slot_masked               x(0) = Mask($1)
-   13. store_condition_mask           $1 = CondMask
-   14. copy_slot_unmasked             $2 = x(0)
-   15. zero_slot_unmasked             $3 = 0
-   16. cmple_float                    $2 = lessThanEqual($2, $3)
-   17. merge_condition_mask           CondMask = $1 & $2
-   18. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-   19. load_condition_mask            CondMask = $1
-   20. reenable_loop_mask             LoopMask |= [temporary 0]
-   21. copy_slot_unmasked             $1 = x(3)
-   22. copy_constant                  $2 = 0x3F800000 (1.0)
-   23. cmpeq_float                    $1 = equal($1, $2)
-   24. merge_loop_mask                LoopMask &= $1
-   25. stack_rewind
-   26. branch_if_any_active_lanes     branch_if_any_active_lanes -18 (#8)
-   27. load_loop_mask                 LoopMask = $0
-   28. store_loop_mask                $0 = LoopMask
-   29. zero_slot_unmasked             [temporary 0] = 0
-   30. copy_slot_unmasked             $1 = x(2)
-   31. copy_constant                  $2 = 0x3E800000 (0.25)
-   32. sub_float                      $1 -= $2
-   33. copy_slot_masked               x(2) = Mask($1)
-   34. store_condition_mask           $1 = CondMask
-   35. copy_slot_unmasked             $2 = x(3)
-   36. copy_constant                  $3 = 0x3F800000 (1.0)
-   37. cmpeq_float                    $2 = equal($2, $3)
-   38. merge_condition_mask           CondMask = $1 & $2
-   39. copy_constant                  $3 = 0xFFFFFFFF
-   40. copy_slot_masked               [temporary 0] = Mask($3)
-   41. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-   42. load_condition_mask            CondMask = $1
-   43. zero_slot_unmasked             $1 = 0
-   44. copy_slot_masked               x(1) = Mask($1)
-   45. reenable_loop_mask             LoopMask |= [temporary 0]
-   46. zero_slot_unmasked             $1 = 0
-   47. copy_slot_unmasked             $2 = x(2)
-   48. cmplt_float                    $1 = lessThan($1, $2)
-   49. merge_loop_mask                LoopMask &= $1
-   50. stack_rewind
-   51. branch_if_any_active_lanes     branch_if_any_active_lanes -22 (#29)
-   52. load_loop_mask                 LoopMask = $0
-   53. copy_4_slots_unmasked          $0..3 = x
-   54. copy_4_slots_unmasked          [main].result = $0..3
-   55. load_src                       src.rgba = [main].result
+    8. copy_slot_unmasked             $1 = x(0)
+    9. copy_constant                  $2 = 0x3E800000 (0.25)
+   10. sub_float                      $1 -= $2
+   11. copy_slot_masked               x(0) = Mask($1)
+   12. store_condition_mask           $1 = CondMask
+   13. copy_slot_unmasked             $2 = x(0)
+   14. zero_slot_unmasked             $3 = 0
+   15. cmple_float                    $2 = lessThanEqual($2, $3)
+   16. merge_condition_mask           CondMask = $1 & $2
+   17. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+   18. load_condition_mask            CondMask = $1
+   19. copy_slot_unmasked             $1 = x(3)
+   20. copy_constant                  $2 = 0x3F800000 (1.0)
+   21. cmpeq_float                    $1 = equal($1, $2)
+   22. merge_loop_mask                LoopMask &= $1
+   23. stack_rewind
+   24. branch_if_any_active_lanes     branch_if_any_active_lanes -16 (#8)
+   25. load_loop_mask                 LoopMask = $0
+   26. store_loop_mask                $0 = LoopMask
+   27. zero_slot_unmasked             [temporary 0] = 0
+   28. copy_slot_unmasked             $1 = x(2)
+   29. copy_constant                  $2 = 0x3E800000 (0.25)
+   30. sub_float                      $1 -= $2
+   31. copy_slot_masked               x(2) = Mask($1)
+   32. store_condition_mask           $1 = CondMask
+   33. copy_slot_unmasked             $2 = x(3)
+   34. copy_constant                  $3 = 0x3F800000 (1.0)
+   35. cmpeq_float                    $2 = equal($2, $3)
+   36. merge_condition_mask           CondMask = $1 & $2
+   37. copy_constant                  $3 = 0xFFFFFFFF
+   38. copy_slot_masked               [temporary 0] = Mask($3)
+   39. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+   40. load_condition_mask            CondMask = $1
+   41. zero_slot_unmasked             $1 = 0
+   42. copy_slot_masked               x(1) = Mask($1)
+   43. reenable_loop_mask             LoopMask |= [temporary 0]
+   44. zero_slot_unmasked             $1 = 0
+   45. copy_slot_unmasked             $2 = x(2)
+   46. cmplt_float                    $1 = lessThan($1, $2)
+   47. merge_loop_mask                LoopMask &= $1
+   48. stack_rewind
+   49. branch_if_any_active_lanes     branch_if_any_active_lanes -22 (#27)
+   50. load_loop_mask                 LoopMask = $0
+   51. copy_4_slots_unmasked          $0..3 = x
+   52. copy_4_slots_unmasked          [main].result = $0..3
+   53. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/EmptyBlocksES3.skrp b/tests/sksl/shared/EmptyBlocksES3.skrp
index 5555ae3..6f2edbb 100644
--- a/tests/sksl/shared/EmptyBlocksES3.skrp
+++ b/tests/sksl/shared/EmptyBlocksES3.skrp
@@ -24,15 +24,13 @@
    24. branch_if_any_active_lanes     branch_if_any_active_lanes -5 (#19)
    25. load_loop_mask                 LoopMask = $0
    26. store_loop_mask                $0 = LoopMask
-   27. zero_slot_unmasked             [temporary 0] = 0
-   28. reenable_loop_mask             LoopMask |= [temporary 0]
-   29. copy_constant                  $1 = colorWhite(0)
-   30. copy_constant                  $2 = 0x40000000 (2.0)
-   31. cmpeq_float                    $1 = equal($1, $2)
-   32. merge_loop_mask                LoopMask &= $1
-   33. stack_rewind
-   34. branch_if_any_active_lanes     branch_if_any_active_lanes -7 (#27)
-   35. load_loop_mask                 LoopMask = $0
-   36. copy_4_slots_unmasked          $0..3 = color
-   37. copy_4_slots_unmasked          [main].result = $0..3
-   38. load_src                       src.rgba = [main].result
+   27. copy_constant                  $1 = colorWhite(0)
+   28. copy_constant                  $2 = 0x40000000 (2.0)
+   29. cmpeq_float                    $1 = equal($1, $2)
+   30. merge_loop_mask                LoopMask &= $1
+   31. stack_rewind
+   32. branch_if_any_active_lanes     branch_if_any_active_lanes -5 (#27)
+   33. load_loop_mask                 LoopMask = $0
+   34. copy_4_slots_unmasked          $0..3 = color
+   35. copy_4_slots_unmasked          [main].result = $0..3
+   36. load_src                       src.rgba = [main].result