Emit trace-enter op before function parameter trace-var ops.

In order to associate the parameters with the inner function, the
trace-var op needs to happen inside the trace-enter. To make this
happen, the parameter handling logic has been moved from
`writeFunction` to `pushFunctionCall`.

Change-Id: Iafd57d0f1de8d23b5a0a98ff2518c00d07532e77
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/664336
Auto-Submit: John Stiles <johnstiles@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
index 1c52aea..4b6adaf 100644
--- a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
+++ b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
@@ -193,7 +193,8 @@
      * contained unsupported statements or expressions.
      */
     std::optional<SlotRange> writeFunction(const IRNode& callSite,
-                                           const FunctionDefinition& function);
+                                           const FunctionDefinition& function,
+                                           SkSpan<std::unique_ptr<Expression> const> arguments);
 
     /**
      * Returns the slot index of this function inside the FunctionDebugInfo array in DebugTracePriv.
@@ -1264,8 +1265,10 @@
     }
 }
 
-std::optional<SlotRange> Generator::writeFunction(const IRNode& callSite,
-                                                  const FunctionDefinition& function) {
+std::optional<SlotRange> Generator::writeFunction(
+        const IRNode& callSite,
+        const FunctionDefinition& function,
+        SkSpan<std::unique_ptr<Expression> const> arguments) {
     // Generate debug information and emit a trace-enter op.
     int funcIndex = -1;
     if (fDebugTrace) {
@@ -1276,13 +1279,52 @@
         }
     }
 
-    // We need to explicitly emit trace ops for the variables in main(), since they are initialized
-    // before it is safe to use trace-var. (We can't invoke init-lane-masks until after we've
-    // copied the inputs from main into slots, because dst.rgba is used to pass in a
-    // blend-destination color, but we clobber it and put in the execution mask instead.)
-    if (this->shouldWriteTraceOps() && function.declaration().isMain()) {
-        for (const Variable* var : function.declaration().parameters()) {
-            fBuilder.trace_var(fTraceMask->stackID(), this->getVariableSlots(*var));
+    // Handle parameter lvalues.
+    SkSpan<Variable* const> parameters = function.declaration().parameters();
+    TArray<std::unique_ptr<LValue>> lvalues;
+    if (function.declaration().isMain()) {
+        // For main(), the parameter slots have already been populated by `writeProgram`, but we
+        // still need to explicitly emit trace ops for the variables in main(), since they are
+        // initialized before it is safe to use trace-var. (We can't invoke init-lane-masks until
+        // after we've copied the inputs from main into slots, because dst.rgba is used to pass in a
+        // blend-destination color, but we clobber it and put in the execution mask instead.)
+        if (this->shouldWriteTraceOps()) {
+            for (const Variable* var : parameters) {
+                fBuilder.trace_var(fTraceMask->stackID(), this->getVariableSlots(*var));
+            }
+        }
+    } else {
+        // Write all the arguments into their parameter's variable slots. Because we never allow
+        // recursion, we don't need to worry about overwriting any existing values in those slots.
+        // (In fact, we don't even need to apply the write mask.)
+        lvalues.resize(arguments.size());
+
+        for (size_t index = 0; index < arguments.size(); ++index) {
+            const Expression& arg = *arguments[index];
+            const Variable& param = *parameters[index];
+
+            // Use LValues for out-parameters and inout-parameters, so we can store back to them
+            // later.
+            if (IsInoutParameter(param) || IsOutParameter(param)) {
+                lvalues[index] = this->makeLValue(arg);
+                if (!lvalues[index]) {
+                    return std::nullopt;
+                }
+                // There are no guarantees on the starting value of an out-parameter, so we only
+                // need to store the lvalues associated with an inout parameter.
+                if (IsInoutParameter(param)) {
+                    if (!this->push(*lvalues[index])) {
+                        return std::nullopt;
+                    }
+                    this->popToSlotRangeUnmasked(this->getVariableSlots(param));
+                }
+            } else {
+                // Copy input arguments into their respective parameter slots.
+                if (!this->pushExpression(arg)) {
+                    return std::nullopt;
+                }
+                this->popToSlotRangeUnmasked(this->getVariableSlots(param));
+            }
         }
     }
 
@@ -1320,6 +1362,22 @@
         fBuilder.trace_exit(fTraceMask->stackID(), funcIndex);
     }
 
+    // Copy out-parameters and inout-parameters back to their homes.
+    for (int index = 0; index < lvalues.size(); ++index) {
+        if (lvalues[index]) {
+            // Only out- and inout-parameters should have an associated lvalue.
+            const Variable& param = *parameters[index];
+            SkASSERT(IsInoutParameter(param) || IsOutParameter(param));
+
+            // Copy the parameter's slots directly into the lvalue.
+            fBuilder.push_slots(this->getVariableSlots(param));
+            if (!this->store(*lvalues[index])) {
+                return std::nullopt;
+            }
+            this->discardExpression(param.type().slotCount());
+        }
+    }
+
     return functionResult;
 }
 
@@ -2609,41 +2667,8 @@
     int skipLabelID = fBuilder.nextLabelID();
     fBuilder.branch_if_no_lanes_active(skipLabelID);
 
-    // Write all the arguments into their parameter's variable slots. Because we never allow
-    // recursion, we don't need to worry about overwriting any existing values in those slots.
-    // (In fact, we don't even need to apply the write mask.)
-    TArray<std::unique_ptr<LValue>> lvalues;
-    lvalues.resize(c.arguments().size());
-
-    for (int index = 0; index < c.arguments().size(); ++index) {
-        const Expression& arg = *c.arguments()[index];
-        const Variable& param = *c.function().parameters()[index];
-
-        // Use LValues for out-parameters and inout-parameters, so we can store back to them later.
-        if (IsInoutParameter(param) || IsOutParameter(param)) {
-            lvalues[index] = this->makeLValue(arg);
-            if (!lvalues[index]) {
-                return unsupported();
-            }
-            // There are no guarantees on the starting value of an out-parameter, so we only need to
-            // store the lvalues associated with an inout parameter.
-            if (IsInoutParameter(param)) {
-                if (!this->push(*lvalues[index])) {
-                    return unsupported();
-                }
-                this->popToSlotRangeUnmasked(this->getVariableSlots(param));
-            }
-        } else {
-            // Copy input arguments into their respective parameter slots.
-            if (!this->pushExpression(arg)) {
-                return unsupported();
-            }
-            this->popToSlotRangeUnmasked(this->getVariableSlots(param));
-        }
-    }
-
     // Emit the function body.
-    std::optional<SlotRange> r = this->writeFunction(c, *fCurrentFunction);
+    std::optional<SlotRange> r = this->writeFunction(c, *fCurrentFunction, c.arguments());
     if (!r.has_value()) {
         return unsupported();
     }
@@ -2656,22 +2681,6 @@
     // We've returned back to the last function.
     fCurrentFunction = lastFunction;
 
-    // Copy out-parameters and inout-parameters back to their homes.
-    for (int index = 0; index < c.arguments().size(); ++index) {
-        if (lvalues[index]) {
-            // Only out- and inout-parameters should have an associated lvalue.
-            const Variable& param = *c.function().parameters()[index];
-            SkASSERT(IsInoutParameter(param) || IsOutParameter(param));
-
-            // Copy the parameter's slots directly into the lvalue.
-            fBuilder.push_slots(this->getVariableSlots(param));
-            if (!this->store(*lvalues[index])) {
-                return unsupported();
-            }
-            this->discardExpression(param.type().slotCount());
-        }
-    }
-
     // Copy the function result from its slots onto the stack.
     fBuilder.label(skipLabelID);
     return true;
@@ -3651,7 +3660,7 @@
     }
 
     // Invoke main().
-    std::optional<SlotRange> mainResult = this->writeFunction(function, function);
+    std::optional<SlotRange> mainResult = this->writeFunction(function, function, /*arguments=*/{});
     if (!mainResult.has_value()) {
         return unsupported();
     }
diff --git a/tests/sksl/runtime/LoopFloat.skrp b/tests/sksl/runtime/LoopFloat.skrp
index 036bd16..af597ff 100644
--- a/tests/sksl/runtime/LoopFloat.skrp
+++ b/tests/sksl/runtime/LoopFloat.skrp
@@ -36,9 +36,9 @@
    36. store_condition_mask           $23 = CondMask
    37. store_condition_mask           $0 = CondMask
    38. branch_if_no_lanes_active      branch_if_no_lanes_active +66 (label 10 at #104)
-   39. copy_slot_unmasked             five₁ = five
-   40. trace_var                      TraceVar(five₁) when $40 is true
-   41. trace_enter                    TraceEnter(float return_loop(float five)) when $40 is true
+   39. trace_enter                    TraceEnter(float return_loop(float five)) when $40 is true
+   40. copy_slot_unmasked             five₁ = five
+   41. trace_var                      TraceVar(five₁) when $40 is true
    42. store_return_mask              $1 = RetMask
    43. zero_slot_unmasked             $2 = 0
    44. copy_slot_unmasked             $3 = $40
@@ -107,9 +107,9 @@
   107. zero_slot_unmasked             $24 = 0
   108. merge_condition_mask           CondMask = $0 & $1
   109. branch_if_no_lanes_active      branch_if_no_lanes_active +76 (label 9 at #185)
-  110. copy_slot_unmasked             five₂ = five
-  111. trace_var                      TraceVar(five₂) when $40 is true
-  112. trace_enter                    TraceEnter(float continue_loop(float five)) when $40 is true
+  110. trace_enter                    TraceEnter(float continue_loop(float five)) when $40 is true
+  111. copy_slot_unmasked             five₂ = five
+  112. trace_var                      TraceVar(five₂) when $40 is true
   113. zero_slot_unmasked             $25 = 0
   114. copy_slot_unmasked             $26 = $40
   115. copy_slot_masked               $25 = Mask($26)
@@ -187,9 +187,9 @@
   187. zero_slot_unmasked             $89 = 0
   188. merge_condition_mask           CondMask = $23 & $24
   189. branch_if_no_lanes_active      branch_if_no_lanes_active +77 (label 8 at #266)
-  190. copy_slot_unmasked             five₃ = five
-  191. trace_var                      TraceVar(five₃) when $40 is true
-  192. trace_enter                    TraceEnter(float break_loop(float five)) when $40 is true
+  190. trace_enter                    TraceEnter(float break_loop(float five)) when $40 is true
+  191. copy_slot_unmasked             five₃ = five
+  192. trace_var                      TraceVar(five₃) when $40 is true
   193. zero_slot_unmasked             $90 = 0
   194. copy_slot_unmasked             $91 = $40
   195. copy_slot_masked               $90 = Mask($91)
diff --git a/tests/sksl/runtime/LoopInt.skrp b/tests/sksl/runtime/LoopInt.skrp
index 8c75435..4bbfda8 100644
--- a/tests/sksl/runtime/LoopInt.skrp
+++ b/tests/sksl/runtime/LoopInt.skrp
@@ -36,9 +36,9 @@
    36. store_condition_mask           $83 = CondMask
    37. store_condition_mask           $15 = CondMask
    38. branch_if_no_lanes_active      branch_if_no_lanes_active +66 (label 9 at #104)
-   39. copy_slot_unmasked             five₁ = five
-   40. trace_var                      TraceVar(five₁) when $35 is true
-   41. trace_enter                    TraceEnter(int return_loop(int five)) when $35 is true
+   39. trace_enter                    TraceEnter(int return_loop(int five)) when $35 is true
+   40. copy_slot_unmasked             five₁ = five
+   41. trace_var                      TraceVar(five₁) when $35 is true
    42. store_return_mask              $16 = RetMask
    43. zero_slot_unmasked             $17 = 0
    44. copy_slot_unmasked             $18 = $35
@@ -107,9 +107,9 @@
   107. zero_slot_unmasked             $84 = 0
   108. merge_condition_mask           CondMask = $15 & $16
   109. branch_if_no_lanes_active      branch_if_no_lanes_active +76 (label 8 at #185)
-  110. copy_slot_unmasked             five₂ = five
-  111. trace_var                      TraceVar(five₂) when $35 is true
-  112. trace_enter                    TraceEnter(int continue_loop(int five)) when $35 is true
+  110. trace_enter                    TraceEnter(int continue_loop(int five)) when $35 is true
+  111. copy_slot_unmasked             five₂ = five
+  112. trace_var                      TraceVar(five₂) when $35 is true
   113. zero_slot_unmasked             $85 = 0
   114. copy_slot_unmasked             $86 = $35
   115. copy_slot_masked               $85 = Mask($86)
@@ -187,9 +187,9 @@
   187. zero_slot_unmasked             $26 = 0
   188. merge_condition_mask           CondMask = $83 & $84
   189. branch_if_no_lanes_active      branch_if_no_lanes_active +77 (label 7 at #266)
-  190. copy_constant                  five₃ = 0x00000005 (7.006492e-45)
-  191. trace_var                      TraceVar(five₃) when $35 is true
-  192. trace_enter                    TraceEnter(int break_loop(int five)) when $35 is true
+  190. trace_enter                    TraceEnter(int break_loop(int five)) when $35 is true
+  191. copy_constant                  five₃ = 0x00000005 (7.006492e-45)
+  192. trace_var                      TraceVar(five₃) when $35 is true
   193. zero_slot_unmasked             $27 = 0
   194. copy_slot_unmasked             $28 = $35
   195. copy_slot_masked               $27 = Mask($28)
diff --git a/tests/sksl/runtime/MultipleCallsInOneStatement.skrp b/tests/sksl/runtime/MultipleCallsInOneStatement.skrp
index 6e1fa0a..dabf835 100644
--- a/tests/sksl/runtime/MultipleCallsInOneStatement.skrp
+++ b/tests/sksl/runtime/MultipleCallsInOneStatement.skrp
@@ -12,9 +12,9 @@
    12. copy_slot_masked               $0 = Mask($1)
    13. trace_scope                    TraceScope(+1) when $0 is true
    14. trace_line                     TraceLine(11) when $13 is true
-   15. copy_constant                  x = 0x00000005 (7.006492e-45)
-   16. trace_var                      TraceVar(x) when $13 is true
-   17. trace_enter                    TraceEnter(int get(int x)) when $13 is true
+   15. trace_enter                    TraceEnter(int get(int x)) when $13 is true
+   16. copy_constant                  x = 0x00000005 (7.006492e-45)
+   17. trace_var                      TraceVar(x) when $13 is true
    18. zero_slot_unmasked             $1 = 0
    19. copy_slot_unmasked             $2 = $13
    20. copy_slot_masked               $1 = Mask($2)
@@ -26,9 +26,9 @@
    26. trace_exit                     TraceExit(int get(int x)) when $13 is true
    27. copy_slot_unmasked             $1 = [get].result
    28. label                          label 0x00000000
-   29. copy_constant                  x = 0x00000003 (4.203895e-45)
-   30. trace_var                      TraceVar(x) when $13 is true
-   31. trace_enter                    TraceEnter(int get(int x)) when $13 is true
+   29. trace_enter                    TraceEnter(int get(int x)) when $13 is true
+   30. copy_constant                  x = 0x00000003 (4.203895e-45)
+   31. trace_var                      TraceVar(x) when $13 is true
    32. zero_slot_unmasked             $2 = 0
    33. copy_slot_unmasked             $3 = $13
    34. copy_slot_masked               $2 = Mask($3)
@@ -41,9 +41,9 @@
    41. copy_slot_unmasked             $2 = [get].result
    42. label                          label 0x00000001
    43. add_int                        $1 += $2
-   44. copy_constant                  x = 0x00000002 (2.802597e-45)
-   45. trace_var                      TraceVar(x) when $13 is true
-   46. trace_enter                    TraceEnter(int get(int x)) when $13 is true
+   44. trace_enter                    TraceEnter(int get(int x)) when $13 is true
+   45. copy_constant                  x = 0x00000002 (2.802597e-45)
+   46. trace_var                      TraceVar(x) when $13 is true
    47. zero_slot_unmasked             $2 = 0
    48. copy_slot_unmasked             $3 = $13
    49. copy_slot_masked               $2 = Mask($3)
diff --git a/tests/sksl/runtime/PrecisionQualifiers.skrp b/tests/sksl/runtime/PrecisionQualifiers.skrp
index bb6a7f7..4d082a8 100644
--- a/tests/sksl/runtime/PrecisionQualifiers.skrp
+++ b/tests/sksl/runtime/PrecisionQualifiers.skrp
@@ -338,9 +338,9 @@
   338. zero_slot_unmasked             $36 = 0
   339. merge_condition_mask           CondMask = $22 & $23
   340. branch_if_no_lanes_active      branch_if_no_lanes_active +19 (label 3 at #359)
-  341. copy_constant                  value = 0x3F800000 (1.0)
-  342. trace_var                      TraceVar(value) when $18 is true
-  343. trace_enter                    TraceEnter(bool highp_param(float value)) when $18 is true
+  341. trace_enter                    TraceEnter(bool highp_param(float value)) when $18 is true
+  342. copy_constant                  value = 0x3F800000 (1.0)
+  343. trace_var                      TraceVar(value) when $18 is true
   344. zero_slot_unmasked             $37 = 0
   345. copy_slot_unmasked             $38 = $18
   346. copy_slot_masked               $37 = Mask($38)
@@ -361,9 +361,9 @@
   361. zero_slot_unmasked             $31 = 0
   362. merge_condition_mask           CondMask = $35 & $36
   363. branch_if_no_lanes_active      branch_if_no_lanes_active +19 (label 2 at #382)
-  364. copy_constant                  value₁ = 0x40000000 (2.0)
-  365. trace_var                      TraceVar(value₁) when $18 is true
-  366. trace_enter                    TraceEnter(bool mediump_param(half value)) when $18 is true
+  364. trace_enter                    TraceEnter(bool mediump_param(half value)) when $18 is true
+  365. copy_constant                  value₁ = 0x40000000 (2.0)
+  366. trace_var                      TraceVar(value₁) when $18 is true
   367. zero_slot_unmasked             $32 = 0
   368. copy_slot_unmasked             $33 = $18
   369. copy_slot_masked               $32 = Mask($33)
@@ -384,9 +384,9 @@
   384. zero_slot_unmasked             $1 = 0
   385. merge_condition_mask           CondMask = $30 & $31
   386. branch_if_no_lanes_active      branch_if_no_lanes_active +19 (label 1 at #405)
-  387. copy_constant                  value₂ = 0x40400000 (3.0)
-  388. trace_var                      TraceVar(value₂) when $18 is true
-  389. trace_enter                    TraceEnter(bool lowp_param(half value)) when $18 is true
+  387. trace_enter                    TraceEnter(bool lowp_param(half value)) when $18 is true
+  388. copy_constant                  value₂ = 0x40400000 (3.0)
+  389. trace_var                      TraceVar(value₂) when $18 is true
   390. zero_slot_unmasked             $2 = 0
   391. copy_slot_unmasked             $3 = $18
   392. copy_slot_masked               $2 = Mask($3)