// src/sksl/codegen/SkSLRasterPipelineBuilder.h - skia - Git at Google

 /*
  * Copyright 2022 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #ifndef SKSL_RASTERPIPELINEBUILDER
 #define SKSL_RASTERPIPELINEBUILDER

 #include "include/core/SkTypes.h"

 #include "include/core/SkSpan.h"
 #include "include/core/SkTypes.h"
 #include "include/private/base/SkTArray.h"
 #include "src/base/SkUtils.h"
 #include "src/core/SkRasterPipelineOpList.h"

 #include <cstddef>
 #include <cstdint>
 #include <memory>

 // Forward declarations of Skia types which this header only refers to through pointers.
 class SkArenaAlloc;
 class SkRasterPipeline;
 class SkWStream;
 // Unsigned 32-bit offset type; used for the `dst`/`src` arguments of the RP::Program append
 // helpers declared below.
 // NOTE(review): presumably an offset relative to a base pointer -- confirm against the .cpp.
 using SkRPOffset = uint32_t;

 namespace SkSL {

 // Forward declarations; this header only uses them via raw pointer (DebugTracePriv) and
 // std::unique_ptr (TraceHook).
 class DebugTracePriv;
 class TraceHook;

 namespace RP {

 // Each scalar in a program occupies exactly one slot; a `Slot` is an integer slot index.
 using Slot = int;
 // Sentinel slot value (-1), used as the default for slot fields which have not been set.
 constexpr Slot NA{-1};

 // Describes a run of `count` slots which begins at slot `index`. Scalars, vectors, and matrices
 // are all expressed as such a range of slot indices. A default-constructed range is {0, 0}.
 struct SlotRange {
     Slot index{0};
     int count{0};
 };

 // X-macro listing the ops which RP::ProgramOp and RP::BuilderOp add after the native
 // SkRasterPipeline ops. `M` is invoked once per op, in the order listed here; both enums expand
 // this list, so the order determines the enumerator values they share.
 #define SKRP_EXTENDED_OPS(M)     \
     /* branch targets */         \
     M(label)                     \
                                  \
     /* child programs */         \
     M(invoke_shader)             \
     M(invoke_color_filter)       \
     M(invoke_blender)            \
                                  \
     /* color space transforms */ \
     M(invoke_to_linear_srgb)     \
     M(invoke_from_linear_srgb)

 // An RP::Program consists entirely of ProgramOps. The ProgramOps list is a superset of the
 // native SkRasterPipelineOps op-list: it also has a few extra ops to indicate child-effect
 // invocation and color-space transforms, and a `label` op to indicate branch targets.
 // The enumerators are generated by expanding the two op lists in order, so the native ops come
 // first and the extended ops follow immediately after.
 enum class ProgramOp {
     #define M(stage) stage,
         // A finished program can contain any native Raster Pipeline op...
         SK_RASTER_PIPELINE_OPS_ALL(M)

         // ... as well as our extended ops.
         SKRP_EXTENDED_OPS(M)
     #undef M
 };

 // BuilderOps are a superset of ProgramOps. They are used by the RP::Builder, which works in terms
 // of Instructions; Instructions are slightly more expressive than raw SkRasterPipelineOps. In
 // particular, the Builder supports stacks for pushing and popping scratch values.
 // RP::Program::makeStages is responsible for rewriting Instructions/BuilderOps into an array of
 // RP::Program::Stages, which will contain only native SkRasterPipelineOps and (optionally)
 // child-effect invocations.
 // The leading enumerators are expanded from the same two op lists, in the same order, as
 // ProgramOp; the Builder-specific ops are appended after them.
 enum class BuilderOp {
     #define M(stage) stage,
         // An in-flight program can contain all the native Raster Pipeline ops...
         SK_RASTER_PIPELINE_OPS_ALL(M)

         // ... and our extended ops...
         SKRP_EXTENDED_OPS(M)
     #undef M

     // ... and also has Builder-specific ops. These ops generally interface with the stack, and are
     // converted into ProgramOps during `makeStages`.
     push_clone,
     push_clone_from_stack,
     push_clone_indirect_from_stack,
     push_constant,
     push_immutable,
     push_immutable_indirect,
     push_slots,
     push_slots_indirect,
     push_uniform,
     push_uniform_indirect,
     copy_stack_to_slots,
     copy_stack_to_slots_unmasked,
     copy_stack_to_slots_indirect,
     copy_uniform_to_slots_unmasked,
     store_immutable_value,
     swizzle_copy_stack_to_slots,
     swizzle_copy_stack_to_slots_indirect,
     discard_stack,
     pad_stack,
     select,
     push_condition_mask,
     pop_condition_mask,
     push_loop_mask,
     pop_loop_mask,
     pop_and_reenable_loop_mask,
     push_return_mask,
     pop_return_mask,
     push_src_rgba,
     push_dst_rgba,
     push_device_xy01,
     pop_src_rgba,
     pop_dst_rgba,
     trace_var_indirect,
     branch_if_no_active_lanes_on_stack_top_equal,
     unsupported
 };

 // ProgramOp and BuilderOp must agree on the numeric value of the extended ops; if the two enums
 // fall out of sync, program creation will not work.
 static_assert(static_cast<int>(ProgramOp::label) == static_cast<int>(BuilderOp::label));

 // One raster-pipeline SkSL instruction, as recorded by the RP::Builder: an op, up to two slots,
 // up to four integer immediates, and a stack ID. Slot fields default to NA; the immediates and
 // the stack ID default to zero. `fOp` has no default and must be supplied by the creator.
 struct Instruction {
     BuilderOp fOp;
     Slot      fSlotA{NA};
     Slot      fSlotB{NA};
     int       fImmA{0};
     int       fImmB{0};
     int       fImmC{0};
     int       fImmD{0};
     int       fStackID{0};
 };

 // Abstract interface which is handed to Program::appendStages. It exposes one hook per
 // child-effect kind (shader, color filter, blender) and one per color-space transform.
 // NOTE(review): these hooks presumably back the `invoke_*` extended ops -- confirm in the .cpp.
 class Callbacks {
 public:
     virtual ~Callbacks() = default;

     // Child-effect hooks, keyed by child `index`.
     // NOTE(review): the bool result presumably reports whether the child could be appended --
     // confirm against the implementations.
     virtual bool appendShader(int index) = 0;
     virtual bool appendColorFilter(int index) = 0;
     virtual bool appendBlender(int index) = 0;

     // Color-space transform hooks; `color` is an opaque pointer to the color data.
     // NOTE(review): layout of the pointed-to data is not visible here -- verify with callers.
     virtual void toLinearSrgb(const void* color) = 0;
     virtual void fromLinearSrgb(const void* color) = 0;
 };

 // A finished Raster Pipeline program. It is constructed from a list of Instructions plus the slot
 // and label counts, and can afterwards be appended to an SkRasterPipeline or dumped to a stream.
 class Program {
 public:
     // Takes the instruction list by value, along with the number of value, uniform and immutable
     // slots, the number of labels, and an optional debug trace.
     Program(skia_private::TArray<Instruction> instrs,
             int numValueSlots,
             int numUniformSlots,
             int numImmutableSlots,
             int numLabels,
             DebugTracePriv* debugTrace);
     ~Program();

     // Appends this program to `pipeline`, using `alloc` for allocations, `callbacks` for the
     // child-effect / color-space hooks, and `uniforms` as the uniform data.
     // NOTE(review): the bool result presumably reports success -- confirm in the .cpp.
     bool appendStages(SkRasterPipeline* pipeline,
                       SkArenaAlloc* alloc,
                       Callbacks* callbacks,
                       SkSpan<const float> uniforms) const;

     // Writes a dump of the program to `out`; `writeInstructionCount` is off by default.
     void dump(SkWStream* out, bool writeInstructionCount = false) const;

     // Returns the number of uniform slots this program was created with.
     int numUniforms() const { return fNumUniformSlots; }

 private:
     using StackDepths = skia_private::TArray<int>; // [stack index] = depth of stack

     // Spans over the three kinds of slot storage used by a program.
     struct SlotData {
         SkSpan<float> values;
         SkSpan<float> stack;
         SkSpan<float> immutable;
     };
     SlotData allocateSlotData(SkArenaAlloc* alloc) const;

     // A single entry in the final stage list: an op paired with its opaque context pointer.
     struct Stage {
         ProgramOp op;
         void*     ctx;
     };
     void makeStages(skia_private::TArray<Stage>* pipeline,
                     SkArenaAlloc* alloc,
                     SkSpan<const float> uniforms,
                     const SlotData& slots) const;
     void optimize();
     StackDepths tempStackMaxDepths() const;

     // These methods are used to split up multi-slot copies into multiple ops as needed.
     void appendCopy(skia_private::TArray<Stage>* pipeline,
                     SkArenaAlloc* alloc,
                     std::byte* basePtr,
                     ProgramOp baseStage,
                     SkRPOffset dst, int dstStride,
                     SkRPOffset src, int srcStride,
                     int numSlots) const;
     void appendCopyImmutableUnmasked(skia_private::TArray<Stage>* pipeline,
                                      SkArenaAlloc* alloc,
                                      std::byte* basePtr,
                                      SkRPOffset dst,
                                      SkRPOffset src,
                                      int numSlots) const;
     void appendCopySlotsUnmasked(skia_private::TArray<Stage>* pipeline,
                                  SkArenaAlloc* alloc,
                                  SkRPOffset dst,
                                  SkRPOffset src,
                                  int numSlots) const;
     void appendCopySlotsMasked(skia_private::TArray<Stage>* pipeline,
                                SkArenaAlloc* alloc,
                                SkRPOffset dst,
                                SkRPOffset src,
                                int numSlots) const;

     // Appends a single-slot single-input math operation to the pipeline. The op `stage` will be
     // appended `numSlots` times, starting at position `dst` and advancing one slot for each
     // subsequent invocation.
     void appendSingleSlotUnaryOp(skia_private::TArray<Stage>* pipeline, ProgramOp stage,
                                  float* dst, int numSlots) const;

     // Appends a multi-slot single-input math operation to the pipeline. `baseStage` must refer to
     // a single-slot "apply_op" stage, which must be immediately followed by specializations for
     // 2-4 slots. For instance, {`ceil_float`, `ceil_2_floats`, `ceil_3_floats`, `ceil_4_floats`}
     // must be contiguous ops in the stage list, listed in that order; pass `ceil_float` and we
     // pick the appropriate op based on `numSlots`.
     void appendMultiSlotUnaryOp(skia_private::TArray<Stage>* pipeline, ProgramOp baseStage,
                                 float* dst, int numSlots) const;

     // Appends an immediate-mode binary operation to the pipeline. `baseStage` must refer to
     // a single-slot, immediate-mode "apply-imm" stage, which must be immediately preceded by
     // specializations for 2-4 slots if numSlots is greater than 1. For instance, {`add_imm_4_ints`,
     // `add_imm_3_ints`, `add_imm_2_ints`, `add_imm_int`} must be contiguous ops in the stage list,
     // listed in that order; pass `add_imm_int` and we pick the appropriate op based on `numSlots`.
     // Some immediate-mode binary ops are single-slot only in the interest of code size; in this
     // case, the multi-slot ops can be absent, but numSlots must be 1.
     void appendImmediateBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                  ProgramOp baseStage,
                                  SkRPOffset dst, int32_t value, int numSlots) const;

     // Appends a two-input math operation to the pipeline. `src` must be _immediately_ after `dst`
     // in memory. `stage` must refer to an unbounded "apply_to_n_slots" stage. A BinaryOpCtx
     // will be used to pass pointers to the destination and source; the delta between the two
     // pointers implicitly gives the number of slots.
     void appendAdjacentNWayBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                     ProgramOp stage,
                                     SkRPOffset dst, SkRPOffset src, int numSlots) const;

     // Appends a multi-slot two-input math operation to the pipeline. `src` must be _immediately_
     // after `dst` in memory. `baseStage` must refer to an unbounded "apply_to_n_slots" stage, which
     // must be immediately followed by specializations for 1-4 slots. For instance, {`add_n_floats`,
     // `add_float`, `add_2_floats`, `add_3_floats`, `add_4_floats`} must be contiguous ops in the
     // stage list, listed in that order; pass `add_n_floats` and we pick the appropriate op based on
     // `numSlots`.
     void appendAdjacentMultiSlotBinaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                          ProgramOp baseStage, std::byte* basePtr,
                                          SkRPOffset dst, SkRPOffset src, int numSlots) const;

     // Appends a multi-slot math operation having three inputs (dst, src0, src1) and one output
     // (dst) to the pipeline. The three inputs must be _immediately_ adjacent in memory. `baseStage`
     // must refer to an unbounded "apply_to_n_slots" stage, which must be immediately followed by
     // specializations for 1-4 slots.
     void appendAdjacentMultiSlotTernaryOp(skia_private::TArray<Stage>* pipeline,
                                           SkArenaAlloc* alloc, ProgramOp baseStage,
                                           std::byte* basePtr, SkRPOffset dst, SkRPOffset src0,
                                           SkRPOffset src1, int numSlots) const;

     // Appends a math operation having three inputs (dst, src0, src1) and one output (dst) to the
     // pipeline. The three inputs must be _immediately_ adjacent in memory. `stage` must refer
     // to an unbounded "apply_to_n_slots" stage. A TernaryOpCtx will be used to pass pointers to the
     // destination and sources; the delta between each pointer implicitly gives the slot count.
     void appendAdjacentNWayTernaryOp(skia_private::TArray<Stage>* pipeline, SkArenaAlloc* alloc,
                                      ProgramOp stage, std::byte* basePtr, SkRPOffset dst,
                                      SkRPOffset src0, SkRPOffset src1, int numSlots) const;

     // Appends a stack_rewind op on platforms where it is needed (when SK_HAS_MUSTTAIL is not set).
     void appendStackRewind(skia_private::TArray<Stage>* pipeline) const;

     // Helper class, defined elsewhere, which is granted access to Program's private members.
     // NOTE(review): presumably the implementation behind dump() -- confirm in the .cpp.
     class Dumper;
     friend class Dumper;

     skia_private::TArray<Instruction> fInstructions;
     int fNumValueSlots = 0;
     int fNumUniformSlots = 0;
     int fNumImmutableSlots = 0;
     int fNumTempStackSlots = 0;
     int fNumLabels = 0;
     StackDepths fTempStackMaxDepths;
     DebugTracePriv* fDebugTrace = nullptr;
     std::unique_ptr<SkSL::TraceHook> fTraceHook;
 };

 class Builder {
 public:
     /** Finalizes and optimizes the program. */
     std::unique_ptr<Program> finish(int numValueSlots,
                                     int numUniformSlots,
                                     int numImmutableSlots,
                                     DebugTracePriv* debugTrace = nullptr);
     /**
      * Peels off a label ID for use in the program. Set the label's position in the program with
      * the `label` instruction. Actually branch to the target with an instruction like
      * `branch_if_any_lanes_active` or `jump`.
      */
     int nextLabelID() {
         return fNumLabels++;
     }

     /**
      * The builder keeps track of the state of execution masks; when we know that the execution
      * mask is unaltered, we can generate simpler code. Code which alters the execution mask is
      * required to enable this flag.
      */
     void enableExecutionMaskWrites() {
         ++fExecutionMaskWritesEnabled;
     }

     void disableExecutionMaskWrites() {
         SkASSERT(this->executionMaskWritesAreEnabled());
         --fExecutionMaskWritesEnabled;
     }

     bool executionMaskWritesAreEnabled() {
         return fExecutionMaskWritesEnabled > 0;
     }

     /** Assemble a program from the Raster Pipeline instructions below. */
     void init_lane_masks() {
         this->appendInstruction(BuilderOp::init_lane_masks, {});
     }

     void store_src_rg(SlotRange slots) {
         SkASSERT(slots.count == 2);
         this->appendInstruction(BuilderOp::store_src_rg, {slots.index});
     }

     void store_src(SlotRange slots) {
         SkASSERT(slots.count == 4);
         this->appendInstruction(BuilderOp::store_src, {slots.index});
     }

     void store_dst(SlotRange slots) {
         SkASSERT(slots.count == 4);
         this->appendInstruction(BuilderOp::store_dst, {slots.index});
     }

     void store_device_xy01(SlotRange slots) {
         SkASSERT(slots.count == 4);
         this->appendInstruction(BuilderOp::store_device_xy01, {slots.index});
     }

     void load_src(SlotRange slots) {
         SkASSERT(slots.count == 4);
         this->appendInstruction(BuilderOp::load_src, {slots.index});
     }

     void load_dst(SlotRange slots) {
         SkASSERT(slots.count == 4);
         this->appendInstruction(BuilderOp::load_dst, {slots.index});
     }

     void set_current_stack(int stackID) {
         fCurrentStackID = stackID;
     }

     // Inserts a label into the instruction stream.
     void label(int labelID);

     // Unconditionally branches to a label.
     void jump(int labelID);

     // Branches to a label if the execution mask is active in every lane.
     void branch_if_all_lanes_active(int labelID);

     // Branches to a label if the execution mask is active in any lane.
     void branch_if_any_lanes_active(int labelID);

     // Branches to a label if the execution mask is inactive across all lanes.
     void branch_if_no_lanes_active(int labelID);

     // Branches to a label if the top value on the stack is _not_ equal to `value` in any lane.
     void branch_if_no_active_lanes_on_stack_top_equal(int value, int labelID);

     // We use the same SkRasterPipeline op regardless of the literal type, and bitcast the value.
     void push_constant_i(int32_t val, int count = 1);

     void push_zeros(int count) {
         this->push_constant_i(/*val=*/0, count);
     }

     void push_constant_f(float val) {
         this->push_constant_i(sk_bit_cast<int32_t>(val), /*count=*/1);
     }

     void push_constant_u(uint32_t val, int count = 1) {
         this->push_constant_i(sk_bit_cast<int32_t>(val), count);
     }

     // Translates into copy_uniforms (from uniforms into temp stack) in Raster Pipeline.
     void push_uniform(SlotRange src);

     // Initializes the Raster Pipeline slot with a constant value when the program is first created.
     // Does not add any instructions to the program.
     void store_immutable_value_i(Slot slot, int32_t val) {
         this->appendInstruction(BuilderOp::store_immutable_value, {slot}, val);
     }

     // Translates into copy_uniforms (from uniforms into value-slots) in Raster Pipeline.
     void copy_uniform_to_slots_unmasked(SlotRange dst, SlotRange src);

     // Translates into copy_from_indirect_uniform_unmasked (from values into temp stack) in Raster
     // Pipeline. `fixedRange` denotes a fixed set of slots; this range is pushed forward by the
     // value at the top of stack `dynamicStack`. Pass the range of the uniform being indexed as
     // `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
     void push_uniform_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange);


     // Translates into copy_slots_unmasked (from values into temp stack) in Raster Pipeline.
     void push_slots(SlotRange src) {
         this->push_slots_or_immutable(src, BuilderOp::push_slots);
     }

     // Translates into copy_immutable_unmasked (from immutables into temp stack) in Raster Pipeline.
     void push_immutable(SlotRange src) {
         this->push_slots_or_immutable(src, BuilderOp::push_immutable);
     }

     void push_slots_or_immutable(SlotRange src, BuilderOp op);

     // Translates into copy_from_indirect_unmasked (from values into temp stack) in Raster Pipeline.
     // `fixedRange` denotes a fixed set of slots; this range is pushed forward by the value at the
     // top of stack `dynamicStack`. Pass the slot range of the variable being indexed as
     // `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
     void push_slots_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange) {
         this->push_slots_or_immutable_indirect(fixedRange, dynamicStack, limitRange,
                                                BuilderOp::push_slots_indirect);
     }

     void push_immutable_indirect(SlotRange fixedRange, int dynamicStack, SlotRange limitRange) {
         this->push_slots_or_immutable_indirect(fixedRange, dynamicStack, limitRange,
                                                BuilderOp::push_immutable_indirect);
     }

     void push_slots_or_immutable_indirect(SlotRange fixedRange, int dynamicStack,
                                           SlotRange limitRange, BuilderOp op);

     // Translates into copy_slots_masked (from temp stack to values) in Raster Pipeline.
     // Does not discard any values on the temp stack.
     void copy_stack_to_slots(SlotRange dst) {
         this->copy_stack_to_slots(dst, /*offsetFromStackTop=*/dst.count);
     }

     void copy_stack_to_slots(SlotRange dst, int offsetFromStackTop);

     // Translates into swizzle_copy_slots_masked (from temp stack to values) in Raster Pipeline.
     // Does not discard any values on the temp stack.
     void swizzle_copy_stack_to_slots(SlotRange dst,
                                      SkSpan<const int8_t> components,
                                      int offsetFromStackTop);

     // Translates into swizzle_copy_to_indirect_masked (from temp stack to values) in Raster
     // Pipeline. Does not discard any values on the temp stack.
     void swizzle_copy_stack_to_slots_indirect(SlotRange fixedRange,
                                               int dynamicStackID,
                                               SlotRange limitRange,
                                               SkSpan<const int8_t> components,
                                               int offsetFromStackTop);

     // Translates into copy_slots_unmasked (from temp stack to values) in Raster Pipeline.
     // Does not discard any values on the temp stack.
     void copy_stack_to_slots_unmasked(SlotRange dst) {
         this->copy_stack_to_slots_unmasked(dst, /*offsetFromStackTop=*/dst.count);
     }

     void copy_stack_to_slots_unmasked(SlotRange dst, int offsetFromStackTop);

     // Translates into copy_to_indirect_masked (from temp stack into values) in Raster Pipeline.
     // `fixedRange` denotes a fixed set of slots; this range is pushed forward by the value at the
     // top of stack `dynamicStack`. Pass the slot range of the variable being indexed as
     // `limitRange`; this is used as a hard cap, to avoid indexing outside of bounds.
     void copy_stack_to_slots_indirect(SlotRange fixedRange,
                                       int dynamicStackID,
                                       SlotRange limitRange);

     // Copies from temp stack to slots, including an indirect offset, then shrinks the temp stack.
     void pop_slots_indirect(SlotRange fixedRange, int dynamicStackID, SlotRange limitRange) {
         this->copy_stack_to_slots_indirect(fixedRange, dynamicStackID, limitRange);
         this->discard_stack(fixedRange.count);
     }

     // Performs a unary op (like `bitwise_not`), given a slot count of `slots`. The stack top is
     // replaced with the result.
     void unary_op(BuilderOp op, int32_t slots);

     // Performs a binary op (like `add_n_floats` or `cmpeq_n_ints`), given a slot count of
     // `slots`. Two n-slot input values are consumed, and the result is pushed onto the stack.
     void binary_op(BuilderOp op, int32_t slots);

     // Performs a ternary op (like `mix` or `smoothstep`), given a slot count of
     // `slots`. Three n-slot input values are consumed, and the result is pushed onto the stack.
     void ternary_op(BuilderOp op, int32_t slots);

     // Computes a dot product on the stack. The slots consumed (`slots`) must be between 1 and 4.
     // Two n-slot input vectors are consumed, and a scalar result is pushed onto the stack.
     void dot_floats(int32_t slots);

     // Computes refract(N, I, eta) on the stack. N and I are assumed to be 4-slot vectors, and can
     // be padded with zeros for smaller inputs. Eta is a scalar. The result is a 4-slot vector.
     void refract_floats();

     // Computes inverse(matN) on the stack. Pass 2, 3 or 4 for n to specify matrix size.
     void inverse_matrix(int32_t n);

     // Shrinks the temp stack, discarding values on top.
     void discard_stack(int32_t count, int stackID);

     void discard_stack(int32_t count) {
         this->discard_stack(count, fCurrentStackID);
     }

     // Grows the temp stack, leaving any preexisting values in place.
     void pad_stack(int32_t count);

     // Copies vales from the temp stack into slots, and then shrinks the temp stack.
     void pop_slots(SlotRange dst);

     // Creates many clones of the top single-slot item on the temp stack.
     void push_duplicates(int count);

     // Creates a single clone of an item on the current temp stack. The cloned item can consist of
     // any number of slots, and can be copied from an earlier position on the stack.
     void push_clone(int numSlots, int offsetFromStackTop = 0);

     // Clones a range of slots from another stack onto this stack.
     void push_clone_from_stack(SlotRange range, int otherStackID, int offsetFromStackTop);

     // Translates into copy_from_indirect_unmasked (from one temp stack to another) in Raster
     // Pipeline. `fixedOffset` denotes a range of slots within the top `offsetFromStackTop` slots of
     // `otherStackID`. This range is pushed forward by the value at the top of `dynamicStackID`.
     void push_clone_indirect_from_stack(SlotRange fixedOffset,
                                         int dynamicStackID,
                                         int otherStackID,
                                         int offsetFromStackTop);

     // Compares the stack top with the passed-in value; if it matches, enables the loop mask.
     void case_op(int value) {
         this->appendInstruction(BuilderOp::case_op, {}, value);
     }

     // Performs a `continue` in a loop.
     void continue_op(int continueMaskStackID) {
         this->appendInstruction(BuilderOp::continue_op, {}, continueMaskStackID);
     }

     void select(int slots) {
         // Overlays the top two entries on the stack, making one hybrid entry. The execution mask
         // is used to select which lanes are preserved.
         SkASSERT(slots > 0);
         this->appendInstruction(BuilderOp::select, {}, slots);
     }

     // The opposite of push_slots; copies values from the temp stack into value slots, then
     // shrinks the temp stack.
     void pop_slots_unmasked(SlotRange dst);

     void copy_slots_masked(SlotRange dst, SlotRange src) {
         SkASSERT(dst.count == src.count);
         this->appendInstruction(BuilderOp::copy_slot_masked, {dst.index, src.index}, dst.count);
     }

     void copy_slots_unmasked(SlotRange dst, SlotRange src);

     void copy_immutable_unmasked(SlotRange dst, SlotRange src);

     // Directly writes a constant value into a slot.
     void copy_constant(Slot slot, int constantValue);

     // Stores zeros across the entire slot range.
     void zero_slots_unmasked(SlotRange dst);

     // Consumes `consumedSlots` elements on the stack, then generates `components.size()` elements.
     void swizzle(int consumedSlots, SkSpan<const int8_t> components);

     // Transposes a matrix of size CxR on the stack (into a matrix of size RxC).
     void transpose(int columns, int rows);

     // Generates a CxR diagonal matrix from the top two scalars on the stack. The second scalar is
     // used as the diagonal value; the first scalar (usually zero) fills in the rest of the slots.
     void diagonal_matrix(int columns, int rows);

     // Resizes a CxR matrix at the top of the stack to C'xR'.
     void matrix_resize(int origColumns, int origRows, int newColumns, int newRows);

     // Multiplies a CxR matrix/vector against an adjacent CxR matrix/vector on the stack.
     void matrix_multiply(int leftColumns, int leftRows, int rightColumns, int rightRows);

     void push_condition_mask();

     void pop_condition_mask() {
         SkASSERT(this->executionMaskWritesAreEnabled());
         this->appendInstruction(BuilderOp::pop_condition_mask, {});
     }

     void merge_condition_mask();

     void merge_inv_condition_mask() {
         SkASSERT(this->executionMaskWritesAreEnabled());
         this->appendInstruction(BuilderOp::merge_inv_condition_mask, {});
     }

     void push_loop_mask() {
         SkASSERT(this->executionMaskWritesAreEnabled());
         this->appendInstruction(BuilderOp::push_loop_mask, {});
     }

     void pop_loop_mask() {
         SkASSERT(this->executionMaskWritesAreEnabled());
         this->appendInstruction(BuilderOp::pop_loop_mask, {});
     }

     // Exchanges src.rgba with the four values at the top of the stack.
     void exchange_src();

     void push_src_rgba() {
         this->appendInstruction(BuilderOp::push_src_rgba, {});
     }

     void push_dst_rgba() {
         this->appendInstruction(BuilderOp::push_dst_rgba, {});
     }

     void push_device_xy01() {
         this->appendInstruction(BuilderOp::push_device_xy01, {});
     }

     void pop_src_rgba();

     void pop_dst_rgba() {
         this->appendInstruction(BuilderOp::pop_dst_rgba, {});
     }

     void mask_off_loop_mask() {
         SkASSERT(this->executionMaskWritesAreEnabled());
         this->appendInstruction(BuilderOp::mask_off_loop_mask, {});
     }

     void reenable_loop_mask(SlotRange src) {
         SkASSERT(this->executionMaskWritesAreEnabled());
         SkASSERT(src.count == 1);
         this->appendInstruction(BuilderOp::reenable_loop_mask, {src.index});
     }

     void pop_and_reenable_loop_mask() {
         SkASSERT(this->executionMaskWritesAreEnabled());
         this->appendInstruction(BuilderOp::pop_and_reenable_loop_mask, {});
     }

     void merge_loop_mask() {
         SkASSERT(this->executionMaskWritesAreEnabled());
         this->appendInstruction(BuilderOp::merge_loop_mask, {});
     }

     void push_return_mask() {
         SkASSERT(this->executionMaskWritesAreEnabled());
         this->appendInstruction(BuilderOp::push_return_mask, {});
     }

     void pop_return_mask();

     void mask_off_return_mask() {
         SkASSERT(this->executionMaskWritesAreEnabled());
         this->appendInstruction(BuilderOp::mask_off_return_mask, {});
     }

     void invoke_shader(int childIdx) {
         this->appendInstruction(BuilderOp::invoke_shader, {}, childIdx);
     }

     void invoke_color_filter(int childIdx) {
         this->appendInstruction(BuilderOp::invoke_color_filter, {}, childIdx);
     }

     void invoke_blender(int childIdx) {
         this->appendInstruction(BuilderOp::invoke_blender, {}, childIdx);
     }

     void invoke_to_linear_srgb() {
         // The intrinsics accept a three-component value; add a fourth padding element (which
         // will be ignored) since our RP ops deal in RGBA colors.
         this->pad_stack(1);
         this->appendInstruction(BuilderOp::invoke_to_linear_srgb, {});
         this->discard_stack(1);
     }

     void invoke_from_linear_srgb() {
         // The intrinsics accept a three-component value; add a fourth padding element (which
         // will be ignored) since our RP ops deal in RGBA colors.
         this->pad_stack(1);
         this->appendInstruction(BuilderOp::invoke_from_linear_srgb, {});
         this->discard_stack(1);
     }

     // Writes the current line number to the debug trace.
     void trace_line(int traceMaskStackID, int line) {
         this->appendInstruction(BuilderOp::trace_line, {}, traceMaskStackID, line);
     }

     // Writes a variable update to the debug trace.
     void trace_var(int traceMaskStackID, SlotRange r) {
         this->appendInstruction(BuilderOp::trace_var, {r.index}, traceMaskStackID, r.count);
     }

     // Writes a variable update (via indirection) to the debug trace.
     void trace_var_indirect(int traceMaskStackID, SlotRange fixedRange,
                             int dynamicStackID, SlotRange limitRange);

     // Writes a function-entrance to the debug trace.
     void trace_enter(int traceMaskStackID, int funcID) {
         this->appendInstruction(BuilderOp::trace_enter, {}, traceMaskStackID, funcID);
     }

     // Writes a function-exit to the debug trace.
     void trace_exit(int traceMaskStackID, int funcID) {
         this->appendInstruction(BuilderOp::trace_exit, {}, traceMaskStackID, funcID);
     }

     // Writes a scope-level change to the debug trace.
     void trace_scope(int traceMaskStackID, int delta) {
         this->appendInstruction(BuilderOp::trace_scope, {}, traceMaskStackID, delta);
     }

 private:
     struct SlotList {
         SlotList(Slot a = NA, Slot b = NA) : fSlotA(a), fSlotB(b) {}
         Slot fSlotA = NA;
         Slot fSlotB = NA;
     };
     void appendInstruction(BuilderOp op, SlotList slots,
                            int a = 0, int b = 0, int c = 0, int d = 0);
     Instruction* lastInstruction(int fromBack = 0);
     Instruction* lastInstructionOnAnyStack(int fromBack = 0);
     void simplifyPopSlotsUnmasked(SlotRange* dst);
     bool simplifyImmediateUnmaskedOp();

     skia_private::TArray<Instruction> fInstructions;
     int fNumLabels = 0;
     int fExecutionMaskWritesEnabled = 0;
     int fCurrentStackID = 0;
 };

 }  // namespace RP
 }  // namespace SkSL

 #endif  // SKSL_RASTERPIPELINEBUILDER