| /* |
| * Copyright 2022 Google Inc. |
| * |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| |
| #include "include/core/SkStream.h" |
| #include "include/private/SkSLString.h" |
| #include "include/private/base/SkMalloc.h" |
| #include "include/private/base/SkTo.h" |
| #include "src/core/SkArenaAlloc.h" |
| #include "src/core/SkOpts.h" |
| #include "src/sksl/codegen/SkSLRasterPipelineBuilder.h" |
| #include "src/sksl/tracing/SkRPDebugTrace.h" |
| #include "src/sksl/tracing/SkSLDebugInfo.h" |
| |
| #include <algorithm> |
| #include <cmath> |
| #include <cstring> |
| #include <iterator> |
| #include <string> |
| #include <tuple> |
| #include <utility> |
| #include <vector> |
| |
| namespace SkSL { |
| namespace RP { |
| |
// Short alias; SkRasterPipeline is referenced constantly throughout this file.
using SkRP = SkRasterPipeline;

// Ops which consume `slots` stack entries and write `slots` results back in place.
// Note: the first label deliberately omits `case` so the macro can be spliced directly into a
// switch as `case ALL_MULTI_SLOT_UNARY_OP_CASES:`.
#define ALL_MULTI_SLOT_UNARY_OP_CASES \
    BuilderOp::abs_float:             \
    case BuilderOp::abs_int:          \
    case BuilderOp::bitwise_not_int:  \
    case BuilderOp::cast_to_float_from_int: \
    case BuilderOp::cast_to_float_from_uint: \
    case BuilderOp::cast_to_int_from_float: \
    case BuilderOp::cast_to_uint_from_float: \
    case BuilderOp::ceil_float:       \
    case BuilderOp::floor_float \

// Ops which consume two adjacent spans of `slots` stack entries and leave one span of results.
#define ALL_MULTI_SLOT_BINARY_OP_CASES  \
    BuilderOp::add_n_floats:            \
    case BuilderOp::add_n_ints:         \
    case BuilderOp::sub_n_floats:       \
    case BuilderOp::sub_n_ints:         \
    case BuilderOp::mul_n_floats:       \
    case BuilderOp::mul_n_ints:         \
    case BuilderOp::div_n_floats:       \
    case BuilderOp::div_n_ints:         \
    case BuilderOp::div_n_uints:        \
    case BuilderOp::bitwise_and_n_ints: \
    case BuilderOp::bitwise_or_n_ints:  \
    case BuilderOp::bitwise_xor_n_ints: \
    case BuilderOp::min_n_floats:       \
    case BuilderOp::min_n_ints:         \
    case BuilderOp::min_n_uints:        \
    case BuilderOp::max_n_floats:       \
    case BuilderOp::max_n_ints:         \
    case BuilderOp::max_n_uints:        \
    case BuilderOp::cmple_n_floats:     \
    case BuilderOp::cmple_n_ints:       \
    case BuilderOp::cmple_n_uints:      \
    case BuilderOp::cmplt_n_floats:     \
    case BuilderOp::cmplt_n_ints:       \
    case BuilderOp::cmplt_n_uints:      \
    case BuilderOp::cmpeq_n_floats:     \
    case BuilderOp::cmpeq_n_ints:       \
    case BuilderOp::cmpne_n_floats:     \
    case BuilderOp::cmpne_n_ints

// Ops which consume three adjacent spans of `slots` stack entries and leave one span of results.
#define ALL_MULTI_SLOT_TERNARY_OP_CASES \
    BuilderOp::mix_n_floats
| |
| void Builder::unary_op(BuilderOp op, int32_t slots) { |
| switch (op) { |
| case ALL_MULTI_SLOT_UNARY_OP_CASES: |
| fInstructions.push_back({op, {}, slots}); |
| break; |
| |
| default: |
| SkDEBUGFAIL("not a unary op"); |
| break; |
| } |
| } |
| |
| void Builder::binary_op(BuilderOp op, int32_t slots) { |
| switch (op) { |
| case ALL_MULTI_SLOT_BINARY_OP_CASES: |
| fInstructions.push_back({op, {}, slots}); |
| break; |
| |
| default: |
| SkDEBUGFAIL("not a binary op"); |
| break; |
| } |
| } |
| |
| void Builder::ternary_op(BuilderOp op, int32_t slots) { |
| switch (op) { |
| case ALL_MULTI_SLOT_TERNARY_OP_CASES: |
| fInstructions.push_back({op, {}, slots}); |
| break; |
| |
| default: |
| SkDEBUGFAIL("not a ternary op"); |
| break; |
| } |
| } |
| |
| void Builder::push_duplicates(int count) { |
| SkASSERT(count >= 0); |
| if (count >= 3) { |
| // Use a swizzle to splat the input into a 4-slot value. |
| this->swizzle(/*inputSlots=*/1, {0, 0, 0, 0}); |
| count -= 3; |
| } |
| for (; count >= 4; count -= 4) { |
| // Clone the splatted value four slots at a time. |
| this->push_clone(/*numSlots=*/4); |
| } |
| // Use a swizzle or clone to handle the trailing items. |
| switch (count) { |
| case 3: this->swizzle(/*inputSlots=*/1, {0, 0, 0, 0}); break; |
| case 2: this->swizzle(/*inputSlots=*/1, {0, 0, 0}); break; |
| case 1: this->push_clone(/*numSlots=*/1); break; |
| default: break; |
| } |
| } |
| |
| void Builder::swizzle(int inputSlots, SkSpan<const int8_t> components) { |
| // Consumes `inputSlots` elements on the stack, then generates `components.size()` elements. |
| SkASSERT(components.size() >= 1 && components.size() <= 4); |
| // Squash .xwww into 0x3330, or .zyx into 0x012. (Packed nybbles, in reverse order.) |
| int componentBits = 0; |
| for (auto iter = components.rbegin(); iter != components.rend(); ++iter) { |
| SkASSERT(*iter >= 0 && *iter < inputSlots); |
| componentBits <<= 4; |
| componentBits |= *iter; |
| } |
| |
| int op = (int)BuilderOp::swizzle_1 + components.size() - 1; |
| fInstructions.push_back({(BuilderOp)op, {}, inputSlots, componentBits}); |
| } |
| |
| std::unique_ptr<Program> Builder::finish(int numValueSlots, |
| int numUniformSlots, |
| SkRPDebugTrace* debugTrace) { |
| return std::make_unique<Program>(std::move(fInstructions), numValueSlots, numUniformSlots, |
| fNumLabels, fNumBranches, debugTrace); |
| } |
| |
// Post-processing pass over the finished instruction list; currently a no-op.
void Program::optimize() {
    // TODO(johnstiles): perform any last-minute cleanup of the instruction stream here
}
| |
// Returns the net effect of one instruction on the temporary-stack depth: positive values push
// slots, negative values pop slots, and zero leaves the depth unchanged.
static int stack_usage(const Instruction& inst) {
    switch (inst.fOp) {
        // Single-slot pushes: literals and saved masks each occupy one stack slot.
        case BuilderOp::push_literal_f:
        case BuilderOp::push_condition_mask:
        case BuilderOp::push_loop_mask:
        case BuilderOp::push_return_mask:
            return 1;

        // Multi-slot pushes: fImmA holds the number of slots being pushed.
        case BuilderOp::push_slots:
        case BuilderOp::push_uniform:
        case BuilderOp::push_zeros:
        case BuilderOp::push_clone:
        case BuilderOp::push_clone_from_stack:
            return inst.fImmA;

        // Mask pops release the single slot that held the saved mask.
        case BuilderOp::pop_condition_mask:
        case BuilderOp::pop_loop_mask:
        case BuilderOp::pop_return_mask:
            return -1;

        // Binary ops (and select) consume two fImmA-slot spans and leave one result span.
        case ALL_MULTI_SLOT_BINARY_OP_CASES:
        case BuilderOp::discard_stack:
        case BuilderOp::select:
            return -inst.fImmA;

        // Ternary ops consume three fImmA-slot spans and leave one result span.
        case ALL_MULTI_SLOT_TERNARY_OP_CASES:
            return 2 * -inst.fImmA;

        // Swizzles consume fImmA input slots and emit a fixed number of output slots.
        case BuilderOp::swizzle_1:
            return 1 - inst.fImmA;
        case BuilderOp::swizzle_2:
            return 2 - inst.fImmA;
        case BuilderOp::swizzle_3:
            return 3 - inst.fImmA;
        case BuilderOp::swizzle_4:
            return 4 - inst.fImmA;

        // Unary ops work in place; everything else leaves the stack depth unchanged.
        case ALL_MULTI_SLOT_UNARY_OP_CASES:
        default:
            return 0;
    }
}
| |
| Program::StackDepthMap Program::tempStackMaxDepths() { |
| StackDepthMap largest; |
| StackDepthMap current; |
| |
| int curIdx = 0; |
| for (const Instruction& inst : fInstructions) { |
| if (inst.fOp == BuilderOp::set_current_stack) { |
| curIdx = inst.fImmA; |
| } |
| current[curIdx] += stack_usage(inst); |
| largest[curIdx] = std::max(current[curIdx], largest[curIdx]); |
| SkASSERTF(current[curIdx] >= 0, "unbalanced temp stack push/pop on stack %d", curIdx); |
| } |
| |
| for (const auto& [stackIdx, depth] : current) { |
| (void)stackIdx; |
| SkASSERTF(depth == 0, "unbalanced temp stack push/pop"); |
| } |
| |
| return largest; |
| } |
| |
// Takes ownership of the instruction list, runs the optimizer, and precomputes the total number
// of temporary-stack slots that appendStages will need.
Program::Program(SkTArray<Instruction> instrs,
                 int numValueSlots,
                 int numUniformSlots,
                 int numLabels,
                 int numBranches,
                 SkRPDebugTrace* debugTrace)
        : fInstructions(std::move(instrs))
        , fNumValueSlots(numValueSlots)
        , fNumUniformSlots(numUniformSlots)
        , fNumLabels(numLabels)
        , fNumBranches(numBranches)
        , fDebugTrace(debugTrace) {
    this->optimize();

    // Record the high-water mark of each temporary stack...
    fTempStackMaxDepths = this->tempStackMaxDepths();

    // ...and sum them; the stacks are laid out back-to-back in one allocation.
    fNumTempStackSlots = 0;
    for (const auto& [stackIdx, depth] : fTempStackMaxDepths) {
        (void)stackIdx;
        fNumTempStackSlots += depth;
    }

    // These are not used in SKSL_STANDALONE yet.
    (void)fDebugTrace;
    (void)fNumUniformSlots;
}
| |
// Appends a stage to the pipeline. In SKSL_STANDALONE builds there is no pipeline backend, so
// this compiles to a no-op.
void Program::append(SkRasterPipeline* pipeline, SkRasterPipeline::Stage stage, void* ctx) {
#if !defined(SKSL_STANDALONE)
    pipeline->append(stage, ctx);
#endif
}
| |
// Appends a stack-rewind stage. Only needed when SK_HAS_MUSTTAIL is unavailable; with musttail,
// pipeline stages tail-call and a rewind would be a no-op.
void Program::rewindPipeline(SkRasterPipeline* pipeline) {
#if !defined(SKSL_STANDALONE)
#if !SK_HAS_MUSTTAIL
    pipeline->append_stack_rewind();
#endif
#endif
}
| |
// Returns the number of stages currently appended to the pipeline (always zero in
// SKSL_STANDALONE builds, which have no pipeline backend).
int Program::getNumPipelineStages(SkRasterPipeline* pipeline) {
#if !defined(SKSL_STANDALONE)
    return pipeline->getNumStages();
#else
    return 0;
#endif
}
| |
| void Program::appendCopy(SkRasterPipeline* pipeline, |
| SkArenaAlloc* alloc, |
| SkRasterPipeline::Stage baseStage, |
| float* dst, int dstStride, |
| const float* src, int srcStride, |
| int numSlots) { |
| SkASSERT(numSlots >= 0); |
| while (numSlots > 4) { |
| this->appendCopy(pipeline, alloc, baseStage, dst, dstStride, src, srcStride,/*numSlots=*/4); |
| dst += 4 * dstStride; |
| src += 4 * srcStride; |
| numSlots -= 4; |
| } |
| |
| if (numSlots > 0) { |
| SkASSERT(numSlots <= 4); |
| auto stage = (SkRasterPipeline::Stage)((int)baseStage + numSlots - 1); |
| auto* ctx = alloc->make<SkRasterPipeline_BinaryOpCtx>(); |
| ctx->dst = dst; |
| ctx->src = src; |
| this->append(pipeline, stage, ctx); |
| } |
| } |
| |
| void Program::appendCopySlotsUnmasked(SkRasterPipeline* pipeline, |
| SkArenaAlloc* alloc, |
| float* dst, |
| const float* src, |
| int numSlots) { |
| this->appendCopy(pipeline, alloc, |
| SkRasterPipeline::copy_slot_unmasked, |
| dst, /*dstStride=*/SkOpts::raster_pipeline_highp_stride, |
| src, /*srcStride=*/SkOpts::raster_pipeline_highp_stride, |
| numSlots); |
| } |
| |
| void Program::appendCopySlotsMasked(SkRasterPipeline* pipeline, |
| SkArenaAlloc* alloc, |
| float* dst, |
| const float* src, |
| int numSlots) { |
| this->appendCopy(pipeline, alloc, |
| SkRasterPipeline::copy_slot_masked, |
| dst, /*dstStride=*/SkOpts::raster_pipeline_highp_stride, |
| src, /*srcStride=*/SkOpts::raster_pipeline_highp_stride, |
| numSlots); |
| } |
| |
| void Program::appendCopyConstants(SkRasterPipeline* pipeline, |
| SkArenaAlloc* alloc, |
| float* dst, |
| const float* src, |
| int numSlots) { |
| this->appendCopy(pipeline, alloc, |
| SkRasterPipeline::copy_constant, |
| dst, /*dstStride=*/SkOpts::raster_pipeline_highp_stride, |
| src, /*srcStride=*/1, |
| numSlots); |
| } |
| |
| void Program::appendMultiSlotUnaryOp(SkRasterPipeline* pipeline, SkRasterPipeline::Stage baseStage, |
| float* dst, int numSlots) { |
| SkASSERT(numSlots >= 0); |
| while (numSlots > 4) { |
| this->appendMultiSlotUnaryOp(pipeline, baseStage, dst, /*numSlots=*/4); |
| dst += 4 * SkOpts::raster_pipeline_highp_stride; |
| numSlots -= 4; |
| } |
| |
| SkASSERT(numSlots <= 4); |
| auto stage = (SkRasterPipeline::Stage)((int)baseStage + numSlots - 1); |
| this->append(pipeline, stage, dst); |
| } |
| |
| void Program::appendAdjacentMultiSlotBinaryOp(SkRasterPipeline* pipeline, SkArenaAlloc* alloc, |
| SkRasterPipeline::Stage baseStage, |
| float* dst, const float* src, int numSlots) { |
| // The source and destination must be directly next to one another. |
| SkASSERT(numSlots >= 0); |
| SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots) == src); |
| |
| if (numSlots > 4) { |
| auto ctx = alloc->make<SkRasterPipeline_BinaryOpCtx>(); |
| ctx->dst = dst; |
| ctx->src = src; |
| this->append(pipeline, baseStage, ctx); |
| return; |
| } |
| if (numSlots > 0) { |
| auto specializedStage = (SkRasterPipeline::Stage)((int)baseStage + numSlots); |
| this->append(pipeline, specializedStage, dst); |
| } |
| } |
| |
| void Program::appendAdjacentMultiSlotTernaryOp(SkRasterPipeline* pipeline, SkArenaAlloc* alloc, |
| SkRasterPipeline::Stage baseStage, float* dst, |
| const float* src0, const float* src1, int numSlots) { |
| // The float pointers must all be immediately adjacent to each other. |
| SkASSERT(numSlots >= 0); |
| SkASSERT((dst + SkOpts::raster_pipeline_highp_stride * numSlots) == src0); |
| SkASSERT((src0 + SkOpts::raster_pipeline_highp_stride * numSlots) == src1); |
| |
| if (numSlots > 4) { |
| auto ctx = alloc->make<SkRasterPipeline_TernaryOpCtx>(); |
| ctx->dst = dst; |
| ctx->src0 = src0; |
| ctx->src1 = src1; |
| this->append(pipeline, baseStage, ctx); |
| return; |
| } |
| if (numSlots > 0) { |
| auto specializedStage = (SkRasterPipeline::Stage)((int)baseStage + numSlots); |
| this->append(pipeline, specializedStage, dst); |
| } |
| } |
| |
// Smuggles a small value (an int, float, etc.) inside the bits of a context pointer, avoiding a
// separate allocation for tiny contexts. The value must not be larger than a pointer.
template <typename T>
[[maybe_unused]] static void* context_bit_pun(T val) {
    static_assert(sizeof(T) <= sizeof(void*));
    void* punnedBits = nullptr;
    memcpy(&punnedBits, &val, sizeof(T));
    return punnedBits;
}
| |
| Program::SlotData Program::allocateSlotData(SkArenaAlloc* alloc) { |
| // Allocate a contiguous slab of slot data for values and stack entries. |
| const int N = SkOpts::raster_pipeline_highp_stride; |
| const int vectorWidth = N * sizeof(float); |
| const int allocSize = vectorWidth * (fNumValueSlots + fNumTempStackSlots); |
| float* slotPtr = static_cast<float*>(alloc->makeBytesAlignedTo(allocSize, vectorWidth)); |
| sk_bzero(slotPtr, allocSize); |
| |
| // Store the temp stack immediately after the values. |
| SlotData s; |
| s.values = SkSpan{slotPtr, N * fNumValueSlots}; |
| s.stack = SkSpan{s.values.end(), N * fNumTempStackSlots}; |
| return s; |
| } |
| |
// Convenience overload: allocates fresh slot data from `alloc` before appending the stages.
void Program::appendStages(SkRasterPipeline* pipeline,
                           SkArenaAlloc* alloc,
                           SkSpan<const float> uniforms) {
    this->appendStages(pipeline, alloc, uniforms, this->allocateSlotData(alloc));
}
| |
| void Program::appendStages(SkRasterPipeline* pipeline, |
| SkArenaAlloc* alloc, |
| SkSpan<const float> uniforms, |
| const SlotData& slots) { |
| SkASSERT(fNumUniformSlots == SkToInt(uniforms.size())); |
| |
| const int N = SkOpts::raster_pipeline_highp_stride; |
| StackDepthMap tempStackDepth; |
| int currentStack = 0; |
| int mostRecentRewind = 0; |
| |
| // Allocate buffers for branch targets (used when running the program) and labels (only needed |
| // during initial program construction). |
| int* branchTargets = alloc->makeArrayDefault<int>(fNumBranches); |
| SkTArray<int> labelOffsets; |
| labelOffsets.push_back_n(fNumLabels, -1); |
| SkTArray<int> branchGoesToLabel; |
| branchGoesToLabel.push_back_n(fNumBranches, -1); |
| int currentBranchOp = 0; |
| |
| // Assemble a map holding the current stack-top for each temporary stack. Position each temp |
| // stack immediately after the previous temp stack; temp stacks are never allowed to overlap. |
| int pos = 0; |
| SkTHashMap<int, float*> tempStackMap; |
| for (auto& [idx, depth] : fTempStackMaxDepths) { |
| tempStackMap[idx] = slots.stack.begin() + (pos * N); |
| pos += depth; |
| } |
| |
| // We can reuse constants from our arena by placing them in this map. |
| SkTHashMap<int, int*> constantLookupMap; // <constant value, pointer into arena> |
| |
| // Write each BuilderOp to the pipeline. |
| for (const Instruction& inst : fInstructions) { |
| auto SlotA = [&]() { return &slots.values[N * inst.fSlotA]; }; |
| auto SlotB = [&]() { return &slots.values[N * inst.fSlotB]; }; |
| auto UniformA = [&]() { return &uniforms[inst.fSlotA]; }; |
| float*& tempStackPtr = tempStackMap[currentStack]; |
| |
| switch (inst.fOp) { |
| case BuilderOp::label: |
| // Write the absolute pipeline position into the label offset list. We will go over |
| // the branch targets at the end and fix them up. |
| SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels); |
| labelOffsets[inst.fImmA] = this->getNumPipelineStages(pipeline); |
| break; |
| |
| case BuilderOp::jump: |
| case BuilderOp::branch_if_any_active_lanes: |
| case BuilderOp::branch_if_no_active_lanes: |
| // If we have already encountered the label associated with this branch, this is a |
| // backwards branch. Add a stack-rewind immediately before the branch to ensure that |
| // long-running loops don't use an unbounded amount of stack space. |
| if (labelOffsets[inst.fImmA] >= 0) { |
| this->rewindPipeline(pipeline); |
| mostRecentRewind = this->getNumPipelineStages(pipeline); |
| } |
| |
| // Write the absolute pipeline position into the branch targets, because the |
| // associated label might not have been reached yet. We will go back over the branch |
| // targets at the end and fix them up. |
| SkASSERT(inst.fImmA >= 0 && inst.fImmA < fNumLabels); |
| SkASSERT(currentBranchOp >= 0 && currentBranchOp < fNumBranches); |
| branchTargets[currentBranchOp] = this->getNumPipelineStages(pipeline); |
| branchGoesToLabel[currentBranchOp] = inst.fImmA; |
| this->append(pipeline, (SkRP::Stage)inst.fOp, &branchTargets[currentBranchOp]); |
| ++currentBranchOp; |
| break; |
| |
| case BuilderOp::init_lane_masks: |
| this->append(pipeline, SkRP::init_lane_masks); |
| break; |
| |
| case BuilderOp::store_src_rg: |
| this->append(pipeline, SkRP::store_src_rg, SlotA()); |
| break; |
| |
| case BuilderOp::store_src: |
| this->append(pipeline, SkRP::store_src, SlotA()); |
| break; |
| |
| case BuilderOp::store_dst: |
| this->append(pipeline, SkRP::store_dst, SlotA()); |
| break; |
| |
| case BuilderOp::load_src: |
| this->append(pipeline, SkRP::load_src, SlotA()); |
| break; |
| |
| case BuilderOp::load_dst: |
| this->append(pipeline, SkRP::load_dst, SlotA()); |
| break; |
| |
| case BuilderOp::immediate_f: { |
| this->append(pipeline, SkRP::immediate_f, context_bit_pun(inst.fImmA)); |
| break; |
| } |
| case BuilderOp::load_unmasked: |
| this->append(pipeline, SkRP::load_unmasked, SlotA()); |
| break; |
| |
| case BuilderOp::store_unmasked: |
| this->append(pipeline, SkRP::store_unmasked, SlotA()); |
| break; |
| |
| case BuilderOp::store_masked: |
| this->append(pipeline, SkRP::store_masked, SlotA()); |
| break; |
| |
| case ALL_MULTI_SLOT_UNARY_OP_CASES: { |
| float* dst = tempStackPtr - (inst.fImmA * N); |
| this->appendMultiSlotUnaryOp(pipeline, (SkRP::Stage)inst.fOp, dst, inst.fImmA); |
| break; |
| } |
| case ALL_MULTI_SLOT_BINARY_OP_CASES: { |
| float* src = tempStackPtr - (inst.fImmA * N); |
| float* dst = tempStackPtr - (inst.fImmA * 2 * N); |
| this->appendAdjacentMultiSlotBinaryOp(pipeline, alloc, (SkRP::Stage)inst.fOp, |
| dst, src, inst.fImmA); |
| break; |
| } |
| case ALL_MULTI_SLOT_TERNARY_OP_CASES: { |
| float* src1 = tempStackPtr - (inst.fImmA * N); |
| float* src0 = tempStackPtr - (inst.fImmA * 2 * N); |
| float* dst = tempStackPtr - (inst.fImmA * 3 * N); |
| this->appendAdjacentMultiSlotTernaryOp(pipeline, alloc, (SkRP::Stage)inst.fOp, |
| dst, src0, src1, inst.fImmA); |
| break; |
| } |
| case BuilderOp::select: { |
| float* src = tempStackPtr - (inst.fImmA * N); |
| float* dst = tempStackPtr - (inst.fImmA * 2 * N); |
| this->appendCopySlotsMasked(pipeline, alloc, dst, src, inst.fImmA); |
| break; |
| } |
| case BuilderOp::copy_slot_masked: |
| this->appendCopySlotsMasked(pipeline, alloc, SlotA(), SlotB(), inst.fImmA); |
| break; |
| |
| case BuilderOp::copy_slot_unmasked: |
| this->appendCopySlotsUnmasked(pipeline, alloc, SlotA(), SlotB(), inst.fImmA); |
| break; |
| |
| case BuilderOp::zero_slot_unmasked: |
| this->appendMultiSlotUnaryOp(pipeline, SkRP::zero_slot_unmasked, |
| SlotA(), inst.fImmA); |
| break; |
| |
| case BuilderOp::swizzle_1: |
| case BuilderOp::swizzle_2: |
| case BuilderOp::swizzle_3: |
| case BuilderOp::swizzle_4: { |
| auto* ctx = alloc->make<SkRasterPipeline_SwizzleCtx>(); |
| ctx->ptr = tempStackPtr - (N * inst.fImmA); |
| // Unpack component nybbles into byte-offsets pointing at stack slots. |
| int components = inst.fImmB; |
| for (size_t index = 0; index < std::size(ctx->offsets); ++index) { |
| ctx->offsets[index] = (components & 3) * N * sizeof(float); |
| components >>= 4; |
| } |
| this->append(pipeline, (SkRP::Stage)inst.fOp, ctx); |
| break; |
| } |
| case BuilderOp::transpose: { |
| auto* ctx = alloc->make<SkRasterPipeline_TransposeCtx>(); |
| ctx->ptr = tempStackPtr - (N * inst.fImmA * inst.fImmB); |
| ctx->count = inst.fImmA * inst.fImmB; |
| sk_bzero(ctx->offsets, std::size(ctx->offsets)); |
| size_t index = 0; |
| for (int r = 0; r < inst.fImmB; ++r) { |
| for (int c = 0; c < inst.fImmA; ++c) { |
| ctx->offsets[index++] = ((c * inst.fImmB) + r) * N * sizeof(float); |
| } |
| } |
| this->append(pipeline, SkRP::Stage::transpose, ctx); |
| break; |
| } |
| case BuilderOp::push_slots: { |
| float* dst = tempStackPtr; |
| this->appendCopySlotsUnmasked(pipeline, alloc, dst, SlotA(), inst.fImmA); |
| break; |
| } |
| case BuilderOp::push_uniform: { |
| float* dst = tempStackPtr; |
| this->appendCopyConstants(pipeline, alloc, dst, UniformA(), inst.fImmA); |
| break; |
| } |
| case BuilderOp::push_zeros: { |
| float* dst = tempStackPtr; |
| this->appendMultiSlotUnaryOp(pipeline, SkRP::zero_slot_unmasked, dst, inst.fImmA); |
| break; |
| } |
| case BuilderOp::push_condition_mask: { |
| float* dst = tempStackPtr; |
| this->append(pipeline, SkRP::store_condition_mask, dst); |
| break; |
| } |
| case BuilderOp::pop_condition_mask: { |
| float* src = tempStackPtr - (1 * N); |
| this->append(pipeline, SkRP::load_condition_mask, src); |
| break; |
| } |
| case BuilderOp::merge_condition_mask: { |
| float* ptr = tempStackPtr - (2 * N); |
| this->append(pipeline, SkRP::merge_condition_mask, ptr); |
| break; |
| } |
| case BuilderOp::push_loop_mask: { |
| float* dst = tempStackPtr; |
| this->append(pipeline, SkRP::store_loop_mask, dst); |
| break; |
| } |
| case BuilderOp::pop_loop_mask: { |
| float* src = tempStackPtr - (1 * N); |
| this->append(pipeline, SkRP::load_loop_mask, src); |
| break; |
| } |
| case BuilderOp::mask_off_loop_mask: |
| this->append(pipeline, SkRP::mask_off_loop_mask); |
| break; |
| |
| case BuilderOp::reenable_loop_mask: |
| this->append(pipeline, SkRP::reenable_loop_mask, SlotA()); |
| break; |
| |
| case BuilderOp::merge_loop_mask: { |
| float* src = tempStackPtr - (1 * N); |
| this->append(pipeline, SkRP::merge_loop_mask, src); |
| break; |
| } |
| case BuilderOp::push_return_mask: { |
| float* dst = tempStackPtr; |
| this->append(pipeline, SkRP::store_return_mask, dst); |
| break; |
| } |
| case BuilderOp::pop_return_mask: { |
| float* src = tempStackPtr - (1 * N); |
| this->append(pipeline, SkRP::load_return_mask, src); |
| break; |
| } |
| case BuilderOp::mask_off_return_mask: |
| this->append(pipeline, SkRP::mask_off_return_mask); |
| break; |
| |
| case BuilderOp::push_literal_f: { |
| float* dst = tempStackPtr; |
| if (inst.fImmA == 0) { |
| this->append(pipeline, SkRP::zero_slot_unmasked, dst); |
| break; |
| } |
| int* constantPtr; |
| if (int** lookup = constantLookupMap.find(inst.fImmA)) { |
| constantPtr = *lookup; |
| } else { |
| constantPtr = alloc->make<int>(inst.fImmA); |
| constantLookupMap[inst.fImmA] = constantPtr; |
| } |
| SkASSERT(constantPtr); |
| this->appendCopyConstants(pipeline, alloc, dst, (float*)constantPtr,/*numSlots=*/1); |
| break; |
| } |
| case BuilderOp::copy_stack_to_slots: { |
| float* src = tempStackPtr - (inst.fImmB * N); |
| this->appendCopySlotsMasked(pipeline, alloc, SlotA(), src, inst.fImmA); |
| break; |
| } |
| case BuilderOp::copy_stack_to_slots_unmasked: { |
| float* src = tempStackPtr - (inst.fImmB * N); |
| this->appendCopySlotsUnmasked(pipeline, alloc, SlotA(), src, inst.fImmA); |
| break; |
| } |
| case BuilderOp::push_clone: { |
| float* src = tempStackPtr - (inst.fImmB * N); |
| float* dst = tempStackPtr; |
| this->appendCopySlotsUnmasked(pipeline, alloc, dst, src, inst.fImmA); |
| break; |
| } |
| case BuilderOp::push_clone_from_stack: { |
| float* sourceStackPtr = tempStackMap[inst.fImmB]; |
| float* src = sourceStackPtr - (inst.fImmC * N); |
| float* dst = tempStackPtr; |
| this->appendCopySlotsUnmasked(pipeline, alloc, dst, src, inst.fImmA); |
| break; |
| } |
| case BuilderOp::discard_stack: |
| break; |
| |
| case BuilderOp::set_current_stack: |
| currentStack = inst.fImmA; |
| break; |
| |
| default: |
| SkDEBUGFAILF("Raster Pipeline: unsupported instruction %d", (int)inst.fOp); |
| break; |
| } |
| |
| tempStackPtr += stack_usage(inst) * N; |
| SkASSERT(tempStackPtr >= slots.stack.begin()); |
| SkASSERT(tempStackPtr <= slots.stack.end()); |
| |
| // Periodically rewind the stack every 500 instructions. When SK_HAS_MUSTTAIL is set, |
| // rewinds are not actually used; the rewindPipeline call becomes a no-op. On platforms that |
| // don't support SK_HAS_MUSTTAIL, rewinding the stack periodically can prevent a potential |
| // stack overflow when running a long program. |
| int numPipelineStages = this->getNumPipelineStages(pipeline); |
| if (numPipelineStages - mostRecentRewind > 500) { |
| this->rewindPipeline(pipeline); |
| mostRecentRewind = numPipelineStages; |
| } |
| } |
| |
| // Fix up every branch target. |
| for (int index = 0; index < fNumBranches; ++index) { |
| int branchFromIdx = branchTargets[index]; |
| int branchToIdx = labelOffsets[branchGoesToLabel[index]]; |
| branchTargets[index] = branchToIdx - branchFromIdx; |
| } |
| } |
| |
| void Program::dump(SkWStream* out) { |
| // TODO: skslc will want to dump these programs; we'll need to include some portion of |
| // SkRasterPipeline into skslc for this to work properly. |
| |
| #if !defined(SKSL_STANDALONE) |
| // Allocate memory for the slot and uniform data, even though the program won't ever be |
| // executed. The program requires pointer ranges for managing its data, and ASAN will report |
| // errors if those pointers are pointing at unallocated memory. |
| SkArenaAlloc alloc(/*firstHeapAllocation=*/1000); |
| const int N = SkOpts::raster_pipeline_highp_stride; |
| SlotData slots = this->allocateSlotData(&alloc); |
| float* uniformPtr = alloc.makeArray<float>(fNumUniformSlots); |
| SkSpan<float> uniforms = SkSpan(uniformPtr, fNumUniformSlots); |
| |
| // Instantiate this program. |
| SkRasterPipeline pipeline(&alloc); |
| this->appendStages(&pipeline, &alloc, uniforms, slots); |
| const SkRP::StageList* st = pipeline.getStageList(); |
| |
| // The stage list is in reverse order, so let's flip it. |
| struct Stage { |
| SkRP::Stage op; |
| void* ctx; |
| }; |
| SkTArray<Stage> stages; |
| for (; st != nullptr; st = st->prev) { |
| stages.push_back(Stage{st->stage, st->ctx}); |
| } |
| std::reverse(stages.begin(), stages.end()); |
| |
| // Emit the program's instruction list. |
| for (int index = 0; index < stages.size(); ++index) { |
| const Stage& stage = stages[index]; |
| |
| // Interpret the context value as a branch offset. |
| auto BranchOffset = [&](const void* ctx) -> std::string { |
| const int *ctxAsInt = static_cast<const int*>(ctx); |
| return SkSL::String::printf("%+d (#%d)", *ctxAsInt, *ctxAsInt + index + 1); |
| }; |
| |
| // Print a 32-bit immediate value of unknown type (int/float). |
| auto Imm = [&](float immFloat) -> std::string { |
| // Start with `0x3F800000` as a baseline. |
| uint32_t immUnsigned; |
| memcpy(&immUnsigned, &immFloat, sizeof(uint32_t)); |
| auto text = SkSL::String::printf("0x%08X", immUnsigned); |
| |
| // Extend it to `0x3F800000 (1.0)` for finite floating point values. |
| if (std::isfinite(immFloat)) { |
| text += " ("; |
| text += skstd::to_string(immFloat); |
| text += ")"; |
| } |
| return text; |
| }; |
| |
| // Interpret the context pointer as a 32-bit immediate value of unknown type (int/float). |
| auto ImmCtx = [&](const void* ctx) -> std::string { |
| float f; |
| memcpy(&f, &ctx, sizeof(float)); |
| return Imm(f); |
| }; |
| |
| // Print `1` for single slots and `1..3` for ranges of slots. |
| auto AsRange = [](int first, int count) -> std::string { |
| std::string text = std::to_string(first); |
| if (count > 1) { |
| text += ".." + std::to_string(first + count - 1); |
| } |
| return text; |
| }; |
| |
| // Attempts to interpret the passed-in pointer as a uniform range. |
| auto UniformPtrCtx = [&](const float* ptr, int numSlots) -> std::string { |
| if (fDebugTrace) { |
| // Handle pointers to named uniform slots. |
| if (ptr >= uniforms.begin() && ptr < uniforms.end()) { |
| int slotIdx = ptr - uniforms.begin(); |
| if (slotIdx < (int)fDebugTrace->fUniformInfo.size()) { |
| const SlotDebugInfo& slotInfo = fDebugTrace->fUniformInfo[slotIdx]; |
| if (!slotInfo.name.empty()) { |
| // If we're covering the entire uniform, return `uniName`. |
| if (numSlots == slotInfo.columns * slotInfo.rows) { |
| return slotInfo.name; |
| } |
| // If we are only covering part of the uniform, return `uniName(1..2)`. |
| return slotInfo.name + "(" + |
| AsRange(slotInfo.componentIndex, numSlots) + ")"; |
| } |
| } |
| } |
| } |
| // Handle pointers to uniforms (when no debug info exists). |
| if (ptr >= uniforms.begin() && ptr < uniforms.end()) { |
| int uniformIdx = ptr - uniforms.begin(); |
| return "u" + AsRange(uniformIdx, numSlots); |
| } |
| return {}; |
| }; |
| |
| // Attempts to interpret the passed-in pointer as a value slot range. |
| auto ValuePtrCtx = [&](const float* ptr, int numSlots) -> std::string { |
| if (fDebugTrace) { |
| // Handle pointers to named value slots. |
| if (ptr >= slots.values.begin() && ptr < slots.values.end()) { |
| int slotIdx = ptr - slots.values.begin(); |
| SkASSERT((slotIdx % N) == 0); |
| slotIdx /= N; |
| if (slotIdx < (int)fDebugTrace->fSlotInfo.size()) { |
| const SlotDebugInfo& slotInfo = fDebugTrace->fSlotInfo[slotIdx]; |
| if (!slotInfo.name.empty()) { |
| // If we're covering the entire slot, return `valueName`. |
| if (numSlots == slotInfo.columns * slotInfo.rows) { |
| return slotInfo.name; |
| } |
| // If we are only covering part of the slot, return `valueName(1..2)`. |
| return slotInfo.name + "(" + |
| AsRange(slotInfo.componentIndex, numSlots) + ")"; |
| } |
| } |
| } |
| } |
| // Handle pointers to value slots (when no debug info exists). |
| if (ptr >= slots.values.begin() && ptr < slots.values.end()) { |
| int valueIdx = ptr - slots.values.begin(); |
| SkASSERT((valueIdx % N) == 0); |
| return "v" + AsRange(valueIdx / N, numSlots); |
| } |
| return {}; |
| }; |
| |
| // Interpret the context value as a pointer to `count` immediate values. |
| auto MultiImmCtx = [&](const float* ptr, int count) -> std::string { |
| // If this is a uniform, print it by name. |
| if (std::string text = UniformPtrCtx(ptr, count); !text.empty()) { |
| return text; |
| } |
| // Emit a single unbracketed immediate. |
| if (count == 1) { |
| return Imm(*ptr); |
| } |
| // Emit a list like `[0x00000000 (0.0), 0x3F80000 (1.0)]`. |
| std::string text = "["; |
| auto separator = SkSL::String::Separator(); |
| while (count--) { |
| text += separator(); |
| text += Imm(*ptr++); |
| } |
| return text + "]"; |
| }; |
| |
| // Interpret the context value as a generic pointer. |
| auto PtrCtx = [&](const void* ctx, int numSlots) -> std::string { |
| const float *ctxAsSlot = static_cast<const float*>(ctx); |
| // Check for uniform and value pointers. |
| if (std::string uniform = UniformPtrCtx(ctxAsSlot, numSlots); !uniform.empty()) { |
| return uniform; |
| } |
| if (std::string value = ValuePtrCtx(ctxAsSlot, numSlots); !value.empty()) { |
| return value; |
| } |
| // Handle pointers to temporary stack slots. |
| if (ctxAsSlot >= slots.stack.begin() && ctxAsSlot < slots.stack.end()) { |
| int stackIdx = ctxAsSlot - slots.stack.begin(); |
| SkASSERT((stackIdx % N) == 0); |
| return "$" + AsRange(stackIdx / N, numSlots); |
| } |
| // This pointer is out of our expected bounds; this generally isn't expected to happen. |
| return "ExternalPtr(" + AsRange(0, numSlots) + ")"; |
| }; |
| |
| // Interpret the context value as a pointer to two adjacent values. |
| auto AdjacentPtrCtx = [&](const void* ctx, |
| int numSlots) -> std::tuple<std::string, std::string> { |
| const float *ctxAsSlot = static_cast<const float*>(ctx); |
| return std::make_tuple(PtrCtx(ctxAsSlot, numSlots), |
| PtrCtx(ctxAsSlot + (N * numSlots), numSlots)); |
| }; |
| |
| // Interpret the context value as a pointer to three adjacent values. |
| auto Adjacent3PtrCtx = [&](const void* ctx, int numSlots) -> |
| std::tuple<std::string, std::string, std::string> { |
| const float *ctxAsSlot = static_cast<const float*>(ctx); |
| return std::make_tuple(PtrCtx(ctxAsSlot, numSlots), |
| PtrCtx(ctxAsSlot + (N * numSlots), numSlots), |
| PtrCtx(ctxAsSlot + (2 * N * numSlots), numSlots)); |
| }; |
| |
| // Interpret the context value as a BinaryOp structure for copy_n_slots (numSlots is |
| // dictated by the op itself). |
| auto BinaryOpCtx = [&](const void* v, |
| int numSlots) -> std::tuple<std::string, std::string> { |
| const auto *ctx = static_cast<const SkRasterPipeline_BinaryOpCtx*>(v); |
| return std::make_tuple(PtrCtx(ctx->dst, numSlots), |
| PtrCtx(ctx->src, numSlots)); |
| }; |
| |
| // Interpret the context value as a BinaryOp structure for copy_n_constants (numSlots is |
| // dictated by the op itself). |
| auto CopyConstantCtx = [&](const void* v, |
| int numSlots) -> std::tuple<std::string, std::string> { |
| const auto *ctx = static_cast<const SkRasterPipeline_BinaryOpCtx*>(v); |
| return std::make_tuple(PtrCtx(ctx->dst, numSlots), |
| MultiImmCtx(ctx->src, numSlots)); |
| }; |
| |
| // Interpret the context value as a BinaryOp structure (numSlots is inferred from the |
| // distance between pointers). |
| auto AdjacentBinaryOpCtx = [&](const void* v) -> std::tuple<std::string, std::string> { |
| const auto *ctx = static_cast<const SkRasterPipeline_BinaryOpCtx*>(v); |
| int numSlots = (ctx->src - ctx->dst) / N; |
| return AdjacentPtrCtx(ctx->dst, numSlots); |
| }; |
| |
| // Interpret the context value as a TernaryOp structure (numSlots is inferred from the |
| // distance between pointers). |
| auto AdjacentTernaryOpCtx = [&](const void* v) -> |
| std::tuple<std::string, std::string, std::string> { |
| const auto* ctx = static_cast<const SkRasterPipeline_TernaryOpCtx*>(v); |
| int numSlots = (ctx->src0 - ctx->dst) / N; |
| return Adjacent3PtrCtx(ctx->dst, numSlots); |
| }; |
| |
| // Interpret the context value as a Swizzle structure. Note that the slot-width of the |
| // source expression is not preserved in the instruction encoding, so we need to do our best |
| // using the data we have. (e.g., myFloat4.y would be indistinguishable from myFloat2.y.) |
| auto SwizzleCtx = [&](SkRP::Stage op, |
| const void* v) -> std::tuple<std::string, std::string> { |
| const auto* ctx = static_cast<const SkRasterPipeline_SwizzleCtx*>(v); |
| |
| int destSlots = (int)op - (int)SkRP::swizzle_1 + 1; |
| int highestComponent = |
| *std::max_element(std::begin(ctx->offsets), std::end(ctx->offsets)) / |
| (N * sizeof(float)); |
| |
| std::string src = "(" + PtrCtx(ctx->ptr, std::max(destSlots, highestComponent + 1)) + |
| ")."; |
| for (int index = 0; index < destSlots; ++index) { |
| if (ctx->offsets[index] == (0 * N * sizeof(float))) { |
| src.push_back('x'); |
| } else if (ctx->offsets[index] == (1 * N * sizeof(float))) { |
| src.push_back('y'); |
| } else if (ctx->offsets[index] == (2 * N * sizeof(float))) { |
| src.push_back('z'); |
| } else if (ctx->offsets[index] == (3 * N * sizeof(float))) { |
| src.push_back('w'); |
| } else { |
| src.push_back('?'); |
| } |
| } |
| |
| return std::make_tuple(PtrCtx(ctx->ptr, destSlots), src); |
| }; |
| |
| // Interpret the context value as a Transpose structure. |
| auto TransposeCtx = [&](SkRP::Stage op, |
| const void* v) -> std::tuple<std::string, std::string> { |
| const auto* ctx = static_cast<const SkRasterPipeline_TransposeCtx*>(v); |
| |
| std::string dst = PtrCtx(ctx->ptr, ctx->count); |
| std::string src = "(" + dst + ")["; |
| for (int index = 0; index < ctx->count; ++index) { |
| if (ctx->offsets[index] % (N * sizeof(float))) { |
| src.push_back('?'); |
| } else { |
| src += std::to_string(ctx->offsets[index] / (N * sizeof(float))); |
| } |
| src.push_back(' '); |
| } |
| src.back() = ']'; |
| return std::make_tuple(dst, src); |
| }; |
| |
        // Decode this stage's context into up to three printable arguments; which decoder
        // applies is determined entirely by the stage's op.
        std::string opArg1, opArg2, opArg3;
        switch (stage.op) {
            case SkRP::immediate_f:
                opArg1 = ImmCtx(stage.ctx);
                break;

            case SkRP::swizzle_1:
            case SkRP::swizzle_2:
            case SkRP::swizzle_3:
            case SkRP::swizzle_4:
                std::tie(opArg1, opArg2) = SwizzleCtx(stage.op, stage.ctx);
                break;

            case SkRP::transpose:
                std::tie(opArg1, opArg2) = TransposeCtx(stage.op, stage.ctx);
                break;

            // Ops whose context is a plain pointer to a single slot.
            case SkRP::load_unmasked:
            case SkRP::load_condition_mask:
            case SkRP::store_condition_mask:
            case SkRP::load_loop_mask:
            case SkRP::store_loop_mask:
            case SkRP::merge_loop_mask:
            case SkRP::reenable_loop_mask:
            case SkRP::load_return_mask:
            case SkRP::store_return_mask:
            case SkRP::store_masked:
            case SkRP::store_unmasked:
            case SkRP::zero_slot_unmasked:
            case SkRP::bitwise_not_int:
            case SkRP::cast_to_float_from_int: case SkRP::cast_to_float_from_uint:
            case SkRP::cast_to_int_from_float: case SkRP::cast_to_uint_from_float:
            case SkRP::abs_float: case SkRP::abs_int:
            case SkRP::ceil_float:
            case SkRP::floor_float:
                opArg1 = PtrCtx(stage.ctx, 1);
                break;

            // Ops whose context is a plain pointer to two slots.
            case SkRP::store_src_rg:
            case SkRP::zero_2_slots_unmasked:
            case SkRP::bitwise_not_2_ints:
            case SkRP::cast_to_float_from_2_ints: case SkRP::cast_to_float_from_2_uints:
            case SkRP::cast_to_int_from_2_floats: case SkRP::cast_to_uint_from_2_floats:
            case SkRP::abs_2_floats: case SkRP::abs_2_ints:
            case SkRP::ceil_2_floats:
            case SkRP::floor_2_floats:
                opArg1 = PtrCtx(stage.ctx, 2);
                break;

            // Ops whose context is a plain pointer to three slots.
            case SkRP::zero_3_slots_unmasked:
            case SkRP::bitwise_not_3_ints:
            case SkRP::cast_to_float_from_3_ints: case SkRP::cast_to_float_from_3_uints:
            case SkRP::cast_to_int_from_3_floats: case SkRP::cast_to_uint_from_3_floats:
            case SkRP::abs_3_floats: case SkRP::abs_3_ints:
            case SkRP::ceil_3_floats:
            case SkRP::floor_3_floats:
                opArg1 = PtrCtx(stage.ctx, 3);
                break;

            // Ops whose context is a plain pointer to four slots.
            case SkRP::load_src:
            case SkRP::load_dst:
            case SkRP::store_src:
            case SkRP::store_dst:
            case SkRP::zero_4_slots_unmasked:
            case SkRP::bitwise_not_4_ints:
            case SkRP::cast_to_float_from_4_ints: case SkRP::cast_to_float_from_4_uints:
            case SkRP::cast_to_int_from_4_floats: case SkRP::cast_to_uint_from_4_floats:
            case SkRP::abs_4_floats: case SkRP::abs_4_ints:
            case SkRP::ceil_4_floats:
            case SkRP::floor_4_floats:
                opArg1 = PtrCtx(stage.ctx, 4);
                break;

            case SkRP::copy_constant:
                std::tie(opArg1, opArg2) = CopyConstantCtx(stage.ctx, 1);
                break;

            case SkRP::copy_2_constants:
                std::tie(opArg1, opArg2) = CopyConstantCtx(stage.ctx, 2);
                break;

            case SkRP::copy_3_constants:
                std::tie(opArg1, opArg2) = CopyConstantCtx(stage.ctx, 3);
                break;

            case SkRP::copy_4_constants:
                std::tie(opArg1, opArg2) = CopyConstantCtx(stage.ctx, 4);
                break;

            case SkRP::copy_slot_masked:
            case SkRP::copy_slot_unmasked:
                std::tie(opArg1, opArg2) = BinaryOpCtx(stage.ctx, 1);
                break;

            case SkRP::copy_2_slots_masked:
            case SkRP::copy_2_slots_unmasked:
                std::tie(opArg1, opArg2) = BinaryOpCtx(stage.ctx, 2);
                break;

            case SkRP::copy_3_slots_masked:
            case SkRP::copy_3_slots_unmasked:
                std::tie(opArg1, opArg2) = BinaryOpCtx(stage.ctx, 3);
                break;

            case SkRP::copy_4_slots_masked:
            case SkRP::copy_4_slots_unmasked:
                std::tie(opArg1, opArg2) = BinaryOpCtx(stage.ctx, 4);
                break;

            // Binary ops on one slot: operands are two adjacent single-slot ranges.
            case SkRP::merge_condition_mask:
            case SkRP::add_float: case SkRP::add_int:
            case SkRP::sub_float: case SkRP::sub_int:
            case SkRP::mul_float: case SkRP::mul_int:
            case SkRP::div_float: case SkRP::div_int: case SkRP::div_uint:
            case SkRP::bitwise_and_int:
            case SkRP::bitwise_or_int:
            case SkRP::bitwise_xor_int:
            case SkRP::min_float: case SkRP::min_int: case SkRP::min_uint:
            case SkRP::max_float: case SkRP::max_int: case SkRP::max_uint:
            case SkRP::cmplt_float: case SkRP::cmplt_int: case SkRP::cmplt_uint:
            case SkRP::cmple_float: case SkRP::cmple_int: case SkRP::cmple_uint:
            case SkRP::cmpeq_float: case SkRP::cmpeq_int:
            case SkRP::cmpne_float: case SkRP::cmpne_int:
                std::tie(opArg1, opArg2) = AdjacentPtrCtx(stage.ctx, 1);
                break;

            case SkRP::mix_float:
                std::tie(opArg1, opArg2, opArg3) = Adjacent3PtrCtx(stage.ctx, 1);
                break;

            // Binary ops on two slots: operands are two adjacent two-slot ranges.
            case SkRP::add_2_floats: case SkRP::add_2_ints:
            case SkRP::sub_2_floats: case SkRP::sub_2_ints:
            case SkRP::mul_2_floats: case SkRP::mul_2_ints:
            case SkRP::div_2_floats: case SkRP::div_2_ints: case SkRP::div_2_uints:
            case SkRP::bitwise_and_2_ints:
            case SkRP::bitwise_or_2_ints:
            case SkRP::bitwise_xor_2_ints:
            case SkRP::min_2_floats: case SkRP::min_2_ints: case SkRP::min_2_uints:
            case SkRP::max_2_floats: case SkRP::max_2_ints: case SkRP::max_2_uints:
            case SkRP::cmplt_2_floats: case SkRP::cmplt_2_ints: case SkRP::cmplt_2_uints:
            case SkRP::cmple_2_floats: case SkRP::cmple_2_ints: case SkRP::cmple_2_uints:
            case SkRP::cmpeq_2_floats: case SkRP::cmpeq_2_ints:
            case SkRP::cmpne_2_floats: case SkRP::cmpne_2_ints:
                std::tie(opArg1, opArg2) = AdjacentPtrCtx(stage.ctx, 2);
                break;

            case SkRP::mix_2_floats:
                std::tie(opArg1, opArg2, opArg3) = Adjacent3PtrCtx(stage.ctx, 2);
                break;

            // Binary ops on three slots: operands are two adjacent three-slot ranges.
            case SkRP::add_3_floats: case SkRP::add_3_ints:
            case SkRP::sub_3_floats: case SkRP::sub_3_ints:
            case SkRP::mul_3_floats: case SkRP::mul_3_ints:
            case SkRP::div_3_floats: case SkRP::div_3_ints: case SkRP::div_3_uints:
            case SkRP::bitwise_and_3_ints:
            case SkRP::bitwise_or_3_ints:
            case SkRP::bitwise_xor_3_ints:
            case SkRP::min_3_floats: case SkRP::min_3_ints: case SkRP::min_3_uints:
            case SkRP::max_3_floats: case SkRP::max_3_ints: case SkRP::max_3_uints:
            case SkRP::cmplt_3_floats: case SkRP::cmplt_3_ints: case SkRP::cmplt_3_uints:
            case SkRP::cmple_3_floats: case SkRP::cmple_3_ints: case SkRP::cmple_3_uints:
            case SkRP::cmpeq_3_floats: case SkRP::cmpeq_3_ints:
            case SkRP::cmpne_3_floats: case SkRP::cmpne_3_ints:
                std::tie(opArg1, opArg2) = AdjacentPtrCtx(stage.ctx, 3);
                break;

            case SkRP::mix_3_floats:
                std::tie(opArg1, opArg2, opArg3) = Adjacent3PtrCtx(stage.ctx, 3);
                break;

            // Binary ops on four slots: operands are two adjacent four-slot ranges.
            case SkRP::add_4_floats: case SkRP::add_4_ints:
            case SkRP::sub_4_floats: case SkRP::sub_4_ints:
            case SkRP::mul_4_floats: case SkRP::mul_4_ints:
            case SkRP::div_4_floats: case SkRP::div_4_ints: case SkRP::div_4_uints:
            case SkRP::bitwise_and_4_ints:
            case SkRP::bitwise_or_4_ints:
            case SkRP::bitwise_xor_4_ints:
            case SkRP::min_4_floats: case SkRP::min_4_ints: case SkRP::min_4_uints:
            case SkRP::max_4_floats: case SkRP::max_4_ints: case SkRP::max_4_uints:
            case SkRP::cmplt_4_floats: case SkRP::cmplt_4_ints: case SkRP::cmplt_4_uints:
            case SkRP::cmple_4_floats: case SkRP::cmple_4_ints: case SkRP::cmple_4_uints:
            case SkRP::cmpeq_4_floats: case SkRP::cmpeq_4_ints:
            case SkRP::cmpne_4_floats: case SkRP::cmpne_4_ints:
                std::tie(opArg1, opArg2) = AdjacentPtrCtx(stage.ctx, 4);
                break;

            case SkRP::mix_4_floats:
                std::tie(opArg1, opArg2, opArg3) = Adjacent3PtrCtx(stage.ctx, 4);
                break;

            // Variable-width binary ops: the slot count is deduced from the context.
            case SkRP::add_n_floats: case SkRP::add_n_ints:
            case SkRP::sub_n_floats: case SkRP::sub_n_ints:
            case SkRP::mul_n_floats: case SkRP::mul_n_ints:
            case SkRP::div_n_floats: case SkRP::div_n_ints: case SkRP::div_n_uints:
            case SkRP::bitwise_and_n_ints:
            case SkRP::bitwise_or_n_ints:
            case SkRP::bitwise_xor_n_ints:
            case SkRP::min_n_floats: case SkRP::min_n_ints: case SkRP::min_n_uints:
            case SkRP::max_n_floats: case SkRP::max_n_ints: case SkRP::max_n_uints:
            case SkRP::cmplt_n_floats: case SkRP::cmplt_n_ints: case SkRP::cmplt_n_uints:
            case SkRP::cmple_n_floats: case SkRP::cmple_n_ints: case SkRP::cmple_n_uints:
            case SkRP::cmpeq_n_floats: case SkRP::cmpeq_n_ints:
            case SkRP::cmpne_n_floats: case SkRP::cmpne_n_ints:
                std::tie(opArg1, opArg2) = AdjacentBinaryOpCtx(stage.ctx);
                break;

            case SkRP::mix_n_floats:
                std::tie(opArg1, opArg2, opArg3) = AdjacentTernaryOpCtx(stage.ctx);
                break;

            case SkRP::jump:
            case SkRP::branch_if_any_active_lanes:
            case SkRP::branch_if_no_active_lanes:
                opArg1 = BranchOffset(stage.ctx);
                break;

            // Ops with no context leave all arguments empty.
            default:
                break;
        }
| |
        // Render the op plus its decoded arguments as a human-readable pseudo-assignment.
        const char* opName = SkRasterPipeline::GetStageName(stage.op);
        std::string opText;
        switch (stage.op) {
            case SkRP::init_lane_masks:
                opText = "CondMask = LoopMask = RetMask = true";
                break;

            case SkRP::load_condition_mask:
                opText = "CondMask = " + opArg1;
                break;

            case SkRP::store_condition_mask:
                opText = opArg1 + " = CondMask";
                break;

            case SkRP::merge_condition_mask:
                opText = "CondMask = " + opArg1 + " & " + opArg2;
                break;

            case SkRP::load_loop_mask:
                opText = "LoopMask = " + opArg1;
                break;

            case SkRP::store_loop_mask:
                opText = opArg1 + " = LoopMask";
                break;

            case SkRP::mask_off_loop_mask:
                opText = "LoopMask &= ~(CondMask & LoopMask & RetMask)";
                break;

            case SkRP::reenable_loop_mask:
                opText = "LoopMask |= " + opArg1;
                break;

            case SkRP::merge_loop_mask:
                opText = "LoopMask &= " + opArg1;
                break;

            case SkRP::load_return_mask:
                opText = "RetMask = " + opArg1;
                break;

            case SkRP::store_return_mask:
                opText = opArg1 + " = RetMask";
                break;

            case SkRP::mask_off_return_mask:
                opText = "RetMask &= ~(CondMask & LoopMask & RetMask)";
                break;

            case SkRP::immediate_f:
            case SkRP::load_unmasked:
                opText = "src.r = " + opArg1;
                break;

            case SkRP::store_unmasked:
                opText = opArg1 + " = src.r";
                break;

            case SkRP::store_src_rg:
                opText = opArg1 + " = src.rg";
                break;

            case SkRP::store_src:
                opText = opArg1 + " = src.rgba";
                break;

            case SkRP::store_dst:
                opText = opArg1 + " = dst.rgba";
                break;

            case SkRP::load_src:
                opText = "src.rgba = " + opArg1;
                break;

            case SkRP::load_dst:
                opText = "dst.rgba = " + opArg1;
                break;

            case SkRP::store_masked:
                opText = opArg1 + " = Mask(src.r)";
                break;

            case SkRP::bitwise_and_int:
            case SkRP::bitwise_and_2_ints:
            case SkRP::bitwise_and_3_ints:
            case SkRP::bitwise_and_4_ints:
            case SkRP::bitwise_and_n_ints:
                opText = opArg1 + " &= " + opArg2;
                break;

            case SkRP::bitwise_or_int:
            case SkRP::bitwise_or_2_ints:
            case SkRP::bitwise_or_3_ints:
            case SkRP::bitwise_or_4_ints:
            case SkRP::bitwise_or_n_ints:
                opText = opArg1 + " |= " + opArg2;
                break;

            case SkRP::bitwise_xor_int:
            case SkRP::bitwise_xor_2_ints:
            case SkRP::bitwise_xor_3_ints:
            case SkRP::bitwise_xor_4_ints:
            case SkRP::bitwise_xor_n_ints:
                opText = opArg1 + " ^= " + opArg2;
                break;

            case SkRP::bitwise_not_int:
            case SkRP::bitwise_not_2_ints:
            case SkRP::bitwise_not_3_ints:
            case SkRP::bitwise_not_4_ints:
                opText = opArg1 + " = ~" + opArg1;
                break;

            case SkRP::cast_to_float_from_int:
            case SkRP::cast_to_float_from_2_ints:
            case SkRP::cast_to_float_from_3_ints:
            case SkRP::cast_to_float_from_4_ints:
                opText = opArg1 + " = IntToFloat(" + opArg1 + ")";
                break;

            case SkRP::cast_to_float_from_uint:
            case SkRP::cast_to_float_from_2_uints:
            case SkRP::cast_to_float_from_3_uints:
            case SkRP::cast_to_float_from_4_uints:
                opText = opArg1 + " = UintToFloat(" + opArg1 + ")";
                break;

            case SkRP::cast_to_int_from_float:
            case SkRP::cast_to_int_from_2_floats:
            case SkRP::cast_to_int_from_3_floats:
            case SkRP::cast_to_int_from_4_floats:
                opText = opArg1 + " = FloatToInt(" + opArg1 + ")";
                break;

            case SkRP::cast_to_uint_from_float:
            case SkRP::cast_to_uint_from_2_floats:
            case SkRP::cast_to_uint_from_3_floats:
            case SkRP::cast_to_uint_from_4_floats:
                opText = opArg1 + " = FloatToUint(" + opArg1 + ")";
                break;

            case SkRP::copy_slot_masked: case SkRP::copy_2_slots_masked:
            case SkRP::copy_3_slots_masked: case SkRP::copy_4_slots_masked:
                opText = opArg1 + " = Mask(" + opArg2 + ")";
                break;

            // Unconditional copies, swizzles and transposes all render as plain assignment.
            case SkRP::copy_constant: case SkRP::copy_2_constants:
            case SkRP::copy_3_constants: case SkRP::copy_4_constants:
            case SkRP::copy_slot_unmasked: case SkRP::copy_2_slots_unmasked:
            case SkRP::copy_3_slots_unmasked: case SkRP::copy_4_slots_unmasked:
            case SkRP::swizzle_1: case SkRP::swizzle_2:
            case SkRP::swizzle_3: case SkRP::swizzle_4:
            case SkRP::transpose:
                opText = opArg1 + " = " + opArg2;
                break;

            case SkRP::zero_slot_unmasked: case SkRP::zero_2_slots_unmasked:
            case SkRP::zero_3_slots_unmasked: case SkRP::zero_4_slots_unmasked:
                opText = opArg1 + " = 0";
                break;

            case SkRP::abs_float: case SkRP::abs_int:
            case SkRP::abs_2_floats: case SkRP::abs_2_ints:
            case SkRP::abs_3_floats: case SkRP::abs_3_ints:
            case SkRP::abs_4_floats: case SkRP::abs_4_ints:
                opText = opArg1 + " = abs(" + opArg1 + ")";
                break;

            case SkRP::ceil_float:
            case SkRP::ceil_2_floats:
            case SkRP::ceil_3_floats:
            case SkRP::ceil_4_floats:
                opText = opArg1 + " = ceil(" + opArg1 + ")";
                break;

            case SkRP::floor_float:
            case SkRP::floor_2_floats:
            case SkRP::floor_3_floats:
            case SkRP::floor_4_floats:
                opText = opArg1 + " = floor(" + opArg1 + ")";
                break;

            case SkRP::add_float: case SkRP::add_int:
            case SkRP::add_2_floats: case SkRP::add_2_ints:
            case SkRP::add_3_floats: case SkRP::add_3_ints:
            case SkRP::add_4_floats: case SkRP::add_4_ints:
            case SkRP::add_n_floats: case SkRP::add_n_ints:
                opText = opArg1 + " += " + opArg2;
                break;

            case SkRP::sub_float: case SkRP::sub_int:
            case SkRP::sub_2_floats: case SkRP::sub_2_ints:
            case SkRP::sub_3_floats: case SkRP::sub_3_ints:
            case SkRP::sub_4_floats: case SkRP::sub_4_ints:
            case SkRP::sub_n_floats: case SkRP::sub_n_ints:
                opText = opArg1 + " -= " + opArg2;
                break;

            case SkRP::mul_float: case SkRP::mul_int:
            case SkRP::mul_2_floats: case SkRP::mul_2_ints:
            case SkRP::mul_3_floats: case SkRP::mul_3_ints:
            case SkRP::mul_4_floats: case SkRP::mul_4_ints:
            case SkRP::mul_n_floats: case SkRP::mul_n_ints:
                opText = opArg1 + " *= " + opArg2;
                break;

            case SkRP::div_float: case SkRP::div_int: case SkRP::div_uint:
            case SkRP::div_2_floats: case SkRP::div_2_ints: case SkRP::div_2_uints:
            case SkRP::div_3_floats: case SkRP::div_3_ints: case SkRP::div_3_uints:
            case SkRP::div_4_floats: case SkRP::div_4_ints: case SkRP::div_4_uints:
            case SkRP::div_n_floats: case SkRP::div_n_ints: case SkRP::div_n_uints:
                opText = opArg1 + " /= " + opArg2;
                break;

            case SkRP::min_float: case SkRP::min_int: case SkRP::min_uint:
            case SkRP::min_2_floats: case SkRP::min_2_ints: case SkRP::min_2_uints:
            case SkRP::min_3_floats: case SkRP::min_3_ints: case SkRP::min_3_uints:
            case SkRP::min_4_floats: case SkRP::min_4_ints: case SkRP::min_4_uints:
            case SkRP::min_n_floats: case SkRP::min_n_ints: case SkRP::min_n_uints:
                opText = opArg1 + " = min(" + opArg1 + ", " + opArg2 + ")";
                break;

            case SkRP::max_float: case SkRP::max_int: case SkRP::max_uint:
            case SkRP::max_2_floats: case SkRP::max_2_ints: case SkRP::max_2_uints:
            case SkRP::max_3_floats: case SkRP::max_3_ints: case SkRP::max_3_uints:
            case SkRP::max_4_floats: case SkRP::max_4_ints: case SkRP::max_4_uints:
            case SkRP::max_n_floats: case SkRP::max_n_ints: case SkRP::max_n_uints:
                opText = opArg1 + " = max(" + opArg1 + ", " + opArg2 + ")";
                break;

            case SkRP::cmplt_float: case SkRP::cmplt_int: case SkRP::cmplt_uint:
            case SkRP::cmplt_2_floats: case SkRP::cmplt_2_ints: case SkRP::cmplt_2_uints:
            case SkRP::cmplt_3_floats: case SkRP::cmplt_3_ints: case SkRP::cmplt_3_uints:
            case SkRP::cmplt_4_floats: case SkRP::cmplt_4_ints: case SkRP::cmplt_4_uints:
            case SkRP::cmplt_n_floats: case SkRP::cmplt_n_ints: case SkRP::cmplt_n_uints:
                opText = opArg1 + " = lessThan(" + opArg1 + ", " + opArg2 + ")";
                break;

            case SkRP::cmple_float: case SkRP::cmple_int: case SkRP::cmple_uint:
            case SkRP::cmple_2_floats: case SkRP::cmple_2_ints: case SkRP::cmple_2_uints:
            case SkRP::cmple_3_floats: case SkRP::cmple_3_ints: case SkRP::cmple_3_uints:
            case SkRP::cmple_4_floats: case SkRP::cmple_4_ints: case SkRP::cmple_4_uints:
            case SkRP::cmple_n_floats: case SkRP::cmple_n_ints: case SkRP::cmple_n_uints:
                opText = opArg1 + " = lessThanEqual(" + opArg1 + ", " + opArg2 + ")";
                break;

            case SkRP::cmpeq_float: case SkRP::cmpeq_int:
            case SkRP::cmpeq_2_floats: case SkRP::cmpeq_2_ints:
            case SkRP::cmpeq_3_floats: case SkRP::cmpeq_3_ints:
            case SkRP::cmpeq_4_floats: case SkRP::cmpeq_4_ints:
            case SkRP::cmpeq_n_floats: case SkRP::cmpeq_n_ints:
                opText = opArg1 + " = equal(" + opArg1 + ", " + opArg2 + ")";
                break;

            case SkRP::cmpne_float: case SkRP::cmpne_int:
            case SkRP::cmpne_2_floats: case SkRP::cmpne_2_ints:
            case SkRP::cmpne_3_floats: case SkRP::cmpne_3_ints:
            case SkRP::cmpne_4_floats: case SkRP::cmpne_4_ints:
            case SkRP::cmpne_n_floats: case SkRP::cmpne_n_ints:
                opText = opArg1 + " = notEqual(" + opArg1 + ", " + opArg2 + ")";
                break;

            case SkRP::mix_float:
            case SkRP::mix_2_floats:
            case SkRP::mix_3_floats:
            case SkRP::mix_4_floats:
            case SkRP::mix_n_floats:
                opText = opArg1 + " = mix(" + opArg1 + ", " + opArg2 + ", " + opArg3 + ")";
                break;

            case SkRP::jump:
            case SkRP::branch_if_any_active_lanes:
            case SkRP::branch_if_no_active_lanes:
                opText = std::string(opName) + " " + opArg1;
                break;

            // Ops with no pseudo-code representation print just the stage name below.
            default:
                break;
        }

        // Emit "  NNN. stage_name  pseudo-code" (pseudo-code omitted when empty).
        std::string line = !opText.empty()
                ? SkSL::String::printf("% 5d. %-30s %s\n", index + 1, opName, opText.c_str())
                : SkSL::String::printf("% 5d. %s\n", index + 1, opName);

        out->writeText(line.c_str());
| } |
| #endif |
| } |
| |
| } // namespace RP |
| } // namespace SkSL |