/*
 * Copyright 2023 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkRasterPipelineOpContexts_DEFINED
#define SkRasterPipelineOpContexts_DEFINED

#include <algorithm>
#include <cstddef>
#include <cstdint>

namespace SkSL { class TraceHook; }

struct SkRasterPipelineStage;
enum class SkPerlinNoiseShaderType;

namespace SkRasterPipelineContexts {

// The largest number of pixels we handle at a time. We have a separate value for the largest number
// of pixels we handle in the highp pipeline. Many of the context structs in this file are only used
// by stages that have no lowp implementation. They can therefore use the (potentially smaller)
// highp value to save memory in the arena.
inline static constexpr int kMaxStride = 16;
inline static constexpr int kMaxStride_highp = 16;

// How much space to allocate for each MemoryCtx scratch buffer, as part of tail-pixel handling.
inline static constexpr size_t kMaxScratchPerPatch =
        std::max(kMaxStride_highp * 16,  // 16 == largest highp bpp (RGBA_F32)
                 kMaxStride * 4);        //  4 == largest lowp bpp (RGBA_8888)
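// With the strides above, this works out to std::max(16 * 16, 16 * 4) == 256 bytes per patch.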

// These structs hold the context data for many of the Raster Pipeline ops.
struct MemoryCtx {
    void* pixels;
    int stride;
};
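
// For example, a load or store stage reading a packed RGBA_8888 buffer might be given a context
// like the sketch below (`base` and `rowPixels` are hypothetical caller-supplied values; note
// that stride is measured in pixels, not bytes):
//
//     MemoryCtx ctx = {base, rowPixels};
//     // ...append a load_8888 / store_8888 stage pointing at &ctx...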

// Raster Pipeline typically processes N (4, 8, 16) pixels at a time, in SIMT fashion. If the
// number of pixels in a row isn't evenly divisible by N, there will be leftover pixels; this is
// called the "tail". To avoid reading or writing past the end of any source or destination buffers
// when we reach the tail:
//
//   1) Source buffers have their tail contents copied to a scratch buffer that is at least N wide.
//      In practice, each scratch buffer uses kMaxScratchPerPatch bytes.
//   2) Each MemoryCtx in the pipeline is patched, so that accesses to it (at the current scanline
//      and x-offset) land in the scratch buffer.
//   3) The pipeline is run as normal (with all memory accesses happening safely in the scratch
//      buffers).
//   4) Destination buffers have their tail contents copied back from the scratch buffer.
//   5) Each MemoryCtx is "un-patched".
//
// To do all of this, the pipeline creates a MemoryCtxPatch for each unique MemoryCtx referenced by
// the pipeline; a sketch of the patch lifecycle follows the MemoryCtxPatch struct below.
struct MemoryCtxInfo {
    MemoryCtx* context;

    int bytesPerPixel;
    bool load;
    bool store;
};

// Some SIMD instructions operate faster if we read from aligned memory. 64 bytes (512 bits) is
// the widest load we have (AVX-512), so by placing the scratch field first and aligning the whole
// struct to 64 bytes, the memory for our tail pixels is also aligned to 64 bytes.
struct alignas(64) MemoryCtxPatch {
    std::byte scratch[kMaxScratchPerPatch];

    MemoryCtxInfo info;
    void* backup;  // Remembers context->pixels so we can restore it
};
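
// In pseudocode, one patched tail pass proceeds roughly like this (a sketch only; the real logic
// lives in the pipeline's run routines, `tailPixels` and `tailCount` are hypothetical stand-ins,
// and the pointer bias that redirects (x, y) addressing into `scratch` is elided):
//
//     int bpp = patch.info.bytesPerPixel;
//     if (patch.info.load)  { memcpy(patch.scratch, tailPixels, tailCount * bpp); }
//     patch.backup = patch.info.context->pixels;
//     patch.info.context->pixels = /* patch.scratch, biased so (x, y) lands at scratch[0] */;
//     // ...run the pipeline as normal...
//     if (patch.info.store) { memcpy(tailPixels, patch.scratch, tailCount * bpp); }
//     patch.info.context->pixels = patch.backup;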

struct GatherCtx {
    const void* pixels;
    int stride;
    float width;
    float height;
    float weights[16];  // for bicubic and bicubic_clamp_8888
    // Controls whether pixel i-1 or i is selected when the floating-point sample position is
    // exactly i.
    bool roundDownAtInteger = false;
};
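
// Conceptually, a gather stage clamps the incoming coordinates against width/height, converts
// them to integers, and fetches (a sketch; roundDownAtInteger governs the integer conversion at
// exact integer sample positions):
//
//     pixel = pixels[iy * stride + ix];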

// State shared by save_xy, accumulate, and bilinear_* / bicubic_*.
struct SamplerCtx {
    float x[kMaxStride_highp];
    float y[kMaxStride_highp];
    float fx[kMaxStride_highp];
    float fy[kMaxStride_highp];
    float scalex[kMaxStride_highp];
    float scaley[kMaxStride_highp];

    // for bicubic_[np][13][xy]
    float weights[16];
    float wx[4][kMaxStride_highp];
    float wy[4][kMaxStride_highp];
};

struct TileCtx {
    float scale;
    float invScale;  // cache of 1/scale
    // In the reflected portion of mirror tiling, integer sample points need to snap in the
    // opposite direction from the forward portion. This controls which way we bias in the
    // reflection. It should be 1 if GatherCtx::roundDownAtInteger is true, and otherwise -1.
    int mirrorBiasDir = -1;
};
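
// Conceptually, mirror tiling folds a coordinate v into [0, scale] like this (a sketch, not the
// exact stage code; the stages use invScale to avoid a divide, and mirrorBiasDir breaks ties at
// exact integer sample points):
//
//     float t = fmodf(v, 2 * scale);
//     v = (t <= scale) ? t : 2 * scale - t;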

struct DecalTileCtx {
    uint32_t mask[kMaxStride];
    float limit_x;
    float limit_y;
    // These control which edge of the interval is included (i.e. closed interval at 0 or at
    // limit). They should be set to limit_x and limit_y if GatherCtx::roundDownAtInteger is true,
    // and otherwise zero.
    float inclusiveEdge_x = 0;
    float inclusiveEdge_y = 0;
};
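
// Conceptually, the decal stages build a per-lane mask (a sketch):
//
//     mask[i] = ((0 <= x[i] && x[i] < limit_x) || x[i] == inclusiveEdge_x) ? ~0u : 0u;
//
// and a later stage uses that mask to turn out-of-bounds samples transparent.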

struct PerlinNoiseCtx {
    SkPerlinNoiseShaderType noiseType;
    float baseFrequencyX, baseFrequencyY;
    float stitchDataInX, stitchDataInY;
    bool stitching;
    int numOctaves;
    const uint8_t* latticeSelector;  // [256 values]
    const uint16_t* noiseData;       // [4 channels][256 elements][vector of 2]
};

// State used by mipmap_linear_*.
struct MipmapCtx {
    // Original coords, saved before the base level logic
    float x[kMaxStride_highp];
    float y[kMaxStride_highp];

    // Base level color
    float r[kMaxStride_highp];
    float g[kMaxStride_highp];
    float b[kMaxStride_highp];
    float a[kMaxStride_highp];

    // Scale factors to transform base level coords to lower level coords
    float scaleX;
    float scaleY;

    float lowerWeight;
};
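
// Once both mip levels have been sampled, the final color is a per-channel lerp (a sketch):
//
//     result = base + lowerWeight * (lower - base);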

struct CoordClampCtx {
    float min_x, min_y;
    float max_x, max_y;
};

struct CallbackCtx {
    void (*fn)(CallbackCtx* self, int active_pixels /*<= kMaxStride_highp*/);

    // When called, fn() will have our active pixels available in rgba.
    // When fn() returns, the pipeline will read back those active pixels from read_from.
    float rgba[4 * kMaxStride_highp];
    float* read_from = rgba;
};
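
// For example, a callback that forces every active pixel opaque might look like this
// (a hypothetical sketch, assuming the pixels are stored interleaved as RGBA quads):
//
//     static void forceOpaque(CallbackCtx* self, int activePixels) {
//         for (int i = 0; i < activePixels; i++) {
//             self->rgba[4 * i + 3] = 1.0f;  // overwrite alpha; leave read_from == rgba
//         }
//     }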

// State shared by stack_checkpoint and stack_rewind.
struct RewindCtx {
    float r[kMaxStride_highp];
    float g[kMaxStride_highp];
    float b[kMaxStride_highp];
    float a[kMaxStride_highp];
    float dr[kMaxStride_highp];
    float dg[kMaxStride_highp];
    float db[kMaxStride_highp];
    float da[kMaxStride_highp];
    std::byte* base;
    SkRasterPipelineStage* stage;
};

constexpr size_t kRGBAChannels = 4;

struct GradientCtx {
    size_t stopCount;
    float* factors[kRGBAChannels];
    float* biases[kRGBAChannels];
    float* ts;
};
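
// Conceptually, each channel of the gradient is a piecewise-linear function of t: a stage first
// searches `ts` for the stop interval containing t, then evaluates (a sketch)
//
//     color[c] = factors[c][stop] * t + biases[c][stop];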

struct EvenlySpaced2StopGradientCtx {
    float factor[kRGBAChannels];
    float bias[kRGBAChannels];
};
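
// With exactly two evenly spaced stops there is a single linear segment and no search is needed;
// given stop colors c0 and c1, factor = c1 - c0 and bias = c0, so each channel evaluates as
// factor[c] * t + bias[c].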

struct Conical2PtCtx {
    uint32_t fMask[kMaxStride_highp];
    float fP0,
          fP1;
};

struct UniformColorCtx {
    float r,g,b,a;
    uint16_t rgba[4];  // [0,255] in a 16-bit lane.
};
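
// Both forms describe the same color: highp stages read the floats directly, while lowp stages
// use the pre-scaled 16-bit lanes (conceptually rgba[0] == round(255 * r), and so on; a sketch
// of the intent behind keeping both representations).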

struct EmbossCtx {
    MemoryCtx mul, add;
};

struct TablesCtx {
    const uint8_t *r, *g, *b, *a;
};

using SkRPOffset = uint32_t;

struct InitLaneMasksCtx {
    uint8_t* tail;
};

struct ConstantCtx {
    int32_t value;
    SkRPOffset dst;
};

struct UniformCtx {
    int32_t* dst;
    const int32_t* src;
};

struct BinaryOpCtx {
    SkRPOffset dst;
    SkRPOffset src;
};

struct TernaryOpCtx {
    SkRPOffset dst;
    SkRPOffset delta;
};

struct MatrixMultiplyCtx {
    SkRPOffset dst;
    uint8_t leftColumns, leftRows, rightColumns, rightRows;
};

struct SwizzleCtx {
    // If we are processing more than 16 pixels at a time, an 8-bit offset won't be sufficient
    // and `offsets` will need to use uint16_t (or dial down the premultiplication).
    static_assert(kMaxStride_highp <= 16);

    SkRPOffset dst;
    uint8_t offsets[4];  // values must be byte offsets (4 * highp-stride * component-index)
};
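
// For example, with kMaxStride_highp == 16 each component occupies 4 * 16 == 64 bytes, so a
// `.bgra` swizzle stores offsets {2*64, 1*64, 0*64, 3*64} == {128, 64, 0, 192}; the largest
// possible offset (component 3) is 192, which still fits in a uint8_t.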

struct ShuffleCtx {
    int32_t* ptr;
    int count;
    uint16_t offsets[16];  // values must be byte offsets (4 * highp-stride * component-index)
};

struct SwizzleCopyCtx {
    int32_t* dst;
    const int32_t* src;   // src values must _not_ overlap dst values
    uint16_t offsets[4];  // values must be byte offsets (4 * highp-stride * component-index)
};

struct CopyIndirectCtx {
    int32_t* dst;
    const int32_t* src;
    const uint32_t* indirectOffset;  // this applies to `src` or `dst` based on the op
    uint32_t indirectLimit;          // the indirect offset is clamped to this upper bound
    uint32_t slots;                  // the number of slots to copy
};

struct SwizzleCopyIndirectCtx : public CopyIndirectCtx {
    uint16_t offsets[4];  // values must be byte offsets (4 * highp-stride * component-index)
};

struct BranchCtx {
    int offset;  // contains the label ID during compilation, and the program offset when compiled
};

struct BranchIfAllLanesActiveCtx : public BranchCtx {
    uint8_t* tail = nullptr;  // lanes past the tail are _never_ active, so we need to exclude them
};

struct BranchIfEqualCtx : public BranchCtx {
    int value;
    const int* ptr;
};

struct CaseOpCtx {
    int expectedValue;
    SkRPOffset offset;  // points to a pair of adjacent I32s: {I32 actualValue, I32 defaultMask}
};
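
// Conceptually, a case op executes per lane (a sketch of the idea, not the exact stage code):
//
//     if (actualValue == expectedValue) { laneMask = on; defaultMask = 0; }
//
// so the switch's default branch only runs for lanes that no case claimed.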

struct TraceFuncCtx {
    const int* traceMask;
    SkSL::TraceHook* traceHook;
    int funcIdx;
};

struct TraceScopeCtx {
    const int* traceMask;
    SkSL::TraceHook* traceHook;
    int delta;
};

struct TraceLineCtx {
    const int* traceMask;
    SkSL::TraceHook* traceHook;
    int lineNumber;
};

struct TraceVarCtx {
    const int* traceMask;
    SkSL::TraceHook* traceHook;
    int slotIdx, numSlots;
    const int* data;
    const uint32_t* indirectOffset;  // can be null; if set, an offset applied to `data`
    uint32_t indirectLimit;          // the indirect offset is clamped to this upper bound
};

}  // namespace SkRasterPipelineContexts

#endif  // SkRasterPipelineOpContexts_DEFINED