/*
* Copyright 2023 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkRasterPipelineOpContexts_DEFINED
#define SkRasterPipelineOpContexts_DEFINED
#include <algorithm>
#include <cstddef>
#include <cstdint>
namespace SkSL { class TraceHook; }
// The largest number of pixels we handle at a time. We have a separate value for the largest number
// of pixels we handle in the highp pipeline. Many of the context structs in this file are only used
// by stages that have no lowp implementation. They can therefore use the (potentially smaller)
// highp value to save memory in the arena.
inline static constexpr int SkRasterPipeline_kMaxStride = 16;
inline static constexpr int SkRasterPipeline_kMaxStride_highp = 16;
// How much space to allocate for each MemoryCtx scratch buffer, as part of tail-pixel handling.
inline static constexpr size_t SkRasterPipeline_MaxScratchPerPatch =
std::max(SkRasterPipeline_kMaxStride_highp * 16, // 16 == largest highp bpp (RGBA_F32)
SkRasterPipeline_kMaxStride * 4); // 4 == largest lowp bpp (RGBA_8888)
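// With the strides above, this works out to std::max(16 * 16, 16 * 4) = 256 bytes of scratch per
// patched MemoryCtx.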
// These structs hold the context data for many of the Raster Pipeline ops.
struct SkRasterPipeline_MemoryCtx {
void* pixels;
int stride;
};
// Raster Pipeline typically processes N (4, 8, 16) pixels at a time, in SIMT fashion. If the
// number of pixels in a row isn't evenly divisible by N, there will be leftover pixels; this is
// called the "tail". To avoid reading or writing past the end of any source or destination buffers
// when we reach the tail:
//
// 1) Source buffers have their tail contents copied to a scratch buffer that is at least N wide.
//    In practice, each scratch buffer uses SkRasterPipeline_MaxScratchPerPatch bytes.
// 2) Each MemoryCtx in the pipeline is patched, such that accesses to it (at the current scanline
//    and x-offset) will land in the scratch buffer.
// 3) The pipeline is run as normal (with all memory accesses happening safely in the scratch
//    buffers).
// 4) Destination buffers have their tail contents copied back from the scratch buffer.
// 5) Each MemoryCtx is "un-patched".
//
// To do all of this, the pipeline creates a MemoryCtxPatch for each unique MemoryCtx referenced by
// the pipeline.
struct SkRasterPipeline_MemoryCtxInfo {
SkRasterPipeline_MemoryCtx* context;
int bytesPerPixel;
bool load;
bool store;
};
struct SkRasterPipeline_MemoryCtxPatch {
SkRasterPipeline_MemoryCtxInfo info;
void* backup; // Remembers context->pixels so we can restore it
std::byte scratch[SkRasterPipeline_MaxScratchPerPatch];
};
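// An illustrative sketch of how the tail-patching steps above can use these structs. This is only
// a rough outline (the real driver also accounts for the current scanline/x-offset when it
// redirects `pixels`, and copies only bytesPerPixel * tail-length bytes):
//
//     for (SkRasterPipeline_MemoryCtxPatch& patch : patches) {
//         SkRasterPipeline_MemoryCtx* ctx = patch.info.context;
//         patch.backup = ctx->pixels;              // remember the original pointer for step 5
//         if (patch.info.load) {
//             // step 1: copy the tail pixels from *ctx into patch.scratch
//         }
//         ctx->pixels = patch.scratch;             // step 2: redirect accesses into scratch
//     }
//     // step 3: run the pipeline as normal
//     for (SkRasterPipeline_MemoryCtxPatch& patch : patches) {
//         if (patch.info.store) {
//             // step 4: copy the tail pixels back out of patch.scratch
//         }
//         patch.info.context->pixels = patch.backup;   // step 5: un-patch
//     }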
struct SkRasterPipeline_GatherCtx {
const void* pixels;
int stride;
float width;
float height;
float weights[16]; // for bicubic and bicubic_clamp_8888
// Controls whether pixel i-1 or i is selected when the floating-point sample position is
// exactly i.
bool roundDownAtInteger = false;
};
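// A rough sketch of how a gather stage can use SkRasterPipeline_GatherCtx to fetch one pixel
// (illustrative only, and it assumes `stride` counts pixels; tiling/filtering and the
// roundDownAtInteger bias are applied by other stages before this point):
//
//     int ix = SkTPin((int)x, 0, (int)ctx->width  - 1);
//     int iy = SkTPin((int)y, 0, (int)ctx->height - 1);
//     auto* px = (const uint32_t*)ctx->pixels + iy * ctx->stride + ix;   // e.g. RGBA_8888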
// State shared by save_xy, accumulate, and bilinear_* / bicubic_*.
struct SkRasterPipeline_SamplerCtx {
float x[SkRasterPipeline_kMaxStride_highp];
float y[SkRasterPipeline_kMaxStride_highp];
float fx[SkRasterPipeline_kMaxStride_highp];
float fy[SkRasterPipeline_kMaxStride_highp];
float scalex[SkRasterPipeline_kMaxStride_highp];
float scaley[SkRasterPipeline_kMaxStride_highp];
// for bicubic_[np][13][xy]
float weights[16];
float wx[4][SkRasterPipeline_kMaxStride_highp];
float wy[4][SkRasterPipeline_kMaxStride_highp];
};
struct SkRasterPipeline_TileCtx {
float scale;
float invScale; // cache of 1/scale
// When in the reflection portion of mirror tiling, integer sample points need to snap in the
// opposite direction from the forward portion. This controls which way we bias in the
// reflection: it should be 1 if SkRasterPipeline_GatherCtx::roundDownAtInteger is true, and -1
// otherwise.
int mirrorBiasDir = -1;
};
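// Illustrative use of SkRasterPipeline_TileCtx by a repeat-tiling stage (a sketch; invScale just
// caches 1/scale to turn the divide into a multiply, and the mirror case additionally uses
// mirrorBiasDir to pick which way to snap at integer sample points):
//
//     x = x - ctx->scale * floorf(x * ctx->invScale);   // wrap x into [0, scale)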
struct SkRasterPipeline_DecalTileCtx {
uint32_t mask[SkRasterPipeline_kMaxStride];
float limit_x;
float limit_y;
// These control which edge of the interval is included (i.e. closed interval at 0 or at limit).
// They should be set to limit_x and limit_y if SkRasterPipeline_GatherCtx::roundDownAtInteger
// is true, and to zero otherwise.
float inclusiveEdge_x = 0;
float inclusiveEdge_y = 0;
};
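// Roughly, a decal stage uses SkRasterPipeline_DecalTileCtx to record which lanes sampled inside
// the image, and a later stage uses the mask to zero out the others. An illustrative sketch for
// one lane in x:
//
//     bool in_x = (0 < x && x < ctx->limit_x) || (x == ctx->inclusiveEdge_x);
//     ctx->mask[i] = in_x ? ~0u : 0u;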
enum class SkPerlinNoiseShaderType;
struct SkRasterPipeline_PerlinNoiseCtx {
SkPerlinNoiseShaderType noiseType;
float baseFrequencyX, baseFrequencyY;
float stitchDataInX, stitchDataInY;
bool stitching;
int numOctaves;
const uint8_t* latticeSelector; // [256 values]
const uint16_t* noiseData; // [4 channels][256 elements][vector of 2]
};
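// Reading the noiseData layout comment above as row-major, element `e` of channel `c` would be
// addressed roughly as follows (an illustrative sketch, not the exact stage code):
//
//     const uint16_t* v = ctx->noiseData + (c * 256 + e) * 2;   // v[0], v[1] form the 2-vector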
// State used by mipmap_linear_*
struct SkRasterPipeline_MipmapCtx {
// Original coords, saved before the base level logic
float x[SkRasterPipeline_kMaxStride_highp];
float y[SkRasterPipeline_kMaxStride_highp];
// Base level color
float r[SkRasterPipeline_kMaxStride_highp];
float g[SkRasterPipeline_kMaxStride_highp];
float b[SkRasterPipeline_kMaxStride_highp];
float a[SkRasterPipeline_kMaxStride_highp];
// Scale factors to transform base level coords to lower level coords
float scaleX;
float scaleY;
float lowerWeight;
};
struct SkRasterPipeline_CoordClampCtx {
float min_x, min_y;
float max_x, max_y;
};
struct SkRasterPipeline_CallbackCtx {
void (*fn)(SkRasterPipeline_CallbackCtx* self,
int active_pixels /*<= SkRasterPipeline_kMaxStride_highp*/);
// When called, fn() will have our active pixels available in rgba.
// When fn() returns, the pipeline will read back those active pixels from read_from.
float rgba[4*SkRasterPipeline_kMaxStride_highp];
float* read_from = rgba;
};
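// An illustrative callback (a sketch, assuming the active pixels are stored interleaved in rgba
// as r,g,b,a per pixel; the function may also point read_from at its own buffer instead):
//
//     void desaturate(SkRasterPipeline_CallbackCtx* self, int active_pixels) {
//         for (int i = 0; i < active_pixels; i++) {
//             float* px = self->rgba + 4*i;
//             float gray = (px[0] + px[1] + px[2]) / 3;
//             px[0] = px[1] = px[2] = gray;
//         }
//         self->read_from = self->rgba;   // the pipeline reloads the pixels from here
//     }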
// State shared by stack_checkpoint and stack_rewind
struct SkRasterPipelineStage;
struct SkRasterPipeline_RewindCtx {
float r[SkRasterPipeline_kMaxStride_highp];
float g[SkRasterPipeline_kMaxStride_highp];
float b[SkRasterPipeline_kMaxStride_highp];
float a[SkRasterPipeline_kMaxStride_highp];
float dr[SkRasterPipeline_kMaxStride_highp];
float dg[SkRasterPipeline_kMaxStride_highp];
float db[SkRasterPipeline_kMaxStride_highp];
float da[SkRasterPipeline_kMaxStride_highp];
std::byte* base;
SkRasterPipelineStage* stage;
};
struct SkRasterPipeline_GradientCtx {
size_t stopCount;
float* fs[4];
float* bs[4];
float* ts;
};
struct SkRasterPipeline_EvenlySpaced2StopGradientCtx {
float f[4];
float b[4];
};
struct SkRasterPipeline_2PtConicalCtx {
uint32_t fMask[SkRasterPipeline_kMaxStride_highp];
float fP0,
fP1;
};
struct SkRasterPipeline_UniformColorCtx {
float r,g,b,a;
uint16_t rgba[4]; // [0,255] in a 16-bit lane.
};
struct SkRasterPipeline_EmbossCtx {
SkRasterPipeline_MemoryCtx mul,
add;
};
struct SkRasterPipeline_TablesCtx {
const uint8_t *r, *g, *b, *a;
};
using SkRPOffset = uint32_t;
struct SkRasterPipeline_InitLaneMasksCtx {
uint8_t* tail;
};
struct SkRasterPipeline_ConstantCtx {
int32_t value;
SkRPOffset dst;
};
struct SkRasterPipeline_UniformCtx {
int32_t* dst;
const int32_t* src;
};
struct SkRasterPipeline_BinaryOpCtx {
SkRPOffset dst;
SkRPOffset src;
};
struct SkRasterPipeline_TernaryOpCtx {
SkRPOffset dst;
SkRPOffset delta;
};
struct SkRasterPipeline_MatrixMultiplyCtx {
SkRPOffset dst;
uint8_t leftColumns, leftRows, rightColumns, rightRows;
};
struct SkRasterPipeline_SwizzleCtx {
// If we are processing more than 16 pixels at a time, an 8-bit offset won't be sufficient and
// `offsets` will need to use uint16_t (or dial down the premultiplication).
static_assert(SkRasterPipeline_kMaxStride_highp <= 16);
SkRPOffset dst;
uint8_t offsets[4]; // values must be byte offsets (4 * highp-stride * component-index)
};
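// For example, with SkRasterPipeline_kMaxStride_highp == 16, component k of a slot starts at byte
// offset k * 4 * 16 == k * 64; a swizzle producing .bgra from .rgba would plausibly use
// SkRasterPipeline_SwizzleCtx::offsets = {128, 64, 0, 192}.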
struct SkRasterPipeline_ShuffleCtx {
int32_t* ptr;
int count;
uint16_t offsets[16]; // values must be byte offsets (4 * highp-stride * component-index)
};
struct SkRasterPipeline_SwizzleCopyCtx {
int32_t* dst;
const int32_t* src; // src values must _not_ overlap dst values
uint16_t offsets[4]; // values must be byte offsets (4 * highp-stride * component-index)
};
struct SkRasterPipeline_CopyIndirectCtx {
int32_t* dst;
const int32_t* src;
const uint32_t* indirectOffset; // this applies to `src` or `dst` based on the op
uint32_t indirectLimit; // the indirect offset is clamped to this upper bound
uint32_t slots; // the number of slots to copy
};
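// Roughly, an indirect copy stage offsets one side of the copy by the clamped indirect value
// before copying `slots` slots. An illustrative sketch, assuming each slot is one highp-stride's
// worth of int32s:
//
//     uint32_t dynamicOffset = std::min(*ctx->indirectOffset, ctx->indirectLimit);
//     src += dynamicOffset * SkRasterPipeline_kMaxStride_highp;   // or dst, depending on the op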
struct SkRasterPipeline_SwizzleCopyIndirectCtx : public SkRasterPipeline_CopyIndirectCtx {
uint16_t offsets[4]; // values must be byte offsets (4 * highp-stride * component-index)
};
struct SkRasterPipeline_BranchCtx {
int offset; // contains the label ID during compilation, and the program offset when compiled
};
struct SkRasterPipeline_BranchIfAllLanesActiveCtx : public SkRasterPipeline_BranchCtx {
uint8_t* tail = nullptr; // lanes past the tail are _never_ active, so we need to exclude them
};
struct SkRasterPipeline_BranchIfEqualCtx : public SkRasterPipeline_BranchCtx {
int value;
const int* ptr;
};
struct SkRasterPipeline_CaseOpCtx {
int expectedValue;
SkRPOffset offset; // points to a pair of adjacent I32s: {I32 actualValue, I32 defaultMask}
};
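// A sketch of how a case op can use SkRasterPipeline_CaseOpCtx (illustrative; `actualValue` and
// `defaultMask` name the two adjacent I32 slots that `offset` points at):
//
//     if (actualValue == ctx->expectedValue) {
//         // enable this case's lanes, and clear defaultMask so the default case won't also run
//     }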
struct SkRasterPipeline_TraceFuncCtx {
const int* traceMask;
SkSL::TraceHook* traceHook;
int funcIdx;
};
struct SkRasterPipeline_TraceScopeCtx {
const int* traceMask;
SkSL::TraceHook* traceHook;
int delta;
};
struct SkRasterPipeline_TraceLineCtx {
const int* traceMask;
SkSL::TraceHook* traceHook;
int lineNumber;
};
struct SkRasterPipeline_TraceVarCtx {
const int* traceMask;
SkSL::TraceHook* traceHook;
int slotIdx, numSlots;
const int* data;
const uint32_t* indirectOffset; // can be null; if set, an offset applied to `data`
uint32_t indirectLimit; // the indirect offset is clamped to this upper bound
};
#endif // SkRasterPipelineOpContexts_DEFINED