blob: 1a87d707a7ccc443133ac7b98960b1e9c5ac8470 [file]
// Copyright 2024 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT
// A WGSL shader for rendering sparse strips with alpha blending.
//
// Each strip instance represents a horizontal slice of the rendered output and consists of:
// 1. A variable-width region of alpha values for semi-transparent rendering
// 2. A solid color region for fully opaque areas
//
// The alpha values are stored in a texture and sampled during fragment shading.
// This approach optimizes memory usage by only storing alpha data where needed.
//
//
// `StripInstance::paint` field encodes a color source, a paint type and a paint texture id
// Color source determines where the fragment shader gets color data from
// Paint type determines how the fragment shader uses the color data
// Paint texture id locates the encoded image data `EncodedImage` in `encoded_paints_texture`
// More details in the `StripInstance` documentation below.
//
// `StripInstance::payload` field can either encode a color, [x, y] for image sampling or a slot index
// - If color source is payload and the paint type is solid, the fragment shader uses the color directly.
// - If color source is payload and the paint type is image, the fragment shader samples the image.
// - Otherwise, the fragment shader samples the source clip texture using the given slot index.
// More details in the `StripInstance` documentation below.
// Color source modes - where the fragment shader gets color data from.
// Decoded from `paint` as `(paint >> 30) & 3`.
//
// Payload carries the data itself: a packed RGBA color (solid paint) or
// packed [x, y] scene coordinates (image paint).
const COLOR_SOURCE_PAYLOAD: u32 = 0u;
// Payload is a slot index; the color is loaded from the clip texture slot.
const COLOR_SOURCE_SLOT: u32 = 1u;
// Payload is a source slot that is composited over a destination slot whose
// index is stored in `paint`.
const COLOR_SOURCE_BLEND: u32 = 2u;

// Paint types - how the payload is used when the color source is the payload.
const PAINT_TYPE_SOLID: u32 = 0u;
const PAINT_TYPE_IMAGE: u32 = 1u;

// Image quality - selects the filter used when shading an image paint:
// LOW is a single texel load, MEDIUM is bilinear, HIGH is bicubic.
const IMAGE_QUALITY_LOW: u32 = 0u;
const IMAGE_QUALITY_MEDIUM: u32 = 1u;
const IMAGE_QUALITY_HIGH: u32 = 2u;

// Blend modes. The shader code visible in this file does not branch on these
// yet; blending is hard-coded to normal mix + source-over.
const MIX_NORMAL: u32 = 0u;
const COMPOSE_SRC_OVER: u32 = 3u;
// Global rendering parameters, bound as the `config` uniform.
struct Config {
// Width of the rendering target in pixels; used to convert pixel x to NDC.
width: u32,
// Height of the rendering target in pixels; used to convert pixel y to NDC.
height: u32,
// Height of a strip in the rendering
// CAUTION: When changing this value, you must also update the fragment shader's
// logic to handle the new strip height (it masks the fragment y with 3 and
// unpacks 4 one-byte alphas per column).
strip_height: u32,
// Number of trailing zeros in alphas_tex_width (log2 of width).
// Pre-calculated on CPU since WebGL2 doesn't support `firstTrailingBit`.
// The fragment shader shifts a linear texel index right by this amount to get
// the texel row in the alpha texture.
alphas_tex_width_bits: u32,
}
// Strip instance data
//
// The `paint` field is packed with metadata that controls how `payload` is interpreted:
//
// `paint` bit layout (as decoded by the shader code in this file):
// - Bits 30-31: `color_source` 0 = use payload, 1 = use slot texture, 2 = blend two slots
// - Bit 29: `paint_type` 0 = solid, 1 = image (only meaningful when color_source = 0).
//   The shader reads it as `(paint >> 29) & 3`, which also picks up bit 30; that bit
//   is zero whenever color_source = 0.
// - Remaining bits: usage depends on color_source:
//   - When color_source = 0 and paint_type = 1: bits 0-28 = `paint_texture_id`
//     (index of `EncodedImage`)
//   - When color_source = 1: bits 0-7 contain opacity (0-255)
//   - When color_source = 2: bits 0-7 = compose mode, bits 8-15 = mix mode,
//     bits 16-29 = destination slot index
//
// Decision tree for paint/payload interpretation:
//
// color_source = 0 (COLOR_SOURCE_PAYLOAD) - Use payload data directly
// ├── paint_type = 0 (PAINT_TYPE_SOLID) - Solid color rendering
// │   └── payload = [r, g, b, a] RGBA (packed as u8s)
// │
// └── paint_type = 1 (PAINT_TYPE_IMAGE) - Image rendering
//     ├── payload = [x, y] scene coordinates (packed as u16s)
//     └── bits 0-28 = paint_texture_id
//
// color_source = 1 (COLOR_SOURCE_SLOT) - Use slot texture
// ├── payload = slot_index (u32)
// └── bits 0-7 = opacity (0-255, where 255 = fully opaque)
//
// color_source = 2 (COLOR_SOURCE_BLEND) - Composite one slot over another
// ├── payload = source slot_index (u32)
// ├── bits 16-29 = destination slot_index
// └── bits 0-15 = compose mode / mix mode (decoded but not yet acted on;
//     the fragment shader always applies source-over)
struct StripInstance {
// [x, y] packed as u16's (x in the low 16 bits, y in the high 16 bits)
// x, y — coordinates of the strip
@location(0) xy: u32,
// [width, dense_width] packed as u16's (width in the low 16 bits, dense_width in the high 16 bits)
// width — width of the strip
// dense_width — width of the portion where alpha blending should be applied
@location(1) widths: u32,
// Alpha texture column index where this strip's alpha values begin
// There are [`Config::strip_height`] alpha values per column.
@location(2) col_idx: u32,
// See StripInstance documentation above.
@location(3) payload: u32,
// See StripInstance documentation above.
@location(4) paint: u32,
}
// Values handed from the vertex stage (`vs_main`) to the fragment stage (`fs_main`).
struct VertexOutput {
// Packed paint word, copied unchanged from `StripInstance::paint`
@location(0) @interpolate(flat) paint: u32,
// Alpha-texture coordinates for the current fragment: x is the alpha column
// (starting at `col_idx`), y is the row within the strip
@location(1) tex_coord: vec2<f32>,
// Image-space position for the current fragment, used for image sampling.
// Only written by `vs_main` for image paints.
@location(2) sample_xy: vec2<f32>,
// Ending alpha column (exclusive) of the dense (alpha) region: `col_idx + dense_width`
@location(3) @interpolate(flat) dense_end: u32,
// Payload word, copied unchanged from `StripInstance::payload`
// (packed color, packed image coordinates, or slot index)
@location(4) @interpolate(flat) payload: u32,
// Position written by the vertex stage in normalized device coordinates (NDC);
// the fragment stage reads it back as framebuffer pixel coordinates
@builtin(position) position: vec4<f32>,
};
// TODO: Measure performance of moving to a separate group
// Uniform block with the render-target size and alpha-texture layout parameters.
@group(0) @binding(1)
var<uniform> config: Config;
// Float texture that image paints are sampled from.
@group(1) @binding(0)
var atlas_texture: texture_2d<f32>;
// Integer texture holding encoded image paints. `unpack_encoded_image` reads each
// record from three consecutive texels in row 0, starting at the paint texture id.
@group(2) @binding(0)
var encoded_paints_texture: texture_2d<u32>;
// Vertex stage: expands one `StripInstance` into a screen-space quad.
//
// `in_vertex_index` (0-3) selects the quad corner. The output carries the
// alpha-texture coordinates, the NDC position and the untouched `paint` /
// `payload` words. For image paints it also carries the image-space sample
// position; `sample_xy` keeps its zero default for every other paint.
@vertex
fn vs_main(
    @builtin(vertex_index) in_vertex_index: u32,
    instance: StripInstance,
) -> VertexOutput {
    var out: VertexOutput;
    // Map vertex_index (0-3) to quad corners:
    // 0 → (0,0), 1 → (1,0), 2 → (0,1), 3 → (1,1)
    let x = f32(in_vertex_index & 1u);
    let y = f32(in_vertex_index >> 1u);
    // Unpack the x and y coordinates from the packed u32 instance.xy
    let x0 = instance.xy & 0xffffu;
    let y0 = instance.xy >> 16u;
    // Unpack the total width and dense (alpha) width from the packed u32 instance.widths
    let width = instance.widths & 0xffffu;
    let dense_width = instance.widths >> 16u;
    // Calculate the ending x-position of the dense (alpha) region
    // This boundary is used in the fragment shader to determine if alpha sampling is needed
    out.dense_end = instance.col_idx + dense_width;
    // Calculate the pixel coordinates of the current vertex within the strip
    let pix_x = f32(x0) + x * f32(width);
    let pix_y = f32(y0) + y * f32(config.strip_height);
    // Convert pixel coordinates to normalized device coordinates (NDC)
    // NDC ranges from -1 to 1, with (0,0) at the center of the viewport
    let ndc_x = pix_x * 2.0 / f32(config.width) - 1.0;
    let ndc_y = 1.0 - pix_y * 2.0 / f32(config.height);

    // `paint_type` is only defined for payload-sourced strips. For blend strips
    // bit 29 is the top bit of the destination slot index, so the color source
    // must be checked first (as the fragment shader does); otherwise a large
    // destination slot would trigger a bogus image decode here.
    let color_source = (instance.paint >> 30u) & 0x3u;
    let paint_type = (instance.paint >> 29u) & 0x3u;
    if color_source == COLOR_SOURCE_PAYLOAD && paint_type == PAINT_TYPE_IMAGE {
        let paint_tex_id = instance.paint & 0x1FFFFFFFu;
        let encoded_image = unpack_encoded_image(paint_tex_id);
        // Unpack view coordinates for image sampling
        let scene_strip_x = instance.payload & 0xffffu;
        let scene_strip_y = instance.payload >> 16u;
        // Use view coordinates for image sampling (always in global view space):
        // apply the image's affine transform to the scene position of this corner.
        out.sample_xy = encoded_image.translate
            + encoded_image.image_offset
            + encoded_image.transform.xy * f32(scene_strip_x)
            + encoded_image.transform.zw * f32(scene_strip_y)
            + encoded_image.transform.xy * x * f32(width)
            + encoded_image.transform.zw * y * f32(config.strip_height);
    }
    // Regular texture coordinates for other render types
    out.tex_coord = vec2<f32>(f32(instance.col_idx) + x * f32(width), y * f32(config.strip_height));
    out.position = vec4<f32>(ndc_x, ndc_y, 0.0, 1.0);
    out.payload = instance.payload;
    out.paint = instance.paint;
    return out;
}
// Packed coverage data read by `fs_main`: each u32 channel of a texel holds the
// alphas of one strip column, one byte per row.
@group(0) @binding(0)
var alphas_texture: texture_2d<u32>;
// Slot texture read by `fs_main` for the slot and blend color sources; a slot
// occupies `Config::strip_height` consecutive rows.
@group(0) @binding(2)
var clip_input_texture: texture_2d<f32>;
// Returns the coverage in [0, 1] stored in `alphas_texture` for alpha column
// `column` and strip row `row`.
//
// Each texel stores 16 one-byte alphas: every u32 channel packs the 4 alphas of
// one column. This assumes a strip height of 4.
fn strip_coverage_at(column: u32, row: u32) -> f32 {
    // Four columns share a texel; the channel selects the column inside it.
    let texel_index = column / 4u;
    let channel_index = column % 4u;
    // The texel index is linear; split it into (x, y) using the texture width
    // (masking with width - 1 and shifting by the precomputed log2 of the width).
    let width_mask = textureDimensions(alphas_texture).x - 1u;
    let texel_xy = vec2<u32>(
        texel_index & width_mask,
        texel_index >> config.alphas_tex_width_bits
    );
    let column_alphas = unpack_alphas_from_channel(
        textureLoad(alphas_texture, texel_xy, 0),
        channel_index
    );
    // One byte per row, lowest byte first.
    return f32((column_alphas >> (row * 8u)) & 0xffu) * (1.0 / 255.0);
}

// Loads the texel of clip slot `slot` that corresponds to the fragment at
// `frag_position`: x is the fragment x masked to 8 bits, y is the fragment y
// masked to 2 bits plus the slot's first row.
fn clip_slot_texel(frag_position: vec4<f32>, slot: u32) -> vec4<f32> {
    let clip_x = u32(frag_position.x) & 0xFFu;
    let clip_y = (u32(frag_position.y) & 3u) + slot * config.strip_height;
    return textureLoad(clip_input_texture, vec2(clip_x, clip_y), 0);
}

// Samples the image paint referenced by `paint` at image-space position
// `sample_xy`, using the filter selected by the image's quality setting.
// Returns transparent black for an unrecognized quality value.
fn image_paint_color(paint: u32, sample_xy: vec2<f32>) -> vec4<f32> {
    let encoded_image = unpack_encoded_image(paint & 0x1FFFFFFFu);
    let image_offset = encoded_image.image_offset;
    let image_size = encoded_image.image_size;
    // Apply the extend modes in image-local coordinates, then move back into the atlas.
    let local_xy = sample_xy - image_offset;
    let offset = 0.00001;
    let extended_xy = vec2<f32>(
        extend_mode(local_xy.x, encoded_image.extend_modes.x, image_size.x - offset),
        extend_mode(local_xy.y, encoded_image.extend_modes.y, image_size.y - offset)
    );
    let atlas_xy = image_offset + extended_xy;
    switch encoded_image.quality {
        case IMAGE_QUALITY_HIGH: {
            return bicubic_sample(
                atlas_texture,
                atlas_xy,
                image_offset,
                image_size,
                encoded_image.extend_modes
            );
        }
        case IMAGE_QUALITY_MEDIUM: {
            // Shift by half a texel before taking the 2x2 bilinear footprint.
            return bilinear_sample(
                atlas_texture,
                atlas_xy - vec2(0.5),
                image_offset,
                image_size,
                encoded_image.extend_modes
            );
        }
        case IMAGE_QUALITY_LOW: {
            return textureLoad(atlas_texture, vec2<u32>(atlas_xy), 0);
        }
        default: {
            return vec4<f32>(0.0);
        }
    }
}

// Fragment stage: resolves the strip's color according to the color source
// encoded in `paint` and scales it by the strip coverage.
@fragment
fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
    // Fragments inside the dense region take their coverage from the alpha
    // texture; everything to the right of it is fully covered.
    let column = u32(floor(in.tex_coord.x));
    var coverage = 1.0;
    if column < in.dense_end {
        coverage = strip_coverage_at(column, u32(floor(in.tex_coord.y)));
    }

    // Stays at its zero value for any unrecognized color source or paint type.
    var shaded: vec4<f32>;
    let color_source = (in.paint >> 30u) & 0x3u;
    switch color_source {
        case COLOR_SOURCE_PAYLOAD: {
            // in.payload encodes a color for PAINT_TYPE_SOLID; for
            // PAINT_TYPE_IMAGE the position arrives through in.sample_xy.
            let paint_type = (in.paint >> 29u) & 0x3u;
            switch paint_type {
                case PAINT_TYPE_SOLID: {
                    shaded = coverage * unpack4x8unorm(in.payload);
                }
                case PAINT_TYPE_IMAGE: {
                    shaded = coverage * image_paint_color(in.paint, in.sample_xy);
                }
                default: {}
            }
        }
        case COLOR_SOURCE_SLOT: {
            // in.payload is the slot; the low 8 bits of paint hold the
            // opacity quantized to [0, 255].
            let opacity = f32(in.paint & 0xFFu) * (1.0 / 255.0);
            shaded = coverage * opacity * clip_slot_texel(in.position, in.payload);
        }
        case COLOR_SOURCE_BLEND: {
            // paint layout: bits 16-29 = destination slot, bits 8-15 = mix mode,
            // bits 0-7 = compose mode. in.payload is the source slot.
            let dest_slot = (in.paint >> 16u) & 0x3FFFu;
            let src_color = clip_slot_texel(in.position, in.payload);
            let dest_color = clip_slot_texel(in.position, dest_slot);
            // The mix and compose modes are not dispatched on yet; this is
            // hard-coded SrcOver: result = src + dest * (1 - src.a)
            shaded = coverage * (src_color + dest_color * (1.0 - src_color.a));
        }
        default: {}
    }
    return shaded;
}
// Decoded form of one image paint record, produced by `unpack_encoded_image`.
struct EncodedImage {
/// The rendering quality of the image (one of the `IMAGE_QUALITY_*` values).
quality: u32,
/// The extend modes in the horizontal and vertical direction (`EXTEND_*` values).
extend_modes: vec2<u32>,
/// The size of the image in pixels.
image_size: vec2<f32>,
/// The offset of the image in pixels.
image_offset: vec2<f32>,
/// Linear transformation matrix coefficients for 2D affine transformation.
/// Contains [a, b, c, d] where the transformation matrix is:
/// | a c |
/// | b d |
/// i.e. the vertex shader maps a scene point (x, y) to [a, b] * x + [c, d] * y.
/// This enables scaling, rotation, and skewing of the image coordinates.
transform: vec4<f32>,
/// Translation offset for 2D affine transformation.
/// Contains [tx, ty] representing the translation component.
translate: vec2<f32>,
}
// Decodes the image paint record that starts at texel `paint_tex_id` in row 0
// of `encoded_paints_texture`.
//
// Record layout (three consecutive texels):
// - texel 0: x = flags (bits 0-1 quality, bits 2-3 extend x, bits 4-5 extend y),
//            y = image size (high 16 bits first component, low 16 bits second),
//            z = image offset (same packing as the size)
// - texel 1: the four transform coefficients as f32 bit patterns
// - texel 2: x, y = the translation as f32 bit patterns
fn unpack_encoded_image(paint_tex_id: u32) -> EncodedImage {
    let header = textureLoad(encoded_paints_texture, vec2<u32>(paint_tex_id, 0), 0);
    let transform_bits = textureLoad(encoded_paints_texture, vec2<u32>(paint_tex_id + 1u, 0), 0);
    let translate_bits = textureLoad(encoded_paints_texture, vec2<u32>(paint_tex_id + 2u, 0), 0);

    let flags = header.x;
    let packed_size = header.y;
    let packed_offset = header.z;

    return EncodedImage(
        flags & 0x3u,
        vec2<u32>((flags >> 2u) & 0x3u, (flags >> 4u) & 0x3u),
        vec2<f32>(f32(packed_size >> 16u), f32(packed_size & 0xFFFFu)),
        vec2<f32>(f32(packed_offset >> 16u), f32(packed_offset & 0xFFFFu)),
        bitcast<vec4<f32>>(transform_bits),
        bitcast<vec2<f32>>(translate_bits.xy)
    );
}
// Selects the channel of `rgba` named by `channel_index` (0 = x, 1 = y, 2 = z,
// 3 = w). Any other index falls back to the x channel; callers are expected to
// pass a value in 0..3.
fn unpack_alphas_from_channel(rgba: vec4<u32>, channel_index: u32) -> u32 {
    if channel_index == 1u {
        return rgba.y;
    }
    if channel_index == 2u {
        return rgba.z;
    }
    if channel_index == 3u {
        return rgba.w;
    }
    // Index 0, plus the "should never happen" fallback.
    return rgba.x;
}
// Extend modes: how a sample coordinate outside the image is mapped back into it.
// Clamp the coordinate to the image range.
const EXTEND_PAD: u32 = 0u;
// Tile the image: keep only the fractional part of the normalized coordinate.
const EXTEND_REPEAT: u32 = 1u;
// Tile the image, mirroring every other repetition.
const EXTEND_REFLECT: u32 = 2u;
// Maps coordinate `t` into the image range according to extend mode `mode`,
// where `max` is the extent of the image along this axis.
//
// Pad clamps to [0, max - 1]. Every other mode (repeat, reflect, and any
// unknown value, which is treated as reflect) is evaluated on the normalized
// coordinate `t / max` and scaled back by `max`.
fn extend_mode(t: f32, mode: u32, max: f32) -> f32 {
    if mode == EXTEND_PAD {
        return clamp(t, 0.0, max - 1.0);
    }
    let normalized = extend_mode_normalized(t / max, mode);
    return normalized * max;
}
// Applies extend mode `mode` to a normalized coordinate `t`, where [0, 1]
// spans the image once. Unknown modes behave like reflect.
fn extend_mode_normalized(t: f32, mode: u32) -> f32 {
    if mode == EXTEND_PAD {
        return clamp(t, 0.0, 1.0);
    }
    if mode == EXTEND_REPEAT {
        return fract(t);
    }
    // Reflect: distance from `t` to the nearest even integer, which yields a
    // triangle wave with period 2.
    let nearest_even = 2.0 * round(0.5 * t);
    return abs(t - nearest_even);
}
// Bilinear filtering
//
// Loads the 4 texels surrounding `coords` and blends them with weights given by
// the fractional part of `coords`. Texel positions are clamped to the image's
// rectangle in the atlas, [image_offset, image_offset + image_size - 1], so the
// footprint never reads a neighbouring image. `extend_modes` is accepted for
// signature parity with `bicubic_sample` but is not used.
fn bilinear_sample(
    tex: texture_2d<f32>,
    coords: vec2<f32>,
    image_offset: vec2<f32>,
    image_size: vec2<f32>,
    extend_modes: vec2<u32>
) -> vec4<f32> {
    let last_texel = image_offset + image_size - vec2(1.0);
    let clamped = clamp(coords, image_offset, last_texel);
    // Lower and upper integer corners of the footprint.
    let lo = vec2<i32>(floor(clamped));
    let hi = vec2<i32>(ceil(clamped));
    // The weights come from the unclamped coordinate.
    let weight = fract(coords);

    let x0y0 = textureLoad(tex, lo, 0);
    let x0y1 = textureLoad(tex, vec2<i32>(lo.x, hi.y), 0);
    let x1y0 = textureLoad(tex, vec2<i32>(hi.x, lo.y), 0);
    let x1y1 = textureLoad(tex, hi, 0);

    // Blend along y within each column, then along x between the columns.
    let column0 = mix(x0y0, x0y1, weight.y);
    let column1 = mix(x1y0, x1y1, weight.y);
    return mix(column0, column1, weight.x);
}
// Bicubic filtering using Mitchell filter with B=1/3, C=1/3
//
// Cubic resampling consists of sampling the 16 surrounding pixels of the target point and
// interpolating them with a cubic filter. The generated matrix is 4x4 and represent the coefficients
// of the cubic function used to calculate weights based on the `x_fract` and `y_fract` of the
// location we are looking at.
//
// Texel positions are clamped to the image's rectangle in the atlas,
// [image_offset, image_offset + image_size - 1]. `extend_modes` is currently unused.
//
// The result is a valid premultiplied color: alpha is clamped to [0, 1] and
// each color component to [0, alpha]. The filter has negative lobes, so the
// raw weighted sum can under- or overshoot.
fn bicubic_sample(
    tex: texture_2d<f32>,
    coords: vec2<f32>,
    image_offset: vec2<f32>,
    image_size: vec2<f32>,
    extend_modes: vec2<u32>,
) -> vec4<f32> {
    let atlas_max = image_offset + image_size - vec2(1.0);
    // Fractional distance from the texel centre just below `coords`.
    let frac_coords = fract(coords + 0.5);
    // Get cubic weights for x and y directions
    let cx = cubic_weights(frac_coords.x);
    let cy = cubic_weights(frac_coords.y);
    // Sample 4x4 grid around coords (sXY: X = column, Y = row)
    let s00 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(-1.5, -1.5), image_offset, atlas_max)), 0);
    let s10 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(-0.5, -1.5), image_offset, atlas_max)), 0);
    let s20 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(0.5, -1.5), image_offset, atlas_max)), 0);
    let s30 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(1.5, -1.5), image_offset, atlas_max)), 0);
    let s01 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(-1.5, -0.5), image_offset, atlas_max)), 0);
    let s11 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(-0.5, -0.5), image_offset, atlas_max)), 0);
    let s21 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(0.5, -0.5), image_offset, atlas_max)), 0);
    let s31 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(1.5, -0.5), image_offset, atlas_max)), 0);
    let s02 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(-1.5, 0.5), image_offset, atlas_max)), 0);
    let s12 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(-0.5, 0.5), image_offset, atlas_max)), 0);
    let s22 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(0.5, 0.5), image_offset, atlas_max)), 0);
    let s32 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(1.5, 0.5), image_offset, atlas_max)), 0);
    let s03 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(-1.5, 1.5), image_offset, atlas_max)), 0);
    let s13 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(-0.5, 1.5), image_offset, atlas_max)), 0);
    let s23 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(0.5, 1.5), image_offset, atlas_max)), 0);
    let s33 = textureLoad(tex, vec2<i32>(clamp(coords + vec2(1.5, 1.5), image_offset, atlas_max)), 0);
    // Interpolate in x direction for each row
    let row0 = cx.x * s00 + cx.y * s10 + cx.z * s20 + cx.w * s30;
    let row1 = cx.x * s01 + cx.y * s11 + cx.z * s21 + cx.w * s31;
    let row2 = cx.x * s02 + cx.y * s12 + cx.z * s22 + cx.w * s32;
    let row3 = cx.x * s03 + cx.y * s13 + cx.z * s23 + cx.w * s33;
    // Interpolate in y direction
    let result = cy.x * row0 + cy.y * row1 + cy.z * row2 + cy.w * row3;
    // Clamp alpha to [0, 1] first, then keep every color component within
    // [0, alpha]. Clamping against the raw alpha instead would let a negative
    // filtered alpha drag the whole color below zero.
    let alpha = clamp(result.a, 0.0, 1.0);
    return vec4<f32>(
        clamp(result.rgb, vec3<f32>(0.0), vec3<f32>(alpha)),
        alpha
    );
}
// Cubic resampler logic borrowed from Skia (same as CPU cubic_resampler function)
// Mitchell-Netravali cubic filter coefficients with parameters B=1/3 and C=1/3
//
// Row i holds the polynomial coefficients of the weight for the i-th of the four
// taps, in ascending powers of t: weight_i(t) = MF[i][0] + MF[i][1] * t
// + MF[i][2] * t^2 + MF[i][3] * t^3. At t = 0 the four weights are
// (1/18, 16/18, 1/18, 0), which sum to 1.
const MF: array<vec4<f32>, 4> = array<vec4<f32>, 4>(
// Tap 0 (furthest below the sample position)
vec4<f32>(
(1.0 / 6.0) / 3.0,
-(3.0 / 6.0) / 3.0 - 1.0 / 3.0,
(3.0 / 6.0) / 3.0 + 2.0 * 1.0 / 3.0,
-(1.0 / 6.0) / 3.0 - 1.0 / 3.0
),
// Tap 1
vec4<f32>(
1.0 - (2.0 / 6.0) / 3.0,
0.0,
-3.0 + (12.0 / 6.0) / 3.0 + 1.0 / 3.0,
2.0 - (9.0 / 6.0) / 3.0 - 1.0 / 3.0
),
// Tap 2
vec4<f32>(
(1.0 / 6.0) / 3.0,
(3.0 / 6.0) / 3.0 + 1.0 / 3.0,
3.0 - (15.0 / 6.0) / 3.0 - 2.0 * 1.0 / 3.0,
-2.0 + (9.0 / 6.0) / 3.0 + 1.0 / 3.0
),
// Tap 3 (furthest above the sample position)
vec4<f32>(
0.0,
0.0,
-1.0 / 3.0,
(1.0 / 6.0) / 3.0 + 1.0 / 3.0
)
);
// Evaluates the four cubic tap weights for one fractional offset (same as the
// CPU weights function). Component i of the result is the polynomial stored in
// row i of `MF`, evaluated at `fract`.
fn cubic_weights(fract: f32) -> vec4<f32> {
    let tap0 = MF[0];
    let tap1 = MF[1];
    let tap2 = MF[2];
    let tap3 = MF[3];
    let w0 = single_weight(fract, tap0.x, tap0.y, tap0.z, tap0.w);
    let w1 = single_weight(fract, tap1.x, tap1.y, tap1.z, tap1.w);
    let w2 = single_weight(fract, tap2.x, tap2.y, tap2.z, tap2.w);
    let w3 = single_weight(fract, tap3.x, tap3.y, tap3.z, tap3.w);
    return vec4<f32>(w0, w1, w2, w3);
}
// Evaluates the cubic a + b*t + c*t^2 + d*t^3 in Horner form.
// The evaluation order matches the CPU implementation.
fn single_weight(t: f32, a: f32, b: f32, c: f32, d: f32) -> f32 {
    // Fold the coefficients in from the highest power down.
    var acc = t * d + c;
    acc = t * acc + b;
    return t * acc + a;
}