| // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense |
| |
| // Fine rasterizer. This can run in simple (just path rendering) and full |
| // modes, controllable by #define. |
| |
| // This is a cut'n'paste w/ backdrop. |
| struct Tile { |
| backdrop: i32, |
| segments: u32, |
| } |
| |
| #import segment |
| #import config |
| |
| @group(0) @binding(0) |
| var<uniform> config: Config; |
| |
| @group(0) @binding(1) |
| var<storage> tiles: array<Tile>; |
| |
| @group(0) @binding(2) |
| var<storage> segments: array<Segment>; |
| |
| #ifdef full |
| |
| #import blend |
| #import ptcl |
| |
| let GRADIENT_WIDTH = 512; |
| |
| @group(0) @binding(3) |
| var output: texture_storage_2d<rgba8unorm, write>; |
| |
| @group(0) @binding(4) |
| var<storage> ptcl: array<u32>; |
| |
| @group(0) @binding(5) |
| var gradients: texture_2d<f32>; |
| |
| @group(0) @binding(6) |
| var<storage> info: array<u32>; |
| |
| @group(0) @binding(7) |
| var image_atlas: texture_2d<f32>; |
| |
| fn read_fill(cmd_ix: u32) -> CmdFill { |
| let tile = ptcl[cmd_ix + 1u]; |
| let backdrop = i32(ptcl[cmd_ix + 2u]); |
| return CmdFill(tile, backdrop); |
| } |
| |
| fn read_stroke(cmd_ix: u32) -> CmdStroke { |
| let tile = ptcl[cmd_ix + 1u]; |
| let half_width = bitcast<f32>(ptcl[cmd_ix + 2u]); |
| return CmdStroke(tile, half_width); |
| } |
| |
| fn read_color(cmd_ix: u32) -> CmdColor { |
| let rgba_color = ptcl[cmd_ix + 1u]; |
| return CmdColor(rgba_color); |
| } |
| |
| fn read_lin_grad(cmd_ix: u32) -> CmdLinGrad { |
| let index_mode = ptcl[cmd_ix + 1u]; |
| let index = index_mode >> 2u; |
| let extend_mode = index_mode & 0x3u; |
| let info_offset = ptcl[cmd_ix + 2u]; |
| let line_x = bitcast<f32>(info[info_offset]); |
| let line_y = bitcast<f32>(info[info_offset + 1u]); |
| let line_c = bitcast<f32>(info[info_offset + 2u]); |
| return CmdLinGrad(index, extend_mode, line_x, line_y, line_c); |
| } |
| |
| fn read_rad_grad(cmd_ix: u32) -> CmdRadGrad { |
| let index_mode = ptcl[cmd_ix + 1u]; |
| let index = index_mode >> 2u; |
| let extend_mode = index_mode & 0x3u; |
| let info_offset = ptcl[cmd_ix + 2u]; |
| let m0 = bitcast<f32>(info[info_offset]); |
| let m1 = bitcast<f32>(info[info_offset + 1u]); |
| let m2 = bitcast<f32>(info[info_offset + 2u]); |
| let m3 = bitcast<f32>(info[info_offset + 3u]); |
| let matrx = vec4(m0, m1, m2, m3); |
| let xlat = vec2(bitcast<f32>(info[info_offset + 4u]), bitcast<f32>(info[info_offset + 5u])); |
| let focal_x = bitcast<f32>(info[info_offset + 6u]); |
| let radius = bitcast<f32>(info[info_offset + 7u]); |
| let flags_kind = info[info_offset + 8u]; |
| let flags = flags_kind >> 3u; |
| let kind = flags_kind & 0x7u; |
| return CmdRadGrad(index, extend_mode, matrx, xlat, focal_x, radius, kind, flags); |
| } |
| |
| fn read_image(cmd_ix: u32) -> CmdImage { |
| let info_offset = ptcl[cmd_ix + 1u]; |
| let m0 = bitcast<f32>(info[info_offset]); |
| let m1 = bitcast<f32>(info[info_offset + 1u]); |
| let m2 = bitcast<f32>(info[info_offset + 2u]); |
| let m3 = bitcast<f32>(info[info_offset + 3u]); |
| let matrx = vec4(m0, m1, m2, m3); |
| let xlat = vec2(bitcast<f32>(info[info_offset + 4u]), bitcast<f32>(info[info_offset + 5u])); |
| let xy = info[info_offset + 6u]; |
| let width_height = info[info_offset + 7u]; |
| // The following are not intended to be bitcasts |
| let x = f32(xy >> 16u); |
| let y = f32(xy & 0xffffu); |
| let width = f32(width_height >> 16u); |
| let height = f32(width_height & 0xffffu); |
| return CmdImage(matrx, xlat, vec2(x, y), vec2(width, height)); |
| } |
| |
| fn read_end_clip(cmd_ix: u32) -> CmdEndClip { |
| let blend = ptcl[cmd_ix + 1u]; |
| let alpha = bitcast<f32>(ptcl[cmd_ix + 2u]); |
| return CmdEndClip(blend, alpha); |
| } |
| |
| fn extend_mode(t: f32, mode: u32) -> f32 { |
| let EXTEND_PAD = 0u; |
| let EXTEND_REPEAT = 1u; |
| let EXTEND_REFLECT = 2u; |
| switch mode { |
| // EXTEND_PAD |
| case 0u: { |
| return clamp(t, 0.0, 1.0); |
| } |
| // EXTEND_REPEAT |
| case 1u: { |
| return fract(t); |
| } |
| // EXTEND_REFLECT |
| default: { |
| return abs(t - 2.0 * round(0.5 * t)); |
| } |
| } |
| } |
| |
| #else |
| |
| @group(0) @binding(3) |
| var output: texture_storage_2d<r8, write>; |
| |
| #endif |
| |
| let PIXELS_PER_THREAD = 4u; |
| |
| fn fill_path(tile: Tile, xy: vec2<f32>, even_odd: bool) -> array<f32, PIXELS_PER_THREAD> { |
| var area: array<f32, PIXELS_PER_THREAD>; |
| let backdrop_f = f32(tile.backdrop); |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| area[i] = backdrop_f; |
| } |
| var segment_ix = tile.segments; |
| while segment_ix != 0u { |
| let segment = segments[segment_ix]; |
| let y = segment.origin.y - xy.y; |
| let y0 = clamp(y, 0.0, 1.0); |
| let y1 = clamp(y + segment.delta.y, 0.0, 1.0); |
| let dy = y0 - y1; |
| if dy != 0.0 { |
| let vec_y_recip = 1.0 / segment.delta.y; |
| let t0 = (y0 - y) * vec_y_recip; |
| let t1 = (y1 - y) * vec_y_recip; |
| let startx = segment.origin.x - xy.x; |
| let x0 = startx + t0 * segment.delta.x; |
| let x1 = startx + t1 * segment.delta.x; |
| let xmin0 = min(x0, x1); |
| let xmax0 = max(x0, x1); |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| let i_f = f32(i); |
| let xmin = min(xmin0 - i_f, 1.0) - 1.0e-6; |
| let xmax = xmax0 - i_f; |
| let b = min(xmax, 1.0); |
| let c = max(b, 0.0); |
| let d = max(xmin, 0.0); |
| let a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin); |
| area[i] += a * dy; |
| } |
| } |
| let y_edge = sign(segment.delta.x) * clamp(xy.y - segment.y_edge + 1.0, 0.0, 1.0); |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| area[i] += y_edge; |
| } |
| segment_ix = segment.next; |
| } |
| if even_odd { |
| // even-odd winding rule |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| let a = area[i]; |
| area[i] = abs(a - 2.0 * round(0.5 * a)); |
| } |
| } else { |
| // non-zero winding rule |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| area[i] = min(abs(area[i]), 1.0); |
| } |
| } |
| return area; |
| } |
| |
| fn stroke_path(seg: u32, half_width: f32, xy: vec2<f32>) -> array<f32, PIXELS_PER_THREAD> { |
| var df: array<f32, PIXELS_PER_THREAD>; |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| df[i] = 1e9; |
| } |
| var segment_ix = seg; |
| while segment_ix != 0u { |
| let segment = segments[segment_ix]; |
| let delta = segment.delta; |
| let dpos0 = xy + vec2(0.5, 0.5) - segment.origin; |
| let scale = 1.0 / dot(delta, delta); |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| let dpos = vec2(dpos0.x + f32(i), dpos0.y); |
| let t = clamp(dot(dpos, delta) * scale, 0.0, 1.0); |
| // performance idea: hoist sqrt out of loop |
| df[i] = min(df[i], length(delta * t - dpos)); |
| } |
| segment_ix = segment.next; |
| } |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| // reuse array; return alpha rather than distance |
| df[i] = clamp(half_width + 0.5 - df[i], 0.0, 1.0); |
| } |
| return df; |
| } |
| |
| // The X size should be 16 / PIXELS_PER_THREAD |
| @compute @workgroup_size(4, 16) |
| fn main( |
| @builtin(global_invocation_id) global_id: vec3<u32>, |
| @builtin(local_invocation_id) local_id: vec3<u32>, |
| @builtin(workgroup_id) wg_id: vec3<u32>, |
| ) { |
| let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x; |
| let xy = vec2(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y)); |
| #ifdef full |
| var rgba: array<vec4<f32>, PIXELS_PER_THREAD>; |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| rgba[i] = unpack4x8unorm(config.base_color).wzyx; |
| } |
| var blend_stack: array<array<u32, PIXELS_PER_THREAD>, BLEND_STACK_SPLIT>; |
| var clip_depth = 0u; |
| var area: array<f32, PIXELS_PER_THREAD>; |
| var cmd_ix = tile_ix * PTCL_INITIAL_ALLOC; |
| let blend_offset = ptcl[cmd_ix]; |
| cmd_ix += 1u; |
| // main interpretation loop |
| while true { |
| let tag = ptcl[cmd_ix]; |
| if tag == CMD_END { |
| break; |
| } |
| switch tag { |
| // CMD_FILL |
| case 1u: { |
| let fill = read_fill(cmd_ix); |
| let segments = fill.tile >> 1u; |
| let even_odd = (fill.tile & 1u) != 0u; |
| let tile = Tile(fill.backdrop, segments); |
| area = fill_path(tile, xy, even_odd); |
| cmd_ix += 3u; |
| } |
| // CMD_STROKE |
| case 2u: { |
| let stroke = read_stroke(cmd_ix); |
| area = stroke_path(stroke.tile, stroke.half_width, xy); |
| cmd_ix += 3u; |
| } |
| // CMD_SOLID |
| case 3u: { |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| area[i] = 1.0; |
| } |
| cmd_ix += 1u; |
| } |
| // CMD_COLOR |
| case 5u: { |
| let color = read_color(cmd_ix); |
| let fg = unpack4x8unorm(color.rgba_color).wzyx; |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| let fg_i = fg * area[i]; |
| rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; |
| } |
| cmd_ix += 2u; |
| } |
| // CMD_LIN_GRAD |
| case 6u: { |
| let lin = read_lin_grad(cmd_ix); |
| let d = lin.line_x * xy.x + lin.line_y * xy.y + lin.line_c; |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| let my_d = d + lin.line_x * f32(i); |
| let x = i32(round(extend_mode(my_d, lin.extend_mode) * f32(GRADIENT_WIDTH - 1))); |
| let fg_rgba = textureLoad(gradients, vec2(x, i32(lin.index)), 0); |
| let fg_i = fg_rgba * area[i]; |
| rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; |
| } |
| cmd_ix += 3u; |
| } |
| // CMD_RAD_GRAD |
| case 7u: { |
| let rad = read_rad_grad(cmd_ix); |
| let focal_x = rad.focal_x; |
| let radius = rad.radius; |
| let is_strip = rad.kind == RAD_GRAD_KIND_STRIP; |
| let is_circular = rad.kind == RAD_GRAD_KIND_CIRCULAR; |
| let is_focal_on_circle = rad.kind == RAD_GRAD_KIND_FOCAL_ON_CIRCLE; |
| let is_swapped = (rad.flags & RAD_GRAD_SWAPPED) != 0u; |
| let r1_recip = select(1.0 / radius, 0.0, is_circular); |
| let less_scale = select(1.0, -1.0, is_swapped || (1.0 - focal_x) < 0.0); |
| let t_sign = sign(1.0 - focal_x); |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| let my_xy = vec2(xy.x + f32(i), xy.y); |
| let local_xy = rad.matrx.xy * my_xy.x + rad.matrx.zw * my_xy.y + rad.xlat; |
| let x = local_xy.x; |
| let y = local_xy.y; |
| let xx = x * x; |
| let yy = y * y; |
| var t = 0.0; |
| var is_valid = true; |
| if is_strip { |
| let a = radius - yy; |
| t = sqrt(a) + x; |
| is_valid = a >= 0.0; |
| } else if is_focal_on_circle { |
| t = (xx + yy) / x; |
| is_valid = t >= 0.0 && x != 0.0; |
| } else if radius > 1.0 { |
| t = sqrt(xx + yy) - x * r1_recip; |
| } else { // radius < 1.0 |
| let a = xx - yy; |
| t = less_scale * sqrt(a) - x * r1_recip; |
| is_valid = a >= 0.0 && t >= 0.0; |
| } |
| if is_valid { |
| t = extend_mode(focal_x + t_sign * t, rad.extend_mode); |
| t = select(t, 1.0 - t, is_swapped); |
| let x = i32(round(t * f32(GRADIENT_WIDTH - 1))); |
| let fg_rgba = textureLoad(gradients, vec2(x, i32(rad.index)), 0); |
| let fg_i = fg_rgba * area[i]; |
| rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; |
| } |
| } |
| cmd_ix += 3u; |
| } |
| // CMD_IMAGE |
| case 8u: { |
| let image = read_image(cmd_ix); |
| let atlas_extents = image.atlas_offset + image.extents; |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| let my_xy = vec2(xy.x + f32(i), xy.y); |
| let atlas_uv = image.matrx.xy * my_xy.x + image.matrx.zw * my_xy.y + image.xlat + image.atlas_offset; |
| // This currently clips to the image bounds. TODO: extend modes |
| if all(atlas_uv < atlas_extents) && area[i] != 0.0 { |
| let uv_quad = vec4(max(floor(atlas_uv), image.atlas_offset), min(ceil(atlas_uv), atlas_extents)); |
| let uv_frac = fract(atlas_uv); |
| let a = premul_alpha(textureLoad(image_atlas, vec2<i32>(uv_quad.xy), 0)); |
| let b = premul_alpha(textureLoad(image_atlas, vec2<i32>(uv_quad.xw), 0)); |
| let c = premul_alpha(textureLoad(image_atlas, vec2<i32>(uv_quad.zy), 0)); |
| let d = premul_alpha(textureLoad(image_atlas, vec2<i32>(uv_quad.zw), 0)); |
| let fg_rgba = mix(mix(a, b, uv_frac.y), mix(c, d, uv_frac.y), uv_frac.x); |
| let fg_i = fg_rgba * area[i]; |
| rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; |
| } |
| } |
| cmd_ix += 2u; |
| } |
| // CMD_BEGIN_CLIP |
| case 9u: { |
| if clip_depth < BLEND_STACK_SPLIT { |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| blend_stack[clip_depth][i] = pack4x8unorm(rgba[i]); |
| rgba[i] = vec4(0.0); |
| } |
| } else { |
| // TODO: spill to memory |
| } |
| clip_depth += 1u; |
| cmd_ix += 1u; |
| } |
| // CMD_END_CLIP |
| case 10u: { |
| let end_clip = read_end_clip(cmd_ix); |
| clip_depth -= 1u; |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| var bg_rgba: u32; |
| if clip_depth < BLEND_STACK_SPLIT { |
| bg_rgba = blend_stack[clip_depth][i]; |
| } else { |
| // load from memory |
| } |
| let bg = unpack4x8unorm(bg_rgba); |
| let fg = rgba[i] * area[i] * end_clip.alpha; |
| rgba[i] = blend_mix_compose(bg, fg, end_clip.blend); |
| } |
| cmd_ix += 3u; |
| } |
| // CMD_JUMP |
| case 11u: { |
| cmd_ix = ptcl[cmd_ix + 1u]; |
| } |
| default: {} |
| } |
| } |
| let xy_uint = vec2<u32>(xy); |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| let coords = xy_uint + vec2(i, 0u); |
| if coords.x < config.target_width && coords.y < config.target_height { |
| let fg = rgba[i]; |
| // Max with a small epsilon to avoid NaNs |
| let a_inv = 1.0 / max(fg.a, 1e-6); |
| let rgba_sep = vec4(fg.rgb * a_inv, fg.a); |
| textureStore(output, vec2<i32>(coords), rgba_sep); |
| } |
| } |
| #else |
| let tile = tiles[tile_ix]; |
| let area = fill_path(tile, xy); |
| |
| let xy_uint = vec2<u32>(xy); |
| for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { |
| let coords = xy_uint + vec2(i, 0u); |
| if coords.x < config.target_width && coords.y < config.target_height { |
| textureStore(output, vec2<i32>(coords), vec4(area[i])); |
| } |
| } |
| #endif |
| } |
| |
| fn premul_alpha(rgba: vec4<f32>) -> vec4<f32> { |
| return vec4(rgba.rgb * rgba.a, rgba.a); |
| } |