| // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense |
| |
| // The reduce pass for clip stack processing. |
| |
| // The primary input is a sequence of path ids representing paths to |
| // push, with a special value of ~0 to represent pop. |
| |
| // For each path, the bounding box is read from the path bbox buffer |
| // (path_bbox_alloc), as written by pathseg. |
| |
| // Output is a stack monoid reduction for the partition. The Bic |
| // is stored at clip_bic_alloc, and the stack slice at clip_stack_alloc. |
| |
| // Note: for this shader, only pushes are represented in the stack |
| // monoid reduction output, so we don't have to worry about the |
| // interpretation of pops. |
| |
| #version 450 |
| #extension GL_GOOGLE_include_directive : enable |
| |
| #include "mem.h" |
| #include "setup.h" |
| |
| // log2 of the workgroup size. LG_WG_FACTOR is not defined in this file |
| // (presumably it comes from one of the includes above). |
| #define LG_WG_SIZE (7 + LG_WG_FACTOR) |
| // Number of invocations per workgroup. |
| #define WG_SIZE (1 << LG_WG_SIZE) |
| // Each workgroup reduces one partition of exactly WG_SIZE input elements. |
| #define PARTITION_SIZE WG_SIZE |
| |
| // One-dimensional workgroup of WG_SIZE invocations. |
| layout(local_size_x = WG_SIZE) in; |
| |
| // Read-only configuration block. This shader reads the offsets of |
| // clip_alloc, path_bbox_alloc, clip_bic_alloc and clip_stack_alloc from it. |
| layout(binding = 1) readonly buffer ConfigBuf { |
| Config conf; |
| }; |
| |
| // The intermediate state for clip processing: one entry of the stack slice written by store_clip_el. |
| struct ClipEl { |
| // index of parent node: global index (workgroup * PARTITION_SIZE + local index) of the push that produced this entry |
| uint parent_ix; |
| // bounding box, as loaded by load_path_bbox: (left, top, right, bottom) |
| vec4 bbox; |
| }; |
| |
| // The bicyclic monoid: for a run of inputs, `a` counts pops not matched by an earlier push in the run, `b` counts pushes not matched by a later pop. |
| struct Bic { |
| uint a; |
| uint b; |
| }; |
| |
| // Compose two bicyclic-monoid elements, with `x` covering the inputs that |
| // come before those covered by `y`. Pushes left open by `x` (x.b) cancel |
| // against pops left open by `y` (y.a); whatever is not cancelled on either |
| // side carries through to the result. |
| Bic bic_combine(Bic x, Bic y) { |
|     uint cancelled = min(x.b, y.a); |
|     Bic merged; |
|     merged.a = x.a + y.a - cancelled; |
|     merged.b = x.b + y.b - cancelled; |
|     return merged; |
| } |
| |
| // Workgroup scratch for the reverse scan; after the scan, entry i holds the |
| // combination of inputs i..WG_SIZE-1 of this partition. |
| shared Bic sh_bic[WG_SIZE]; |
| // Indexed by stack depth: local invocation index of the push at that depth. |
| shared uint sh_parent[WG_SIZE]; |
| // Indexed by stack depth: path id carried by the push at that depth. |
| shared uint sh_path_ix[WG_SIZE]; |
| // Only referenced by the bbox intersection scan in main, which is currently compiled out. |
| shared vec4 sh_bbox[WG_SIZE]; |
| |
| // Fetch the bounding box of path `path_ix` from the path bbox buffer (as |
| // written by pathseg). Records are 6 words apart; the first four words of a |
| // record are left, top, right, bottom, stored as unsigned values with a bias |
| // of 32768 that is removed here. Returns (left, top, right, bottom). |
| vec4 load_path_bbox(uint path_ix) { |
|     uint word_ix = (conf.path_bbox_alloc.offset >> 2) + 6 * path_ix; |
|     vec4 biased = vec4( |
|         float(memory[word_ix]), |
|         float(memory[word_ix + 1]), |
|         float(memory[word_ix + 2]), |
|         float(memory[word_ix + 3]) |
|     ); |
|     return biased - 32768.0; |
| } |
| |
| // Intersect two boxes stored as (x0, y0, x1, y1): the component-wise larger |
| // of the two min corners paired with the component-wise smaller of the two |
| // max corners. No check is made for an empty result. |
| vec4 bbox_intersect(vec4 a, vec4 b) { |
|     vec2 min_corner = max(a.xy, b.xy); |
|     vec2 max_corner = min(a.zw, b.zw); |
|     return vec4(min_corner, max_corner); |
| } |
| |
| // Write `bic` as record `ix` of the clip_bic_alloc region: two consecutive |
| // words, `a` first and then `b`. |
| void store_bic(uint ix, Bic bic) { |
|     uint word_a = (conf.clip_bic_alloc.offset >> 2) + 2 * ix; |
|     uint word_b = word_a + 1; |
|     memory[word_a] = bic.a; |
|     memory[word_b] = bic.b; |
| } |
| |
| // Write `el` as record `ix` of the clip_stack_alloc region. A record is five |
| // words: parent_ix, followed by the raw bit patterns of bbox.x, .y, .z, .w. |
| void store_clip_el(uint ix, ClipEl el) { |
|     uint slot = (conf.clip_stack_alloc.offset >> 2) + 5 * ix; |
|     // Reinterpret all four floats at once; no numeric conversion happens. |
|     uvec4 bbox_bits = floatBitsToUint(el.bbox); |
|     memory[slot] = el.parent_ix; |
|     memory[slot + 1] = bbox_bits.x; |
|     memory[slot + 2] = bbox_bits.y; |
|     memory[slot + 3] = bbox_bits.z; |
|     memory[slot + 4] = bbox_bits.w; |
| } |
| |
| // Per-partition reduce step: one invocation per clip input element, one |
| // workgroup per partition of PARTITION_SIZE elements. |
| // |
| // 1. Classify this invocation's input as push or pop and run a reverse scan |
| //    of the bicyclic monoid through sh_bic. Invocation 0 ends up with the |
| //    reduction of the whole partition and stores it via store_bic. |
| // 2. Every push that is still open at the end of the partition records its |
| //    local index and path id at its stack depth in sh_parent / sh_path_ix. |
| // 3. The first `size` invocations each write one ClipEl (global index of the |
| //    push, plus that path's bounding box) via store_clip_el. |
| void main() { |
|     uint th = gl_LocalInvocationID.x; |
|     uint wg = gl_WorkGroupID.x; |
|     uint global_ix = gl_GlobalInvocationID.x; |
|     uint inp = memory[(conf.clip_alloc.offset >> 2) + global_ix]; |
|     // Non-negative when read as a signed int means "push this path id"; |
|     // anything else is treated as a pop. |
|     bool is_push = int(inp) >= 0; |
|     // Reverse scan of the bicyclic semigroup. A push starts out as |
|     // (a = 0, b = 1) and a pop as (a = 1, b = 0). |
|     uint push_bit = uint(is_push); |
|     Bic bic = Bic(1 - push_bit, push_bit); |
|     sh_bic[th] = bic; |
|     for (uint i = 0; i < LG_WG_SIZE; i++) { |
|         uint stride = 1u << i; |
|         barrier(); |
|         // Fold in the partial result `stride` elements to the right, if any. |
|         if (th + stride < WG_SIZE) { |
|             bic = bic_combine(bic, sh_bic[th + stride]); |
|         } |
|         // All reads of this round must finish before anyone overwrites. |
|         barrier(); |
|         sh_bic[th] = bic; |
|     } |
|     // Invocation 0 now holds the reduction of the entire partition. |
|     if (th == 0) { |
|         store_bic(wg, bic); |
|     } |
|     barrier(); |
|     // Number of pushes still open at the end of the partition. |
|     uint size = sh_bic[0].b; |
|     // Reduction of everything strictly after this element (identity for |
|     // the last invocation). |
|     Bic suffix = Bic(0, 0); |
|     if (th + 1 < WG_SIZE) { |
|         suffix = sh_bic[th + 1]; |
|     } |
|     // A push survives when no unmatched pop follows it. suffix.b surviving |
|     // pushes sit above it, which fixes its depth counted from the bottom. |
|     if (is_push && suffix.a == 0) { |
|         uint depth = size - suffix.b - 1; |
|         sh_parent[depth] = th; |
|         sh_path_ix[depth] = inp; |
|     } |
|     barrier(); |
|     // Do forward scan of bounding box intersection |
|     bool in_stack = th < size; |
|     vec4 bbox; |
|     if (in_stack) { |
|         bbox = load_path_bbox(sh_path_ix[th]); |
|     } |
|     // Not necessary if depth is bounded by wg size |
| #if 0 |
|     for (uint i = 0; i < LG_WG_SIZE; i++) { |
|         // We gate so we never access uninit data, but it might |
|         // be more efficient to avoid the conditionals. |
|         if (in_stack) { |
|             sh_bbox[th] = bbox; |
|         } |
|         barrier(); |
|         if (in_stack && th >= (1u << i)) { |
|             bbox = bbox_intersect(sh_bbox[th - (1u << i)], bbox); |
|         } |
|         barrier(); |
|     } |
| #endif |
|     // With the scan above compiled out, each entry carries the path's own |
|     // bounding box. Entry `th` of the slice lands at this invocation's |
|     // global index. |
|     if (in_stack) { |
|         uint parent_ix = sh_parent[th] + wg * PARTITION_SIZE; |
|         store_clip_el(global_ix, ClipEl(parent_ix, bbox)); |
|     } |
| } |