| // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense |
| |
| // The reduction phase for the blend/union test |
| |
| #version 450 |
| |
| // At least for now, N_SEQ is hardwired at 1 |
| #define LG_WG_SIZE 9 |
| #define WG_SIZE (1 << LG_WG_SIZE) |
| #define PART_SIZE WG_SIZE |
| |
| layout(local_size_x = WG_SIZE, local_size_y = 1) in; |
| |
| // The bicyclic semigroup (monoid) |
| struct Bic { |
| uint a; |
| uint b; |
| }; |
| |
| Bic bic_combine(Bic x, Bic y) { |
| uint m = min(x.b, y.a); |
| return Bic(x.a + y.a - m, x.b + y.b - m); |
| } |
| |
| // Node type values |
| #define OPEN_PAREN 0 |
| #define CLOSE_PAREN 1 |
| |
| struct Node { |
| uint node_type; |
| uint pad1; |
| uint pad2; |
| uint pad3; |
| vec4 bbox; |
| }; |
| |
| vec4 bbox_intersect(vec4 a, vec4 b) { |
| return vec4(max(a.xy, b.xy), min(a.zw, b.zw)); |
| } |
| |
| layout(binding = 0) readonly buffer InBuf { |
| Node[] inbuf; |
| }; |
| |
| layout(binding = 1) buffer BicBuf { |
| Bic[] bicbuf; |
| }; |
| |
| // Output buffer for stack slice data |
| layout(binding = 2) buffer StackBuf { |
| // open question: do we need the parent link or are bboxes sufficient? |
| vec4[] stack; |
| }; |
| |
| shared Bic sh_bic[WG_SIZE]; |
| shared vec4 sh_bbox[WG_SIZE]; |
| |
| void main() { |
| uint th = gl_LocalInvocationID.x; |
| Node inp = inbuf[gl_GlobalInvocationID.x]; |
| uint node_type = inp.node_type; |
| Bic bic = Bic(uint(node_type == CLOSE_PAREN), uint(node_type == OPEN_PAREN)); |
| sh_bic[gl_LocalInvocationID.x] = bic; |
| for (uint i = 0; i < LG_WG_SIZE; i++) { |
| barrier(); |
| uint other_ix = gl_LocalInvocationID.x + (1u << i); |
| if (other_ix < WG_SIZE) { |
| bic = bic_combine(bic, sh_bic[other_ix]); |
| } |
| barrier(); |
| sh_bic[th] = bic; |
| } |
| if (th == 0) { |
| bicbuf[gl_WorkGroupID.x] = bic; |
| } |
| barrier(); |
| // Number of elements in stack slice output by this partition |
| uint size = sh_bic[0].b; |
| bic = Bic(0, 0); |
| if (th + 1 < WG_SIZE) { |
| bic = sh_bic[th + 1]; |
| } |
| // Do stream compaction |
| if (inp.node_type == OPEN_PAREN && bic.a == 0) { |
| uint out_ix = size - bic.b - 1; |
| sh_bbox[out_ix] = inp.bbox; |
| } |
| barrier(); |
| vec4 bbox; |
| if (th < size) { |
| bbox = sh_bbox[th]; |
| } |
| // Forward scan of intersection over resulting stack slice |
| for (uint i = 0; i < LG_WG_SIZE; i++) { |
| if (th < size && th >= (1u << i)) { |
| bbox = bbox_intersect(sh_bbox[th - (1u << i)], bbox); |
| } |
| barrier(); |
| if (th < size) { |
| sh_bbox[th] = bbox; |
| } |
| barrier(); |
| } |
| if (th < size) { |
| stack[gl_GlobalInvocationID.x] = bbox; |
| } |
| } |