blob: 19e568b488b17dbda4e9659663002b5378b9f472 [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The reduction phase for the blend/union test
#version 450
// At least for now, N_SEQ is hardwired at 1
#define LG_WG_SIZE 9
#define WG_SIZE (1 << LG_WG_SIZE)
#define PART_SIZE WG_SIZE
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
// The bicyclic semigroup (monoid)
struct Bic {
uint a;
uint b;
};
Bic bic_combine(Bic x, Bic y) {
uint m = min(x.b, y.a);
return Bic(x.a + y.a - m, x.b + y.b - m);
}
// Node type values
#define OPEN_PAREN 0
#define CLOSE_PAREN 1
struct Node {
uint node_type;
uint pad1;
uint pad2;
uint pad3;
vec4 bbox;
};
vec4 bbox_intersect(vec4 a, vec4 b) {
return vec4(max(a.xy, b.xy), min(a.zw, b.zw));
}
layout(binding = 0) readonly buffer InBuf {
Node[] inbuf;
};
layout(binding = 1) buffer BicBuf {
Bic[] bicbuf;
};
// Output buffer for stack slice data
layout(binding = 2) buffer StackBuf {
// open question: do we need the parent link or are bboxes sufficient?
vec4[] stack;
};
shared Bic sh_bic[WG_SIZE];
shared vec4 sh_bbox[WG_SIZE];
void main() {
uint th = gl_LocalInvocationID.x;
Node inp = inbuf[gl_GlobalInvocationID.x];
uint node_type = inp.node_type;
Bic bic = Bic(uint(node_type == CLOSE_PAREN), uint(node_type == OPEN_PAREN));
sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
uint other_ix = gl_LocalInvocationID.x + (1u << i);
if (other_ix < WG_SIZE) {
bic = bic_combine(bic, sh_bic[other_ix]);
}
barrier();
sh_bic[th] = bic;
}
if (th == 0) {
bicbuf[gl_WorkGroupID.x] = bic;
}
barrier();
// Number of elements in stack slice output by this partition
uint size = sh_bic[0].b;
bic = Bic(0, 0);
if (th + 1 < WG_SIZE) {
bic = sh_bic[th + 1];
}
// Do stream compaction
if (inp.node_type == OPEN_PAREN && bic.a == 0) {
uint out_ix = size - bic.b - 1;
sh_bbox[out_ix] = inp.bbox;
}
barrier();
vec4 bbox;
if (th < size) {
bbox = sh_bbox[th];
}
// Forward scan of intersection over resulting stack slice
for (uint i = 0; i < LG_WG_SIZE; i++) {
if (th < size && th >= (1u << i)) {
bbox = bbox_intersect(sh_bbox[th - (1u << i)], bbox);
}
barrier();
if (th < size) {
sh_bbox[th] = bbox;
}
barrier();
}
if (th < size) {
stack[gl_GlobalInvocationID.x] = bbox;
}
}