| // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense |
| |
| // The reduction phase for path tag scan implemented as a tree reduction. |
| |
| #version 450 |
| #extension GL_GOOGLE_include_directive : enable |
| |
| #include "mem.h" |
| #include "setup.h" |
| #include "pathtag.h" |
| |
| // Note: the partition size is smaller than pathseg by a factor |
| // of 4, as there are 4 tag bytes to a tag word. |
| #define N_ROWS 2 /* scene words read by each invocation in main() */ |
| #define LG_WG_SIZE (6 + LG_WG_FACTOR) /* log2 of the workgroup size; LG_WG_FACTOR is not defined in this file (presumably supplied by an include) */ |
| #define WG_SIZE (1 << LG_WG_SIZE) /* invocations per workgroup; also the length of sh_scratch */ |
| #define PARTITION_SIZE (WG_SIZE * N_ROWS) /* scene words consumed by one workgroup */ |
| |
| layout(local_size_x = WG_SIZE, local_size_y = 1) in; /* one-dimensional workgroup of WG_SIZE invocations */ |
| |
| layout(binding = 1) readonly buffer ConfigBuf { /* read-only configuration; main() reads only conf.pathtag_offset from it */ |
| Config conf; |
| }; |
| |
| layout(binding = 2) readonly buffer SceneBuf { /* read-only scene data, addressed here as 32-bit words */ |
| uint[] scene; |
| }; |
| |
| #define Monoid TagMonoid /* alias for the element type being reduced; TagMonoid is declared outside this file (presumably in pathtag.h) */ |
| |
| layout(set = 0, binding = 3) buffer OutBuf { /* output: one reduced Monoid per workgroup, indexed by gl_WorkGroupID.x in main() */ |
| Monoid[] outbuf; |
| }; |
| |
| shared Monoid sh_scratch[WG_SIZE]; /* workgroup-shared scratch, one slot per invocation, used by the reduction loop in main() */ |
| |
| // Reduces PARTITION_SIZE consecutive tag words of the scene into a single |
| // Monoid per workgroup and stores it in outbuf[gl_WorkGroupID.x]. |
| // |
| // Each invocation first folds its own N_ROWS words, then the workgroup |
| // combines the per-invocation results through shared memory in LG_WG_SIZE |
| // doubling steps. |
| void main() { |
|     uint local_ix = gl_LocalInvocationID.x; |
|     // Index of the first scene word owned by this invocation. The shift |
|     // presumably converts a byte offset into a word index (TODO confirm). |
|     uint base_ix = (conf.pathtag_offset >> 2) + gl_GlobalInvocationID.x * N_ROWS; |
| |
|     // Sequentially fold this invocation's N_ROWS tag words, in order. |
|     Monoid total = reduce_tag(scene[base_ix]); |
|     for (uint row = 1; row < N_ROWS; row++) { |
|         Monoid next = reduce_tag(scene[base_ix + row]); |
|         total = combine_tag_monoid(total, next); |
|     } |
|     sh_scratch[local_ix] = total; |
| |
|     // At each step every invocation appends the running aggregate of the |
|     // invocation (1 << step) slots to its right, so after the last step |
|     // invocation 0 holds the combination of the whole workgroup. |
|     for (uint step = 0; step < LG_WG_SIZE; step++) { |
|         uint partner_ix = local_ix + (1u << step); |
|         // Make the previous round's writes to sh_scratch visible before reading. |
|         barrier(); |
|         // This guard only keeps the read inside sh_scratch; a tighter |
|         // predicate is possible, but it is unclear whether it would help. |
|         if (partner_ix < WG_SIZE) { |
|             total = combine_tag_monoid(total, sh_scratch[partner_ix]); |
|         } |
|         // All reads of this round must finish before any slot is overwritten. |
|         barrier(); |
|         sh_scratch[local_ix] = total; |
|     } |
| |
|     // Only the first invocation holds the full workgroup aggregate. |
|     if (local_ix == 0) { |
|         outbuf[gl_WorkGroupID.x] = total; |
|     } |
| } |