blob: 9e84bf84d6d8b00472967aac53ed4869c50548c1 [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The reduction phase for path tag scan implemented as a tree reduction.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#include "pathtag.h"
// Note: the partition size is smaller than pathseg by a factor
// of 4, as there are 4 tag bytes to a tag word.
#define N_ROWS 2
#define LG_WG_SIZE (6 + LG_WG_FACTOR)
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
layout(binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
#define Monoid TagMonoid
layout(set = 0, binding = 3) buffer OutBuf {
Monoid[] outbuf;
};
shared Monoid sh_scratch[WG_SIZE];
void main() {
uint ix = gl_GlobalInvocationID.x * N_ROWS;
uint scene_ix = (conf.pathtag_offset >> 2) + ix;
uint tag_word = scene[scene_ix];
Monoid agg = reduce_tag(tag_word);
for (uint i = 1; i < N_ROWS; i++) {
tag_word = scene[scene_ix + i];
agg = combine_tag_monoid(agg, reduce_tag(tag_word));
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
// We could make this predicate tighter, but would it help?
if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) {
Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)];
agg = combine_tag_monoid(agg, other);
}
barrier();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0) {
outbuf[gl_WorkGroupID.x] = agg;
}
}