blob: 7a12b81886ce5ae48323332972d8420889023a02 [file] [log] [blame]
// Copyright 2022 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
#import config
#import drawtag
@group(0) @binding(0)
var<uniform> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage, read_write> reduced: array<DrawMonoid>;
const WG_SIZE = 256u;
var<workgroup> sh_scratch: array<DrawMonoid, WG_SIZE>;
#import util
@compute @workgroup_size(256)
fn main(
@builtin(local_invocation_id) local_id: vec3<u32>,
@builtin(workgroup_id) wg_id: vec3<u32>,
) {
let num_blocks_total = (config.n_drawobj + (WG_SIZE - 1u)) / WG_SIZE;
// When the number of blocks exceeds the workgroup size, divide
// the work evenly so each workgroup handles n_blocks / wg, with
// the low workgroups doing one more each to handle the remainder.
let n_blocks_base = num_blocks_total / WG_SIZE;
let remainder = num_blocks_total % WG_SIZE;
let first_block = n_blocks_base * wg_id.x + min(wg_id.x, remainder);
let n_blocks = n_blocks_base + u32(wg_id.x < remainder);
var block_index = first_block * WG_SIZE + local_id.x;
var agg = draw_monoid_identity();
for (var i = 0u; i < n_blocks; i++) {
let tag_word = read_draw_tag_from_scene(block_index);
agg = combine_draw_monoid(agg, map_draw_tag(tag_word));
block_index += WG_SIZE;
}
sh_scratch[local_id.x] = agg;
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
workgroupBarrier();
if local_id.x + (1u << i) < WG_SIZE {
let other = sh_scratch[local_id.x + (1u << i)];
agg = combine_draw_monoid(agg, other);
}
workgroupBarrier();
sh_scratch[local_id.x] = agg;
}
if local_id.x == 0u {
reduced[wg_id.x] = agg;
}
}