| // SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense |
| |
| // The binning stage |
| |
| #import config |
| #import drawtag |
| #import bbox |
| #import bump |
| |
| // Scene configuration. This shader reads n_drawobj, n_clip, width_in_tiles, |
| // height_in_tiles, binning_size and bin_data_start from it. |
| @group(0) @binding(0) |
| var<uniform> config: Config; |
| |
| // Input: one monoid per draw object; `clip_ix` and `path_ix` are read here. |
| @group(0) @binding(1) |
| var<storage> draw_monoids: array<DrawMonoid>; |
| |
| // Input: path bounding boxes, indexed by `path_ix` of the draw monoid. |
| @group(0) @binding(2) |
| var<storage> path_bbox_buf: array<PathBbox>; |
| |
| // Input: clip bounding boxes; entry `clip_ix - 1` is used when `clip_ix > 0`. |
| @group(0) @binding(3) |
| var<storage> clip_bbox_buf: array<vec4<f32>>; |
| |
| // Output: per draw object, its path bbox intersected with its clip bbox. |
| @group(0) @binding(4) |
| var<storage, read_write> intersected_bbox: array<vec4<f32>>; |
| |
| // Shared allocator state: `binning` is bumped atomically to reserve space in |
| // `bin_data`, and `failed` accumulates stage failure flags. |
| @group(0) @binding(5) |
| var<storage, read_write> bump: BumpAllocators; |
| |
| // Output: draw object indices grouped by bin, written at |
| // `config.bin_data_start + chunk_offset + slot`. |
| @group(0) @binding(6) |
| var<storage, read_write> bin_data: array<u32>; |
| |
| // TODO: put in common place |
| // Describes the chunk of `bin_data` that one workgroup wrote for one bin. |
| struct BinHeader { |
| // Number of draw objects of the workgroup that cover the bin. |
| element_count: u32, |
| // Start of the bin's chunk, relative to `config.bin_data_start`. |
| chunk_offset: u32, |
| } |
| |
| // Output: one header per invocation, indexed by `global_invocation_id.x`; |
| // each invocation writes the header of the bin numbered `local_invocation_id.x`. |
| @group(0) @binding(7) |
| var<storage, read_write> bin_header: array<BinHeader>; |
| |
| // conversion factors from coordinates to bin |
| // 0.00390625 is exactly 1.0 / 256.0. Presumably N_TILE_X * TILE_WIDTH and |
| // N_TILE_Y * TILE_HEIGHT are both 256 (see the formulas kept below) - confirm |
| // against the config import before changing either side. |
| let SX = 0.00390625; |
| let SY = 0.00390625; |
| //let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH); |
| //let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT); |
| |
| // Workgroup size. The entry point repeats this value as a literal in |
| // `@workgroup_size(256)`, so the two must be kept in sync by hand. |
| let WG_SIZE = 256u; |
| // Number of 32-bit bitmap words per bin: one bit per invocation, 256 / 32. |
| let N_SLICE = 8u; |
| //let N_SLICE = WG_SIZE / 32u; |
| // Number of packed count words per bin: two 16-bit counts per u32, N_SLICE / 2. |
| let N_SUBSLICE = 4u; |
| |
| // Coverage bitmaps: bit `local_id.x & 31` of `sh_bitmaps[local_id.x / 32][bin]` |
| // is set when the draw object handled by that invocation covers `bin`. |
| var<workgroup> sh_bitmaps: array<array<atomic<u32>, N_TILE>, N_SLICE>; |
| // store count values packed two u16's to a u32 |
| // For entry `[i][bin]`, the low half holds the number of covering draw objects |
| // in slices 0 through 2 * i, the high half the number in slices 0 through 2 * i + 1. |
| var<workgroup> sh_count: array<array<u32, N_TILE>, N_SUBSLICE>; |
| // Per bin, the start of its chunk in `bin_data` (relative to |
| // `config.bin_data_start`), written by the invocation whose `local_id.x` is the bin. |
| var<workgroup> sh_chunk_offset: array<u32, N_TILE>; |
| |
| // Entry point of the binning stage. Each invocation plays two roles: |
| // |
| // 1. As draw object `global_id.x`: intersect the path bbox with the clip bbox, |
| // store the result in `intersected_bbox`, and set this invocation's bit in the |
| // workgroup bitmap of every bin the bbox touches. |
| // 2. As bin `local_id.x`: count the bits set for that bin, reserve that many |
| // entries from `bump.binning`, and write the bin's `BinHeader`. |
| // 3. As draw object again: write `element_ix` into `bin_data` for every touched |
| // bin, at the slot given by the number of lower-numbered invocations of the |
| // workgroup that touch the same bin. |
| // |
| // `wg_id` is declared but not used in the body. |
| @compute @workgroup_size(256) |
| fn main( |
| @builtin(global_invocation_id) global_id: vec3<u32>, |
| @builtin(local_invocation_id) local_id: vec3<u32>, |
| @builtin(workgroup_id) wg_id: vec3<u32>, |
| ) { |
| // Clear the bitmap words of bin `local_id.x` in every slice. |
| // NOTE(review): indexing by local_id.x relies on N_TILE matching the |
| // workgroup size (256); N_TILE is defined elsewhere - confirm. |
| for (var i = 0u; i < N_SLICE; i += 1u) { |
| atomicStore(&sh_bitmaps[i][local_id.x], 0u); |
| } |
| // All words must be zero before any invocation starts setting bits. |
| workgroupBarrier(); |
| |
| // Read inputs and determine coverage of bins |
| let element_ix = global_id.x; |
| // Bbox in bin units, half-open: [x0, x1) x [y0, y1). It stays empty (all zero) |
| // for invocations past the last draw object and for fully clipped objects. |
| var x0 = 0; |
| var y0 = 0; |
| var x1 = 0; |
| var y1 = 0; |
| if element_ix < config.n_drawobj { |
| let draw_monoid = draw_monoids[element_ix]; |
| // Default clip bbox is large enough to leave the path bbox unchanged. |
| var clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9); |
| // A clip_ix of 0 keeps the default; otherwise entry clip_ix - 1 is used. |
| if draw_monoid.clip_ix > 0u { |
| // TODO: `clip_ix` should always be valid as long as the monoids are correct. Leaving |
| // the bounds check in here for correctness but we should assert this condition instead |
| // once there is a debug-assertion mechanism. |
| clip_bbox = clip_bbox_buf[min(draw_monoid.clip_ix - 1u, config.n_clip - 1u)]; |
| } |
| // For clip elements, clip_box is the bbox of the clip path, |
| // intersected with enclosing clips. |
| // For other elements, it is the bbox of the enclosing clips. |
| // TODO check this is true |
| |
| let path_bbox = path_bbox_buf[draw_monoid.path_ix]; |
| // Convert the path bbox fields to f32 (PathBbox is declared elsewhere). |
| let pb = vec4<f32>(vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1)); |
| let bbox = bbox_intersect(clip_bbox, pb); |
| |
| // Stored even when the intersection is empty. |
| intersected_bbox[element_ix] = bbox; |
| |
| // `bbox_intersect` can result in a zero or negative area intersection if the path bbox lies |
| // outside the clip bbox. If that is the case, don't round up the bottom-right corner of the |
| // bbox and leave the coordinates at 0. This way the path will get clipped out and won't get |
| // assigned to a bin. |
| if bbox.x < bbox.z && bbox.y < bbox.w { |
| // floor the min corner and ceil the max corner so that partially |
| // covered bins are included. |
| x0 = i32(floor(bbox.x * SX)); |
| y0 = i32(floor(bbox.y * SY)); |
| x1 = i32(ceil(bbox.z * SX)); |
| y1 = i32(ceil(bbox.w * SY)); |
| } |
| } |
| // Size of the bin grid: tile counts rounded up to whole bins. |
| let width_in_bins = i32((config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X); |
| let height_in_bins = i32((config.height_in_tiles + N_TILE_Y - 1u) / N_TILE_Y); |
| // Restrict the bbox to the bin grid. |
| x0 = clamp(x0, 0, width_in_bins); |
| y0 = clamp(y0, 0, height_in_bins); |
| x1 = clamp(x1, 0, width_in_bins); |
| y1 = clamp(y1, 0, height_in_bins); |
| // A zero-width bbox must also get a zero row range: the loops below only wrap |
| // to the next row when x reaches x1, which never happens if x0 == x1. |
| if x0 == x1 { |
| y1 = y0; |
| } |
| var x = x0; |
| var y = y0; |
| // 32 invocations share one slice; each owns one bit of the slice's words. |
| let my_slice = local_id.x / 32u; |
| let my_mask = 1u << (local_id.x & 31u); |
| // Row-major walk over the bbox, flattened into a single loop. |
| while y < y1 { |
| atomicOr(&sh_bitmaps[my_slice][y * width_in_bins + x], my_mask); |
| x += 1; |
| if x == x1 { |
| x = x0; |
| y += 1; |
| } |
| } |
| |
| // Every coverage bit must be set before the bins are counted. |
| workgroupBarrier(); |
| // Allocate output segments |
| // From here until the next barrier the invocation acts for bin `local_id.x`. |
| // element_count is a running total over the slices; the totals after each even |
| // and odd slice are kept as the low and high 16 bits of one sh_count word. |
| var element_count = 0u; |
| for (var i = 0u; i < N_SUBSLICE; i += 1u) { |
| element_count += countOneBits(atomicLoad(&sh_bitmaps[i * 2u][local_id.x])); |
| let element_count_lo = element_count; |
| element_count += countOneBits(atomicLoad(&sh_bitmaps[i * 2u + 1u][local_id.x])); |
| let element_count_hi = element_count; |
| let element_count_packed = element_count_lo | (element_count_hi << 16u); |
| sh_count[i][local_id.x] = element_count_packed; |
| } |
| // element_count is the number of draw objects covering this thread's bin |
| // atomicAdd returns the previous value, which is the start of the reserved chunk. |
| var chunk_offset = atomicAdd(&bump.binning, element_count); |
| // On overflow, flag the failure and fall back to offset 0. The writes to |
| // bin_data below still take place, using that offset. |
| if chunk_offset + element_count > config.binning_size { |
| chunk_offset = 0u; |
| atomicOr(&bump.failed, STAGE_BINNING); |
| } |
| sh_chunk_offset[local_id.x] = chunk_offset; |
| bin_header[global_id.x].element_count = element_count; |
| bin_header[global_id.x].chunk_offset = chunk_offset; |
| // sh_count and sh_chunk_offset must be complete before they are read below. |
| workgroupBarrier(); |
| |
| // loop over bbox of bins touched by this draw object |
| x = x0; |
| y = y0; |
| while y < y1 { |
| let bin_ix = y * width_in_bins + x; |
| let out_mask = atomicLoad(&sh_bitmaps[my_slice][bin_ix]); |
| // I think this predicate will always be true... |
| if (out_mask & my_mask) != 0u { |
| // Lower-numbered invocations of the same slice that cover this bin. |
| var idx = countOneBits(out_mask & (my_mask - 1u)); |
| if my_slice > 0u { |
| // Add the total of all earlier slices: the running count stored |
| // for slice my_slice - 1, unpacked from its 16-bit half. |
| let count_ix = my_slice - 1u; |
| let count_packed = sh_count[count_ix / 2u][bin_ix]; |
| idx += (count_packed >> (16u * (count_ix & 1u))) & 0xffffu; |
| } |
| let offset = config.bin_data_start + sh_chunk_offset[bin_ix]; |
| bin_data[offset + idx] = element_ix; |
| } |
| x += 1; |
| if x == x1 { |
| x = x0; |
| y += 1; |
| } |
| } |
| } |